0001 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
0002 /*
0003  * Copyright (c) 2013-2021, Mellanox Technologies inc.  All rights reserved.
0004  */
0005 
0006 #include <linux/interrupt.h>
0007 #include <linux/notifier.h>
0008 #include <linux/mlx5/driver.h>
0009 #include <linux/mlx5/vport.h>
0010 #include <linux/mlx5/eq.h>
0011 #ifdef CONFIG_RFS_ACCEL
0012 #include <linux/cpu_rmap.h>
0013 #endif
0014 #include "mlx5_core.h"
0015 #include "lib/eq.h"
0016 #include "fpga/core.h"
0017 #include "eswitch.h"
0018 #include "lib/clock.h"
0019 #include "diag/fw_tracer.h"
0020 #include "mlx5_irq.h"
0021 #include "devlink.h"
0022 
0023 enum {
0024     MLX5_EQE_OWNER_INIT_VAL = 0x1,
0025 };
0026 
0027 enum {
0028     MLX5_EQ_STATE_ARMED     = 0x9,
0029     MLX5_EQ_STATE_FIRED     = 0xa,
0030     MLX5_EQ_STATE_ALWAYS_ARMED  = 0xb,
0031 };
0032 
0033 enum {
0034     MLX5_EQ_DOORBEL_OFFSET  = 0x40,
0035 };
0036 
0037 /* budget must be smaller than MLX5_NUM_SPARE_EQE to guarantee that we update
0038  * the ci before we poll all the entries in the EQ. MLX5_NUM_SPARE_EQE is
0039  * used to set the EQ size, so the budget is also smaller than the EQ size.
0040  */
0041 enum {
0042     MLX5_EQ_POLLING_BUDGET  = 128,
0043 };
0044 
0045 static_assert(MLX5_EQ_POLLING_BUDGET <= MLX5_NUM_SPARE_EQE);
0046 
0047 struct mlx5_eq_table {
0048     struct list_head        comp_eqs_list;
0049     struct mlx5_eq_async    pages_eq;
0050     struct mlx5_eq_async    cmd_eq;
0051     struct mlx5_eq_async    async_eq;
0052 
0053     struct atomic_notifier_head nh[MLX5_EVENT_TYPE_MAX];
0054 
0055     /* CQ error events are delivered on the async_eq */
0056     struct mlx5_nb          cq_err_nb;
0057 
0058     struct mutex            lock; /* sync async EQ creation/destruction */
0059     int         num_comp_eqs;
0060     struct mlx5_irq_table   *irq_table;
0061     struct mlx5_irq         **comp_irqs;
0062     struct mlx5_irq         *ctrl_irq;
0063 #ifdef CONFIG_RFS_ACCEL
0064     struct cpu_rmap     *rmap;
0065 #endif
0066 };
0067 
0068 #define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG)       | \
0069                    (1ull << MLX5_EVENT_TYPE_COMM_EST)       | \
0070                    (1ull << MLX5_EVENT_TYPE_SQ_DRAINED)     | \
0071                    (1ull << MLX5_EVENT_TYPE_CQ_ERROR)       | \
0072                    (1ull << MLX5_EVENT_TYPE_WQ_CATAS_ERROR)     | \
0073                    (1ull << MLX5_EVENT_TYPE_PATH_MIG_FAILED)    | \
0074                    (1ull << MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \
0075                    (1ull << MLX5_EVENT_TYPE_WQ_ACCESS_ERROR)    | \
0076                    (1ull << MLX5_EVENT_TYPE_PORT_CHANGE)        | \
0077                    (1ull << MLX5_EVENT_TYPE_SRQ_CATAS_ERROR)    | \
0078                    (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE)       | \
0079                    (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT))
0080 
0081 static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn)
0082 {
0083     u32 in[MLX5_ST_SZ_DW(destroy_eq_in)] = {};
0084 
0085     MLX5_SET(destroy_eq_in, in, opcode, MLX5_CMD_OP_DESTROY_EQ);
0086     MLX5_SET(destroy_eq_in, in, eq_number, eqn);
0087     return mlx5_cmd_exec_in(dev, destroy_eq, in);
0088 }
0089 
0090 /* caller must eventually call mlx5_cq_put on the returned cq */
0091 static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
0092 {
0093     struct mlx5_cq_table *table = &eq->cq_table;
0094     struct mlx5_core_cq *cq = NULL;
0095 
0096     rcu_read_lock();
0097     cq = radix_tree_lookup(&table->tree, cqn);
0098     if (likely(cq))
0099         mlx5_cq_hold(cq);
0100     rcu_read_unlock();
0101 
0102     return cq;
0103 }
0104 
0105 static int mlx5_eq_comp_int(struct notifier_block *nb,
0106                 __always_unused unsigned long action,
0107                 __always_unused void *data)
0108 {
0109     struct mlx5_eq_comp *eq_comp =
0110         container_of(nb, struct mlx5_eq_comp, irq_nb);
0111     struct mlx5_eq *eq = &eq_comp->core;
0112     struct mlx5_eqe *eqe;
0113     int num_eqes = 0;
0114     u32 cqn = -1;
0115 
0116     eqe = next_eqe_sw(eq);
0117     if (!eqe)
0118         goto out;
0119 
0120     do {
0121         struct mlx5_core_cq *cq;
0122 
0123         /* Make sure we read EQ entry contents after we've
0124          * checked the ownership bit.
0125          */
0126         dma_rmb();
0127         /* Assume (eqe->type) is always MLX5_EVENT_TYPE_COMP */
0128         cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
0129 
0130         cq = mlx5_eq_cq_get(eq, cqn);
0131         if (likely(cq)) {
0132             ++cq->arm_sn;
0133             cq->comp(cq, eqe);
0134             mlx5_cq_put(cq);
0135         } else {
0136             dev_dbg_ratelimited(eq->dev->device,
0137                         "Completion event for bogus CQ 0x%x\n", cqn);
0138         }
0139 
0140         ++eq->cons_index;
0141 
0142     } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq)));
0143 
0144 out:
0145     eq_update_ci(eq, 1);
0146 
0147     if (cqn != -1)
0148         tasklet_schedule(&eq_comp->tasklet_ctx.task);
0149 
0150     return 0;
0151 }
0152 
0153 /* Some architectures don't latch interrupts while they are disabled, so
0154  * mlx5_eq_poll_irq_disabled() could end up losing the very interrupts it is
0155  * trying not to lose. It is not recommended to use it unless it is the last
0156  * resort.
0157  */
0158 u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq)
0159 {
0160     u32 count_eqe;
0161 
0162     disable_irq(eq->core.irqn);
0163     count_eqe = eq->core.cons_index;
0164     mlx5_eq_comp_int(&eq->irq_nb, 0, NULL);
0165     count_eqe = eq->core.cons_index - count_eqe;
0166     enable_irq(eq->core.irqn);
0167 
0168     return count_eqe;
0169 }
0170 
0171 static void mlx5_eq_async_int_lock(struct mlx5_eq_async *eq, bool recovery,
0172                    unsigned long *flags)
0173     __acquires(&eq->lock)
0174 {
0175     if (!recovery)
0176         spin_lock(&eq->lock);
0177     else
0178         spin_lock_irqsave(&eq->lock, *flags);
0179 }
0180 
0181 static void mlx5_eq_async_int_unlock(struct mlx5_eq_async *eq, bool recovery,
0182                      unsigned long *flags)
0183     __releases(&eq->lock)
0184 {
0185     if (!recovery)
0186         spin_unlock(&eq->lock);
0187     else
0188         spin_unlock_irqrestore(&eq->lock, *flags);
0189 }
0190 
0191 enum async_eq_nb_action {
0192     ASYNC_EQ_IRQ_HANDLER = 0,
0193     ASYNC_EQ_RECOVER = 1,
0194 };
0195 
0196 static int mlx5_eq_async_int(struct notifier_block *nb,
0197                  unsigned long action, void *data)
0198 {
0199     struct mlx5_eq_async *eq_async =
0200         container_of(nb, struct mlx5_eq_async, irq_nb);
0201     struct mlx5_eq *eq = &eq_async->core;
0202     struct mlx5_eq_table *eqt;
0203     struct mlx5_core_dev *dev;
0204     struct mlx5_eqe *eqe;
0205     unsigned long flags;
0206     int num_eqes = 0;
0207     bool recovery;
0208 
0209     dev = eq->dev;
0210     eqt = dev->priv.eq_table;
0211 
0212     recovery = action == ASYNC_EQ_RECOVER;
0213     mlx5_eq_async_int_lock(eq_async, recovery, &flags);
0214 
0215     eqe = next_eqe_sw(eq);
0216     if (!eqe)
0217         goto out;
0218 
0219     do {
0220         /*
0221          * Make sure we read EQ entry contents after we've
0222          * checked the ownership bit.
0223          */
0224         dma_rmb();
0225 
0226         atomic_notifier_call_chain(&eqt->nh[eqe->type], eqe->type, eqe);
0227         atomic_notifier_call_chain(&eqt->nh[MLX5_EVENT_TYPE_NOTIFY_ANY], eqe->type, eqe);
0228 
0229         ++eq->cons_index;
0230 
0231     } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq)));
0232 
0233 out:
0234     eq_update_ci(eq, 1);
0235     mlx5_eq_async_int_unlock(eq_async, recovery, &flags);
0236 
0237     return unlikely(recovery) ? num_eqes : 0;
0238 }
0239 
0240 void mlx5_cmd_eq_recover(struct mlx5_core_dev *dev)
0241 {
0242     struct mlx5_eq_async *eq = &dev->priv.eq_table->cmd_eq;
0243     int eqes;
0244 
0245     eqes = mlx5_eq_async_int(&eq->irq_nb, ASYNC_EQ_RECOVER, NULL);
0246     if (eqes)
0247         mlx5_core_warn(dev, "Recovered %d EQEs on cmd_eq\n", eqes);
0248 }
0249 
0250 static void init_eq_buf(struct mlx5_eq *eq)
0251 {
0252     struct mlx5_eqe *eqe;
0253     int i;
0254 
0255     for (i = 0; i < eq_get_size(eq); i++) {
0256         eqe = get_eqe(eq, i);
0257         eqe->owner = MLX5_EQE_OWNER_INIT_VAL;
0258     }
0259 }
0260 
0261 static int
0262 create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
0263           struct mlx5_eq_param *param)
0264 {
0265     u8 log_eq_size = order_base_2(param->nent + MLX5_NUM_SPARE_EQE);
0266     struct mlx5_cq_table *cq_table = &eq->cq_table;
0267     u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
0268     u8 log_eq_stride = ilog2(MLX5_EQE_SIZE);
0269     struct mlx5_priv *priv = &dev->priv;
0270     __be64 *pas;
0271     u16 vecidx;
0272     void *eqc;
0273     int inlen;
0274     u32 *in;
0275     int err;
0276     int i;
0277 
0278     /* Init CQ table */
0279     memset(cq_table, 0, sizeof(*cq_table));
0280     spin_lock_init(&cq_table->lock);
0281     INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
0282 
0283     eq->cons_index = 0;
0284 
0285     err = mlx5_frag_buf_alloc_node(dev, wq_get_byte_sz(log_eq_size, log_eq_stride),
0286                        &eq->frag_buf, dev->priv.numa_node);
0287     if (err)
0288         return err;
0289 
0290     mlx5_init_fbc(eq->frag_buf.frags, log_eq_stride, log_eq_size, &eq->fbc);
0291     init_eq_buf(eq);
0292 
0293     eq->irq = param->irq;
0294     vecidx = mlx5_irq_get_index(eq->irq);
0295 
0296     inlen = MLX5_ST_SZ_BYTES(create_eq_in) +
0297         MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->frag_buf.npages;
0298 
0299     in = kvzalloc(inlen, GFP_KERNEL);
0300     if (!in) {
0301         err = -ENOMEM;
0302         goto err_buf;
0303     }
0304 
0305     pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas);
0306     mlx5_fill_page_frag_array(&eq->frag_buf, pas);
0307 
0308     MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ);
0309     if (!param->mask[0] && MLX5_CAP_GEN(dev, log_max_uctx))
0310         MLX5_SET(create_eq_in, in, uid, MLX5_SHARED_RESOURCE_UID);
0311 
0312     for (i = 0; i < 4; i++)
0313         MLX5_ARRAY_SET64(create_eq_in, in, event_bitmask, i,
0314                  param->mask[i]);
0315 
0316     eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry);
0317     MLX5_SET(eqc, eqc, log_eq_size, eq->fbc.log_sz);
0318     MLX5_SET(eqc, eqc, uar_page, priv->uar->index);
0319     MLX5_SET(eqc, eqc, intr, vecidx);
0320     MLX5_SET(eqc, eqc, log_page_size,
0321          eq->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
0322 
0323     err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
0324     if (err)
0325         goto err_in;
0326 
0327     eq->vecidx = vecidx;
0328     eq->eqn = MLX5_GET(create_eq_out, out, eq_number);
0329     eq->irqn = pci_irq_vector(dev->pdev, vecidx);
0330     eq->dev = dev;
0331     eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET;
0332 
0333     err = mlx5_debug_eq_add(dev, eq);
0334     if (err)
0335         goto err_eq;
0336 
0337     kvfree(in);
0338     return 0;
0339 
0340 err_eq:
0341     mlx5_cmd_destroy_eq(dev, eq->eqn);
0342 
0343 err_in:
0344     kvfree(in);
0345 
0346 err_buf:
0347     mlx5_frag_buf_free(dev, &eq->frag_buf);
0348     return err;
0349 }
0350 
0351 /**
0352  * mlx5_eq_enable - Enable EQ for receiving EQEs
0353  * @dev: Device which owns the eq
0354  * @eq: EQ to enable
0355  * @nb: Notifier call block
0356  *
0357  * Must be called after the EQ is created in the device.
0358  *
0359  * Return: 0 if no error
0360  */
0361 int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
0362            struct notifier_block *nb)
0363 {
0364     int err;
0365 
0366     err = mlx5_irq_attach_nb(eq->irq, nb);
0367     if (!err)
0368         eq_update_ci(eq, 1);
0369 
0370     return err;
0371 }
0372 EXPORT_SYMBOL(mlx5_eq_enable);
0373 
0374 /**
0375  * mlx5_eq_disable - Disable EQ for receiving EQEs
0376  * @dev: Device which owns the eq
0377  * @eq: EQ to disable
0378  * @nb: Notifier call block
0379  *
0380  * Must be called before the EQ is destroyed.
0381  */
0382 void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
0383              struct notifier_block *nb)
0384 {
0385     mlx5_irq_detach_nb(eq->irq, nb);
0386 }
0387 EXPORT_SYMBOL(mlx5_eq_disable);
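
/* Illustrative usage sketch (editorial addition, not part of the driver):
 * the typical pairing of mlx5_eq_enable()/mlx5_eq_disable() around an EQ's
 * lifetime, mirroring setup_async_eq()/cleanup_async_eq() below. The names
 * my_eq_ctx, my_eq_handler, my_eq_start and my_eq_stop are hypothetical.
 */
#if 0   /* example only, not compiled */
struct my_eq_ctx {
    struct mlx5_eq core;
    struct notifier_block irq_nb;
};

static int my_eq_handler(struct notifier_block *nb, unsigned long action,
             void *data)
{
    struct my_eq_ctx *ctx = container_of(nb, struct my_eq_ctx, irq_nb);

    /* poll ctx->core with mlx5_eq_get_eqe()/mlx5_eq_update_ci() here */
    return 0;
}

static int my_eq_start(struct mlx5_core_dev *dev, struct my_eq_ctx *ctx)
{
    ctx->irq_nb.notifier_call = my_eq_handler;
    /* the EQ must already exist in the device, e.g. created with
     * mlx5_eq_create_generic() below
     */
    return mlx5_eq_enable(dev, &ctx->core, &ctx->irq_nb);
}

static void my_eq_stop(struct mlx5_core_dev *dev, struct my_eq_ctx *ctx)
{
    /* must run before the EQ is destroyed */
    mlx5_eq_disable(dev, &ctx->core, &ctx->irq_nb);
}
#endif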
0388 
0389 static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
0390 {
0391     int err;
0392 
0393     mlx5_debug_eq_remove(dev, eq);
0394 
0395     err = mlx5_cmd_destroy_eq(dev, eq->eqn);
0396     if (err)
0397         mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
0398                    eq->eqn);
0399 
0400     mlx5_frag_buf_free(dev, &eq->frag_buf);
0401     return err;
0402 }
0403 
0404 int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
0405 {
0406     struct mlx5_cq_table *table = &eq->cq_table;
0407     int err;
0408 
0409     spin_lock(&table->lock);
0410     err = radix_tree_insert(&table->tree, cq->cqn, cq);
0411     spin_unlock(&table->lock);
0412 
0413     return err;
0414 }
0415 
0416 void mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
0417 {
0418     struct mlx5_cq_table *table = &eq->cq_table;
0419     struct mlx5_core_cq *tmp;
0420 
0421     spin_lock(&table->lock);
0422     tmp = radix_tree_delete(&table->tree, cq->cqn);
0423     spin_unlock(&table->lock);
0424 
0425     if (!tmp) {
0426         mlx5_core_dbg(eq->dev, "cq 0x%x not found in eq 0x%x tree\n",
0427                   cq->cqn, eq->eqn);
0428         return;
0429     }
0430 
0431     if (tmp != cq)
0432         mlx5_core_dbg(eq->dev, "corruption on cqn 0x%x in eq 0x%x\n",
0433                   cq->cqn, eq->eqn);
0434 }
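
/* Illustrative usage sketch (editorial addition): how a CQ is bound to and
 * unbound from a completion EQ so that mlx5_eq_cq_get() above can resolve
 * completion events. my_bind_cq/my_unbind_cq are hypothetical names; the
 * real caller is the mlx5 CQ layer.
 */
#if 0   /* example only, not compiled */
static int my_bind_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
              int eqn)
{
    struct mlx5_eq_comp *eq = mlx5_eqn2comp_eq(dev, eqn);

    if (IS_ERR(eq))
        return PTR_ERR(eq);
    /* make the CQ visible to this EQ's completion handler */
    return mlx5_eq_add_cq(&eq->core, cq);
}

static void my_unbind_cq(struct mlx5_eq_comp *eq, struct mlx5_core_cq *cq)
{
    /* must happen before the CQ is destroyed */
    mlx5_eq_del_cq(&eq->core, cq);
}
#endif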
0435 
0436 int mlx5_eq_table_init(struct mlx5_core_dev *dev)
0437 {
0438     struct mlx5_eq_table *eq_table;
0439     int i;
0440 
0441     eq_table = kvzalloc_node(sizeof(*eq_table), GFP_KERNEL,
0442                  dev->priv.numa_node);
0443     if (!eq_table)
0444         return -ENOMEM;
0445 
0446     dev->priv.eq_table = eq_table;
0447 
0448     mlx5_eq_debugfs_init(dev);
0449 
0450     mutex_init(&eq_table->lock);
0451     for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++)
0452         ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]);
0453 
0454     eq_table->irq_table = mlx5_irq_table_get(dev);
0455     return 0;
0456 }
0457 
0458 void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev)
0459 {
0460     mlx5_eq_debugfs_cleanup(dev);
0461     kvfree(dev->priv.eq_table);
0462 }
0463 
0464 /* Async EQs */
0465 
0466 static int create_async_eq(struct mlx5_core_dev *dev,
0467                struct mlx5_eq *eq, struct mlx5_eq_param *param)
0468 {
0469     struct mlx5_eq_table *eq_table = dev->priv.eq_table;
0470     int err;
0471 
0472     mutex_lock(&eq_table->lock);
0473     err = create_map_eq(dev, eq, param);
0474     mutex_unlock(&eq_table->lock);
0475     return err;
0476 }
0477 
0478 static int destroy_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
0479 {
0480     struct mlx5_eq_table *eq_table = dev->priv.eq_table;
0481     int err;
0482 
0483     mutex_lock(&eq_table->lock);
0484     err = destroy_unmap_eq(dev, eq);
0485     mutex_unlock(&eq_table->lock);
0486     return err;
0487 }
0488 
0489 static int cq_err_event_notifier(struct notifier_block *nb,
0490                  unsigned long type, void *data)
0491 {
0492     struct mlx5_eq_table *eqt;
0493     struct mlx5_core_cq *cq;
0494     struct mlx5_eqe *eqe;
0495     struct mlx5_eq *eq;
0496     u32 cqn;
0497 
0498     /* type == MLX5_EVENT_TYPE_CQ_ERROR */
0499 
0500     eqt = mlx5_nb_cof(nb, struct mlx5_eq_table, cq_err_nb);
0501     eq  = &eqt->async_eq.core;
0502     eqe = data;
0503 
0504     cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
0505     mlx5_core_warn(eq->dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
0506                cqn, eqe->data.cq_err.syndrome);
0507 
0508     cq = mlx5_eq_cq_get(eq, cqn);
0509     if (unlikely(!cq)) {
0510         mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn);
0511         return NOTIFY_OK;
0512     }
0513 
0514     if (cq->event)
0515         cq->event(cq, type);
0516 
0517     mlx5_cq_put(cq);
0518 
0519     return NOTIFY_OK;
0520 }
0521 
0522 static void gather_user_async_events(struct mlx5_core_dev *dev, u64 mask[4])
0523 {
0524     __be64 *user_unaffiliated_events;
0525     __be64 *user_affiliated_events;
0526     int i;
0527 
0528     user_affiliated_events =
0529         MLX5_CAP_DEV_EVENT(dev, user_affiliated_events);
0530     user_unaffiliated_events =
0531         MLX5_CAP_DEV_EVENT(dev, user_unaffiliated_events);
0532 
0533     for (i = 0; i < 4; i++)
0534         mask[i] |= be64_to_cpu(user_affiliated_events[i] |
0535                        user_unaffiliated_events[i]);
0536 }
0537 
0538 static void gather_async_events_mask(struct mlx5_core_dev *dev, u64 mask[4])
0539 {
0540     u64 async_event_mask = MLX5_ASYNC_EVENT_MASK;
0541 
0542     if (MLX5_VPORT_MANAGER(dev))
0543         async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE);
0544 
0545     if (MLX5_CAP_GEN(dev, general_notification_event))
0546         async_event_mask |= (1ull << MLX5_EVENT_TYPE_GENERAL_EVENT);
0547 
0548     if (MLX5_CAP_GEN(dev, port_module_event))
0549         async_event_mask |= (1ull << MLX5_EVENT_TYPE_PORT_MODULE_EVENT);
0550     else
0551         mlx5_core_dbg(dev, "port_module_event is not set\n");
0552 
0553     if (MLX5_PPS_CAP(dev))
0554         async_event_mask |= (1ull << MLX5_EVENT_TYPE_PPS_EVENT);
0555 
0556     if (MLX5_CAP_GEN(dev, fpga))
0557         async_event_mask |= (1ull << MLX5_EVENT_TYPE_FPGA_ERROR) |
0558                     (1ull << MLX5_EVENT_TYPE_FPGA_QP_ERROR);
0559     if (MLX5_CAP_GEN_MAX(dev, dct))
0560         async_event_mask |= (1ull << MLX5_EVENT_TYPE_DCT_DRAINED);
0561 
0562     if (MLX5_CAP_GEN(dev, temp_warn_event))
0563         async_event_mask |= (1ull << MLX5_EVENT_TYPE_TEMP_WARN_EVENT);
0564 
0565     if (MLX5_CAP_MCAM_REG(dev, tracer_registers))
0566         async_event_mask |= (1ull << MLX5_EVENT_TYPE_DEVICE_TRACER);
0567 
0568     if (MLX5_CAP_GEN(dev, max_num_of_monitor_counters))
0569         async_event_mask |= (1ull << MLX5_EVENT_TYPE_MONITOR_COUNTER);
0570 
0571     if (mlx5_eswitch_is_funcs_handler(dev))
0572         async_event_mask |=
0573             (1ull << MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED);
0574 
0575     if (MLX5_CAP_GEN_MAX(dev, vhca_state))
0576         async_event_mask |= (1ull << MLX5_EVENT_TYPE_VHCA_STATE_CHANGE);
0577 
0578     mask[0] = async_event_mask;
0579 
0580     if (MLX5_CAP_GEN(dev, event_cap))
0581         gather_user_async_events(dev, mask);
0582 }
0583 
0584 static int
0585 setup_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq_async *eq,
0586            struct mlx5_eq_param *param, const char *name)
0587 {
0588     int err;
0589 
0590     eq->irq_nb.notifier_call = mlx5_eq_async_int;
0591     spin_lock_init(&eq->lock);
0592 
0593     err = create_async_eq(dev, &eq->core, param);
0594     if (err) {
0595         mlx5_core_warn(dev, "failed to create %s EQ %d\n", name, err);
0596         return err;
0597     }
0598     err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb);
0599     if (err) {
0600         mlx5_core_warn(dev, "failed to enable %s EQ %d\n", name, err);
0601         destroy_async_eq(dev, &eq->core);
0602     }
0603     return err;
0604 }
0605 
0606 static void cleanup_async_eq(struct mlx5_core_dev *dev,
0607                  struct mlx5_eq_async *eq, const char *name)
0608 {
0609     int err;
0610 
0611     mlx5_eq_disable(dev, &eq->core, &eq->irq_nb);
0612     err = destroy_async_eq(dev, &eq->core);
0613     if (err)
0614         mlx5_core_err(dev, "failed to destroy %s eq, err(%d)\n",
0615                   name, err);
0616 }
0617 
0618 static u16 async_eq_depth_devlink_param_get(struct mlx5_core_dev *dev)
0619 {
0620     struct devlink *devlink = priv_to_devlink(dev);
0621     union devlink_param_value val;
0622     int err;
0623 
0624     err = devlink_param_driverinit_value_get(devlink,
0625                          DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE,
0626                          &val);
0627     if (!err)
0628         return val.vu32;
0629     mlx5_core_dbg(dev, "Failed to get param. using default. err = %d\n", err);
0630     return MLX5_NUM_ASYNC_EQE;
0631 }
0632 static int create_async_eqs(struct mlx5_core_dev *dev)
0633 {
0634     struct mlx5_eq_table *table = dev->priv.eq_table;
0635     struct mlx5_eq_param param = {};
0636     int err;
0637 
0638     /* All the async_eqs use a single IRQ; request one IRQ and share its
0639      * index among all the async_eqs of this device.
0640      */
0641     table->ctrl_irq = mlx5_ctrl_irq_request(dev);
0642     if (IS_ERR(table->ctrl_irq))
0643         return PTR_ERR(table->ctrl_irq);
0644 
0645     MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR);
0646     mlx5_eq_notifier_register(dev, &table->cq_err_nb);
0647 
0648     param = (struct mlx5_eq_param) {
0649         .irq = table->ctrl_irq,
0650         .nent = MLX5_NUM_CMD_EQE,
0651         .mask[0] = 1ull << MLX5_EVENT_TYPE_CMD,
0652     };
0653     mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_CREATE_EQ);
0654     err = setup_async_eq(dev, &table->cmd_eq, &param, "cmd");
0655     if (err)
0656         goto err1;
0657 
0658     mlx5_cmd_use_events(dev);
0659     mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
0660 
0661     param = (struct mlx5_eq_param) {
0662         .irq = table->ctrl_irq,
0663         .nent = async_eq_depth_devlink_param_get(dev),
0664     };
0665 
0666     gather_async_events_mask(dev, param.mask);
0667     err = setup_async_eq(dev, &table->async_eq, &param, "async");
0668     if (err)
0669         goto err2;
0670 
0671     param = (struct mlx5_eq_param) {
0672         .irq = table->ctrl_irq,
0673         .nent = /* TODO: sriov max_vf + */ 1,
0674         .mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST,
0675     };
0676 
0677     err = setup_async_eq(dev, &table->pages_eq, &param, "pages");
0678     if (err)
0679         goto err3;
0680 
0681     return 0;
0682 
0683 err3:
0684     cleanup_async_eq(dev, &table->async_eq, "async");
0685 err2:
0686     mlx5_cmd_use_polling(dev);
0687     cleanup_async_eq(dev, &table->cmd_eq, "cmd");
0688 err1:
0689     mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
0690     mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
0691     mlx5_ctrl_irq_release(table->ctrl_irq);
0692     return err;
0693 }
0694 
0695 static void destroy_async_eqs(struct mlx5_core_dev *dev)
0696 {
0697     struct mlx5_eq_table *table = dev->priv.eq_table;
0698 
0699     cleanup_async_eq(dev, &table->pages_eq, "pages");
0700     cleanup_async_eq(dev, &table->async_eq, "async");
0701     mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_DESTROY_EQ);
0702     mlx5_cmd_use_polling(dev);
0703     cleanup_async_eq(dev, &table->cmd_eq, "cmd");
0704     mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
0705     mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
0706     mlx5_ctrl_irq_release(table->ctrl_irq);
0707 }
0708 
0709 struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev)
0710 {
0711     return &dev->priv.eq_table->async_eq.core;
0712 }
0713 
0714 void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev)
0715 {
0716     synchronize_irq(dev->priv.eq_table->async_eq.core.irqn);
0717 }
0718 
0719 void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev)
0720 {
0721     synchronize_irq(dev->priv.eq_table->cmd_eq.core.irqn);
0722 }
0723 
0724 /* Generic EQ API for mlx5_core consumers
0725  * Needed For RDMA ODP EQ for now
0726  */
0727 struct mlx5_eq *
0728 mlx5_eq_create_generic(struct mlx5_core_dev *dev,
0729                struct mlx5_eq_param *param)
0730 {
0731     struct mlx5_eq *eq = kvzalloc_node(sizeof(*eq), GFP_KERNEL,
0732                        dev->priv.numa_node);
0733     int err;
0734 
0735     if (!eq)
0736         return ERR_PTR(-ENOMEM);
0737 
0738     param->irq = dev->priv.eq_table->ctrl_irq;
0739     err = create_async_eq(dev, eq, param);
0740     if (err) {
0741         kvfree(eq);
0742         eq = ERR_PTR(err);
0743     }
0744 
0745     return eq;
0746 }
0747 EXPORT_SYMBOL(mlx5_eq_create_generic);
0748 
0749 int mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
0750 {
0751     int err;
0752 
0753     if (IS_ERR(eq))
0754         return -EINVAL;
0755 
0756     err = destroy_async_eq(dev, eq);
0757     if (err)
0758         goto out;
0759 
0760     kvfree(eq);
0761 out:
0762     return err;
0763 }
0764 EXPORT_SYMBOL(mlx5_eq_destroy_generic);
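
/* Illustrative usage sketch (editorial addition): the lifecycle of a generic
 * EQ as an mlx5_core consumer such as the RDMA driver would drive it.
 * MY_EVENT_TYPE, my_nb, my_create_eq and my_destroy_eq are hypothetical
 * placeholders; the .nent value is arbitrary.
 */
#if 0   /* example only, not compiled */
static struct mlx5_eq *my_create_eq(struct mlx5_core_dev *dev,
                    struct notifier_block *my_nb)
{
    struct mlx5_eq_param param = {
        .nent = 64,                         /* spare EQEs are added on top */
        .mask[0] = 1ull << MY_EVENT_TYPE,   /* event(s) of interest */
    };
    struct mlx5_eq *eq;
    int err;

    /* param.irq is filled in by mlx5_eq_create_generic() (ctrl IRQ) */
    eq = mlx5_eq_create_generic(dev, &param);
    if (IS_ERR(eq))
        return eq;

    err = mlx5_eq_enable(dev, eq, my_nb);
    if (err) {
        mlx5_eq_destroy_generic(dev, eq);
        return ERR_PTR(err);
    }
    return eq;
}

static void my_destroy_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
              struct notifier_block *my_nb)
{
    mlx5_eq_disable(dev, eq, my_nb);
    mlx5_eq_destroy_generic(dev, eq);
}
#endif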
0765 
0766 struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc)
0767 {
0768     u32 ci = eq->cons_index + cc;
0769     u32 nent = eq_get_size(eq);
0770     struct mlx5_eqe *eqe;
0771 
0772     eqe = get_eqe(eq, ci & (nent - 1));
0773     eqe = ((eqe->owner & 1) ^ !!(ci & nent)) ? NULL : eqe;
0774     /* Make sure we read EQ entry contents after we've
0775      * checked the ownership bit.
0776      */
0777     if (eqe)
0778         dma_rmb();
0779 
0780     return eqe;
0781 }
0782 EXPORT_SYMBOL(mlx5_eq_get_eqe);
0783 
0784 void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm)
0785 {
0786     __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
0787     u32 val;
0788 
0789     eq->cons_index += cc;
0790     val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);
0791 
0792     __raw_writel((__force u32)cpu_to_be32(val), addr);
0793     /* We still want ordering, just not swabbing, so add a barrier */
0794     wmb();
0795 }
0796 EXPORT_SYMBOL(mlx5_eq_update_ci);
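
/* Illustrative usage sketch (editorial addition): the polling pattern that
 * mlx5_eq_get_eqe()/mlx5_eq_update_ci() support for generic EQ consumers.
 * my_poll_eq and my_handle_eqe are hypothetical; a real consumer may also
 * call mlx5_eq_update_ci() periodically inside the loop to bound cc.
 */
#if 0   /* example only, not compiled */
static void my_poll_eq(struct mlx5_eq *eq)
{
    struct mlx5_eqe *eqe;
    u32 cc = 0;

    /* mlx5_eq_get_eqe() returns NULL once the next entry is still owned
     * by HW; it does not advance the consumer index itself.
     */
    while ((eqe = mlx5_eq_get_eqe(eq, cc))) {
        my_handle_eqe(eqe);     /* hypothetical per-event handler */
        cc++;
    }

    /* publish the new consumer index and re-arm the EQ; the doorbell
     * value is (eqn << 24) | (ci & 0xffffff), as written above.
     */
    mlx5_eq_update_ci(eq, cc, true);
}
#endif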
0797 
0798 static void comp_irqs_release(struct mlx5_core_dev *dev)
0799 {
0800     struct mlx5_eq_table *table = dev->priv.eq_table;
0801 
0802     if (mlx5_core_is_sf(dev))
0803         mlx5_irq_affinity_irqs_release(dev, table->comp_irqs, table->num_comp_eqs);
0804     else
0805         mlx5_irqs_release_vectors(table->comp_irqs, table->num_comp_eqs);
0806     kfree(table->comp_irqs);
0807 }
0808 
0809 static int comp_irqs_request(struct mlx5_core_dev *dev)
0810 {
0811     struct mlx5_eq_table *table = dev->priv.eq_table;
0812     int ncomp_eqs = table->num_comp_eqs;
0813     u16 *cpus;
0814     int ret;
0815     int i;
0816 
0817     ncomp_eqs = table->num_comp_eqs;
0818     table->comp_irqs = kcalloc(ncomp_eqs, sizeof(*table->comp_irqs), GFP_KERNEL);
0819     if (!table->comp_irqs)
0820         return -ENOMEM;
0821     if (mlx5_core_is_sf(dev)) {
0822         ret = mlx5_irq_affinity_irqs_request_auto(dev, ncomp_eqs, table->comp_irqs);
0823         if (ret < 0)
0824             goto free_irqs;
0825         return ret;
0826     }
0827 
0828     cpus = kcalloc(ncomp_eqs, sizeof(*cpus), GFP_KERNEL);
0829     if (!cpus) {
0830         ret = -ENOMEM;
0831         goto free_irqs;
0832     }
0833     for (i = 0; i < ncomp_eqs; i++)
0834         cpus[i] = cpumask_local_spread(i, dev->priv.numa_node);
0835     ret = mlx5_irqs_request_vectors(dev, cpus, ncomp_eqs, table->comp_irqs);
0836     kfree(cpus);
0837     if (ret < 0)
0838         goto free_irqs;
0839     return ret;
0840 
0841 free_irqs:
0842     kfree(table->comp_irqs);
0843     return ret;
0844 }
0845 
0846 static void destroy_comp_eqs(struct mlx5_core_dev *dev)
0847 {
0848     struct mlx5_eq_table *table = dev->priv.eq_table;
0849     struct mlx5_eq_comp *eq, *n;
0850 
0851     list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
0852         list_del(&eq->list);
0853         mlx5_eq_disable(dev, &eq->core, &eq->irq_nb);
0854         if (destroy_unmap_eq(dev, &eq->core))
0855             mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n",
0856                        eq->core.eqn);
0857         tasklet_disable(&eq->tasklet_ctx.task);
0858         kfree(eq);
0859     }
0860     comp_irqs_release(dev);
0861 }
0862 
0863 static u16 comp_eq_depth_devlink_param_get(struct mlx5_core_dev *dev)
0864 {
0865     struct devlink *devlink = priv_to_devlink(dev);
0866     union devlink_param_value val;
0867     int err;
0868 
0869     err = devlink_param_driverinit_value_get(devlink,
0870                          DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE,
0871                          &val);
0872     if (!err)
0873         return val.vu32;
0874     mlx5_core_dbg(dev, "Failed to get param. using default. err = %d\n", err);
0875     return MLX5_COMP_EQ_SIZE;
0876 }
0877 
0878 static int create_comp_eqs(struct mlx5_core_dev *dev)
0879 {
0880     struct mlx5_eq_table *table = dev->priv.eq_table;
0881     struct mlx5_eq_comp *eq;
0882     int ncomp_eqs;
0883     int nent;
0884     int err;
0885     int i;
0886 
0887     ncomp_eqs = comp_irqs_request(dev);
0888     if (ncomp_eqs < 0)
0889         return ncomp_eqs;
0890     INIT_LIST_HEAD(&table->comp_eqs_list);
0891     nent = comp_eq_depth_devlink_param_get(dev);
0892 
0893     for (i = 0; i < ncomp_eqs; i++) {
0894         struct mlx5_eq_param param = {};
0895 
0896         eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, dev->priv.numa_node);
0897         if (!eq) {
0898             err = -ENOMEM;
0899             goto clean;
0900         }
0901 
0902         INIT_LIST_HEAD(&eq->tasklet_ctx.list);
0903         INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
0904         spin_lock_init(&eq->tasklet_ctx.lock);
0905         tasklet_setup(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb);
0906 
0907         eq->irq_nb.notifier_call = mlx5_eq_comp_int;
0908         param = (struct mlx5_eq_param) {
0909             .irq = table->comp_irqs[i],
0910             .nent = nent,
0911         };
0912 
0913         err = create_map_eq(dev, &eq->core, &param);
0914         if (err)
0915             goto clean_eq;
0916         err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb);
0917         if (err) {
0918             destroy_unmap_eq(dev, &eq->core);
0919             goto clean_eq;
0920         }
0921 
0922         mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn);
0923         /* add to tail to keep the list ordered; mlx5_vector2eqn() relies on this */
0924         list_add_tail(&eq->list, &table->comp_eqs_list);
0925     }
0926 
0927     table->num_comp_eqs = ncomp_eqs;
0928     return 0;
0929 
0930 clean_eq:
0931     kfree(eq);
0932 clean:
0933     destroy_comp_eqs(dev);
0934     return err;
0935 }
0936 
0937 static int vector2eqnirqn(struct mlx5_core_dev *dev, int vector, int *eqn,
0938               unsigned int *irqn)
0939 {
0940     struct mlx5_eq_table *table = dev->priv.eq_table;
0941     struct mlx5_eq_comp *eq, *n;
0942     int err = -ENOENT;
0943     int i = 0;
0944 
0945     list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
0946         if (i++ == vector) {
0947             if (irqn)
0948                 *irqn = eq->core.irqn;
0949             if (eqn)
0950                 *eqn = eq->core.eqn;
0951             err = 0;
0952             break;
0953         }
0954     }
0955 
0956     return err;
0957 }
0958 
0959 int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn)
0960 {
0961     return vector2eqnirqn(dev, vector, eqn, NULL);
0962 }
0963 EXPORT_SYMBOL(mlx5_vector2eqn);
0964 
0965 int mlx5_vector2irqn(struct mlx5_core_dev *dev, int vector, unsigned int *irqn)
0966 {
0967     return vector2eqnirqn(dev, vector, NULL, irqn);
0968 }
0969 
0970 unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev)
0971 {
0972     return dev->priv.eq_table->num_comp_eqs;
0973 }
0974 EXPORT_SYMBOL(mlx5_comp_vectors_count);
0975 
0976 struct cpumask *
0977 mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector)
0978 {
0979     struct mlx5_eq_table *table = dev->priv.eq_table;
0980     struct mlx5_eq_comp *eq, *n;
0981     int i = 0;
0982 
0983     list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
0984         if (i++ == vector)
0985             break;
0986     }
0987 
0988     return mlx5_irq_get_affinity_mask(eq->core.irq);
0989 }
0990 EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask);
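
/* Illustrative usage sketch (editorial addition): picking a completion
 * vector and resolving its EQN and IRQ affinity, e.g. when spreading CQs
 * across the available completion EQs. my_pick_vector is a hypothetical
 * name.
 */
#if 0   /* example only, not compiled */
static int my_pick_vector(struct mlx5_core_dev *dev, unsigned int hint,
              int *eqn, struct cpumask **mask)
{
    unsigned int vector = hint % mlx5_comp_vectors_count(dev);
    int err;

    /* EQN to place in the context of a CQ bound to this vector */
    err = mlx5_vector2eqn(dev, vector, eqn);
    if (err)
        return err;

    /* CPUs servicing this vector's IRQ, e.g. for queue placement */
    *mask = mlx5_comp_irq_get_affinity_mask(dev, vector);
    return 0;
}
#endif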
0991 
0992 #ifdef CONFIG_RFS_ACCEL
0993 struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev)
0994 {
0995     return dev->priv.eq_table->rmap;
0996 }
0997 #endif
0998 
0999 struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn)
1000 {
1001     struct mlx5_eq_table *table = dev->priv.eq_table;
1002     struct mlx5_eq_comp *eq;
1003 
1004     list_for_each_entry(eq, &table->comp_eqs_list, list) {
1005         if (eq->core.eqn == eqn)
1006             return eq;
1007     }
1008 
1009     return ERR_PTR(-ENOENT);
1010 }
1011 
1012 static void clear_rmap(struct mlx5_core_dev *dev)
1013 {
1014 #ifdef CONFIG_RFS_ACCEL
1015     struct mlx5_eq_table *eq_table = dev->priv.eq_table;
1016 
1017     free_irq_cpu_rmap(eq_table->rmap);
1018 #endif
1019 }
1020 
1021 static int set_rmap(struct mlx5_core_dev *mdev)
1022 {
1023     int err = 0;
1024 #ifdef CONFIG_RFS_ACCEL
1025     struct mlx5_eq_table *eq_table = mdev->priv.eq_table;
1026     int vecidx;
1027 
1028     eq_table->rmap = alloc_irq_cpu_rmap(eq_table->num_comp_eqs);
1029     if (!eq_table->rmap) {
1030         err = -ENOMEM;
1031         mlx5_core_err(mdev, "Failed to allocate cpu_rmap. err %d", err);
1032         goto err_out;
1033     }
1034 
1035     for (vecidx = 0; vecidx < eq_table->num_comp_eqs; vecidx++) {
1036         err = irq_cpu_rmap_add(eq_table->rmap,
1037                        pci_irq_vector(mdev->pdev, vecidx));
1038         if (err) {
1039             mlx5_core_err(mdev, "irq_cpu_rmap_add failed. err %d",
1040                       err);
1041             goto err_irq_cpu_rmap_add;
1042         }
1043     }
1044     return 0;
1045 
1046 err_irq_cpu_rmap_add:
1047     clear_rmap(mdev);
1048 err_out:
1049 #endif
1050     return err;
1051 }
1052 
1053 /* This function should only be called after mlx5_cmd_force_teardown_hca */
1054 void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
1055 {
1056     struct mlx5_eq_table *table = dev->priv.eq_table;
1057 
1058     mutex_lock(&table->lock); /* sync with create/destroy_async_eq */
1059     if (!mlx5_core_is_sf(dev))
1060         clear_rmap(dev);
1061     mlx5_irq_table_destroy(dev);
1062     mutex_unlock(&table->lock);
1063 }
1064 
1065 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
1066 #define MLX5_MAX_ASYNC_EQS 4
1067 #else
1068 #define MLX5_MAX_ASYNC_EQS 3
1069 #endif
1070 
1071 int mlx5_eq_table_create(struct mlx5_core_dev *dev)
1072 {
1073     struct mlx5_eq_table *eq_table = dev->priv.eq_table;
1074     int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
1075               MLX5_CAP_GEN(dev, max_num_eqs) :
1076               1 << MLX5_CAP_GEN(dev, log_max_eq);
1077     int max_eqs_sf;
1078     int err;
1079 
1080     eq_table->num_comp_eqs =
1081         min_t(int,
1082               mlx5_irq_table_get_num_comp(eq_table->irq_table),
1083               num_eqs - MLX5_MAX_ASYNC_EQS);
1084     if (mlx5_core_is_sf(dev)) {
1085         max_eqs_sf = min_t(int, MLX5_COMP_EQS_PER_SF,
1086                    mlx5_irq_table_get_sfs_vec(eq_table->irq_table));
1087         eq_table->num_comp_eqs = min_t(int, eq_table->num_comp_eqs,
1088                            max_eqs_sf);
1089     }
1090 
1091     err = create_async_eqs(dev);
1092     if (err) {
1093         mlx5_core_err(dev, "Failed to create async EQs\n");
1094         goto err_async_eqs;
1095     }
1096 
1097     if (!mlx5_core_is_sf(dev)) {
1098         /* rmap is a mapping between IRQ number and queue number.
1099          * Each IRQ can be assigned only to a single rmap.
1100          * Since SFs share IRQs, rmap mapping cannot function correctly
1101          * for IRQs that are shared between different core/netdev RX rings.
1102          * Hence we don't allow a netdev rmap for SFs.
1103          */
1104         err = set_rmap(dev);
1105         if (err)
1106             goto err_rmap;
1107     }
1108 
1109     err = create_comp_eqs(dev);
1110     if (err) {
1111         mlx5_core_err(dev, "Failed to create completion EQs\n");
1112         goto err_comp_eqs;
1113     }
1114 
1115     return 0;
1116 err_comp_eqs:
1117     if (!mlx5_core_is_sf(dev))
1118         clear_rmap(dev);
1119 err_rmap:
1120     destroy_async_eqs(dev);
1121 err_async_eqs:
1122     return err;
1123 }
1124 
1125 void mlx5_eq_table_destroy(struct mlx5_core_dev *dev)
1126 {
1127     if (!mlx5_core_is_sf(dev))
1128         clear_rmap(dev);
1129     destroy_comp_eqs(dev);
1130     destroy_async_eqs(dev);
1131 }
1132 
1133 int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
1134 {
1135     struct mlx5_eq_table *eqt = dev->priv.eq_table;
1136 
1137     return atomic_notifier_chain_register(&eqt->nh[nb->event_type], &nb->nb);
1138 }
1139 EXPORT_SYMBOL(mlx5_eq_notifier_register);
1140 
1141 int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
1142 {
1143     struct mlx5_eq_table *eqt = dev->priv.eq_table;
1144 
1145     return atomic_notifier_chain_unregister(&eqt->nh[nb->event_type], &nb->nb);
1146 }
1147 EXPORT_SYMBOL(mlx5_eq_notifier_unregister);
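
/* Illustrative usage sketch (editorial addition): consuming async events
 * through the notifier API above, mirroring the cq_err_nb registration in
 * create_async_eqs(). my_ctx, my_ctx_init, my_ctx_cleanup and
 * my_port_change_handler are hypothetical names.
 */
#if 0   /* example only, not compiled */
struct my_ctx {
    struct mlx5_nb port_nb;
};

static int my_port_change_handler(struct notifier_block *nb,
                  unsigned long type, void *data)
{
    struct my_ctx *ctx = mlx5_nb_cof(nb, struct my_ctx, port_nb);
    struct mlx5_eqe *eqe = data;

    /* type == MLX5_EVENT_TYPE_PORT_CHANGE here; eqe->sub_type identifies
     * the specific port event for ctx to act on.
     */
    return NOTIFY_OK;
}

static void my_ctx_init(struct mlx5_core_dev *dev, struct my_ctx *ctx)
{
    MLX5_NB_INIT(&ctx->port_nb, my_port_change_handler, PORT_CHANGE);
    mlx5_eq_notifier_register(dev, &ctx->port_nb);
}

static void my_ctx_cleanup(struct mlx5_core_dev *dev, struct my_ctx *ctx)
{
    mlx5_eq_notifier_unregister(dev, &ctx->port_nb);
}
#endif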