0001 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
0002 /* Copyright (c) 2019 Mellanox Technologies. */
0003 
0004 #include <linux/interrupt.h>
0005 #include <linux/notifier.h>
0006 #include <linux/mlx5/driver.h>
0007 #include "mlx5_core.h"
0008 #include "mlx5_irq.h"
0009 #include "pci_irq.h"
0010 #include "lib/sf.h"
0011 #ifdef CONFIG_RFS_ACCEL
0012 #include <linux/cpu_rmap.h>
0013 #endif
0014 
0015 #define MLX5_SFS_PER_CTRL_IRQ 64
0016 #define MLX5_IRQ_CTRL_SF_MAX 8
0017 /* min num of vectors for SFs to be enabled */
0018 #define MLX5_IRQ_VEC_COMP_BASE_SF 2
0019 
0020 #define MLX5_EQ_SHARE_IRQ_MAX_COMP (8)
0021 #define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX)
0022 #define MLX5_EQ_SHARE_IRQ_MIN_COMP (1)
0023 #define MLX5_EQ_SHARE_IRQ_MIN_CTRL (4)
0024 
0025 struct mlx5_irq {
0026     struct atomic_notifier_head nh;
0027     cpumask_var_t mask;
0028     char name[MLX5_MAX_IRQ_NAME];
0029     struct mlx5_irq_pool *pool;
0030     int refcount;
0031     u32 index;
0032     int irqn;
0033 };
0034 
0035 struct mlx5_irq_table {
0036     struct mlx5_irq_pool *pf_pool;
0037     struct mlx5_irq_pool *sf_ctrl_pool;
0038     struct mlx5_irq_pool *sf_comp_pool;
0039 };
0040 
0041 /**
0042  * mlx5_get_default_msix_vec_count - Get the default number of MSI-X vectors
0043  *                                   to be assigned to each VF.
0044  * @dev: PF to work on
0045  * @num_vfs: Number of enabled VFs
0046  */
0047 int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs)
0048 {
0049     int num_vf_msix, min_msix, max_msix;
0050 
0051     num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
0052     if (!num_vf_msix)
0053         return 0;
0054 
0055     min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
0056     max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);
0057 
0058     /* Limit maximum number of MSI-X vectors so the default configuration
0059      * has some available in the pool. This will allow the user to increase
0060      * the number of vectors in a VF without having to first size-down other
0061      * VFs.
0062      */
0063     return max(min(num_vf_msix / num_vfs, max_msix / 2), min_msix);
0064 }
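
/* Illustrative sketch (hypothetical capability values, not taken from any
 * real device): with num_total_dynamic_vf_msix = 4096, num_vfs = 4,
 * max_dynamic_vf_msix_table_size = 256 and
 * min_dynamic_vf_msix_table_size = 2, the default per-VF count is
 *
 *	max(min(4096 / 4, 256 / 2), 2) = max(min(1024, 128), 2) = 128
 *
 * so each VF starts with 128 vectors and 4096 - 4 * 128 = 3584 vectors stay
 * in the pool, available for later per-VF increases via
 * mlx5_set_msix_vec_count() without first shrinking other VFs.
 */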
0065 
0066 /**
0067  * mlx5_set_msix_vec_count - Set dynamically allocated MSI-X on the VF
0068  * @dev: PF to work on
0069  * @function_id: Internal PCI VF function ID
0070  * @msix_vec_count: Number of MSI-X vectors to set
0071  */
0072 int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
0073                 int msix_vec_count)
0074 {
0075     int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
0076     int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
0077     void *hca_cap = NULL, *query_cap = NULL, *cap;
0078     int num_vf_msix, min_msix, max_msix;
0079     int ret;
0080 
0081     num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
0082     if (!num_vf_msix)
0083         return 0;
0084 
0085     if (!MLX5_CAP_GEN(dev, vport_group_manager) || !mlx5_core_is_pf(dev))
0086         return -EOPNOTSUPP;
0087 
0088     min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size);
0089     max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size);
0090 
0091     if (msix_vec_count < min_msix)
0092         return -EINVAL;
0093 
0094     if (msix_vec_count > max_msix)
0095         return -EOVERFLOW;
0096 
0097     query_cap = kvzalloc(query_sz, GFP_KERNEL);
0098     hca_cap = kvzalloc(set_sz, GFP_KERNEL);
0099     if (!hca_cap || !query_cap) {
0100         ret = -ENOMEM;
0101         goto out;
0102     }
0103 
0104     ret = mlx5_vport_get_other_func_cap(dev, function_id, query_cap);
0105     if (ret)
0106         goto out;
0107 
0108     cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability);
0109     memcpy(cap, MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability),
0110            MLX5_UN_SZ_BYTES(hca_cap_union));
0111     MLX5_SET(cmd_hca_cap, cap, dynamic_msix_table_size, msix_vec_count);
0112 
0113     MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP);
0114     MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1);
0115     MLX5_SET(set_hca_cap_in, hca_cap, function_id, function_id);
0116 
0117     MLX5_SET(set_hca_cap_in, hca_cap, op_mod,
0118          MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1);
0119     ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap);
0120 out:
0121     kvfree(hca_cap);
0122     kvfree(query_cap);
0123     return ret;
0124 }
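
/* Illustrative sketch of a caller (hypothetical helper, not part of this
 * file): applying the default MSI-X count to every VF when SR-IOV is
 * enabled. How the internal function id is derived from the VF index is an
 * assumption here.
 *
 *	static int example_apply_default_msix(struct mlx5_core_dev *dev,
 *					      int num_vfs)
 *	{
 *		int count = mlx5_get_default_msix_vec_count(dev, num_vfs);
 *		int vf, err;
 *
 *		for (vf = 0; vf < num_vfs; vf++) {
 *			// assumed mapping: internal function id = vf + 1
 *			err = mlx5_set_msix_vec_count(dev, vf + 1, count);
 *			if (err)
 *				return err;
 *		}
 *		return 0;
 *	}
 */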
0125 
0126 static void irq_release(struct mlx5_irq *irq)
0127 {
0128     struct mlx5_irq_pool *pool = irq->pool;
0129 
0130     xa_erase(&pool->irqs, irq->index);
0131     /* free_irq() requires that the affinity hint and rmap be cleared
0132      * before calling it. This is why there is asymmetry with set_rmap,
0133      * which should be called after alloc_irq but before request_irq.
0134      */
0135     irq_update_affinity_hint(irq->irqn, NULL);
0136     free_cpumask_var(irq->mask);
0137     free_irq(irq->irqn, &irq->nh);
0138     kfree(irq);
0139 }
0140 
0141 int mlx5_irq_put(struct mlx5_irq *irq)
0142 {
0143     struct mlx5_irq_pool *pool = irq->pool;
0144     int ret = 0;
0145 
0146     mutex_lock(&pool->lock);
0147     irq->refcount--;
0148     if (!irq->refcount) {
0149         irq_release(irq);
0150         ret = 1;
0151     }
0152     mutex_unlock(&pool->lock);
0153     return ret;
0154 }
0155 
0156 int mlx5_irq_read_locked(struct mlx5_irq *irq)
0157 {
0158     lockdep_assert_held(&irq->pool->lock);
0159     return irq->refcount;
0160 }
0161 
0162 int mlx5_irq_get_locked(struct mlx5_irq *irq)
0163 {
0164     lockdep_assert_held(&irq->pool->lock);
0165     if (WARN_ON_ONCE(!irq->refcount))
0166         return 0;
0167     irq->refcount++;
0168     return 1;
0169 }
0170 
0171 static int irq_get(struct mlx5_irq *irq)
0172 {
0173     int err;
0174 
0175     mutex_lock(&irq->pool->lock);
0176     err = mlx5_irq_get_locked(irq);
0177     mutex_unlock(&irq->pool->lock);
0178     return err;
0179 }
0180 
0181 static irqreturn_t irq_int_handler(int irq, void *nh)
0182 {
0183     atomic_notifier_call_chain(nh, 0, NULL);
0184     return IRQ_HANDLED;
0185 }
0186 
0187 static void irq_sf_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
0188 {
0189     snprintf(name, MLX5_MAX_IRQ_NAME, "%s%d", pool->name, vecidx);
0190 }
0191 
0192 static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
0193 {
0194     if (!pool->xa_num_irqs.max) {
0195         /* in case we only have a single irq for the device */
0196         snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_combined%d", vecidx);
0197         return;
0198     }
0199 
0200     if (vecidx == pool->xa_num_irqs.max) {
0201         snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async%d", vecidx);
0202         return;
0203     }
0204 
0205     snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx);
0206 }
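
/* Naming examples derived from the two helpers above: for a PF/VF pool with
 * xa_num_irqs.max == 9, vector 0 becomes "mlx5_comp0", vector 3 "mlx5_comp3"
 * and the last vector (9) "mlx5_async9"; a pool with a single IRQ uses
 * "mlx5_combined0". For an SF pool named "mlx5_sf_comp", vector 3 is simply
 * "mlx5_sf_comp3". mlx5_irq_alloc() then appends "@pci:<pci name>" to the
 * chosen name.
 */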
0207 
0208 struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
0209                 const struct cpumask *affinity)
0210 {
0211     struct mlx5_core_dev *dev = pool->dev;
0212     char name[MLX5_MAX_IRQ_NAME];
0213     struct mlx5_irq *irq;
0214     int err;
0215 
0216     irq = kzalloc(sizeof(*irq), GFP_KERNEL);
0217     if (!irq)
0218         return ERR_PTR(-ENOMEM);
0219     irq->irqn = pci_irq_vector(dev->pdev, i);
0220     if (!mlx5_irq_pool_is_sf_pool(pool))
0221         irq_set_name(pool, name, i);
0222     else
0223         irq_sf_set_name(pool, name, i);
0224     ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
0225     snprintf(irq->name, MLX5_MAX_IRQ_NAME,
0226          "%s@pci:%s", name, pci_name(dev->pdev));
0227     err = request_irq(irq->irqn, irq_int_handler, 0, irq->name,
0228               &irq->nh);
0229     if (err) {
0230         mlx5_core_err(dev, "Failed to request irq. err = %d\n", err);
0231         goto err_req_irq;
0232     }
0233     if (!zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) {
0234         mlx5_core_warn(dev, "zalloc_cpumask_var failed\n");
0235         err = -ENOMEM;
0236         goto err_cpumask;
0237     }
0238     if (affinity) {
0239         cpumask_copy(irq->mask, affinity);
0240         irq_set_affinity_and_hint(irq->irqn, irq->mask);
0241     }
0242     irq->pool = pool;
0243     irq->refcount = 1;
0244     irq->index = i;
0245     err = xa_err(xa_store(&pool->irqs, irq->index, irq, GFP_KERNEL));
0246     if (err) {
0247         mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n",
0248                   irq->index, err);
0249         goto err_xa;
0250     }
0251     return irq;
0252 err_xa:
0253     irq_update_affinity_hint(irq->irqn, NULL);
0254     free_cpumask_var(irq->mask);
0255 err_cpumask:
0256     free_irq(irq->irqn, &irq->nh);
0257 err_req_irq:
0258     kfree(irq);
0259     return ERR_PTR(err);
0260 }
0261 
0262 int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
0263 {
0264     int ret;
0265 
0266     ret = irq_get(irq);
0267     if (!ret)
0268         /* Something has gone very wrong here: we are enabling an EQ
0269          * on a non-existing IRQ.
0270          */
0271         return -ENOENT;
0272     ret = atomic_notifier_chain_register(&irq->nh, nb);
0273     if (ret)
0274         mlx5_irq_put(irq);
0275     return ret;
0276 }
0277 
0278 int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb)
0279 {
0280     int err = 0;
0281 
0282     err = atomic_notifier_chain_unregister(&irq->nh, nb);
0283     mlx5_irq_put(irq);
0284     return err;
0285 }
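
/* Illustrative sketch (hypothetical consumer, e.g. an EQ-like object): the
 * names example_eq and example_eq_handler are assumptions, but the notifier
 * and IRQ calls are the ones defined in this file and <linux/notifier.h>.
 *
 *	static int example_eq_handler(struct notifier_block *nb,
 *				      unsigned long action, void *data)
 *	{
 *		struct example_eq *eq = container_of(nb, struct example_eq, nb);
 *
 *		// poll and re-arm the EQ here
 *		return NOTIFY_OK;
 *	}
 *
 *	eq->nb.notifier_call = example_eq_handler;
 *	err = mlx5_irq_attach_nb(irq, &eq->nb);	// takes an IRQ reference
 *	...
 *	mlx5_irq_detach_nb(irq, &eq->nb);	// drops the reference
 */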
0286 
0287 struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq)
0288 {
0289     return irq->mask;
0290 }
0291 
0292 int mlx5_irq_get_index(struct mlx5_irq *irq)
0293 {
0294     return irq->index;
0295 }
0296 
0297 /* irq_pool API */
0298 
0299 /* request an IRQ from a given pool according to the given index */
0300 static struct mlx5_irq *
0301 irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
0302             struct cpumask *affinity)
0303 {
0304     struct mlx5_irq *irq;
0305 
0306     mutex_lock(&pool->lock);
0307     irq = xa_load(&pool->irqs, vecidx);
0308     if (irq) {
0309         mlx5_irq_get_locked(irq);
0310         goto unlock;
0311     }
0312     irq = mlx5_irq_alloc(pool, vecidx, affinity);
0313 unlock:
0314     mutex_unlock(&pool->lock);
0315     return irq;
0316 }
0317 
0318 static struct mlx5_irq_pool *sf_ctrl_irq_pool_get(struct mlx5_irq_table *irq_table)
0319 {
0320     return irq_table->sf_ctrl_pool;
0321 }
0322 
0323 static struct mlx5_irq_pool *sf_irq_pool_get(struct mlx5_irq_table *irq_table)
0324 {
0325     return irq_table->sf_comp_pool;
0326 }
0327 
0328 struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev)
0329 {
0330     struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
0331     struct mlx5_irq_pool *pool = NULL;
0332 
0333     if (mlx5_core_is_sf(dev))
0334         pool = sf_irq_pool_get(irq_table);
0335 
0336     /* In some configs, there won't be a pool of SF IRQs. Hence, return
0337      * the PF IRQ pool in case the SF pool doesn't exist.
0338      */
0339     return pool ? pool : irq_table->pf_pool;
0340 }
0341 
0342 static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev)
0343 {
0344     struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
0345     struct mlx5_irq_pool *pool = NULL;
0346 
0347     if (mlx5_core_is_sf(dev))
0348         pool = sf_ctrl_irq_pool_get(irq_table);
0349 
0350     /* In some configs, there won't be a pool of SF IRQs. Hence, return
0351      * the PF IRQ pool in case the SF pool doesn't exist.
0352      */
0353     return pool ? pool : irq_table->pf_pool;
0354 }
0355 
0356 /**
0357  * mlx5_irqs_release - release one or more IRQs back to the system.
0358  * @irqs: IRQs to be released.
0359  * @nirqs: number of IRQs to be released.
0360  */
0361 static void mlx5_irqs_release(struct mlx5_irq **irqs, int nirqs)
0362 {
0363     int i;
0364 
0365     for (i = 0; i < nirqs; i++) {
0366         synchronize_irq(irqs[i]->irqn);
0367         mlx5_irq_put(irqs[i]);
0368     }
0369 }
0370 
0371 /**
0372  * mlx5_ctrl_irq_release - release a ctrl IRQ back to the system.
0373  * @ctrl_irq: ctrl IRQ to be released.
0374  */
0375 void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq)
0376 {
0377     mlx5_irqs_release(&ctrl_irq, 1);
0378 }
0379 
0380 /**
0381  * mlx5_ctrl_irq_request - request a ctrl IRQ for mlx5 device.
0382  * @dev: mlx5 device that is requesting the IRQ.
0383  *
0384  * This function returns a pointer to the IRQ, or ERR_PTR in case of error.
0385  */
0386 struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev)
0387 {
0388     struct mlx5_irq_pool *pool = ctrl_irq_pool_get(dev);
0389     cpumask_var_t req_mask;
0390     struct mlx5_irq *irq;
0391 
0392     if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL))
0393         return ERR_PTR(-ENOMEM);
0394     cpumask_copy(req_mask, cpu_online_mask);
0395     if (!mlx5_irq_pool_is_sf_pool(pool)) {
0396         /* In case we are allocating a control IRQ for PF/VF */
0397         if (!pool->xa_num_irqs.max) {
0398             cpumask_clear(req_mask);
0399             /* In case we only have a single IRQ for PF/VF */
0400             cpumask_set_cpu(cpumask_first(cpu_online_mask), req_mask);
0401         }
0402         /* Allocate the IRQ in the last index of the pool */
0403         irq = irq_pool_request_vector(pool, pool->xa_num_irqs.max, req_mask);
0404     } else {
0405         irq = mlx5_irq_affinity_request(pool, req_mask);
0406     }
0407 
0408     free_cpumask_var(req_mask);
0409     return irq;
0410 }
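
/* Illustrative usage sketch (error handling abbreviated): requesting a
 * control IRQ for asynchronous events and releasing it on teardown.
 *
 *	struct mlx5_irq *ctrl_irq;
 *
 *	ctrl_irq = mlx5_ctrl_irq_request(dev);
 *	if (IS_ERR(ctrl_irq))
 *		return PTR_ERR(ctrl_irq);
 *	...
 *	mlx5_ctrl_irq_release(ctrl_irq);
 */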
0411 
0412 /**
0413  * mlx5_irq_request - request an IRQ for mlx5 PF/VF device.
0414  * @dev: mlx5 device that is requesting the IRQ.
0415  * @vecidx: vector index of the IRQ. This argument is ignored if affinity is
0416  * provided.
0417  * @affinity: cpumask requested for this IRQ.
0418  *
0419  * This function returns a pointer to the IRQ, or ERR_PTR in case of error.
0420  */
0421 struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
0422                   struct cpumask *affinity)
0423 {
0424     struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
0425     struct mlx5_irq_pool *pool;
0426     struct mlx5_irq *irq;
0427 
0428     pool = irq_table->pf_pool;
0429     irq = irq_pool_request_vector(pool, vecidx, affinity);
0430     if (IS_ERR(irq))
0431         return irq;
0432     mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n",
0433               irq->irqn, cpumask_pr_args(affinity),
0434               irq->refcount / MLX5_EQ_REFS_PER_IRQ);
0435     return irq;
0436 }
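
/* Illustrative usage sketch (hypothetical caller): requesting completion
 * vector 0 pinned to CPU 3 and releasing it later through
 * mlx5_irqs_release_vectors().
 *
 *	cpumask_var_t mask;
 *	struct mlx5_irq *irq;
 *
 *	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
 *		return -ENOMEM;
 *	cpumask_set_cpu(3, mask);
 *	irq = mlx5_irq_request(dev, 0, mask);
 *	free_cpumask_var(mask);
 *	if (IS_ERR(irq))
 *		return PTR_ERR(irq);
 *	...
 *	mlx5_irqs_release_vectors(&irq, 1);
 */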
0437 
0438 /**
0439  * mlx5_irqs_release_vectors - release one or more IRQs back to the system.
0440  * @irqs: IRQs to be released.
0441  * @nirqs: number of IRQs to be released.
0442  */
0443 void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs)
0444 {
0445     mlx5_irqs_release(irqs, nirqs);
0446 }
0447 
0448 /**
0449  * mlx5_irqs_request_vectors - request one or more IRQs for mlx5 device.
0450  * @dev: mlx5 device that is requesting the IRQs.
0451  * @cpus: CPUs array for binding the IRQs
0452  * @nirqs: number of IRQs to request.
0453  * @irqs: an output array of IRQs pointers.
0454  *
0455  * Each IRQ is bound to at most 1 CPU.
0456  * This function requests @nirqs IRQs, one per entry of @cpus.
0457  *
0458  * This function returns the number of IRQs requested (which might be smaller
0459  * than @nirqs) if successful, or a negative error code in case of an error.
0460  */
0461 int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs,
0462                   struct mlx5_irq **irqs)
0463 {
0464     cpumask_var_t req_mask;
0465     struct mlx5_irq *irq;
0466     int i;
0467 
0468     if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL))
0469         return -ENOMEM;
0470     for (i = 0; i < nirqs; i++) {
0471         cpumask_set_cpu(cpus[i], req_mask);
0472         irq = mlx5_irq_request(dev, i, req_mask);
0473         if (IS_ERR(irq))
0474             break;
0475         cpumask_clear(req_mask);
0476         irqs[i] = irq;
0477     }
0478 
0479     free_cpumask_var(req_mask);
0480     return i ? i : PTR_ERR(irq);
0481 }
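
/* Illustrative usage sketch (hypothetical caller): requesting one IRQ per
 * CPU in a small set and releasing whatever was granted.
 *
 *	u16 cpus[2] = { 0, 1 };
 *	struct mlx5_irq *irqs[2];
 *	int n;
 *
 *	n = mlx5_irqs_request_vectors(dev, cpus, 2, irqs);
 *	if (n < 0)
 *		return n;	// not a single IRQ could be requested
 *	...
 *	mlx5_irqs_release_vectors(irqs, n);
 */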
0482 
0483 static struct mlx5_irq_pool *
0484 irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name,
0485            u32 min_threshold, u32 max_threshold)
0486 {
0487     struct mlx5_irq_pool *pool = kvzalloc(sizeof(*pool), GFP_KERNEL);
0488 
0489     if (!pool)
0490         return ERR_PTR(-ENOMEM);
0491     pool->dev = dev;
0492     mutex_init(&pool->lock);
0493     xa_init_flags(&pool->irqs, XA_FLAGS_ALLOC);
0494     pool->xa_num_irqs.min = start;
0495     pool->xa_num_irqs.max = start + size - 1;
0496     if (name)
0497         snprintf(pool->name, MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS,
0498              "%s", name);
0499     pool->min_threshold = min_threshold * MLX5_EQ_REFS_PER_IRQ;
0500     pool->max_threshold = max_threshold * MLX5_EQ_REFS_PER_IRQ;
0501     mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d",
0502               name, size, start);
0503     return pool;
0504 }
0505 
0506 static void irq_pool_free(struct mlx5_irq_pool *pool)
0507 {
0508     struct mlx5_irq *irq;
0509     unsigned long index;
0510 
0511     /* There are cases in which we are destroying the irq_table before
0512      * freeing all the IRQs, fast teardown for example. Hence, free the IRQs
0513      * which might not have been freed yet.
0514      */
0515     xa_for_each(&pool->irqs, index, irq)
0516         irq_release(irq);
0517     xa_destroy(&pool->irqs);
0518     mutex_destroy(&pool->lock);
0519     kfree(pool->irqs_per_cpu);
0520     kvfree(pool);
0521 }
0522 
0523 static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec)
0524 {
0525     struct mlx5_irq_table *table = dev->priv.irq_table;
0526     int num_sf_ctrl_by_msix;
0527     int num_sf_ctrl_by_sfs;
0528     int num_sf_ctrl;
0529     int err;
0530 
0531     /* init pf_pool */
0532     table->pf_pool = irq_pool_alloc(dev, 0, pf_vec, NULL,
0533                     MLX5_EQ_SHARE_IRQ_MIN_COMP,
0534                     MLX5_EQ_SHARE_IRQ_MAX_COMP);
0535     if (IS_ERR(table->pf_pool))
0536         return PTR_ERR(table->pf_pool);
0537     if (!mlx5_sf_max_functions(dev))
0538         return 0;
0539     if (sf_vec < MLX5_IRQ_VEC_COMP_BASE_SF) {
0540         mlx5_core_dbg(dev, "Not enough IRQs for SFs. SFs may run at lower performance\n");
0541         return 0;
0542     }
0543 
0544     /* init sf_ctrl_pool */
0545     num_sf_ctrl_by_msix = DIV_ROUND_UP(sf_vec, MLX5_COMP_EQS_PER_SF);
0546     num_sf_ctrl_by_sfs = DIV_ROUND_UP(mlx5_sf_max_functions(dev),
0547                       MLX5_SFS_PER_CTRL_IRQ);
0548     num_sf_ctrl = min_t(int, num_sf_ctrl_by_msix, num_sf_ctrl_by_sfs);
0549     num_sf_ctrl = min_t(int, MLX5_IRQ_CTRL_SF_MAX, num_sf_ctrl);
0550     table->sf_ctrl_pool = irq_pool_alloc(dev, pf_vec, num_sf_ctrl,
0551                          "mlx5_sf_ctrl",
0552                          MLX5_EQ_SHARE_IRQ_MIN_CTRL,
0553                          MLX5_EQ_SHARE_IRQ_MAX_CTRL);
0554     if (IS_ERR(table->sf_ctrl_pool)) {
0555         err = PTR_ERR(table->sf_ctrl_pool);
0556         goto err_pf;
0557     }
0558     /* init sf_comp_pool */
0559     table->sf_comp_pool = irq_pool_alloc(dev, pf_vec + num_sf_ctrl,
0560                          sf_vec - num_sf_ctrl, "mlx5_sf_comp",
0561                          MLX5_EQ_SHARE_IRQ_MIN_COMP,
0562                          MLX5_EQ_SHARE_IRQ_MAX_COMP);
0563     if (IS_ERR(table->sf_comp_pool)) {
0564         err = PTR_ERR(table->sf_comp_pool);
0565         goto err_sf_ctrl;
0566     }
0567 
0568     table->sf_comp_pool->irqs_per_cpu = kcalloc(nr_cpu_ids, sizeof(u16), GFP_KERNEL);
0569     if (!table->sf_comp_pool->irqs_per_cpu) {
0570         err = -ENOMEM;
0571         goto err_irqs_per_cpu;
0572     }
0573 
0574     return 0;
0575 
0576 err_irqs_per_cpu:
0577     irq_pool_free(table->sf_comp_pool);
0578 err_sf_ctrl:
0579     irq_pool_free(table->sf_ctrl_pool);
0580 err_pf:
0581     irq_pool_free(table->pf_pool);
0582     return err;
0583 }
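
/* Illustrative layout (hypothetical numbers; the value of
 * MLX5_COMP_EQS_PER_SF is assumed to be 8 here purely for the arithmetic):
 * with pf_vec = 17, sf_vec = 64 and mlx5_sf_max_functions() = 256,
 *
 *	num_sf_ctrl_by_msix = DIV_ROUND_UP(64, 8)   = 8
 *	num_sf_ctrl_by_sfs  = DIV_ROUND_UP(256, 64) = 4
 *	num_sf_ctrl         = min(8, 4, MLX5_IRQ_CTRL_SF_MAX) = 4
 *
 * so pf_pool covers vectors 0..16, sf_ctrl_pool covers 17..20 and
 * sf_comp_pool covers 21..80 (size sf_vec - num_sf_ctrl = 60).
 */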
0584 
0585 static void irq_pools_destroy(struct mlx5_irq_table *table)
0586 {
0587     if (table->sf_ctrl_pool) {
0588         irq_pool_free(table->sf_comp_pool);
0589         irq_pool_free(table->sf_ctrl_pool);
0590     }
0591     irq_pool_free(table->pf_pool);
0592 }
0593 
0594 /* irq_table API */
0595 
0596 int mlx5_irq_table_init(struct mlx5_core_dev *dev)
0597 {
0598     struct mlx5_irq_table *irq_table;
0599 
0600     if (mlx5_core_is_sf(dev))
0601         return 0;
0602 
0603     irq_table = kvzalloc_node(sizeof(*irq_table), GFP_KERNEL,
0604                   dev->priv.numa_node);
0605     if (!irq_table)
0606         return -ENOMEM;
0607 
0608     dev->priv.irq_table = irq_table;
0609     return 0;
0610 }
0611 
0612 void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev)
0613 {
0614     if (mlx5_core_is_sf(dev))
0615         return;
0616 
0617     kvfree(dev->priv.irq_table);
0618 }
0619 
0620 int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table)
0621 {
0622     if (!table->pf_pool->xa_num_irqs.max)
0623         return 1;
0624     return table->pf_pool->xa_num_irqs.max - table->pf_pool->xa_num_irqs.min;
0625 }
0626 
0627 int mlx5_irq_table_create(struct mlx5_core_dev *dev)
0628 {
0629     int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
0630               MLX5_CAP_GEN(dev, max_num_eqs) :
0631               1 << MLX5_CAP_GEN(dev, log_max_eq);
0632     int total_vec;
0633     int pf_vec;
0634     int err;
0635 
0636     if (mlx5_core_is_sf(dev))
0637         return 0;
0638 
0639     pf_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + 1;
0640     pf_vec = min_t(int, pf_vec, num_eqs);
0641 
0642     total_vec = pf_vec;
0643     if (mlx5_sf_max_functions(dev))
0644         total_vec += MLX5_IRQ_CTRL_SF_MAX +
0645             MLX5_COMP_EQS_PER_SF * mlx5_sf_max_functions(dev);
0646 
0647     total_vec = pci_alloc_irq_vectors(dev->pdev, 1, total_vec, PCI_IRQ_MSIX);
0648     if (total_vec < 0)
0649         return total_vec;
0650     pf_vec = min(pf_vec, total_vec);
0651 
0652     err = irq_pools_init(dev, total_vec - pf_vec, pf_vec);
0653     if (err)
0654         pci_free_irq_vectors(dev->pdev);
0655 
0656     return err;
0657 }
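
/* Illustrative sizing sketch (hypothetical numbers): on a single-port device
 * with 16 online CPUs, pf_vec = 1 * 16 + 1 = 17 (further capped by the
 * device's maximum number of EQs). If SFs are supported with
 * mlx5_sf_max_functions() = 256, the request grows to
 * 17 + MLX5_IRQ_CTRL_SF_MAX + MLX5_COMP_EQS_PER_SF * 256 vectors;
 * pci_alloc_irq_vectors() may grant fewer, in which case pf_vec is clamped
 * to what was granted and only the remainder (total_vec - pf_vec) is handed
 * to the SF pools by irq_pools_init().
 */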
0658 
0659 void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
0660 {
0661     struct mlx5_irq_table *table = dev->priv.irq_table;
0662 
0663     if (mlx5_core_is_sf(dev))
0664         return;
0665 
0666     /* There are cases where IRQs will still be in use when we reach
0667      * this point. Hence, make sure all the IRQs are released.
0668      */
0669     irq_pools_destroy(table);
0670     pci_free_irq_vectors(dev->pdev);
0671 }
0672 
0673 int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table)
0674 {
0675     if (table->sf_comp_pool)
0676         return min_t(int, num_online_cpus(),
0677                  table->sf_comp_pool->xa_num_irqs.max -
0678                  table->sf_comp_pool->xa_num_irqs.min + 1);
0679     else
0680         return mlx5_irq_table_get_num_comp(table);
0681 }
0682 
0683 struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev)
0684 {
0685 #ifdef CONFIG_MLX5_SF
0686     if (mlx5_core_is_sf(dev))
0687         return dev->priv.parent_mdev->priv.irq_table;
0688 #endif
0689     return dev->priv.irq_table;
0690 }