// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */

#include "mlx5_core.h"
#include "mlx5_irq.h"
#include "pci_irq.h"

static void cpu_put(struct mlx5_irq_pool *pool, int cpu)
{
	pool->irqs_per_cpu[cpu]--;
}

static void cpu_get(struct mlx5_irq_pool *pool, int cpu)
{
	pool->irqs_per_cpu[cpu]++;
}

/* Gets the least loaded CPU, i.e. the CPU with the fewest IRQs bound to it */
static int cpu_get_least_loaded(struct mlx5_irq_pool *pool,
				const struct cpumask *req_mask)
{
	int best_cpu = -1;
	int cpu;

	for_each_cpu_and(cpu, req_mask, cpu_online_mask) {
		/* CPU has zero IRQs on it. No need to search any more CPUs. */
		if (!pool->irqs_per_cpu[cpu]) {
			best_cpu = cpu;
			break;
		}
		if (best_cpu < 0)
			best_cpu = cpu;
		if (pool->irqs_per_cpu[cpu] < pool->irqs_per_cpu[best_cpu])
			best_cpu = cpu;
	}
	if (best_cpu == -1) {
		/* There are no online CPUs in req_mask */
		mlx5_core_err(pool->dev, "NO online CPUs in req_mask (%*pbl)\n",
			      cpumask_pr_args(req_mask));
		best_cpu = cpumask_first(cpu_online_mask);
	}
	pool->irqs_per_cpu[best_cpu]++;
	return best_cpu;
}

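/* Illustrative sketch (not part of the driver): a worked example of the
 * selection above, assuming the pool's irqs_per_cpu table currently reads
 * {cpu0: 2, cpu1: 1, cpu2: 1} and req_mask spans cpu1-cpu2.  No CPU in the
 * mask is IRQ-free, so the loop keeps the first least-loaded candidate
 * (cpu1), bumps irqs_per_cpu[1] to 2 and returns 1.  Had every CPU in
 * req_mask been offline, the error path would have fallen back to the first
 * online CPU instead.
 */
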
/* Create an IRQ from the irq_pool */
static struct mlx5_irq *
irq_pool_request_irq(struct mlx5_irq_pool *pool, const struct cpumask *req_mask)
{
	cpumask_var_t auto_mask;
	struct mlx5_irq *irq;
	u32 irq_index;
	int err;

	if (!zalloc_cpumask_var(&auto_mask, GFP_KERNEL))
		return ERR_PTR(-ENOMEM);
	err = xa_alloc(&pool->irqs, &irq_index, NULL, pool->xa_num_irqs, GFP_KERNEL);
	if (err) {
		free_cpumask_var(auto_mask);
		return ERR_PTR(err);
	}
	if (pool->irqs_per_cpu) {
		if (cpumask_weight(req_mask) > 1)
			/* if req_mask contains more than one CPU, set the least loaded CPU
			 * of req_mask
			 */
			cpumask_set_cpu(cpu_get_least_loaded(pool, req_mask), auto_mask);
		else
			cpu_get(pool, cpumask_first(req_mask));
	}
	irq = mlx5_irq_alloc(pool, irq_index, cpumask_empty(auto_mask) ? req_mask : auto_mask);
	free_cpumask_var(auto_mask);
	return irq;
}

/* Looking for the IRQ with the smallest refcount that fits req_mask.
 * If pool is sf_comp_pool, then we are looking for an IRQ with any of the
 * requested CPUs in req_mask.
 * For example: req_mask = 0xf, irq0_mask = 0x10, irq1_mask = 0x1. irq0_mask
 * isn't a subset of req_mask, so we will skip it. irq1_mask is a subset of
 * req_mask, so we don't skip it.
 * If pool is sf_ctrl_pool, then all IRQs have the same mask, so any IRQ will
 * fit. And since a mask is a subset of itself, we will pass the first if below.
 */
static struct mlx5_irq *
irq_pool_find_least_loaded(struct mlx5_irq_pool *pool, const struct cpumask *req_mask)
{
	int start = pool->xa_num_irqs.min;
	int end = pool->xa_num_irqs.max;
	struct mlx5_irq *irq = NULL;
	struct mlx5_irq *iter;
	int irq_refcount = 0;
	unsigned long index;

	lockdep_assert_held(&pool->lock);
	xa_for_each_range(&pool->irqs, index, iter, start, end) {
		struct cpumask *iter_mask = mlx5_irq_get_affinity_mask(iter);
		int iter_refcount = mlx5_irq_read_locked(iter);

		if (!cpumask_subset(iter_mask, req_mask))
			/* skip IRQs with a mask which is not a subset of req_mask */
			continue;
		if (iter_refcount < pool->min_threshold)
			/* If we found an IRQ with less than min_thres, return it */
			return iter;
		if (!irq || iter_refcount < irq_refcount) {
			/* In case we won't find an IRQ with less than min_thres,
			 * keep a pointer to the least used IRQ
			 */
			irq_refcount = iter_refcount;
			irq = iter;
		}
	}
	return irq;
}

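/* Illustrative sketch (not part of the driver): the subset rule described
 * above maps directly onto the cpumask_subset() check in the loop.  With
 * req_mask = 0xf (CPUs 0-3), an IRQ whose mask is 0x1 (CPU 0) stays a
 * candidate, while an IRQ whose mask is 0x10 (CPU 4) is skipped:
 *
 *	cpumask_subset(irq1_mask, req_mask)	-> true,  considered
 *	cpumask_subset(irq0_mask, req_mask)	-> false, skipped
 */
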
/**
 * mlx5_irq_affinity_request - request an IRQ according to the given mask.
 * @pool: IRQ pool to request from.
 * @req_mask: cpumask requested for this IRQ.
 *
 * This function returns a pointer to an IRQ on success, or an ERR_PTR() in
 * case of error.
 */
struct mlx5_irq *
mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, const struct cpumask *req_mask)
{
	struct mlx5_irq *least_loaded_irq, *new_irq;

	mutex_lock(&pool->lock);
	least_loaded_irq = irq_pool_find_least_loaded(pool, req_mask);
	if (least_loaded_irq &&
	    mlx5_irq_read_locked(least_loaded_irq) < pool->min_threshold)
		goto out;
	/* We didn't find an IRQ with less than min_thres, try to allocate a new IRQ */
	new_irq = irq_pool_request_irq(pool, req_mask);
	if (IS_ERR(new_irq)) {
		if (!least_loaded_irq) {
			/* We failed to create an IRQ and we didn't find an IRQ */
			mlx5_core_err(pool->dev, "Didn't find a matching IRQ. err = %ld\n",
				      PTR_ERR(new_irq));
			mutex_unlock(&pool->lock);
			return new_irq;
		}
		/* We failed to create a new IRQ for the requested affinity,
		 * so share an existing IRQ instead.
		 */
		goto out;
	}
	least_loaded_irq = new_irq;
	goto unlock;
out:
	mlx5_irq_get_locked(least_loaded_irq);
	if (mlx5_irq_read_locked(least_loaded_irq) > pool->max_threshold)
		mlx5_core_dbg(pool->dev, "IRQ %u overloaded, pool_name: %s, %u EQs on this irq\n",
			      pci_irq_vector(pool->dev->pdev,
					     mlx5_irq_get_index(least_loaded_irq)), pool->name,
			      mlx5_irq_read_locked(least_loaded_irq) / MLX5_EQ_REFS_PER_IRQ);
unlock:
	mutex_unlock(&pool->lock);
	return least_loaded_irq;
}

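/* Illustrative caller sketch, kept under #if 0 so it is never compiled into
 * the driver.  The helper name example_request_on_node() is hypothetical; it
 * only shows how mlx5_irq_affinity_request() could be used to ask for an IRQ
 * whose affinity prefers the CPUs of a given NUMA node, with the pool taken
 * from mlx5_irq_pool_get() as done elsewhere in this file.
 */
#if 0
static struct mlx5_irq *example_request_on_node(struct mlx5_core_dev *dev, int node)
{
	struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev);

	/* May return an existing under-loaded IRQ or allocate a new one */
	return mlx5_irq_affinity_request(pool, cpumask_of_node(node));
}
#endif
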
/* Release the given IRQs, previously requested through the affinity API, and
 * update the pool's per-CPU IRQ counters accordingly.
 */
void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev, struct mlx5_irq **irqs,
				    int num_irqs)
{
	struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev);
	int i;

	for (i = 0; i < num_irqs; i++) {
		int cpu = cpumask_first(mlx5_irq_get_affinity_mask(irqs[i]));

		synchronize_irq(pci_irq_vector(pool->dev->pdev,
					       mlx5_irq_get_index(irqs[i])));
		if (mlx5_irq_put(irqs[i]))
			if (pool->irqs_per_cpu)
				cpu_put(pool, cpu);
	}
}

/**
 * mlx5_irq_affinity_irqs_request_auto - request one or more IRQs for mlx5 device.
 * @dev: mlx5 device that is requesting the IRQs.
 * @nirqs: number of IRQs to request.
 * @irqs: an output array of IRQ pointers.
 *
 * Each IRQ is bound to at most 1 CPU.
 * This function requests IRQs according to the default assignment policy:
 * - in each iteration, request the least loaded IRQ which is not bound to any
 *   CPU of the previously requested IRQs.
 *
 * This function returns the number of IRQs requested (which might be smaller
 * than @nirqs) on success, or a negative error code in case of an error.
 */
int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs,
					struct mlx5_irq **irqs)
{
	struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev);
	cpumask_var_t req_mask;
	struct mlx5_irq *irq;
	int i = 0;

	if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL))
		return -ENOMEM;
	cpumask_copy(req_mask, cpu_online_mask);
	for (i = 0; i < nirqs; i++) {
		if (mlx5_irq_pool_is_sf_pool(pool))
			irq = mlx5_irq_affinity_request(pool, req_mask);
		else
			/* In case the SF pool doesn't exist, fall back to the PF IRQs.
			 * The PF IRQs are already allocated and bound to a CPU
			 * at this point. Hence, only an index is needed.
			 */
			irq = mlx5_irq_request(dev, i, NULL);
		if (IS_ERR(irq))
			break;
		irqs[i] = irq;
		cpumask_clear_cpu(cpumask_first(mlx5_irq_get_affinity_mask(irq)), req_mask);
		mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n",
			      pci_irq_vector(dev->pdev, mlx5_irq_get_index(irq)),
			      cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)),
			      mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ);
	}
	free_cpumask_var(req_mask);
	if (!i)
		return PTR_ERR(irq);
	return i;
}
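
/* Illustrative caller sketch, kept under #if 0 so it is never compiled into
 * the driver.  The names example_setup_irqs() and EXAMPLE_MAX_IRQS are
 * hypothetical; the sketch only shows the expected pairing of
 * mlx5_irq_affinity_irqs_request_auto() with mlx5_irq_affinity_irqs_release(),
 * and that fewer than the requested number of IRQs may be returned.
 */
#if 0
#define EXAMPLE_MAX_IRQS 4

static int example_setup_irqs(struct mlx5_core_dev *dev)
{
	struct mlx5_irq *irqs[EXAMPLE_MAX_IRQS];
	int nirqs;

	nirqs = mlx5_irq_affinity_irqs_request_auto(dev, EXAMPLE_MAX_IRQS, irqs);
	if (nirqs < 0)
		return nirqs;	/* not even one IRQ could be requested */

	/* ... use irqs[0..nirqs - 1], e.g. attach EQs to them ... */

	mlx5_irq_affinity_irqs_release(dev, irqs, nirqs);
	return 0;
}
#endif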