Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
0002 /*
0003  * Copyright (c) 2019 Mellanox Technologies. All rights reserved.
0004  */
0005 #include <rdma/ib_verbs.h>
0006 #include <rdma/rdma_counter.h>
0007 
0008 #include "core_priv.h"
0009 #include "restrack.h"
0010 
0011 #define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE | RDMA_COUNTER_MASK_PID)
0012 
0013 static int __counter_set_mode(struct rdma_port_counter *port_counter,
0014                   enum rdma_nl_counter_mode new_mode,
0015                   enum rdma_nl_counter_mask new_mask)
0016 {
0017     if (new_mode == RDMA_COUNTER_MODE_AUTO) {
0018         if (new_mask & (~ALL_AUTO_MODE_MASKS))
0019             return -EINVAL;
0020         if (port_counter->num_counters)
0021             return -EBUSY;
0022     }
0023 
0024     port_counter->mode.mode = new_mode;
0025     port_counter->mode.mask = new_mask;
0026     return 0;
0027 }
0028 
0029 /*
0030  * rdma_counter_set_auto_mode() - Turn on/off per-port auto mode
0031  *
0032  * @dev: Device to operate
0033  * @port: Port to use
0034  * @mask: Mask to configure
0035  * @extack: Message to the user
0036  *
0037  * Return 0 on success. If counter mode wasn't changed then it is considered
0038  * as success as well.
0039  * Return -EBUSY when changing to auto mode while there are bounded counters.
0040  *
0041  */
0042 int rdma_counter_set_auto_mode(struct ib_device *dev, u32 port,
0043                    enum rdma_nl_counter_mask mask,
0044                    struct netlink_ext_ack *extack)
0045 {
0046     struct rdma_port_counter *port_counter;
0047     enum rdma_nl_counter_mode mode;
0048     int ret;
0049 
0050     port_counter = &dev->port_data[port].port_counter;
0051     if (!port_counter->hstats)
0052         return -EOPNOTSUPP;
0053 
0054     mutex_lock(&port_counter->lock);
0055     if (mask)
0056         mode = RDMA_COUNTER_MODE_AUTO;
0057     else
0058         mode = (port_counter->num_counters) ? RDMA_COUNTER_MODE_MANUAL :
0059                               RDMA_COUNTER_MODE_NONE;
0060 
0061     if (port_counter->mode.mode == mode &&
0062         port_counter->mode.mask == mask) {
0063         ret = 0;
0064         goto out;
0065     }
0066 
0067     ret = __counter_set_mode(port_counter, mode, mask);
0068 
0069 out:
0070     mutex_unlock(&port_counter->lock);
0071     if (ret == -EBUSY)
0072         NL_SET_ERR_MSG(
0073             extack,
0074             "Modifying auto mode is not allowed when there is a bound QP");
0075     return ret;
0076 }
0077 
0078 static void auto_mode_init_counter(struct rdma_counter *counter,
0079                    const struct ib_qp *qp,
0080                    enum rdma_nl_counter_mask new_mask)
0081 {
0082     struct auto_mode_param *param = &counter->mode.param;
0083 
0084     counter->mode.mode = RDMA_COUNTER_MODE_AUTO;
0085     counter->mode.mask = new_mask;
0086 
0087     if (new_mask & RDMA_COUNTER_MASK_QP_TYPE)
0088         param->qp_type = qp->qp_type;
0089 }
0090 
0091 static int __rdma_counter_bind_qp(struct rdma_counter *counter,
0092                   struct ib_qp *qp)
0093 {
0094     int ret;
0095 
0096     if (qp->counter)
0097         return -EINVAL;
0098 
0099     if (!qp->device->ops.counter_bind_qp)
0100         return -EOPNOTSUPP;
0101 
0102     mutex_lock(&counter->lock);
0103     ret = qp->device->ops.counter_bind_qp(counter, qp);
0104     mutex_unlock(&counter->lock);
0105 
0106     return ret;
0107 }
0108 
0109 int rdma_counter_modify(struct ib_device *dev, u32 port,
0110             unsigned int index, bool enable)
0111 {
0112     struct rdma_hw_stats *stats;
0113     int ret = 0;
0114 
0115     if (!dev->ops.modify_hw_stat)
0116         return -EOPNOTSUPP;
0117 
0118     stats = ib_get_hw_stats_port(dev, port);
0119     if (!stats || index >= stats->num_counters ||
0120         !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL))
0121         return -EINVAL;
0122 
0123     mutex_lock(&stats->lock);
0124 
0125     if (enable != test_bit(index, stats->is_disabled))
0126         goto out;
0127 
0128     ret = dev->ops.modify_hw_stat(dev, port, index, enable);
0129     if (ret)
0130         goto out;
0131 
0132     if (enable)
0133         clear_bit(index, stats->is_disabled);
0134     else
0135         set_bit(index, stats->is_disabled);
0136 out:
0137     mutex_unlock(&stats->lock);
0138     return ret;
0139 }
0140 
0141 static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port,
0142                        struct ib_qp *qp,
0143                        enum rdma_nl_counter_mode mode)
0144 {
0145     struct rdma_port_counter *port_counter;
0146     struct rdma_counter *counter;
0147     int ret;
0148 
0149     if (!dev->ops.counter_dealloc || !dev->ops.counter_alloc_stats)
0150         return NULL;
0151 
0152     counter = kzalloc(sizeof(*counter), GFP_KERNEL);
0153     if (!counter)
0154         return NULL;
0155 
0156     counter->device    = dev;
0157     counter->port      = port;
0158 
0159     rdma_restrack_new(&counter->res, RDMA_RESTRACK_COUNTER);
0160     counter->stats = dev->ops.counter_alloc_stats(counter);
0161     if (!counter->stats)
0162         goto err_stats;
0163 
0164     port_counter = &dev->port_data[port].port_counter;
0165     mutex_lock(&port_counter->lock);
0166     switch (mode) {
0167     case RDMA_COUNTER_MODE_MANUAL:
0168         ret = __counter_set_mode(port_counter, RDMA_COUNTER_MODE_MANUAL,
0169                      0);
0170         if (ret) {
0171             mutex_unlock(&port_counter->lock);
0172             goto err_mode;
0173         }
0174         break;
0175     case RDMA_COUNTER_MODE_AUTO:
0176         auto_mode_init_counter(counter, qp, port_counter->mode.mask);
0177         break;
0178     default:
0179         ret = -EOPNOTSUPP;
0180         mutex_unlock(&port_counter->lock);
0181         goto err_mode;
0182     }
0183 
0184     port_counter->num_counters++;
0185     mutex_unlock(&port_counter->lock);
0186 
0187     counter->mode.mode = mode;
0188     kref_init(&counter->kref);
0189     mutex_init(&counter->lock);
0190 
0191     ret = __rdma_counter_bind_qp(counter, qp);
0192     if (ret)
0193         goto err_mode;
0194 
0195     rdma_restrack_parent_name(&counter->res, &qp->res);
0196     rdma_restrack_add(&counter->res);
0197     return counter;
0198 
0199 err_mode:
0200     rdma_free_hw_stats_struct(counter->stats);
0201 err_stats:
0202     rdma_restrack_put(&counter->res);
0203     kfree(counter);
0204     return NULL;
0205 }
0206 
0207 static void rdma_counter_free(struct rdma_counter *counter)
0208 {
0209     struct rdma_port_counter *port_counter;
0210 
0211     port_counter = &counter->device->port_data[counter->port].port_counter;
0212     mutex_lock(&port_counter->lock);
0213     port_counter->num_counters--;
0214     if (!port_counter->num_counters &&
0215         (port_counter->mode.mode == RDMA_COUNTER_MODE_MANUAL))
0216         __counter_set_mode(port_counter, RDMA_COUNTER_MODE_NONE, 0);
0217 
0218     mutex_unlock(&port_counter->lock);
0219 
0220     rdma_restrack_del(&counter->res);
0221     rdma_free_hw_stats_struct(counter->stats);
0222     kfree(counter);
0223 }
0224 
0225 static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter,
0226                 enum rdma_nl_counter_mask auto_mask)
0227 {
0228     struct auto_mode_param *param = &counter->mode.param;
0229     bool match = true;
0230 
0231     if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE)
0232         match &= (param->qp_type == qp->qp_type);
0233 
0234     if (auto_mask & RDMA_COUNTER_MASK_PID)
0235         match &= (task_pid_nr(counter->res.task) ==
0236               task_pid_nr(qp->res.task));
0237 
0238     return match;
0239 }
0240 
0241 static int __rdma_counter_unbind_qp(struct ib_qp *qp)
0242 {
0243     struct rdma_counter *counter = qp->counter;
0244     int ret;
0245 
0246     if (!qp->device->ops.counter_unbind_qp)
0247         return -EOPNOTSUPP;
0248 
0249     mutex_lock(&counter->lock);
0250     ret = qp->device->ops.counter_unbind_qp(qp);
0251     mutex_unlock(&counter->lock);
0252 
0253     return ret;
0254 }
0255 
0256 static void counter_history_stat_update(struct rdma_counter *counter)
0257 {
0258     struct ib_device *dev = counter->device;
0259     struct rdma_port_counter *port_counter;
0260     int i;
0261 
0262     port_counter = &dev->port_data[counter->port].port_counter;
0263     if (!port_counter->hstats)
0264         return;
0265 
0266     rdma_counter_query_stats(counter);
0267 
0268     for (i = 0; i < counter->stats->num_counters; i++)
0269         port_counter->hstats->value[i] += counter->stats->value[i];
0270 }
0271 
0272 /*
0273  * rdma_get_counter_auto_mode - Find the counter that @qp should be bound
0274  *     with in auto mode
0275  *
0276  * Return: The counter (with ref-count increased) if found
0277  */
0278 static struct rdma_counter *rdma_get_counter_auto_mode(struct ib_qp *qp,
0279                                u32 port)
0280 {
0281     struct rdma_port_counter *port_counter;
0282     struct rdma_counter *counter = NULL;
0283     struct ib_device *dev = qp->device;
0284     struct rdma_restrack_entry *res;
0285     struct rdma_restrack_root *rt;
0286     unsigned long id = 0;
0287 
0288     port_counter = &dev->port_data[port].port_counter;
0289     rt = &dev->res[RDMA_RESTRACK_COUNTER];
0290     xa_lock(&rt->xa);
0291     xa_for_each(&rt->xa, id, res) {
0292         counter = container_of(res, struct rdma_counter, res);
0293         if ((counter->device != qp->device) || (counter->port != port))
0294             goto next;
0295 
0296         if (auto_mode_match(qp, counter, port_counter->mode.mask))
0297             break;
0298 next:
0299         counter = NULL;
0300     }
0301 
0302     if (counter && !kref_get_unless_zero(&counter->kref))
0303         counter = NULL;
0304 
0305     xa_unlock(&rt->xa);
0306     return counter;
0307 }
0308 
0309 static void counter_release(struct kref *kref)
0310 {
0311     struct rdma_counter *counter;
0312 
0313     counter = container_of(kref, struct rdma_counter, kref);
0314     counter_history_stat_update(counter);
0315     counter->device->ops.counter_dealloc(counter);
0316     rdma_counter_free(counter);
0317 }
0318 
0319 /*
0320  * rdma_counter_bind_qp_auto - Check and bind the QP to a counter base on
0321  *   the auto-mode rule
0322  */
0323 int rdma_counter_bind_qp_auto(struct ib_qp *qp, u32 port)
0324 {
0325     struct rdma_port_counter *port_counter;
0326     struct ib_device *dev = qp->device;
0327     struct rdma_counter *counter;
0328     int ret;
0329 
0330     if (!rdma_restrack_is_tracked(&qp->res) || rdma_is_kernel_res(&qp->res))
0331         return 0;
0332 
0333     if (!rdma_is_port_valid(dev, port))
0334         return -EINVAL;
0335 
0336     port_counter = &dev->port_data[port].port_counter;
0337     if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO)
0338         return 0;
0339 
0340     counter = rdma_get_counter_auto_mode(qp, port);
0341     if (counter) {
0342         ret = __rdma_counter_bind_qp(counter, qp);
0343         if (ret) {
0344             kref_put(&counter->kref, counter_release);
0345             return ret;
0346         }
0347     } else {
0348         counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_AUTO);
0349         if (!counter)
0350             return -ENOMEM;
0351     }
0352 
0353     return 0;
0354 }
0355 
0356 /*
0357  * rdma_counter_unbind_qp - Unbind a qp from a counter
0358  * @force:
0359  *   true - Decrease the counter ref-count anyway (e.g., qp destroy)
0360  */
0361 int rdma_counter_unbind_qp(struct ib_qp *qp, bool force)
0362 {
0363     struct rdma_counter *counter = qp->counter;
0364     int ret;
0365 
0366     if (!counter)
0367         return -EINVAL;
0368 
0369     ret = __rdma_counter_unbind_qp(qp);
0370     if (ret && !force)
0371         return ret;
0372 
0373     kref_put(&counter->kref, counter_release);
0374     return 0;
0375 }
0376 
0377 int rdma_counter_query_stats(struct rdma_counter *counter)
0378 {
0379     struct ib_device *dev = counter->device;
0380     int ret;
0381 
0382     if (!dev->ops.counter_update_stats)
0383         return -EINVAL;
0384 
0385     mutex_lock(&counter->lock);
0386     ret = dev->ops.counter_update_stats(counter);
0387     mutex_unlock(&counter->lock);
0388 
0389     return ret;
0390 }
0391 
0392 static u64 get_running_counters_hwstat_sum(struct ib_device *dev,
0393                        u32 port, u32 index)
0394 {
0395     struct rdma_restrack_entry *res;
0396     struct rdma_restrack_root *rt;
0397     struct rdma_counter *counter;
0398     unsigned long id = 0;
0399     u64 sum = 0;
0400 
0401     rt = &dev->res[RDMA_RESTRACK_COUNTER];
0402     xa_lock(&rt->xa);
0403     xa_for_each(&rt->xa, id, res) {
0404         if (!rdma_restrack_get(res))
0405             continue;
0406 
0407         xa_unlock(&rt->xa);
0408 
0409         counter = container_of(res, struct rdma_counter, res);
0410         if ((counter->device != dev) || (counter->port != port) ||
0411             rdma_counter_query_stats(counter))
0412             goto next;
0413 
0414         sum += counter->stats->value[index];
0415 
0416 next:
0417         xa_lock(&rt->xa);
0418         rdma_restrack_put(res);
0419     }
0420 
0421     xa_unlock(&rt->xa);
0422     return sum;
0423 }
0424 
0425 /*
0426  * rdma_counter_get_hwstat_value() - Get the sum value of all counters on a
0427  *   specific port, including the running ones and history data
0428  */
0429 u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u32 port, u32 index)
0430 {
0431     struct rdma_port_counter *port_counter;
0432     u64 sum;
0433 
0434     port_counter = &dev->port_data[port].port_counter;
0435     if (!port_counter->hstats)
0436         return 0;
0437 
0438     sum = get_running_counters_hwstat_sum(dev, port, index);
0439     sum += port_counter->hstats->value[index];
0440 
0441     return sum;
0442 }
0443 
0444 static struct ib_qp *rdma_counter_get_qp(struct ib_device *dev, u32 qp_num)
0445 {
0446     struct rdma_restrack_entry *res = NULL;
0447     struct ib_qp *qp = NULL;
0448 
0449     res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_QP, qp_num);
0450     if (IS_ERR(res))
0451         return NULL;
0452 
0453     qp = container_of(res, struct ib_qp, res);
0454     if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
0455         goto err;
0456 
0457     return qp;
0458 
0459 err:
0460     rdma_restrack_put(res);
0461     return NULL;
0462 }
0463 
0464 static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev,
0465                            u32 counter_id)
0466 {
0467     struct rdma_restrack_entry *res;
0468     struct rdma_counter *counter;
0469 
0470     res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_COUNTER, counter_id);
0471     if (IS_ERR(res))
0472         return NULL;
0473 
0474     counter = container_of(res, struct rdma_counter, res);
0475     kref_get(&counter->kref);
0476     rdma_restrack_put(res);
0477 
0478     return counter;
0479 }
0480 
0481 /*
0482  * rdma_counter_bind_qpn() - Bind QP @qp_num to counter @counter_id
0483  */
0484 int rdma_counter_bind_qpn(struct ib_device *dev, u32 port,
0485               u32 qp_num, u32 counter_id)
0486 {
0487     struct rdma_port_counter *port_counter;
0488     struct rdma_counter *counter;
0489     struct ib_qp *qp;
0490     int ret;
0491 
0492     port_counter = &dev->port_data[port].port_counter;
0493     if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO)
0494         return -EINVAL;
0495 
0496     qp = rdma_counter_get_qp(dev, qp_num);
0497     if (!qp)
0498         return -ENOENT;
0499 
0500     counter = rdma_get_counter_by_id(dev, counter_id);
0501     if (!counter) {
0502         ret = -ENOENT;
0503         goto err;
0504     }
0505 
0506     if (rdma_is_kernel_res(&counter->res) != rdma_is_kernel_res(&qp->res)) {
0507         ret = -EINVAL;
0508         goto err_task;
0509     }
0510 
0511     if ((counter->device != qp->device) || (counter->port != qp->port)) {
0512         ret = -EINVAL;
0513         goto err_task;
0514     }
0515 
0516     ret = __rdma_counter_bind_qp(counter, qp);
0517     if (ret)
0518         goto err_task;
0519 
0520     rdma_restrack_put(&qp->res);
0521     return 0;
0522 
0523 err_task:
0524     kref_put(&counter->kref, counter_release);
0525 err:
0526     rdma_restrack_put(&qp->res);
0527     return ret;
0528 }
0529 
0530 /*
0531  * rdma_counter_bind_qpn_alloc() - Alloc a counter and bind QP @qp_num to it
0532  *   The id of new counter is returned in @counter_id
0533  */
0534 int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u32 port,
0535                 u32 qp_num, u32 *counter_id)
0536 {
0537     struct rdma_port_counter *port_counter;
0538     struct rdma_counter *counter;
0539     struct ib_qp *qp;
0540     int ret;
0541 
0542     if (!rdma_is_port_valid(dev, port))
0543         return -EINVAL;
0544 
0545     port_counter = &dev->port_data[port].port_counter;
0546     if (!port_counter->hstats)
0547         return -EOPNOTSUPP;
0548 
0549     if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO)
0550         return -EINVAL;
0551 
0552     qp = rdma_counter_get_qp(dev, qp_num);
0553     if (!qp)
0554         return -ENOENT;
0555 
0556     if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) {
0557         ret = -EINVAL;
0558         goto err;
0559     }
0560 
0561     counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_MANUAL);
0562     if (!counter) {
0563         ret = -ENOMEM;
0564         goto err;
0565     }
0566 
0567     if (counter_id)
0568         *counter_id = counter->id;
0569 
0570     rdma_restrack_put(&qp->res);
0571     return 0;
0572 
0573 err:
0574     rdma_restrack_put(&qp->res);
0575     return ret;
0576 }
0577 
0578 /*
0579  * rdma_counter_unbind_qpn() - Unbind QP @qp_num from a counter
0580  */
0581 int rdma_counter_unbind_qpn(struct ib_device *dev, u32 port,
0582                 u32 qp_num, u32 counter_id)
0583 {
0584     struct rdma_port_counter *port_counter;
0585     struct ib_qp *qp;
0586     int ret;
0587 
0588     if (!rdma_is_port_valid(dev, port))
0589         return -EINVAL;
0590 
0591     qp = rdma_counter_get_qp(dev, qp_num);
0592     if (!qp)
0593         return -ENOENT;
0594 
0595     if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) {
0596         ret = -EINVAL;
0597         goto out;
0598     }
0599 
0600     port_counter = &dev->port_data[port].port_counter;
0601     if (!qp->counter || qp->counter->id != counter_id ||
0602         port_counter->mode.mode != RDMA_COUNTER_MODE_MANUAL) {
0603         ret = -EINVAL;
0604         goto out;
0605     }
0606 
0607     ret = rdma_counter_unbind_qp(qp, false);
0608 
0609 out:
0610     rdma_restrack_put(&qp->res);
0611     return ret;
0612 }
0613 
0614 int rdma_counter_get_mode(struct ib_device *dev, u32 port,
0615               enum rdma_nl_counter_mode *mode,
0616               enum rdma_nl_counter_mask *mask)
0617 {
0618     struct rdma_port_counter *port_counter;
0619 
0620     port_counter = &dev->port_data[port].port_counter;
0621     *mode = port_counter->mode.mode;
0622     *mask = port_counter->mode.mask;
0623 
0624     return 0;
0625 }
0626 
0627 void rdma_counter_init(struct ib_device *dev)
0628 {
0629     struct rdma_port_counter *port_counter;
0630     u32 port, i;
0631 
0632     if (!dev->port_data)
0633         return;
0634 
0635     rdma_for_each_port(dev, port) {
0636         port_counter = &dev->port_data[port].port_counter;
0637         port_counter->mode.mode = RDMA_COUNTER_MODE_NONE;
0638         mutex_init(&port_counter->lock);
0639 
0640         if (!dev->ops.alloc_hw_port_stats)
0641             continue;
0642 
0643         port_counter->hstats = dev->ops.alloc_hw_port_stats(dev, port);
0644         if (!port_counter->hstats)
0645             goto fail;
0646     }
0647 
0648     return;
0649 
0650 fail:
0651     for (i = port; i >= rdma_start_port(dev); i--) {
0652         port_counter = &dev->port_data[port].port_counter;
0653         rdma_free_hw_stats_struct(port_counter->hstats);
0654         port_counter->hstats = NULL;
0655         mutex_destroy(&port_counter->lock);
0656     }
0657 }
0658 
0659 void rdma_counter_release(struct ib_device *dev)
0660 {
0661     struct rdma_port_counter *port_counter;
0662     u32 port;
0663 
0664     rdma_for_each_port(dev, port) {
0665         port_counter = &dev->port_data[port].port_counter;
0666         rdma_free_hw_stats_struct(port_counter->hstats);
0667         mutex_destroy(&port_counter->lock);
0668     }
0669 }