0001
0002
0003
0004
0005 #include <rdma/ib_verbs.h>
0006 #include <rdma/rdma_counter.h>
0007
0008 #include "core_priv.h"
0009 #include "restrack.h"
0010
0011 #define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE | RDMA_COUNTER_MASK_PID)
0012
0013 static int __counter_set_mode(struct rdma_port_counter *port_counter,
0014 enum rdma_nl_counter_mode new_mode,
0015 enum rdma_nl_counter_mask new_mask)
0016 {
0017 if (new_mode == RDMA_COUNTER_MODE_AUTO) {
0018 if (new_mask & (~ALL_AUTO_MODE_MASKS))
0019 return -EINVAL;
0020 if (port_counter->num_counters)
0021 return -EBUSY;
0022 }
0023
0024 port_counter->mode.mode = new_mode;
0025 port_counter->mode.mask = new_mask;
0026 return 0;
0027 }
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
/*
 * rdma_counter_set_auto_mode() - Turn on/off the per-port auto mode
 * @dev: Device the port belongs to
 * @port: Port number to configure
 * @mask: Auto-mode match criteria (RDMA_COUNTER_MASK_*); 0 disables auto mode
 * @extack: Netlink extended ack used to explain a rejected change
 *
 * A non-zero @mask enables auto mode with those match criteria.  A zero
 * @mask disables auto mode; the port then stays in manual mode while any
 * counter is still bound, otherwise it falls back to "none".
 *
 * Return: 0 on success (including the no-op case where the requested
 * mode/mask is already active), -EOPNOTSUPP when the device has no
 * per-port HW stats, or the error from __counter_set_mode() (-EBUSY
 * when counters are still bound on the port).
 */
int rdma_counter_set_auto_mode(struct ib_device *dev, u32 port,
			       enum rdma_nl_counter_mask mask,
			       struct netlink_ext_ack *extack)
{
	struct rdma_port_counter *port_counter;
	enum rdma_nl_counter_mode mode;
	int ret;

	port_counter = &dev->port_data[port].port_counter;
	if (!port_counter->hstats)
		return -EOPNOTSUPP;

	mutex_lock(&port_counter->lock);
	if (mask)
		mode = RDMA_COUNTER_MODE_AUTO;
	else
		mode = (port_counter->num_counters) ? RDMA_COUNTER_MODE_MANUAL :
						      RDMA_COUNTER_MODE_NONE;

	/* Requested configuration already in place - nothing to change. */
	if (port_counter->mode.mode == mode &&
	    port_counter->mode.mask == mask) {
		ret = 0;
		goto out;
	}

	ret = __counter_set_mode(port_counter, mode, mask);

out:
	mutex_unlock(&port_counter->lock);
	if (ret == -EBUSY)
		NL_SET_ERR_MSG(
			extack,
			"Modifying auto mode is not allowed when there is a bound QP");
	return ret;
}
0077
0078 static void auto_mode_init_counter(struct rdma_counter *counter,
0079 const struct ib_qp *qp,
0080 enum rdma_nl_counter_mask new_mask)
0081 {
0082 struct auto_mode_param *param = &counter->mode.param;
0083
0084 counter->mode.mode = RDMA_COUNTER_MODE_AUTO;
0085 counter->mode.mask = new_mask;
0086
0087 if (new_mask & RDMA_COUNTER_MASK_QP_TYPE)
0088 param->qp_type = qp->qp_type;
0089 }
0090
0091 static int __rdma_counter_bind_qp(struct rdma_counter *counter,
0092 struct ib_qp *qp)
0093 {
0094 int ret;
0095
0096 if (qp->counter)
0097 return -EINVAL;
0098
0099 if (!qp->device->ops.counter_bind_qp)
0100 return -EOPNOTSUPP;
0101
0102 mutex_lock(&counter->lock);
0103 ret = qp->device->ops.counter_bind_qp(counter, qp);
0104 mutex_unlock(&counter->lock);
0105
0106 return ret;
0107 }
0108
/*
 * rdma_counter_modify() - Enable or disable an optional HW counter
 * @dev: Device that owns the counter
 * @port: Port the counter belongs to
 * @index: Index into the port's HW stats descriptors
 * @enable: true to enable the counter, false to disable it
 *
 * Only counters flagged IB_STAT_FLAG_OPTIONAL may be toggled.
 *
 * Return: 0 on success (including when the counter is already in the
 * requested state), -EOPNOTSUPP without driver support, -EINVAL for a
 * bad index, or the driver's error.
 */
int rdma_counter_modify(struct ib_device *dev, u32 port,
			unsigned int index, bool enable)
{
	struct rdma_hw_stats *stats;
	int ret = 0;

	if (!dev->ops.modify_hw_stat)
		return -EOPNOTSUPP;

	stats = ib_get_hw_stats_port(dev, port);
	if (!stats || index >= stats->num_counters ||
	    !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL))
		return -EINVAL;

	mutex_lock(&stats->lock);

	/*
	 * is_disabled bit set means the counter is off.  The request is a
	 * no-op when enabling an already-enabled counter (bit clear) or
	 * disabling an already-disabled one (bit set).
	 */
	if (enable != test_bit(index, stats->is_disabled))
		goto out;

	ret = dev->ops.modify_hw_stat(dev, port, index, enable);
	if (ret)
		goto out;

	/* Mirror the new HW state in the disabled bitmap. */
	if (enable)
		clear_bit(index, stats->is_disabled);
	else
		set_bit(index, stats->is_disabled);
out:
	mutex_unlock(&stats->lock);
	return ret;
}
0140
0141 static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port,
0142 struct ib_qp *qp,
0143 enum rdma_nl_counter_mode mode)
0144 {
0145 struct rdma_port_counter *port_counter;
0146 struct rdma_counter *counter;
0147 int ret;
0148
0149 if (!dev->ops.counter_dealloc || !dev->ops.counter_alloc_stats)
0150 return NULL;
0151
0152 counter = kzalloc(sizeof(*counter), GFP_KERNEL);
0153 if (!counter)
0154 return NULL;
0155
0156 counter->device = dev;
0157 counter->port = port;
0158
0159 rdma_restrack_new(&counter->res, RDMA_RESTRACK_COUNTER);
0160 counter->stats = dev->ops.counter_alloc_stats(counter);
0161 if (!counter->stats)
0162 goto err_stats;
0163
0164 port_counter = &dev->port_data[port].port_counter;
0165 mutex_lock(&port_counter->lock);
0166 switch (mode) {
0167 case RDMA_COUNTER_MODE_MANUAL:
0168 ret = __counter_set_mode(port_counter, RDMA_COUNTER_MODE_MANUAL,
0169 0);
0170 if (ret) {
0171 mutex_unlock(&port_counter->lock);
0172 goto err_mode;
0173 }
0174 break;
0175 case RDMA_COUNTER_MODE_AUTO:
0176 auto_mode_init_counter(counter, qp, port_counter->mode.mask);
0177 break;
0178 default:
0179 ret = -EOPNOTSUPP;
0180 mutex_unlock(&port_counter->lock);
0181 goto err_mode;
0182 }
0183
0184 port_counter->num_counters++;
0185 mutex_unlock(&port_counter->lock);
0186
0187 counter->mode.mode = mode;
0188 kref_init(&counter->kref);
0189 mutex_init(&counter->lock);
0190
0191 ret = __rdma_counter_bind_qp(counter, qp);
0192 if (ret)
0193 goto err_mode;
0194
0195 rdma_restrack_parent_name(&counter->res, &qp->res);
0196 rdma_restrack_add(&counter->res);
0197 return counter;
0198
0199 err_mode:
0200 rdma_free_hw_stats_struct(counter->stats);
0201 err_stats:
0202 rdma_restrack_put(&counter->res);
0203 kfree(counter);
0204 return NULL;
0205 }
0206
/*
 * rdma_counter_free() - Final teardown of a counter's core bookkeeping
 * @counter: Counter to free
 *
 * Drops the port accounting taken in alloc_and_bind().  When the last
 * counter on a manual-mode port goes away the port mode is reset to
 * RDMA_COUNTER_MODE_NONE; auto mode is deliberately left untouched so
 * new QPs keep getting counters automatically.
 */
static void rdma_counter_free(struct rdma_counter *counter)
{
	struct rdma_port_counter *port_counter;

	port_counter = &counter->device->port_data[counter->port].port_counter;
	mutex_lock(&port_counter->lock);
	port_counter->num_counters--;
	if (!port_counter->num_counters &&
	    (port_counter->mode.mode == RDMA_COUNTER_MODE_MANUAL))
		__counter_set_mode(port_counter, RDMA_COUNTER_MODE_NONE, 0);

	mutex_unlock(&port_counter->lock);

	rdma_restrack_del(&counter->res);
	rdma_free_hw_stats_struct(counter->stats);
	kfree(counter);
}
0224
0225 static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter,
0226 enum rdma_nl_counter_mask auto_mask)
0227 {
0228 struct auto_mode_param *param = &counter->mode.param;
0229 bool match = true;
0230
0231 if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE)
0232 match &= (param->qp_type == qp->qp_type);
0233
0234 if (auto_mask & RDMA_COUNTER_MASK_PID)
0235 match &= (task_pid_nr(counter->res.task) ==
0236 task_pid_nr(qp->res.task));
0237
0238 return match;
0239 }
0240
0241 static int __rdma_counter_unbind_qp(struct ib_qp *qp)
0242 {
0243 struct rdma_counter *counter = qp->counter;
0244 int ret;
0245
0246 if (!qp->device->ops.counter_unbind_qp)
0247 return -EOPNOTSUPP;
0248
0249 mutex_lock(&counter->lock);
0250 ret = qp->device->ops.counter_unbind_qp(qp);
0251 mutex_unlock(&counter->lock);
0252
0253 return ret;
0254 }
0255
/*
 * counter_history_stat_update() - Fold a dying counter's values into the
 * port history buffer so port-wide totals survive the counter's release.
 * @counter: Counter being released
 *
 * Best effort: the return of rdma_counter_query_stats() is ignored, so a
 * failed final query means the history misses the last delta — presumably
 * acceptable on this teardown path (TODO confirm).
 */
static void counter_history_stat_update(struct rdma_counter *counter)
{
	struct ib_device *dev = counter->device;
	struct rdma_port_counter *port_counter;
	int i;

	port_counter = &dev->port_data[counter->port].port_counter;
	if (!port_counter->hstats)
		return;

	/* Refresh counter->stats from the driver before accumulating. */
	rdma_counter_query_stats(counter);

	for (i = 0; i < counter->stats->num_counters; i++)
		port_counter->hstats->value[i] += counter->stats->value[i];
}
0271
0272
0273
0274
0275
0276
0277
/*
 * rdma_get_counter_auto_mode() - Find an existing auto-mode counter on
 * @port that matches @qp's criteria.
 * @qp: QP looking for a counter
 * @port: Port to search on
 *
 * Walks the device's counter restrack table under the xarray lock.
 * kref_get_unless_zero() skips counters that are concurrently on their
 * way to release (refcount already dropped to zero).
 *
 * Return: a matching counter with an elevated kref, or NULL.
 */
static struct rdma_counter *rdma_get_counter_auto_mode(struct ib_qp *qp,
						       u32 port)
{
	struct rdma_port_counter *port_counter;
	struct rdma_counter *counter = NULL;
	struct ib_device *dev = qp->device;
	struct rdma_restrack_entry *res;
	struct rdma_restrack_root *rt;
	unsigned long id = 0;

	port_counter = &dev->port_data[port].port_counter;
	rt = &dev->res[RDMA_RESTRACK_COUNTER];
	xa_lock(&rt->xa);
	xa_for_each(&rt->xa, id, res) {
		counter = container_of(res, struct rdma_counter, res);
		/* The restrack table is per-device; still filter by port. */
		if ((counter->device != qp->device) || (counter->port != port))
			goto next;

		if (auto_mode_match(qp, counter, port_counter->mode.mask))
			break;
next:
		counter = NULL;
	}

	if (counter && !kref_get_unless_zero(&counter->kref))
		counter = NULL;

	xa_unlock(&rt->xa);
	return counter;
}
0308
/*
 * counter_release() - kref release callback for a counter
 * @kref: Embedded kref whose last reference was just dropped
 *
 * Preserves the final HW readings in the port history, releases the
 * driver-side counter object, then frees the core bookkeeping.
 */
static void counter_release(struct kref *kref)
{
	struct rdma_counter *counter;

	counter = container_of(kref, struct rdma_counter, kref);
	counter_history_stat_update(counter);
	counter->device->ops.counter_dealloc(counter);
	rdma_counter_free(counter);
}
0318
0319
0320
0321
0322
/*
 * rdma_counter_bind_qp_auto() - Bind a newly created QP to a counter when
 * the port is in auto mode.
 * @qp: QP to bind
 * @port: Port the QP belongs to
 *
 * Untracked or kernel-owned QPs, and ports not in auto mode, are skipped
 * with success.  Otherwise the QP joins a matching existing counter, or a
 * fresh one is allocated and bound for it.
 *
 * Return: 0 on success or when no binding is needed, negative errno on
 * failure.
 */
int rdma_counter_bind_qp_auto(struct ib_qp *qp, u32 port)
{
	struct rdma_port_counter *port_counter;
	struct ib_device *dev = qp->device;
	struct rdma_counter *counter;
	int ret;

	if (!rdma_restrack_is_tracked(&qp->res) || rdma_is_kernel_res(&qp->res))
		return 0;

	if (!rdma_is_port_valid(dev, port))
		return -EINVAL;

	port_counter = &dev->port_data[port].port_counter;
	if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO)
		return 0;

	counter = rdma_get_counter_auto_mode(qp, port);
	if (counter) {
		ret = __rdma_counter_bind_qp(counter, qp);
		if (ret) {
			/* Drop the reference taken by the lookup above. */
			kref_put(&counter->kref, counter_release);
			return ret;
		}
	} else {
		counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_AUTO);
		if (!counter)
			return -ENOMEM;
	}

	return 0;
}
0355
0356
0357
0358
0359
0360
0361 int rdma_counter_unbind_qp(struct ib_qp *qp, bool force)
0362 {
0363 struct rdma_counter *counter = qp->counter;
0364 int ret;
0365
0366 if (!counter)
0367 return -EINVAL;
0368
0369 ret = __rdma_counter_unbind_qp(qp);
0370 if (ret && !force)
0371 return ret;
0372
0373 kref_put(&counter->kref, counter_release);
0374 return 0;
0375 }
0376
0377 int rdma_counter_query_stats(struct rdma_counter *counter)
0378 {
0379 struct ib_device *dev = counter->device;
0380 int ret;
0381
0382 if (!dev->ops.counter_update_stats)
0383 return -EINVAL;
0384
0385 mutex_lock(&counter->lock);
0386 ret = dev->ops.counter_update_stats(counter);
0387 mutex_unlock(&counter->lock);
0388
0389 return ret;
0390 }
0391
/*
 * get_running_counters_hwstat_sum() - Sum stat @index over every live
 * counter of @dev on @port.
 *
 * The restrack xarray lock cannot be held across the driver stats query,
 * so each entry is pinned with rdma_restrack_get(), the lock is dropped
 * for the query, then re-taken before advancing the iteration.  Entries
 * whose refcount is already gone, counters of other devices/ports, and
 * counters whose query fails are all skipped.
 */
static u64 get_running_counters_hwstat_sum(struct ib_device *dev,
					   u32 port, u32 index)
{
	struct rdma_restrack_entry *res;
	struct rdma_restrack_root *rt;
	struct rdma_counter *counter;
	unsigned long id = 0;
	u64 sum = 0;

	rt = &dev->res[RDMA_RESTRACK_COUNTER];
	xa_lock(&rt->xa);
	xa_for_each(&rt->xa, id, res) {
		if (!rdma_restrack_get(res))
			continue;

		/* Drop the xa lock: the driver query may sleep. */
		xa_unlock(&rt->xa);

		counter = container_of(res, struct rdma_counter, res);
		if ((counter->device != dev) || (counter->port != port) ||
		    rdma_counter_query_stats(counter))
			goto next;

		sum += counter->stats->value[index];

next:
		xa_lock(&rt->xa);
		rdma_restrack_put(res);
	}

	xa_unlock(&rt->xa);
	return sum;
}
0424
0425
0426
0427
0428
0429 u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u32 port, u32 index)
0430 {
0431 struct rdma_port_counter *port_counter;
0432 u64 sum;
0433
0434 port_counter = &dev->port_data[port].port_counter;
0435 if (!port_counter->hstats)
0436 return 0;
0437
0438 sum = get_running_counters_hwstat_sum(dev, port, index);
0439 sum += port_counter->hstats->value[index];
0440
0441 return sum;
0442 }
0443
0444 static struct ib_qp *rdma_counter_get_qp(struct ib_device *dev, u32 qp_num)
0445 {
0446 struct rdma_restrack_entry *res = NULL;
0447 struct ib_qp *qp = NULL;
0448
0449 res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_QP, qp_num);
0450 if (IS_ERR(res))
0451 return NULL;
0452
0453 qp = container_of(res, struct ib_qp, res);
0454 if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
0455 goto err;
0456
0457 return qp;
0458
0459 err:
0460 rdma_restrack_put(res);
0461 return NULL;
0462 }
0463
0464 static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev,
0465 u32 counter_id)
0466 {
0467 struct rdma_restrack_entry *res;
0468 struct rdma_counter *counter;
0469
0470 res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_COUNTER, counter_id);
0471 if (IS_ERR(res))
0472 return NULL;
0473
0474 counter = container_of(res, struct rdma_counter, res);
0475 kref_get(&counter->kref);
0476 rdma_restrack_put(res);
0477
0478 return counter;
0479 }
0480
0481
0482
0483
/*
 * rdma_counter_bind_qpn() - Bind QP @qp_num to existing counter @counter_id
 * @dev: Device both objects belong to
 * @port: Port the counter is on
 * @qp_num: QP number to bind
 * @counter_id: Id of the target counter
 *
 * Manual binding is rejected while the port is in auto mode.  The QP and
 * counter must live on the same device and port, and both must be either
 * kernel-owned or user-owned.
 *
 * Return: 0 on success or a negative errno.
 */
int rdma_counter_bind_qpn(struct ib_device *dev, u32 port,
			  u32 qp_num, u32 counter_id)
{
	struct rdma_port_counter *port_counter;
	struct rdma_counter *counter;
	struct ib_qp *qp;
	int ret;

	port_counter = &dev->port_data[port].port_counter;
	if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO)
		return -EINVAL;

	qp = rdma_counter_get_qp(dev, qp_num);
	if (!qp)
		return -ENOENT;

	counter = rdma_get_counter_by_id(dev, counter_id);
	if (!counter) {
		ret = -ENOENT;
		goto err;
	}

	/* Refuse to mix kernel-owned and user-owned objects. */
	if (rdma_is_kernel_res(&counter->res) != rdma_is_kernel_res(&qp->res)) {
		ret = -EINVAL;
		goto err_task;
	}

	if ((counter->device != qp->device) || (counter->port != qp->port)) {
		ret = -EINVAL;
		goto err_task;
	}

	ret = __rdma_counter_bind_qp(counter, qp);
	if (ret)
		goto err_task;

	rdma_restrack_put(&qp->res);
	return 0;

err_task:
	/* Drop the kref taken by rdma_get_counter_by_id(). */
	kref_put(&counter->kref, counter_release);
err:
	rdma_restrack_put(&qp->res);
	return ret;
}
0529
0530
0531
0532
0533
/*
 * rdma_counter_bind_qpn_alloc() - Allocate a new counter on @port and
 * bind QP @qp_num to it
 * @dev: Device to allocate on
 * @port: Port the counter is accounted on
 * @qp_num: QP number to bind
 * @counter_id: Optional out-param receiving the new counter's id
 *
 * Switches the port to manual mode via alloc_and_bind(); rejected while
 * the port is in auto mode, and unsupported without per-port HW stats.
 *
 * Return: 0 on success or a negative errno.
 */
int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u32 port,
				u32 qp_num, u32 *counter_id)
{
	struct rdma_port_counter *port_counter;
	struct rdma_counter *counter;
	struct ib_qp *qp;
	int ret;

	if (!rdma_is_port_valid(dev, port))
		return -EINVAL;

	port_counter = &dev->port_data[port].port_counter;
	if (!port_counter->hstats)
		return -EOPNOTSUPP;

	if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO)
		return -EINVAL;

	qp = rdma_counter_get_qp(dev, qp_num);
	if (!qp)
		return -ENOENT;

	/* The QP must belong to the requested port (when it has one). */
	if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) {
		ret = -EINVAL;
		goto err;
	}

	counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_MANUAL);
	if (!counter) {
		ret = -ENOMEM;
		goto err;
	}

	if (counter_id)
		*counter_id = counter->id;

	rdma_restrack_put(&qp->res);
	return 0;

err:
	rdma_restrack_put(&qp->res);
	return ret;
}
0577
0578
0579
0580
/*
 * rdma_counter_unbind_qpn() - Unbind QP @qp_num from counter @counter_id
 * @dev: Device both objects belong to
 * @port: Port the counter is on
 * @qp_num: QP number to unbind
 * @counter_id: Id the QP is expected to be bound to
 *
 * Valid only while the port is in manual mode and only when the QP is
 * currently bound to exactly that counter.
 *
 * Return: 0 on success or a negative errno.
 */
int rdma_counter_unbind_qpn(struct ib_device *dev, u32 port,
			    u32 qp_num, u32 counter_id)
{
	struct rdma_port_counter *port_counter;
	struct ib_qp *qp;
	int ret;

	if (!rdma_is_port_valid(dev, port))
		return -EINVAL;

	qp = rdma_counter_get_qp(dev, qp_num);
	if (!qp)
		return -ENOENT;

	if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) {
		ret = -EINVAL;
		goto out;
	}

	port_counter = &dev->port_data[port].port_counter;
	if (!qp->counter || qp->counter->id != counter_id ||
	    port_counter->mode.mode != RDMA_COUNTER_MODE_MANUAL) {
		ret = -EINVAL;
		goto out;
	}

	ret = rdma_counter_unbind_qp(qp, false);

out:
	rdma_restrack_put(&qp->res);
	return ret;
}
0613
0614 int rdma_counter_get_mode(struct ib_device *dev, u32 port,
0615 enum rdma_nl_counter_mode *mode,
0616 enum rdma_nl_counter_mask *mask)
0617 {
0618 struct rdma_port_counter *port_counter;
0619
0620 port_counter = &dev->port_data[port].port_counter;
0621 *mode = port_counter->mode.mode;
0622 *mask = port_counter->mode.mask;
0623
0624 return 0;
0625 }
0626
0627 void rdma_counter_init(struct ib_device *dev)
0628 {
0629 struct rdma_port_counter *port_counter;
0630 u32 port, i;
0631
0632 if (!dev->port_data)
0633 return;
0634
0635 rdma_for_each_port(dev, port) {
0636 port_counter = &dev->port_data[port].port_counter;
0637 port_counter->mode.mode = RDMA_COUNTER_MODE_NONE;
0638 mutex_init(&port_counter->lock);
0639
0640 if (!dev->ops.alloc_hw_port_stats)
0641 continue;
0642
0643 port_counter->hstats = dev->ops.alloc_hw_port_stats(dev, port);
0644 if (!port_counter->hstats)
0645 goto fail;
0646 }
0647
0648 return;
0649
0650 fail:
0651 for (i = port; i >= rdma_start_port(dev); i--) {
0652 port_counter = &dev->port_data[port].port_counter;
0653 rdma_free_hw_stats_struct(port_counter->hstats);
0654 port_counter->hstats = NULL;
0655 mutex_destroy(&port_counter->lock);
0656 }
0657 }
0658
/*
 * rdma_counter_release() - Device teardown counterpart of
 * rdma_counter_init()
 * @dev: Device being unregistered
 *
 * Frees each port's HW stats history buffer and destroys the per-port
 * lock.
 */
void rdma_counter_release(struct ib_device *dev)
{
	struct rdma_port_counter *port_counter;
	u32 port;

	rdma_for_each_port(dev, port) {
		port_counter = &dev->port_data[port].port_counter;
		rdma_free_hw_stats_struct(port_counter->hstats);
		mutex_destroy(&port_counter->lock);
	}
}