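/*
 * InfiniBand/RDMA core: device registration, client binding and net
 * namespace (compat device) handling.
 */
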
#include <linux/module.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <linux/security.h>
#include <linux/notifier.h>
#include <linux/hashtable.h>
#include <rdma/rdma_netlink.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
#include <rdma/rdma_counter.h>

#include "core_priv.h"
#include "restrack.h"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("core kernel InfiniBand API");
MODULE_LICENSE("Dual BSD/GPL");

struct workqueue_struct *ib_comp_wq;
struct workqueue_struct *ib_comp_unbound_wq;
struct workqueue_struct *ib_wq;
EXPORT_SYMBOL_GPL(ib_wq);
static struct workqueue_struct *ib_unreg_wq;

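/*
 * All registered ib_devices live in the "devices" xarray, indexed by
 * device->index and protected by devices_rwsem; entries that have completed
 * registration carry the DEVICE_REGISTERED mark. Clients are tracked the
 * same way in the "clients" xarray under clients_rwsem.
 */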
static DEFINE_XARRAY_FLAGS(devices, XA_FLAGS_ALLOC);
static DECLARE_RWSEM(devices_rwsem);
#define DEVICE_REGISTERED XA_MARK_1

static u32 highest_client_id;
#define CLIENT_REGISTERED XA_MARK_1
static DEFINE_XARRAY_FLAGS(clients, XA_FLAGS_ALLOC);
static DECLARE_RWSEM(clients_rwsem);

static void ib_client_put(struct ib_client *client)
{
	if (refcount_dec_and_test(&client->uses))
		complete(&client->uses_zero);
}

#define CLIENT_DATA_REGISTERED XA_MARK_1

unsigned int rdma_dev_net_id;

static DEFINE_XARRAY_FLAGS(rdma_nets, XA_FLAGS_ALLOC);
static DECLARE_RWSEM(rdma_nets_rwsem);

bool ib_devices_shared_netns = true;
module_param_named(netns_mode, ib_devices_shared_netns, bool, 0444);
MODULE_PARM_DESC(netns_mode,
		 "Share device among net namespaces; default=1 (shared)");

bool rdma_dev_access_netns(const struct ib_device *dev, const struct net *net)
{
	return (ib_devices_shared_netns ||
		net_eq(read_pnet(&dev->coredev.rdma_net), net));
}
EXPORT_SYMBOL(rdma_dev_access_netns);

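/*
 * Wrapper around xas_find_marked() that updates *indexp, returns NULL for
 * zero (reserved) entries and XA_ERROR(-ENOENT) once nothing more is marked,
 * so that xan_for_each_marked() can terminate.
 */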
static void *xan_find_marked(struct xarray *xa, unsigned long *indexp,
			     xa_mark_t filter)
{
	XA_STATE(xas, xa, *indexp);
	void *entry;

	rcu_read_lock();
	do {
		entry = xas_find_marked(&xas, ULONG_MAX, filter);
		if (xa_is_zero(entry))
			break;
	} while (xas_retry(&xas, entry));
	rcu_read_unlock();

	if (entry) {
		*indexp = xas.xa_index;
		if (xa_is_zero(entry))
			return NULL;
		return entry;
	}
	return XA_ERROR(-ENOENT);
}
#define xan_for_each_marked(xa, index, entry, filter)                         \
	for (index = 0, entry = xan_find_marked(xa, &(index), filter);        \
	     !xa_is_err(entry);                                               \
	     (index)++, entry = xan_find_marked(xa, &(index), filter))

static DEFINE_SPINLOCK(ndev_hash_lock);
static DECLARE_HASHTABLE(ndev_hash, 5);

static void free_netdevs(struct ib_device *ib_dev);
static void ib_unregister_work(struct work_struct *work);
static void __ib_unregister_device(struct ib_device *device);
static int ib_security_change(struct notifier_block *nb, unsigned long event,
			      void *lsm_data);
static void ib_policy_change_task(struct work_struct *work);
static DECLARE_WORK(ib_policy_change_work, ib_policy_change_task);

static void __ibdev_printk(const char *level, const struct ib_device *ibdev,
			   struct va_format *vaf)
{
	if (ibdev && ibdev->dev.parent)
		dev_printk_emit(level[1] - '0',
				ibdev->dev.parent,
				"%s %s %s: %pV",
				dev_driver_string(ibdev->dev.parent),
				dev_name(ibdev->dev.parent),
				dev_name(&ibdev->dev),
				vaf);
	else if (ibdev)
		printk("%s%s: %pV",
		       level, dev_name(&ibdev->dev), vaf);
	else
		printk("%s(NULL ib_device): %pV", level, vaf);
}

void ibdev_printk(const char *level, const struct ib_device *ibdev,
		  const char *format, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, format);

	vaf.fmt = format;
	vaf.va = &args;

	__ibdev_printk(level, ibdev, &vaf);

	va_end(args);
}
EXPORT_SYMBOL(ibdev_printk);

#define define_ibdev_printk_level(func, level)                          \
void func(const struct ib_device *ibdev, const char *fmt, ...)          \
{                                                                       \
	struct va_format vaf;                                           \
	va_list args;                                                   \
									\
	va_start(args, fmt);                                            \
									\
	vaf.fmt = fmt;                                                  \
	vaf.va = &args;                                                 \
									\
	__ibdev_printk(level, ibdev, &vaf);                             \
									\
	va_end(args);                                                   \
}                                                                       \
EXPORT_SYMBOL(func);

define_ibdev_printk_level(ibdev_emerg, KERN_EMERG);
define_ibdev_printk_level(ibdev_alert, KERN_ALERT);
define_ibdev_printk_level(ibdev_crit, KERN_CRIT);
define_ibdev_printk_level(ibdev_err, KERN_ERR);
define_ibdev_printk_level(ibdev_warn, KERN_WARNING);
define_ibdev_printk_level(ibdev_notice, KERN_NOTICE);
define_ibdev_printk_level(ibdev_info, KERN_INFO);

static struct notifier_block ibdev_lsm_nb = {
	.notifier_call = ib_security_change,
};

static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net,
				 struct net *net);

struct ib_port_data_rcu {
	struct rcu_head rcu_head;
	struct ib_port_data pdata[];
};

static void ib_device_check_mandatory(struct ib_device *device)
{
#define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device_ops, x), #x }
	static const struct {
		size_t offset;
		char *name;
	} mandatory_table[] = {
		IB_MANDATORY_FUNC(query_device),
		IB_MANDATORY_FUNC(query_port),
		IB_MANDATORY_FUNC(alloc_pd),
		IB_MANDATORY_FUNC(dealloc_pd),
		IB_MANDATORY_FUNC(create_qp),
		IB_MANDATORY_FUNC(modify_qp),
		IB_MANDATORY_FUNC(destroy_qp),
		IB_MANDATORY_FUNC(post_send),
		IB_MANDATORY_FUNC(post_recv),
		IB_MANDATORY_FUNC(create_cq),
		IB_MANDATORY_FUNC(destroy_cq),
		IB_MANDATORY_FUNC(poll_cq),
		IB_MANDATORY_FUNC(req_notify_cq),
		IB_MANDATORY_FUNC(get_dma_mr),
		IB_MANDATORY_FUNC(reg_user_mr),
		IB_MANDATORY_FUNC(dereg_mr),
		IB_MANDATORY_FUNC(get_port_immutable)
	};
	int i;

	device->kverbs_provider = true;
	for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) {
		if (!*(void **) ((void *) &device->ops +
				 mandatory_table[i].offset)) {
			device->kverbs_provider = false;
			break;
		}
	}
}

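/**
 * ib_device_get_by_index - Find an ib_device by its index
 * @net: network namespace of the caller
 * @index: device index to look up
 *
 * Returns the device only if it is accessible from @net and a reference
 * could be taken; the caller must release it with ib_device_put().
 */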
struct ib_device *ib_device_get_by_index(const struct net *net, u32 index)
{
	struct ib_device *device;

	down_read(&devices_rwsem);
	device = xa_load(&devices, index);
	if (device) {
		if (!rdma_dev_access_netns(device, net)) {
			device = NULL;
			goto out;
		}

		if (!ib_device_try_get(device))
			device = NULL;
	}
out:
	up_read(&devices_rwsem);
	return device;
}

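/**
 * ib_device_put - Release a reference on an ib_device
 * @device: device whose reference to drop
 *
 * Pairs with ib_device_try_get(). When the last reference is dropped the
 * unreg_completion is completed so that unregistration can make progress.
 */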
void ib_device_put(struct ib_device *device)
{
	if (refcount_dec_and_test(&device->refcount))
		complete(&device->unreg_completion);
}
EXPORT_SYMBOL(ib_device_put);

static struct ib_device *__ib_device_get_by_name(const char *name)
{
	struct ib_device *device;
	unsigned long index;

	xa_for_each (&devices, index, device)
		if (!strcmp(name, dev_name(&device->dev)))
			return device;

	return NULL;
}

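/**
 * ib_device_get_by_name - Find an ib_device by name
 * @name: device name to look up
 * @driver_id: expected driver, or RDMA_DRIVER_UNKNOWN to accept any
 *
 * Returns a referenced device or NULL; the caller must release the
 * reference with ib_device_put().
 */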
struct ib_device *ib_device_get_by_name(const char *name,
					enum rdma_driver_id driver_id)
{
	struct ib_device *device;

	down_read(&devices_rwsem);
	device = __ib_device_get_by_name(name);
	if (device && driver_id != RDMA_DRIVER_UNKNOWN &&
	    device->ops.driver_id != driver_id)
		device = NULL;

	if (device) {
		if (!ib_device_try_get(device))
			device = NULL;
	}
	up_read(&devices_rwsem);
	return device;
}
EXPORT_SYMBOL(ib_device_get_by_name);

static int rename_compat_devs(struct ib_device *device)
{
	struct ib_core_device *cdev;
	unsigned long index;
	int ret = 0;

	mutex_lock(&device->compat_devs_mutex);
	xa_for_each (&device->compat_devs, index, cdev) {
		ret = device_rename(&cdev->dev, dev_name(&device->dev));
		if (ret) {
			dev_warn(&cdev->dev,
				 "Failed to rename compat device to new name %s\n",
				 dev_name(&device->dev));
			break;
		}
	}
	mutex_unlock(&device->compat_devs_mutex);
	return ret;
}

int ib_device_rename(struct ib_device *ibdev, const char *name)
{
	unsigned long index;
	void *client_data;
	int ret;

	down_write(&devices_rwsem);
	if (!strcmp(name, dev_name(&ibdev->dev))) {
		up_write(&devices_rwsem);
		return 0;
	}

	if (__ib_device_get_by_name(name)) {
		up_write(&devices_rwsem);
		return -EEXIST;
	}

	ret = device_rename(&ibdev->dev, name);
	if (ret) {
		up_write(&devices_rwsem);
		return ret;
	}

	strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX);
	ret = rename_compat_devs(ibdev);

	downgrade_write(&devices_rwsem);
	down_read(&ibdev->client_data_rwsem);
	xan_for_each_marked(&ibdev->client_data, index, client_data,
			    CLIENT_DATA_REGISTERED) {
		struct ib_client *client = xa_load(&clients, index);

		if (!client || !client->rename)
			continue;

		client->rename(ibdev, client_data);
	}
	up_read(&ibdev->client_data_rwsem);
	up_read(&devices_rwsem);
	return 0;
}

int ib_device_set_dim(struct ib_device *ibdev, u8 use_dim)
{
	if (use_dim > 1)
		return -EINVAL;
	ibdev->use_cq_dim = use_dim;

	return 0;
}

static int alloc_name(struct ib_device *ibdev, const char *name)
{
	struct ib_device *device;
	unsigned long index;
	struct ida inuse;
	int rc;
	int i;

	lockdep_assert_held_write(&devices_rwsem);
	ida_init(&inuse);
	xa_for_each (&devices, index, device) {
		char buf[IB_DEVICE_NAME_MAX];

		if (sscanf(dev_name(&device->dev), name, &i) != 1)
			continue;
		if (i < 0 || i >= INT_MAX)
			continue;
		snprintf(buf, sizeof buf, name, i);
		if (strcmp(buf, dev_name(&device->dev)) != 0)
			continue;

		rc = ida_alloc_range(&inuse, i, i, GFP_KERNEL);
		if (rc < 0)
			goto out;
	}

	rc = ida_alloc(&inuse, GFP_KERNEL);
	if (rc < 0)
		goto out;

	rc = dev_set_name(&ibdev->dev, name, rc);
out:
	ida_destroy(&inuse);
	return rc;
}

static void ib_device_release(struct device *device)
{
	struct ib_device *dev = container_of(device, struct ib_device, dev);

	free_netdevs(dev);
	WARN_ON(refcount_read(&dev->refcount));
	if (dev->hw_stats_data)
		ib_device_release_hw_stats(dev->hw_stats_data);
	if (dev->port_data) {
		ib_cache_release_one(dev);
		ib_security_release_port_pkey_list(dev);
		rdma_counter_release(dev);
		kfree_rcu(container_of(dev->port_data, struct ib_port_data_rcu,
				       pdata[0]),
			  rcu_head);
	}

	mutex_destroy(&dev->unregistration_lock);
	mutex_destroy(&dev->compat_devs_mutex);

	xa_destroy(&dev->compat_devs);
	xa_destroy(&dev->client_data);
	kfree_rcu(dev, rcu_head);
}

static int ib_device_uevent(struct device *device,
			    struct kobj_uevent_env *env)
{
	if (add_uevent_var(env, "NAME=%s", dev_name(device)))
		return -ENOMEM;

	return 0;
}

static const void *net_namespace(struct device *d)
{
	struct ib_core_device *coredev =
		container_of(d, struct ib_core_device, dev);

	return read_pnet(&coredev->rdma_net);
}

static struct class ib_class = {
	.name = "infiniband",
	.dev_release = ib_device_release,
	.dev_uevent = ib_device_uevent,
	.ns_type = &net_ns_type_operations,
	.namespace = net_namespace,
};

static void rdma_init_coredev(struct ib_core_device *coredev,
			      struct ib_device *dev, struct net *net)
{
	BUILD_BUG_ON(offsetof(struct ib_device, coredev.dev) !=
		     offsetof(struct ib_device, dev));

	coredev->dev.class = &ib_class;
	coredev->dev.groups = dev->groups;
	device_initialize(&coredev->dev);
	coredev->owner = dev;
	INIT_LIST_HEAD(&coredev->port_list);
	write_pnet(&coredev->rdma_net, net);
}

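/**
 * _ib_alloc_device - Allocate a struct ib_device
 * @size: total size to allocate, at least sizeof(struct ib_device)
 *
 * Drivers normally call this through the ib_alloc_device() macro so that
 * their private structure is allocated together with the ib_device.
 */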
struct ib_device *_ib_alloc_device(size_t size)
{
	struct ib_device *device;
	unsigned int i;

	if (WARN_ON(size < sizeof(struct ib_device)))
		return NULL;

	device = kzalloc(size, GFP_KERNEL);
	if (!device)
		return NULL;

	if (rdma_restrack_init(device)) {
		kfree(device);
		return NULL;
	}

	rdma_init_coredev(&device->coredev, device, &init_net);

	INIT_LIST_HEAD(&device->event_handler_list);
	spin_lock_init(&device->qp_open_list_lock);
	init_rwsem(&device->event_handler_rwsem);
	mutex_init(&device->unregistration_lock);

	xa_init_flags(&device->client_data, XA_FLAGS_ALLOC);
	init_rwsem(&device->client_data_rwsem);
	xa_init_flags(&device->compat_devs, XA_FLAGS_ALLOC);
	mutex_init(&device->compat_devs_mutex);
	init_completion(&device->unreg_completion);
	INIT_WORK(&device->unregistration_work, ib_unregister_work);

	spin_lock_init(&device->cq_pools_lock);
	for (i = 0; i < ARRAY_SIZE(device->cq_pools); i++)
		INIT_LIST_HEAD(&device->cq_pools[i]);

	rwlock_init(&device->cache_lock);

	device->uverbs_cmd_mask =
		BIT_ULL(IB_USER_VERBS_CMD_ALLOC_MW) |
		BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD) |
		BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST) |
		BIT_ULL(IB_USER_VERBS_CMD_CLOSE_XRCD) |
		BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH) |
		BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
		BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ) |
		BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP) |
		BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ) |
		BIT_ULL(IB_USER_VERBS_CMD_CREATE_XSRQ) |
		BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_MW) |
		BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD) |
		BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR) |
		BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH) |
		BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ) |
		BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP) |
		BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ) |
		BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST) |
		BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT) |
		BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP) |
		BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ) |
		BIT_ULL(IB_USER_VERBS_CMD_OPEN_QP) |
		BIT_ULL(IB_USER_VERBS_CMD_OPEN_XRCD) |
		BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE) |
		BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT) |
		BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP) |
		BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ) |
		BIT_ULL(IB_USER_VERBS_CMD_REG_MR) |
		BIT_ULL(IB_USER_VERBS_CMD_REREG_MR) |
		BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ);
	return device;
}
EXPORT_SYMBOL(_ib_alloc_device);

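/**
 * ib_dealloc_device - Free an ib_device allocated with ib_alloc_device()
 * @device: device to free
 *
 * Drops the core's reference; the memory is released through
 * ib_device_release() once the last struct device reference is gone.
 */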
void ib_dealloc_device(struct ib_device *device)
{
	if (device->ops.dealloc_driver)
		device->ops.dealloc_driver(device);

	down_write(&devices_rwsem);
	if (xa_load(&devices, device->index) == device)
		xa_erase(&devices, device->index);
	up_write(&devices_rwsem);

	free_netdevs(device);

	WARN_ON(!xa_empty(&device->compat_devs));
	WARN_ON(!xa_empty(&device->client_data));
	WARN_ON(refcount_read(&device->refcount));
	rdma_restrack_clean(device);

	put_device(&device->dev);
}
EXPORT_SYMBOL(ib_dealloc_device);

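/*
 * Attach a client to a device: take references on both, store the (initially
 * NULL) client data and call the client's add() callback. If add() fails the
 * context is quietly removed again and 0 is returned, since a client that
 * cannot attach is not a device-level error.
 */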
static int add_client_context(struct ib_device *device,
			      struct ib_client *client)
{
	int ret = 0;

	if (!device->kverbs_provider && !client->no_kverbs_req)
		return 0;

	down_write(&device->client_data_rwsem);

	if (!refcount_inc_not_zero(&client->uses))
		goto out_unlock;
	refcount_inc(&device->refcount);

	if (xa_get_mark(&device->client_data, client->client_id,
			CLIENT_DATA_REGISTERED))
		goto out;

	ret = xa_err(xa_store(&device->client_data, client->client_id, NULL,
			      GFP_KERNEL));
	if (ret)
		goto out;
	downgrade_write(&device->client_data_rwsem);
	if (client->add) {
		if (client->add(device)) {
			xa_erase(&device->client_data, client->client_id);
			up_read(&device->client_data_rwsem);
			ib_device_put(device);
			ib_client_put(client);
			return 0;
		}
	}

	xa_set_mark(&device->client_data, client->client_id,
		    CLIENT_DATA_REGISTERED);
	up_read(&device->client_data_rwsem);
	return 0;

out:
	ib_device_put(device);
	ib_client_put(client);
out_unlock:
	up_write(&device->client_data_rwsem);
	return ret;
}

static void remove_client_context(struct ib_device *device,
				  unsigned int client_id)
{
	struct ib_client *client;
	void *client_data;

	down_write(&device->client_data_rwsem);
	if (!xa_get_mark(&device->client_data, client_id,
			 CLIENT_DATA_REGISTERED)) {
		up_write(&device->client_data_rwsem);
		return;
	}
	client_data = xa_load(&device->client_data, client_id);
	xa_clear_mark(&device->client_data, client_id, CLIENT_DATA_REGISTERED);
	client = xa_load(&clients, client_id);
	up_write(&device->client_data_rwsem);

	if (client->remove)
		client->remove(device, client_data);

	xa_erase(&device->client_data, client_id);
	ib_device_put(device);
	ib_client_put(client);
}

static int alloc_port_data(struct ib_device *device)
{
	struct ib_port_data_rcu *pdata_rcu;
	u32 port;

	if (device->port_data)
		return 0;

	if (WARN_ON(!device->phys_port_cnt))
		return -EINVAL;

	if (WARN_ON(device->phys_port_cnt == U32_MAX))
		return -EINVAL;

	pdata_rcu = kzalloc(struct_size(pdata_rcu, pdata,
					rdma_end_port(device) + 1),
			    GFP_KERNEL);
	if (!pdata_rcu)
		return -ENOMEM;

	device->port_data = pdata_rcu->pdata;

	rdma_for_each_port (device, port) {
		struct ib_port_data *pdata = &device->port_data[port];

		pdata->ib_dev = device;
		spin_lock_init(&pdata->pkey_list_lock);
		INIT_LIST_HEAD(&pdata->pkey_list);
		spin_lock_init(&pdata->netdev_lock);
		INIT_HLIST_NODE(&pdata->ndev_hash_link);
	}
	return 0;
}

static int verify_immutable(const struct ib_device *dev, u32 port)
{
	return WARN_ON(!rdma_cap_ib_mad(dev, port) &&
		       rdma_max_mad_size(dev, port) != 0);
}

static int setup_port_data(struct ib_device *device)
{
	u32 port;
	int ret;

	ret = alloc_port_data(device);
	if (ret)
		return ret;

	rdma_for_each_port (device, port) {
		struct ib_port_data *pdata = &device->port_data[port];

		ret = device->ops.get_port_immutable(device, port,
						     &pdata->immutable);
		if (ret)
			return ret;

		if (verify_immutable(device, port))
			return -EINVAL;
	}
	return 0;
}

const struct ib_port_immutable*
ib_port_immutable_read(struct ib_device *dev, unsigned int port)
{
	WARN_ON(!rdma_is_port_valid(dev, port));
	return &dev->port_data[port].immutable;
}
EXPORT_SYMBOL(ib_port_immutable_read);

void ib_get_device_fw_str(struct ib_device *dev, char *str)
{
	if (dev->ops.get_dev_fw_str)
		dev->ops.get_dev_fw_str(dev, str);
	else
		str[0] = '\0';
}
EXPORT_SYMBOL(ib_get_device_fw_str);

static void ib_policy_change_task(struct work_struct *work)
{
	struct ib_device *dev;
	unsigned long index;

	down_read(&devices_rwsem);
	xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
		unsigned int i;

		rdma_for_each_port (dev, i) {
			u64 sp;
			ib_get_cached_subnet_prefix(dev, i, &sp);
			ib_security_cache_change(dev, i, sp);
		}
	}
	up_read(&devices_rwsem);
}

static int ib_security_change(struct notifier_block *nb, unsigned long event,
			      void *lsm_data)
{
	if (event != LSM_POLICY_CHANGE)
		return NOTIFY_DONE;

	schedule_work(&ib_policy_change_work);
	ib_mad_agent_security_change();

	return NOTIFY_OK;
}

static void compatdev_release(struct device *dev)
{
	struct ib_core_device *cdev =
		container_of(dev, struct ib_core_device, dev);

	kfree(cdev);
}

static int add_one_compat_dev(struct ib_device *device,
			      struct rdma_dev_net *rnet)
{
	struct ib_core_device *cdev;
	int ret;

	lockdep_assert_held(&rdma_nets_rwsem);
	if (!ib_devices_shared_netns)
		return 0;

	if (net_eq(read_pnet(&rnet->net),
		   read_pnet(&device->coredev.rdma_net)))
		return 0;

	mutex_lock(&device->compat_devs_mutex);
	cdev = xa_load(&device->compat_devs, rnet->id);
	if (cdev) {
		ret = 0;
		goto done;
	}
	ret = xa_reserve(&device->compat_devs, rnet->id, GFP_KERNEL);
	if (ret)
		goto done;

	cdev = kzalloc(sizeof(*cdev), GFP_KERNEL);
	if (!cdev) {
		ret = -ENOMEM;
		goto cdev_err;
	}

	cdev->dev.parent = device->dev.parent;
	rdma_init_coredev(cdev, device, read_pnet(&rnet->net));
	cdev->dev.release = compatdev_release;
	ret = dev_set_name(&cdev->dev, "%s", dev_name(&device->dev));
	if (ret)
		goto add_err;

	ret = device_add(&cdev->dev);
	if (ret)
		goto add_err;
	ret = ib_setup_port_attrs(cdev);
	if (ret)
		goto port_err;

	ret = xa_err(xa_store(&device->compat_devs, rnet->id,
			      cdev, GFP_KERNEL));
	if (ret)
		goto insert_err;

	mutex_unlock(&device->compat_devs_mutex);
	return 0;

insert_err:
	ib_free_port_attrs(cdev);
port_err:
	device_del(&cdev->dev);
add_err:
	put_device(&cdev->dev);
cdev_err:
	xa_release(&device->compat_devs, rnet->id);
done:
	mutex_unlock(&device->compat_devs_mutex);
	return ret;
}

static void remove_one_compat_dev(struct ib_device *device, u32 id)
{
	struct ib_core_device *cdev;

	mutex_lock(&device->compat_devs_mutex);
	cdev = xa_erase(&device->compat_devs, id);
	mutex_unlock(&device->compat_devs_mutex);
	if (cdev) {
		ib_free_port_attrs(cdev);
		device_del(&cdev->dev);
		put_device(&cdev->dev);
	}
}

static void remove_compat_devs(struct ib_device *device)
{
	struct ib_core_device *cdev;
	unsigned long index;

	xa_for_each (&device->compat_devs, index, cdev)
		remove_one_compat_dev(device, index);
}

static int add_compat_devs(struct ib_device *device)
{
	struct rdma_dev_net *rnet;
	unsigned long index;
	int ret = 0;

	lockdep_assert_held(&devices_rwsem);

	down_read(&rdma_nets_rwsem);
	xa_for_each (&rdma_nets, index, rnet) {
		ret = add_one_compat_dev(device, rnet);
		if (ret)
			break;
	}
	up_read(&rdma_nets_rwsem);
	return ret;
}

static void remove_all_compat_devs(void)
{
	struct ib_compat_device *cdev;
	struct ib_device *dev;
	unsigned long index;

	down_read(&devices_rwsem);
	xa_for_each (&devices, index, dev) {
		unsigned long c_index = 0;

		down_read(&rdma_nets_rwsem);
		xa_for_each (&dev->compat_devs, c_index, cdev)
			remove_one_compat_dev(dev, c_index);
		up_read(&rdma_nets_rwsem);
	}
	up_read(&devices_rwsem);
}

static int add_all_compat_devs(void)
{
	struct rdma_dev_net *rnet;
	struct ib_device *dev;
	unsigned long index;
	int ret = 0;

	down_read(&devices_rwsem);
	xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
		unsigned long net_index = 0;

		down_read(&rdma_nets_rwsem);
		xa_for_each (&rdma_nets, net_index, rnet) {
			ret = add_one_compat_dev(dev, rnet);
			if (ret)
				break;
		}
		up_read(&rdma_nets_rwsem);
	}
	up_read(&devices_rwsem);
	if (ret)
		remove_all_compat_devs();
	return ret;
}

int rdma_compatdev_set(u8 enable)
{
	struct rdma_dev_net *rnet;
	unsigned long index;
	int ret = 0;

	down_write(&rdma_nets_rwsem);
	if (ib_devices_shared_netns == enable) {
		up_write(&rdma_nets_rwsem);
		return 0;
	}

	xa_for_each (&rdma_nets, index, rnet) {
		ret++;
		break;
	}
	if (!ret)
		ib_devices_shared_netns = enable;
	up_write(&rdma_nets_rwsem);
	if (ret)
		return -EBUSY;

	if (enable)
		ret = add_all_compat_devs();
	else
		remove_all_compat_devs();
	return ret;
}

static void rdma_dev_exit_net(struct net *net)
{
	struct rdma_dev_net *rnet = rdma_net_to_dev_net(net);
	struct ib_device *dev;
	unsigned long index;
	int ret;

	down_write(&rdma_nets_rwsem);

	ret = xa_err(xa_store(&rdma_nets, rnet->id, NULL, GFP_KERNEL));
	WARN_ON(ret);
	up_write(&rdma_nets_rwsem);

	down_read(&devices_rwsem);
	xa_for_each (&devices, index, dev) {
		get_device(&dev->dev);
		up_read(&devices_rwsem);

		remove_one_compat_dev(dev, rnet->id);

		rdma_dev_change_netns(dev, net, &init_net);

		put_device(&dev->dev);
		down_read(&devices_rwsem);
	}
	up_read(&devices_rwsem);

	rdma_nl_net_exit(rnet);
	xa_erase(&rdma_nets, rnet->id);
}

static __net_init int rdma_dev_init_net(struct net *net)
{
	struct rdma_dev_net *rnet = rdma_net_to_dev_net(net);
	unsigned long index;
	struct ib_device *dev;
	int ret;

	write_pnet(&rnet->net, net);

	ret = rdma_nl_net_init(rnet);
	if (ret)
		return ret;

	if (net_eq(net, &init_net))
		return 0;

	ret = xa_alloc(&rdma_nets, &rnet->id, rnet, xa_limit_32b, GFP_KERNEL);
	if (ret) {
		rdma_nl_net_exit(rnet);
		return ret;
	}

	down_read(&devices_rwsem);
	xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
		down_read(&rdma_nets_rwsem);
		ret = add_one_compat_dev(dev, rnet);
		up_read(&rdma_nets_rwsem);
		if (ret)
			break;
	}
	up_read(&devices_rwsem);

	if (ret)
		rdma_dev_exit_net(net);

	return ret;
}

static int assign_name(struct ib_device *device, const char *name)
{
	static u32 last_id;
	int ret;

	down_write(&devices_rwsem);

	if (strchr(name, '%'))
		ret = alloc_name(device, name);
	else
		ret = dev_set_name(&device->dev, name);
	if (ret)
		goto out;

	if (__ib_device_get_by_name(dev_name(&device->dev))) {
		ret = -ENFILE;
		goto out;
	}
	strlcpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX);

	ret = xa_alloc_cyclic(&devices, &device->index, device, xa_limit_31b,
			      &last_id, GFP_KERNEL);
	if (ret > 0)
		ret = 0;

out:
	up_write(&devices_rwsem);
	return ret;
}

static int setup_device(struct ib_device *device)
{
	struct ib_udata uhw = {.outlen = 0, .inlen = 0};
	int ret;

	ib_device_check_mandatory(device);

	ret = setup_port_data(device);
	if (ret) {
		dev_warn(&device->dev, "Couldn't create per-port data\n");
		return ret;
	}

	memset(&device->attrs, 0, sizeof(device->attrs));
	ret = device->ops.query_device(device, &device->attrs, &uhw);
	if (ret) {
		dev_warn(&device->dev,
			 "Couldn't query the device attributes\n");
		return ret;
	}

	return 0;
}

static void disable_device(struct ib_device *device)
{
	u32 cid;

	WARN_ON(!refcount_read(&device->refcount));

	down_write(&devices_rwsem);
	xa_clear_mark(&devices, device->index, DEVICE_REGISTERED);
	up_write(&devices_rwsem);

	down_read(&clients_rwsem);
	cid = highest_client_id;
	up_read(&clients_rwsem);
	while (cid) {
		cid--;
		remove_client_context(device, cid);
	}

	ib_cq_pool_cleanup(device);

	ib_device_put(device);
	wait_for_completion(&device->unreg_completion);

	remove_compat_devs(device);
}

static int enable_device_and_get(struct ib_device *device)
{
	struct ib_client *client;
	unsigned long index;
	int ret = 0;

	refcount_set(&device->refcount, 2);
	down_write(&devices_rwsem);
	xa_set_mark(&devices, device->index, DEVICE_REGISTERED);

	downgrade_write(&devices_rwsem);

	if (device->ops.enable_driver) {
		ret = device->ops.enable_driver(device);
		if (ret)
			goto out;
	}

	down_read(&clients_rwsem);
	xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) {
		ret = add_client_context(device, client);
		if (ret)
			break;
	}
	up_read(&clients_rwsem);
	if (!ret)
		ret = add_compat_devs(device);
out:
	up_read(&devices_rwsem);
	return ret;
}

static void prevent_dealloc_device(struct ib_device *ib_dev)
{
}

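/**
 * ib_register_device - Register an IB device with the RDMA core
 * @device: device to register
 * @name: unique name; may contain a '%' printf-style pattern that is
 *        expanded to a unique index
 * @dma_device: struct device that performs the DMA, or NULL
 *
 * Makes the device visible to in-kernel clients and to userspace. Drivers
 * are expected to have filled in their ops via ib_set_device_ops() first.
 */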
int ib_register_device(struct ib_device *device, const char *name,
		       struct device *dma_device)
{
	int ret;

	ret = assign_name(device, name);
	if (ret)
		return ret;

	WARN_ON(dma_device && !dma_device->dma_parms);
	device->dma_device = dma_device;

	ret = setup_device(device);
	if (ret)
		return ret;

	ret = ib_cache_setup_one(device);
	if (ret) {
		dev_warn(&device->dev,
			 "Couldn't set up InfiniBand P_Key/GID cache\n");
		return ret;
	}

	device->groups[0] = &ib_dev_attr_group;
	device->groups[1] = device->ops.device_group;
	ret = ib_setup_device_attrs(device);
	if (ret)
		goto cache_cleanup;

	ib_device_register_rdmacg(device);

	rdma_counter_init(device);

	dev_set_uevent_suppress(&device->dev, true);
	ret = device_add(&device->dev);
	if (ret)
		goto cg_cleanup;

	ret = ib_setup_port_attrs(&device->coredev);
	if (ret) {
		dev_warn(&device->dev,
			 "Couldn't register device with driver model\n");
		goto dev_cleanup;
	}

	ret = enable_device_and_get(device);
	if (ret) {
		void (*dealloc_fn)(struct ib_device *);

		dealloc_fn = device->ops.dealloc_driver;
		device->ops.dealloc_driver = prevent_dealloc_device;
		ib_device_put(device);
		__ib_unregister_device(device);
		device->ops.dealloc_driver = dealloc_fn;
		dev_set_uevent_suppress(&device->dev, false);
		return ret;
	}
	dev_set_uevent_suppress(&device->dev, false);

	kobject_uevent(&device->dev.kobj, KOBJ_ADD);
	ib_device_put(device);

	return 0;

dev_cleanup:
	device_del(&device->dev);
cg_cleanup:
	dev_set_uevent_suppress(&device->dev, false);
	ib_device_unregister_rdmacg(device);
cache_cleanup:
	ib_cache_cleanup_one(device);
	return ret;
}
EXPORT_SYMBOL(ib_register_device);

static void __ib_unregister_device(struct ib_device *ib_dev)
{
	mutex_lock(&ib_dev->unregistration_lock);
	if (!refcount_read(&ib_dev->refcount))
		goto out;

	disable_device(ib_dev);

	free_netdevs(ib_dev);

	ib_free_port_attrs(&ib_dev->coredev);
	device_del(&ib_dev->dev);
	ib_device_unregister_rdmacg(ib_dev);
	ib_cache_cleanup_one(ib_dev);

	if (ib_dev->ops.dealloc_driver &&
	    ib_dev->ops.dealloc_driver != prevent_dealloc_device) {
		WARN_ON(kref_read(&ib_dev->dev.kobj.kref) <= 1);
		ib_dealloc_device(ib_dev);
	}
out:
	mutex_unlock(&ib_dev->unregistration_lock);
}

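/**
 * ib_unregister_device - Unregister an IB device
 * @ib_dev: device to unregister
 *
 * Detaches all clients and removes the device from sysfs. If the driver
 * provides dealloc_driver, the device is also deallocated before return.
 */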
void ib_unregister_device(struct ib_device *ib_dev)
{
	get_device(&ib_dev->dev);
	__ib_unregister_device(ib_dev);
	put_device(&ib_dev->dev);
}
EXPORT_SYMBOL(ib_unregister_device);

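/**
 * ib_unregister_device_and_put - Unregister a device while holding a 'get'
 * @ib_dev: device to unregister
 *
 * Variant of ib_unregister_device() for callers that obtained @ib_dev via
 * ib_device_get_by_index() or a similar 'get'; that reference is dropped
 * here. Only usable by drivers that provide dealloc_driver.
 */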
void ib_unregister_device_and_put(struct ib_device *ib_dev)
{
	WARN_ON(!ib_dev->ops.dealloc_driver);
	get_device(&ib_dev->dev);
	ib_device_put(ib_dev);
	__ib_unregister_device(ib_dev);
	put_device(&ib_dev->dev);
}
EXPORT_SYMBOL(ib_unregister_device_and_put);

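/**
 * ib_unregister_driver - Unregister all IB devices owned by a driver
 * @driver_id: driver whose devices should be unregistered
 *
 * Intended to be called from a driver's module_exit to tear down every
 * device that driver registered.
 */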
void ib_unregister_driver(enum rdma_driver_id driver_id)
{
	struct ib_device *ib_dev;
	unsigned long index;

	down_read(&devices_rwsem);
	xa_for_each (&devices, index, ib_dev) {
		if (ib_dev->ops.driver_id != driver_id)
			continue;

		get_device(&ib_dev->dev);
		up_read(&devices_rwsem);

		WARN_ON(!ib_dev->ops.dealloc_driver);
		__ib_unregister_device(ib_dev);

		put_device(&ib_dev->dev);
		down_read(&devices_rwsem);
	}
	up_read(&devices_rwsem);
}
EXPORT_SYMBOL(ib_unregister_driver);

static void ib_unregister_work(struct work_struct *work)
{
	struct ib_device *ib_dev =
		container_of(work, struct ib_device, unregistration_work);

	__ib_unregister_device(ib_dev);
	put_device(&ib_dev->dev);
}

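/**
 * ib_unregister_device_queued - Unregister a device from a work queue
 * @ib_dev: device to unregister
 *
 * Schedules the unregistration asynchronously on ib_unreg_wq, for callers
 * that cannot unregister synchronously from their current context. Only
 * usable by drivers that provide dealloc_driver.
 */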
void ib_unregister_device_queued(struct ib_device *ib_dev)
{
	WARN_ON(!refcount_read(&ib_dev->refcount));
	WARN_ON(!ib_dev->ops.dealloc_driver);
	get_device(&ib_dev->dev);
	if (!queue_work(ib_unreg_wq, &ib_dev->unregistration_work))
		put_device(&ib_dev->dev);
}
EXPORT_SYMBOL(ib_unregister_device_queued);

static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net,
				 struct net *net)
{
	int ret2 = -EINVAL;
	int ret;

	mutex_lock(&device->unregistration_lock);

	if (refcount_read(&device->refcount) == 0 ||
	    !net_eq(cur_net, read_pnet(&device->coredev.rdma_net))) {
		ret = -ENODEV;
		goto out;
	}

	kobject_uevent(&device->dev.kobj, KOBJ_REMOVE);
	disable_device(device);

	write_pnet(&device->coredev.rdma_net, net);

	down_read(&devices_rwsem);

	ret = device_rename(&device->dev, dev_name(&device->dev));
	up_read(&devices_rwsem);
	if (ret) {
		dev_warn(&device->dev,
			 "%s: Couldn't rename device after namespace change\n",
			 __func__);

		write_pnet(&device->coredev.rdma_net, cur_net);
	}

	ret2 = enable_device_and_get(device);
	if (ret2) {
		dev_warn(&device->dev,
			 "%s: Couldn't re-enable device after namespace change\n",
			 __func__);
	}
	kobject_uevent(&device->dev.kobj, KOBJ_ADD);

	ib_device_put(device);
out:
	mutex_unlock(&device->unregistration_lock);
	if (ret)
		return ret;
	return ret2;
}

int ib_device_set_netns_put(struct sk_buff *skb,
			    struct ib_device *dev, u32 ns_fd)
{
	struct net *net;
	int ret;

	net = get_net_ns_by_fd(ns_fd);
	if (IS_ERR(net)) {
		ret = PTR_ERR(net);
		goto net_err;
	}

	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
		ret = -EPERM;
		goto ns_err;
	}

	if (!dev->ops.disassociate_ucontext || ib_devices_shared_netns) {
		ret = -EOPNOTSUPP;
		goto ns_err;
	}

	get_device(&dev->dev);
	ib_device_put(dev);
	ret = rdma_dev_change_netns(dev, current->nsproxy->net_ns, net);
	put_device(&dev->dev);

	put_net(net);
	return ret;

ns_err:
	put_net(net);
net_err:
	ib_device_put(dev);
	return ret;
}

static struct pernet_operations rdma_dev_net_ops = {
	.init = rdma_dev_init_net,
	.exit = rdma_dev_exit_net,
	.id = &rdma_dev_net_id,
	.size = sizeof(struct rdma_dev_net),
};

static int assign_client_id(struct ib_client *client)
{
	int ret;

	down_write(&clients_rwsem);

	client->client_id = highest_client_id;
	ret = xa_insert(&clients, client->client_id, client, GFP_KERNEL);
	if (ret)
		goto out;

	highest_client_id++;
	xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED);

out:
	up_write(&clients_rwsem);
	return ret;
}

static void remove_client_id(struct ib_client *client)
{
	down_write(&clients_rwsem);
	xa_erase(&clients, client->client_id);
	for (; highest_client_id; highest_client_id--)
		if (xa_load(&clients, highest_client_id - 1))
			break;
	up_write(&clients_rwsem);
}

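/**
 * ib_register_client - Register an IB client
 * @client: client to register
 *
 * Upper-level users of the IB core register a client to be notified, via
 * the add and remove callbacks, whenever an IB device is added or removed.
 */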
int ib_register_client(struct ib_client *client)
{
	struct ib_device *device;
	unsigned long index;
	int ret;

	refcount_set(&client->uses, 1);
	init_completion(&client->uses_zero);
	ret = assign_client_id(client);
	if (ret)
		return ret;

	down_read(&devices_rwsem);
	xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED) {
		ret = add_client_context(device, client);
		if (ret) {
			up_read(&devices_rwsem);
			ib_unregister_client(client);
			return ret;
		}
	}
	up_read(&devices_rwsem);
	return 0;
}
EXPORT_SYMBOL(ib_register_client);

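/**
 * ib_unregister_client - Unregister an IB client
 * @client: client to unregister
 *
 * The client's remove callback is invoked for every attached device, and
 * this call blocks until all such contexts have been torn down.
 */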
void ib_unregister_client(struct ib_client *client)
{
	struct ib_device *device;
	unsigned long index;

	down_write(&clients_rwsem);
	ib_client_put(client);
	xa_clear_mark(&clients, client->client_id, CLIENT_REGISTERED);
	up_write(&clients_rwsem);

	rcu_read_lock();
	xa_for_each (&devices, index, device) {
		if (!ib_device_try_get(device))
			continue;
		rcu_read_unlock();

		remove_client_context(device, client->client_id);

		ib_device_put(device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	wait_for_completion(&client->uses_zero);
	remove_client_id(client);
}
EXPORT_SYMBOL(ib_unregister_client);

static int __ib_get_global_client_nl_info(const char *client_name,
					  struct ib_client_nl_info *res)
{
	struct ib_client *client;
	unsigned long index;
	int ret = -ENOENT;

	down_read(&clients_rwsem);
	xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) {
		if (strcmp(client->name, client_name) != 0)
			continue;
		if (!client->get_global_nl_info) {
			ret = -EOPNOTSUPP;
			break;
		}
		ret = client->get_global_nl_info(res);
		if (WARN_ON(ret == -ENOENT))
			ret = -EINVAL;
		if (!ret && res->cdev)
			get_device(res->cdev);
		break;
	}
	up_read(&clients_rwsem);
	return ret;
}

static int __ib_get_client_nl_info(struct ib_device *ibdev,
				   const char *client_name,
				   struct ib_client_nl_info *res)
{
	unsigned long index;
	void *client_data;
	int ret = -ENOENT;

	down_read(&ibdev->client_data_rwsem);
	xan_for_each_marked (&ibdev->client_data, index, client_data,
			     CLIENT_DATA_REGISTERED) {
		struct ib_client *client = xa_load(&clients, index);

		if (!client || strcmp(client->name, client_name) != 0)
			continue;
		if (!client->get_nl_info) {
			ret = -EOPNOTSUPP;
			break;
		}
		ret = client->get_nl_info(ibdev, client_data, res);
		if (WARN_ON(ret == -ENOENT))
			ret = -EINVAL;

		if (!ret && res->cdev)
			get_device(res->cdev);
		break;
	}
	up_read(&ibdev->client_data_rwsem);

	return ret;
}

int ib_get_client_nl_info(struct ib_device *ibdev, const char *client_name,
			  struct ib_client_nl_info *res)
{
	int ret;

	if (ibdev)
		ret = __ib_get_client_nl_info(ibdev, client_name, res);
	else
		ret = __ib_get_global_client_nl_info(client_name, res);
#ifdef CONFIG_MODULES
	if (ret == -ENOENT) {
		request_module("rdma-client-%s", client_name);
		if (ibdev)
			ret = __ib_get_client_nl_info(ibdev, client_name, res);
		else
			ret = __ib_get_global_client_nl_info(client_name, res);
	}
#endif
	if (ret) {
		if (ret == -ENOENT)
			return -EOPNOTSUPP;
		return ret;
	}

	if (WARN_ON(!res->cdev))
		return -EINVAL;
	return 0;
}

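/**
 * ib_set_client_data - Set IB client context for a device
 * @device: device to set context for
 * @client: client owning the context
 * @data: context to store; retrieve it later with ib_get_client_data()
 *
 * Only valid while the client is attached to @device (i.e. before the
 * client's remove() callback has returned).
 */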
void ib_set_client_data(struct ib_device *device, struct ib_client *client,
			void *data)
{
	void *rc;

	if (WARN_ON(IS_ERR(data)))
		data = NULL;

	rc = xa_store(&device->client_data, client->client_id, data,
		      GFP_KERNEL);
	WARN_ON(xa_is_err(rc));
}
EXPORT_SYMBOL(ib_set_client_data);

void ib_register_event_handler(struct ib_event_handler *event_handler)
{
	down_write(&event_handler->device->event_handler_rwsem);
	list_add_tail(&event_handler->list,
		      &event_handler->device->event_handler_list);
	up_write(&event_handler->device->event_handler_rwsem);
}
EXPORT_SYMBOL(ib_register_event_handler);

void ib_unregister_event_handler(struct ib_event_handler *event_handler)
{
	down_write(&event_handler->device->event_handler_rwsem);
	list_del(&event_handler->list);
	up_write(&event_handler->device->event_handler_rwsem);
}
EXPORT_SYMBOL(ib_unregister_event_handler);

void ib_dispatch_event_clients(struct ib_event *event)
{
	struct ib_event_handler *handler;

	down_read(&event->device->event_handler_rwsem);

	list_for_each_entry(handler, &event->device->event_handler_list, list)
		handler->handler(handler, event);

	up_read(&event->device->event_handler_rwsem);
}

static int iw_query_port(struct ib_device *device,
			 u32 port_num,
			 struct ib_port_attr *port_attr)
{
	struct in_device *inetdev;
	struct net_device *netdev;

	memset(port_attr, 0, sizeof(*port_attr));

	netdev = ib_device_get_netdev(device, port_num);
	if (!netdev)
		return -ENODEV;

	port_attr->max_mtu = IB_MTU_4096;
	port_attr->active_mtu = ib_mtu_int_to_enum(netdev->mtu);

	if (!netif_carrier_ok(netdev)) {
		port_attr->state = IB_PORT_DOWN;
		port_attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
	} else {
		rcu_read_lock();
		inetdev = __in_dev_get_rcu(netdev);

		if (inetdev && inetdev->ifa_list) {
			port_attr->state = IB_PORT_ACTIVE;
			port_attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
		} else {
			port_attr->state = IB_PORT_INIT;
			port_attr->phys_state =
				IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING;
		}

		rcu_read_unlock();
	}

	dev_put(netdev);
	return device->ops.query_port(device, port_num, port_attr);
}

static int __ib_query_port(struct ib_device *device,
			   u32 port_num,
			   struct ib_port_attr *port_attr)
{
	int err;

	memset(port_attr, 0, sizeof(*port_attr));

	err = device->ops.query_port(device, port_num, port_attr);
	if (err || port_attr->subnet_prefix)
		return err;

	if (rdma_port_get_link_layer(device, port_num) !=
	    IB_LINK_LAYER_INFINIBAND)
		return 0;

	ib_get_cached_subnet_prefix(device, port_num,
				    &port_attr->subnet_prefix);
	return 0;
}

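/**
 * ib_query_port - Query IB port attributes
 * @device: device to query
 * @port_num: port number to query
 * @port_attr: port attributes to be filled in
 *
 * Returns the attributes of the given port, using the iWarp-specific path
 * for iWarp devices.
 */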
int ib_query_port(struct ib_device *device,
		  u32 port_num,
		  struct ib_port_attr *port_attr)
{
	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	if (rdma_protocol_iwarp(device, port_num))
		return iw_query_port(device, port_num, port_attr);
	else
		return __ib_query_port(device, port_num, port_attr);
}
EXPORT_SYMBOL(ib_query_port);

static void add_ndev_hash(struct ib_port_data *pdata)
{
	unsigned long flags;

	might_sleep();

	spin_lock_irqsave(&ndev_hash_lock, flags);
	if (hash_hashed(&pdata->ndev_hash_link)) {
		hash_del_rcu(&pdata->ndev_hash_link);
		spin_unlock_irqrestore(&ndev_hash_lock, flags);

		synchronize_rcu();
		spin_lock_irqsave(&ndev_hash_lock, flags);
	}
	if (pdata->netdev)
		hash_add_rcu(ndev_hash, &pdata->ndev_hash_link,
			     (uintptr_t)pdata->netdev);
	spin_unlock_irqrestore(&ndev_hash_lock, flags);
}

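/**
 * ib_device_set_netdev - Associate an underlying net_device with an IB port
 * @ib_dev: IB device
 * @ndev: net_device to associate, or NULL to clear the association
 * @port: port the netdev corresponds to
 *
 * The association is published in an RCU hash so that
 * ib_device_get_by_netdev() can find the owning ib_device.
 */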
int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
			 u32 port)
{
	struct net_device *old_ndev;
	struct ib_port_data *pdata;
	unsigned long flags;
	int ret;

	ret = alloc_port_data(ib_dev);
	if (ret)
		return ret;

	if (!rdma_is_port_valid(ib_dev, port))
		return -EINVAL;

	pdata = &ib_dev->port_data[port];
	spin_lock_irqsave(&pdata->netdev_lock, flags);
	old_ndev = rcu_dereference_protected(
		pdata->netdev, lockdep_is_held(&pdata->netdev_lock));
	if (old_ndev == ndev) {
		spin_unlock_irqrestore(&pdata->netdev_lock, flags);
		return 0;
	}

	if (ndev)
		dev_hold(ndev);
	rcu_assign_pointer(pdata->netdev, ndev);
	spin_unlock_irqrestore(&pdata->netdev_lock, flags);

	add_ndev_hash(pdata);
	if (old_ndev)
		dev_put(old_ndev);

	return 0;
}
EXPORT_SYMBOL(ib_device_set_netdev);

static void free_netdevs(struct ib_device *ib_dev)
{
	unsigned long flags;
	u32 port;

	if (!ib_dev->port_data)
		return;

	rdma_for_each_port (ib_dev, port) {
		struct ib_port_data *pdata = &ib_dev->port_data[port];
		struct net_device *ndev;

		spin_lock_irqsave(&pdata->netdev_lock, flags);
		ndev = rcu_dereference_protected(
			pdata->netdev, lockdep_is_held(&pdata->netdev_lock));
		if (ndev) {
			spin_lock(&ndev_hash_lock);
			hash_del_rcu(&pdata->ndev_hash_link);
			spin_unlock(&ndev_hash_lock);

			rcu_assign_pointer(pdata->netdev, NULL);
			dev_put(ndev);
		}
		spin_unlock_irqrestore(&pdata->netdev_lock, flags);
	}
}

struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
					u32 port)
{
	struct ib_port_data *pdata;
	struct net_device *res;

	if (!rdma_is_port_valid(ib_dev, port))
		return NULL;

	pdata = &ib_dev->port_data[port];

	if (ib_dev->ops.get_netdev)
		res = ib_dev->ops.get_netdev(ib_dev, port);
	else {
		spin_lock(&pdata->netdev_lock);
		res = rcu_dereference_protected(
			pdata->netdev, lockdep_is_held(&pdata->netdev_lock));
		if (res)
			dev_hold(res);
		spin_unlock(&pdata->netdev_lock);
	}

	if (res && res->reg_state != NETREG_REGISTERED) {
		dev_put(res);
		return NULL;
	}

	return res;
}

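/**
 * ib_device_get_by_netdev - Find an IB device associated with a netdev
 * @ndev: net_device to look up
 * @driver_id: expected driver, or RDMA_DRIVER_UNKNOWN to accept any
 *
 * Returns a referenced ib_device or NULL; the caller must release the
 * reference with ib_device_put().
 */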
struct ib_device *ib_device_get_by_netdev(struct net_device *ndev,
					  enum rdma_driver_id driver_id)
{
	struct ib_device *res = NULL;
	struct ib_port_data *cur;

	rcu_read_lock();
	hash_for_each_possible_rcu (ndev_hash, cur, ndev_hash_link,
				    (uintptr_t)ndev) {
		if (rcu_access_pointer(cur->netdev) == ndev &&
		    (driver_id == RDMA_DRIVER_UNKNOWN ||
		     cur->ib_dev->ops.driver_id == driver_id) &&
		    ib_device_try_get(cur->ib_dev)) {
			res = cur->ib_dev;
			break;
		}
	}
	rcu_read_unlock();

	return res;
}
EXPORT_SYMBOL(ib_device_get_by_netdev);

void ib_enum_roce_netdev(struct ib_device *ib_dev,
			 roce_netdev_filter filter,
			 void *filter_cookie,
			 roce_netdev_callback cb,
			 void *cookie)
{
	u32 port;

	rdma_for_each_port (ib_dev, port)
		if (rdma_protocol_roce(ib_dev, port)) {
			struct net_device *idev =
				ib_device_get_netdev(ib_dev, port);

			if (filter(ib_dev, port, idev, filter_cookie))
				cb(ib_dev, port, idev, cookie);

			if (idev)
				dev_put(idev);
		}
}

void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
			      void *filter_cookie,
			      roce_netdev_callback cb,
			      void *cookie)
{
	struct ib_device *dev;
	unsigned long index;

	down_read(&devices_rwsem);
	xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED)
		ib_enum_roce_netdev(dev, filter, filter_cookie, cb, cookie);
	up_read(&devices_rwsem);
}

int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb,
		     struct netlink_callback *cb)
{
	unsigned long index;
	struct ib_device *dev;
	unsigned int idx = 0;
	int ret = 0;

	down_read(&devices_rwsem);
	xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
		if (!rdma_dev_access_netns(dev, sock_net(skb->sk)))
			continue;

		ret = nldev_cb(dev, skb, cb, idx);
		if (ret)
			break;
		idx++;
	}
	up_read(&devices_rwsem);
	return ret;
}

int ib_query_pkey(struct ib_device *device,
		  u32 port_num, u16 index, u16 *pkey)
{
	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	if (!device->ops.query_pkey)
		return -EOPNOTSUPP;

	return device->ops.query_pkey(device, port_num, index, pkey);
}
EXPORT_SYMBOL(ib_query_pkey);

int ib_modify_device(struct ib_device *device,
		     int device_modify_mask,
		     struct ib_device_modify *device_modify)
{
	if (!device->ops.modify_device)
		return -EOPNOTSUPP;

	return device->ops.modify_device(device, device_modify_mask,
					 device_modify);
}
EXPORT_SYMBOL(ib_modify_device);

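/**
 * ib_modify_port - Modify an IB port's attributes
 * @device: device the port belongs to
 * @port_num: port number to modify
 * @port_modify_mask: mask of attributes to change
 * @port_modify: new attribute values
 *
 * RoCE ports without a modify_port op silently accept changes limited to
 * the CM_SUP capability bit.
 */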
int ib_modify_port(struct ib_device *device,
		   u32 port_num, int port_modify_mask,
		   struct ib_port_modify *port_modify)
{
	int rc;

	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	if (device->ops.modify_port)
		rc = device->ops.modify_port(device, port_num,
					     port_modify_mask,
					     port_modify);
	else if (rdma_protocol_roce(device, port_num) &&
		 ((port_modify->set_port_cap_mask & ~IB_PORT_CM_SUP) == 0 ||
		  (port_modify->clr_port_cap_mask & ~IB_PORT_CM_SUP) == 0))
		rc = 0;
	else
		rc = -EOPNOTSUPP;
	return rc;
}
EXPORT_SYMBOL(ib_modify_port);

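/**
 * ib_find_gid - Search all IB ports of a device for the given GID
 * @device: device to query
 * @gid: GID value to search for
 * @port_num: returns the port on which the GID was found
 * @index: returns the GID table index; may be NULL if not needed
 */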
int ib_find_gid(struct ib_device *device, union ib_gid *gid,
		u32 *port_num, u16 *index)
{
	union ib_gid tmp_gid;
	u32 port;
	int ret, i;

	rdma_for_each_port (device, port) {
		if (!rdma_protocol_ib(device, port))
			continue;

		for (i = 0; i < device->port_data[port].immutable.gid_tbl_len;
		     ++i) {
			ret = rdma_query_gid(device, port, i, &tmp_gid);
			if (ret)
				continue;

			if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
				*port_num = port;
				if (index)
					*index = i;
				return 0;
			}
		}
	}

	return -ENOENT;
}
EXPORT_SYMBOL(ib_find_gid);

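/**
 * ib_find_pkey - Search a port's P_Key table for the given P_Key
 * @device: device to query
 * @port_num: port number to search
 * @pkey: P_Key value to search for (membership bit ignored)
 * @index: returns the matching P_Key table index
 *
 * A full-membership match is preferred over a limited-membership one.
 */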
int ib_find_pkey(struct ib_device *device,
		 u32 port_num, u16 pkey, u16 *index)
{
	int ret, i;
	u16 tmp_pkey;
	int partial_ix = -1;

	for (i = 0; i < device->port_data[port_num].immutable.pkey_tbl_len;
	     ++i) {
		ret = ib_query_pkey(device, port_num, i, &tmp_pkey);
		if (ret)
			return ret;
		if ((pkey & 0x7fff) == (tmp_pkey & 0x7fff)) {
			if (tmp_pkey & 0x8000) {
				*index = i;
				return 0;
			}
			if (partial_ix < 0)
				partial_ix = i;
		}
	}

	if (partial_ix >= 0) {
		*index = partial_ix;
		return 0;
	}
	return -ENOENT;
}
EXPORT_SYMBOL(ib_find_pkey);

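/**
 * ib_get_net_dev_by_params - Return the net_dev matching a received CM request
 * @dev: IB device the request arrived on
 * @port: port number of the request
 * @pkey: P_Key of the request
 * @gid: destination GID of the request
 * @addr: destination IP address of the request
 *
 * Asks each registered client, in turn, whether it can map the connection
 * parameters to an upper netdev; the first non-NULL answer is returned.
 */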
struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
					    u32 port,
					    u16 pkey,
					    const union ib_gid *gid,
					    const struct sockaddr *addr)
{
	struct net_device *net_dev = NULL;
	unsigned long index;
	void *client_data;

	if (!rdma_protocol_ib(dev, port))
		return NULL;

	down_read(&dev->client_data_rwsem);
	xan_for_each_marked (&dev->client_data, index, client_data,
			     CLIENT_DATA_REGISTERED) {
		struct ib_client *client = xa_load(&clients, index);

		if (!client || !client->get_net_dev_by_params)
			continue;

		net_dev = client->get_net_dev_by_params(dev, port, pkey, gid,
							addr, client_data);
		if (net_dev)
			break;
	}
	up_read(&dev->client_data_rwsem);

	return net_dev;
}
EXPORT_SYMBOL(ib_get_net_dev_by_params);

void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
{
	struct ib_device_ops *dev_ops = &dev->ops;
#define SET_DEVICE_OP(ptr, name)                                               \
	do {                                                                   \
		if (ops->name)                                                 \
			if (!((ptr)->name))                                    \
				(ptr)->name = ops->name;                       \
	} while (0)

#define SET_OBJ_SIZE(ptr, name) SET_DEVICE_OP(ptr, size_##name)

	if (ops->driver_id != RDMA_DRIVER_UNKNOWN) {
		WARN_ON(dev_ops->driver_id != RDMA_DRIVER_UNKNOWN &&
			dev_ops->driver_id != ops->driver_id);
		dev_ops->driver_id = ops->driver_id;
	}
	if (ops->owner) {
		WARN_ON(dev_ops->owner && dev_ops->owner != ops->owner);
		dev_ops->owner = ops->owner;
	}
	if (ops->uverbs_abi_ver)
		dev_ops->uverbs_abi_ver = ops->uverbs_abi_ver;

	dev_ops->uverbs_no_driver_id_binding |=
		ops->uverbs_no_driver_id_binding;

	SET_DEVICE_OP(dev_ops, add_gid);
	SET_DEVICE_OP(dev_ops, advise_mr);
	SET_DEVICE_OP(dev_ops, alloc_dm);
	SET_DEVICE_OP(dev_ops, alloc_hw_device_stats);
	SET_DEVICE_OP(dev_ops, alloc_hw_port_stats);
	SET_DEVICE_OP(dev_ops, alloc_mr);
	SET_DEVICE_OP(dev_ops, alloc_mr_integrity);
	SET_DEVICE_OP(dev_ops, alloc_mw);
	SET_DEVICE_OP(dev_ops, alloc_pd);
	SET_DEVICE_OP(dev_ops, alloc_rdma_netdev);
	SET_DEVICE_OP(dev_ops, alloc_ucontext);
	SET_DEVICE_OP(dev_ops, alloc_xrcd);
	SET_DEVICE_OP(dev_ops, attach_mcast);
	SET_DEVICE_OP(dev_ops, check_mr_status);
	SET_DEVICE_OP(dev_ops, counter_alloc_stats);
	SET_DEVICE_OP(dev_ops, counter_bind_qp);
	SET_DEVICE_OP(dev_ops, counter_dealloc);
	SET_DEVICE_OP(dev_ops, counter_unbind_qp);
	SET_DEVICE_OP(dev_ops, counter_update_stats);
	SET_DEVICE_OP(dev_ops, create_ah);
	SET_DEVICE_OP(dev_ops, create_counters);
	SET_DEVICE_OP(dev_ops, create_cq);
	SET_DEVICE_OP(dev_ops, create_flow);
	SET_DEVICE_OP(dev_ops, create_qp);
	SET_DEVICE_OP(dev_ops, create_rwq_ind_table);
	SET_DEVICE_OP(dev_ops, create_srq);
	SET_DEVICE_OP(dev_ops, create_user_ah);
	SET_DEVICE_OP(dev_ops, create_wq);
	SET_DEVICE_OP(dev_ops, dealloc_dm);
	SET_DEVICE_OP(dev_ops, dealloc_driver);
	SET_DEVICE_OP(dev_ops, dealloc_mw);
	SET_DEVICE_OP(dev_ops, dealloc_pd);
	SET_DEVICE_OP(dev_ops, dealloc_ucontext);
	SET_DEVICE_OP(dev_ops, dealloc_xrcd);
	SET_DEVICE_OP(dev_ops, del_gid);
	SET_DEVICE_OP(dev_ops, dereg_mr);
	SET_DEVICE_OP(dev_ops, destroy_ah);
	SET_DEVICE_OP(dev_ops, destroy_counters);
	SET_DEVICE_OP(dev_ops, destroy_cq);
	SET_DEVICE_OP(dev_ops, destroy_flow);
	SET_DEVICE_OP(dev_ops, destroy_flow_action);
	SET_DEVICE_OP(dev_ops, destroy_qp);
	SET_DEVICE_OP(dev_ops, destroy_rwq_ind_table);
	SET_DEVICE_OP(dev_ops, destroy_srq);
	SET_DEVICE_OP(dev_ops, destroy_wq);
	SET_DEVICE_OP(dev_ops, device_group);
	SET_DEVICE_OP(dev_ops, detach_mcast);
	SET_DEVICE_OP(dev_ops, disassociate_ucontext);
	SET_DEVICE_OP(dev_ops, drain_rq);
	SET_DEVICE_OP(dev_ops, drain_sq);
	SET_DEVICE_OP(dev_ops, enable_driver);
	SET_DEVICE_OP(dev_ops, fill_res_cm_id_entry);
	SET_DEVICE_OP(dev_ops, fill_res_cq_entry);
	SET_DEVICE_OP(dev_ops, fill_res_cq_entry_raw);
	SET_DEVICE_OP(dev_ops, fill_res_mr_entry);
	SET_DEVICE_OP(dev_ops, fill_res_mr_entry_raw);
	SET_DEVICE_OP(dev_ops, fill_res_qp_entry);
	SET_DEVICE_OP(dev_ops, fill_res_qp_entry_raw);
	SET_DEVICE_OP(dev_ops, fill_stat_mr_entry);
	SET_DEVICE_OP(dev_ops, get_dev_fw_str);
	SET_DEVICE_OP(dev_ops, get_dma_mr);
	SET_DEVICE_OP(dev_ops, get_hw_stats);
	SET_DEVICE_OP(dev_ops, get_link_layer);
	SET_DEVICE_OP(dev_ops, get_netdev);
	SET_DEVICE_OP(dev_ops, get_numa_node);
	SET_DEVICE_OP(dev_ops, get_port_immutable);
	SET_DEVICE_OP(dev_ops, get_vector_affinity);
	SET_DEVICE_OP(dev_ops, get_vf_config);
	SET_DEVICE_OP(dev_ops, get_vf_guid);
	SET_DEVICE_OP(dev_ops, get_vf_stats);
	SET_DEVICE_OP(dev_ops, iw_accept);
	SET_DEVICE_OP(dev_ops, iw_add_ref);
	SET_DEVICE_OP(dev_ops, iw_connect);
	SET_DEVICE_OP(dev_ops, iw_create_listen);
	SET_DEVICE_OP(dev_ops, iw_destroy_listen);
	SET_DEVICE_OP(dev_ops, iw_get_qp);
	SET_DEVICE_OP(dev_ops, iw_reject);
	SET_DEVICE_OP(dev_ops, iw_rem_ref);
	SET_DEVICE_OP(dev_ops, map_mr_sg);
	SET_DEVICE_OP(dev_ops, map_mr_sg_pi);
	SET_DEVICE_OP(dev_ops, mmap);
	SET_DEVICE_OP(dev_ops, mmap_free);
	SET_DEVICE_OP(dev_ops, modify_ah);
	SET_DEVICE_OP(dev_ops, modify_cq);
	SET_DEVICE_OP(dev_ops, modify_device);
	SET_DEVICE_OP(dev_ops, modify_hw_stat);
	SET_DEVICE_OP(dev_ops, modify_port);
	SET_DEVICE_OP(dev_ops, modify_qp);
	SET_DEVICE_OP(dev_ops, modify_srq);
	SET_DEVICE_OP(dev_ops, modify_wq);
	SET_DEVICE_OP(dev_ops, peek_cq);
	SET_DEVICE_OP(dev_ops, poll_cq);
	SET_DEVICE_OP(dev_ops, port_groups);
	SET_DEVICE_OP(dev_ops, post_recv);
	SET_DEVICE_OP(dev_ops, post_send);
	SET_DEVICE_OP(dev_ops, post_srq_recv);
	SET_DEVICE_OP(dev_ops, process_mad);
	SET_DEVICE_OP(dev_ops, query_ah);
	SET_DEVICE_OP(dev_ops, query_device);
	SET_DEVICE_OP(dev_ops, query_gid);
	SET_DEVICE_OP(dev_ops, query_pkey);
	SET_DEVICE_OP(dev_ops, query_port);
	SET_DEVICE_OP(dev_ops, query_qp);
	SET_DEVICE_OP(dev_ops, query_srq);
2698 SET_DEVICE_OP(dev_ops, query_ucontext);
2699 SET_DEVICE_OP(dev_ops, rdma_netdev_get_params);
2700 SET_DEVICE_OP(dev_ops, read_counters);
2701 SET_DEVICE_OP(dev_ops, reg_dm_mr);
2702 SET_DEVICE_OP(dev_ops, reg_user_mr);
2703 SET_DEVICE_OP(dev_ops, reg_user_mr_dmabuf);
2704 SET_DEVICE_OP(dev_ops, req_notify_cq);
2705 SET_DEVICE_OP(dev_ops, rereg_user_mr);
2706 SET_DEVICE_OP(dev_ops, resize_cq);
2707 SET_DEVICE_OP(dev_ops, set_vf_guid);
2708 SET_DEVICE_OP(dev_ops, set_vf_link_state);
2709
2710 SET_OBJ_SIZE(dev_ops, ib_ah);
2711 SET_OBJ_SIZE(dev_ops, ib_counters);
2712 SET_OBJ_SIZE(dev_ops, ib_cq);
2713 SET_OBJ_SIZE(dev_ops, ib_mw);
2714 SET_OBJ_SIZE(dev_ops, ib_pd);
2715 SET_OBJ_SIZE(dev_ops, ib_qp);
2716 SET_OBJ_SIZE(dev_ops, ib_rwq_ind_table);
2717 SET_OBJ_SIZE(dev_ops, ib_srq);
2718 SET_OBJ_SIZE(dev_ops, ib_ucontext);
2719 SET_OBJ_SIZE(dev_ops, ib_xrcd);
2720 }
2721 EXPORT_SYMBOL(ib_set_device_ops);
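/*
 * Illustrative sketch (not part of this file): providers typically declare a
 * const ib_device_ops table and hand it to ib_set_device_ops() during device
 * setup. SET_DEVICE_OP() above only fills callbacks that are still unset, so
 * ops installed earlier are never overwritten. The foo_* names below are
 * hypothetical:
 *
 *	static const struct ib_device_ops foo_dev_ops = {
 *		.owner = THIS_MODULE,
 *		.uverbs_abi_ver = 1,
 *
 *		.alloc_pd = foo_alloc_pd,
 *		.dealloc_pd = foo_dealloc_pd,
 *		.create_cq = foo_create_cq,
 *		.destroy_cq = foo_destroy_cq,
 *		.query_device = foo_query_device,
 *		.query_port = foo_query_port,
 *
 *		INIT_RDMA_OBJ_SIZE(ib_pd, foo_pd, ibpd),
 *		INIT_RDMA_OBJ_SIZE(ib_cq, foo_cq, ibcq),
 *	};
 *
 *	ib_set_device_ops(&foo_dev->ib_dev, &foo_dev_ops);
 *
 * Real drivers also set .driver_id to their enum rdma_driver_id value and
 * eventually call ib_register_device().
 */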
2722
2723 #ifdef CONFIG_INFINIBAND_VIRT_DMA
2724 int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents)
2725 {
2726 struct scatterlist *s;
2727 int i;
2728
2729 for_each_sg(sg, s, nents, i) {
2730 sg_dma_address(s) = (uintptr_t)sg_virt(s);
2731 sg_dma_len(s) = s->length;
2732 }
2733 return nents;
2734 }
2735 EXPORT_SYMBOL(ib_dma_virt_map_sg);
2736 #endif
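/*
 * Note (added for clarity): ib_dma_virt_map_sg() serves software devices
 * built with CONFIG_INFINIBAND_VIRT_DMA (e.g. rxe, siw) that perform no real
 * DMA. Each scatterlist entry's "DMA address" is simply its kernel virtual
 * address, i.e. sg_dma_address(s) == (uintptr_t)sg_virt(s) and
 * sg_dma_len(s) == s->length, so the ULP-visible mapping API keeps working
 * without an IOMMU or a hardware device behind it.
 */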
2737
2738 static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = {
2739 [RDMA_NL_LS_OP_RESOLVE] = {
2740 .doit = ib_nl_handle_resolve_resp,
2741 .flags = RDMA_NL_ADMIN_PERM,
2742 },
2743 [RDMA_NL_LS_OP_SET_TIMEOUT] = {
2744 .doit = ib_nl_handle_set_timeout,
2745 .flags = RDMA_NL_ADMIN_PERM,
2746 },
2747 [RDMA_NL_LS_OP_IP_RESOLVE] = {
2748 .doit = ib_nl_handle_ip_res_resp,
2749 .flags = RDMA_NL_ADMIN_PERM,
2750 },
2751 };
2752
2753 static int __init ib_core_init(void)
2754 {
2755 int ret = -ENOMEM;
2756
2757 ib_wq = alloc_workqueue("infiniband", 0, 0);
2758 if (!ib_wq)
2759 return -ENOMEM;
2760
2761 ib_unreg_wq = alloc_workqueue("ib-unreg-wq", WQ_UNBOUND,
2762 WQ_UNBOUND_MAX_ACTIVE);
2763 if (!ib_unreg_wq)
2764 goto err;
2765
2766 ib_comp_wq = alloc_workqueue("ib-comp-wq",
2767 WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
2768 if (!ib_comp_wq)
2769 goto err_unbound;
2770
2771 ib_comp_unbound_wq =
2772 alloc_workqueue("ib-comp-unb-wq",
2773 WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM |
2774 WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE);
2775 if (!ib_comp_unbound_wq)
2776 goto err_comp;
2777
2778 ret = class_register(&ib_class);
2779 if (ret) {
2780 pr_warn("Couldn't create InfiniBand device class\n");
2781 goto err_comp_unbound;
2782 }
2783
2784 rdma_nl_init();
2785
2786 ret = addr_init();
2787 if (ret) {
2788 pr_warn("Couldn't init IB address resolution\n");
2789 goto err_ibnl;
2790 }
2791
2792 ret = ib_mad_init();
2793 if (ret) {
2794 pr_warn("Couldn't init IB MAD\n");
2795 goto err_addr;
2796 }
2797
2798 ret = ib_sa_init();
2799 if (ret) {
2800 pr_warn("Couldn't init SA\n");
2801 goto err_mad;
2802 }
2803
2804 ret = register_blocking_lsm_notifier(&ibdev_lsm_nb);
2805 if (ret) {
2806 pr_warn("Couldn't register LSM notifier. ret %d\n", ret);
2807 goto err_sa;
2808 }
2809
2810 ret = register_pernet_device(&rdma_dev_net_ops);
2811 if (ret) {
2812 pr_warn("Couldn't init compat dev. ret %d\n", ret);
2813 goto err_compat;
2814 }
2815
2816 nldev_init();
2817 rdma_nl_register(RDMA_NL_LS, ibnl_ls_cb_table);
2818 roce_gid_mgmt_init();
2819
2820 return 0;
2821
2822 err_compat:
2823 unregister_blocking_lsm_notifier(&ibdev_lsm_nb);
2824 err_sa:
2825 ib_sa_cleanup();
2826 err_mad:
2827 ib_mad_cleanup();
2828 err_addr:
2829 addr_cleanup();
2830 err_ibnl:
2831 class_unregister(&ib_class);
2832 err_comp_unbound:
2833 destroy_workqueue(ib_comp_unbound_wq);
2834 err_comp:
2835 destroy_workqueue(ib_comp_wq);
2836 err_unbound:
2837 destroy_workqueue(ib_unreg_wq);
2838 err:
2839 destroy_workqueue(ib_wq);
2840 return ret;
2841 }
2842
2843 static void __exit ib_core_cleanup(void)
2844 {
2845 roce_gid_mgmt_cleanup();
2846 nldev_exit();
2847 rdma_nl_unregister(RDMA_NL_LS);
2848 unregister_pernet_device(&rdma_dev_net_ops);
2849 unregister_blocking_lsm_notifier(&ibdev_lsm_nb);
2850 ib_sa_cleanup();
2851 ib_mad_cleanup();
2852 addr_cleanup();
2853 rdma_nl_exit();
2854 class_unregister(&ib_class);
2855 destroy_workqueue(ib_comp_unbound_wq);
2856 destroy_workqueue(ib_comp_wq);
2857 	/* Make sure that any pending umem accounting work is done. */
2858 destroy_workqueue(ib_wq);
2859 destroy_workqueue(ib_unreg_wq);
2860 WARN_ON(!xa_empty(&clients));
2861 WARN_ON(!xa_empty(&devices));
2862 }
2863
2864 MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_LS, 4);
2865
2866 /* ib core relies on the netdev stack having registered the net_ns_type_operations
2867  * ns kobject type before ib_core initialization runs.
2868  */
2869 fs_initcall(ib_core_init);
2870 module_exit(ib_core_cleanup);