#include <asm/page.h>
#include <linux/inet.h>
#include <linux/io.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/vmw_pvrdma-abi.h>
#include <rdma/uverbs_ioctl.h>

#include "pvrdma.h"

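/**
 * pvrdma_query_device - query device
 * @ibdev: the device to query
 * @props: the device properties
 * @uhw: user data
 *
 * @return: 0 on success, otherwise negative errno
 */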
int pvrdma_query_device(struct ib_device *ibdev,
			struct ib_device_attr *props,
			struct ib_udata *uhw)
{
	struct pvrdma_dev *dev = to_vdev(ibdev);

	if (uhw->inlen || uhw->outlen)
		return -EINVAL;

	props->fw_ver = dev->dsr->caps.fw_ver;
	props->sys_image_guid = dev->dsr->caps.sys_image_guid;
	props->max_mr_size = dev->dsr->caps.max_mr_size;
	props->page_size_cap = dev->dsr->caps.page_size_cap;
	props->vendor_id = dev->dsr->caps.vendor_id;
	props->vendor_part_id = dev->pdev->device;
	props->hw_ver = dev->dsr->caps.hw_ver;
	props->max_qp = dev->dsr->caps.max_qp;
	props->max_qp_wr = dev->dsr->caps.max_qp_wr;
	props->device_cap_flags = dev->dsr->caps.device_cap_flags;
	props->max_send_sge = dev->dsr->caps.max_sge;
	props->max_recv_sge = dev->dsr->caps.max_sge;
	props->max_sge_rd = PVRDMA_GET_CAP(dev, dev->dsr->caps.max_sge,
					   dev->dsr->caps.max_sge_rd);
	props->max_srq = dev->dsr->caps.max_srq;
	props->max_srq_wr = dev->dsr->caps.max_srq_wr;
	props->max_srq_sge = dev->dsr->caps.max_srq_sge;
	props->max_cq = dev->dsr->caps.max_cq;
	props->max_cqe = dev->dsr->caps.max_cqe;
	props->max_mr = dev->dsr->caps.max_mr;
	props->max_pd = dev->dsr->caps.max_pd;
	props->max_qp_rd_atom = dev->dsr->caps.max_qp_rd_atom;
	props->max_qp_init_rd_atom = dev->dsr->caps.max_qp_init_rd_atom;
	props->atomic_cap =
		dev->dsr->caps.atomic_ops &
		(PVRDMA_ATOMIC_OP_COMP_SWAP | PVRDMA_ATOMIC_OP_FETCH_ADD) ?
		IB_ATOMIC_HCA : IB_ATOMIC_NONE;
	props->masked_atomic_cap = props->atomic_cap;
	props->max_ah = dev->dsr->caps.max_ah;
	props->max_pkeys = dev->dsr->caps.max_pkeys;
	props->local_ca_ack_delay = dev->dsr->caps.local_ca_ack_delay;
	if ((dev->dsr->caps.bmme_flags & PVRDMA_BMME_FLAG_LOCAL_INV) &&
	    (dev->dsr->caps.bmme_flags & PVRDMA_BMME_FLAG_REMOTE_INV) &&
	    (dev->dsr->caps.bmme_flags & PVRDMA_BMME_FLAG_FAST_REG_WR)) {
		props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
		props->max_fast_reg_page_list_len = PVRDMA_GET_CAP(dev,
				PVRDMA_MAX_FAST_REG_PAGES,
				dev->dsr->caps.max_fast_reg_page_list_len);
	}

	props->device_cap_flags |= IB_DEVICE_PORT_ACTIVE_EVENT |
				   IB_DEVICE_RC_RNR_NAK_GEN;

	return 0;
}

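/**
 * pvrdma_query_port - query device port attributes
 * @ibdev: the device to query
 * @port: the port number
 * @props: the device port properties
 *
 * @return: 0 on success, otherwise negative errno
 */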
int pvrdma_query_port(struct ib_device *ibdev, u32 port,
		      struct ib_port_attr *props)
{
	struct pvrdma_dev *dev = to_vdev(ibdev);
	union pvrdma_cmd_req req;
	union pvrdma_cmd_resp rsp;
	struct pvrdma_cmd_query_port *cmd = &req.query_port;
	struct pvrdma_cmd_query_port_resp *resp = &rsp.query_port_resp;
	int err;

	memset(cmd, 0, sizeof(*cmd));
	cmd->hdr.cmd = PVRDMA_CMD_QUERY_PORT;
	cmd->port_num = port;

	err = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_QUERY_PORT_RESP);
	if (err < 0) {
		dev_warn(&dev->pdev->dev,
			 "could not query port, error: %d\n", err);
		return err;
	}

	/* props is zeroed by the caller, avoid zeroing it here */

	props->state = pvrdma_port_state_to_ib(resp->attrs.state);
	props->max_mtu = pvrdma_mtu_to_ib(resp->attrs.max_mtu);
	props->active_mtu = pvrdma_mtu_to_ib(resp->attrs.active_mtu);
	props->gid_tbl_len = resp->attrs.gid_tbl_len;
	props->port_cap_flags =
		pvrdma_port_cap_flags_to_ib(resp->attrs.port_cap_flags);
	props->port_cap_flags |= IB_PORT_CM_SUP;
	props->ip_gids = true;
	props->max_msg_sz = resp->attrs.max_msg_sz;
	props->bad_pkey_cntr = resp->attrs.bad_pkey_cntr;
	props->qkey_viol_cntr = resp->attrs.qkey_viol_cntr;
	props->pkey_tbl_len = resp->attrs.pkey_tbl_len;
	props->lid = resp->attrs.lid;
	props->sm_lid = resp->attrs.sm_lid;
	props->lmc = resp->attrs.lmc;
	props->max_vl_num = resp->attrs.max_vl_num;
	props->sm_sl = resp->attrs.sm_sl;
	props->subnet_timeout = resp->attrs.subnet_timeout;
	props->init_type_reply = resp->attrs.init_type_reply;
	props->active_width = pvrdma_port_width_to_ib(resp->attrs.active_width);
	props->active_speed = pvrdma_port_speed_to_ib(resp->attrs.active_speed);
	props->phys_state = resp->attrs.phys_state;

	return 0;
}

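/**
 * pvrdma_query_gid - query device gid
 * @ibdev: the device to query
 * @port: the port number
 * @index: the index
 * @gid: the device gid value
 *
 * @return: 0 on success, otherwise negative errno
 */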
int pvrdma_query_gid(struct ib_device *ibdev, u32 port, int index,
		     union ib_gid *gid)
{
	struct pvrdma_dev *dev = to_vdev(ibdev);

	if (index >= dev->dsr->caps.gid_tbl_len)
		return -EINVAL;

	memcpy(gid, &dev->sgid_tbl[index], sizeof(union ib_gid));

	return 0;
}

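/**
 * pvrdma_query_pkey - query device port's P_Key table
 * @ibdev: the device to query
 * @port: the port number
 * @index: the index
 * @pkey: the device P_Key value
 *
 * @return: 0 on success, otherwise negative errno
 */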
int pvrdma_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
		      u16 *pkey)
{
	int err = 0;
	union pvrdma_cmd_req req;
	union pvrdma_cmd_resp rsp;
	struct pvrdma_cmd_query_pkey *cmd = &req.query_pkey;

	memset(cmd, 0, sizeof(*cmd));
	cmd->hdr.cmd = PVRDMA_CMD_QUERY_PKEY;
	cmd->port_num = port;
	cmd->index = index;

	err = pvrdma_cmd_post(to_vdev(ibdev), &req, &rsp,
			      PVRDMA_CMD_QUERY_PKEY_RESP);
	if (err < 0) {
		dev_warn(&to_vdev(ibdev)->pdev->dev,
			 "could not query pkey, error: %d\n", err);
		return err;
	}

	*pkey = rsp.query_pkey_resp.pkey;

	return 0;
}

enum rdma_link_layer pvrdma_port_link_layer(struct ib_device *ibdev,
					    u32 port)
{
	return IB_LINK_LAYER_ETHERNET;
}

int pvrdma_modify_device(struct ib_device *ibdev, int mask,
			 struct ib_device_modify *props)
{
	unsigned long flags;

	if (mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
		     IB_DEVICE_MODIFY_NODE_DESC)) {
		dev_warn(&to_vdev(ibdev)->pdev->dev,
			 "unsupported device modify mask %#x\n", mask);
		return -EOPNOTSUPP;
	}

	if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
		spin_lock_irqsave(&to_vdev(ibdev)->desc_lock, flags);
		memcpy(ibdev->node_desc, props->node_desc, 64);
		spin_unlock_irqrestore(&to_vdev(ibdev)->desc_lock, flags);
	}

	if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) {
		mutex_lock(&to_vdev(ibdev)->port_mutex);
		to_vdev(ibdev)->sys_image_guid =
			cpu_to_be64(props->sys_image_guid);
		mutex_unlock(&to_vdev(ibdev)->port_mutex);
	}

	return 0;
}

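/**
 * pvrdma_modify_port - modify device port attributes
 * @ibdev: the device to modify
 * @port: the port number
 * @mask: attributes to modify
 * @props: the device properties
 *
 * @return: 0 on success, otherwise negative errno
 */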
int pvrdma_modify_port(struct ib_device *ibdev, u32 port, int mask,
		       struct ib_port_modify *props)
{
	struct ib_port_attr attr;
	struct pvrdma_dev *vdev = to_vdev(ibdev);
	int ret;

	if (mask & ~IB_PORT_SHUTDOWN) {
		dev_warn(&vdev->pdev->dev,
			 "unsupported port modify mask %#x\n", mask);
		return -EOPNOTSUPP;
	}

	mutex_lock(&vdev->port_mutex);
	ret = ib_query_port(ibdev, port, &attr);
	if (ret)
		goto out;

	vdev->port_cap_mask |= props->set_port_cap_mask;
	vdev->port_cap_mask &= ~props->clr_port_cap_mask;

	if (mask & IB_PORT_SHUTDOWN)
		vdev->ib_active = false;

out:
	mutex_unlock(&vdev->port_mutex);
	return ret;
}

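/**
 * pvrdma_alloc_ucontext - allocate ucontext
 * @uctx: the uverbs context
 * @udata: user data
 *
 * @return: zero on success, otherwise errno.
 */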
int pvrdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
{
	struct ib_device *ibdev = uctx->device;
	struct pvrdma_dev *vdev = to_vdev(ibdev);
	struct pvrdma_ucontext *context = to_vucontext(uctx);
	union pvrdma_cmd_req req = {};
	union pvrdma_cmd_resp rsp = {};
	struct pvrdma_cmd_create_uc *cmd = &req.create_uc;
	struct pvrdma_cmd_create_uc_resp *resp = &rsp.create_uc_resp;
	struct pvrdma_alloc_ucontext_resp uresp = {};
	int ret;

	if (!vdev->ib_active)
		return -EAGAIN;

	context->dev = vdev;
	ret = pvrdma_uar_alloc(vdev, &context->uar);
	if (ret)
		return -ENOMEM;

	/* Pass the UAR page frame to the device; older devices take a 32-bit PFN */
	if (vdev->dsr_version < PVRDMA_PPN64_VERSION)
		cmd->pfn = context->uar.pfn;
	else
		cmd->pfn64 = context->uar.pfn;

	cmd->hdr.cmd = PVRDMA_CMD_CREATE_UC;
	ret = pvrdma_cmd_post(vdev, &req, &rsp, PVRDMA_CMD_CREATE_UC_RESP);
	if (ret < 0) {
		dev_warn(&vdev->pdev->dev,
			 "could not create ucontext, error: %d\n", ret);
		goto err;
	}

	context->ctx_handle = resp->ctx_handle;

	/* copy back to user */
	uresp.qp_tab_size = vdev->dsr->caps.max_qp;
	ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
	if (ret) {
		pvrdma_uar_free(vdev, &context->uar);
		pvrdma_dealloc_ucontext(&context->ibucontext);
		return -EFAULT;
	}

	return 0;

err:
	pvrdma_uar_free(vdev, &context->uar);
	return ret;
}

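/**
 * pvrdma_dealloc_ucontext - deallocate ucontext
 * @ibcontext: the ucontext to deallocate
 */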
void pvrdma_dealloc_ucontext(struct ib_ucontext *ibcontext)
{
	struct pvrdma_ucontext *context = to_vucontext(ibcontext);
	union pvrdma_cmd_req req = {};
	struct pvrdma_cmd_destroy_uc *cmd = &req.destroy_uc;
	int ret;

	cmd->hdr.cmd = PVRDMA_CMD_DESTROY_UC;
	cmd->ctx_handle = context->ctx_handle;

	ret = pvrdma_cmd_post(context->dev, &req, NULL, 0);
	if (ret < 0)
		dev_warn(&context->dev->pdev->dev,
			 "destroy ucontext failed, error: %d\n", ret);

	/* Free the UAR even if the device command failed */
	pvrdma_uar_free(to_vdev(ibcontext->device), &context->uar);
}

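/**
 * pvrdma_mmap - create mmap region
 * @ibcontext: the user context
 * @vma: the VMA
 *
 * @return: 0 on success, otherwise errno.
 */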
int pvrdma_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
{
	struct pvrdma_ucontext *context = to_vucontext(ibcontext);
	unsigned long start = vma->vm_start;
	unsigned long size = vma->vm_end - vma->vm_start;
	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;

	dev_dbg(&context->dev->pdev->dev, "create mmap region\n");

	if ((size != PAGE_SIZE) || (offset & ~PAGE_MASK)) {
		dev_warn(&context->dev->pdev->dev,
			 "invalid params for mmap region\n");
		return -EINVAL;
	}

	/* Map the UAR page into userspace as non-cached I/O memory */
	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	if (io_remap_pfn_range(vma, start, context->uar.pfn, size,
			       vma->vm_page_prot))
		return -EAGAIN;

	return 0;
}

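/**
 * pvrdma_alloc_pd - allocate protection domain
 * @ibpd: PD pointer
 * @udata: user data
 *
 * @return: 0 on success, otherwise errno.
 */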
int pvrdma_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
	struct ib_device *ibdev = ibpd->device;
	struct pvrdma_pd *pd = to_vpd(ibpd);
	struct pvrdma_dev *dev = to_vdev(ibdev);
	union pvrdma_cmd_req req = {};
	union pvrdma_cmd_resp rsp = {};
	struct pvrdma_cmd_create_pd *cmd = &req.create_pd;
	struct pvrdma_cmd_create_pd_resp *resp = &rsp.create_pd_resp;
	struct pvrdma_alloc_pd_resp pd_resp = {0};
	int ret;
	struct pvrdma_ucontext *context = rdma_udata_to_drv_context(
		udata, struct pvrdma_ucontext, ibucontext);

	/* Check allowed max pds */
	if (!atomic_add_unless(&dev->num_pds, 1, dev->dsr->caps.max_pd))
		return -ENOMEM;

	cmd->hdr.cmd = PVRDMA_CMD_CREATE_PD;
	cmd->ctx_handle = context ? context->ctx_handle : 0;
	ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_PD_RESP);
	if (ret < 0) {
		dev_warn(&dev->pdev->dev,
			 "failed to allocate protection domain, error: %d\n",
			 ret);
		goto err;
	}

	pd->privileged = !udata;
	pd->pd_handle = resp->pd_handle;
	pd->pdn = resp->pd_handle;
	pd_resp.pdn = resp->pd_handle;

	if (udata) {
		if (ib_copy_to_udata(udata, &pd_resp, sizeof(pd_resp))) {
			dev_warn(&dev->pdev->dev,
				 "failed to copy back protection domain\n");
			pvrdma_dealloc_pd(&pd->ibpd, udata);
			return -EFAULT;
		}
	}

	return 0;

err:
	atomic_dec(&dev->num_pds);
	return ret;
}

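/**
 * pvrdma_dealloc_pd - deallocate protection domain
 * @pd: the protection domain to be released
 * @udata: user data or null for kernel object
 *
 * @return: always 0.
 */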
int pvrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
{
	struct pvrdma_dev *dev = to_vdev(pd->device);
	union pvrdma_cmd_req req = {};
	struct pvrdma_cmd_destroy_pd *cmd = &req.destroy_pd;
	int ret;

	cmd->hdr.cmd = PVRDMA_CMD_DESTROY_PD;
	cmd->pd_handle = to_vpd(pd)->pd_handle;

	ret = pvrdma_cmd_post(dev, &req, NULL, 0);
	if (ret)
		dev_warn(&dev->pdev->dev,
			 "could not dealloc protection domain, error: %d\n",
			 ret);

	atomic_dec(&dev->num_pds);
	return 0;
}

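/**
 * pvrdma_create_ah - create an address handle
 * @ibah: the IB address handle
 * @init_attr: the address handle init attributes
 * @udata: user data blob
 *
 * @return: 0 on success, otherwise errno.
 */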
int pvrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
		     struct ib_udata *udata)
{
	struct rdma_ah_attr *ah_attr = init_attr->ah_attr;
	struct pvrdma_dev *dev = to_vdev(ibah->device);
	struct pvrdma_ah *ah = to_vah(ibah);
	const struct ib_global_route *grh;
	u32 port_num = rdma_ah_get_port_num(ah_attr);

	if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH))
		return -EINVAL;

	grh = rdma_ah_read_grh(ah_attr);
	if ((ah_attr->type != RDMA_AH_ATTR_TYPE_ROCE) ||
	    rdma_is_multicast_addr((struct in6_addr *)grh->dgid.raw))
		return -EINVAL;

	if (!atomic_add_unless(&dev->num_ahs, 1, dev->dsr->caps.max_ah))
		return -ENOMEM;

	ah->av.port_pd = to_vpd(ibah->pd)->pd_handle | (port_num << 24);
	ah->av.src_path_bits = rdma_ah_get_path_bits(ah_attr);
	ah->av.src_path_bits |= 0x80;
	ah->av.gid_index = grh->sgid_index;
	ah->av.hop_limit = grh->hop_limit;
	ah->av.sl_tclass_flowlabel = (grh->traffic_class << 20) |
				     grh->flow_label;
	memcpy(ah->av.dgid, grh->dgid.raw, 16);
	memcpy(ah->av.dmac, ah_attr->roce.dmac, ETH_ALEN);

	return 0;
}

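/**
 * pvrdma_destroy_ah - destroy an address handle
 * @ah: the address handle to destroy
 * @flags: destroy address handle flags (see enum rdma_destroy_ah_flags)
 *
 * @return: always 0.
 */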
int pvrdma_destroy_ah(struct ib_ah *ah, u32 flags)
{
	struct pvrdma_dev *dev = to_vdev(ah->device);

	atomic_dec(&dev->num_ahs);
	return 0;
}