0001
0002
0003
0004
0005
0006
0007
0008
0009 #undef pr_fmt
0010 #define pr_fmt(fmt) KBUILD_MODNAME " L" __stringify(__LINE__) ": " fmt
0011
0012 #include <linux/module.h>
0013 #include <linux/inet.h>
0014
0015 #include "rtrs-pri.h"
0016 #include "rtrs-log.h"
0017
0018 MODULE_DESCRIPTION("RDMA Transport Core");
0019 MODULE_LICENSE("GPL");
0020
0021 struct rtrs_iu *rtrs_iu_alloc(u32 iu_num, size_t size, gfp_t gfp_mask,
0022 struct ib_device *dma_dev,
0023 enum dma_data_direction dir,
0024 void (*done)(struct ib_cq *cq, struct ib_wc *wc))
0025 {
0026 struct rtrs_iu *ius, *iu;
0027 int i;
0028
0029 ius = kcalloc(iu_num, sizeof(*ius), gfp_mask);
0030 if (!ius)
0031 return NULL;
0032 for (i = 0; i < iu_num; i++) {
0033 iu = &ius[i];
0034 iu->direction = dir;
0035 iu->buf = kzalloc(size, gfp_mask);
0036 if (!iu->buf)
0037 goto err;
0038
0039 iu->dma_addr = ib_dma_map_single(dma_dev, iu->buf, size, dir);
0040 if (ib_dma_mapping_error(dma_dev, iu->dma_addr))
0041 goto err;
0042
0043 iu->cqe.done = done;
0044 iu->size = size;
0045 }
0046 return ius;
0047 err:
0048 rtrs_iu_free(ius, dma_dev, i);
0049 return NULL;
0050 }
0051 EXPORT_SYMBOL_GPL(rtrs_iu_alloc);
0052
0053 void rtrs_iu_free(struct rtrs_iu *ius, struct ib_device *ibdev, u32 queue_num)
0054 {
0055 struct rtrs_iu *iu;
0056 int i;
0057
0058 if (!ius)
0059 return;
0060
0061 for (i = 0; i < queue_num; i++) {
0062 iu = &ius[i];
0063 ib_dma_unmap_single(ibdev, iu->dma_addr, iu->size, iu->direction);
0064 kfree(iu->buf);
0065 }
0066 kfree(ius);
0067 }
0068 EXPORT_SYMBOL_GPL(rtrs_iu_free);
0069
0070 int rtrs_iu_post_recv(struct rtrs_con *con, struct rtrs_iu *iu)
0071 {
0072 struct rtrs_path *path = con->path;
0073 struct ib_recv_wr wr;
0074 struct ib_sge list;
0075
0076 list.addr = iu->dma_addr;
0077 list.length = iu->size;
0078 list.lkey = path->dev->ib_pd->local_dma_lkey;
0079
0080 if (list.length == 0) {
0081 rtrs_wrn(con->path,
0082 "Posting receive work request failed, sg list is empty\n");
0083 return -EINVAL;
0084 }
0085 wr = (struct ib_recv_wr) {
0086 .wr_cqe = &iu->cqe,
0087 .sg_list = &list,
0088 .num_sge = 1,
0089 };
0090
0091 return ib_post_recv(con->qp, &wr, NULL);
0092 }
0093 EXPORT_SYMBOL_GPL(rtrs_iu_post_recv);
0094
0095 int rtrs_post_recv_empty(struct rtrs_con *con, struct ib_cqe *cqe)
0096 {
0097 struct ib_recv_wr wr;
0098
0099 wr = (struct ib_recv_wr) {
0100 .wr_cqe = cqe,
0101 };
0102
0103 return ib_post_recv(con->qp, &wr, NULL);
0104 }
0105 EXPORT_SYMBOL_GPL(rtrs_post_recv_empty);
0106
0107 static int rtrs_post_send(struct ib_qp *qp, struct ib_send_wr *head,
0108 struct ib_send_wr *wr, struct ib_send_wr *tail)
0109 {
0110 if (head) {
0111 struct ib_send_wr *next = head;
0112
0113 while (next->next)
0114 next = next->next;
0115 next->next = wr;
0116 } else {
0117 head = wr;
0118 }
0119
0120 if (tail)
0121 wr->next = tail;
0122
0123 return ib_post_send(qp, head, NULL);
0124 }
0125
0126 int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size,
0127 struct ib_send_wr *head)
0128 {
0129 struct rtrs_path *path = con->path;
0130 struct ib_send_wr wr;
0131 struct ib_sge list;
0132
0133 if (WARN_ON(size == 0))
0134 return -EINVAL;
0135
0136 list.addr = iu->dma_addr;
0137 list.length = size;
0138 list.lkey = path->dev->ib_pd->local_dma_lkey;
0139
0140 wr = (struct ib_send_wr) {
0141 .wr_cqe = &iu->cqe,
0142 .sg_list = &list,
0143 .num_sge = 1,
0144 .opcode = IB_WR_SEND,
0145 .send_flags = IB_SEND_SIGNALED,
0146 };
0147
0148 return rtrs_post_send(con->qp, head, &wr, NULL);
0149 }
0150 EXPORT_SYMBOL_GPL(rtrs_iu_post_send);
0151
0152 int rtrs_iu_post_rdma_write_imm(struct rtrs_con *con, struct rtrs_iu *iu,
0153 struct ib_sge *sge, unsigned int num_sge,
0154 u32 rkey, u64 rdma_addr, u32 imm_data,
0155 enum ib_send_flags flags,
0156 struct ib_send_wr *head,
0157 struct ib_send_wr *tail)
0158 {
0159 struct ib_rdma_wr wr;
0160 int i;
0161
0162 wr = (struct ib_rdma_wr) {
0163 .wr.wr_cqe = &iu->cqe,
0164 .wr.sg_list = sge,
0165 .wr.num_sge = num_sge,
0166 .rkey = rkey,
0167 .remote_addr = rdma_addr,
0168 .wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM,
0169 .wr.ex.imm_data = cpu_to_be32(imm_data),
0170 .wr.send_flags = flags,
0171 };
0172
0173
0174
0175
0176
0177 for (i = 0; i < num_sge; i++)
0178 if (WARN_ON(sge[i].length == 0))
0179 return -EINVAL;
0180
0181 return rtrs_post_send(con->qp, head, &wr.wr, tail);
0182 }
0183 EXPORT_SYMBOL_GPL(rtrs_iu_post_rdma_write_imm);
0184
0185 static int rtrs_post_rdma_write_imm_empty(struct rtrs_con *con,
0186 struct ib_cqe *cqe,
0187 u32 imm_data,
0188 struct ib_send_wr *head)
0189 {
0190 struct ib_rdma_wr wr;
0191 struct rtrs_path *path = con->path;
0192 enum ib_send_flags sflags;
0193
0194 atomic_dec_if_positive(&con->sq_wr_avail);
0195 sflags = (atomic_inc_return(&con->wr_cnt) % path->signal_interval) ?
0196 0 : IB_SEND_SIGNALED;
0197
0198 wr = (struct ib_rdma_wr) {
0199 .wr.wr_cqe = cqe,
0200 .wr.send_flags = sflags,
0201 .wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM,
0202 .wr.ex.imm_data = cpu_to_be32(imm_data),
0203 };
0204
0205 return rtrs_post_send(con->qp, head, &wr.wr, NULL);
0206 }
0207
0208 static void qp_event_handler(struct ib_event *ev, void *ctx)
0209 {
0210 struct rtrs_con *con = ctx;
0211
0212 switch (ev->event) {
0213 case IB_EVENT_COMM_EST:
0214 rtrs_info(con->path, "QP event %s (%d) received\n",
0215 ib_event_msg(ev->event), ev->event);
0216 rdma_notify(con->cm_id, IB_EVENT_COMM_EST);
0217 break;
0218 default:
0219 rtrs_info(con->path, "Unhandled QP event %s (%d) received\n",
0220 ib_event_msg(ev->event), ev->event);
0221 break;
0222 }
0223 }
0224
0225 static bool is_pollqueue(struct rtrs_con *con)
0226 {
0227 return con->cid >= con->path->irq_con_num;
0228 }
0229
0230 static int create_cq(struct rtrs_con *con, int cq_vector, int nr_cqe,
0231 enum ib_poll_context poll_ctx)
0232 {
0233 struct rdma_cm_id *cm_id = con->cm_id;
0234 struct ib_cq *cq;
0235
0236 if (is_pollqueue(con))
0237 cq = ib_alloc_cq(cm_id->device, con, nr_cqe, cq_vector,
0238 poll_ctx);
0239 else
0240 cq = ib_cq_pool_get(cm_id->device, nr_cqe, cq_vector, poll_ctx);
0241
0242 if (IS_ERR(cq)) {
0243 rtrs_err(con->path, "Creating completion queue failed, errno: %ld\n",
0244 PTR_ERR(cq));
0245 return PTR_ERR(cq);
0246 }
0247 con->cq = cq;
0248 con->nr_cqe = nr_cqe;
0249
0250 return 0;
0251 }
0252
0253 static int create_qp(struct rtrs_con *con, struct ib_pd *pd,
0254 u32 max_send_wr, u32 max_recv_wr, u32 max_sge)
0255 {
0256 struct ib_qp_init_attr init_attr = {NULL};
0257 struct rdma_cm_id *cm_id = con->cm_id;
0258 int ret;
0259
0260 init_attr.cap.max_send_wr = max_send_wr;
0261 init_attr.cap.max_recv_wr = max_recv_wr;
0262 init_attr.cap.max_recv_sge = 1;
0263 init_attr.event_handler = qp_event_handler;
0264 init_attr.qp_context = con;
0265 init_attr.cap.max_send_sge = max_sge;
0266
0267 init_attr.qp_type = IB_QPT_RC;
0268 init_attr.send_cq = con->cq;
0269 init_attr.recv_cq = con->cq;
0270 init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
0271
0272 ret = rdma_create_qp(cm_id, pd, &init_attr);
0273 if (ret) {
0274 rtrs_err(con->path, "Creating QP failed, err: %d\n", ret);
0275 return ret;
0276 }
0277 con->qp = cm_id->qp;
0278
0279 return ret;
0280 }
0281
0282 static void destroy_cq(struct rtrs_con *con)
0283 {
0284 if (con->cq) {
0285 if (is_pollqueue(con))
0286 ib_free_cq(con->cq);
0287 else
0288 ib_cq_pool_put(con->cq, con->nr_cqe);
0289 }
0290 con->cq = NULL;
0291 }
0292
0293 int rtrs_cq_qp_create(struct rtrs_path *path, struct rtrs_con *con,
0294 u32 max_send_sge, int cq_vector, int nr_cqe,
0295 u32 max_send_wr, u32 max_recv_wr,
0296 enum ib_poll_context poll_ctx)
0297 {
0298 int err;
0299
0300 err = create_cq(con, cq_vector, nr_cqe, poll_ctx);
0301 if (err)
0302 return err;
0303
0304 err = create_qp(con, path->dev->ib_pd, max_send_wr, max_recv_wr,
0305 max_send_sge);
0306 if (err) {
0307 destroy_cq(con);
0308 return err;
0309 }
0310 con->path = path;
0311
0312 return 0;
0313 }
0314 EXPORT_SYMBOL_GPL(rtrs_cq_qp_create);
0315
0316 void rtrs_cq_qp_destroy(struct rtrs_con *con)
0317 {
0318 if (con->qp) {
0319 rdma_destroy_qp(con->cm_id);
0320 con->qp = NULL;
0321 }
0322 destroy_cq(con);
0323 }
0324 EXPORT_SYMBOL_GPL(rtrs_cq_qp_destroy);
0325
0326 static void schedule_hb(struct rtrs_path *path)
0327 {
0328 queue_delayed_work(path->hb_wq, &path->hb_dwork,
0329 msecs_to_jiffies(path->hb_interval_ms));
0330 }
0331
0332 void rtrs_send_hb_ack(struct rtrs_path *path)
0333 {
0334 struct rtrs_con *usr_con = path->con[0];
0335 u32 imm;
0336 int err;
0337
0338 imm = rtrs_to_imm(RTRS_HB_ACK_IMM, 0);
0339 err = rtrs_post_rdma_write_imm_empty(usr_con, path->hb_cqe, imm,
0340 NULL);
0341 if (err) {
0342 rtrs_err(path, "send HB ACK failed, errno: %d\n", err);
0343 path->hb_err_handler(usr_con);
0344 return;
0345 }
0346 }
0347 EXPORT_SYMBOL_GPL(rtrs_send_hb_ack);
0348
0349 static void hb_work(struct work_struct *work)
0350 {
0351 struct rtrs_con *usr_con;
0352 struct rtrs_path *path;
0353 u32 imm;
0354 int err;
0355
0356 path = container_of(to_delayed_work(work), typeof(*path), hb_dwork);
0357 usr_con = path->con[0];
0358
0359 if (path->hb_missed_cnt > path->hb_missed_max) {
0360 rtrs_err(path, "HB missed max reached.\n");
0361 path->hb_err_handler(usr_con);
0362 return;
0363 }
0364 if (path->hb_missed_cnt++) {
0365
0366 schedule_hb(path);
0367 return;
0368 }
0369
0370 path->hb_last_sent = ktime_get();
0371
0372 imm = rtrs_to_imm(RTRS_HB_MSG_IMM, 0);
0373 err = rtrs_post_rdma_write_imm_empty(usr_con, path->hb_cqe, imm,
0374 NULL);
0375 if (err) {
0376 rtrs_err(path, "HB send failed, errno: %d\n", err);
0377 path->hb_err_handler(usr_con);
0378 return;
0379 }
0380
0381 schedule_hb(path);
0382 }
0383
0384 void rtrs_init_hb(struct rtrs_path *path, struct ib_cqe *cqe,
0385 unsigned int interval_ms, unsigned int missed_max,
0386 void (*err_handler)(struct rtrs_con *con),
0387 struct workqueue_struct *wq)
0388 {
0389 path->hb_cqe = cqe;
0390 path->hb_interval_ms = interval_ms;
0391 path->hb_err_handler = err_handler;
0392 path->hb_wq = wq;
0393 path->hb_missed_max = missed_max;
0394 path->hb_missed_cnt = 0;
0395 INIT_DELAYED_WORK(&path->hb_dwork, hb_work);
0396 }
0397 EXPORT_SYMBOL_GPL(rtrs_init_hb);
0398
0399 void rtrs_start_hb(struct rtrs_path *path)
0400 {
0401 schedule_hb(path);
0402 }
0403 EXPORT_SYMBOL_GPL(rtrs_start_hb);
0404
0405 void rtrs_stop_hb(struct rtrs_path *path)
0406 {
0407 cancel_delayed_work_sync(&path->hb_dwork);
0408 path->hb_missed_cnt = 0;
0409 }
0410 EXPORT_SYMBOL_GPL(rtrs_stop_hb);
0411
0412 static int rtrs_str_gid_to_sockaddr(const char *addr, size_t len,
0413 short port, struct sockaddr_storage *dst)
0414 {
0415 struct sockaddr_ib *dst_ib = (struct sockaddr_ib *)dst;
0416 int ret;
0417
0418
0419
0420
0421
0422 ret = in6_pton(addr, len, dst_ib->sib_addr.sib_raw, '\0', NULL);
0423 if (ret == 0)
0424 return -EINVAL;
0425
0426 dst_ib->sib_family = AF_IB;
0427
0428
0429
0430
0431 dst_ib->sib_sid = cpu_to_be64(RDMA_IB_IP_PS_IB | port);
0432 dst_ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL);
0433 dst_ib->sib_pkey = cpu_to_be16(0xffff);
0434
0435 return 0;
0436 }
0437
0438
0439
0440
0441
0442
0443
0444
0445
0446
0447
0448
0449
0450 static int rtrs_str_to_sockaddr(const char *addr, size_t len,
0451 u16 port, struct sockaddr_storage *dst)
0452 {
0453 if (strncmp(addr, "gid:", 4) == 0) {
0454 return rtrs_str_gid_to_sockaddr(addr + 4, len - 4, port, dst);
0455 } else if (strncmp(addr, "ip:", 3) == 0) {
0456 char port_str[8];
0457 char *cpy;
0458 int err;
0459
0460 snprintf(port_str, sizeof(port_str), "%u", port);
0461 cpy = kstrndup(addr + 3, len - 3, GFP_KERNEL);
0462 err = cpy ? inet_pton_with_scope(&init_net, AF_UNSPEC,
0463 cpy, port_str, dst) : -ENOMEM;
0464 kfree(cpy);
0465
0466 return err;
0467 }
0468 return -EPROTONOSUPPORT;
0469 }
0470
0471
0472
0473
0474
0475
0476
0477
0478
0479
0480 int sockaddr_to_str(const struct sockaddr *addr, char *buf, size_t len)
0481 {
0482 switch (addr->sa_family) {
0483 case AF_IB:
0484 return scnprintf(buf, len, "gid:%pI6",
0485 &((struct sockaddr_ib *)addr)->sib_addr.sib_raw);
0486 case AF_INET:
0487 return scnprintf(buf, len, "ip:%pI4",
0488 &((struct sockaddr_in *)addr)->sin_addr);
0489 case AF_INET6:
0490 return scnprintf(buf, len, "ip:%pI6c",
0491 &((struct sockaddr_in6 *)addr)->sin6_addr);
0492 }
0493 return scnprintf(buf, len, "<invalid address family>");
0494 }
0495 EXPORT_SYMBOL(sockaddr_to_str);
0496
0497
0498
0499
0500
0501
0502
0503
0504
0505
0506
0507
0508 int rtrs_addr_to_str(const struct rtrs_addr *addr, char *buf, size_t len)
0509 {
0510 int cnt;
0511
0512 cnt = sockaddr_to_str((struct sockaddr *)addr->src,
0513 buf, len);
0514 cnt += scnprintf(buf + cnt, len - cnt, "@");
0515 sockaddr_to_str((struct sockaddr *)addr->dst,
0516 buf + cnt, len - cnt);
0517 return cnt;
0518 }
0519 EXPORT_SYMBOL(rtrs_addr_to_str);
0520
0521
0522
0523
0524
0525
0526
0527
0528
0529
0530
0531
0532
0533
0534
0535 int rtrs_addr_to_sockaddr(const char *str, size_t len, u16 port,
0536 struct rtrs_addr *addr)
0537 {
0538 const char *d;
0539
0540 d = strchr(str, ',');
0541 if (!d)
0542 d = strchr(str, '@');
0543 if (d) {
0544 if (rtrs_str_to_sockaddr(str, d - str, 0, addr->src))
0545 return -EINVAL;
0546 d += 1;
0547 len -= d - str;
0548 str = d;
0549
0550 } else {
0551 addr->src = NULL;
0552 }
0553 return rtrs_str_to_sockaddr(str, len, port, addr->dst);
0554 }
0555 EXPORT_SYMBOL(rtrs_addr_to_sockaddr);
0556
0557 void rtrs_rdma_dev_pd_init(enum ib_pd_flags pd_flags,
0558 struct rtrs_rdma_dev_pd *pool)
0559 {
0560 WARN_ON(pool->ops && (!pool->ops->alloc ^ !pool->ops->free));
0561 INIT_LIST_HEAD(&pool->list);
0562 mutex_init(&pool->mutex);
0563 pool->pd_flags = pd_flags;
0564 }
0565 EXPORT_SYMBOL(rtrs_rdma_dev_pd_init);
0566
0567 void rtrs_rdma_dev_pd_deinit(struct rtrs_rdma_dev_pd *pool)
0568 {
0569 mutex_destroy(&pool->mutex);
0570 WARN_ON(!list_empty(&pool->list));
0571 }
0572 EXPORT_SYMBOL(rtrs_rdma_dev_pd_deinit);
0573
0574 static void dev_free(struct kref *ref)
0575 {
0576 struct rtrs_rdma_dev_pd *pool;
0577 struct rtrs_ib_dev *dev;
0578
0579 dev = container_of(ref, typeof(*dev), ref);
0580 pool = dev->pool;
0581
0582 mutex_lock(&pool->mutex);
0583 list_del(&dev->entry);
0584 mutex_unlock(&pool->mutex);
0585
0586 if (pool->ops && pool->ops->deinit)
0587 pool->ops->deinit(dev);
0588
0589 ib_dealloc_pd(dev->ib_pd);
0590
0591 if (pool->ops && pool->ops->free)
0592 pool->ops->free(dev);
0593 else
0594 kfree(dev);
0595 }
0596
0597 int rtrs_ib_dev_put(struct rtrs_ib_dev *dev)
0598 {
0599 return kref_put(&dev->ref, dev_free);
0600 }
0601 EXPORT_SYMBOL(rtrs_ib_dev_put);
0602
0603 static int rtrs_ib_dev_get(struct rtrs_ib_dev *dev)
0604 {
0605 return kref_get_unless_zero(&dev->ref);
0606 }
0607
0608 struct rtrs_ib_dev *
0609 rtrs_ib_dev_find_or_add(struct ib_device *ib_dev,
0610 struct rtrs_rdma_dev_pd *pool)
0611 {
0612 struct rtrs_ib_dev *dev;
0613
0614 mutex_lock(&pool->mutex);
0615 list_for_each_entry(dev, &pool->list, entry) {
0616 if (dev->ib_dev->node_guid == ib_dev->node_guid &&
0617 rtrs_ib_dev_get(dev))
0618 goto out_unlock;
0619 }
0620 mutex_unlock(&pool->mutex);
0621 if (pool->ops && pool->ops->alloc)
0622 dev = pool->ops->alloc();
0623 else
0624 dev = kzalloc(sizeof(*dev), GFP_KERNEL);
0625 if (IS_ERR_OR_NULL(dev))
0626 goto out_err;
0627
0628 kref_init(&dev->ref);
0629 dev->pool = pool;
0630 dev->ib_dev = ib_dev;
0631 dev->ib_pd = ib_alloc_pd(ib_dev, pool->pd_flags);
0632 if (IS_ERR(dev->ib_pd))
0633 goto out_free_dev;
0634
0635 if (pool->ops && pool->ops->init && pool->ops->init(dev))
0636 goto out_free_pd;
0637
0638 mutex_lock(&pool->mutex);
0639 list_add(&dev->entry, &pool->list);
0640 out_unlock:
0641 mutex_unlock(&pool->mutex);
0642 return dev;
0643
0644 out_free_pd:
0645 ib_dealloc_pd(dev->ib_pd);
0646 out_free_dev:
0647 if (pool->ops && pool->ops->free)
0648 pool->ops->free(dev);
0649 else
0650 kfree(dev);
0651 out_err:
0652 return NULL;
0653 }
0654 EXPORT_SYMBOL(rtrs_ib_dev_find_or_add);