// SPDX-License-Identifier: GPL-2.0-only
/*
 * RDMA transport layer based on the trans_fd.c implementation.
 *
 *  Copyright (C) 2008 by Tom Tucker <tom@opengridcomputing.com>
 *  Copyright (C) 2006 by Russ Cox <rsc@swtch.com>
 *  Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
 *  Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com>
 *  Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/in.h>
#include <linux/module.h>
#include <linux/net.h>
#include <linux/ipv6.h>
#include <linux/kthread.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/un.h>
#include <linux/uaccess.h>
#include <linux/inet.h>
#include <linux/idr.h>
#include <linux/file.h>
#include <linux/parser.h>
#include <linux/semaphore.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <net/9p/9p.h>
#include <net/9p/client.h>
#include <net/9p/transport.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>

#define P9_PORT         5640
#define P9_RDMA_SQ_DEPTH    32
#define P9_RDMA_RQ_DEPTH    32
#define P9_RDMA_SEND_SGE    4
#define P9_RDMA_RECV_SGE    4
#define P9_RDMA_IRD     0
#define P9_RDMA_ORD     0
#define P9_RDMA_TIMEOUT     30000       /* 30 seconds */
#define P9_RDMA_MAXSIZE     (1024*1024) /* 1MB */

/**
 * struct p9_trans_rdma - RDMA transport instance
 *
 * @state: tracks the transport state machine for connection setup and tear down
 * @cm_id: The RDMA CM ID
 * @pd: Protection Domain pointer
 * @qp: Queue Pair pointer
 * @cq: Completion Queue pointer
 * @timeout: Number of msecs to wait for connection management events
 * @privport: Whether a privileged port may be used
 * @port: The port to use
 * @sq_depth: The depth of the Send Queue
 * @sq_sem: Semaphore for the SQ
 * @rq_depth: The depth of the Receive Queue.
 * @rq_sem: Semaphore for the RQ
 * @excess_rc: Amount of posted Receive Contexts without a pending request.
 *      See rdma_request()
 * @addr: The remote peer's address
 * @req_lock: Protects the active request list
 * @cm_done: Completion event for connection management tracking
 */
struct p9_trans_rdma {
    enum {
        P9_RDMA_INIT,
        P9_RDMA_ADDR_RESOLVED,
        P9_RDMA_ROUTE_RESOLVED,
        P9_RDMA_CONNECTED,
        P9_RDMA_FLUSHING,
        P9_RDMA_CLOSING,
        P9_RDMA_CLOSED,
    } state;
    struct rdma_cm_id *cm_id;
    struct ib_pd *pd;
    struct ib_qp *qp;
    struct ib_cq *cq;
    long timeout;
    bool privport;
    u16 port;
    int sq_depth;
    struct semaphore sq_sem;
    int rq_depth;
    struct semaphore rq_sem;
    atomic_t excess_rc;
    struct sockaddr_in addr;
    spinlock_t req_lock;

    struct completion cm_done;
};

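/*
 * The connection state machine above is driven by p9_cm_event_handler()
 * below: INIT -> ADDR_RESOLVED -> ROUTE_RESOLVED -> CONNECTED on a
 * successful setup; FLUSHING, CLOSING and CLOSED mark the error and
 * teardown paths.
 */
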
struct p9_rdma_req;

/**
 * struct p9_rdma_context - Keeps track of in-process WR
 *
 * @cqe: completion queue entry
 * @busa: Bus address to unmap when the WR completes
 * @req: Keeps track of requests (send)
 * @rc: Keeps track of replies (receive)
 */
struct p9_rdma_context {
    struct ib_cqe cqe;
    dma_addr_t busa;
    union {
        struct p9_req_t *req;
        struct p9_fcall rc;
    };
};

/**
 * struct p9_rdma_opts - Collection of mount options
 * @port: port of connection
 * @privport: Whether a privileged port may be used
 * @sq_depth: The requested depth of the SQ. This really doesn't need
 * to be any deeper than the number of threads used in the client
 * @rq_depth: The depth of the RQ. Should be greater than or equal to SQ depth
 * @timeout: Time to wait in msecs for CM events
 */
struct p9_rdma_opts {
    short port;
    bool privport;
    int sq_depth;
    int rq_depth;
    long timeout;
};

/*
 * Option Parsing (code inspired by NFS code)
 */
enum {
    /* Options that take integer arguments */
    Opt_port, Opt_rq_depth, Opt_sq_depth, Opt_timeout,
    /* Options that take no argument */
    Opt_privport,
    Opt_err,
};

static match_table_t tokens = {
    {Opt_port, "port=%u"},
    {Opt_sq_depth, "sq=%u"},
    {Opt_rq_depth, "rq=%u"},
    {Opt_timeout, "timeout=%u"},
    {Opt_privport, "privport"},
    {Opt_err, NULL},
};

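/*
 * Example (illustrative): an option string of "port=5640,sq=16,privport"
 * parses via the table above into opts->port = 5640, opts->sq_depth = 16
 * and opts->privport = true; parse_opts() then raises rq_depth to at
 * least sq_depth.
 */
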
static int p9_rdma_show_options(struct seq_file *m, struct p9_client *clnt)
{
    struct p9_trans_rdma *rdma = clnt->trans;

    if (rdma->port != P9_PORT)
        seq_printf(m, ",port=%u", rdma->port);
    if (rdma->sq_depth != P9_RDMA_SQ_DEPTH)
        seq_printf(m, ",sq=%u", rdma->sq_depth);
    if (rdma->rq_depth != P9_RDMA_RQ_DEPTH)
        seq_printf(m, ",rq=%u", rdma->rq_depth);
    if (rdma->timeout != P9_RDMA_TIMEOUT)
        seq_printf(m, ",timeout=%lu", rdma->timeout);
    if (rdma->privport)
        seq_puts(m, ",privport");
    return 0;
}

/**
 * parse_opts - parse mount options into rdma options structure
 * @params: options string passed from mount
 * @opts: rdma transport-specific structure to parse options into
 *
 * Returns 0 upon success, -ERRNO upon failure
 */
static int parse_opts(char *params, struct p9_rdma_opts *opts)
{
    char *p;
    substring_t args[MAX_OPT_ARGS];
    int option;
    char *options, *tmp_options;

    opts->port = P9_PORT;
    opts->sq_depth = P9_RDMA_SQ_DEPTH;
    opts->rq_depth = P9_RDMA_RQ_DEPTH;
    opts->timeout = P9_RDMA_TIMEOUT;
    opts->privport = false;

    if (!params)
        return 0;

    tmp_options = kstrdup(params, GFP_KERNEL);
    if (!tmp_options) {
        p9_debug(P9_DEBUG_ERROR,
             "failed to allocate copy of option string\n");
        return -ENOMEM;
    }
    options = tmp_options;

    while ((p = strsep(&options, ",")) != NULL) {
        int token;
        int r;

        if (!*p)
            continue;
        token = match_token(p, tokens, args);
        if ((token != Opt_err) && (token != Opt_privport)) {
            r = match_int(&args[0], &option);
            if (r < 0) {
                p9_debug(P9_DEBUG_ERROR,
                     "integer field, but no integer?\n");
                continue;
            }
        }
        switch (token) {
        case Opt_port:
            opts->port = option;
            break;
        case Opt_sq_depth:
            opts->sq_depth = option;
            break;
        case Opt_rq_depth:
            opts->rq_depth = option;
            break;
        case Opt_timeout:
            opts->timeout = option;
            break;
        case Opt_privport:
            opts->privport = true;
            break;
        default:
            continue;
        }
    }
    /* RQ must be at least as large as the SQ */
    opts->rq_depth = max(opts->rq_depth, opts->sq_depth);
    kfree(tmp_options);
    return 0;
}

static int
p9_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
    struct p9_client *c = id->context;
    struct p9_trans_rdma *rdma = c->trans;

    switch (event->event) {
    case RDMA_CM_EVENT_ADDR_RESOLVED:
        BUG_ON(rdma->state != P9_RDMA_INIT);
        rdma->state = P9_RDMA_ADDR_RESOLVED;
        break;

    case RDMA_CM_EVENT_ROUTE_RESOLVED:
        BUG_ON(rdma->state != P9_RDMA_ADDR_RESOLVED);
        rdma->state = P9_RDMA_ROUTE_RESOLVED;
        break;

    case RDMA_CM_EVENT_ESTABLISHED:
        BUG_ON(rdma->state != P9_RDMA_ROUTE_RESOLVED);
        rdma->state = P9_RDMA_CONNECTED;
        break;

    case RDMA_CM_EVENT_DISCONNECTED:
        if (rdma)
            rdma->state = P9_RDMA_CLOSED;
        c->status = Disconnected;
        break;

    case RDMA_CM_EVENT_TIMEWAIT_EXIT:
        break;

    case RDMA_CM_EVENT_ADDR_CHANGE:
    case RDMA_CM_EVENT_ROUTE_ERROR:
    case RDMA_CM_EVENT_DEVICE_REMOVAL:
    case RDMA_CM_EVENT_MULTICAST_JOIN:
    case RDMA_CM_EVENT_MULTICAST_ERROR:
    case RDMA_CM_EVENT_REJECTED:
    case RDMA_CM_EVENT_CONNECT_REQUEST:
    case RDMA_CM_EVENT_CONNECT_RESPONSE:
    case RDMA_CM_EVENT_CONNECT_ERROR:
    case RDMA_CM_EVENT_ADDR_ERROR:
    case RDMA_CM_EVENT_UNREACHABLE:
        c->status = Disconnected;
        rdma_disconnect(rdma->cm_id);
        break;
    default:
        BUG();
    }
    complete(&rdma->cm_done);
    return 0;
}

static void
recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
    struct p9_client *client = cq->cq_context;
    struct p9_trans_rdma *rdma = client->trans;
    struct p9_rdma_context *c =
        container_of(wc->wr_cqe, struct p9_rdma_context, cqe);
    struct p9_req_t *req;
    int err = 0;
    int16_t tag;

    req = NULL;
    ib_dma_unmap_single(rdma->cm_id->device, c->busa, client->msize,
                             DMA_FROM_DEVICE);

    if (wc->status != IB_WC_SUCCESS)
        goto err_out;

    c->rc.size = wc->byte_len;
    err = p9_parse_header(&c->rc, NULL, NULL, &tag, 1);
    if (err)
        goto err_out;

    req = p9_tag_lookup(client, tag);
    if (!req)
        goto err_out;

    /* Check that we have not yet received a reply for this request. */
    if (unlikely(req->rc.sdata)) {
        pr_err("Duplicate reply for request %d\n", tag);
        goto err_out;
    }

    req->rc.size = c->rc.size;
    req->rc.sdata = c->rc.sdata;
    p9_client_cb(client, req, REQ_STATUS_RCVD);

 out:
    up(&rdma->rq_sem);
    kfree(c);
    return;

 err_out:
    p9_debug(P9_DEBUG_ERROR, "req %p err %d status %d\n",
            req, err, wc->status);
    rdma->state = P9_RDMA_FLUSHING;
    client->status = Disconnected;
    goto out;
}

static void
send_done(struct ib_cq *cq, struct ib_wc *wc)
{
    struct p9_client *client = cq->cq_context;
    struct p9_trans_rdma *rdma = client->trans;
    struct p9_rdma_context *c =
        container_of(wc->wr_cqe, struct p9_rdma_context, cqe);

    ib_dma_unmap_single(rdma->cm_id->device,
                c->busa, c->req->tc.size,
                DMA_TO_DEVICE);
    up(&rdma->sq_sem);
    p9_req_put(client, c->req);
    kfree(c);
}

static void qp_event_handler(struct ib_event *event, void *context)
{
    p9_debug(P9_DEBUG_ERROR, "QP event %d context %p\n",
         event->event, context);
}

static void rdma_destroy_trans(struct p9_trans_rdma *rdma)
{
    if (!rdma)
        return;

    if (rdma->qp && !IS_ERR(rdma->qp))
        ib_destroy_qp(rdma->qp);

    if (rdma->pd && !IS_ERR(rdma->pd))
        ib_dealloc_pd(rdma->pd);

    if (rdma->cq && !IS_ERR(rdma->cq))
        ib_free_cq(rdma->cq);

    if (rdma->cm_id && !IS_ERR(rdma->cm_id))
        rdma_destroy_id(rdma->cm_id);

    kfree(rdma);
}

static int
post_recv(struct p9_client *client, struct p9_rdma_context *c)
{
    struct p9_trans_rdma *rdma = client->trans;
    struct ib_recv_wr wr;
    struct ib_sge sge;

    c->busa = ib_dma_map_single(rdma->cm_id->device,
                    c->rc.sdata, client->msize,
                    DMA_FROM_DEVICE);
    if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
        goto error;

    c->cqe.done = recv_done;

    sge.addr = c->busa;
    sge.length = client->msize;
    sge.lkey = rdma->pd->local_dma_lkey;

    wr.next = NULL;
    wr.wr_cqe = &c->cqe;
    wr.sg_list = &sge;
    wr.num_sge = 1;
    return ib_post_recv(rdma->qp, &wr, NULL);

 error:
    p9_debug(P9_DEBUG_ERROR, "EIO\n");
    return -EIO;
}

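/* Note that post_recv() always maps the full client->msize: replies are
 * not matched to particular buffers, so every posted buffer must be able
 * to hold the largest possible reply.
 */
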
static int rdma_request(struct p9_client *client, struct p9_req_t *req)
{
    struct p9_trans_rdma *rdma = client->trans;
    struct ib_send_wr wr;
    struct ib_sge sge;
    int err = 0;
    unsigned long flags;
    struct p9_rdma_context *c = NULL;
    struct p9_rdma_context *rpl_context = NULL;

    /* When an error occurs between posting the recv and the send,
     * there will be a receive context posted without a pending request.
     * Since there is no way to "un-post" it, we remember it and skip
     * post_recv() for the next request.
     * So here, see if we are this `next request' and need to absorb an
     * excess rc. If yes, then drop and free our own, and do not
     * post_recv().
     */
    if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
        if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
            /* Got one! */
            p9_fcall_fini(&req->rc);
            req->rc.sdata = NULL;
            goto dont_need_post_recv;
        } else {
            /* We raced and lost. */
            atomic_inc(&rdma->excess_rc);
        }
    }

    /* Allocate an fcall for the reply */
    rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS);
    if (!rpl_context) {
        err = -ENOMEM;
        goto recv_error;
    }
    rpl_context->rc.sdata = req->rc.sdata;

    /*
     * Post a receive buffer for this request. We need to ensure
     * there is a reply buffer available for every outstanding
     * request. A flushed request can result in no reply for an
     * outstanding request, so we must keep a count to avoid
     * overflowing the RQ.
     */
    if (down_interruptible(&rdma->rq_sem)) {
        err = -EINTR;
        goto recv_error;
    }

    err = post_recv(client, rpl_context);
    if (err) {
        p9_debug(P9_DEBUG_ERROR, "POST RECV failed: %d\n", err);
        goto recv_error;
    }
    /* remove posted receive buffer from request structure */
    req->rc.sdata = NULL;

dont_need_post_recv:
    /* Post the request */
    c = kmalloc(sizeof *c, GFP_NOFS);
    if (!c) {
        err = -ENOMEM;
        goto send_error;
    }
    c->req = req;

    c->busa = ib_dma_map_single(rdma->cm_id->device,
                    c->req->tc.sdata, c->req->tc.size,
                    DMA_TO_DEVICE);
    if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) {
        err = -EIO;
        goto send_error;
    }

    c->cqe.done = send_done;

    sge.addr = c->busa;
    sge.length = c->req->tc.size;
    sge.lkey = rdma->pd->local_dma_lkey;

    wr.next = NULL;
    wr.wr_cqe = &c->cqe;
    wr.opcode = IB_WR_SEND;
    wr.send_flags = IB_SEND_SIGNALED;
    wr.sg_list = &sge;
    wr.num_sge = 1;

    if (down_interruptible(&rdma->sq_sem)) {
        err = -EINTR;
        goto send_error;
    }

    /* Mark request as `sent' *before* we actually send it,
     * because doing it after could erase the REQ_STATUS_RCVD
     * status in case of a very fast reply.
     */
    req->status = REQ_STATUS_SENT;
    err = ib_post_send(rdma->qp, &wr, NULL);
    if (err)
        goto send_error;

    /* Success */
    return 0;

 /* Handle errors that happened during or while preparing the send: */
 send_error:
    req->status = REQ_STATUS_ERROR;
    kfree(c);
    p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err);

    /* We did post_recv(), but not the send; there is now one receive
     * buffer posted in excess.
     */
    atomic_inc(&rdma->excess_rc);
    return err;

 /* Handle errors that happened during or while preparing post_recv(): */
 recv_error:
    kfree(rpl_context);
    spin_lock_irqsave(&rdma->req_lock, flags);
    if (err != -EINTR && rdma->state < P9_RDMA_CLOSING) {
        rdma->state = P9_RDMA_CLOSING;
        spin_unlock_irqrestore(&rdma->req_lock, flags);
        rdma_disconnect(rdma->cm_id);
    } else
        spin_unlock_irqrestore(&rdma->req_lock, flags);
    return err;
}

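/*
 * excess_rc accounting, by example: if post_recv() succeeds but the send
 * then fails, rdma_request() increments excess_rc on its way out. The
 * next request takes the dont_need_post_recv branch, frees its own reply
 * buffer and reuses the already-posted one, so the RQ never holds more
 * buffers than there are outstanding requests.
 */
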
static void rdma_close(struct p9_client *client)
{
    struct p9_trans_rdma *rdma;

    if (!client)
        return;

    rdma = client->trans;
    if (!rdma)
        return;

    client->status = Disconnected;
    rdma_disconnect(rdma->cm_id);
    rdma_destroy_trans(rdma);
}

/**
 * alloc_rdma - Allocate and initialize the rdma transport structure
 * @opts: Mount options structure
 */
static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts)
{
    struct p9_trans_rdma *rdma;

    rdma = kzalloc(sizeof(struct p9_trans_rdma), GFP_KERNEL);
    if (!rdma)
        return NULL;

    rdma->port = opts->port;
    rdma->privport = opts->privport;
    rdma->sq_depth = opts->sq_depth;
    rdma->rq_depth = opts->rq_depth;
    rdma->timeout = opts->timeout;
    spin_lock_init(&rdma->req_lock);
    init_completion(&rdma->cm_done);
    sema_init(&rdma->sq_sem, rdma->sq_depth);
    sema_init(&rdma->rq_sem, rdma->rq_depth);
    atomic_set(&rdma->excess_rc, 0);

    return rdma;
}

static int rdma_cancel(struct p9_client *client, struct p9_req_t *req)
{
    /* Nothing to do here.
     * We will take care of it (if we have to) in rdma_cancelled().
     */
    return 1;
}

/* A request has been fully flushed without a reply.
 * That means we have posted one buffer in excess.
 */
static int rdma_cancelled(struct p9_client *client, struct p9_req_t *req)
{
    struct p9_trans_rdma *rdma = client->trans;

    atomic_inc(&rdma->excess_rc);
    return 0;
}

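/* Walk the reserved port range downward; retry only while the port is
 * already in use, and stop on success or on any other error.
 */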
static int p9_rdma_bind_privport(struct p9_trans_rdma *rdma)
{
    struct sockaddr_in cl = {
        .sin_family = AF_INET,
        .sin_addr.s_addr = htonl(INADDR_ANY),
    };
    int port, err = -EINVAL;

    for (port = P9_DEF_MAX_RESVPORT; port >= P9_DEF_MIN_RESVPORT; port--) {
        cl.sin_port = htons((ushort)port);
        err = rdma_bind_addr(rdma->cm_id, (struct sockaddr *)&cl);
        if (err != -EADDRINUSE)
            break;
    }
    return err;
}

/**
 * rdma_create_trans - Transport method for creating a transport instance
 * @client: client instance
 * @addr: IP address string
 * @args: Mount options string
 */
static int
rdma_create_trans(struct p9_client *client, const char *addr, char *args)
{
    int err;
    struct p9_rdma_opts opts;
    struct p9_trans_rdma *rdma;
    struct rdma_conn_param conn_param;
    struct ib_qp_init_attr qp_attr;

    if (addr == NULL)
        return -EINVAL;

    /* Parse the transport specific mount options */
    err = parse_opts(args, &opts);
    if (err < 0)
        return err;

    /* Create and initialize the RDMA transport structure */
    rdma = alloc_rdma(&opts);
    if (!rdma)
        return -ENOMEM;

    /* Create the RDMA CM ID */
    rdma->cm_id = rdma_create_id(&init_net, p9_cm_event_handler, client,
                     RDMA_PS_TCP, IB_QPT_RC);
    if (IS_ERR(rdma->cm_id))
        goto error;

    /* Associate the client with the transport */
    client->trans = rdma;

    /* Bind to a privileged port if we need to */
    if (opts.privport) {
        err = p9_rdma_bind_privport(rdma);
        if (err < 0) {
            pr_err("%s (%d): problem binding to privport: %d\n",
                   __func__, task_pid_nr(current), -err);
            goto error;
        }
    }

    /* Resolve the server's address */
    rdma->addr.sin_family = AF_INET;
    rdma->addr.sin_addr.s_addr = in_aton(addr);
    rdma->addr.sin_port = htons(opts.port);
    err = rdma_resolve_addr(rdma->cm_id, NULL,
                (struct sockaddr *)&rdma->addr,
                rdma->timeout);
    if (err)
        goto error;
    err = wait_for_completion_interruptible(&rdma->cm_done);
    if (err || (rdma->state != P9_RDMA_ADDR_RESOLVED))
        goto error;

    /* Resolve the route to the server */
    err = rdma_resolve_route(rdma->cm_id, rdma->timeout);
    if (err)
        goto error;
    err = wait_for_completion_interruptible(&rdma->cm_done);
    if (err || (rdma->state != P9_RDMA_ROUTE_RESOLVED))
        goto error;

    /* Create the Completion Queue */
    rdma->cq = ib_alloc_cq_any(rdma->cm_id->device, client,
                   opts.sq_depth + opts.rq_depth + 1,
                   IB_POLL_SOFTIRQ);
    if (IS_ERR(rdma->cq))
        goto error;

    /* Create the Protection Domain */
    rdma->pd = ib_alloc_pd(rdma->cm_id->device, 0);
    if (IS_ERR(rdma->pd))
        goto error;

    /* Create the Queue Pair */
    memset(&qp_attr, 0, sizeof qp_attr);
    qp_attr.event_handler = qp_event_handler;
    qp_attr.qp_context = client;
    qp_attr.cap.max_send_wr = opts.sq_depth;
    qp_attr.cap.max_recv_wr = opts.rq_depth;
    qp_attr.cap.max_send_sge = P9_RDMA_SEND_SGE;
    qp_attr.cap.max_recv_sge = P9_RDMA_RECV_SGE;
    qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
    qp_attr.qp_type = IB_QPT_RC;
    qp_attr.send_cq = rdma->cq;
    qp_attr.recv_cq = rdma->cq;
    err = rdma_create_qp(rdma->cm_id, rdma->pd, &qp_attr);
    if (err)
        goto error;
    rdma->qp = rdma->cm_id->qp;

    /* Request a connection */
    memset(&conn_param, 0, sizeof(conn_param));
    conn_param.private_data = NULL;
    conn_param.private_data_len = 0;
    conn_param.responder_resources = P9_RDMA_IRD;
    conn_param.initiator_depth = P9_RDMA_ORD;
    err = rdma_connect(rdma->cm_id, &conn_param);
    if (err)
        goto error;
    err = wait_for_completion_interruptible(&rdma->cm_done);
    if (err || (rdma->state != P9_RDMA_CONNECTED))
        goto error;

    client->status = Connected;

    return 0;

error:
    rdma_destroy_trans(rdma);
    return -ENOTCONN;
}

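/* Example (illustrative): this transport is selected from user space
 * with the "rdma" transport option, e.g.
 *   mount -t 9p -o trans=rdma,port=5640 <server> /mnt/9p
 * which hands rdma_create_trans() the server address string and the
 * remaining option string.
 */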
static struct p9_trans_module p9_rdma_trans = {
    .name = "rdma",
    .maxsize = P9_RDMA_MAXSIZE,
    .def = 0,
    .owner = THIS_MODULE,
    .create = rdma_create_trans,
    .close = rdma_close,
    .request = rdma_request,
    .cancel = rdma_cancel,
    .cancelled = rdma_cancelled,
    .show_options = p9_rdma_show_options,
};

/**
 * p9_trans_rdma_init - Register the 9P RDMA transport driver
 */
static int __init p9_trans_rdma_init(void)
{
    v9fs_register_trans(&p9_rdma_trans);
    return 0;
}

static void __exit p9_trans_rdma_exit(void)
{
    v9fs_unregister_trans(&p9_rdma_trans);
}

module_init(p9_trans_rdma_init);
module_exit(p9_trans_rdma_exit);
MODULE_ALIAS_9P("rdma");

MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
MODULE_DESCRIPTION("RDMA Transport for 9P");
MODULE_LICENSE("Dual BSD/GPL");