0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  *   Copyright (C) 2017, Microsoft Corporation.
0004  *   Copyright (C) 2018, LG Electronics.
0005  *
0006  *   Author(s): Long Li <longli@microsoft.com>,
0007  *      Hyunchul Lee <hyc.lee@gmail.com>
0008  */
0009 
0010 #define SUBMOD_NAME "smb_direct"
0011 
0012 #include <linux/kthread.h>
0013 #include <linux/list.h>
0014 #include <linux/mempool.h>
0015 #include <linux/highmem.h>
0016 #include <linux/scatterlist.h>
0017 #include <rdma/ib_verbs.h>
0018 #include <rdma/rdma_cm.h>
0019 #include <rdma/rw.h>
0020 
0021 #include "glob.h"
0022 #include "connection.h"
0023 #include "smb_common.h"
0024 #include "smbstatus.h"
0025 #include "transport_rdma.h"
0026 
0027 #define SMB_DIRECT_PORT_IWARP       5445
0028 #define SMB_DIRECT_PORT_INFINIBAND  445
0029 
0030 #define SMB_DIRECT_VERSION_LE       cpu_to_le16(0x0100)
0031 
0032 /* SMB_DIRECT negotiation timeout in seconds */
0033 #define SMB_DIRECT_NEGOTIATE_TIMEOUT        120
0034 
0035 #define SMB_DIRECT_MAX_SEND_SGES        8
0036 #define SMB_DIRECT_MAX_RECV_SGES        1
0037 
0038 /*
0039  * Default maximum number of outstanding RDMA read/write operations on this
0040  * connection. This value may be lowered during QP creation to the hardware limit.
0041  */
0042 #define SMB_DIRECT_CM_INITIATOR_DEPTH       8
0043 
0044 /* Maximum number of retries on data transfer operations */
0045 #define SMB_DIRECT_CM_RETRY         6
0046 /* No need to retry on Receiver Not Ready since SMB_DIRECT manages credits */
0047 #define SMB_DIRECT_CM_RNR_RETRY     0
0048 
0049 /*
0050  * User-configurable initial values per SMB_DIRECT transport connection,
0051  * as defined in [MS-SMBD] 3.1.1.1.
0052  * These may change after SMB_DIRECT negotiation.
0053  */
0054 
0055 /* Use port 445 as the SMB Direct port by default */
0056 static int smb_direct_port = SMB_DIRECT_PORT_INFINIBAND;
0057 
0058 /* The local peer's maximum number of credits to grant to the peer */
0059 static int smb_direct_receive_credit_max = 255;
0060 
0061 /* The number of credits the local peer requests from the remote peer */
0062 static int smb_direct_send_credit_target = 255;
0063 
0064 /* The maximum size of a single message that can be sent to the remote peer */
0065 static int smb_direct_max_send_size = 8192;
0066 
0067 /* The maximum fragmented upper-layer payload receive size supported */
0068 static int smb_direct_max_fragmented_recv_size = 1024 * 1024;
0069 
0070 /* The maximum size of a single message that can be received */
0071 static int smb_direct_max_receive_size = 8192;
0072 
0073 static int smb_direct_max_read_write_size = SMBD_DEFAULT_IOSIZE;
0074 
0075 static LIST_HEAD(smb_direct_device_list);
0076 static DEFINE_RWLOCK(smb_direct_device_lock);
0077 
0078 struct smb_direct_device {
0079     struct ib_device    *ib_dev;
0080     struct list_head    list;
0081 };
0082 
0083 static struct smb_direct_listener {
0084     struct rdma_cm_id   *cm_id;
0085 } smb_direct_listener;
0086 
0087 static struct workqueue_struct *smb_direct_wq;
0088 
0089 enum smb_direct_status {
0090     SMB_DIRECT_CS_NEW = 0,
0091     SMB_DIRECT_CS_CONNECTED,
0092     SMB_DIRECT_CS_DISCONNECTING,
0093     SMB_DIRECT_CS_DISCONNECTED,
0094 };
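/*
 * Typical transitions (illustrative summary): NEW -> CONNECTED once the
 * negotiate request arrives or RDMA_CM_EVENT_ESTABLISHED fires,
 * CONNECTED -> DISCONNECTING when rdma_disconnect() is issued, and any
 * state -> DISCONNECTED on a CM disconnect, device-removal or
 * connect-error event.
 */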
0095 
0096 struct smb_direct_transport {
0097     struct ksmbd_transport  transport;
0098 
0099     enum smb_direct_status  status;
0100     bool            full_packet_received;
0101     wait_queue_head_t   wait_status;
0102 
0103     struct rdma_cm_id   *cm_id;
0104     struct ib_cq        *send_cq;
0105     struct ib_cq        *recv_cq;
0106     struct ib_pd        *pd;
0107     struct ib_qp        *qp;
0108 
0109     int         max_send_size;
0110     int         max_recv_size;
0111     int         max_fragmented_send_size;
0112     int         max_fragmented_recv_size;
0113     int         max_rdma_rw_size;
0114 
0115     spinlock_t      reassembly_queue_lock;
0116     struct list_head    reassembly_queue;
0117     int         reassembly_data_length;
0118     int         reassembly_queue_length;
0119     int         first_entry_offset;
0120     wait_queue_head_t   wait_reassembly_queue;
0121 
0122     spinlock_t      receive_credit_lock;
0123     int         recv_credits;
0124     int         count_avail_recvmsg;
0125     int         recv_credit_max;
0126     int         recv_credit_target;
0127 
0128     spinlock_t      recvmsg_queue_lock;
0129     struct list_head    recvmsg_queue;
0130 
0131     spinlock_t      empty_recvmsg_queue_lock;
0132     struct list_head    empty_recvmsg_queue;
0133 
0134     int         send_credit_target;
0135     atomic_t        send_credits;
0136     spinlock_t      lock_new_recv_credits;
0137     int         new_recv_credits;
0138     int         max_rw_credits;
0139     int         pages_per_rw_credit;
0140     atomic_t        rw_credits;
0141 
0142     wait_queue_head_t   wait_send_credits;
0143     wait_queue_head_t   wait_rw_credits;
0144 
0145     mempool_t       *sendmsg_mempool;
0146     struct kmem_cache   *sendmsg_cache;
0147     mempool_t       *recvmsg_mempool;
0148     struct kmem_cache   *recvmsg_cache;
0149 
0150     wait_queue_head_t   wait_send_pending;
0151     atomic_t        send_pending;
0152 
0153     struct delayed_work post_recv_credits_work;
0154     struct work_struct  send_immediate_work;
0155     struct work_struct  disconnect_work;
0156 
0157     bool            negotiation_requested;
0158 };
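/*
 * Receive buffers cycle through three lists: recvmsg_queue holds free
 * buffers ready to be posted, empty_recvmsg_queue holds consumed buffers
 * awaiting reposting, and reassembly_queue holds received DATA_TRANSFER
 * messages until smb_direct_read() copies their payload out.
 */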
0159 
0160 #define KSMBD_TRANS(t) ((struct ksmbd_transport *)&((t)->transport))
0161 
0162 enum {
0163     SMB_DIRECT_MSG_NEGOTIATE_REQ = 0,
0164     SMB_DIRECT_MSG_DATA_TRANSFER
0165 };
0166 
0167 static struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops;
0168 
0169 struct smb_direct_send_ctx {
0170     struct list_head    msg_list;
0171     int         wr_cnt;
0172     bool            need_invalidate_rkey;
0173     unsigned int        remote_key;
0174 };
0175 
0176 struct smb_direct_sendmsg {
0177     struct smb_direct_transport *transport;
0178     struct ib_send_wr   wr;
0179     struct list_head    list;
0180     int         num_sge;
0181     struct ib_sge       sge[SMB_DIRECT_MAX_SEND_SGES];
0182     struct ib_cqe       cqe;
0183     u8          packet[];
0184 };
0185 
0186 struct smb_direct_recvmsg {
0187     struct smb_direct_transport *transport;
0188     struct list_head    list;
0189     int         type;
0190     struct ib_sge       sge;
0191     struct ib_cqe       cqe;
0192     bool            first_segment;
0193     u8          packet[];
0194 };
0195 
0196 struct smb_direct_rdma_rw_msg {
0197     struct smb_direct_transport *t;
0198     struct ib_cqe       cqe;
0199     int         status;
0200     struct completion   *completion;
0201     struct list_head    list;
0202     struct rdma_rw_ctx  rw_ctx;
0203     struct sg_table     sgt;
0204     struct scatterlist  sg_list[];
0205 };
0206 
0207 void init_smbd_max_io_size(unsigned int sz)
0208 {
0209     sz = clamp_val(sz, SMBD_MIN_IOSIZE, SMBD_MAX_IOSIZE);
0210     smb_direct_max_read_write_size = sz;
0211 }
0212 
0213 unsigned int get_smbd_max_read_write_size(void)
0214 {
0215     return smb_direct_max_read_write_size;
0216 }
0217 
0218 static inline int get_buf_page_count(void *buf, int size)
0219 {
0220     return DIV_ROUND_UP((uintptr_t)buf + size, PAGE_SIZE) -
0221         (uintptr_t)buf / PAGE_SIZE;
0222 }
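/*
 * Illustrative example with 4 KiB pages: buf = 0x1000ff0, size = 0x20
 * straddles one page boundary, so
 * DIV_ROUND_UP(0x1001010, 0x1000) - 0x1000ff0 / 0x1000 = 0x1002 - 0x1000 = 2.
 */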
0223 
0224 static void smb_direct_destroy_pools(struct smb_direct_transport *transport);
0225 static void smb_direct_post_recv_credits(struct work_struct *work);
0226 static int smb_direct_post_send_data(struct smb_direct_transport *t,
0227                      struct smb_direct_send_ctx *send_ctx,
0228                      struct kvec *iov, int niov,
0229                      int remaining_data_length);
0230 
0231 static inline struct smb_direct_transport *
0232 smb_trans_direct_transfort(struct ksmbd_transport *t)
0233 {
0234     return container_of(t, struct smb_direct_transport, transport);
0235 }
0236 
0237 static inline void
0238 *smb_direct_recvmsg_payload(struct smb_direct_recvmsg *recvmsg)
0239 {
0240     return (void *)recvmsg->packet;
0241 }
0242 
0243 static inline bool is_receive_credit_post_required(int receive_credits,
0244                            int avail_recvmsg_count)
0245 {
0246     return receive_credits <= (smb_direct_receive_credit_max >> 3) &&
0247         avail_recvmsg_count >= (receive_credits >> 2);
0248 }
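/*
 * With the default smb_direct_receive_credit_max of 255, receives are
 * reposted once the peer is down to 255 >> 3 = 31 credits or fewer and
 * at least a quarter of that count is available for reposting.
 */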
0249 
0250 static struct
0251 smb_direct_recvmsg *get_free_recvmsg(struct smb_direct_transport *t)
0252 {
0253     struct smb_direct_recvmsg *recvmsg = NULL;
0254 
0255     spin_lock(&t->recvmsg_queue_lock);
0256     if (!list_empty(&t->recvmsg_queue)) {
0257         recvmsg = list_first_entry(&t->recvmsg_queue,
0258                        struct smb_direct_recvmsg,
0259                        list);
0260         list_del(&recvmsg->list);
0261     }
0262     spin_unlock(&t->recvmsg_queue_lock);
0263     return recvmsg;
0264 }
0265 
0266 static void put_recvmsg(struct smb_direct_transport *t,
0267             struct smb_direct_recvmsg *recvmsg)
0268 {
0269     ib_dma_unmap_single(t->cm_id->device, recvmsg->sge.addr,
0270                 recvmsg->sge.length, DMA_FROM_DEVICE);
0271 
0272     spin_lock(&t->recvmsg_queue_lock);
0273     list_add(&recvmsg->list, &t->recvmsg_queue);
0274     spin_unlock(&t->recvmsg_queue_lock);
0275 }
0276 
0277 static struct
0278 smb_direct_recvmsg *get_empty_recvmsg(struct smb_direct_transport *t)
0279 {
0280     struct smb_direct_recvmsg *recvmsg = NULL;
0281 
0282     spin_lock(&t->empty_recvmsg_queue_lock);
0283     if (!list_empty(&t->empty_recvmsg_queue)) {
0284         recvmsg = list_first_entry(&t->empty_recvmsg_queue,
0285                        struct smb_direct_recvmsg, list);
0286         list_del(&recvmsg->list);
0287     }
0288     spin_unlock(&t->empty_recvmsg_queue_lock);
0289     return recvmsg;
0290 }
0291 
0292 static void put_empty_recvmsg(struct smb_direct_transport *t,
0293                   struct smb_direct_recvmsg *recvmsg)
0294 {
0295     ib_dma_unmap_single(t->cm_id->device, recvmsg->sge.addr,
0296                 recvmsg->sge.length, DMA_FROM_DEVICE);
0297 
0298     spin_lock(&t->empty_recvmsg_queue_lock);
0299     list_add_tail(&recvmsg->list, &t->empty_recvmsg_queue);
0300     spin_unlock(&t->empty_recvmsg_queue_lock);
0301 }
0302 
0303 static void enqueue_reassembly(struct smb_direct_transport *t,
0304                    struct smb_direct_recvmsg *recvmsg,
0305                    int data_length)
0306 {
0307     spin_lock(&t->reassembly_queue_lock);
0308     list_add_tail(&recvmsg->list, &t->reassembly_queue);
0309     t->reassembly_queue_length++;
0310     /*
0311      * Make sure reassembly_data_length is updated after list and
0312      * reassembly_queue_length are updated. On the dequeue side
0313      * reassembly_data_length is checked without a lock to determine
0314      * if reassembly_queue_length and the list are up to date
0315      */
0316     virt_wmb();
0317     t->reassembly_data_length += data_length;
0318     spin_unlock(&t->reassembly_queue_lock);
0319 }
0320 
0321 static struct smb_direct_recvmsg *get_first_reassembly(struct smb_direct_transport *t)
0322 {
0323     if (!list_empty(&t->reassembly_queue))
0324         return list_first_entry(&t->reassembly_queue,
0325                 struct smb_direct_recvmsg, list);
0326     else
0327         return NULL;
0328 }
0329 
0330 static void smb_direct_disconnect_rdma_work(struct work_struct *work)
0331 {
0332     struct smb_direct_transport *t =
0333         container_of(work, struct smb_direct_transport,
0334                  disconnect_work);
0335 
0336     if (t->status == SMB_DIRECT_CS_CONNECTED) {
0337         t->status = SMB_DIRECT_CS_DISCONNECTING;
0338         rdma_disconnect(t->cm_id);
0339     }
0340 }
0341 
0342 static void
0343 smb_direct_disconnect_rdma_connection(struct smb_direct_transport *t)
0344 {
0345     if (t->status == SMB_DIRECT_CS_CONNECTED)
0346         queue_work(smb_direct_wq, &t->disconnect_work);
0347 }
0348 
0349 static void smb_direct_send_immediate_work(struct work_struct *work)
0350 {
0351     struct smb_direct_transport *t = container_of(work,
0352             struct smb_direct_transport, send_immediate_work);
0353 
0354     if (t->status != SMB_DIRECT_CS_CONNECTED)
0355         return;
0356 
0357     smb_direct_post_send_data(t, NULL, NULL, 0, 0);
0358 }
0359 
0360 static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id)
0361 {
0362     struct smb_direct_transport *t;
0363     struct ksmbd_conn *conn;
0364 
0365     t = kzalloc(sizeof(*t), GFP_KERNEL);
0366     if (!t)
0367         return NULL;
0368 
0369     t->cm_id = cm_id;
0370     cm_id->context = t;
0371 
0372     t->status = SMB_DIRECT_CS_NEW;
0373     init_waitqueue_head(&t->wait_status);
0374 
0375     spin_lock_init(&t->reassembly_queue_lock);
0376     INIT_LIST_HEAD(&t->reassembly_queue);
0377     t->reassembly_data_length = 0;
0378     t->reassembly_queue_length = 0;
0379     init_waitqueue_head(&t->wait_reassembly_queue);
0380     init_waitqueue_head(&t->wait_send_credits);
0381     init_waitqueue_head(&t->wait_rw_credits);
0382 
0383     spin_lock_init(&t->receive_credit_lock);
0384     spin_lock_init(&t->recvmsg_queue_lock);
0385     INIT_LIST_HEAD(&t->recvmsg_queue);
0386 
0387     spin_lock_init(&t->empty_recvmsg_queue_lock);
0388     INIT_LIST_HEAD(&t->empty_recvmsg_queue);
0389 
0390     init_waitqueue_head(&t->wait_send_pending);
0391     atomic_set(&t->send_pending, 0);
0392 
0393     spin_lock_init(&t->lock_new_recv_credits);
0394 
0395     INIT_DELAYED_WORK(&t->post_recv_credits_work,
0396               smb_direct_post_recv_credits);
0397     INIT_WORK(&t->send_immediate_work, smb_direct_send_immediate_work);
0398     INIT_WORK(&t->disconnect_work, smb_direct_disconnect_rdma_work);
0399 
0400     conn = ksmbd_conn_alloc();
0401     if (!conn)
0402         goto err;
0403     conn->transport = KSMBD_TRANS(t);
0404     KSMBD_TRANS(t)->conn = conn;
0405     KSMBD_TRANS(t)->ops = &ksmbd_smb_direct_transport_ops;
0406     return t;
0407 err:
0408     kfree(t);
0409     return NULL;
0410 }
0411 
0412 static void free_transport(struct smb_direct_transport *t)
0413 {
0414     struct smb_direct_recvmsg *recvmsg;
0415 
0416     wake_up_interruptible(&t->wait_send_credits);
0417 
0418     ksmbd_debug(RDMA, "wait for all send posted to IB to finish\n");
0419     wait_event(t->wait_send_pending,
0420            atomic_read(&t->send_pending) == 0);
0421 
0422     cancel_work_sync(&t->disconnect_work);
0423     cancel_delayed_work_sync(&t->post_recv_credits_work);
0424     cancel_work_sync(&t->send_immediate_work);
0425 
0426     if (t->qp) {
0427         ib_drain_qp(t->qp);
0428         ib_mr_pool_destroy(t->qp, &t->qp->rdma_mrs);
0429         ib_destroy_qp(t->qp);
0430     }
0431 
0432     ksmbd_debug(RDMA, "drain the reassembly queue\n");
0433     do {
0434         spin_lock(&t->reassembly_queue_lock);
0435         recvmsg = get_first_reassembly(t);
0436         if (recvmsg) {
0437             list_del(&recvmsg->list);
0438             spin_unlock(&t->reassembly_queue_lock);
0439             put_recvmsg(t, recvmsg);
0440         } else {
0441             spin_unlock(&t->reassembly_queue_lock);
0442         }
0443     } while (recvmsg);
0444     t->reassembly_data_length = 0;
0445 
0446     if (t->send_cq)
0447         ib_free_cq(t->send_cq);
0448     if (t->recv_cq)
0449         ib_free_cq(t->recv_cq);
0450     if (t->pd)
0451         ib_dealloc_pd(t->pd);
0452     if (t->cm_id)
0453         rdma_destroy_id(t->cm_id);
0454 
0455     smb_direct_destroy_pools(t);
0456     ksmbd_conn_free(KSMBD_TRANS(t)->conn);
0457     kfree(t);
0458 }
0459 
0460 static struct smb_direct_sendmsg
0461 *smb_direct_alloc_sendmsg(struct smb_direct_transport *t)
0462 {
0463     struct smb_direct_sendmsg *msg;
0464 
0465     msg = mempool_alloc(t->sendmsg_mempool, GFP_KERNEL);
0466     if (!msg)
0467         return ERR_PTR(-ENOMEM);
0468     msg->transport = t;
0469     INIT_LIST_HEAD(&msg->list);
0470     msg->num_sge = 0;
0471     return msg;
0472 }
0473 
0474 static void smb_direct_free_sendmsg(struct smb_direct_transport *t,
0475                     struct smb_direct_sendmsg *msg)
0476 {
0477     int i;
0478 
0479     if (msg->num_sge > 0) {
0480         ib_dma_unmap_single(t->cm_id->device,
0481                     msg->sge[0].addr, msg->sge[0].length,
0482                     DMA_TO_DEVICE);
0483         for (i = 1; i < msg->num_sge; i++)
0484             ib_dma_unmap_page(t->cm_id->device,
0485                       msg->sge[i].addr, msg->sge[i].length,
0486                       DMA_TO_DEVICE);
0487     }
0488     mempool_free(msg, t->sendmsg_mempool);
0489 }
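/*
 * sge[0] always holds the SMB_DIRECT header, mapped with
 * ib_dma_map_single(); the remaining SGEs are payload pages mapped via
 * ib_dma_map_sg(), hence the asymmetric unmapping above.
 */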
0490 
0491 static int smb_direct_check_recvmsg(struct smb_direct_recvmsg *recvmsg)
0492 {
0493     switch (recvmsg->type) {
0494     case SMB_DIRECT_MSG_DATA_TRANSFER: {
0495         struct smb_direct_data_transfer *req =
0496             (struct smb_direct_data_transfer *)recvmsg->packet;
0497         struct smb2_hdr *hdr = (struct smb2_hdr *)(recvmsg->packet
0498                 + le32_to_cpu(req->data_offset));
0499         ksmbd_debug(RDMA,
0500                 "CreditGranted: %u, CreditRequested: %u, DataLength: %u, RemainingDataLength: %u, SMB: %x, Command: %u\n",
0501                 le16_to_cpu(req->credits_granted),
0502                 le16_to_cpu(req->credits_requested),
0503                 req->data_length, req->remaining_data_length,
0504                 hdr->ProtocolId, hdr->Command);
0505         break;
0506     }
0507     case SMB_DIRECT_MSG_NEGOTIATE_REQ: {
0508         struct smb_direct_negotiate_req *req =
0509             (struct smb_direct_negotiate_req *)recvmsg->packet;
0510         ksmbd_debug(RDMA,
0511                 "MinVersion: %u, MaxVersion: %u, CreditRequested: %u, MaxSendSize: %u, MaxRecvSize: %u, MaxFragmentedSize: %u\n",
0512                 le16_to_cpu(req->min_version),
0513                 le16_to_cpu(req->max_version),
0514                 le16_to_cpu(req->credits_requested),
0515                 le32_to_cpu(req->preferred_send_size),
0516                 le32_to_cpu(req->max_receive_size),
0517                 le32_to_cpu(req->max_fragmented_size));
0518         if (le16_to_cpu(req->min_version) > 0x0100 ||
0519             le16_to_cpu(req->max_version) < 0x0100)
0520             return -EOPNOTSUPP;
0521         if (le16_to_cpu(req->credits_requested) <= 0 ||
0522             le32_to_cpu(req->max_receive_size) <= 128 ||
0523             le32_to_cpu(req->max_fragmented_size) <=
0524                     128 * 1024)
0525             return -ECONNABORTED;
0526 
0527         break;
0528     }
0529     default:
0530         return -EINVAL;
0531     }
0532     return 0;
0533 }
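/*
 * A negotiate request is accepted only if the peer speaks protocol
 * version 0x0100, requests at least one credit, and offers a receive
 * size above 128 bytes plus a fragmented size above 128 KiB.
 */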
0534 
0535 static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
0536 {
0537     struct smb_direct_recvmsg *recvmsg;
0538     struct smb_direct_transport *t;
0539 
0540     recvmsg = container_of(wc->wr_cqe, struct smb_direct_recvmsg, cqe);
0541     t = recvmsg->transport;
0542 
0543     if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) {
0544         if (wc->status != IB_WC_WR_FLUSH_ERR) {
0545             pr_err("Recv error. status='%s (%d)' opcode=%d\n",
0546                    ib_wc_status_msg(wc->status), wc->status,
0547                    wc->opcode);
0548             smb_direct_disconnect_rdma_connection(t);
0549         }
0550         put_empty_recvmsg(t, recvmsg);
0551         return;
0552     }
0553 
0554     ksmbd_debug(RDMA, "Recv completed. status='%s (%d)', opcode=%d\n",
0555             ib_wc_status_msg(wc->status), wc->status,
0556             wc->opcode);
0557 
0558     ib_dma_sync_single_for_cpu(wc->qp->device, recvmsg->sge.addr,
0559                    recvmsg->sge.length, DMA_FROM_DEVICE);
0560 
0561     switch (recvmsg->type) {
0562     case SMB_DIRECT_MSG_NEGOTIATE_REQ:
0563         if (wc->byte_len < sizeof(struct smb_direct_negotiate_req)) {
0564             put_empty_recvmsg(t, recvmsg);
0565             return;
0566         }
0567         t->negotiation_requested = true;
0568         t->full_packet_received = true;
0569         t->status = SMB_DIRECT_CS_CONNECTED;
0570         enqueue_reassembly(t, recvmsg, 0);
0571         wake_up_interruptible(&t->wait_status);
0572         break;
0573     case SMB_DIRECT_MSG_DATA_TRANSFER: {
0574         struct smb_direct_data_transfer *data_transfer =
0575             (struct smb_direct_data_transfer *)recvmsg->packet;
0576         unsigned int data_length;
0577         int avail_recvmsg_count, receive_credits;
0578 
0579         if (wc->byte_len <
0580             offsetof(struct smb_direct_data_transfer, padding)) {
0581             put_empty_recvmsg(t, recvmsg);
0582             return;
0583         }
0584 
0585         data_length = le32_to_cpu(data_transfer->data_length);
0586         if (data_length) {
0587             if (wc->byte_len < sizeof(struct smb_direct_data_transfer) +
0588                 (u64)data_length) {
0589                 put_empty_recvmsg(t, recvmsg);
0590                 return;
0591             }
0592 
0593             if (t->full_packet_received)
0594                 recvmsg->first_segment = true;
0595 
0596             if (le32_to_cpu(data_transfer->remaining_data_length))
0597                 t->full_packet_received = false;
0598             else
0599                 t->full_packet_received = true;
0600 
0601             enqueue_reassembly(t, recvmsg, (int)data_length);
0602             wake_up_interruptible(&t->wait_reassembly_queue);
0603 
0604             spin_lock(&t->receive_credit_lock);
0605             receive_credits = --(t->recv_credits);
0606             avail_recvmsg_count = t->count_avail_recvmsg;
0607             spin_unlock(&t->receive_credit_lock);
0608         } else {
0609             put_empty_recvmsg(t, recvmsg);
0610 
0611             spin_lock(&t->receive_credit_lock);
0612             receive_credits = --(t->recv_credits);
0613             avail_recvmsg_count = ++(t->count_avail_recvmsg);
0614             spin_unlock(&t->receive_credit_lock);
0615         }
0616 
0617         t->recv_credit_target =
0618                 le16_to_cpu(data_transfer->credits_requested);
0619         atomic_add(le16_to_cpu(data_transfer->credits_granted),
0620                &t->send_credits);
0621 
0622         if (le16_to_cpu(data_transfer->flags) &
0623             SMB_DIRECT_RESPONSE_REQUESTED)
0624             queue_work(smb_direct_wq, &t->send_immediate_work);
0625 
0626         if (atomic_read(&t->send_credits) > 0)
0627             wake_up_interruptible(&t->wait_send_credits);
0628 
0629         if (is_receive_credit_post_required(receive_credits, avail_recvmsg_count))
0630             mod_delayed_work(smb_direct_wq,
0631                      &t->post_recv_credits_work, 0);
0632         break;
0633     }
0634     default:
0635         break;
0636     }
0637 }
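/*
 * Credit bookkeeping on receive: every message consumes one receive
 * credit, the peer's credits_granted replenish our send credits, and its
 * credits_requested becomes our new target for posting receives. If the
 * peer set SMB_DIRECT_RESPONSE_REQUESTED, an immediate (possibly empty)
 * send is scheduled so freshly granted credits reach the peer promptly.
 */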
0638 
0639 static int smb_direct_post_recv(struct smb_direct_transport *t,
0640                 struct smb_direct_recvmsg *recvmsg)
0641 {
0642     struct ib_recv_wr wr;
0643     int ret;
0644 
0645     recvmsg->sge.addr = ib_dma_map_single(t->cm_id->device,
0646                           recvmsg->packet, t->max_recv_size,
0647                           DMA_FROM_DEVICE);
0648     ret = ib_dma_mapping_error(t->cm_id->device, recvmsg->sge.addr);
0649     if (ret)
0650         return ret;
0651     recvmsg->sge.length = t->max_recv_size;
0652     recvmsg->sge.lkey = t->pd->local_dma_lkey;
0653     recvmsg->cqe.done = recv_done;
0654 
0655     wr.wr_cqe = &recvmsg->cqe;
0656     wr.next = NULL;
0657     wr.sg_list = &recvmsg->sge;
0658     wr.num_sge = 1;
0659 
0660     ret = ib_post_recv(t->qp, &wr, NULL);
0661     if (ret) {
0662         pr_err("Can't post recv: %d\n", ret);
0663         ib_dma_unmap_single(t->cm_id->device,
0664                     recvmsg->sge.addr, recvmsg->sge.length,
0665                     DMA_FROM_DEVICE);
0666         smb_direct_disconnect_rdma_connection(t);
0667         return ret;
0668     }
0669     return ret;
0670 }
0671 
0672 static int smb_direct_read(struct ksmbd_transport *t, char *buf,
0673                unsigned int size)
0674 {
0675     struct smb_direct_recvmsg *recvmsg;
0676     struct smb_direct_data_transfer *data_transfer;
0677     int to_copy, to_read, data_read, offset;
0678     u32 data_length, remaining_data_length, data_offset;
0679     int rc;
0680     struct smb_direct_transport *st = smb_trans_direct_transfort(t);
0681 
0682 again:
0683     if (st->status != SMB_DIRECT_CS_CONNECTED) {
0684         pr_err("disconnected\n");
0685         return -ENOTCONN;
0686     }
0687 
0688     /*
0689      * No need to hold the reassembly queue lock all the time as we are
0690      * the only one reading from the front of the queue. The transport
0691      * may add more entries to the back of the queue at the same time
0692      */
0693     if (st->reassembly_data_length >= size) {
0694         int queue_length;
0695         int queue_removed = 0;
0696 
0697         /*
0698          * Need to make sure reassembly_data_length is read before
0699          * reading reassembly_queue_length and calling
0700          * get_first_reassembly. This call is lock-free
0701          * as we never read the end of the queue, which is being
0702          * updated in SOFTIRQ context as more data is received
0703          */
0704         virt_rmb();
0705         queue_length = st->reassembly_queue_length;
0706         data_read = 0;
0707         to_read = size;
0708         offset = st->first_entry_offset;
0709         while (data_read < size) {
0710             recvmsg = get_first_reassembly(st);
0711             data_transfer = smb_direct_recvmsg_payload(recvmsg);
0712             data_length = le32_to_cpu(data_transfer->data_length);
0713             remaining_data_length =
0714                 le32_to_cpu(data_transfer->remaining_data_length);
0715             data_offset = le32_to_cpu(data_transfer->data_offset);
0716 
0717             /*
0718              * The upper layer expects RFC1002 length at the
0719              * beginning of the payload. Return it to indicate
0720              * the total length of the packet. This minimizes the
0721              * change to the upper-layer packet processing logic. This
0722              * will eventually be removed when an intermediate
0723              * transport layer is added
0724              */
0725             if (recvmsg->first_segment && size == 4) {
0726                 unsigned int rfc1002_len =
0727                     data_length + remaining_data_length;
0728                 *((__be32 *)buf) = cpu_to_be32(rfc1002_len);
0729                 data_read = 4;
0730                 recvmsg->first_segment = false;
0731                 ksmbd_debug(RDMA,
0732                         "returning rfc1002 length %d\n",
0733                         rfc1002_len);
0734                 goto read_rfc1002_done;
0735             }
0736 
0737             to_copy = min_t(int, data_length - offset, to_read);
0738             memcpy(buf + data_read, (char *)data_transfer + data_offset + offset,
0739                    to_copy);
0740 
0741             /* move on to the next buffer? */
0742             if (to_copy == data_length - offset) {
0743                 queue_length--;
0744                 /*
0745                  * No need to lock if we are not at the
0746                  * end of the queue
0747                  */
0748                 if (queue_length) {
0749                     list_del(&recvmsg->list);
0750                 } else {
0751                     spin_lock_irq(&st->reassembly_queue_lock);
0752                     list_del(&recvmsg->list);
0753                     spin_unlock_irq(&st->reassembly_queue_lock);
0754                 }
0755                 queue_removed++;
0756                 put_recvmsg(st, recvmsg);
0757                 offset = 0;
0758             } else {
0759                 offset += to_copy;
0760             }
0761 
0762             to_read -= to_copy;
0763             data_read += to_copy;
0764         }
0765 
0766         spin_lock_irq(&st->reassembly_queue_lock);
0767         st->reassembly_data_length -= data_read;
0768         st->reassembly_queue_length -= queue_removed;
0769         spin_unlock_irq(&st->reassembly_queue_lock);
0770 
0771         spin_lock(&st->receive_credit_lock);
0772         st->count_avail_recvmsg += queue_removed;
0773         if (is_receive_credit_post_required(st->recv_credits, st->count_avail_recvmsg)) {
0774             spin_unlock(&st->receive_credit_lock);
0775             mod_delayed_work(smb_direct_wq,
0776                      &st->post_recv_credits_work, 0);
0777         } else {
0778             spin_unlock(&st->receive_credit_lock);
0779         }
0780 
0781         st->first_entry_offset = offset;
0782         ksmbd_debug(RDMA,
0783                 "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n",
0784                 data_read, st->reassembly_data_length,
0785                 st->first_entry_offset);
0786 read_rfc1002_done:
0787         return data_read;
0788     }
0789 
0790     ksmbd_debug(RDMA, "wait_event on more data\n");
0791     rc = wait_event_interruptible(st->wait_reassembly_queue,
0792                       st->reassembly_data_length >= size ||
0793                        st->status != SMB_DIRECT_CS_CONNECTED);
0794     if (rc)
0795         return -EINTR;
0796 
0797     goto again;
0798 }
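/*
 * Sketch of the expected calling pattern (illustrative only; the real
 * caller is the ksmbd connection loop):
 *
 *	char hdr[4];
 *	smb_direct_read(t, hdr, 4);	// synthesized RFC1002 length
 *	len = be32_to_cpu(*(__be32 *)hdr);
 *	smb_direct_read(t, buf, len);	// reassembled SMB2 PDU
 */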
0799 
0800 static void smb_direct_post_recv_credits(struct work_struct *work)
0801 {
0802     struct smb_direct_transport *t = container_of(work,
0803         struct smb_direct_transport, post_recv_credits_work.work);
0804     struct smb_direct_recvmsg *recvmsg;
0805     int receive_credits, credits = 0;
0806     int ret;
0807     int use_free = 1;
0808 
0809     spin_lock(&t->receive_credit_lock);
0810     receive_credits = t->recv_credits;
0811     spin_unlock(&t->receive_credit_lock);
0812 
0813     if (receive_credits < t->recv_credit_target) {
0814         while (true) {
0815             if (use_free)
0816                 recvmsg = get_free_recvmsg(t);
0817             else
0818                 recvmsg = get_empty_recvmsg(t);
0819             if (!recvmsg) {
0820                 if (use_free) {
0821                     use_free = 0;
0822                     continue;
0823                 } else {
0824                     break;
0825                 }
0826             }
0827 
0828             recvmsg->type = SMB_DIRECT_MSG_DATA_TRANSFER;
0829             recvmsg->first_segment = false;
0830 
0831             ret = smb_direct_post_recv(t, recvmsg);
0832             if (ret) {
0833                 pr_err("Can't post recv: %d\n", ret);
0834                 put_recvmsg(t, recvmsg);
0835                 break;
0836             }
0837             credits++;
0838         }
0839     }
0840 
0841     spin_lock(&t->receive_credit_lock);
0842     t->recv_credits += credits;
0843     t->count_avail_recvmsg -= credits;
0844     spin_unlock(&t->receive_credit_lock);
0845 
0846     spin_lock(&t->lock_new_recv_credits);
0847     t->new_recv_credits += credits;
0848     spin_unlock(&t->lock_new_recv_credits);
0849 
0850     if (credits)
0851         queue_work(smb_direct_wq, &t->send_immediate_work);
0852 }
0853 
0854 static void send_done(struct ib_cq *cq, struct ib_wc *wc)
0855 {
0856     struct smb_direct_sendmsg *sendmsg, *sibling;
0857     struct smb_direct_transport *t;
0858     struct list_head *pos, *prev, *end;
0859 
0860     sendmsg = container_of(wc->wr_cqe, struct smb_direct_sendmsg, cqe);
0861     t = sendmsg->transport;
0862 
0863     ksmbd_debug(RDMA, "Send completed. status='%s (%d)', opcode=%d\n",
0864             ib_wc_status_msg(wc->status), wc->status,
0865             wc->opcode);
0866 
0867     if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) {
0868         pr_err("Send error. status='%s (%d)', opcode=%d\n",
0869                ib_wc_status_msg(wc->status), wc->status,
0870                wc->opcode);
0871         smb_direct_disconnect_rdma_connection(t);
0872     }
0873 
0874     if (atomic_dec_and_test(&t->send_pending))
0875         wake_up(&t->wait_send_pending);
0876 
0877     /* Iterate the list of messages in reverse and free each one.
0878      * The list head itself is not a valid entry.
0879      */
0880     for (pos = &sendmsg->list, prev = pos->prev, end = sendmsg->list.next;
0881          prev != end; pos = prev, prev = prev->prev) {
0882         sibling = container_of(pos, struct smb_direct_sendmsg, list);
0883         smb_direct_free_sendmsg(t, sibling);
0884     }
0885 
0886     sibling = container_of(pos, struct smb_direct_sendmsg, list);
0887     smb_direct_free_sendmsg(t, sibling);
0888 }
0889 
0890 static int manage_credits_prior_sending(struct smb_direct_transport *t)
0891 {
0892     int new_credits;
0893 
0894     spin_lock(&t->lock_new_recv_credits);
0895     new_credits = t->new_recv_credits;
0896     t->new_recv_credits = 0;
0897     spin_unlock(&t->lock_new_recv_credits);
0898 
0899     return new_credits;
0900 }
0901 
0902 static int smb_direct_post_send(struct smb_direct_transport *t,
0903                 struct ib_send_wr *wr)
0904 {
0905     int ret;
0906 
0907     atomic_inc(&t->send_pending);
0908     ret = ib_post_send(t->qp, wr, NULL);
0909     if (ret) {
0910         pr_err("failed to post send: %d\n", ret);
0911         if (atomic_dec_and_test(&t->send_pending))
0912             wake_up(&t->wait_send_pending);
0913         smb_direct_disconnect_rdma_connection(t);
0914     }
0915     return ret;
0916 }
0917 
0918 static void smb_direct_send_ctx_init(struct smb_direct_transport *t,
0919                      struct smb_direct_send_ctx *send_ctx,
0920                      bool need_invalidate_rkey,
0921                      unsigned int remote_key)
0922 {
0923     INIT_LIST_HEAD(&send_ctx->msg_list);
0924     send_ctx->wr_cnt = 0;
0925     send_ctx->need_invalidate_rkey = need_invalidate_rkey;
0926     send_ctx->remote_key = remote_key;
0927 }
0928 
0929 static int smb_direct_flush_send_list(struct smb_direct_transport *t,
0930                       struct smb_direct_send_ctx *send_ctx,
0931                       bool is_last)
0932 {
0933     struct smb_direct_sendmsg *first, *last;
0934     int ret;
0935 
0936     if (list_empty(&send_ctx->msg_list))
0937         return 0;
0938 
0939     first = list_first_entry(&send_ctx->msg_list,
0940                  struct smb_direct_sendmsg,
0941                  list);
0942     last = list_last_entry(&send_ctx->msg_list,
0943                    struct smb_direct_sendmsg,
0944                    list);
0945 
0946     last->wr.send_flags = IB_SEND_SIGNALED;
0947     last->wr.wr_cqe = &last->cqe;
0948     if (is_last && send_ctx->need_invalidate_rkey) {
0949         last->wr.opcode = IB_WR_SEND_WITH_INV;
0950         last->wr.ex.invalidate_rkey = send_ctx->remote_key;
0951     }
0952 
0953     ret = smb_direct_post_send(t, &first->wr);
0954     if (!ret) {
0955         smb_direct_send_ctx_init(t, send_ctx,
0956                      send_ctx->need_invalidate_rkey,
0957                      send_ctx->remote_key);
0958     } else {
0959         atomic_add(send_ctx->wr_cnt, &t->send_credits);
0960         wake_up(&t->wait_send_credits);
0961         list_for_each_entry_safe(first, last, &send_ctx->msg_list,
0962                      list) {
0963             smb_direct_free_sendmsg(t, first);
0964         }
0965     }
0966     return ret;
0967 }
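/*
 * Only the last WR of the batch is posted with IB_SEND_SIGNALED, so one
 * completion covers the whole chain; send_done() then walks the list
 * backwards from that last message to free every sibling.
 */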
0968 
0969 static int wait_for_credits(struct smb_direct_transport *t,
0970                 wait_queue_head_t *waitq, atomic_t *total_credits,
0971                 int needed)
0972 {
0973     int ret;
0974 
0975     do {
0976         if (atomic_sub_return(needed, total_credits) >= 0)
0977             return 0;
0978 
0979         atomic_add(needed, total_credits);
0980         ret = wait_event_interruptible(*waitq,
0981                            atomic_read(total_credits) >= needed ||
0982                            t->status != SMB_DIRECT_CS_CONNECTED);
0983 
0984         if (t->status != SMB_DIRECT_CS_CONNECTED)
0985             return -ENOTCONN;
0986         else if (ret < 0)
0987             return ret;
0988     } while (true);
0989 }
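/*
 * Credits are taken optimistically: atomic_sub_return() grabs them
 * first and, if the pool went negative, returns them and sleeps until
 * the peer grants more or the connection drops.
 */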
0990 
0991 static int wait_for_send_credits(struct smb_direct_transport *t,
0992                  struct smb_direct_send_ctx *send_ctx)
0993 {
0994     int ret;
0995 
0996     if (send_ctx &&
0997         (send_ctx->wr_cnt >= 16 || atomic_read(&t->send_credits) <= 1)) {
0998         ret = smb_direct_flush_send_list(t, send_ctx, false);
0999         if (ret)
1000             return ret;
1001     }
1002 
1003     return wait_for_credits(t, &t->wait_send_credits, &t->send_credits, 1);
1004 }
1005 
1006 static int wait_for_rw_credits(struct smb_direct_transport *t, int credits)
1007 {
1008     return wait_for_credits(t, &t->wait_rw_credits, &t->rw_credits, credits);
1009 }
1010 
1011 static int calc_rw_credits(struct smb_direct_transport *t,
1012                char *buf, unsigned int len)
1013 {
1014     return DIV_ROUND_UP(get_buf_page_count(buf, len),
1015                 t->pages_per_rw_credit);
1016 }
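/*
 * Illustrative example: with pages_per_rw_credit = 256 (device
 * dependent) and 4 KiB pages, a page-aligned 1 MiB buffer costs
 * DIV_ROUND_UP(256, 256) = 1 rw credit; touching a 257th page costs 2.
 */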
1017 
1018 static int smb_direct_create_header(struct smb_direct_transport *t,
1019                     int size, int remaining_data_length,
1020                     struct smb_direct_sendmsg **sendmsg_out)
1021 {
1022     struct smb_direct_sendmsg *sendmsg;
1023     struct smb_direct_data_transfer *packet;
1024     int header_length;
1025     int ret;
1026 
1027     sendmsg = smb_direct_alloc_sendmsg(t);
1028     if (IS_ERR(sendmsg))
1029         return PTR_ERR(sendmsg);
1030 
1031     /* Fill in the packet header */
1032     packet = (struct smb_direct_data_transfer *)sendmsg->packet;
1033     packet->credits_requested = cpu_to_le16(t->send_credit_target);
1034     packet->credits_granted = cpu_to_le16(manage_credits_prior_sending(t));
1035 
1036     packet->flags = 0;
1037     packet->reserved = 0;
1038     if (!size)
1039         packet->data_offset = 0;
1040     else
1041         packet->data_offset = cpu_to_le32(24);
1042     packet->data_length = cpu_to_le32(size);
1043     packet->remaining_data_length = cpu_to_le32(remaining_data_length);
1044     packet->padding = 0;
1045 
1046     ksmbd_debug(RDMA,
1047             "credits_requested=%d credits_granted=%d data_offset=%d data_length=%d remaining_data_length=%d\n",
1048             le16_to_cpu(packet->credits_requested),
1049             le16_to_cpu(packet->credits_granted),
1050             le32_to_cpu(packet->data_offset),
1051             le32_to_cpu(packet->data_length),
1052             le32_to_cpu(packet->remaining_data_length));
1053 
1054     /* Map the packet to DMA */
1055     header_length = sizeof(struct smb_direct_data_transfer);
1056     /* If this is a packet without payload, don't send padding */
1057     if (!size)
1058         header_length =
1059             offsetof(struct smb_direct_data_transfer, padding);
1060 
1061     sendmsg->sge[0].addr = ib_dma_map_single(t->cm_id->device,
1062                          (void *)packet,
1063                          header_length,
1064                          DMA_TO_DEVICE);
1065     ret = ib_dma_mapping_error(t->cm_id->device, sendmsg->sge[0].addr);
1066     if (ret) {
1067         smb_direct_free_sendmsg(t, sendmsg);
1068         return ret;
1069     }
1070 
1071     sendmsg->num_sge = 1;
1072     sendmsg->sge[0].length = header_length;
1073     sendmsg->sge[0].lkey = t->pd->local_dma_lkey;
1074 
1075     *sendmsg_out = sendmsg;
1076     return 0;
1077 }
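/*
 * data_offset is hard-coded to 24, i.e. sizeof(struct
 * smb_direct_data_transfer) including its padding field; a header with
 * no payload stops at offsetof(..., padding) = 20 bytes, as there is no
 * following data to align.
 */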
1078 
1079 static int get_sg_list(void *buf, int size, struct scatterlist *sg_list, int nentries)
1080 {
1081     bool high = is_vmalloc_addr(buf);
1082     struct page *page;
1083     int offset, len;
1084     int i = 0;
1085 
1086     if (size <= 0 || nentries < get_buf_page_count(buf, size))
1087         return -EINVAL;
1088 
1089     offset = offset_in_page(buf);
1090     buf -= offset;
1091     while (size > 0) {
1092         len = min_t(int, PAGE_SIZE - offset, size);
1093         if (high)
1094             page = vmalloc_to_page(buf);
1095         else
1096             page = kmap_to_page(buf);
1097 
1098         if (!sg_list)
1099             return -EINVAL;
1100         sg_set_page(sg_list, page, len, offset);
1101         sg_list = sg_next(sg_list);
1102 
1103         buf += PAGE_SIZE;
1104         size -= len;
1105         offset = 0;
1106         i++;
1107     }
1108     return i;
1109 }
1110 
1111 static int get_mapped_sg_list(struct ib_device *device, void *buf, int size,
1112                   struct scatterlist *sg_list, int nentries,
1113                   enum dma_data_direction dir)
1114 {
1115     int npages;
1116 
1117     npages = get_sg_list(buf, size, sg_list, nentries);
1118     if (npages < 0)
1119         return -EINVAL;
1120     return ib_dma_map_sg(device, sg_list, npages, dir);
1121 }
1122 
1123 static int post_sendmsg(struct smb_direct_transport *t,
1124             struct smb_direct_send_ctx *send_ctx,
1125             struct smb_direct_sendmsg *msg)
1126 {
1127     int i;
1128 
1129     for (i = 0; i < msg->num_sge; i++)
1130         ib_dma_sync_single_for_device(t->cm_id->device,
1131                           msg->sge[i].addr, msg->sge[i].length,
1132                           DMA_TO_DEVICE);
1133 
1134     msg->cqe.done = send_done;
1135     msg->wr.opcode = IB_WR_SEND;
1136     msg->wr.sg_list = &msg->sge[0];
1137     msg->wr.num_sge = msg->num_sge;
1138     msg->wr.next = NULL;
1139 
1140     if (send_ctx) {
1141         msg->wr.wr_cqe = NULL;
1142         msg->wr.send_flags = 0;
1143         if (!list_empty(&send_ctx->msg_list)) {
1144             struct smb_direct_sendmsg *last;
1145 
1146             last = list_last_entry(&send_ctx->msg_list,
1147                            struct smb_direct_sendmsg,
1148                            list);
1149             last->wr.next = &msg->wr;
1150         }
1151         list_add_tail(&msg->list, &send_ctx->msg_list);
1152         send_ctx->wr_cnt++;
1153         return 0;
1154     }
1155 
1156     msg->wr.wr_cqe = &msg->cqe;
1157     msg->wr.send_flags = IB_SEND_SIGNALED;
1158     return smb_direct_post_send(t, &msg->wr);
1159 }
1160 
1161 static int smb_direct_post_send_data(struct smb_direct_transport *t,
1162                      struct smb_direct_send_ctx *send_ctx,
1163                      struct kvec *iov, int niov,
1164                      int remaining_data_length)
1165 {
1166     int i, j, ret;
1167     struct smb_direct_sendmsg *msg;
1168     int data_length;
1169     struct scatterlist sg[SMB_DIRECT_MAX_SEND_SGES - 1];
1170 
1171     ret = wait_for_send_credits(t, send_ctx);
1172     if (ret)
1173         return ret;
1174 
1175     data_length = 0;
1176     for (i = 0; i < niov; i++)
1177         data_length += iov[i].iov_len;
1178 
1179     ret = smb_direct_create_header(t, data_length, remaining_data_length,
1180                        &msg);
1181     if (ret) {
1182         atomic_inc(&t->send_credits);
1183         return ret;
1184     }
1185 
1186     for (i = 0; i < niov; i++) {
1187         struct ib_sge *sge;
1188         int sg_cnt;
1189 
1190         sg_init_table(sg, SMB_DIRECT_MAX_SEND_SGES - 1);
1191         sg_cnt = get_mapped_sg_list(t->cm_id->device,
1192                         iov[i].iov_base, iov[i].iov_len,
1193                         sg, SMB_DIRECT_MAX_SEND_SGES - 1,
1194                         DMA_TO_DEVICE);
1195         if (sg_cnt <= 0) {
1196             pr_err("failed to map buffer\n");
1197             ret = -ENOMEM;
1198             goto err;
1199         } else if (sg_cnt + msg->num_sge > SMB_DIRECT_MAX_SEND_SGES) {
1200             pr_err("buffer not fitted into sges\n");
1201             ret = -E2BIG;
1202             ib_dma_unmap_sg(t->cm_id->device, sg, sg_cnt,
1203                     DMA_TO_DEVICE);
1204             goto err;
1205         }
1206 
1207         for (j = 0; j < sg_cnt; j++) {
1208             sge = &msg->sge[msg->num_sge];
1209             sge->addr = sg_dma_address(&sg[j]);
1210             sge->length = sg_dma_len(&sg[j]);
1211             sge->lkey  = t->pd->local_dma_lkey;
1212             msg->num_sge++;
1213         }
1214     }
1215 
1216     ret = post_sendmsg(t, send_ctx, msg);
1217     if (ret)
1218         goto err;
1219     return 0;
1220 err:
1221     smb_direct_free_sendmsg(t, msg);
1222     atomic_inc(&t->send_credits);
1223     return ret;
1224 }
1225 
1226 static int smb_direct_writev(struct ksmbd_transport *t,
1227                  struct kvec *iov, int niovs, int buflen,
1228                  bool need_invalidate, unsigned int remote_key)
1229 {
1230     struct smb_direct_transport *st = smb_trans_direct_transfort(t);
1231     int remaining_data_length;
1232     int start, i, j;
1233     int max_iov_size = st->max_send_size -
1234             sizeof(struct smb_direct_data_transfer);
1235     int ret;
1236     struct kvec vec;
1237     struct smb_direct_send_ctx send_ctx;
1238 
1239     if (st->status != SMB_DIRECT_CS_CONNECTED)
1240         return -ENOTCONN;
1241 
1242     //FIXME: skip RFC1002 header..
1243     buflen -= 4;
1244     iov[0].iov_base += 4;
1245     iov[0].iov_len -= 4;
1246 
1247     remaining_data_length = buflen;
1248     ksmbd_debug(RDMA, "Sending smb (RDMA): smb_len=%u\n", buflen);
1249 
1250     smb_direct_send_ctx_init(st, &send_ctx, need_invalidate, remote_key);
1251     start = i = 0;
1252     buflen = 0;
1253     while (true) {
1254         buflen += iov[i].iov_len;
1255         if (buflen > max_iov_size) {
1256             if (i > start) {
1257                 remaining_data_length -=
1258                     (buflen - iov[i].iov_len);
1259                 ret = smb_direct_post_send_data(st, &send_ctx,
1260                                 &iov[start], i - start,
1261                                 remaining_data_length);
1262                 if (ret)
1263                     goto done;
1264             } else {
1265                 /* iov[start] is too big, break it */
1266                 int nvec  = (buflen + max_iov_size - 1) /
1267                         max_iov_size;
1268 
1269                 for (j = 0; j < nvec; j++) {
1270                     vec.iov_base =
1271                         (char *)iov[start].iov_base +
1272                         j * max_iov_size;
1273                     vec.iov_len =
1274                         min_t(int, max_iov_size,
1275                               buflen - max_iov_size * j);
1276                     remaining_data_length -= vec.iov_len;
1277                     ret = smb_direct_post_send_data(st, &send_ctx, &vec, 1,
1278                                     remaining_data_length);
1279                     if (ret)
1280                         goto done;
1281                 }
1282                 i++;
1283                 if (i == niovs)
1284                     break;
1285             }
1286             start = i;
1287             buflen = 0;
1288         } else {
1289             i++;
1290             if (i == niovs) {
1291                 /* send out all remaining vecs */
1292                 remaining_data_length -= buflen;
1293                 ret = smb_direct_post_send_data(st, &send_ctx,
1294                                 &iov[start], i - start,
1295                                 remaining_data_length);
1296                 if (ret)
1297                     goto done;
1298                 break;
1299             }
1300         }
1301     }
1302 
1303 done:
1304     ret = smb_direct_flush_send_list(st, &send_ctx, true);
1305 
1306     /*
1307      * As an optimization, we don't wait for individual I/O to finish
1308      * before sending the next one.
1309      * Send them all and wait for the pending send count to reach 0,
1310      * which means all the I/Os have gone out and we are good to return
1311      */
1312 
1313     wait_event(st->wait_send_pending,
1314            atomic_read(&st->send_pending) == 0);
1315     return ret;
1316 }
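/*
 * Illustrative example: with max_send_size = 8192 the per-message
 * payload cap is 8192 - 24 = 8168 bytes, so a single 16336-byte iov
 * (after the 4-byte RFC1002 header is stripped) is split into two
 * DATA_TRANSFER messages, remaining_data_length counting down to 0 on
 * the last one.
 */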
1317 
1318 static void smb_direct_free_rdma_rw_msg(struct smb_direct_transport *t,
1319                     struct smb_direct_rdma_rw_msg *msg,
1320                     enum dma_data_direction dir)
1321 {
1322     rdma_rw_ctx_destroy(&msg->rw_ctx, t->qp, t->qp->port,
1323                 msg->sgt.sgl, msg->sgt.nents, dir);
1324     sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
1325     kfree(msg);
1326 }
1327 
1328 static void read_write_done(struct ib_cq *cq, struct ib_wc *wc,
1329                 enum dma_data_direction dir)
1330 {
1331     struct smb_direct_rdma_rw_msg *msg = container_of(wc->wr_cqe,
1332                               struct smb_direct_rdma_rw_msg, cqe);
1333     struct smb_direct_transport *t = msg->t;
1334 
1335     if (wc->status != IB_WC_SUCCESS) {
1336         msg->status = -EIO;
1337         pr_err("read/write error. opcode = %d, status = %s(%d)\n",
1338                wc->opcode, ib_wc_status_msg(wc->status), wc->status);
1339         if (wc->status != IB_WC_WR_FLUSH_ERR)
1340             smb_direct_disconnect_rdma_connection(t);
1341     }
1342 
1343     complete(msg->completion);
1344 }
1345 
1346 static void read_done(struct ib_cq *cq, struct ib_wc *wc)
1347 {
1348     read_write_done(cq, wc, DMA_FROM_DEVICE);
1349 }
1350 
1351 static void write_done(struct ib_cq *cq, struct ib_wc *wc)
1352 {
1353     read_write_done(cq, wc, DMA_TO_DEVICE);
1354 }
1355 
1356 static int smb_direct_rdma_xmit(struct smb_direct_transport *t,
1357                 void *buf, int buf_len,
1358                 struct smb2_buffer_desc_v1 *desc,
1359                 unsigned int desc_len,
1360                 bool is_read)
1361 {
1362     struct smb_direct_rdma_rw_msg *msg, *next_msg;
1363     int i, ret;
1364     DECLARE_COMPLETION_ONSTACK(completion);
1365     struct ib_send_wr *first_wr;
1366     LIST_HEAD(msg_list);
1367     char *desc_buf;
1368     int credits_needed;
1369     unsigned int desc_buf_len;
1370     size_t total_length = 0;
1371 
1372     if (t->status != SMB_DIRECT_CS_CONNECTED)
1373         return -ENOTCONN;
1374 
1375     /* calculate needed credits */
1376     credits_needed = 0;
1377     desc_buf = buf;
1378     for (i = 0; i < desc_len / sizeof(*desc); i++) {
1379         desc_buf_len = le32_to_cpu(desc[i].length);
1380 
1381         credits_needed += calc_rw_credits(t, desc_buf, desc_buf_len);
1382         desc_buf += desc_buf_len;
1383         total_length += desc_buf_len;
1384         if (desc_buf_len == 0 || total_length > buf_len ||
1385             total_length > t->max_rdma_rw_size)
1386             return -EINVAL;
1387     }
1388 
1389     ksmbd_debug(RDMA, "RDMA %s, len %#x, needed credits %#x\n",
1390             is_read ? "read" : "write", buf_len, credits_needed);
1391 
1392     ret = wait_for_rw_credits(t, credits_needed);
1393     if (ret < 0)
1394         return ret;
1395 
1396     /* build rdma_rw_ctx for each descriptor */
1397     desc_buf = buf;
1398     for (i = 0; i < desc_len / sizeof(*desc); i++) {
1399         msg = kzalloc(offsetof(struct smb_direct_rdma_rw_msg, sg_list) +
1400                   sizeof(struct scatterlist) * SG_CHUNK_SIZE, GFP_KERNEL);
1401         if (!msg) {
1402             ret = -ENOMEM;
1403             goto out;
1404         }
1405 
1406         desc_buf_len = le32_to_cpu(desc[i].length);
1407 
1408         msg->t = t;
1409         msg->cqe.done = is_read ? read_done : write_done;
1410         msg->completion = &completion;
1411 
1412         msg->sgt.sgl = &msg->sg_list[0];
1413         ret = sg_alloc_table_chained(&msg->sgt,
1414                          get_buf_page_count(desc_buf, desc_buf_len),
1415                          msg->sg_list, SG_CHUNK_SIZE);
1416         if (ret) {
1417             kfree(msg);
1418             ret = -ENOMEM;
1419             goto out;
1420         }
1421 
1422         ret = get_sg_list(desc_buf, desc_buf_len,
1423                   msg->sgt.sgl, msg->sgt.orig_nents);
1424         if (ret < 0) {
1425             sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
1426             kfree(msg);
1427             goto out;
1428         }
1429 
1430         ret = rdma_rw_ctx_init(&msg->rw_ctx, t->qp, t->qp->port,
1431                        msg->sgt.sgl,
1432                        get_buf_page_count(desc_buf, desc_buf_len),
1433                        0,
1434                        le64_to_cpu(desc[i].offset),
1435                        le32_to_cpu(desc[i].token),
1436                        is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
1437         if (ret < 0) {
1438             pr_err("failed to init rdma_rw_ctx: %d\n", ret);
1439             sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
1440             kfree(msg);
1441             goto out;
1442         }
1443 
1444         list_add_tail(&msg->list, &msg_list);
1445         desc_buf += desc_buf_len;
1446     }
1447 
1448     /* concatenate work requests of rdma_rw_ctxs */
1449     first_wr = NULL;
1450     list_for_each_entry_reverse(msg, &msg_list, list) {
1451         first_wr = rdma_rw_ctx_wrs(&msg->rw_ctx, t->qp, t->qp->port,
1452                        &msg->cqe, first_wr);
1453     }
1454 
1455     ret = ib_post_send(t->qp, first_wr, NULL);
1456     if (ret) {
1457         pr_err("failed to post send wr for RDMA R/W: %d\n", ret);
1458         goto out;
1459     }
1460 
1461     msg = list_last_entry(&msg_list, struct smb_direct_rdma_rw_msg, list);
1462     wait_for_completion(&completion);
1463     ret = msg->status;
1464 out:
1465     list_for_each_entry_safe(msg, next_msg, &msg_list, list) {
1466         list_del(&msg->list);
1467         smb_direct_free_rdma_rw_msg(t, msg,
1468                         is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
1469     }
1470     atomic_add(credits_needed, &t->rw_credits);
1471     wake_up(&t->wait_rw_credits);
1472     return ret;
1473 }
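/*
 * One rdma_rw_ctx is built per buffer descriptor and the WRs are chained
 * in reverse so that first_wr heads a single ib_post_send(). As used
 * here, the rw API signals only the tail of the chain, so the one
 * completion that wakes the waiter belongs to the last descriptor, whose
 * status is returned.
 */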
1474 
1475 static int smb_direct_rdma_write(struct ksmbd_transport *t,
1476                  void *buf, unsigned int buflen,
1477                  struct smb2_buffer_desc_v1 *desc,
1478                  unsigned int desc_len)
1479 {
1480     return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen,
1481                     desc, desc_len, false);
1482 }
1483 
1484 static int smb_direct_rdma_read(struct ksmbd_transport *t,
1485                 void *buf, unsigned int buflen,
1486                 struct smb2_buffer_desc_v1 *desc,
1487                 unsigned int desc_len)
1488 {
1489     return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen,
1490                     desc, desc_len, true);
1491 }
1492 
1493 static void smb_direct_disconnect(struct ksmbd_transport *t)
1494 {
1495     struct smb_direct_transport *st = smb_trans_direct_transfort(t);
1496 
1497     ksmbd_debug(RDMA, "Disconnecting cm_id=%p\n", st->cm_id);
1498 
1499     smb_direct_disconnect_rdma_work(&st->disconnect_work);
1500     wait_event_interruptible(st->wait_status,
1501                  st->status == SMB_DIRECT_CS_DISCONNECTED);
1502     free_transport(st);
1503 }
1504 
1505 static void smb_direct_shutdown(struct ksmbd_transport *t)
1506 {
1507     struct smb_direct_transport *st = smb_trans_direct_transfort(t);
1508 
1509     ksmbd_debug(RDMA, "smb-direct shutdown cm_id=%p\n", st->cm_id);
1510 
1511     smb_direct_disconnect_rdma_work(&st->disconnect_work);
1512 }
1513 
1514 static int smb_direct_cm_handler(struct rdma_cm_id *cm_id,
1515                  struct rdma_cm_event *event)
1516 {
1517     struct smb_direct_transport *t = cm_id->context;
1518 
1519     ksmbd_debug(RDMA, "RDMA CM event. cm_id=%p event=%s (%d)\n",
1520             cm_id, rdma_event_msg(event->event), event->event);
1521 
1522     switch (event->event) {
1523     case RDMA_CM_EVENT_ESTABLISHED: {
1524         t->status = SMB_DIRECT_CS_CONNECTED;
1525         wake_up_interruptible(&t->wait_status);
1526         break;
1527     }
1528     case RDMA_CM_EVENT_DEVICE_REMOVAL:
1529     case RDMA_CM_EVENT_DISCONNECTED: {
1530         t->status = SMB_DIRECT_CS_DISCONNECTED;
1531         wake_up_interruptible(&t->wait_status);
1532         wake_up_interruptible(&t->wait_reassembly_queue);
1533         wake_up(&t->wait_send_credits);
1534         break;
1535     }
1536     case RDMA_CM_EVENT_CONNECT_ERROR: {
1537         t->status = SMB_DIRECT_CS_DISCONNECTED;
1538         wake_up_interruptible(&t->wait_status);
1539         break;
1540     }
1541     default:
1542         pr_err("Unexpected RDMA CM event. cm_id=%p, event=%s (%d)\n",
1543                cm_id, rdma_event_msg(event->event),
1544                event->event);
1545         break;
1546     }
1547     return 0;
1548 }
1549 
1550 static void smb_direct_qpair_handler(struct ib_event *event, void *context)
1551 {
1552     struct smb_direct_transport *t = context;
1553 
1554     ksmbd_debug(RDMA, "Received QP event. cm_id=%p, event=%s (%d)\n",
1555             t->cm_id, ib_event_msg(event->event), event->event);
1556 
1557     switch (event->event) {
1558     case IB_EVENT_CQ_ERR:
1559     case IB_EVENT_QP_FATAL:
1560         smb_direct_disconnect_rdma_connection(t);
1561         break;
1562     default:
1563         break;
1564     }
1565 }
1566 
1567 static int smb_direct_send_negotiate_response(struct smb_direct_transport *t,
1568                           int failed)
1569 {
1570     struct smb_direct_sendmsg *sendmsg;
1571     struct smb_direct_negotiate_resp *resp;
1572     int ret;
1573 
1574     sendmsg = smb_direct_alloc_sendmsg(t);
1575     if (IS_ERR(sendmsg))
1576         return -ENOMEM;
1577 
1578     resp = (struct smb_direct_negotiate_resp *)sendmsg->packet;
1579     if (failed) {
1580         memset(resp, 0, sizeof(*resp));
1581         resp->min_version = cpu_to_le16(0x0100);
1582         resp->max_version = cpu_to_le16(0x0100);
1583         resp->status = STATUS_NOT_SUPPORTED;
1584     } else {
1585         resp->status = STATUS_SUCCESS;
1586         resp->min_version = SMB_DIRECT_VERSION_LE;
1587         resp->max_version = SMB_DIRECT_VERSION_LE;
1588         resp->negotiated_version = SMB_DIRECT_VERSION_LE;
1589         resp->reserved = 0;
1590         resp->credits_requested =
1591                 cpu_to_le16(t->send_credit_target);
1592         resp->credits_granted = cpu_to_le16(manage_credits_prior_sending(t));
1593         resp->max_readwrite_size = cpu_to_le32(t->max_rdma_rw_size);
1594         resp->preferred_send_size = cpu_to_le32(t->max_send_size);
1595         resp->max_receive_size = cpu_to_le32(t->max_recv_size);
1596         resp->max_fragmented_size =
1597                 cpu_to_le32(t->max_fragmented_recv_size);
1598     }
1599 
1600     sendmsg->sge[0].addr = ib_dma_map_single(t->cm_id->device,
1601                          (void *)resp, sizeof(*resp),
1602                          DMA_TO_DEVICE);
1603     ret = ib_dma_mapping_error(t->cm_id->device, sendmsg->sge[0].addr);
1604     if (ret) {
1605         smb_direct_free_sendmsg(t, sendmsg);
1606         return ret;
1607     }
1608 
1609     sendmsg->num_sge = 1;
1610     sendmsg->sge[0].length = sizeof(*resp);
1611     sendmsg->sge[0].lkey = t->pd->local_dma_lkey;
1612 
1613     ret = post_sendmsg(t, NULL, sendmsg);
1614     if (ret) {
1615         smb_direct_free_sendmsg(t, sendmsg);
1616         return ret;
1617     }
1618 
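    /* Wait for the posted response to complete before returning, so
     * negotiation does not proceed while the send is still pending.
     */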
1619     wait_event(t->wait_send_pending,
1620            atomic_read(&t->send_pending) == 0);
1621     return 0;
1622 }
1623 
1624 static int smb_direct_accept_client(struct smb_direct_transport *t)
1625 {
1626     struct rdma_conn_param conn_param;
1627     struct ib_port_immutable port_immutable;
1628     u32 ird_ord_hdr[2];
1629     int ret;
1630 
1631     memset(&conn_param, 0, sizeof(conn_param));
1632     conn_param.initiator_depth = min_t(u8, t->cm_id->device->attrs.max_qp_rd_atom,
1633                        SMB_DIRECT_CM_INITIATOR_DEPTH);
1634     conn_param.responder_resources = 0;
1635 
1636     t->cm_id->device->ops.get_port_immutable(t->cm_id->device,
1637                          t->cm_id->port_num,
1638                          &port_immutable);
1639     if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) {
1640         ird_ord_hdr[0] = conn_param.responder_resources;
1641         ird_ord_hdr[1] = 1;
1642         conn_param.private_data = ird_ord_hdr;
1643         conn_param.private_data_len = sizeof(ird_ord_hdr);
1644     } else {
1645         conn_param.private_data = NULL;
1646         conn_param.private_data_len = 0;
1647     }
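        /*
         * On iWARP, the initiator/responder resource values (IRD/ORD)
         * are exchanged through the connection's private data as part
         * of enhanced MPA negotiation (cf. RFC 6581); InfiniBand
         * carries them in the CM protocol itself, so no private data
         * is needed there.
         */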
1648     conn_param.retry_count = SMB_DIRECT_CM_RETRY;
1649     conn_param.rnr_retry_count = SMB_DIRECT_CM_RNR_RETRY;
1650     conn_param.flow_control = 0;
1651 
1652     ret = rdma_accept(t->cm_id, &conn_param);
1653     if (ret) {
1654         pr_err("error at rdma_accept: %d\n", ret);
1655         return ret;
1656     }
1657     return 0;
1658 }
1659 
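/* Post a receive buffer for the client's negotiate request before
 * accepting the connection, so the first message cannot arrive while
 * no receive is posted.
 */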
1660 static int smb_direct_prepare_negotiation(struct smb_direct_transport *t)
1661 {
1662     int ret;
1663     struct smb_direct_recvmsg *recvmsg;
1664 
1665     recvmsg = get_free_recvmsg(t);
1666     if (!recvmsg)
1667         return -ENOMEM;
1668     recvmsg->type = SMB_DIRECT_MSG_NEGOTIATE_REQ;
1669 
1670     ret = smb_direct_post_recv(t, recvmsg);
1671     if (ret) {
1672         pr_err("Can't post recv: %d\n", ret);
1673         goto out_err;
1674     }
1675 
1676     t->negotiation_requested = false;
1677     ret = smb_direct_accept_client(t);
1678     if (ret) {
1679         pr_err("Can't accept client\n");
1680         goto out_err;
1681     }
1682 
1683     smb_direct_post_recv_credits(&t->post_recv_credits_work.work);
1684     return 0;
1685 out_err:
1686     put_recvmsg(t, recvmsg);
1687     return ret;
1688 }
1689 
1690 static unsigned int smb_direct_get_max_fr_pages(struct smb_direct_transport *t)
1691 {
1692     return min_t(unsigned int,
1693              t->cm_id->device->attrs.max_fast_reg_page_list_len,
1694              256);
1695 }
1696 
1697 static int smb_direct_init_params(struct smb_direct_transport *t,
1698                   struct ib_qp_cap *cap)
1699 {
1700     struct ib_device *device = t->cm_id->device;
1701     int max_send_sges, max_rw_wrs, max_send_wrs;
1702     unsigned int max_sge_per_wr, wrs_per_credit;
1703 
1704     /* Need 3 more SGEs, because the SMB_DIRECT header, the SMB2 header,
1705      * and the SMB2 response may each be mapped as a separate SGE.
1706      */
1707     t->max_send_size = smb_direct_max_send_size;
1708     max_send_sges = DIV_ROUND_UP(t->max_send_size, PAGE_SIZE) + 3;
1709     if (max_send_sges > SMB_DIRECT_MAX_SEND_SGES) {
1710         pr_err("max_send_size %d is too large\n", t->max_send_size);
1711         return -EINVAL;
1712     }
1713 
1714     /* Calculate the number of work requests for RDMA R/W.
1715      * One R/W credit covers at most the number of pages that can
1716      * be registered with a single memory region. At least 4 work
1717      * requests are needed per credit: one for MR registration,
1718      * one for the RDMA R/W itself, and one each for local and
1719      * remote MR invalidation.
1720      */
1721     t->max_rdma_rw_size = smb_direct_max_read_write_size;
1722     t->pages_per_rw_credit = smb_direct_get_max_fr_pages(t);
1723     t->max_rw_credits = DIV_ROUND_UP(t->max_rdma_rw_size,
1724                      (t->pages_per_rw_credit - 1) *
1725                      PAGE_SIZE);
1726 
1727     max_sge_per_wr = min_t(unsigned int, device->attrs.max_send_sge,
1728                    device->attrs.max_sge_rd);
1729     max_sge_per_wr = max_t(unsigned int, max_sge_per_wr,
1730                    max_send_sges);
1731     wrs_per_credit = max_t(unsigned int, 4,
1732                    DIV_ROUND_UP(t->pages_per_rw_credit,
1733                         max_sge_per_wr) + 1);
1734     max_rw_wrs = t->max_rw_credits * wrs_per_credit;
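    /*
     * A worked example under assumed values (4 KiB pages, an 8 MiB
     * default max_read_write_size, pages_per_rw_credit capped at 256
     * pages, i.e. about 1 MiB per fast-reg MR, and a device reporting
     * 30 SGEs per WR): one credit covers (256 - 1) * 4096 = 1044480
     * bytes, so max_rw_credits = DIV_ROUND_UP(8388608, 1044480) = 9;
     * wrs_per_credit = max(4, DIV_ROUND_UP(256, 30) + 1) = 10; and
     * max_rw_wrs = 9 * 10 = 90.
     */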
1735 
1736     max_send_wrs = smb_direct_send_credit_target + max_rw_wrs;
1737     if (max_send_wrs > device->attrs.max_cqe ||
1738         max_send_wrs > device->attrs.max_qp_wr) {
1739         pr_err("consider lowering send_credit_target = %d\n",
1740                smb_direct_send_credit_target);
1741         pr_err("Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n",
1742                device->attrs.max_cqe, device->attrs.max_qp_wr);
1743         return -EINVAL;
1744     }
1745 
1746     if (smb_direct_receive_credit_max > device->attrs.max_cqe ||
1747         smb_direct_receive_credit_max > device->attrs.max_qp_wr) {
1748         pr_err("consider lowering receive_credit_max = %d\n",
1749                smb_direct_receive_credit_max);
1750                pr_err("Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n",
1751                device->attrs.max_cqe, device->attrs.max_qp_wr);
1752         return -EINVAL;
1753     }
1754 
1755     if (device->attrs.max_recv_sge < SMB_DIRECT_MAX_RECV_SGES) {
1756                pr_err("device max_recv_sge = %d is too small\n",
1757                device->attrs.max_recv_sge);
1758         return -EINVAL;
1759     }
1760 
1761     t->recv_credits = 0;
1762     t->count_avail_recvmsg = 0;
1763 
1764     t->recv_credit_max = smb_direct_receive_credit_max;
1765     t->recv_credit_target = 10;
1766     t->new_recv_credits = 0;
1767 
1768     t->send_credit_target = smb_direct_send_credit_target;
1769     atomic_set(&t->send_credits, 0);
1770     atomic_set(&t->rw_credits, t->max_rw_credits);
1771 
1772     t->max_send_size = smb_direct_max_send_size;
1773     t->max_recv_size = smb_direct_max_receive_size;
1774     t->max_fragmented_recv_size = smb_direct_max_fragmented_recv_size;
1775 
1776     cap->max_send_wr = max_send_wrs;
1777     cap->max_recv_wr = t->recv_credit_max;
1778     cap->max_send_sge = max_sge_per_wr;
1779     cap->max_recv_sge = SMB_DIRECT_MAX_RECV_SGES;
1780     cap->max_inline_data = 0;
1781     cap->max_rdma_ctxs = t->max_rw_credits;
1782     return 0;
1783 }
1784 
1785 static void smb_direct_destroy_pools(struct smb_direct_transport *t)
1786 {
1787     struct smb_direct_recvmsg *recvmsg;
1788 
1789     while ((recvmsg = get_free_recvmsg(t)))
1790         mempool_free(recvmsg, t->recvmsg_mempool);
1791     while ((recvmsg = get_empty_recvmsg(t)))
1792         mempool_free(recvmsg, t->recvmsg_mempool);
1793 
1794     mempool_destroy(t->recvmsg_mempool);
1795     t->recvmsg_mempool = NULL;
1796 
1797     kmem_cache_destroy(t->recvmsg_cache);
1798     t->recvmsg_cache = NULL;
1799 
1800     mempool_destroy(t->sendmsg_mempool);
1801     t->sendmsg_mempool = NULL;
1802 
1803     kmem_cache_destroy(t->sendmsg_cache);
1804     t->sendmsg_cache = NULL;
1805 }
1806 
1807 static int smb_direct_create_pools(struct smb_direct_transport *t)
1808 {
1809     char name[80];
1810     int i;
1811     struct smb_direct_recvmsg *recvmsg;
1812 
1813     snprintf(name, sizeof(name), "smb_direct_rqst_pool_%p", t);
1814     t->sendmsg_cache = kmem_cache_create(name,
1815                          sizeof(struct smb_direct_sendmsg) +
1816                           sizeof(struct smb_direct_negotiate_resp),
1817                          0, SLAB_HWCACHE_ALIGN, NULL);
1818     if (!t->sendmsg_cache)
1819         return -ENOMEM;
1820 
1821     t->sendmsg_mempool = mempool_create(t->send_credit_target,
1822                         mempool_alloc_slab, mempool_free_slab,
1823                         t->sendmsg_cache);
1824     if (!t->sendmsg_mempool)
1825         goto err;
1826 
1827     snprintf(name, sizeof(name), "smb_direct_resp_%p", t);
1828     t->recvmsg_cache = kmem_cache_create(name,
1829                          sizeof(struct smb_direct_recvmsg) +
1830                           t->max_recv_size,
1831                          0, SLAB_HWCACHE_ALIGN, NULL);
1832     if (!t->recvmsg_cache)
1833         goto err;
1834 
1835     t->recvmsg_mempool =
1836         mempool_create(t->recv_credit_max, mempool_alloc_slab,
1837                    mempool_free_slab, t->recvmsg_cache);
1838     if (!t->recvmsg_mempool)
1839         goto err;
1840 
1841     INIT_LIST_HEAD(&t->recvmsg_queue);
1842 
1843     for (i = 0; i < t->recv_credit_max; i++) {
1844         recvmsg = mempool_alloc(t->recvmsg_mempool, GFP_KERNEL);
1845         if (!recvmsg)
1846             goto err;
1847         recvmsg->transport = t;
1848         list_add(&recvmsg->list, &t->recvmsg_queue);
1849     }
1850     t->count_avail_recvmsg = t->recv_credit_max;
1851 
1852     return 0;
1853 err:
1854     smb_direct_destroy_pools(t);
1855     return -ENOMEM;
1856 }
1857 
1858 static int smb_direct_create_qpair(struct smb_direct_transport *t,
1859                    struct ib_qp_cap *cap)
1860 {
1861     int ret;
1862     struct ib_qp_init_attr qp_attr;
1863     int pages_per_rw;
1864 
1865     t->pd = ib_alloc_pd(t->cm_id->device, 0);
1866     if (IS_ERR(t->pd)) {
1867         pr_err("Can't create RDMA PD\n");
1868         ret = PTR_ERR(t->pd);
1869         t->pd = NULL;
1870         return ret;
1871     }
1872 
1873     t->send_cq = ib_alloc_cq(t->cm_id->device, t,
1874                  smb_direct_send_credit_target + cap->max_rdma_ctxs,
1875                  0, IB_POLL_WORKQUEUE);
1876     if (IS_ERR(t->send_cq)) {
1877         pr_err("Can't create RDMA send CQ\n");
1878         ret = PTR_ERR(t->send_cq);
1879         t->send_cq = NULL;
1880         goto err;
1881     }
1882 
1883     t->recv_cq = ib_alloc_cq(t->cm_id->device, t,
1884                  t->recv_credit_max, 0, IB_POLL_WORKQUEUE);
1885     if (IS_ERR(t->recv_cq)) {
1886         pr_err("Can't create RDMA recv CQ\n");
1887         ret = PTR_ERR(t->recv_cq);
1888         t->recv_cq = NULL;
1889         goto err;
1890     }
1891 
1892     memset(&qp_attr, 0, sizeof(qp_attr));
1893     qp_attr.event_handler = smb_direct_qpair_handler;
1894     qp_attr.qp_context = t;
1895     qp_attr.cap = *cap;
1896     qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
1897     qp_attr.qp_type = IB_QPT_RC;
1898     qp_attr.send_cq = t->send_cq;
1899     qp_attr.recv_cq = t->recv_cq;
1900     qp_attr.port_num = ~0;
1901 
1902     ret = rdma_create_qp(t->cm_id, t->pd, &qp_attr);
1903     if (ret) {
1904         pr_err("Can't create RDMA QP: %d\n", ret);
1905         goto err;
1906     }
1907 
1908     t->qp = t->cm_id->qp;
1909     t->cm_id->event_handler = smb_direct_cm_handler;
1910 
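    /* When a transfer may need more pages per WR than the device can
     * read via a plain SGL, the rdma_rw layer falls back to memory
     * registration; pre-size the MR pool for that case.
     */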
1911     pages_per_rw = DIV_ROUND_UP(t->max_rdma_rw_size, PAGE_SIZE) + 1;
1912     if (pages_per_rw > t->cm_id->device->attrs.max_sgl_rd) {
1913         ret = ib_mr_pool_init(t->qp, &t->qp->rdma_mrs,
1914                       t->max_rw_credits, IB_MR_TYPE_MEM_REG,
1915                       t->pages_per_rw_credit, 0);
1916         if (ret) {
1917             pr_err("failed to init mr pool count %d pages %d\n",
1918                    t->max_rw_credits, t->pages_per_rw_credit);
1919             goto err;
1920         }
1921     }
1922 
1923     return 0;
1924 err:
1925     if (t->qp) {
1926         ib_destroy_qp(t->qp);
1927         t->qp = NULL;
1928     }
1929     if (t->recv_cq) {
1930         ib_destroy_cq(t->recv_cq);
1931         t->recv_cq = NULL;
1932     }
1933     if (t->send_cq) {
1934         ib_destroy_cq(t->send_cq);
1935         t->send_cq = NULL;
1936     }
1937     if (t->pd) {
1938         ib_dealloc_pd(t->pd);
1939         t->pd = NULL;
1940     }
1941     return ret;
1942 }
1943 
1944 static int smb_direct_prepare(struct ksmbd_transport *t)
1945 {
1946     struct smb_direct_transport *st = smb_trans_direct_transfort(t);
1947     struct smb_direct_recvmsg *recvmsg;
1948     struct smb_direct_negotiate_req *req;
1949     int ret;
1950 
1951     ksmbd_debug(RDMA, "Waiting for SMB_DIRECT negotiate request\n");
1952     ret = wait_event_interruptible_timeout(st->wait_status,
1953                            st->negotiation_requested ||
1954                            st->status == SMB_DIRECT_CS_DISCONNECTED,
1955                            SMB_DIRECT_NEGOTIATE_TIMEOUT * HZ);
1956     if (ret <= 0 || st->status == SMB_DIRECT_CS_DISCONNECTED)
1957         return ret < 0 ? ret : -ETIMEDOUT;
1958 
1959     recvmsg = get_first_reassembly(st);
1960     if (!recvmsg)
1961         return -ECONNABORTED;
1962 
1963     ret = smb_direct_check_recvmsg(recvmsg);
1964     if (ret == -ECONNABORTED)
1965         goto out;
1966 
1967     req = (struct smb_direct_negotiate_req *)recvmsg->packet;
1968     st->max_recv_size = min_t(int, st->max_recv_size,
1969                   le32_to_cpu(req->preferred_send_size));
1970     st->max_send_size = min_t(int, st->max_send_size,
1971                   le32_to_cpu(req->max_receive_size));
1972     st->max_fragmented_send_size =
1973         le32_to_cpu(req->max_fragmented_size);
1974     st->max_fragmented_recv_size =
1975         (st->recv_credit_max * st->max_recv_size) / 2;
1976 
1977     ret = smb_direct_send_negotiate_response(st, ret);
1978 out:
1979     spin_lock_irq(&st->reassembly_queue_lock);
1980     st->reassembly_queue_length--;
1981     list_del(&recvmsg->list);
1982     spin_unlock_irq(&st->reassembly_queue_lock);
1983     put_recvmsg(st, recvmsg);
1984 
1985     return ret;
1986 }
1987 
1988 static int smb_direct_connect(struct smb_direct_transport *st)
1989 {
1990     int ret;
1991     struct ib_qp_cap qp_cap;
1992 
1993     ret = smb_direct_init_params(st, &qp_cap);
1994     if (ret) {
1995         pr_err("Can't configure RDMA parameters\n");
1996         return ret;
1997     }
1998 
1999     ret = smb_direct_create_pools(st);
2000     if (ret) {
2001         pr_err("Can't init RDMA pool: %d\n", ret);
2002         return ret;
2003     }
2004 
2005     ret = smb_direct_create_qpair(st, &qp_cap);
2006     if (ret) {
2007         pr_err("Can't accept RDMA client: %d\n", ret);
2008         return ret;
2009     }
2010 
2011     ret = smb_direct_prepare_negotiation(st);
2012     if (ret) {
2013         pr_err("Can't negotiate: %d\n", ret);
2014         return ret;
2015     }
2016     return 0;
2017 }
2018 
2019 static bool rdma_frwr_is_supported(struct ib_device_attr *attrs)
2020 {
2021     if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
2022         return false;
2023     if (attrs->max_fast_reg_page_list_len == 0)
2024         return false;
2025     return true;
2026 }
2027 
2028 static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id)
2029 {
2030     struct smb_direct_transport *t;
2031     int ret;
2032 
2033     if (!rdma_frwr_is_supported(&new_cm_id->device->attrs)) {
2034         ksmbd_debug(RDMA,
2035                "Fast Registration Work Requests are not supported, device capabilities=%llx\n",
2036                 new_cm_id->device->attrs.device_cap_flags);
2037         return -EPROTONOSUPPORT;
2038     }
2039 
2040     t = alloc_transport(new_cm_id);
2041     if (!t)
2042         return -ENOMEM;
2043 
2044     ret = smb_direct_connect(t);
2045     if (ret)
2046         goto out_err;
2047 
2048     KSMBD_TRANS(t)->handler = kthread_run(ksmbd_conn_handler_loop,
2049                           KSMBD_TRANS(t)->conn, "ksmbd:r%u",
2050                           smb_direct_port);
2051     if (IS_ERR(KSMBD_TRANS(t)->handler)) {
2052         ret = PTR_ERR(KSMBD_TRANS(t)->handler);
2053         pr_err("Can't start thread\n");
2054         goto out_err;
2055     }
2056 
2057     return 0;
2058 out_err:
2059     free_transport(t);
2060     return ret;
2061 }
2062 
2063 static int smb_direct_listen_handler(struct rdma_cm_id *cm_id,
2064                      struct rdma_cm_event *event)
2065 {
2066     switch (event->event) {
2067     case RDMA_CM_EVENT_CONNECT_REQUEST: {
2068         int ret = smb_direct_handle_connect_request(cm_id);
2069 
2070         if (ret) {
2071             pr_err("Can't create transport: %d\n", ret);
2072             return ret;
2073         }
2074 
2075         ksmbd_debug(RDMA, "Received connection request. cm_id=%p\n",
2076                 cm_id);
2077         break;
2078     }
2079     default:
2080         pr_err("Unexpected listen event. cm_id=%p, event=%s (%d)\n",
2081                cm_id, rdma_event_msg(event->event), event->event);
2082         break;
2083     }
2084     return 0;
2085 }
2086 
2087 static int smb_direct_listen(int port)
2088 {
2089     int ret;
2090     struct rdma_cm_id *cm_id;
2091     struct sockaddr_in sin = {
2092         .sin_family     = AF_INET,
2093         .sin_addr.s_addr    = htonl(INADDR_ANY),
2094         .sin_port       = htons(port),
2095     };
2096 
2097     cm_id = rdma_create_id(&init_net, smb_direct_listen_handler,
2098                    &smb_direct_listener, RDMA_PS_TCP, IB_QPT_RC);
2099     if (IS_ERR(cm_id)) {
2100         pr_err("Can't create cm id: %ld\n", PTR_ERR(cm_id));
2101         return PTR_ERR(cm_id);
2102     }
2103 
2104     ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
2105     if (ret) {
2106         pr_err("Can't bind: %d\n", ret);
2107         goto err;
2108     }
2109 
2110     smb_direct_listener.cm_id = cm_id;
2111 
2112     ret = rdma_listen(cm_id, 10);
2113     if (ret) {
2114         pr_err("Can't listen: %d\n", ret);
2115         goto err;
2116     }
2117     return 0;
2118 err:
2119     smb_direct_listener.cm_id = NULL;
2120     rdma_destroy_id(cm_id);
2121     return ret;
2122 }
2123 
2124 static int smb_direct_ib_client_add(struct ib_device *ib_dev)
2125 {
2126     struct smb_direct_device *smb_dev;
2127 
2128     /* Use port 5445 if the device type is iWARP (i.e. not InfiniBand) */
2129     if (ib_dev->node_type != RDMA_NODE_IB_CA)
2130         smb_direct_port = SMB_DIRECT_PORT_IWARP;
2131 
2132     if (!ib_dev->ops.get_netdev ||
2133         !rdma_frwr_is_supported(&ib_dev->attrs))
2134         return 0;
2135 
2136     smb_dev = kzalloc(sizeof(*smb_dev), GFP_KERNEL);
2137     if (!smb_dev)
2138         return -ENOMEM;
2139     smb_dev->ib_dev = ib_dev;
2140 
2141     write_lock(&smb_direct_device_lock);
2142     list_add(&smb_dev->list, &smb_direct_device_list);
2143     write_unlock(&smb_direct_device_lock);
2144 
2145     ksmbd_debug(RDMA, "ib device added: name %s\n", ib_dev->name);
2146     return 0;
2147 }
2148 
2149 static void smb_direct_ib_client_remove(struct ib_device *ib_dev,
2150                     void *client_data)
2151 {
2152     struct smb_direct_device *smb_dev, *tmp;
2153 
2154     write_lock(&smb_direct_device_lock);
2155     list_for_each_entry_safe(smb_dev, tmp, &smb_direct_device_list, list) {
2156         if (smb_dev->ib_dev == ib_dev) {
2157             list_del(&smb_dev->list);
2158             kfree(smb_dev);
2159             break;
2160         }
2161     }
2162     write_unlock(&smb_direct_device_lock);
2163 }
2164 
2165 static struct ib_client smb_direct_ib_client = {
2166     .name   = "ksmbd_smb_direct_ib",
2167     .add    = smb_direct_ib_client_add,
2168     .remove = smb_direct_ib_client_remove,
2169 };
2170 
2171 int ksmbd_rdma_init(void)
2172 {
2173     int ret;
2174 
2175     smb_direct_listener.cm_id = NULL;
2176 
2177     ret = ib_register_client(&smb_direct_ib_client);
2178     if (ret) {
2179         pr_err("ib_register_client failed\n");
2180         return ret;
2181     }
2182 
2183     /* When a client is running out of send credits, the server grants
2184      * more credits by sending a packet through this queue. This
2185      * avoids the situation where a client cannot send packets for
2186      * lack of credits.
2187      */
2188     smb_direct_wq = alloc_workqueue("ksmbd-smb_direct-wq",
2189                     WQ_HIGHPRI | WQ_MEM_RECLAIM, 0);
2190     if (!smb_direct_wq)
2191         return -ENOMEM;
2192 
2193     ret = smb_direct_listen(smb_direct_port);
2194     if (ret) {
2195         destroy_workqueue(smb_direct_wq);
2196         smb_direct_wq = NULL;
2197         pr_err("Can't listen: %d\n", ret);
2198         return ret;
2199     }
2200 
2201     ksmbd_debug(RDMA, "init RDMA listener. cm_id=%p\n",
2202             smb_direct_listener.cm_id);
2203     return 0;
2204 }
2205 
2206 void ksmbd_rdma_destroy(void)
2207 {
2208     if (!smb_direct_listener.cm_id)
2209         return;
2210 
2211     ib_unregister_client(&smb_direct_ib_client);
2212     rdma_destroy_id(smb_direct_listener.cm_id);
2213 
2214     smb_direct_listener.cm_id = NULL;
2215 
2216     if (smb_direct_wq) {
2217         destroy_workqueue(smb_direct_wq);
2218         smb_direct_wq = NULL;
2219     }
2220 }
2221 
2222 bool ksmbd_rdma_capable_netdev(struct net_device *netdev)
2223 {
2224     struct smb_direct_device *smb_dev;
2225     int i;
2226     bool rdma_capable = false;
2227 
2228     read_lock(&smb_direct_device_lock);
2229     list_for_each_entry(smb_dev, &smb_direct_device_list, list) {
2230         for (i = 0; i < smb_dev->ib_dev->phys_port_cnt; i++) {
2231             struct net_device *ndev;
2232 
2233             ndev = smb_dev->ib_dev->ops.get_netdev(smb_dev->ib_dev,
2234                                    i + 1);
2235             if (!ndev)
2236                 continue;
2237 
2238             if (ndev == netdev) {
2239                 dev_put(ndev);
2240                 rdma_capable = true;
2241                 goto out;
2242             }
2243             dev_put(ndev);
2244         }
2245     }
2246 out:
2247     read_unlock(&smb_direct_device_lock);
2248 
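    /* Fall back to resolving the ib_device directly from the netdev;
     * this covers devices that were never added to
     * smb_direct_device_list (e.g. those without a get_netdev op).
     */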
2249     if (!rdma_capable) {
2250         struct ib_device *ibdev;
2251 
2252         ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN);
2253         if (ibdev) {
2254             if (rdma_frwr_is_supported(&ibdev->attrs))
2255                 rdma_capable = true;
2256             ib_device_put(ibdev);
2257         }
2258     }
2259 
2260     return rdma_capable;
2261 }
2262 
2263 static struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = {
2264     .prepare    = smb_direct_prepare,
2265     .disconnect = smb_direct_disconnect,
2266     .shutdown   = smb_direct_shutdown,
2267     .writev     = smb_direct_writev,
2268     .read       = smb_direct_read,
2269     .rdma_read  = smb_direct_rdma_read,
2270     .rdma_write = smb_direct_rdma_write,
2271 };
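
/*
 * A hedged usage sketch (not part of this file): callers reach these
 * callbacks through the transport's ops table rather than calling
 * them directly. Assuming ksmbd_transport keeps this table in an
 * "ops" member, a hypothetical helper pushing data into a
 * client-supplied buffer descriptor might look like:
 *
 *	static int example_rdma_push(struct ksmbd_transport *t,
 *				     void *buf, unsigned int len,
 *				     struct smb2_buffer_desc_v1 *desc,
 *				     unsigned int desc_len)
 *	{
 *		// dispatches to smb_direct_rdma_write() above
 *		return t->ops->rdma_write(t, buf, len, desc, desc_len);
 *	}
 */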