Back to home page

OSCL-LXR

 
 

    


0001 /* SPDX-License-Identifier: GPL-2.0-or-later */
0002 /*
0003  * RDMA Transport Layer
0004  *
0005  * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved.
0006  * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved.
0007  * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved.
0008  */
0009 
0010 #ifndef RTRS_PRI_H
0011 #define RTRS_PRI_H
0012 
0013 #include <linux/uuid.h>
0014 #include <rdma/rdma_cm.h>
0015 #include <rdma/ib_verbs.h>
0016 #include <rdma/ib.h>
0017 
0018 #include "rtrs.h"
0019 
/* RTRS protocol version, split into its major and minor components */
#define RTRS_PROTO_VER_MAJOR 2
#define RTRS_PROTO_VER_MINOR 0

/* String form of the version: stringified major, a ".", stringified minor */
#define RTRS_PROTO_VER_STRING \
    __stringify(RTRS_PROTO_VER_MAJOR) "." __stringify(RTRS_PROTO_VER_MINOR)
0025 
/*
 * Max IB immediate data size is 2^28 (MAX_IMM_PAYL_BITS)
 * and the minimum chunk size is 4096 (2^12).
 * So the maximum sess_queue_depth is 65536 (2^16) in theory.
 * But mempool_create, create_qp and ib_post_send fail with
 * "cannot allocate memory" error if sess_queue_depth is too big.
 * Therefore the practical max value of sess_queue_depth is
 * somewhere between 1 and 65534 and it depends on the system.
 *
 * NOTE(review): MIN_CHUNK_SIZE in this header is 8192, not 4096, and the
 * limit defined below is 65535 rather than 65534/65536 - confirm which
 * numbers this comment is meant to quote.
 */
#define MAX_SESS_QUEUE_DEPTH 65535
0036 
/*
 * Split of the 32-bit immediate value used by rtrs_to_imm()/rtrs_from_imm():
 * MAX_IMM_TYPE_BITS bits of type sit above MAX_IMM_PAYL_BITS bits of payload.
 * Each *_MASK has exactly the matching number of low bits set.
 */
enum rtrs_imm_const {
    MAX_IMM_TYPE_BITS = 4,
    MAX_IMM_PAYL_BITS = 28,

    MAX_IMM_TYPE_MASK = (1 << MAX_IMM_TYPE_BITS) - 1,
    MAX_IMM_PAYL_MASK = (1 << MAX_IMM_PAYL_BITS) - 1,
};
0043 
/* Values passed as the "type" argument of rtrs_to_imm() */
enum rtrs_imm_type {
    /* IO traffic */
    RTRS_IO_REQ_IMM       = 0,  /* client to server */
    RTRS_IO_RSP_IMM       = 1,  /* server to client */
    RTRS_IO_RSP_W_INV_IMM = 2,  /* server to client */

    /* HB: HeartBeat */
    RTRS_HB_MSG_IMM       = 8,
    RTRS_HB_ACK_IMM       = 9,

    /* One past the highest value above; rtrs_to_imm() bounds-checks it */
    RTRS_LAST_IMM,
};
0054 
0055 enum {
0056     SERVICE_CON_QUEUE_DEPTH = 512,
0057 
0058     MAX_PATHS_NUM = 128,
0059 
0060     MIN_CHUNK_SIZE = 8192,
0061 
0062     RTRS_HB_INTERVAL_MS = 5000,
0063     RTRS_HB_MISSED_MAX = 5,
0064 
0065     RTRS_MAGIC = 0x1BBD,
0066     RTRS_PROTO_VER = (RTRS_PROTO_VER_MAJOR << 8) | RTRS_PROTO_VER_MINOR,
0067 };
0068 
struct rtrs_ib_dev;

/*
 * Callback table referenced from struct rtrs_rdma_dev_pd through its ->ops
 * pointer. All callbacks deal with a struct rtrs_ib_dev: @alloc returns one,
 * the remaining three receive one.
 */
struct rtrs_rdma_dev_pd_ops {
    struct rtrs_ib_dev  *(*alloc)(void);
    void                (*free)(struct rtrs_ib_dev *dev);
    int                 (*init)(struct rtrs_ib_dev *dev);
    void                (*deinit)(struct rtrs_ib_dev *dev);
};
0077 
0078 struct rtrs_rdma_dev_pd {
0079     struct mutex        mutex;
0080     struct list_head    list;
0081     enum ib_pd_flags    pd_flags;
0082     const struct rtrs_rdma_dev_pd_ops *ops;
0083 };
0084 
0085 struct rtrs_ib_dev {
0086     struct ib_device     *ib_dev;
0087     struct ib_pd         *ib_pd;
0088     struct kref      ref;
0089     struct list_head     entry;
0090     struct rtrs_rdma_dev_pd *pool;
0091 };
0092 
0093 struct rtrs_con {
0094     struct rtrs_path    *path;
0095     struct ib_qp        *qp;
0096     struct ib_cq        *cq;
0097     struct rdma_cm_id   *cm_id;
0098     unsigned int        cid;
0099     int                     nr_cqe;
0100     atomic_t        wr_cnt;
0101     atomic_t        sq_wr_avail;
0102 };
0103 
0104 struct rtrs_path {
0105     struct list_head    entry;
0106     struct sockaddr_storage dst_addr;
0107     struct sockaddr_storage src_addr;
0108     char            sessname[NAME_MAX];
0109     uuid_t          uuid;
0110     struct rtrs_con **con;
0111     unsigned int        con_num;
0112     unsigned int        irq_con_num;
0113     unsigned int        recon_cnt;
0114     unsigned int        signal_interval;
0115     struct rtrs_ib_dev  *dev;
0116     int         dev_ref;
0117     struct ib_cqe       *hb_cqe;
0118     void            (*hb_err_handler)(struct rtrs_con *con);
0119     struct workqueue_struct *hb_wq;
0120     struct delayed_work hb_dwork;
0121     unsigned int        hb_interval_ms;
0122     unsigned int        hb_missed_cnt;
0123     unsigned int        hb_missed_max;
0124     ktime_t         hb_last_sent;
0125     ktime_t         hb_cur_latency;
0126 };
0127 
0128 /* rtrs information unit */
0129 struct rtrs_iu {
0130     struct ib_cqe           cqe;
0131     dma_addr_t              dma_addr;
0132     void                    *buf;
0133     size_t                  size;
0134     enum dma_data_direction direction;
0135 };
0136 
/**
 * enum rtrs_msg_types - RTRS message types, see also rtrs/README
 * @RTRS_MSG_INFO_REQ:  Client additional info request to the server
 * @RTRS_MSG_INFO_RSP:  Server additional info response to the client
 * @RTRS_MSG_WRITE:     Client writes data per RDMA to server
 * @RTRS_MSG_READ:      Client requests data transfer from server
 * @RTRS_MSG_RKEY_RSP:  Server refreshed rkey for rbuf
 *
 * The numeric values are spelled out because they are stored in the @type
 * field of the message structures below.
 */
enum rtrs_msg_types {
    RTRS_MSG_INFO_REQ = 0,
    RTRS_MSG_INFO_RSP = 1,
    RTRS_MSG_WRITE    = 2,
    RTRS_MSG_READ     = 3,
    RTRS_MSG_RKEY_RSP = 4,
};
0152 
/**
 * enum rtrs_msg_flags - RTRS message flags.
 * @RTRS_MSG_NEED_INVAL_F: Send invalidation in response.
 * @RTRS_MSG_NEW_RKEY_F:   Send refreshed rkey in response.
 *
 * Each flag occupies its own bit, so the values can be OR-ed together.
 */
enum rtrs_msg_flags {
    RTRS_MSG_NEED_INVAL_F = 1 << 0,
    RTRS_MSG_NEW_RKEY_F = 1 << 1,
};
0162 
0163 /**
0164  * struct rtrs_sg_desc - RDMA-Buffer entry description
0165  * @addr:   Address of RDMA destination buffer
0166  * @key:    Authorization rkey to write to the buffer
0167  * @len:    Size of the buffer
0168  */
0169 struct rtrs_sg_desc {
0170     __le64          addr;
0171     __le32          key;
0172     __le32          len;
0173 };
0174 
0175 /**
0176  * struct rtrs_msg_conn_req - Client connection request to the server
0177  * @magic:     RTRS magic
0178  * @version:       RTRS protocol version
0179  * @cid:       Current connection id
0180  * @cid_num:       Number of connections per session
0181  * @recon_cnt:     Reconnections counter
0182  * @sess_uuid:     UUID of a session (path)
0183  * @paths_uuid:    UUID of a group of sessions (paths)
0184  *
0185  * NOTE: max size 56 bytes, see man rdma_connect().
0186  */
0187 struct rtrs_msg_conn_req {
0188     /* Is set to 0 by cma.c in case of AF_IB, do not touch that.
0189      * see https://www.spinics.net/lists/linux-rdma/msg22397.html
0190      */
0191     u8      __cma_version;
0192     /* On sender side that should be set to 0, or cma_save_ip_info()
0193      * extract garbage and will fail.
0194      */
0195     u8      __ip_version;
0196     __le16      magic;
0197     __le16      version;
0198     __le16      cid;
0199     __le16      cid_num;
0200     __le16      recon_cnt;
0201     uuid_t      sess_uuid;
0202     uuid_t      paths_uuid;
0203     u8      first_conn : 1;
0204     u8      reserved_bits : 7;
0205     u8      reserved[11];
0206 };
0207 
0208 /**
0209  * struct rtrs_msg_conn_rsp - Server connection response to the client
0210  * @magic:     RTRS magic
0211  * @version:       RTRS protocol version
0212  * @errno:     If rdma_accept() then 0, if rdma_reject() indicates error
0213  * @queue_depth:   max inflight messages (queue-depth) in this session
0214  * @max_io_size:   max io size server supports
0215  * @max_hdr_size:  max msg header size server supports
0216  *
0217  * NOTE: size is 56 bytes, max possible is 136 bytes, see man rdma_accept().
0218  */
0219 struct rtrs_msg_conn_rsp {
0220     __le16      magic;
0221     __le16      version;
0222     __le16      errno;
0223     __le16      queue_depth;
0224     __le32      max_io_size;
0225     __le32      max_hdr_size;
0226     __le32      flags;
0227     u8      reserved[36];
0228 };
0229 
0230 /**
0231  * struct rtrs_msg_info_req
0232  * @type:       @RTRS_MSG_INFO_REQ
0233  * @pathname:       Path name chosen by client
0234  */
0235 struct rtrs_msg_info_req {
0236     __le16      type;
0237     u8      pathname[NAME_MAX];
0238     u8      reserved[15];
0239 };
0240 
0241 /**
0242  * struct rtrs_msg_info_rsp
0243  * @type:       @RTRS_MSG_INFO_RSP
0244  * @sg_cnt:     Number of @desc entries
0245  * @desc:       RDMA buffers where the client can write to server
0246  */
0247 struct rtrs_msg_info_rsp {
0248     __le16      type;
0249     __le16          sg_cnt;
0250     u8              reserved[4];
0251     struct rtrs_sg_desc desc[];
0252 };
0253 
0254 /**
0255  * struct rtrs_msg_rkey_rsp
0256  * @type:       @RTRS_MSG_RKEY_RSP
0257  * @buf_id:     RDMA buf_id of the new rkey
0258  * @rkey:       new remote key for RDMA buffers id from server
0259  */
0260 struct rtrs_msg_rkey_rsp {
0261     __le16      type;
0262     __le16          buf_id;
0263     __le32      rkey;
0264 };
0265 
0266 /**
0267  * struct rtrs_msg_rdma_read - RDMA data transfer request from client
0268  * @type:       always @RTRS_MSG_READ
0269  * @usr_len:        length of user payload
0270  * @sg_cnt:     number of @desc entries
0271  * @desc:       RDMA buffers where the server can write the result to
0272  */
0273 struct rtrs_msg_rdma_read {
0274     __le16          type;
0275     __le16          usr_len;
0276     __le16          flags;
0277     __le16          sg_cnt;
0278     struct rtrs_sg_desc    desc[];
0279 };
0280 
0281 /**
0282  * struct_msg_rdma_write - Message transferred to server with RDMA-Write
0283  * @type:       always @RTRS_MSG_WRITE
0284  * @usr_len:        length of user payload
0285  */
0286 struct rtrs_msg_rdma_write {
0287     __le16          type;
0288     __le16          usr_len;
0289 };
0290 
0291 /**
0292  * struct_msg_rdma_hdr - header for read or write request
0293  * @type:       @RTRS_MSG_WRITE | @RTRS_MSG_READ
0294  */
0295 struct rtrs_msg_rdma_hdr {
0296     __le16          type;
0297 };
0298 
0299 /* rtrs.c */
0300 
0301 struct rtrs_iu *rtrs_iu_alloc(u32 queue_num, size_t size, gfp_t t,
0302                   struct ib_device *dev, enum dma_data_direction,
0303                   void (*done)(struct ib_cq *cq, struct ib_wc *wc));
0304 void rtrs_iu_free(struct rtrs_iu *iu, struct ib_device *dev, u32 queue_num);
0305 int rtrs_iu_post_recv(struct rtrs_con *con, struct rtrs_iu *iu);
0306 int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size,
0307               struct ib_send_wr *head);
0308 int rtrs_iu_post_rdma_write_imm(struct rtrs_con *con, struct rtrs_iu *iu,
0309                 struct ib_sge *sge, unsigned int num_sge,
0310                 u32 rkey, u64 rdma_addr, u32 imm_data,
0311                 enum ib_send_flags flags,
0312                 struct ib_send_wr *head,
0313                 struct ib_send_wr *tail);
0314 
0315 int rtrs_post_recv_empty(struct rtrs_con *con, struct ib_cqe *cqe);
0316 
0317 int rtrs_cq_qp_create(struct rtrs_path *path, struct rtrs_con *con,
0318               u32 max_send_sge, int cq_vector, int nr_cqe,
0319               u32 max_send_wr, u32 max_recv_wr,
0320               enum ib_poll_context poll_ctx);
0321 void rtrs_cq_qp_destroy(struct rtrs_con *con);
0322 
/* Heartbeat (HB) entry points operating on a path */
void rtrs_init_hb(struct rtrs_path *path,
                  struct ib_cqe *cqe,
                  unsigned int interval_ms,
                  unsigned int missed_max,
                  void (*err_handler)(struct rtrs_con *con),
                  struct workqueue_struct *wq);
void rtrs_start_hb(struct rtrs_path *path);
void rtrs_stop_hb(struct rtrs_path *path);
void rtrs_send_hb_ack(struct rtrs_path *path);
0330 
/* Device/PD pool (struct rtrs_rdma_dev_pd) entry points */
void rtrs_rdma_dev_pd_init(enum ib_pd_flags pd_flags,
                           struct rtrs_rdma_dev_pd *pool);
void rtrs_rdma_dev_pd_deinit(struct rtrs_rdma_dev_pd *pool);

/* Lookup-or-create and release of a struct rtrs_ib_dev within a pool */
struct rtrs_ib_dev *rtrs_ib_dev_find_or_add(struct ib_device *ib_dev,
                                            struct rtrs_rdma_dev_pd *pool);
int rtrs_ib_dev_put(struct rtrs_ib_dev *dev);
0338 
0339 static inline u32 rtrs_to_imm(u32 type, u32 payload)
0340 {
0341     BUILD_BUG_ON(MAX_IMM_PAYL_BITS + MAX_IMM_TYPE_BITS != 32);
0342     BUILD_BUG_ON(RTRS_LAST_IMM > (1<<MAX_IMM_TYPE_BITS));
0343     return ((type & MAX_IMM_TYPE_MASK) << MAX_IMM_PAYL_BITS) |
0344         (payload & MAX_IMM_PAYL_MASK);
0345 }
0346 
0347 static inline void rtrs_from_imm(u32 imm, u32 *type, u32 *payload)
0348 {
0349     *payload = imm & MAX_IMM_PAYL_MASK;
0350     *type = imm >> MAX_IMM_PAYL_BITS;
0351 }
0352 
0353 static inline u32 rtrs_to_io_req_imm(u32 addr)
0354 {
0355     return rtrs_to_imm(RTRS_IO_REQ_IMM, addr);
0356 }
0357 
0358 static inline u32 rtrs_to_io_rsp_imm(u32 msg_id, int errno, bool w_inval)
0359 {
0360     enum rtrs_imm_type type;
0361     u32 payload;
0362 
0363     /* 9 bits for errno, 19 bits for msg_id */
0364     payload = (abs(errno) & 0x1ff) << 19 | (msg_id & 0x7ffff);
0365     type = w_inval ? RTRS_IO_RSP_W_INV_IMM : RTRS_IO_RSP_IMM;
0366 
0367     return rtrs_to_imm(type, payload);
0368 }
0369 
0370 static inline void rtrs_from_io_rsp_imm(u32 payload, u32 *msg_id, int *errno)
0371 {
0372     /* 9 bits for errno, 19 bits for msg_id */
0373     *msg_id = payload & 0x7ffff;
0374     *errno = -(int)((payload >> 19) & 0x1ff);
0375 }
0376 
/*
 * STAT_STORE_FUNC - generate a sysfs store handler named <set_value>_store
 * @type:      structure type that embeds the kobject as member kobj_stats
 * @set_value: prefix of the generated function name
 * @reset:     callable invoked as reset(stats, bool)
 *
 * Input "1" calls reset(stats, true) and input "0" calls reset(stats, false);
 * any other input yields -EINVAL. A non-zero result of @reset is returned
 * unchanged, otherwise the handler returns count.
 */
#define STAT_STORE_FUNC(type, set_value, reset)                     \
static ssize_t set_value##_store(struct kobject *kobj,              \
                 struct kobj_attribute *attr,                       \
                 const char *buf, size_t count)                     \
{                                                                   \
    type *stats = container_of(kobj, type, kobj_stats);             \
    bool flag;                                                      \
    int err;                                                        \
                                                                    \
    if (sysfs_streq(buf, "1"))                                      \
        flag = true;                                                \
    else if (sysfs_streq(buf, "0"))                                 \
        flag = false;                                               \
    else                                                            \
        return -EINVAL;                                             \
                                                                    \
    err = reset(stats, flag);                                       \
    if (err)                                                        \
        return err;                                                 \
                                                                    \
    return count;                                                   \
}
0394 
/*
 * STAT_SHOW_FUNC - generate a sysfs show handler named <get_value>_show
 * @type:      structure type that embeds the kobject as member kobj_stats
 * @get_value: prefix of the generated function name
 * @print:     callable invoked as print(stats, page); its result is returned
 */
#define STAT_SHOW_FUNC(type, get_value, print)                      \
static ssize_t get_value##_show(struct kobject *kobj,               \
               struct kobj_attribute *attr,                         \
               char *page)                                          \
{                                                                   \
    return print(container_of(kobj, type, kobj_stats), page);       \
}
0404 
/*
 * STAT_ATTR - define <stat>_show, <stat>_store and the read/write
 * kobj_attribute <stat>_attr that refers to them. The caller supplies the
 * terminating semicolon.
 */
#define STAT_ATTR(type, stat, print, reset)                         \
STAT_SHOW_FUNC(type, stat, print)                                   \
STAT_STORE_FUNC(type, stat, reset)                                  \
static struct kobj_attribute stat##_attr = __ATTR_RW(stat)
0409 
0410 #endif /* RTRS_PRI_H */