Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * iSER transport for the Open iSCSI Initiator & iSER transport internals
0003  *
0004  * Copyright (C) 2004 Dmitry Yusupov
0005  * Copyright (C) 2004 Alex Aizman
0006  * Copyright (C) 2005 Mike Christie
0007  * based on code maintained by open-iscsi@googlegroups.com
0008  *
0009  * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
0010  * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
0011  * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
0012  *
0013  * This software is available to you under a choice of one of two
0014  * licenses.  You may choose to be licensed under the terms of the GNU
0015  * General Public License (GPL) Version 2, available from the file
0016  * COPYING in the main directory of this source tree, or the
0017  * OpenIB.org BSD license below:
0018  *
0019  *     Redistribution and use in source and binary forms, with or
0020  *     without modification, are permitted provided that the following
0021  *     conditions are met:
0022  *
0023  *  - Redistributions of source code must retain the above
0024  *    copyright notice, this list of conditions and the following
0025  *    disclaimer.
0026  *
0027  *  - Redistributions in binary form must reproduce the above
0028  *    copyright notice, this list of conditions and the following
0029  *    disclaimer in the documentation and/or other materials
0030  *    provided with the distribution.
0031  *
0032  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
0033  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
0034  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
0035  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
0036  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
0037  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
0038  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
0039  * SOFTWARE.
0040  */
0041 #ifndef __ISCSI_ISER_H__
0042 #define __ISCSI_ISER_H__
0043 
0044 #include <linux/types.h>
0045 #include <linux/net.h>
0046 #include <linux/printk.h>
0047 #include <scsi/libiscsi.h>
0048 #include <scsi/scsi_transport_iscsi.h>
0049 #include <scsi/scsi_cmnd.h>
0050 #include <scsi/scsi_device.h>
0051 #include <scsi/iser.h>
0052 
0053 #include <linux/interrupt.h>
0054 #include <linux/wait.h>
0055 #include <linux/sched.h>
0056 #include <linux/list.h>
0057 #include <linux/slab.h>
0058 #include <linux/dma-mapping.h>
0059 #include <linux/mutex.h>
0060 #include <linux/mempool.h>
0061 #include <linux/uio.h>
0062 
0063 #include <linux/socket.h>
0064 #include <linux/in.h>
0065 #include <linux/in6.h>
0066 
0067 #include <rdma/ib_verbs.h>
0068 #include <rdma/rdma_cm.h>
0069 /* Driver identity strings; PFX is the prefix of every iser_* log message */
0070 #define DRV_NAME    "iser"
0071 #define PFX     DRV_NAME ": "
0072 #define DRV_VER     "1.6"
0073 /* KERN_DEBUG message tagged with __func__; emitted only if iser_debug_level > 2 */
0074 #define iser_dbg(fmt, arg...)                \
0075     do {                         \
0076         if (unlikely(iser_debug_level > 2))  \
0077             printk(KERN_DEBUG PFX "%s: " fmt,\
0078                 __func__ , ## arg);  \
0079     } while (0)
0080 /* pr_warn() message tagged with __func__; emitted only if iser_debug_level > 0 */
0081 #define iser_warn(fmt, arg...)              \
0082     do {                        \
0083         if (unlikely(iser_debug_level > 0)) \
0084             pr_warn(PFX "%s: " fmt,     \
0085                 __func__ , ## arg); \
0086     } while (0)
0087 /* pr_info() message tagged with __func__; emitted only if iser_debug_level > 1 */
0088 #define iser_info(fmt, arg...)              \
0089     do {                        \
0090         if (unlikely(iser_debug_level > 1)) \
0091             pr_info(PFX "%s: " fmt,     \
0092                 __func__ , ## arg); \
0093     } while (0)
0094 /* pr_err() message tagged with __func__; not gated by iser_debug_level */
0095 #define iser_err(fmt, arg...) \
0096     pr_err(PFX "%s: " fmt, __func__ , ## arg)
0097 /* SG table sizes below are counted in 4K pages: bytes >> ilog2(SZ_4K) */
0098 /* Default support is 512KB I/O size */
0099 #define ISER_DEF_MAX_SECTORS        1024
0100 #define ISCSI_ISER_DEF_SG_TABLESIZE                                            \
0101     ((ISER_DEF_MAX_SECTORS * SECTOR_SIZE) >> ilog2(SZ_4K))
0102 /* Maximum support is 16MB I/O size */
0103 #define ISCSI_ISER_MAX_SG_TABLESIZE ((32768 * SECTOR_SIZE) >> ilog2(SZ_4K))
0104 /* Default command limit: the larger of ISCSI_DEF_XMIT_CMDS_MAX and 512 */
0105 #define ISER_DEF_XMIT_CMDS_DEFAULT      512
0106 #if ISCSI_DEF_XMIT_CMDS_MAX > ISER_DEF_XMIT_CMDS_DEFAULT
0107     #define ISER_DEF_XMIT_CMDS_MAX      ISCSI_DEF_XMIT_CMDS_MAX
0108 #else
0109     #define ISER_DEF_XMIT_CMDS_MAX      ISER_DEF_XMIT_CMDS_DEFAULT
0110 #endif
0111 #define ISER_DEF_CMD_PER_LUN        ISER_DEF_XMIT_CMDS_MAX /* same limit per LUN */
0112 
/* QP settings */
/* Maximal bounds on received asynchronous PDUs */
#define ISER_MAX_RX_MISC_PDUS       4 /* NOOP_IN(2), ASYNC_EVENT(2) */

/* Maximal bounds on transmitted non-command PDUs */
#define ISER_MAX_TX_MISC_PDUS       6 /* NOOP_OUT(2), TEXT(1), SCSI_TMFUNC(2), LOGOUT(1) */

#define ISER_QP_MAX_RECV_DTOS       (ISER_DEF_XMIT_CMDS_MAX)

/*
 * The max TX (send) WR supported by the iSER QP is defined by
 *     max_send_wr = T * (1 + D) + C
 * where T is ISER_DEF_XMIT_CMDS_MAX, D is how many inflight dataouts we
 * expect to have at max for a SCSI command, and C is the misc PDU budget
 * (ISER_MAX_TX_MISC_PDUS + ISER_MAX_RX_MISC_PDUS). The tx posting &
 * completion handling code supports an -EAGAIN scheme where tx is suspended
 * till the QP has room for more send WR. D=8 comes from 64K/8K.
 */
#define ISER_INFLIGHT_DATAOUTS      8

#define ISER_QP_MAX_REQ_DTOS        (ISER_DEF_XMIT_CMDS_MAX *      \
                                     (1 + ISER_INFLIGHT_DATAOUTS) + \
                                     ISER_MAX_TX_MISC_PDUS        + \
                                     ISER_MAX_RX_MISC_PDUS)

/* Max registration work requests per command */
#define ISER_MAX_REG_WR_PER_CMD     5

/* For Signature we don't support DATAOUTs so no need to make room for them */
#define ISER_QP_SIG_MAX_REQ_DTOS    (ISER_DEF_XMIT_CMDS_MAX *       \
                                     (1 + ISER_MAX_REG_WR_PER_CMD) + \
                                     ISER_MAX_TX_MISC_PDUS         + \
                                     ISER_MAX_RX_MISC_PDUS)

/*
 * Inverse of the max_send_wr formula above: the number of commands that fit
 * in a QP offering @send_wr send work requests, i.e. (send_wr - C) / (1 + D)
 * using integer division. The argument is parenthesized so that any
 * expression (e.g. a conditional) can be passed without precedence surprises.
 */
#define ISER_GET_MAX_XMIT_CMDS(send_wr) (((send_wr)               \
                                          - ISER_MAX_TX_MISC_PDUS    \
                                          - ISER_MAX_RX_MISC_PDUS) / \
                                         (1 + ISER_INFLIGHT_DATAOUTS))
0148 
0149 /* Constant PDU length calculations */
0150 #define ISER_HEADERS_LEN    (sizeof(struct iser_ctrl) + sizeof(struct iscsi_hdr))
0151 
0152 #define ISER_RECV_DATA_SEG_LEN  128 /* size of data[] in struct iser_rx_desc */
0153 #define ISER_RX_PAYLOAD_SIZE    (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN)
0154 #define ISER_RX_LOGIN_SIZE  (ISER_HEADERS_LEN + ISCSI_DEF_MAX_RECV_SEG_LEN)
0155 
0156 /* Length of an object name string (e.g. struct iser_conn.name) */
0157 #define ISER_OBJECT_NAME_SIZE           64
0158 /* Logical connection state, stored in struct iser_conn.state */
0159 enum iser_conn_state {
0160     ISER_CONN_INIT,        /* descriptor allocd, no conn          */
0161     ISER_CONN_PENDING,     /* in the process of being established */
0162     ISER_CONN_UP,          /* up and running                      */
0163     ISER_CONN_TERMINATING,     /* in the process of being terminated  */
0164     ISER_CONN_DOWN,        /* shut down                           */
0165     ISER_CONN_STATES_NUM   /* number of states above              */
0166 };
0167 /* Task progress, stored in struct iscsi_iser_task.status */
0168 enum iser_task_status {
0169     ISER_TASK_STATUS_INIT = 0,
0170     ISER_TASK_STATUS_STARTED,
0171     ISER_TASK_STATUS_COMPLETED
0172 };
0173 /* Data direction; ISER_DIRS_NUM sizes the per-direction arrays in struct iscsi_iser_task */
0174 enum iser_data_dir {
0175     ISER_DIR_IN = 0,       /* to initiator */
0176     ISER_DIR_OUT,          /* from initiator */
0177     ISER_DIRS_NUM          /* number of directions */
0178 };
0179 
0180 /**
0181  * struct iser_data_buf - iSER data buffer
0182  *
0183  * @sg:           pointer to the sg list
0184  * @size:         number of entries in this sg list
0185  * @data_len:     total buffer byte len
0186  * @dma_nents:    returned by dma_map_sg
0187  */
0188 struct iser_data_buf {
0189     struct scatterlist *sg;
0190     int                size;
0191     unsigned long      data_len;
0192     int                dma_nents;
0193 };
0194 
0195 /* fwd declarations */
0196 struct iser_device;
0197 struct iscsi_iser_task;
0198 struct iscsi_endpoint;
0199 struct iser_reg_resources;
0200 
0201 /**
0202  * struct iser_mem_reg - iSER memory registration info
0203  *
0204  * @sge:          memory region sg element
0205  * @rkey:         memory region remote key
0206  * @desc:         pointer to fast registration context (struct iser_fr_desc)
0207  */
0208 struct iser_mem_reg {
0209     struct ib_sge sge;
0210     u32 rkey;
0211     struct iser_fr_desc *desc;
0212 };
0213 /* Kind of TX descriptor, stored in struct iser_tx_desc.type */
0214 enum iser_desc_type {
0215     ISCSI_TX_CONTROL ,
0216     ISCSI_TX_SCSI_COMMAND,
0217     ISCSI_TX_DATAOUT
0218 };
0219 
0220 /**
0221  * struct iser_tx_desc - iSER TX descriptor
0222  *
0223  * @iser_header:   iser header
0224  * @iscsi_header:  iscsi header
0225  * @type:          command/control/dataout (enum iser_desc_type)
0226  * @dma_addr:      header buffer dma address
0227  * @tx_sg:         sg[0] points to iser/iscsi headers
0228  *                 sg[1] optionally points to either of immediate data
0229  *                 unsolicited data-out or control
0230  * @num_sge:       number of sges used on this TX task
0231  * @cqe:           completion handler; iser_tx() maps it back to this descriptor
0232  * @mapped:        Is the task header mapped
0233  * @reg_wr:        registration WR
0234  * @send_wr:       send WR
0235  * @inv_wr:        invalidate WR
0236  */
0237 struct iser_tx_desc {
0238     struct iser_ctrl             iser_header;
0239     struct iscsi_hdr             iscsi_header;
0240     enum   iser_desc_type        type;
0241     u64                  dma_addr;
0242     struct ib_sge            tx_sg[2];
0243     int                          num_sge;
0244     struct ib_cqe            cqe;
0245     bool                 mapped;
0246     struct ib_reg_wr         reg_wr;
0247     struct ib_send_wr        send_wr;
0248     struct ib_send_wr        inv_wr;
0249 };
0250 /* Padding that brings the __packed struct iser_rx_desc to 256 bytes in total */
0251 #define ISER_RX_PAD_SIZE    (256 - (ISER_RX_PAYLOAD_SIZE + \
0252                  sizeof(u64) + sizeof(struct ib_sge) + \
0253                  sizeof(struct ib_cqe)))
0254 /**
0255  * struct iser_rx_desc - iSER RX descriptor
0256  *
0257  * @iser_header:   iser header
0258  * @iscsi_header:  iscsi header
0259  * @data:          received data segment (ISER_RECV_DATA_SEG_LEN bytes)
0260  * @dma_addr:      receive buffer dma address
0261  * @rx_sg:         ib_sge of receive buffer
0262  * @cqe:           completion handler; iser_rx() maps it back to this descriptor
0263  * @pad:           for sense data TODO: Modify to maximum sense length supported
0264  */
0265 struct iser_rx_desc {
0266     struct iser_ctrl             iser_header;
0267     struct iscsi_hdr             iscsi_header;
0268     char                     data[ISER_RECV_DATA_SEG_LEN];
0269     u64                  dma_addr;
0270     struct ib_sge            rx_sg;
0271     struct ib_cqe            cqe;
0272     char                     pad[ISER_RX_PAD_SIZE];
0273 } __packed;
0274 
0275 /**
0276  * struct iser_login_desc - iSER login descriptor
0277  *
0278  * @req:           pointer to login request buffer
0279  * @rsp:           pointer to login response buffer
0280  * @req_dma:       DMA address of login request buffer
0281  * @rsp_dma:       DMA address of login response buffer
0282  * @sge:           IB sge for login post recv
0283  * @cqe:           completion handler; iser_login() maps it back to this descriptor
0284  */
0285 struct iser_login_desc {
0286     void                         *req;
0287     void                         *rsp;
0288     u64                          req_dma;
0289     u64                          rsp_dma;
0290     struct ib_sge                sge;
0291     struct ib_cqe            cqe;
0292 } __packed;
0293 
0294 struct iser_conn;
0295 struct ib_conn;
0296 
0297 /**
0298  * struct iser_device - iSER device handle
0299  *
0300  * @ib_device:     RDMA device
0301  * @pd:            Protection Domain for this device
0303  * @event_handler: IB events handle routine
0304  * @ig_list:       entry in devices list
0305  * @refcount:      Reference counter, dominated by open iser connections
0306  */
0307 struct iser_device {
0308     struct ib_device             *ib_device;
0309     struct ib_pd                 *pd;
0310     struct ib_event_handler      event_handler;
0311     struct list_head             ig_list;
0312     int                          refcount;
0313 };
0314 
0315 /**
0316  * struct iser_reg_resources - Fast registration resources
0317  *
0318  * @mr:         memory region
0319  * @sig_mr:     signature memory region
0320  * @mr_valid:   single-bit flag indicating whether mr is valid
0321  */
0322 struct iser_reg_resources {
0323     struct ib_mr                     *mr;
0324     struct ib_mr                     *sig_mr;
0325     u8                mr_valid:1;
0326 };
0327 
0328 /**
0329  * struct iser_fr_desc - Fast registration descriptor
0330  *
0331  * @list:           entry in connection fastreg pool
0332  * @rsc:            data buffer registration resources
0333  * @sig_protected:  is region protected indicator
0334  * @all_list:       first and last list members
0335  *                  NOTE(review): presumably the entry linked on
0336  *                  struct iser_fr_pool.all_list - confirm
0337  */
0336 struct iser_fr_desc {
0337     struct list_head          list;
0338     struct iser_reg_resources     rsc;
0339     bool                  sig_protected;
0340     struct list_head                  all_list;
0341 };
0342 
0343 /**
0344  * struct iser_fr_pool - connection fast registration pool
0345  *
0346  * @list:                list of fastreg descriptors
0347  * @lock:                protects fastreg pool
0348  * @size:                size of the pool
0349  * @all_list:            first and last list members
0350  *                       NOTE(review): presumably heads the list formed by
0351  *                       struct iser_fr_desc.all_list entries - confirm
0352  */
0351 struct iser_fr_pool {
0352     struct list_head        list;
0353     spinlock_t              lock;
0354     int                     size;
0355     struct list_head        all_list;
0356 };
0357 
0358 /**
0359  * struct ib_conn - Infiniband related objects
0360  *
0361  * @cma_id:              rdma_cm connection manager handle
0362  * @qp:                  Connection Queue-pair
0363  * @cq:                  Connection completion queue
0364  * @cq_size:             The number of max outstanding completions
0365  * @device:              reference to iser device
0366  * @fr_pool:             connection fast registration pool
0367  * @pi_support:          Indicate device T10-PI support
0368  * @reg_cqe:             completion handler
0369  */
0370 struct ib_conn {
0371     struct rdma_cm_id           *cma_id;
0372     struct ib_qp                *qp;
0373     struct ib_cq            *cq;
0374     u32             cq_size;
0375     struct iser_device          *device;
0376     struct iser_fr_pool          fr_pool;
0377     bool                 pi_support;
0378     struct ib_cqe            reg_cqe;
0379 };
0380 
0381 /**
0382  * struct iser_conn - iSER connection context
0383  *
0384  * @ib_conn:          connection RDMA resources
0385  * @iscsi_conn:       link to matching iscsi connection
0386  * @ep:               transport handle
0387  * @state:            connection logical state
0388  * @qp_max_recv_dtos: maximum number of data outs, corresponds
0389  *                    to max number of post recvs
0390  * @max_cmds:         maximum cmds allowed for this connection
0391  * @name:             connection peer portal
0392  * @release_work:     deferred work for release job
0393  * @state_mutex:      protects iser connection state
0394  * @stop_completion:  conn_stop completion
0395  * @ib_completion:    RDMA cleanup completion
0396  * @up_completion:    connection establishment completed
0397  *                    (state is ISER_CONN_UP)
0398  * @conn_list:        entry in ig conn list
0399  * @login_desc:       login descriptor
0400  * @rx_descs:         rx buffers array (cyclic buffer)
0401  * @num_rx_descs:     number of rx descriptors
0402  * @scsi_sg_tablesize: scsi host sg_tablesize
0403  * @pages_per_mr:     maximum pages available for registration
0404  * @snd_w_inv:        connection uses remote invalidation
0405  */
0406 struct iser_conn {
0407     struct ib_conn           ib_conn;
0408     struct iscsi_conn        *iscsi_conn;
0409     struct iscsi_endpoint        *ep;
0410     enum iser_conn_state         state;
0411     unsigned             qp_max_recv_dtos;
0412     u16                          max_cmds;
0413     char                 name[ISER_OBJECT_NAME_SIZE];
0414     struct work_struct       release_work;
0415     struct mutex             state_mutex;
0416     struct completion        stop_completion;
0417     struct completion        ib_completion;
0418     struct completion        up_completion;
0419     struct list_head         conn_list;
0420     struct iser_login_desc       login_desc;
0421     struct iser_rx_desc      *rx_descs;
0422     u32                          num_rx_descs;
0423     unsigned short               scsi_sg_tablesize;
0424     unsigned short               pages_per_mr;
0425     bool                 snd_w_inv;
0426 };
0427 
0428 /**
0429  * struct iscsi_iser_task - iser task context
0430  *
0431  * @desc:             TX descriptor
0432  * @iser_conn:        link to iser connection
0433  * @status:           current task status
0434  * @sc:               link to scsi command
0435  * @command_sent:     indicate if command was sent
0436  * @dir:              iser data direction, one int per enum iser_data_dir value
0437  * @rdma_reg:         task rdma registration desc, one per direction
0438  * @data:             iser data buffer desc, one per direction
0439  * @prot:             iser protection buffer desc, one per direction
0440  */
0441 struct iscsi_iser_task {
0442     struct iser_tx_desc          desc;
0443     struct iser_conn         *iser_conn;
0444     enum iser_task_status        status;
0445     struct scsi_cmnd         *sc;
0446     int                          command_sent;
0447     int                          dir[ISER_DIRS_NUM];
0448     struct iser_mem_reg          rdma_reg[ISER_DIRS_NUM];
0449     struct iser_data_buf         data[ISER_DIRS_NUM];
0450     struct iser_data_buf         prot[ISER_DIRS_NUM];
0451 };
0452 
0453 /**
0454  * struct iser_global - iSER global context
0455  *
0456  * An instance named ig is declared extern in this header.
0457  *
0456  * @device_list_mutex:    protects device_list
0457  * @device_list:          iser devices global list
0458  * @connlist_mutex:       protects connlist
0459  * @connlist:             iser connections global list
0460  * @desc_cache:           kmem cache for tx dataout
0461  */
0462 struct iser_global {
0463     struct mutex      device_list_mutex;
0464     struct list_head  device_list;
0465     struct mutex      connlist_mutex;
0466     struct list_head  connlist;
0467     struct kmem_cache *desc_cache;
0468 };
0469 
0470 extern struct iser_global ig;
0471 extern int iser_debug_level;
0472 extern bool iser_pi_enable;
0473 extern unsigned int iser_max_sectors;
0474 extern bool iser_always_reg;
0475 
0476 int iser_send_control(struct iscsi_conn *conn,
0477               struct iscsi_task *task);
0478 
0479 int iser_send_command(struct iscsi_conn *conn,
0480               struct iscsi_task *task);
0481 
0482 int iser_send_data_out(struct iscsi_conn *conn,
0483                struct iscsi_task *task,
0484                struct iscsi_data *hdr);
0485 
0486 void iscsi_iser_recv(struct iscsi_conn *conn,
0487              struct iscsi_hdr *hdr,
0488              char *rx_data,
0489              int rx_data_len);
0490 
0491 void iser_conn_init(struct iser_conn *iser_conn);
0492 
0493 void iser_conn_release(struct iser_conn *iser_conn);
0494 
0495 int iser_conn_terminate(struct iser_conn *iser_conn);
0496 
0497 void iser_release_work(struct work_struct *work);
0498 
0499 void iser_err_comp(struct ib_wc *wc, const char *type);
0500 void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc);
0501 void iser_task_rsp(struct ib_cq *cq, struct ib_wc *wc);
0502 void iser_cmd_comp(struct ib_cq *cq, struct ib_wc *wc);
0503 void iser_ctrl_comp(struct ib_cq *cq, struct ib_wc *wc);
0504 void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc);
0505 void iser_reg_comp(struct ib_cq *cq, struct ib_wc *wc);
0506 
0507 void iser_task_rdma_init(struct iscsi_iser_task *task);
0508 
0509 void iser_task_rdma_finalize(struct iscsi_iser_task *task);
0510 
0511 void iser_free_rx_descriptors(struct iser_conn *iser_conn);
0512 
0513 void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
0514                      struct iser_data_buf *mem,
0515                      enum iser_data_dir cmd_dir);
0516 
0517 int iser_reg_mem_fastreg(struct iscsi_iser_task *task,
0518              enum iser_data_dir dir,
0519              bool all_imm);
0520 void iser_unreg_mem_fastreg(struct iscsi_iser_task *task,
0521                 enum iser_data_dir dir);
0522 
0523 int  iser_connect(struct iser_conn *iser_conn,
0524           struct sockaddr *src_addr,
0525           struct sockaddr *dst_addr,
0526           int non_blocking);
0527 
0528 int  iser_post_recvl(struct iser_conn *iser_conn);
0529 int  iser_post_recvm(struct iser_conn *iser_conn,
0530              struct iser_rx_desc *rx_desc);
0531 int  iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc);
0532 
0533 int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
0534                enum iser_data_dir iser_dir,
0535                enum dma_data_direction dma_dir);
0536 
0537 void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task,
0538                   enum iser_data_dir iser_dir,
0539                   enum dma_data_direction dma_dir);
0540 
0541 int  iser_initialize_task_headers(struct iscsi_task *task,
0542             struct iser_tx_desc *tx_desc);
0543 int iser_alloc_rx_descriptors(struct iser_conn *iser_conn,
0544                   struct iscsi_session *session);
0545 int iser_alloc_fastreg_pool(struct ib_conn *ib_conn,
0546                 unsigned cmds_max,
0547                 unsigned int size);
0548 void iser_free_fastreg_pool(struct ib_conn *ib_conn);
0549 u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
0550                  enum iser_data_dir cmd_dir, sector_t *sector);
0551 
0552 static inline struct iser_conn *
0553 to_iser_conn(struct ib_conn *ib_conn)
0554 {
0555     return container_of(ib_conn, struct iser_conn, ib_conn);
0556 }
0557 
0558 static inline struct iser_rx_desc *
0559 iser_rx(struct ib_cqe *cqe)
0560 {
0561     return container_of(cqe, struct iser_rx_desc, cqe);
0562 }
0563 
0564 static inline struct iser_tx_desc *
0565 iser_tx(struct ib_cqe *cqe)
0566 {
0567     return container_of(cqe, struct iser_tx_desc, cqe);
0568 }
0569 
0570 static inline struct iser_login_desc *
0571 iser_login(struct ib_cqe *cqe)
0572 {
0573     return container_of(cqe, struct iser_login_desc, cqe);
0574 }
0575 
0576 #endif