Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * iSCSI Initiator over iSER Data-Path
0003  *
0004  * Copyright (C) 2004 Dmitry Yusupov
0005  * Copyright (C) 2004 Alex Aizman
0006  * Copyright (C) 2005 Mike Christie
0007  * Copyright (c) 2005, 2006 Voltaire, Inc. All rights reserved.
0008  * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
0009  * maintained by openib-general@openib.org
0010  *
0011  * This software is available to you under a choice of one of two
0012  * licenses.  You may choose to be licensed under the terms of the GNU
0013  * General Public License (GPL) Version 2, available from the file
0014  * COPYING in the main directory of this source tree, or the
0015  * OpenIB.org BSD license below:
0016  *
0017  *     Redistribution and use in source and binary forms, with or
0018  *     without modification, are permitted provided that the following
0019  *     conditions are met:
0020  *
0021  *  - Redistributions of source code must retain the above
0022  *    copyright notice, this list of conditions and the following
0023  *    disclaimer.
0024  *
0025  *  - Redistributions in binary form must reproduce the above
0026  *    copyright notice, this list of conditions and the following
0027  *    disclaimer in the documentation and/or other materials
0028  *    provided with the distribution.
0029  *
0030  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
0031  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
0032  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
0033  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
0034  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
0035  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
0036  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
0037  * SOFTWARE.
0038  *
0039  * Credits:
0040  *  Christoph Hellwig
0041  *  FUJITA Tomonori
0042  *  Arne Redlich
0043  *  Zhenyu Wang
0044  * Modified by:
0045  *      Erez Zilber
0046  */
0047 
0048 #include <linux/types.h>
0049 #include <linux/list.h>
0050 #include <linux/hardirq.h>
0051 #include <linux/kfifo.h>
0052 #include <linux/blkdev.h>
0053 #include <linux/init.h>
0054 #include <linux/ioctl.h>
0055 #include <linux/cdev.h>
0056 #include <linux/in.h>
0057 #include <linux/net.h>
0058 #include <linux/scatterlist.h>
0059 #include <linux/delay.h>
0060 #include <linux/slab.h>
0061 #include <linux/module.h>
0062 
0063 #include <net/sock.h>
0064 
0065 #include <linux/uaccess.h>
0066 
0067 #include <scsi/scsi_cmnd.h>
0068 #include <scsi/scsi_device.h>
0069 #include <scsi/scsi_eh.h>
0070 #include <scsi/scsi_tcq.h>
0071 #include <scsi/scsi_host.h>
0072 #include <scsi/scsi.h>
0073 #include <scsi/scsi_transport_iscsi.h>
0074 
0075 #include "iscsi_iser.h"
0076 
0077 MODULE_DESCRIPTION("iSER (iSCSI Extensions for RDMA) Datamover");
0078 MODULE_LICENSE("Dual BSD/GPL");
0079 MODULE_AUTHOR("Alex Nezhinsky, Dan Bar Dov, Or Gerlitz");
0080 
0081 static struct scsi_host_template iscsi_iser_sht;
0082 static struct iscsi_transport iscsi_iser_transport;
0083 static struct scsi_transport_template *iscsi_iser_scsi_transport;
0084 static struct workqueue_struct *release_wq;
0085 static DEFINE_MUTEX(unbind_iser_conn_mutex);
0086 struct iser_global ig;
0087 
0088 int iser_debug_level = 0;
0089 module_param_named(debug_level, iser_debug_level, int, S_IRUGO | S_IWUSR);
0090 MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0 (default:disabled)");
0091 
0092 static int iscsi_iser_set(const char *val, const struct kernel_param *kp);
0093 static const struct kernel_param_ops iscsi_iser_size_ops = {
0094     .set = iscsi_iser_set,
0095     .get = param_get_uint,
0096 };
0097 
0098 static unsigned int iscsi_max_lun = 512;
0099 module_param_cb(max_lun, &iscsi_iser_size_ops, &iscsi_max_lun, S_IRUGO);
0100 MODULE_PARM_DESC(max_lun, "Max LUNs to allow per session, should > 0 (default:512)");
0101 
0102 unsigned int iser_max_sectors = ISER_DEF_MAX_SECTORS;
0103 module_param_cb(max_sectors, &iscsi_iser_size_ops, &iser_max_sectors,
0104         S_IRUGO | S_IWUSR);
0105 MODULE_PARM_DESC(max_sectors, "Max number of sectors in a single scsi command, should > 0 (default:1024)");
0106 
0107 bool iser_always_reg = true;
0108 module_param_named(always_register, iser_always_reg, bool, S_IRUGO);
0109 MODULE_PARM_DESC(always_register,
0110          "Always register memory, even for continuous memory regions (default:true)");
0111 
0112 bool iser_pi_enable = false;
0113 module_param_named(pi_enable, iser_pi_enable, bool, S_IRUGO);
0114 MODULE_PARM_DESC(pi_enable, "Enable T10-PI offload support (default:disabled)");
0115 
0116 static int iscsi_iser_set(const char *val, const struct kernel_param *kp)
0117 {
0118     int ret;
0119     unsigned int n = 0;
0120 
0121     ret = kstrtouint(val, 10, &n);
0122     if (ret != 0 || n == 0)
0123         return -EINVAL;
0124 
0125     return param_set_uint(val, kp);
0126 }
0127 
0128 /*
0129  * iscsi_iser_recv() - Process a successful recv completion
0130  * @conn:         iscsi connection
0131  * @hdr:          iscsi header
0132  * @rx_data:      buffer containing receive data payload
0133  * @rx_data_len:  length of rx_data
0134  *
0135  * Notes: In case of data length errors or iscsi PDU completion failures
0136  *        this routine will signal iscsi layer of connection failure.
0137  */
0138 void iscsi_iser_recv(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
0139              char *rx_data, int rx_data_len)
0140 {
0141     int rc = 0;
0142     int datalen;
0143 
0144     /* verify PDU length */
0145     datalen = ntoh24(hdr->dlength);
0146     if (datalen > rx_data_len || (datalen + 4) < rx_data_len) {
0147         iser_err("wrong datalen %d (hdr), %d (IB)\n",
0148             datalen, rx_data_len);
0149         rc = ISCSI_ERR_DATALEN;
0150         goto error;
0151     }
0152 
0153     if (datalen != rx_data_len)
0154         iser_dbg("aligned datalen (%d) hdr, %d (IB)\n",
0155             datalen, rx_data_len);
0156 
0157     rc = iscsi_complete_pdu(conn, hdr, rx_data, rx_data_len);
0158     if (rc && rc != ISCSI_ERR_NO_SCSI_CMD)
0159         goto error;
0160 
0161     return;
0162 error:
0163     iscsi_conn_failure(conn, rc);
0164 }
0165 
0166 /**
0167  * iscsi_iser_pdu_alloc() - allocate an iscsi-iser PDU
0168  * @task:     iscsi task
0169  * @opcode:   iscsi command opcode
0170  *
0171  * Netes: This routine can't fail, just assign iscsi task
0172  *        hdr and max hdr size.
0173  */
0174 static int iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode)
0175 {
0176     struct iscsi_iser_task *iser_task = task->dd_data;
0177 
0178     task->hdr = (struct iscsi_hdr *)&iser_task->desc.iscsi_header;
0179     task->hdr_max = sizeof(iser_task->desc.iscsi_header);
0180 
0181     return 0;
0182 }
0183 
0184 /**
0185  * iser_initialize_task_headers() - Initialize task headers
0186  * @task:       iscsi task
0187  * @tx_desc:    iser tx descriptor
0188  *
0189  * Notes:
0190  * This routine may race with iser teardown flow for scsi
0191  * error handling TMFs. So for TMF we should acquire the
0192  * state mutex to avoid dereferencing the IB device which
0193  * may have already been terminated.
0194  */
0195 int iser_initialize_task_headers(struct iscsi_task *task,
0196                  struct iser_tx_desc *tx_desc)
0197 {
0198     struct iser_conn *iser_conn = task->conn->dd_data;
0199     struct iser_device *device = iser_conn->ib_conn.device;
0200     struct iscsi_iser_task *iser_task = task->dd_data;
0201     u64 dma_addr;
0202 
0203     if (unlikely(iser_conn->state != ISER_CONN_UP))
0204         return -ENODEV;
0205 
0206     dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc,
0207                 ISER_HEADERS_LEN, DMA_TO_DEVICE);
0208     if (ib_dma_mapping_error(device->ib_device, dma_addr))
0209         return -ENOMEM;
0210 
0211     tx_desc->inv_wr.next = NULL;
0212     tx_desc->reg_wr.wr.next = NULL;
0213     tx_desc->mapped = true;
0214     tx_desc->dma_addr = dma_addr;
0215     tx_desc->tx_sg[0].addr   = tx_desc->dma_addr;
0216     tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
0217     tx_desc->tx_sg[0].lkey   = device->pd->local_dma_lkey;
0218 
0219     iser_task->iser_conn = iser_conn;
0220 
0221     return 0;
0222 }
0223 
0224 /**
0225  * iscsi_iser_task_init() - Initialize iscsi-iser task
0226  * @task: iscsi task
0227  *
0228  * Initialize the task for the scsi command or mgmt command.
0229  *
0230  * Return: Returns zero on success or -ENOMEM when failing
0231  *         to init task headers (dma mapping error).
0232  */
0233 static int iscsi_iser_task_init(struct iscsi_task *task)
0234 {
0235     struct iscsi_iser_task *iser_task = task->dd_data;
0236     int ret;
0237 
0238     ret = iser_initialize_task_headers(task, &iser_task->desc);
0239     if (ret) {
0240         iser_err("Failed to init task %p, err = %d\n",
0241              iser_task, ret);
0242         return ret;
0243     }
0244 
0245     /* mgmt task */
0246     if (!task->sc)
0247         return 0;
0248 
0249     iser_task->command_sent = 0;
0250     iser_task_rdma_init(iser_task);
0251     iser_task->sc = task->sc;
0252 
0253     return 0;
0254 }
0255 
0256 /**
0257  * iscsi_iser_mtask_xmit() - xmit management (immediate) task
0258  * @conn: iscsi connection
0259  * @task: task management task
0260  *
0261  * Notes:
0262  *  The function can return -EAGAIN in which case caller must
0263  *  call it again later, or recover. '0' return code means successful
0264  *  xmit.
0265  *
0266  **/
0267 static int iscsi_iser_mtask_xmit(struct iscsi_conn *conn,
0268                  struct iscsi_task *task)
0269 {
0270     int error = 0;
0271 
0272     iser_dbg("mtask xmit [cid %d itt 0x%x]\n", conn->id, task->itt);
0273 
0274     error = iser_send_control(conn, task);
0275 
0276     /* since iser xmits control with zero copy, tasks can not be recycled
0277      * right after sending them.
0278      * The recycling scheme is based on whether a response is expected
0279      * - if yes, the task is recycled at iscsi_complete_pdu
0280      * - if no,  the task is recycled at iser_snd_completion
0281      */
0282     return error;
0283 }
0284 
0285 static int iscsi_iser_task_xmit_unsol_data(struct iscsi_conn *conn,
0286                        struct iscsi_task *task)
0287 {
0288     struct iscsi_r2t_info *r2t = &task->unsol_r2t;
0289     struct iscsi_data hdr;
0290     int error = 0;
0291 
0292     /* Send data-out PDUs while there's still unsolicited data to send */
0293     while (iscsi_task_has_unsol_data(task)) {
0294         iscsi_prep_data_out_pdu(task, r2t, &hdr);
0295         iser_dbg("Sending data-out: itt 0x%x, data count %d\n",
0296                hdr.itt, r2t->data_count);
0297 
0298         /* the buffer description has been passed with the command */
0299         /* Send the command */
0300         error = iser_send_data_out(conn, task, &hdr);
0301         if (error) {
0302             r2t->datasn--;
0303             goto iscsi_iser_task_xmit_unsol_data_exit;
0304         }
0305         r2t->sent += r2t->data_count;
0306         iser_dbg("Need to send %d more as data-out PDUs\n",
0307                r2t->data_length - r2t->sent);
0308     }
0309 
0310 iscsi_iser_task_xmit_unsol_data_exit:
0311     return error;
0312 }
0313 
0314 /**
0315  * iscsi_iser_task_xmit() - xmit iscsi-iser task
0316  * @task: iscsi task
0317  *
0318  * Return: zero on success or escalates $error on failure.
0319  */
0320 static int iscsi_iser_task_xmit(struct iscsi_task *task)
0321 {
0322     struct iscsi_conn *conn = task->conn;
0323     struct iscsi_iser_task *iser_task = task->dd_data;
0324     int error = 0;
0325 
0326     if (!task->sc)
0327         return iscsi_iser_mtask_xmit(conn, task);
0328 
0329     if (task->sc->sc_data_direction == DMA_TO_DEVICE) {
0330         BUG_ON(scsi_bufflen(task->sc) == 0);
0331 
0332         iser_dbg("cmd [itt %x total %d imm %d unsol_data %d\n",
0333                task->itt, scsi_bufflen(task->sc),
0334                task->imm_count, task->unsol_r2t.data_length);
0335     }
0336 
0337     iser_dbg("ctask xmit [cid %d itt 0x%x]\n",
0338            conn->id, task->itt);
0339 
0340     /* Send the cmd PDU */
0341     if (!iser_task->command_sent) {
0342         error = iser_send_command(conn, task);
0343         if (error)
0344             goto iscsi_iser_task_xmit_exit;
0345         iser_task->command_sent = 1;
0346     }
0347 
0348     /* Send unsolicited data-out PDU(s) if necessary */
0349     if (iscsi_task_has_unsol_data(task))
0350         error = iscsi_iser_task_xmit_unsol_data(conn, task);
0351 
0352  iscsi_iser_task_xmit_exit:
0353     return error;
0354 }
0355 
0356 /**
0357  * iscsi_iser_cleanup_task() - cleanup an iscsi-iser task
0358  * @task: iscsi task
0359  *
0360  * Notes: In case the RDMA device is already NULL (might have
0361  *        been removed in DEVICE_REMOVAL CM event it will bail-out
0362  *        without doing dma unmapping.
0363  */
0364 static void iscsi_iser_cleanup_task(struct iscsi_task *task)
0365 {
0366     struct iscsi_iser_task *iser_task = task->dd_data;
0367     struct iser_tx_desc *tx_desc = &iser_task->desc;
0368     struct iser_conn *iser_conn = task->conn->dd_data;
0369     struct iser_device *device = iser_conn->ib_conn.device;
0370 
0371     /* DEVICE_REMOVAL event might have already released the device */
0372     if (!device)
0373         return;
0374 
0375     if (likely(tx_desc->mapped)) {
0376         ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
0377                     ISER_HEADERS_LEN, DMA_TO_DEVICE);
0378         tx_desc->mapped = false;
0379     }
0380 
0381     /* mgmt tasks do not need special cleanup */
0382     if (!task->sc)
0383         return;
0384 
0385     if (iser_task->status == ISER_TASK_STATUS_STARTED) {
0386         iser_task->status = ISER_TASK_STATUS_COMPLETED;
0387         iser_task_rdma_finalize(iser_task);
0388     }
0389 }
0390 
0391 /**
0392  * iscsi_iser_check_protection() - check protection information status of task.
0393  * @task:     iscsi task
0394  * @sector:   error sector if exsists (output)
0395  *
0396  * Return: zero if no data-integrity errors have occured
0397  *         0x1: data-integrity error occured in the guard-block
0398  *         0x2: data-integrity error occured in the reference tag
0399  *         0x3: data-integrity error occured in the application tag
0400  *
0401  *         In addition the error sector is marked.
0402  */
0403 static u8 iscsi_iser_check_protection(struct iscsi_task *task, sector_t *sector)
0404 {
0405     struct iscsi_iser_task *iser_task = task->dd_data;
0406     enum iser_data_dir dir = iser_task->dir[ISER_DIR_IN] ?
0407                     ISER_DIR_IN : ISER_DIR_OUT;
0408 
0409     return iser_check_task_pi_status(iser_task, dir, sector);
0410 }
0411 
0412 /**
0413  * iscsi_iser_conn_create() - create a new iscsi-iser connection
0414  * @cls_session: iscsi class connection
0415  * @conn_idx:    connection index within the session (for MCS)
0416  *
0417  * Return: iscsi_cls_conn when iscsi_conn_setup succeeds or NULL
0418  *         otherwise.
0419  */
0420 static struct iscsi_cls_conn *
0421 iscsi_iser_conn_create(struct iscsi_cls_session *cls_session,
0422                uint32_t conn_idx)
0423 {
0424     struct iscsi_conn *conn;
0425     struct iscsi_cls_conn *cls_conn;
0426 
0427     cls_conn = iscsi_conn_setup(cls_session, 0, conn_idx);
0428     if (!cls_conn)
0429         return NULL;
0430     conn = cls_conn->dd_data;
0431 
0432     /*
0433      * due to issues with the login code re iser sematics
0434      * this not set in iscsi_conn_setup - FIXME
0435      */
0436     conn->max_recv_dlength = ISER_RECV_DATA_SEG_LEN;
0437 
0438     return cls_conn;
0439 }
0440 
0441 /**
0442  * iscsi_iser_conn_bind() - bind iscsi and iser connection structures
0443  * @cls_session:     iscsi class session
0444  * @cls_conn:        iscsi class connection
0445  * @transport_eph:   transport end-point handle
0446  * @is_leading:      indicate if this is the session leading connection (MCS)
0447  *
0448  * Return: zero on success, $error if iscsi_conn_bind fails and
0449  *         -EINVAL in case end-point doesn't exsits anymore or iser connection
0450  *         state is not UP (teardown already started).
0451  */
0452 static int iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
0453                 struct iscsi_cls_conn *cls_conn,
0454                 uint64_t transport_eph, int is_leading)
0455 {
0456     struct iscsi_conn *conn = cls_conn->dd_data;
0457     struct iser_conn *iser_conn;
0458     struct iscsi_endpoint *ep;
0459     int error;
0460 
0461     error = iscsi_conn_bind(cls_session, cls_conn, is_leading);
0462     if (error)
0463         return error;
0464 
0465     /* the transport ep handle comes from user space so it must be
0466      * verified against the global ib connections list */
0467     ep = iscsi_lookup_endpoint(transport_eph);
0468     if (!ep) {
0469         iser_err("can't bind eph %llx\n",
0470              (unsigned long long)transport_eph);
0471         return -EINVAL;
0472     }
0473     iser_conn = ep->dd_data;
0474 
0475     mutex_lock(&iser_conn->state_mutex);
0476     if (iser_conn->state != ISER_CONN_UP) {
0477         error = -EINVAL;
0478         iser_err("iser_conn %p state is %d, teardown started\n",
0479              iser_conn, iser_conn->state);
0480         goto out;
0481     }
0482 
0483     error = iser_alloc_rx_descriptors(iser_conn, conn->session);
0484     if (error)
0485         goto out;
0486 
0487     /* binds the iSER connection retrieved from the previously
0488      * connected ep_handle to the iSCSI layer connection. exchanges
0489      * connection pointers */
0490     iser_info("binding iscsi conn %p to iser_conn %p\n", conn, iser_conn);
0491 
0492     conn->dd_data = iser_conn;
0493     iser_conn->iscsi_conn = conn;
0494 
0495 out:
0496     iscsi_put_endpoint(ep);
0497     mutex_unlock(&iser_conn->state_mutex);
0498     return error;
0499 }
0500 
0501 /**
0502  * iscsi_iser_conn_start() - start iscsi-iser connection
0503  * @cls_conn: iscsi class connection
0504  *
0505  * Notes: Here iser intialize (or re-initialize) stop_completion as
0506  *        from this point iscsi must call conn_stop in session/connection
0507  *        teardown so iser transport must wait for it.
0508  */
0509 static int iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn)
0510 {
0511     struct iscsi_conn *iscsi_conn;
0512     struct iser_conn *iser_conn;
0513 
0514     iscsi_conn = cls_conn->dd_data;
0515     iser_conn = iscsi_conn->dd_data;
0516     reinit_completion(&iser_conn->stop_completion);
0517 
0518     return iscsi_conn_start(cls_conn);
0519 }
0520 
0521 /**
0522  * iscsi_iser_conn_stop() - stop iscsi-iser connection
0523  * @cls_conn:  iscsi class connection
0524  * @flag:      indicate if recover or terminate (passed as is)
0525  *
0526  * Notes: Calling iscsi_conn_stop might theoretically race with
0527  *        DEVICE_REMOVAL event and dereference a previously freed RDMA device
0528  *        handle, so we call it under iser the state lock to protect against
0529  *        this kind of race.
0530  */
0531 static void iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
0532 {
0533     struct iscsi_conn *conn = cls_conn->dd_data;
0534     struct iser_conn *iser_conn = conn->dd_data;
0535 
0536     iser_info("stopping iscsi_conn: %p, iser_conn: %p\n", conn, iser_conn);
0537 
0538     /*
0539      * Userspace may have goofed up and not bound the connection or
0540      * might have only partially setup the connection.
0541      */
0542     if (iser_conn) {
0543         mutex_lock(&iser_conn->state_mutex);
0544         mutex_lock(&unbind_iser_conn_mutex);
0545         iser_conn_terminate(iser_conn);
0546         iscsi_conn_stop(cls_conn, flag);
0547 
0548         /* unbind */
0549         iser_conn->iscsi_conn = NULL;
0550         conn->dd_data = NULL;
0551         mutex_unlock(&unbind_iser_conn_mutex);
0552 
0553         complete(&iser_conn->stop_completion);
0554         mutex_unlock(&iser_conn->state_mutex);
0555     } else {
0556         iscsi_conn_stop(cls_conn, flag);
0557     }
0558 }
0559 
0560 /**
0561  * iscsi_iser_session_destroy() - destroy iscsi-iser session
0562  * @cls_session: iscsi class session
0563  *
0564  * Removes and free iscsi host.
0565  */
0566 static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session)
0567 {
0568     struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
0569 
0570     iscsi_session_teardown(cls_session);
0571     iscsi_host_remove(shost, false);
0572     iscsi_host_free(shost);
0573 }
0574 
0575 static inline unsigned int iser_dif_prot_caps(int prot_caps)
0576 {
0577     int ret = 0;
0578 
0579     if (prot_caps & IB_PROT_T10DIF_TYPE_1)
0580         ret |= SHOST_DIF_TYPE1_PROTECTION |
0581                SHOST_DIX_TYPE0_PROTECTION |
0582                SHOST_DIX_TYPE1_PROTECTION;
0583     if (prot_caps & IB_PROT_T10DIF_TYPE_2)
0584         ret |= SHOST_DIF_TYPE2_PROTECTION |
0585                SHOST_DIX_TYPE2_PROTECTION;
0586     if (prot_caps & IB_PROT_T10DIF_TYPE_3)
0587         ret |= SHOST_DIF_TYPE3_PROTECTION |
0588                SHOST_DIX_TYPE3_PROTECTION;
0589 
0590     return ret;
0591 }
0592 
0593 /**
0594  * iscsi_iser_session_create() - create an iscsi-iser session
0595  * @ep:             iscsi end-point handle
0596  * @cmds_max:       maximum commands in this session
0597  * @qdepth:         session command queue depth
0598  * @initial_cmdsn:  initiator command sequnce number
0599  *
0600  * Allocates and adds a scsi host, expose DIF supprot if
0601  * exists, and sets up an iscsi session.
0602  */
0603 static struct iscsi_cls_session *
0604 iscsi_iser_session_create(struct iscsi_endpoint *ep,
0605               uint16_t cmds_max, uint16_t qdepth,
0606               uint32_t initial_cmdsn)
0607 {
0608     struct iscsi_cls_session *cls_session;
0609     struct Scsi_Host *shost;
0610     struct iser_conn *iser_conn = NULL;
0611     struct ib_conn *ib_conn;
0612     struct ib_device *ib_dev;
0613     u32 max_fr_sectors;
0614 
0615     shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0);
0616     if (!shost)
0617         return NULL;
0618     shost->transportt = iscsi_iser_scsi_transport;
0619     shost->cmd_per_lun = qdepth;
0620     shost->max_lun = iscsi_max_lun;
0621     shost->max_id = 0;
0622     shost->max_channel = 0;
0623     shost->max_cmd_len = 16;
0624 
0625     /*
0626      * older userspace tools (before 2.0-870) did not pass us
0627      * the leading conn's ep so this will be NULL;
0628      */
0629     if (ep) {
0630         iser_conn = ep->dd_data;
0631         shost->sg_tablesize = iser_conn->scsi_sg_tablesize;
0632         shost->can_queue = min_t(u16, cmds_max, iser_conn->max_cmds);
0633 
0634         mutex_lock(&iser_conn->state_mutex);
0635         if (iser_conn->state != ISER_CONN_UP) {
0636             iser_err("iser conn %p already started teardown\n",
0637                  iser_conn);
0638             mutex_unlock(&iser_conn->state_mutex);
0639             goto free_host;
0640         }
0641 
0642         ib_conn = &iser_conn->ib_conn;
0643         ib_dev = ib_conn->device->ib_device;
0644         if (ib_conn->pi_support) {
0645             u32 sig_caps = ib_dev->attrs.sig_prot_cap;
0646 
0647             shost->sg_prot_tablesize = shost->sg_tablesize;
0648             scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps));
0649             scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP |
0650                            SHOST_DIX_GUARD_CRC);
0651         }
0652 
0653         if (!(ib_dev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG))
0654             shost->virt_boundary_mask = SZ_4K - 1;
0655 
0656         if (iscsi_host_add(shost, ib_dev->dev.parent)) {
0657             mutex_unlock(&iser_conn->state_mutex);
0658             goto free_host;
0659         }
0660         mutex_unlock(&iser_conn->state_mutex);
0661     } else {
0662         shost->can_queue = min_t(u16, cmds_max, ISER_DEF_XMIT_CMDS_MAX);
0663         if (iscsi_host_add(shost, NULL))
0664             goto free_host;
0665     }
0666 
0667     max_fr_sectors = (shost->sg_tablesize * PAGE_SIZE) >> 9;
0668     shost->max_sectors = min(iser_max_sectors, max_fr_sectors);
0669 
0670     iser_dbg("iser_conn %p, sg_tablesize %u, max_sectors %u\n",
0671          iser_conn, shost->sg_tablesize,
0672          shost->max_sectors);
0673 
0674     if (shost->max_sectors < iser_max_sectors)
0675         iser_warn("max_sectors was reduced from %u to %u\n",
0676               iser_max_sectors, shost->max_sectors);
0677 
0678     cls_session = iscsi_session_setup(&iscsi_iser_transport, shost,
0679                       shost->can_queue, 0,
0680                       sizeof(struct iscsi_iser_task),
0681                       initial_cmdsn, 0);
0682     if (!cls_session)
0683         goto remove_host;
0684 
0685     return cls_session;
0686 
0687 remove_host:
0688     iscsi_host_remove(shost, false);
0689 free_host:
0690     iscsi_host_free(shost);
0691     return NULL;
0692 }
0693 
0694 static int iscsi_iser_set_param(struct iscsi_cls_conn *cls_conn,
0695                 enum iscsi_param param, char *buf, int buflen)
0696 {
0697     int value;
0698 
0699     switch (param) {
0700     case ISCSI_PARAM_MAX_RECV_DLENGTH:
0701         /* TBD */
0702         break;
0703     case ISCSI_PARAM_HDRDGST_EN:
0704         sscanf(buf, "%d", &value);
0705         if (value) {
0706             iser_err("DataDigest wasn't negotiated to None\n");
0707             return -EPROTO;
0708         }
0709         break;
0710     case ISCSI_PARAM_DATADGST_EN:
0711         sscanf(buf, "%d", &value);
0712         if (value) {
0713             iser_err("DataDigest wasn't negotiated to None\n");
0714             return -EPROTO;
0715         }
0716         break;
0717     case ISCSI_PARAM_IFMARKER_EN:
0718         sscanf(buf, "%d", &value);
0719         if (value) {
0720             iser_err("IFMarker wasn't negotiated to No\n");
0721             return -EPROTO;
0722         }
0723         break;
0724     case ISCSI_PARAM_OFMARKER_EN:
0725         sscanf(buf, "%d", &value);
0726         if (value) {
0727             iser_err("OFMarker wasn't negotiated to No\n");
0728             return -EPROTO;
0729         }
0730         break;
0731     default:
0732         return iscsi_set_param(cls_conn, param, buf, buflen);
0733     }
0734 
0735     return 0;
0736 }
0737 
0738 /**
0739  * iscsi_iser_conn_get_stats() - get iscsi connection statistics
0740  * @cls_conn:    iscsi class connection
0741  * @stats:       iscsi stats to output
0742  *
0743  * Output connection statistics.
0744  */
0745 static void iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn,
0746                       struct iscsi_stats *stats)
0747 {
0748     struct iscsi_conn *conn = cls_conn->dd_data;
0749 
0750     stats->txdata_octets = conn->txdata_octets;
0751     stats->rxdata_octets = conn->rxdata_octets;
0752     stats->scsicmd_pdus = conn->scsicmd_pdus_cnt;
0753     stats->dataout_pdus = conn->dataout_pdus_cnt;
0754     stats->scsirsp_pdus = conn->scsirsp_pdus_cnt;
0755     stats->datain_pdus = conn->datain_pdus_cnt; /* always 0 */
0756     stats->r2t_pdus = conn->r2t_pdus_cnt; /* always 0 */
0757     stats->tmfcmd_pdus = conn->tmfcmd_pdus_cnt;
0758     stats->tmfrsp_pdus = conn->tmfrsp_pdus_cnt;
0759     stats->custom_length = 0;
0760 }
0761 
0762 static int iscsi_iser_get_ep_param(struct iscsi_endpoint *ep,
0763                    enum iscsi_param param, char *buf)
0764 {
0765     struct iser_conn *iser_conn = ep->dd_data;
0766 
0767     switch (param) {
0768     case ISCSI_PARAM_CONN_PORT:
0769     case ISCSI_PARAM_CONN_ADDRESS:
0770         if (!iser_conn || !iser_conn->ib_conn.cma_id)
0771             return -ENOTCONN;
0772 
0773         return iscsi_conn_get_addr_param((struct sockaddr_storage *)
0774                 &iser_conn->ib_conn.cma_id->route.addr.dst_addr,
0775                 param, buf);
0776     default:
0777         break;
0778     }
0779     return -ENOSYS;
0780 }
0781 
0782 /**
0783  * iscsi_iser_ep_connect() - Initiate iSER connection establishment
0784  * @shost:          scsi_host
0785  * @dst_addr:       destination address
0786  * @non_blocking:   indicate if routine can block
0787  *
0788  * Allocate an iscsi endpoint, an iser_conn structure and bind them.
0789  * After that start RDMA connection establishment via rdma_cm. We
0790  * don't allocate iser_conn embedded in iscsi_endpoint since in teardown
0791  * the endpoint will be destroyed at ep_disconnect while iser_conn will
0792  * cleanup its resources asynchronuously.
0793  *
0794  * Return: iscsi_endpoint created by iscsi layer or ERR_PTR(error)
0795  *         if fails.
0796  */
0797 static struct iscsi_endpoint *iscsi_iser_ep_connect(struct Scsi_Host *shost,
0798                             struct sockaddr *dst_addr,
0799                             int non_blocking)
0800 {
0801     int err;
0802     struct iser_conn *iser_conn;
0803     struct iscsi_endpoint *ep;
0804 
0805     ep = iscsi_create_endpoint(0);
0806     if (!ep)
0807         return ERR_PTR(-ENOMEM);
0808 
0809     iser_conn = kzalloc(sizeof(*iser_conn), GFP_KERNEL);
0810     if (!iser_conn) {
0811         err = -ENOMEM;
0812         goto failure;
0813     }
0814 
0815     ep->dd_data = iser_conn;
0816     iser_conn->ep = ep;
0817     iser_conn_init(iser_conn);
0818 
0819     err = iser_connect(iser_conn, NULL, dst_addr, non_blocking);
0820     if (err)
0821         goto failure;
0822 
0823     return ep;
0824 failure:
0825     iscsi_destroy_endpoint(ep);
0826     return ERR_PTR(err);
0827 }
0828 
0829 /**
0830  * iscsi_iser_ep_poll() - poll for iser connection establishment to complete
0831  * @ep:            iscsi endpoint (created at ep_connect)
0832  * @timeout_ms:    polling timeout allowed in ms.
0833  *
0834  * This routine boils down to waiting for up_completion signaling
0835  * that cma_id got CONNECTED event.
0836  *
0837  * Return: 1 if succeeded in connection establishment, 0 if timeout expired
0838  *         (libiscsi will retry will kick in) or -1 if interrupted by signal
0839  *         or more likely iser connection state transitioned to TEMINATING or
0840  *         DOWN during the wait period.
0841  */
0842 static int iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms)
0843 {
0844     struct iser_conn *iser_conn = ep->dd_data;
0845     int rc;
0846 
0847     rc = wait_for_completion_interruptible_timeout(&iser_conn->up_completion,
0848                                msecs_to_jiffies(timeout_ms));
0849     /* if conn establishment failed, return error code to iscsi */
0850     if (rc == 0) {
0851         mutex_lock(&iser_conn->state_mutex);
0852         if (iser_conn->state == ISER_CONN_TERMINATING ||
0853             iser_conn->state == ISER_CONN_DOWN)
0854             rc = -1;
0855         mutex_unlock(&iser_conn->state_mutex);
0856     }
0857 
0858     iser_info("iser conn %p rc = %d\n", iser_conn, rc);
0859 
0860     if (rc > 0)
0861         return 1; /* success, this is the equivalent of EPOLLOUT */
0862     else if (!rc)
0863         return 0; /* timeout */
0864     else
0865         return rc; /* signal */
0866 }
0867 
0868 /**
0869  * iscsi_iser_ep_disconnect() - Initiate connection teardown process
0870  * @ep:    iscsi endpoint handle
0871  *
0872  * This routine is not blocked by iser and RDMA termination process
0873  * completion as we queue a deffered work for iser/RDMA destruction
0874  * and cleanup or actually call it immediately in case we didn't pass
0875  * iscsi conn bind/start stage, thus it is safe.
0876  */
0877 static void iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep)
0878 {
0879     struct iser_conn *iser_conn = ep->dd_data;
0880 
0881     iser_info("ep %p iser conn %p\n", ep, iser_conn);
0882 
0883     mutex_lock(&iser_conn->state_mutex);
0884     iser_conn_terminate(iser_conn);
0885 
0886     /*
0887      * if iser_conn and iscsi_conn are bound, we must wait for
0888      * iscsi_conn_stop and flush errors completion before freeing
0889      * the iser resources. Otherwise we are safe to free resources
0890      * immediately.
0891      */
0892     if (iser_conn->iscsi_conn) {
0893         INIT_WORK(&iser_conn->release_work, iser_release_work);
0894         queue_work(release_wq, &iser_conn->release_work);
0895         mutex_unlock(&iser_conn->state_mutex);
0896     } else {
0897         iser_conn->state = ISER_CONN_DOWN;
0898         mutex_unlock(&iser_conn->state_mutex);
0899         iser_conn_release(iser_conn);
0900     }
0901 
0902     iscsi_destroy_endpoint(ep);
0903 }
0904 
0905 static umode_t iser_attr_is_visible(int param_type, int param)
0906 {
0907     switch (param_type) {
0908     case ISCSI_HOST_PARAM:
0909         switch (param) {
0910         case ISCSI_HOST_PARAM_NETDEV_NAME:
0911         case ISCSI_HOST_PARAM_HWADDRESS:
0912         case ISCSI_HOST_PARAM_INITIATOR_NAME:
0913             return S_IRUGO;
0914         default:
0915             return 0;
0916         }
0917     case ISCSI_PARAM:
0918         switch (param) {
0919         case ISCSI_PARAM_MAX_RECV_DLENGTH:
0920         case ISCSI_PARAM_MAX_XMIT_DLENGTH:
0921         case ISCSI_PARAM_HDRDGST_EN:
0922         case ISCSI_PARAM_DATADGST_EN:
0923         case ISCSI_PARAM_CONN_ADDRESS:
0924         case ISCSI_PARAM_CONN_PORT:
0925         case ISCSI_PARAM_EXP_STATSN:
0926         case ISCSI_PARAM_PERSISTENT_ADDRESS:
0927         case ISCSI_PARAM_PERSISTENT_PORT:
0928         case ISCSI_PARAM_PING_TMO:
0929         case ISCSI_PARAM_RECV_TMO:
0930         case ISCSI_PARAM_INITIAL_R2T_EN:
0931         case ISCSI_PARAM_MAX_R2T:
0932         case ISCSI_PARAM_IMM_DATA_EN:
0933         case ISCSI_PARAM_FIRST_BURST:
0934         case ISCSI_PARAM_MAX_BURST:
0935         case ISCSI_PARAM_PDU_INORDER_EN:
0936         case ISCSI_PARAM_DATASEQ_INORDER_EN:
0937         case ISCSI_PARAM_TARGET_NAME:
0938         case ISCSI_PARAM_TPGT:
0939         case ISCSI_PARAM_USERNAME:
0940         case ISCSI_PARAM_PASSWORD:
0941         case ISCSI_PARAM_USERNAME_IN:
0942         case ISCSI_PARAM_PASSWORD_IN:
0943         case ISCSI_PARAM_FAST_ABORT:
0944         case ISCSI_PARAM_ABORT_TMO:
0945         case ISCSI_PARAM_LU_RESET_TMO:
0946         case ISCSI_PARAM_TGT_RESET_TMO:
0947         case ISCSI_PARAM_IFACE_NAME:
0948         case ISCSI_PARAM_INITIATOR_NAME:
0949         case ISCSI_PARAM_DISCOVERY_SESS:
0950             return S_IRUGO;
0951         default:
0952             return 0;
0953         }
0954     }
0955 
0956     return 0;
0957 }
0958 
0959 static struct scsi_host_template iscsi_iser_sht = {
0960     .module                 = THIS_MODULE,
0961     .name                   = "iSCSI Initiator over iSER",
0962     .queuecommand           = iscsi_queuecommand,
0963     .change_queue_depth = scsi_change_queue_depth,
0964     .sg_tablesize           = ISCSI_ISER_DEF_SG_TABLESIZE,
0965     .cmd_per_lun            = ISER_DEF_CMD_PER_LUN,
0966     .eh_timed_out       = iscsi_eh_cmd_timed_out,
0967     .eh_abort_handler       = iscsi_eh_abort,
0968     .eh_device_reset_handler= iscsi_eh_device_reset,
0969     .eh_target_reset_handler = iscsi_eh_recover_target,
0970     .target_alloc       = iscsi_target_alloc,
0971     .proc_name              = "iscsi_iser",
0972     .this_id                = -1,
0973     .track_queue_depth  = 1,
0974     .cmd_size       = sizeof(struct iscsi_cmd),
0975 };
0976 
0977 static struct iscsi_transport iscsi_iser_transport = {
0978     .owner                  = THIS_MODULE,
0979     .name                   = "iser",
0980     .caps                   = CAP_RECOVERY_L0 | CAP_MULTI_R2T | CAP_TEXT_NEGO,
0981     /* session management */
0982     .create_session         = iscsi_iser_session_create,
0983     .destroy_session        = iscsi_iser_session_destroy,
0984     /* connection management */
0985     .create_conn            = iscsi_iser_conn_create,
0986     .bind_conn              = iscsi_iser_conn_bind,
0987     .unbind_conn        = iscsi_conn_unbind,
0988     .destroy_conn           = iscsi_conn_teardown,
0989     .attr_is_visible    = iser_attr_is_visible,
0990     .set_param              = iscsi_iser_set_param,
0991     .get_conn_param     = iscsi_conn_get_param,
0992     .get_ep_param       = iscsi_iser_get_ep_param,
0993     .get_session_param  = iscsi_session_get_param,
0994     .start_conn             = iscsi_iser_conn_start,
0995     .stop_conn              = iscsi_iser_conn_stop,
0996     /* iscsi host params */
0997     .get_host_param     = iscsi_host_get_param,
0998     .set_host_param     = iscsi_host_set_param,
0999     /* IO */
1000     .send_pdu       = iscsi_conn_send_pdu,
1001     .get_stats      = iscsi_iser_conn_get_stats,
1002     .init_task      = iscsi_iser_task_init,
1003     .xmit_task      = iscsi_iser_task_xmit,
1004     .cleanup_task       = iscsi_iser_cleanup_task,
1005     .alloc_pdu      = iscsi_iser_pdu_alloc,
1006     .check_protection   = iscsi_iser_check_protection,
1007     /* recovery */
1008     .session_recovery_timedout = iscsi_session_recovery_timedout,
1009 
1010     .ep_connect             = iscsi_iser_ep_connect,
1011     .ep_poll                = iscsi_iser_ep_poll,
1012     .ep_disconnect          = iscsi_iser_ep_disconnect
1013 };
1014 
1015 static int __init iser_init(void)
1016 {
1017     int err;
1018 
1019     iser_dbg("Starting iSER datamover...\n");
1020 
1021     memset(&ig, 0, sizeof(struct iser_global));
1022 
1023     ig.desc_cache = kmem_cache_create("iser_descriptors",
1024                       sizeof(struct iser_tx_desc),
1025                       0, SLAB_HWCACHE_ALIGN,
1026                       NULL);
1027     if (ig.desc_cache == NULL)
1028         return -ENOMEM;
1029 
1030     /* device init is called only after the first addr resolution */
1031     mutex_init(&ig.device_list_mutex);
1032     INIT_LIST_HEAD(&ig.device_list);
1033     mutex_init(&ig.connlist_mutex);
1034     INIT_LIST_HEAD(&ig.connlist);
1035 
1036     release_wq = alloc_workqueue("release workqueue", 0, 0);
1037     if (!release_wq) {
1038         iser_err("failed to allocate release workqueue\n");
1039         err = -ENOMEM;
1040         goto err_alloc_wq;
1041     }
1042 
1043     iscsi_iser_scsi_transport = iscsi_register_transport(
1044                             &iscsi_iser_transport);
1045     if (!iscsi_iser_scsi_transport) {
1046         iser_err("iscsi_register_transport failed\n");
1047         err = -EINVAL;
1048         goto err_reg;
1049     }
1050 
1051     return 0;
1052 
1053 err_reg:
1054     destroy_workqueue(release_wq);
1055 err_alloc_wq:
1056     kmem_cache_destroy(ig.desc_cache);
1057 
1058     return err;
1059 }
1060 
1061 static void __exit iser_exit(void)
1062 {
1063     struct iser_conn *iser_conn, *n;
1064     int connlist_empty;
1065 
1066     iser_dbg("Removing iSER datamover...\n");
1067     destroy_workqueue(release_wq);
1068 
1069     mutex_lock(&ig.connlist_mutex);
1070     connlist_empty = list_empty(&ig.connlist);
1071     mutex_unlock(&ig.connlist_mutex);
1072 
1073     if (!connlist_empty) {
1074         iser_err("Error cleanup stage completed but we still have iser "
1075              "connections, destroying them anyway\n");
1076         list_for_each_entry_safe(iser_conn, n, &ig.connlist,
1077                      conn_list) {
1078             iser_conn_release(iser_conn);
1079         }
1080     }
1081 
1082     iscsi_unregister_transport(&iscsi_iser_transport);
1083     kmem_cache_destroy(ig.desc_cache);
1084 }
1085 
1086 module_init(iser_init); /* register iser_init() as the module's init routine */
1087 module_exit(iser_exit); /* register iser_exit() as the module's exit routine */