/*
 * Linux driver for VMware's para-virtualized SCSI HBA.
 *
 * Copyright (C) 2008-2014, VMware, Inc. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; version 2 of the License and no later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/pci.h>

#include <scsi/scsi.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_tcq.h>

#include "vmw_pvscsi.h"

#define PVSCSI_LINUX_DRIVER_DESC "VMware PVSCSI driver"

MODULE_DESCRIPTION(PVSCSI_LINUX_DRIVER_DESC);
MODULE_AUTHOR("VMware, Inc.");
MODULE_LICENSE("GPL");
MODULE_VERSION(PVSCSI_DRIVER_VERSION_STRING);

#define PVSCSI_DEFAULT_NUM_PAGES_PER_RING   8
#define PVSCSI_DEFAULT_NUM_PAGES_MSG_RING   1
#define PVSCSI_DEFAULT_QUEUE_DEPTH      254
#define SGL_SIZE                PAGE_SIZE

struct pvscsi_sg_list {
    struct PVSCSISGElement sge[PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT];
};

struct pvscsi_ctx {
    /*
     * The index of the context in cmd_map serves as the context ID for a
     * 1-to-1 mapping of completions back to requests.
     */
    struct scsi_cmnd    *cmd;
    struct pvscsi_sg_list   *sgl;
    struct list_head    list;
    dma_addr_t      dataPA;
    dma_addr_t      sensePA;
    dma_addr_t      sglPA;
    struct completion   *abort_cmp;
};

struct pvscsi_adapter {
    char                *mmioBase;
    u8              rev;
    bool                use_msg;
    bool                use_req_threshold;

    spinlock_t          hw_lock;

    struct workqueue_struct     *workqueue;
    struct work_struct      work;

    struct PVSCSIRingReqDesc    *req_ring;
    unsigned            req_pages;
    unsigned            req_depth;
    dma_addr_t          reqRingPA;

    struct PVSCSIRingCmpDesc    *cmp_ring;
    unsigned            cmp_pages;
    dma_addr_t          cmpRingPA;

    struct PVSCSIRingMsgDesc    *msg_ring;
    unsigned            msg_pages;
    dma_addr_t          msgRingPA;

    struct PVSCSIRingsState     *rings_state;
    dma_addr_t          ringStatePA;

    struct pci_dev          *dev;
    struct Scsi_Host        *host;

    struct list_head        cmd_pool;
    struct pvscsi_ctx       *cmd_map;
};


/* Command line parameters */
static int pvscsi_ring_pages;
static int pvscsi_msg_ring_pages = PVSCSI_DEFAULT_NUM_PAGES_MSG_RING;
static int pvscsi_cmd_per_lun    = PVSCSI_DEFAULT_QUEUE_DEPTH;
static bool pvscsi_disable_msi;
static bool pvscsi_disable_msix;
static bool pvscsi_use_msg       = true;
static bool pvscsi_use_req_threshold = true;

#define PVSCSI_RW (S_IRUSR | S_IWUSR)

module_param_named(ring_pages, pvscsi_ring_pages, int, PVSCSI_RW);
MODULE_PARM_DESC(ring_pages, "Number of pages per req/cmp ring - (default="
         __stringify(PVSCSI_DEFAULT_NUM_PAGES_PER_RING)
         "[up to 16 targets],"
         __stringify(PVSCSI_SETUP_RINGS_MAX_NUM_PAGES)
         "[for 16+ targets])");

module_param_named(msg_ring_pages, pvscsi_msg_ring_pages, int, PVSCSI_RW);
MODULE_PARM_DESC(msg_ring_pages, "Number of pages for the msg ring - (default="
         __stringify(PVSCSI_DEFAULT_NUM_PAGES_MSG_RING) ")");

module_param_named(cmd_per_lun, pvscsi_cmd_per_lun, int, PVSCSI_RW);
MODULE_PARM_DESC(cmd_per_lun, "Maximum commands per lun - (default="
         __stringify(PVSCSI_DEFAULT_QUEUE_DEPTH) ")");

module_param_named(disable_msi, pvscsi_disable_msi, bool, PVSCSI_RW);
MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)");

module_param_named(disable_msix, pvscsi_disable_msix, bool, PVSCSI_RW);
MODULE_PARM_DESC(disable_msix, "Disable MSI-X use in driver - (default=0)");

module_param_named(use_msg, pvscsi_use_msg, bool, PVSCSI_RW);
MODULE_PARM_DESC(use_msg, "Use msg ring when available - (default=1)");

module_param_named(use_req_threshold, pvscsi_use_req_threshold,
           bool, PVSCSI_RW);
MODULE_PARM_DESC(use_req_threshold, "Use driver-based request coalescing if configured - (default=1)");

static const struct pci_device_id pvscsi_pci_tbl[] = {
    { PCI_VDEVICE(VMWARE, PCI_DEVICE_ID_VMWARE_PVSCSI) },
    { 0 }
};

MODULE_DEVICE_TABLE(pci, pvscsi_pci_tbl);

static struct device *
pvscsi_dev(const struct pvscsi_adapter *adapter)
{
    return &(adapter->dev->dev);
}

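/*
 * Walk the cmd_map array looking for the context that owns @cmd.  This
 * linear scan is only used on the abort path, so it is not performance
 * critical.
 */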
static struct pvscsi_ctx *
pvscsi_find_context(const struct pvscsi_adapter *adapter, struct scsi_cmnd *cmd)
{
    struct pvscsi_ctx *ctx, *end;

    end = &adapter->cmd_map[adapter->req_depth];
    for (ctx = adapter->cmd_map; ctx < end; ctx++)
        if (ctx->cmd == cmd)
            return ctx;

    return NULL;
}

static struct pvscsi_ctx *
pvscsi_acquire_context(struct pvscsi_adapter *adapter, struct scsi_cmnd *cmd)
{
    struct pvscsi_ctx *ctx;

    if (list_empty(&adapter->cmd_pool))
        return NULL;

    ctx = list_first_entry(&adapter->cmd_pool, struct pvscsi_ctx, list);
    ctx->cmd = cmd;
    list_del(&ctx->list);

    return ctx;
}

static void pvscsi_release_context(struct pvscsi_adapter *adapter,
                   struct pvscsi_ctx *ctx)
{
    ctx->cmd = NULL;
    ctx->abort_cmp = NULL;
    list_add(&ctx->list, &adapter->cmd_pool);
}

/*
 * Map a pvscsi_ctx struct to a context ID field value; we map to a simple
 * non-zero integer. ctx always points to an entry in cmd_map array, hence
 * the return value is always >=1.
 */
static u64 pvscsi_map_context(const struct pvscsi_adapter *adapter,
                  const struct pvscsi_ctx *ctx)
{
    return ctx - adapter->cmd_map + 1;
}

static struct pvscsi_ctx *
pvscsi_get_context(const struct pvscsi_adapter *adapter, u64 context)
{
    return &adapter->cmd_map[context - 1];
}

static void pvscsi_reg_write(const struct pvscsi_adapter *adapter,
                 u32 offset, u32 val)
{
    writel(val, adapter->mmioBase + offset);
}

static u32 pvscsi_reg_read(const struct pvscsi_adapter *adapter, u32 offset)
{
    return readl(adapter->mmioBase + offset);
}

static u32 pvscsi_read_intr_status(const struct pvscsi_adapter *adapter)
{
    return pvscsi_reg_read(adapter, PVSCSI_REG_OFFSET_INTR_STATUS);
}

static void pvscsi_write_intr_status(const struct pvscsi_adapter *adapter,
                     u32 val)
{
    pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_INTR_STATUS, val);
}

static void pvscsi_unmask_intr(const struct pvscsi_adapter *adapter)
{
    u32 intr_bits;

    intr_bits = PVSCSI_INTR_CMPL_MASK;
    if (adapter->use_msg)
        intr_bits |= PVSCSI_INTR_MSG_MASK;

    pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_INTR_MASK, intr_bits);
}

static void pvscsi_mask_intr(const struct pvscsi_adapter *adapter)
{
    pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_INTR_MASK, 0);
}

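/*
 * Issue a command to the device: write the command code to the COMMAND
 * register, then stream the descriptor into the COMMAND_DATA register
 * one 32-bit word at a time.
 */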
static void pvscsi_write_cmd_desc(const struct pvscsi_adapter *adapter,
                  u32 cmd, const void *desc, size_t len)
{
    const u32 *ptr = desc;
    size_t i;

    len /= sizeof(*ptr);
    pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_COMMAND, cmd);
    for (i = 0; i < len; i++)
        pvscsi_reg_write(adapter,
                 PVSCSI_REG_OFFSET_COMMAND_DATA, ptr[i]);
}

static void pvscsi_abort_cmd(const struct pvscsi_adapter *adapter,
                 const struct pvscsi_ctx *ctx)
{
    struct PVSCSICmdDescAbortCmd cmd = { 0 };

    cmd.target = ctx->cmd->device->id;
    cmd.context = pvscsi_map_context(adapter, ctx);

    pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_ABORT_CMD, &cmd, sizeof(cmd));
}

static void pvscsi_kick_rw_io(const struct pvscsi_adapter *adapter)
{
    pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_KICK_RW_IO, 0);
}

static void pvscsi_process_request_ring(const struct pvscsi_adapter *adapter)
{
    pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_KICK_NON_RW_IO, 0);
}

static int scsi_is_rw(unsigned char op)
{
    return op == READ_6  || op == WRITE_6 ||
           op == READ_10 || op == WRITE_10 ||
           op == READ_12 || op == WRITE_12 ||
           op == READ_16 || op == WRITE_16;
}

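/*
 * Notify the device that new requests are on the ring.  For READ/WRITE
 * commands, if request coalescing is active, only ring the doorbell once
 * the number of outstanding requests reaches the device-supplied
 * reqCallThreshold; everything else kicks the device immediately.
 */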
static void pvscsi_kick_io(const struct pvscsi_adapter *adapter,
               unsigned char op)
{
    if (scsi_is_rw(op)) {
        struct PVSCSIRingsState *s = adapter->rings_state;

        if (!adapter->use_req_threshold ||
            s->reqProdIdx - s->reqConsIdx >= s->reqCallThreshold)
            pvscsi_kick_rw_io(adapter);
    } else {
        pvscsi_process_request_ring(adapter);
    }
}

static void ll_adapter_reset(const struct pvscsi_adapter *adapter)
{
    dev_dbg(pvscsi_dev(adapter), "Adapter Reset on %p\n", adapter);

    pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_ADAPTER_RESET, NULL, 0);
}

static void ll_bus_reset(const struct pvscsi_adapter *adapter)
{
    dev_dbg(pvscsi_dev(adapter), "Resetting bus on %p\n", adapter);

    pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_RESET_BUS, NULL, 0);
}

static void ll_device_reset(const struct pvscsi_adapter *adapter, u32 target)
{
    struct PVSCSICmdDescResetDevice cmd = { 0 };

    dev_dbg(pvscsi_dev(adapter), "Resetting device: target=%u\n", target);

    cmd.target = target;

    pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_RESET_DEVICE,
                  &cmd, sizeof(cmd));
}

static void pvscsi_create_sg(struct pvscsi_ctx *ctx,
                 struct scatterlist *sg, unsigned count)
{
    unsigned i;
    struct PVSCSISGElement *sge;

    BUG_ON(count > PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT);

    sge = &ctx->sgl->sge[0];
    for (i = 0; i < count; i++, sg = sg_next(sg)) {
        sge[i].addr   = sg_dma_address(sg);
        sge[i].length = sg_dma_len(sg);
        sge[i].flags  = 0;
    }
}

/*
 * Map all data buffers for a command into PCI space and
 * set up the scatter/gather list if needed.
 */
static int pvscsi_map_buffers(struct pvscsi_adapter *adapter,
                  struct pvscsi_ctx *ctx, struct scsi_cmnd *cmd,
                  struct PVSCSIRingReqDesc *e)
{
    unsigned count;
    unsigned bufflen = scsi_bufflen(cmd);
    struct scatterlist *sg;

    e->dataLen = bufflen;
    e->dataAddr = 0;
    if (bufflen == 0)
        return 0;

    sg = scsi_sglist(cmd);
    count = scsi_sg_count(cmd);
    if (count != 0) {
        int segs = scsi_dma_map(cmd);

        if (segs == -ENOMEM) {
            scmd_printk(KERN_DEBUG, cmd,
                    "vmw_pvscsi: Failed to map cmd sglist for DMA.\n");
            return -ENOMEM;
        } else if (segs > 1) {
            pvscsi_create_sg(ctx, sg, segs);

            e->flags |= PVSCSI_FLAG_CMD_WITH_SG_LIST;
            ctx->sglPA = dma_map_single(&adapter->dev->dev,
                    ctx->sgl, SGL_SIZE, DMA_TO_DEVICE);
            if (dma_mapping_error(&adapter->dev->dev, ctx->sglPA)) {
                scmd_printk(KERN_ERR, cmd,
                        "vmw_pvscsi: Failed to map ctx sglist for DMA.\n");
                scsi_dma_unmap(cmd);
                ctx->sglPA = 0;
                return -ENOMEM;
            }
            e->dataAddr = ctx->sglPA;
        } else
            e->dataAddr = sg_dma_address(sg);
    } else {
        /*
         * In case there is no S/G list, scsi_sglist points
         * directly to the buffer.
         */
        ctx->dataPA = dma_map_single(&adapter->dev->dev, sg, bufflen,
                         cmd->sc_data_direction);
        if (dma_mapping_error(&adapter->dev->dev, ctx->dataPA)) {
            scmd_printk(KERN_DEBUG, cmd,
                    "vmw_pvscsi: Failed to map direct data buffer for DMA.\n");
            return -ENOMEM;
        }
        e->dataAddr = ctx->dataPA;
    }

    return 0;
}

/*
 * The device incorrectly doesn't clear the first byte of the sense
 * buffer in some cases. We have to do it ourselves.
 * Otherwise we run into trouble when SWIOTLB is forced.
 */
static void pvscsi_patch_sense(struct scsi_cmnd *cmd)
{
    if (cmd->sense_buffer)
        cmd->sense_buffer[0] = 0;
}

static void pvscsi_unmap_buffers(const struct pvscsi_adapter *adapter,
                 struct pvscsi_ctx *ctx)
{
    struct scsi_cmnd *cmd;
    unsigned bufflen;

    cmd = ctx->cmd;
    bufflen = scsi_bufflen(cmd);

    if (bufflen != 0) {
        unsigned count = scsi_sg_count(cmd);

        if (count != 0) {
            scsi_dma_unmap(cmd);
            if (ctx->sglPA) {
                dma_unmap_single(&adapter->dev->dev, ctx->sglPA,
                         SGL_SIZE, DMA_TO_DEVICE);
                ctx->sglPA = 0;
            }
        } else
            dma_unmap_single(&adapter->dev->dev, ctx->dataPA,
                     bufflen, cmd->sc_data_direction);
    }
    if (cmd->sense_buffer)
        dma_unmap_single(&adapter->dev->dev, ctx->sensePA,
                 SCSI_SENSE_BUFFERSIZE, DMA_FROM_DEVICE);
}

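/*
 * Allocate the DMA-coherent memory shared with the device: one page of
 * rings state plus the request, completion and (optionally) message
 * rings.  All of these must be page aligned, since only their physical
 * page numbers are handed to the device.
 */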
static int pvscsi_allocate_rings(struct pvscsi_adapter *adapter)
{
    adapter->rings_state = dma_alloc_coherent(&adapter->dev->dev, PAGE_SIZE,
            &adapter->ringStatePA, GFP_KERNEL);
    if (!adapter->rings_state)
        return -ENOMEM;

    adapter->req_pages = min(PVSCSI_MAX_NUM_PAGES_REQ_RING,
                 pvscsi_ring_pages);
    adapter->req_depth = adapter->req_pages
                    * PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE;
    adapter->req_ring = dma_alloc_coherent(&adapter->dev->dev,
            adapter->req_pages * PAGE_SIZE, &adapter->reqRingPA,
            GFP_KERNEL);
    if (!adapter->req_ring)
        return -ENOMEM;

    adapter->cmp_pages = min(PVSCSI_MAX_NUM_PAGES_CMP_RING,
                 pvscsi_ring_pages);
    adapter->cmp_ring = dma_alloc_coherent(&adapter->dev->dev,
            adapter->cmp_pages * PAGE_SIZE, &adapter->cmpRingPA,
            GFP_KERNEL);
    if (!adapter->cmp_ring)
        return -ENOMEM;

    BUG_ON(!IS_ALIGNED(adapter->ringStatePA, PAGE_SIZE));
    BUG_ON(!IS_ALIGNED(adapter->reqRingPA, PAGE_SIZE));
    BUG_ON(!IS_ALIGNED(adapter->cmpRingPA, PAGE_SIZE));

    if (!adapter->use_msg)
        return 0;

    adapter->msg_pages = min(PVSCSI_MAX_NUM_PAGES_MSG_RING,
                 pvscsi_msg_ring_pages);
    adapter->msg_ring = dma_alloc_coherent(&adapter->dev->dev,
            adapter->msg_pages * PAGE_SIZE, &adapter->msgRingPA,
            GFP_KERNEL);
    if (!adapter->msg_ring)
        return -ENOMEM;
    BUG_ON(!IS_ALIGNED(adapter->msgRingPA, PAGE_SIZE));

    return 0;
}

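/*
 * Hand the physical page numbers of the rings state page and of every
 * ring page to the device via SETUP_RINGS (and SETUP_MSG_RING when the
 * msg ring is in use), zeroing the rings first.
 */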
static void pvscsi_setup_all_rings(const struct pvscsi_adapter *adapter)
{
    struct PVSCSICmdDescSetupRings cmd = { 0 };
    dma_addr_t base;
    unsigned i;

    cmd.ringsStatePPN   = adapter->ringStatePA >> PAGE_SHIFT;
    cmd.reqRingNumPages = adapter->req_pages;
    cmd.cmpRingNumPages = adapter->cmp_pages;

    base = adapter->reqRingPA;
    for (i = 0; i < adapter->req_pages; i++) {
        cmd.reqRingPPNs[i] = base >> PAGE_SHIFT;
        base += PAGE_SIZE;
    }

    base = adapter->cmpRingPA;
    for (i = 0; i < adapter->cmp_pages; i++) {
        cmd.cmpRingPPNs[i] = base >> PAGE_SHIFT;
        base += PAGE_SIZE;
    }

    memset(adapter->rings_state, 0, PAGE_SIZE);
    memset(adapter->req_ring, 0, adapter->req_pages * PAGE_SIZE);
    memset(adapter->cmp_ring, 0, adapter->cmp_pages * PAGE_SIZE);

    pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_SETUP_RINGS,
                  &cmd, sizeof(cmd));

    if (adapter->use_msg) {
        struct PVSCSICmdDescSetupMsgRing cmd_msg = { 0 };

        cmd_msg.numPages = adapter->msg_pages;

        base = adapter->msgRingPA;
        for (i = 0; i < adapter->msg_pages; i++) {
            cmd_msg.ringPPNs[i] = base >> PAGE_SHIFT;
            base += PAGE_SIZE;
        }
        memset(adapter->msg_ring, 0, adapter->msg_pages * PAGE_SIZE);

        pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_SETUP_MSG_RING,
                      &cmd_msg, sizeof(cmd_msg));
    }
}

static int pvscsi_change_queue_depth(struct scsi_device *sdev, int qdepth)
{
    if (!sdev->tagged_supported)
        qdepth = 1;
    return scsi_change_queue_depth(sdev, qdepth);
}

/*
 * Pull a completion descriptor off and pass the completion back
 * to the SCSI mid layer.
 */
static void pvscsi_complete_request(struct pvscsi_adapter *adapter,
                    const struct PVSCSIRingCmpDesc *e)
{
    struct pvscsi_ctx *ctx;
    struct scsi_cmnd *cmd;
    struct completion *abort_cmp;
    u32 btstat = e->hostStatus;
    u32 sdstat = e->scsiStatus;

    ctx = pvscsi_get_context(adapter, e->context);
    cmd = ctx->cmd;
    abort_cmp = ctx->abort_cmp;
    pvscsi_unmap_buffers(adapter, ctx);
    if (sdstat != SAM_STAT_CHECK_CONDITION)
        pvscsi_patch_sense(cmd);
    pvscsi_release_context(adapter, ctx);
    if (abort_cmp) {
        /*
         * The command was requested to be aborted. Just signal that
         * the request completed and swallow the actual cmd completion
         * here. The abort handler will post a completion for this
         * command indicating that it got successfully aborted.
         */
        complete(abort_cmp);
        return;
    }

    cmd->result = 0;
    if (sdstat != SAM_STAT_GOOD &&
        (btstat == BTSTAT_SUCCESS ||
         btstat == BTSTAT_LINKED_COMMAND_COMPLETED ||
         btstat == BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG)) {
        if (sdstat == SAM_STAT_COMMAND_TERMINATED) {
            cmd->result = (DID_RESET << 16);
        } else {
            cmd->result = (DID_OK << 16) | sdstat;
        }
    } else
        switch (btstat) {
        case BTSTAT_SUCCESS:
        case BTSTAT_LINKED_COMMAND_COMPLETED:
        case BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG:
            /*
             * Commands like INQUIRY may transfer less data than
             * requested by the initiator via bufflen. Set the residual
             * count to make the upper layer aware of the actual amount
             * of data returned. There are cases when the controller
             * returns zero dataLen with non-zero data - do not set the
             * residual count in that case.
             */
            if (e->dataLen && (e->dataLen < scsi_bufflen(cmd)))
                scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen);
            cmd->result = (DID_OK << 16);
            break;

        case BTSTAT_DATARUN:
        case BTSTAT_DATA_UNDERRUN:
            /* Report residual data in underruns */
            scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen);
            cmd->result = (DID_ERROR << 16);
            break;

        case BTSTAT_SELTIMEO:
            /* Our emulation returns this for non-connected devs */
            cmd->result = (DID_BAD_TARGET << 16);
            break;

        case BTSTAT_LUNMISMATCH:
        case BTSTAT_TAGREJECT:
        case BTSTAT_BADMSG:
        case BTSTAT_HAHARDWARE:
        case BTSTAT_INVPHASE:
        case BTSTAT_HATIMEOUT:
        case BTSTAT_NORESPONSE:
        case BTSTAT_DISCONNECT:
        case BTSTAT_HASOFTWARE:
        case BTSTAT_BUSFREE:
        case BTSTAT_SENSFAILED:
            cmd->result |= (DID_ERROR << 16);
            break;

        case BTSTAT_SENTRST:
        case BTSTAT_RECVRST:
        case BTSTAT_BUSRESET:
            cmd->result = (DID_RESET << 16);
            break;

        case BTSTAT_ABORTQUEUE:
            cmd->result = (DID_BUS_BUSY << 16);
            break;

        case BTSTAT_SCSIPARITY:
            cmd->result = (DID_PARITY << 16);
            break;

        default:
            cmd->result = (DID_ERROR << 16);
            scmd_printk(KERN_DEBUG, cmd,
                    "Unknown completion status: 0x%x\n",
                    btstat);
    }

    dev_dbg(&cmd->device->sdev_gendev,
        "cmd=%p %x ctx=%p result=0x%x status=0x%x,%x\n",
        cmd, cmd->cmnd[0], ctx, cmd->result, btstat, sdstat);

    scsi_done(cmd);
}

/*
 * Barrier usage: since the PVSCSI device is emulated, there could be cases
 * where we may want to serialize some accesses between the driver and the
 * emulation layer. We use compiler barriers instead of the more expensive
 * memory barriers because PVSCSI is only supported on x86, which has strong
 * memory access ordering.
 */
static void pvscsi_process_completion_ring(struct pvscsi_adapter *adapter)
{
    struct PVSCSIRingsState *s = adapter->rings_state;
    struct PVSCSIRingCmpDesc *ring = adapter->cmp_ring;
    u32 cmp_entries = s->cmpNumEntriesLog2;

    while (s->cmpConsIdx != s->cmpProdIdx) {
        struct PVSCSIRingCmpDesc *e = ring + (s->cmpConsIdx &
                              MASK(cmp_entries));
        /*
         * This barrier() ensures that *e is not dereferenced while
         * the device emulation still writes data into the slot.
         * Since the device emulation advances s->cmpProdIdx only after
         * updating the slot, we want to check it first.
         */
        barrier();
        pvscsi_complete_request(adapter, e);
        /*
         * This barrier() ensures that the compiler doesn't reorder the
         * write to s->cmpConsIdx before the read of (*e) inside
         * pvscsi_complete_request. Otherwise, the device emulation may
         * overwrite *e before we had a chance to read it.
         */
        barrier();
        s->cmpConsIdx++;
    }
}

/*
 * Translate a Linux SCSI request into a request ring entry.
 */
static int pvscsi_queue_ring(struct pvscsi_adapter *adapter,
                 struct pvscsi_ctx *ctx, struct scsi_cmnd *cmd)
{
    struct PVSCSIRingsState *s;
    struct PVSCSIRingReqDesc *e;
    struct scsi_device *sdev;
    u32 req_entries;

    s = adapter->rings_state;
    sdev = cmd->device;
    req_entries = s->reqNumEntriesLog2;

    /*
     * If this condition holds, we might have room on the request ring, but
     * we might not have room on the completion ring for the response.
     * However, we have already ruled out this possibility - we would not
     * have successfully allocated a context if it were true, since we only
     * have one context per request entry.  Check for it anyway, since it
     * would be a serious bug.
     */
    if (s->reqProdIdx - s->cmpConsIdx >= 1 << req_entries) {
        scmd_printk(KERN_ERR, cmd, "vmw_pvscsi: "
                "ring full: reqProdIdx=%d cmpConsIdx=%d\n",
                s->reqProdIdx, s->cmpConsIdx);
        return -1;
    }

    e = adapter->req_ring + (s->reqProdIdx & MASK(req_entries));

    e->bus    = sdev->channel;
    e->target = sdev->id;
    memset(e->lun, 0, sizeof(e->lun));
    e->lun[1] = sdev->lun;

    if (cmd->sense_buffer) {
        ctx->sensePA = dma_map_single(&adapter->dev->dev,
                cmd->sense_buffer, SCSI_SENSE_BUFFERSIZE,
                DMA_FROM_DEVICE);
        if (dma_mapping_error(&adapter->dev->dev, ctx->sensePA)) {
            scmd_printk(KERN_DEBUG, cmd,
                    "vmw_pvscsi: Failed to map sense buffer for DMA.\n");
            ctx->sensePA = 0;
            return -ENOMEM;
        }
        e->senseAddr = ctx->sensePA;
        e->senseLen = SCSI_SENSE_BUFFERSIZE;
    } else {
        e->senseLen  = 0;
        e->senseAddr = 0;
    }
    e->cdbLen   = cmd->cmd_len;
    e->vcpuHint = smp_processor_id();
    memcpy(e->cdb, cmd->cmnd, e->cdbLen);

    e->tag = SIMPLE_QUEUE_TAG;

    if (cmd->sc_data_direction == DMA_FROM_DEVICE)
        e->flags = PVSCSI_FLAG_CMD_DIR_TOHOST;
    else if (cmd->sc_data_direction == DMA_TO_DEVICE)
        e->flags = PVSCSI_FLAG_CMD_DIR_TODEVICE;
    else if (cmd->sc_data_direction == DMA_NONE)
        e->flags = PVSCSI_FLAG_CMD_DIR_NONE;
    else
        e->flags = 0;

    if (pvscsi_map_buffers(adapter, ctx, cmd, e) != 0) {
        if (cmd->sense_buffer) {
            dma_unmap_single(&adapter->dev->dev, ctx->sensePA,
                     SCSI_SENSE_BUFFERSIZE,
                     DMA_FROM_DEVICE);
            ctx->sensePA = 0;
        }
        return -ENOMEM;
    }

    e->context = pvscsi_map_context(adapter, ctx);

    barrier();

    s->reqProdIdx++;

    return 0;
}

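/*
 * Queuecommand handler: grab a free context and post the request on the
 * ring under hw_lock, then kick the device doorbell after dropping the
 * lock.  Returns SCSI_MLQUEUE_HOST_BUSY when no context or ring slot is
 * available so the midlayer will retry.
 */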
static int pvscsi_queue_lck(struct scsi_cmnd *cmd)
{
    struct Scsi_Host *host = cmd->device->host;
    struct pvscsi_adapter *adapter = shost_priv(host);
    struct pvscsi_ctx *ctx;
    unsigned long flags;
    unsigned char op;

    spin_lock_irqsave(&adapter->hw_lock, flags);

    ctx = pvscsi_acquire_context(adapter, cmd);
    if (!ctx || pvscsi_queue_ring(adapter, ctx, cmd) != 0) {
        if (ctx)
            pvscsi_release_context(adapter, ctx);
        spin_unlock_irqrestore(&adapter->hw_lock, flags);
        return SCSI_MLQUEUE_HOST_BUSY;
    }

    op = cmd->cmnd[0];

    dev_dbg(&cmd->device->sdev_gendev,
        "queued cmd %p, ctx %p, op=%x\n", cmd, ctx, op);

    spin_unlock_irqrestore(&adapter->hw_lock, flags);

    pvscsi_kick_io(adapter, op);

    return 0;
}

static DEF_SCSI_QCMD(pvscsi_queue)

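/*
 * Error-handler abort callback.  Issues an abort to the device and then
 * waits (up to 2s) for the completion path to signal abort_cmp; the
 * normal completion for the command is swallowed in
 * pvscsi_complete_request() and replaced by a DID_ABORT result here.
 */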
static int pvscsi_abort(struct scsi_cmnd *cmd)
{
    struct pvscsi_adapter *adapter = shost_priv(cmd->device->host);
    struct pvscsi_ctx *ctx;
    unsigned long flags;
    int result = SUCCESS;
    DECLARE_COMPLETION_ONSTACK(abort_cmp);
    int done;

    scmd_printk(KERN_DEBUG, cmd, "task abort on host %u, %p\n",
            adapter->host->host_no, cmd);

    spin_lock_irqsave(&adapter->hw_lock, flags);

    /*
     * Poll the completion ring first - we might be trying to abort
     * a command that is waiting to be dispatched in the completion ring.
     */
    pvscsi_process_completion_ring(adapter);

    /*
     * If there is no context for the command, it either already succeeded
     * or else was never properly issued.  Not our problem.
     */
    ctx = pvscsi_find_context(adapter, cmd);
    if (!ctx) {
        scmd_printk(KERN_DEBUG, cmd, "Failed to abort cmd %p\n", cmd);
        goto out;
    }

    /*
     * Mark that the command has been requested to be aborted and issue
     * the abort.
     */
    ctx->abort_cmp = &abort_cmp;

    pvscsi_abort_cmd(adapter, ctx);
    spin_unlock_irqrestore(&adapter->hw_lock, flags);
    /* Wait for 2 secs for the completion. */
    done = wait_for_completion_timeout(&abort_cmp, msecs_to_jiffies(2000));
    spin_lock_irqsave(&adapter->hw_lock, flags);

    if (!done) {
        /*
         * Failed to abort the command, unmark the fact that it
         * was requested to be aborted.
         */
        ctx->abort_cmp = NULL;
        result = FAILED;
        scmd_printk(KERN_DEBUG, cmd,
                "Failed to get completion for aborted cmd %p\n",
                cmd);
        goto out;
    }

    /*
     * Successfully aborted the command.
     */
    cmd->result = (DID_ABORT << 16);
    scsi_done(cmd);

out:
    spin_unlock_irqrestore(&adapter->hw_lock, flags);
    return result;
}

/*
 * Abort all outstanding requests.  This is only safe to use if the completion
 * ring will never be walked again or the device has been reset, because it
 * destroys the 1-1 mapping between the context field passed to the emulation
 * and our request structure.
 */
static void pvscsi_reset_all(struct pvscsi_adapter *adapter)
{
    unsigned i;

    for (i = 0; i < adapter->req_depth; i++) {
        struct pvscsi_ctx *ctx = &adapter->cmd_map[i];
        struct scsi_cmnd *cmd = ctx->cmd;
        if (cmd) {
            scmd_printk(KERN_ERR, cmd,
                    "Forced reset on cmd %p\n", cmd);
            pvscsi_unmap_buffers(adapter, ctx);
            pvscsi_patch_sense(cmd);
            pvscsi_release_context(adapter, ctx);
            cmd->result = (DID_RESET << 16);
            scsi_done(cmd);
        }
    }
}

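/*
 * Host reset: quiesce the msg workqueue, flush and reset the adapter,
 * complete all outstanding commands with DID_RESET, then rebuild the
 * rings from scratch.
 */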
static int pvscsi_host_reset(struct scsi_cmnd *cmd)
{
    struct Scsi_Host *host = cmd->device->host;
    struct pvscsi_adapter *adapter = shost_priv(host);
    unsigned long flags;
    bool use_msg;

    scmd_printk(KERN_INFO, cmd, "SCSI Host reset\n");

    spin_lock_irqsave(&adapter->hw_lock, flags);

    use_msg = adapter->use_msg;

    if (use_msg) {
        adapter->use_msg = false;
        spin_unlock_irqrestore(&adapter->hw_lock, flags);

        /*
         * Now that we know that the ISR won't add more work on the
         * workqueue, we can safely flush any outstanding work.
         */
        flush_workqueue(adapter->workqueue);
        spin_lock_irqsave(&adapter->hw_lock, flags);
    }

    /*
     * We're going to tear down the entire ring structure and set it back
     * up, so stall new requests until all completions are flushed and
     * the rings are back in place.
     */

    pvscsi_process_request_ring(adapter);

    ll_adapter_reset(adapter);

    /*
     * Now process any completions.  Note we do this AFTER adapter reset,
     * which is strange, but stops races where completions get posted
     * between processing the ring and issuing the reset.  The backend will
     * not touch the ring memory after reset, so the immediately pre-reset
     * completion ring state is still valid.
     */
    pvscsi_process_completion_ring(adapter);

    pvscsi_reset_all(adapter);
    adapter->use_msg = use_msg;
    pvscsi_setup_all_rings(adapter);
    pvscsi_unmask_intr(adapter);

    spin_unlock_irqrestore(&adapter->hw_lock, flags);

    return SUCCESS;
}

static int pvscsi_bus_reset(struct scsi_cmnd *cmd)
{
    struct Scsi_Host *host = cmd->device->host;
    struct pvscsi_adapter *adapter = shost_priv(host);
    unsigned long flags;

    scmd_printk(KERN_INFO, cmd, "SCSI Bus reset\n");

    /*
     * We don't want to queue new requests for this bus after
     * flushing all pending requests to emulation, since new
     * requests could then sneak in during this bus reset phase,
     * so take the lock now.
     */
    spin_lock_irqsave(&adapter->hw_lock, flags);

    pvscsi_process_request_ring(adapter);
    ll_bus_reset(adapter);
    pvscsi_process_completion_ring(adapter);

    spin_unlock_irqrestore(&adapter->hw_lock, flags);

    return SUCCESS;
}

static int pvscsi_device_reset(struct scsi_cmnd *cmd)
{
    struct Scsi_Host *host = cmd->device->host;
    struct pvscsi_adapter *adapter = shost_priv(host);
    unsigned long flags;

    scmd_printk(KERN_INFO, cmd, "SCSI device reset on scsi%u:%u\n",
            host->host_no, cmd->device->id);

    /*
     * We don't want to queue new requests for this device after flushing
     * all pending requests to emulation, since new requests could then
     * sneak in during this device reset phase, so take the lock now.
     */
    spin_lock_irqsave(&adapter->hw_lock, flags);

    pvscsi_process_request_ring(adapter);
    ll_device_reset(adapter, cmd->device->id);
    pvscsi_process_completion_ring(adapter);

    spin_unlock_irqrestore(&adapter->hw_lock, flags);

    return SUCCESS;
}

static struct scsi_host_template pvscsi_template;

static const char *pvscsi_info(struct Scsi_Host *host)
{
    struct pvscsi_adapter *adapter = shost_priv(host);
    static char buf[256];

    sprintf(buf, "VMware PVSCSI storage adapter rev %d, req/cmp/msg rings: "
        "%u/%u/%u pages, cmd_per_lun=%u", adapter->rev,
        adapter->req_pages, adapter->cmp_pages, adapter->msg_pages,
        pvscsi_template.cmd_per_lun);

    return buf;
}

static struct scsi_host_template pvscsi_template = {
    .module             = THIS_MODULE,
    .name               = "VMware PVSCSI Host Adapter",
    .proc_name          = "vmw_pvscsi",
    .info               = pvscsi_info,
    .queuecommand           = pvscsi_queue,
    .this_id            = -1,
    .sg_tablesize           = PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT,
    .dma_boundary           = UINT_MAX,
    .max_sectors            = 0xffff,
    .change_queue_depth     = pvscsi_change_queue_depth,
    .eh_abort_handler       = pvscsi_abort,
    .eh_device_reset_handler    = pvscsi_device_reset,
    .eh_bus_reset_handler       = pvscsi_bus_reset,
    .eh_host_reset_handler      = pvscsi_host_reset,
};

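/*
 * Handle a hotplug message from the device: on DEV_ADDED/DEV_REMOVED,
 * look the device up in the midlayer and add or remove it accordingly.
 */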
static void pvscsi_process_msg(const struct pvscsi_adapter *adapter,
                   const struct PVSCSIRingMsgDesc *e)
{
    struct PVSCSIRingsState *s = adapter->rings_state;
    struct Scsi_Host *host = adapter->host;
    struct scsi_device *sdev;

    printk(KERN_INFO "vmw_pvscsi: msg type: 0x%x - MSG RING: %u/%u (%u)\n",
           e->type, s->msgProdIdx, s->msgConsIdx, s->msgNumEntriesLog2);

    BUILD_BUG_ON(PVSCSI_MSG_LAST != 2);

    if (e->type == PVSCSI_MSG_DEV_ADDED) {
        struct PVSCSIMsgDescDevStatusChanged *desc;
        desc = (struct PVSCSIMsgDescDevStatusChanged *)e;

        printk(KERN_INFO
               "vmw_pvscsi: msg: device added at scsi%u:%u:%u\n",
               desc->bus, desc->target, desc->lun[1]);

        if (!scsi_host_get(host))
            return;

        sdev = scsi_device_lookup(host, desc->bus, desc->target,
                      desc->lun[1]);
        if (sdev) {
            printk(KERN_INFO "vmw_pvscsi: device already exists\n");
            scsi_device_put(sdev);
        } else
            scsi_add_device(adapter->host, desc->bus,
                    desc->target, desc->lun[1]);

        scsi_host_put(host);
    } else if (e->type == PVSCSI_MSG_DEV_REMOVED) {
        struct PVSCSIMsgDescDevStatusChanged *desc;
        desc = (struct PVSCSIMsgDescDevStatusChanged *)e;

        printk(KERN_INFO
               "vmw_pvscsi: msg: device removed at scsi%u:%u:%u\n",
               desc->bus, desc->target, desc->lun[1]);

        if (!scsi_host_get(host))
            return;

        sdev = scsi_device_lookup(host, desc->bus, desc->target,
                      desc->lun[1]);
        if (sdev) {
            scsi_remove_device(sdev);
            scsi_device_put(sdev);
        } else
            printk(KERN_INFO
                   "vmw_pvscsi: failed to lookup scsi%u:%u:%u\n",
                   desc->bus, desc->target, desc->lun[1]);

        scsi_host_put(host);
    }
}

static int pvscsi_msg_pending(const struct pvscsi_adapter *adapter)
{
    struct PVSCSIRingsState *s = adapter->rings_state;

    return s->msgProdIdx != s->msgConsIdx;
}

static void pvscsi_process_msg_ring(const struct pvscsi_adapter *adapter)
{
    struct PVSCSIRingsState *s = adapter->rings_state;
    struct PVSCSIRingMsgDesc *ring = adapter->msg_ring;
    u32 msg_entries = s->msgNumEntriesLog2;

    while (pvscsi_msg_pending(adapter)) {
        struct PVSCSIRingMsgDesc *e = ring + (s->msgConsIdx &
                              MASK(msg_entries));

        barrier();
        pvscsi_process_msg(adapter, e);
        barrier();
        s->msgConsIdx++;
    }
}

static void pvscsi_msg_workqueue_handler(struct work_struct *data)
{
    struct pvscsi_adapter *adapter;

    adapter = container_of(data, struct pvscsi_adapter, work);

    pvscsi_process_msg_ring(adapter);
}

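/*
 * Probe for msg ring support by writing the SETUP_MSG_RING command with
 * no payload: if COMMAND_STATUS reads back as -1, the feature is treated
 * as unsupported.  On success, create the single-threaded workqueue that
 * services the msg ring outside interrupt context.
 */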
static int pvscsi_setup_msg_workqueue(struct pvscsi_adapter *adapter)
{
    char name[32];

    if (!pvscsi_use_msg)
        return 0;

    pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_COMMAND,
             PVSCSI_CMD_SETUP_MSG_RING);

    if (pvscsi_reg_read(adapter, PVSCSI_REG_OFFSET_COMMAND_STATUS) == -1)
        return 0;

    snprintf(name, sizeof(name),
         "vmw_pvscsi_wq_%u", adapter->host->host_no);

    adapter->workqueue = create_singlethread_workqueue(name);
    if (!adapter->workqueue) {
        printk(KERN_ERR "vmw_pvscsi: failed to create work queue\n");
        return 0;
    }
    INIT_WORK(&adapter->work, pvscsi_msg_workqueue_handler);

    return 1;
}

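/*
 * Probe for and optionally enable the request-call-threshold feature
 * (driver-based request coalescing), using the same COMMAND_STATUS
 * handshake as the msg ring probe above.
 */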
static bool pvscsi_setup_req_threshold(struct pvscsi_adapter *adapter,
                      bool enable)
{
    u32 val;

    if (!pvscsi_use_req_threshold)
        return false;

    pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_COMMAND,
             PVSCSI_CMD_SETUP_REQCALLTHRESHOLD);
    val = pvscsi_reg_read(adapter, PVSCSI_REG_OFFSET_COMMAND_STATUS);
    if (val == -1) {
        printk(KERN_INFO "vmw_pvscsi: device does not support req_threshold\n");
        return false;
    } else {
        struct PVSCSICmdDescSetupReqCall cmd_msg = { 0 };
        cmd_msg.enable = enable;
        printk(KERN_INFO
               "vmw_pvscsi: %sabling reqCallThreshold\n",
            enable ? "en" : "dis");
        pvscsi_write_cmd_desc(adapter,
                      PVSCSI_CMD_SETUP_REQCALLTHRESHOLD,
                      &cmd_msg, sizeof(cmd_msg));
        return pvscsi_reg_read(adapter,
                       PVSCSI_REG_OFFSET_COMMAND_STATUS) != 0;
    }
}

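/*
 * Interrupt handlers.  pvscsi_isr() is used for MSI/MSI-X, where no
 * status check is needed; the INTx path goes through
 * pvscsi_shared_isr(), which must read and acknowledge INTR_STATUS and
 * reject interrupts raised by other devices sharing the line.
 */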
static irqreturn_t pvscsi_isr(int irq, void *devp)
{
    struct pvscsi_adapter *adapter = devp;
    unsigned long flags;

    spin_lock_irqsave(&adapter->hw_lock, flags);
    pvscsi_process_completion_ring(adapter);
    if (adapter->use_msg && pvscsi_msg_pending(adapter))
        queue_work(adapter->workqueue, &adapter->work);
    spin_unlock_irqrestore(&adapter->hw_lock, flags);

    return IRQ_HANDLED;
}

static irqreturn_t pvscsi_shared_isr(int irq, void *devp)
{
    struct pvscsi_adapter *adapter = devp;
    u32 val = pvscsi_read_intr_status(adapter);

    if (!(val & PVSCSI_INTR_ALL_SUPPORTED))
        return IRQ_NONE;
    pvscsi_write_intr_status(devp, val);
    return pvscsi_isr(irq, devp);
}

static void pvscsi_free_sgls(const struct pvscsi_adapter *adapter)
{
    struct pvscsi_ctx *ctx = adapter->cmd_map;
    unsigned i;

    for (i = 0; i < adapter->req_depth; ++i, ++ctx)
        free_pages((unsigned long)ctx->sgl, get_order(SGL_SIZE));
}

static void pvscsi_shutdown_intr(struct pvscsi_adapter *adapter)
{
    free_irq(pci_irq_vector(adapter->dev, 0), adapter);
    pci_free_irq_vectors(adapter->dev);
}

static void pvscsi_release_resources(struct pvscsi_adapter *adapter)
{
    if (adapter->workqueue)
        destroy_workqueue(adapter->workqueue);

    if (adapter->mmioBase)
        pci_iounmap(adapter->dev, adapter->mmioBase);

    pci_release_regions(adapter->dev);

    if (adapter->cmd_map) {
        pvscsi_free_sgls(adapter);
        kfree(adapter->cmd_map);
    }

    if (adapter->rings_state)
        dma_free_coherent(&adapter->dev->dev, PAGE_SIZE,
                    adapter->rings_state, adapter->ringStatePA);

    if (adapter->req_ring)
        dma_free_coherent(&adapter->dev->dev,
                    adapter->req_pages * PAGE_SIZE,
                    adapter->req_ring, adapter->reqRingPA);

    if (adapter->cmp_ring)
        dma_free_coherent(&adapter->dev->dev,
                    adapter->cmp_pages * PAGE_SIZE,
                    adapter->cmp_ring, adapter->cmpRingPA);

    if (adapter->msg_ring)
        dma_free_coherent(&adapter->dev->dev,
                    adapter->msg_pages * PAGE_SIZE,
                    adapter->msg_ring, adapter->msgRingPA);
}

/*
 * Allocate scatter gather lists.
 *
 * These are statically allocated.  Trying to be clever was not worth it.
 *
 * Dynamic allocation can fail, and we can't go deep into the memory
 * allocator, since we're a SCSI driver, and trying too hard to allocate
 * memory might generate disk I/O.  We also don't want to fail disk I/O
 * in that case because we can't get an allocation - the I/O could be
 * trying to swap out data to free memory.  Since that is pathological,
 * just use a statically allocated scatter list.
 *
 */
static int pvscsi_allocate_sg(struct pvscsi_adapter *adapter)
{
    struct pvscsi_ctx *ctx;
    int i;

    ctx = adapter->cmd_map;
    BUILD_BUG_ON(sizeof(struct pvscsi_sg_list) > SGL_SIZE);

    for (i = 0; i < adapter->req_depth; ++i, ++ctx) {
        ctx->sgl = (void *)__get_free_pages(GFP_KERNEL,
                            get_order(SGL_SIZE));
        ctx->sglPA = 0;
        BUG_ON(!IS_ALIGNED(((unsigned long)ctx->sgl), PAGE_SIZE));
        if (!ctx->sgl) {
            for (; i >= 0; --i, --ctx) {
                free_pages((unsigned long)ctx->sgl,
                       get_order(SGL_SIZE));
                ctx->sgl = NULL;
            }
            return -ENOMEM;
        }
    }

    return 0;
}

/*
 * Query the device, fetch the config info and return the
 * maximum number of targets on the adapter.  On any failure,
 * return the default of 16.
 */
static u32 pvscsi_get_max_targets(struct pvscsi_adapter *adapter)
{
    struct PVSCSICmdDescConfigCmd cmd;
    struct PVSCSIConfigPageHeader *header;
    struct device *dev;
    dma_addr_t configPagePA;
    void *config_page;
    u32 numPhys = 16;

    dev = pvscsi_dev(adapter);
    config_page = dma_alloc_coherent(&adapter->dev->dev, PAGE_SIZE,
            &configPagePA, GFP_KERNEL);
    if (!config_page) {
        dev_warn(dev, "vmw_pvscsi: failed to allocate memory for config page\n");
        goto exit;
    }
    BUG_ON(configPagePA & ~PAGE_MASK);

    /* Fetch config info from the device. */
    cmd.configPageAddress = ((u64)PVSCSI_CONFIG_CONTROLLER_ADDRESS) << 32;
    cmd.configPageNum = PVSCSI_CONFIG_PAGE_CONTROLLER;
    cmd.cmpAddr = configPagePA;
    cmd._pad = 0;

    /*
     * Mark the completion page header with error values. If the device
     * completes the command successfully, it sets the status values to
     * indicate success.
     */
    header = config_page;
    header->hostStatus = BTSTAT_INVPARAM;
    header->scsiStatus = SDSTAT_CHECK;

    pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_CONFIG, &cmd, sizeof(cmd));

    if (header->hostStatus == BTSTAT_SUCCESS &&
        header->scsiStatus == SDSTAT_GOOD) {
        struct PVSCSIConfigPageController *config;

        config = config_page;
        numPhys = config->numPhys;
    } else
        dev_warn(dev, "vmw_pvscsi: PVSCSI_CMD_CONFIG failed. hostStatus = 0x%x, scsiStatus = 0x%x\n",
             header->hostStatus, header->scsiStatus);
    dma_free_coherent(&adapter->dev->dev, PAGE_SIZE, config_page,
              configPagePA);
exit:
    return numPhys;
}

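/*
 * PCI probe: enable the device, map its MMIO BAR, query the maximum
 * target count to size the rings, then allocate the host, rings,
 * contexts and SG lists, set up interrupts and finally register and
 * scan the SCSI host.
 */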
static int pvscsi_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
    unsigned int irq_flag = PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY;
    struct pvscsi_adapter *adapter;
    struct pvscsi_adapter adapter_temp;
    struct Scsi_Host *host = NULL;
    unsigned int i;
    int error;
    u32 max_id;

    error = -ENODEV;

    if (pci_enable_device(pdev))
        return error;

    if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) {
        printk(KERN_INFO "vmw_pvscsi: using 64bit dma\n");
    } else if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32))) {
        printk(KERN_INFO "vmw_pvscsi: using 32bit dma\n");
    } else {
        printk(KERN_ERR "vmw_pvscsi: failed to set DMA mask\n");
        goto out_disable_device;
    }

    /*
     * Let's use a temp pvscsi_adapter struct until we find the number of
     * targets on the adapter; after that we will switch to the real
     * allocated struct.
     */
    adapter = &adapter_temp;
    memset(adapter, 0, sizeof(*adapter));
    adapter->dev  = pdev;
    adapter->rev = pdev->revision;

    if (pci_request_regions(pdev, "vmw_pvscsi")) {
        printk(KERN_ERR "vmw_pvscsi: pci memory selection failed\n");
        goto out_disable_device;
    }

    for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
        if ((pci_resource_flags(pdev, i) & PCI_BASE_ADDRESS_SPACE_IO))
            continue;

        if (pci_resource_len(pdev, i) < PVSCSI_MEM_SPACE_SIZE)
            continue;

        break;
    }

    if (i == DEVICE_COUNT_RESOURCE) {
        printk(KERN_ERR
               "vmw_pvscsi: adapter has no suitable MMIO region\n");
        goto out_release_resources_and_disable;
    }

    adapter->mmioBase = pci_iomap(pdev, i, PVSCSI_MEM_SPACE_SIZE);

    if (!adapter->mmioBase) {
        printk(KERN_ERR
               "vmw_pvscsi: can't iomap for BAR %d memsize %lu\n",
               i, PVSCSI_MEM_SPACE_SIZE);
        goto out_release_resources_and_disable;
    }

    pci_set_master(pdev);

    /*
     * Ask the device for max number of targets before deciding the
     * default pvscsi_ring_pages value.
     */
    max_id = pvscsi_get_max_targets(adapter);
    printk(KERN_INFO "vmw_pvscsi: max_id: %u\n", max_id);

    if (pvscsi_ring_pages == 0)
        /*
         * Choose the right default: 8 pages for up to 16 targets,
         * the maximum above that.
         */
        pvscsi_ring_pages = (max_id > 16) ?
            PVSCSI_SETUP_RINGS_MAX_NUM_PAGES :
            PVSCSI_DEFAULT_NUM_PAGES_PER_RING;
    printk(KERN_INFO
           "vmw_pvscsi: setting ring_pages to %d\n",
           pvscsi_ring_pages);

    pvscsi_template.can_queue =
        min(PVSCSI_MAX_NUM_PAGES_REQ_RING, pvscsi_ring_pages) *
        PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE;
    pvscsi_template.cmd_per_lun =
        min(pvscsi_template.can_queue, pvscsi_cmd_per_lun);
    host = scsi_host_alloc(&pvscsi_template, sizeof(struct pvscsi_adapter));
    if (!host) {
        printk(KERN_ERR "vmw_pvscsi: failed to allocate host\n");
        goto out_release_resources_and_disable;
    }

    /*
     * Let's use the real pvscsi_adapter struct here onwards.
     */
    adapter = shost_priv(host);
    memset(adapter, 0, sizeof(*adapter));
    adapter->dev  = pdev;
    adapter->host = host;
    /*
     * Copy back what we already have to the allocated adapter struct.
     */
    adapter->rev = adapter_temp.rev;
    adapter->mmioBase = adapter_temp.mmioBase;

    spin_lock_init(&adapter->hw_lock);
    host->max_channel = 0;
    host->max_lun     = 1;
    host->max_cmd_len = 16;
    host->max_id      = max_id;

    pci_set_drvdata(pdev, host);

    ll_adapter_reset(adapter);

    adapter->use_msg = pvscsi_setup_msg_workqueue(adapter);

    error = pvscsi_allocate_rings(adapter);
    if (error) {
        printk(KERN_ERR "vmw_pvscsi: unable to allocate ring memory\n");
        goto out_release_resources;
    }

    /*
     * From this point on we should reset the adapter if anything goes
     * wrong.
     */
    pvscsi_setup_all_rings(adapter);

    adapter->cmd_map = kcalloc(adapter->req_depth,
                   sizeof(struct pvscsi_ctx), GFP_KERNEL);
    if (!adapter->cmd_map) {
        printk(KERN_ERR "vmw_pvscsi: failed to allocate memory.\n");
        error = -ENOMEM;
        goto out_reset_adapter;
    }

    INIT_LIST_HEAD(&adapter->cmd_pool);
    for (i = 0; i < adapter->req_depth; i++) {
        struct pvscsi_ctx *ctx = adapter->cmd_map + i;
        list_add(&ctx->list, &adapter->cmd_pool);
    }

    error = pvscsi_allocate_sg(adapter);
    if (error) {
        printk(KERN_ERR "vmw_pvscsi: unable to allocate s/g table\n");
        goto out_reset_adapter;
    }

    if (pvscsi_disable_msix)
        irq_flag &= ~PCI_IRQ_MSIX;
    if (pvscsi_disable_msi)
        irq_flag &= ~PCI_IRQ_MSI;

    error = pci_alloc_irq_vectors(adapter->dev, 1, 1, irq_flag);
    if (error < 0)
        goto out_reset_adapter;

    adapter->use_req_threshold = pvscsi_setup_req_threshold(adapter, true);
    printk(KERN_DEBUG "vmw_pvscsi: driver-based request coalescing %sabled\n",
           adapter->use_req_threshold ? "en" : "dis");

    if (adapter->dev->msix_enabled || adapter->dev->msi_enabled) {
        printk(KERN_INFO "vmw_pvscsi: using MSI%s\n",
            adapter->dev->msix_enabled ? "-X" : "");
        error = request_irq(pci_irq_vector(pdev, 0), pvscsi_isr,
                0, "vmw_pvscsi", adapter);
    } else {
        printk(KERN_INFO "vmw_pvscsi: using INTx\n");
        error = request_irq(pci_irq_vector(pdev, 0), pvscsi_shared_isr,
                IRQF_SHARED, "vmw_pvscsi", adapter);
    }

    if (error) {
        printk(KERN_ERR
               "vmw_pvscsi: unable to request IRQ: %d\n", error);
        goto out_reset_adapter;
    }

    error = scsi_add_host(host, &pdev->dev);
    if (error) {
        printk(KERN_ERR
               "vmw_pvscsi: scsi_add_host failed: %d\n", error);
        goto out_reset_adapter;
    }

    dev_info(&pdev->dev, "VMware PVSCSI rev %d host #%u\n",
         adapter->rev, host->host_no);

    pvscsi_unmask_intr(adapter);

    scsi_scan_host(host);

    return 0;

out_reset_adapter:
    ll_adapter_reset(adapter);
out_release_resources:
    pvscsi_shutdown_intr(adapter);
    pvscsi_release_resources(adapter);
    scsi_host_put(host);
out_disable_device:
    pci_disable_device(pdev);

    return error;

out_release_resources_and_disable:
    pvscsi_shutdown_intr(adapter);
    pvscsi_release_resources(adapter);
    goto out_disable_device;
}

static void __pvscsi_shutdown(struct pvscsi_adapter *adapter)
{
    pvscsi_mask_intr(adapter);

    if (adapter->workqueue)
        flush_workqueue(adapter->workqueue);

    pvscsi_shutdown_intr(adapter);

    pvscsi_process_request_ring(adapter);
    pvscsi_process_completion_ring(adapter);
    ll_adapter_reset(adapter);
}

static void pvscsi_shutdown(struct pci_dev *dev)
{
    struct Scsi_Host *host = pci_get_drvdata(dev);
    struct pvscsi_adapter *adapter = shost_priv(host);

    __pvscsi_shutdown(adapter);
}

static void pvscsi_remove(struct pci_dev *pdev)
{
    struct Scsi_Host *host = pci_get_drvdata(pdev);
    struct pvscsi_adapter *adapter = shost_priv(host);

    scsi_remove_host(host);

    __pvscsi_shutdown(adapter);
    pvscsi_release_resources(adapter);

    scsi_host_put(host);

    pci_disable_device(pdev);
}

static struct pci_driver pvscsi_pci_driver = {
    .name       = "vmw_pvscsi",
    .id_table   = pvscsi_pci_tbl,
    .probe      = pvscsi_probe,
    .remove     = pvscsi_remove,
    .shutdown   = pvscsi_shutdown,
};

static int __init pvscsi_init(void)
{
    pr_info("%s - version %s\n",
        PVSCSI_LINUX_DRIVER_DESC, PVSCSI_DRIVER_VERSION_STRING);
    return pci_register_driver(&pvscsi_pci_driver);
}

static void __exit pvscsi_exit(void)
{
    pci_unregister_driver(&pvscsi_pci_driver);
}

module_init(pvscsi_init);
module_exit(pvscsi_exit);