0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * VMware VMCI Driver
0004  *
0005  * Copyright (C) 2012 VMware, Inc. All rights reserved.
0006  */
0007 
0008 #include <linux/vmw_vmci_defs.h>
0009 #include <linux/vmw_vmci_api.h>
0010 #include <linux/moduleparam.h>
0011 #include <linux/interrupt.h>
0012 #include <linux/highmem.h>
0013 #include <linux/kernel.h>
0014 #include <linux/mm.h>
0015 #include <linux/module.h>
0016 #include <linux/processor.h>
0017 #include <linux/sched.h>
0018 #include <linux/slab.h>
0019 #include <linux/init.h>
0020 #include <linux/pci.h>
0021 #include <linux/smp.h>
0022 #include <linux/io.h>
0023 #include <linux/vmalloc.h>
0024 
0025 #include "vmci_datagram.h"
0026 #include "vmci_doorbell.h"
0027 #include "vmci_context.h"
0028 #include "vmci_driver.h"
0029 #include "vmci_event.h"
0030 
0031 #define PCI_DEVICE_ID_VMWARE_VMCI   0x0740
0032 
0033 #define VMCI_UTIL_NUM_RESOURCES 1
0034 
0035 /*
0036  * Datagram buffers for DMA send/receive must accommodate at least
0037  * a maximum sized datagram and the header.
0038  */
0039 #define VMCI_DMA_DG_BUFFER_SIZE (VMCI_MAX_DG_SIZE + PAGE_SIZE)
0040 
0041 static bool vmci_disable_msi;
0042 module_param_named(disable_msi, vmci_disable_msi, bool, 0);
0043 MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)");
0044 
0045 static bool vmci_disable_msix;
0046 module_param_named(disable_msix, vmci_disable_msix, bool, 0);
0047 MODULE_PARM_DESC(disable_msix, "Disable MSI-X use in driver - (default=0)");
0048 
0049 static u32 ctx_update_sub_id = VMCI_INVALID_ID;
0050 static u32 vm_context_id = VMCI_INVALID_ID;
0051 
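     /*
      * Per-device state for a VMCI PCI guest device. Registers are reached
      * either through the BAR0 I/O port window (iobase) on older devices or
      * through the BAR1 MMIO window (mmio_base) on newer ones. data_buffer
      * receives incoming datagrams (a DMA-coherent buffer when MMIO/DMA
      * datagrams are in use, a vmalloc'ed buffer otherwise), tx_buffer is
      * the DMA send buffer, and notification_bitmap backs the doorbell
      * notifications.
      */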
0052 struct vmci_guest_device {
0053     struct device *dev; /* PCI device we are attached to */
0054     void __iomem *iobase;
0055     void __iomem *mmio_base;
0056 
0057     bool exclusive_vectors;
0058 
0059     struct tasklet_struct datagram_tasklet;
0060     struct tasklet_struct bm_tasklet;
0061     struct wait_queue_head inout_wq;
0062 
0063     void *data_buffer;
0064     dma_addr_t data_buffer_base;
0065     void *tx_buffer;
0066     dma_addr_t tx_buffer_base;
0067     void *notification_bitmap;
0068     dma_addr_t notification_base;
0069 };
0070 
0071 static bool use_ppn64;
0072 
0073 bool vmci_use_ppn64(void)
0074 {
0075     return use_ppn64;
0076 }
0077 
0078 /* vmci_dev singleton device and supporting data */
0079 struct pci_dev *vmci_pdev;
0080 static struct vmci_guest_device *vmci_dev_g;
0081 static DEFINE_SPINLOCK(vmci_dev_spinlock);
0082 
0083 static atomic_t vmci_num_guest_devices = ATOMIC_INIT(0);
0084 
0085 bool vmci_guest_code_active(void)
0086 {
0087     return atomic_read(&vmci_num_guest_devices) != 0;
0088 }
0089 
0090 u32 vmci_get_vm_context_id(void)
0091 {
0092     if (vm_context_id == VMCI_INVALID_ID) {
0093         struct vmci_datagram get_cid_msg;
0094         get_cid_msg.dst =
0095             vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
0096                      VMCI_GET_CONTEXT_ID);
0097         get_cid_msg.src = VMCI_ANON_SRC_HANDLE;
0098         get_cid_msg.payload_size = 0;
0099         vm_context_id = vmci_send_datagram(&get_cid_msg);
0100     }
0101     return vm_context_id;
0102 }
0103 
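     /*
      * Register accessors: newer devices expose their registers through the
      * BAR1 MMIO window mapped in vmci_guest_probe_device(); otherwise the
      * registers are reached through the BAR0 I/O port window.
      */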
0104 static unsigned int vmci_read_reg(struct vmci_guest_device *dev, u32 reg)
0105 {
0106     if (dev->mmio_base != NULL)
0107         return readl(dev->mmio_base + reg);
0108     return ioread32(dev->iobase + reg);
0109 }
0110 
0111 static void vmci_write_reg(struct vmci_guest_device *dev, u32 val, u32 reg)
0112 {
0113     if (dev->mmio_base != NULL)
0114         writel(val, dev->mmio_base + reg);
0115     else
0116         iowrite32(val, dev->iobase + reg);
0117 }
0118 
0119 static void vmci_read_data(struct vmci_guest_device *vmci_dev,
0120                void *dest, size_t size)
0121 {
0122     if (vmci_dev->mmio_base == NULL)
0123         ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
0124                 dest, size);
0125     else {
0126         /*
0127          * For DMA datagrams, the data_buffer will contain the header on the
0128          * first page, followed by the incoming datagram(s) on the following
0129          * pages. The header uses an S/G element immediately following the
0130          * header on the first page to point to the data area.
0131          */
0132         struct vmci_data_in_out_header *buffer_header = vmci_dev->data_buffer;
0133         struct vmci_sg_elem *sg_array = (struct vmci_sg_elem *)(buffer_header + 1);
0134         size_t buffer_offset = dest - vmci_dev->data_buffer;
0135 
0136         buffer_header->opcode = 1;
0137         buffer_header->size = 1;
0138         buffer_header->busy = 0;
0139         sg_array[0].addr = vmci_dev->data_buffer_base + buffer_offset;
0140         sg_array[0].size = size;
0141 
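             /*
              * Writing the low 32 bits of the buffer address kicks off the
              * transfer (the high 32 bits were programmed once at probe
              * time). The device sets busy to 1 when the datagram has been
              * written to the buffer, and the DMA datagram interrupt wakes
              * inout_wq so the wait below can complete.
              */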
0142         vmci_write_reg(vmci_dev, lower_32_bits(vmci_dev->data_buffer_base),
0143                    VMCI_DATA_IN_LOW_ADDR);
0144 
0145         wait_event(vmci_dev->inout_wq, buffer_header->busy == 1);
0146     }
0147 }
0148 
0149 static int vmci_write_data(struct vmci_guest_device *dev,
0150                struct vmci_datagram *dg)
0151 {
0152     int result;
0153 
0154     if (dev->mmio_base != NULL) {
0155         struct vmci_data_in_out_header *buffer_header = dev->tx_buffer;
0156         u8 *dg_out_buffer = (u8 *)(buffer_header + 1);
0157 
0158         if (VMCI_DG_SIZE(dg) > VMCI_MAX_DG_SIZE)
0159             return VMCI_ERROR_INVALID_ARGS;
0160 
0161         /*
0162          * Initialize send buffer with outgoing datagram
0163          * and set up header for inline data. Device will
0164          * not access buffer asynchronously - only after
0165          * the write to VMCI_DATA_OUT_LOW_ADDR.
0166          */
0167         memcpy(dg_out_buffer, dg, VMCI_DG_SIZE(dg));
0168         buffer_header->opcode = 0;
0169         buffer_header->size = VMCI_DG_SIZE(dg);
0170         buffer_header->busy = 1;
0171 
0172         vmci_write_reg(dev, lower_32_bits(dev->tx_buffer_base),
0173                    VMCI_DATA_OUT_LOW_ADDR);
0174 
0175         /* Caller holds a spinlock, so cannot block. */
0176         spin_until_cond(buffer_header->busy == 0);
0177 
0178         result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
0179         if (result == VMCI_SUCCESS)
0180             result = (int)buffer_header->result;
0181     } else {
0182         iowrite8_rep(dev->iobase + VMCI_DATA_OUT_ADDR,
0183                  dg, VMCI_DG_SIZE(dg));
0184         result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
0185     }
0186 
0187     return result;
0188 }
0189 
0190 /*
0191  * VM to hypervisor call mechanism. We use the standard VMware naming
0192  * convention since shared code is calling this function as well.
0193  */
0194 int vmci_send_datagram(struct vmci_datagram *dg)
0195 {
0196     unsigned long flags;
0197     int result;
0198 
0199     /* Check args. */
0200     if (dg == NULL)
0201         return VMCI_ERROR_INVALID_ARGS;
0202 
0203     /*
0204      * We need to acquire the spinlock on the device because the
0205      * datagram data may be spread over multiple pages and the
0206      * monitor may interleave device user rpc calls from multiple
0207      * VCPUs. Acquiring the spinlock precludes that possibility.
0208      * We also disable interrupts so that an incoming datagram
0209      * cannot arrive during a "rep out" and end up re-entering
0210      * this function.
0211      */
0212     spin_lock_irqsave(&vmci_dev_spinlock, flags);
0213 
0214     if (vmci_dev_g) {
0215         /* vmci_write_data() already returns the datagram result. */
0216         result = vmci_write_data(vmci_dev_g, dg);
0217     } else {
0218         result = VMCI_ERROR_UNAVAILABLE;
0219     }
0220 
0221     spin_unlock_irqrestore(&vmci_dev_spinlock, flags);
0222 
0223     return result;
0224 }
0225 EXPORT_SYMBOL_GPL(vmci_send_datagram);
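
     /*
      * Illustrative use only (vmci_get_vm_context_id() above is the in-file
      * example): a caller fills in dst, src and payload_size of a struct
      * vmci_datagram, followed by any payload, and the return value is
      * either a negative VMCI error code or the resource-specific result
      * returned by the hypervisor.
      */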
0226 
0227 /*
0228  * Gets called with the new context id when the context id is updated
0229  * or the VM is resumed.
0230  */
0231 static void vmci_guest_cid_update(u32 sub_id,
0232                   const struct vmci_event_data *event_data,
0233                   void *client_data)
0234 {
0235     const struct vmci_event_payld_ctx *ev_payload =
0236                 vmci_event_data_const_payload(event_data);
0237 
0238     if (sub_id != ctx_update_sub_id) {
0239         pr_devel("Invalid subscriber (ID=0x%x)\n", sub_id);
0240         return;
0241     }
0242 
0243     if (!event_data || ev_payload->context_id == VMCI_INVALID_ID) {
0244         pr_devel("Invalid event data\n");
0245         return;
0246     }
0247 
0248     pr_devel("Updating context from (ID=0x%x) to (ID=0x%x) on event (type=%d)\n",
0249          vm_context_id, ev_payload->context_id, event_data->event);
0250 
0251     vm_context_id = ev_payload->context_id;
0252 }
0253 
0254 /*
0255  * Verify that the host supports the hypercalls we need. If it does not,
0256  * try to find fallback hypercalls and use those instead.  Returns 0 if
0257  * required hypercalls (or fallback hypercalls) are supported by the host,
0258  * an error code otherwise.
0259  */
0260 static int vmci_check_host_caps(struct pci_dev *pdev)
0261 {
0262     bool result;
0263     struct vmci_resource_query_msg *msg;
0264     u32 msg_size = sizeof(struct vmci_resource_query_hdr) +
0265                 VMCI_UTIL_NUM_RESOURCES * sizeof(u32);
0266     struct vmci_datagram *check_msg;
0267 
0268     check_msg = kzalloc(msg_size, GFP_KERNEL);
0269     if (!check_msg) {
0270         dev_err(&pdev->dev, "%s: Insufficient memory\n", __func__);
0271         return -ENOMEM;
0272     }
0273 
0274     check_msg->dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
0275                       VMCI_RESOURCES_QUERY);
0276     check_msg->src = VMCI_ANON_SRC_HANDLE;
0277     check_msg->payload_size = msg_size - VMCI_DG_HEADERSIZE;
0278     msg = (struct vmci_resource_query_msg *)VMCI_DG_PAYLOAD(check_msg);
0279 
0280     msg->num_resources = VMCI_UTIL_NUM_RESOURCES;
0281     msg->resources[0] = VMCI_GET_CONTEXT_ID;
0282 
0283     /* Checks that hypercalls are supported */
0284     result = vmci_send_datagram(check_msg) == 0x01;
0285     kfree(check_msg);
0286 
0287     dev_dbg(&pdev->dev, "%s: Host capability check: %s\n",
0288         __func__, result ? "PASSED" : "FAILED");
0289 
0290     /* We need the vector. There are no fallbacks. */
0291     return result ? 0 : -ENXIO;
0292 }
0293 
0294 /*
0295  * Reads datagrams from the device and dispatches them. For IO port
0296  * based access to the device, we always start reading datagrams into
0297  * only the first page of the datagram buffer. If the datagrams don't
0298  * fit into one page, we use the maximum datagram buffer size for the
0299  * remainder of the invocation. This is a simple heuristic for not
0300  * penalizing small datagrams. For DMA-based datagrams, we always
0301  * use the maximum datagram buffer size, since there is no performance
0302  * penalty for doing so.
0303  *
0304  * This function assumes that it has exclusive access to the data
0305  * in register(s) for the duration of the call.
0306  */
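     /*
      * This runs from the datagram tasklet, which the interrupt handlers
      * below schedule whenever the device indicates that datagrams are
      * pending.
      */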
0307 static void vmci_dispatch_dgs(unsigned long data)
0308 {
0309     struct vmci_guest_device *vmci_dev = (struct vmci_guest_device *)data;
0310     u8 *dg_in_buffer = vmci_dev->data_buffer;
0311     struct vmci_datagram *dg;
0312     size_t dg_in_buffer_size = VMCI_MAX_DG_SIZE;
0313     size_t current_dg_in_buffer_size;
0314     size_t remaining_bytes;
0315     bool is_io_port = vmci_dev->mmio_base == NULL;
0316 
0317     BUILD_BUG_ON(VMCI_MAX_DG_SIZE < PAGE_SIZE);
0318 
0319     if (!is_io_port) {
0320         /* For mmio, the first page is used for the header. */
0321         dg_in_buffer += PAGE_SIZE;
0322 
0323         /*
0324          * For DMA-based datagram operations, there is no performance
0325          * penalty for reading the maximum buffer size.
0326          */
0327         current_dg_in_buffer_size = VMCI_MAX_DG_SIZE;
0328     } else {
0329         current_dg_in_buffer_size = PAGE_SIZE;
0330     }
0331     vmci_read_data(vmci_dev, dg_in_buffer, current_dg_in_buffer_size);
0332     dg = (struct vmci_datagram *)dg_in_buffer;
0333     remaining_bytes = current_dg_in_buffer_size;
0334 
0335     /*
0336      * Read through the buffer until an invalid datagram header is
0337      * encountered. The exit condition for datagrams read through
0338      * VMCI_DATA_IN_ADDR is a bit more complicated, since a datagram
0339      * can start on any page boundary in the buffer.
0340      */
0341     while (dg->dst.resource != VMCI_INVALID_ID ||
0342            (is_io_port && remaining_bytes > PAGE_SIZE)) {
0343         unsigned dg_in_size;
0344 
0345         /*
0346          * If using VMCI_DATA_IN_ADDR, skip to the next page
0347          * as a datagram can start on any page boundary.
0348          */
0349         if (dg->dst.resource == VMCI_INVALID_ID) {
0350             dg = (struct vmci_datagram *)roundup(
0351                 (uintptr_t)dg + 1, PAGE_SIZE);
0352             remaining_bytes =
0353                 (size_t)(dg_in_buffer +
0354                      current_dg_in_buffer_size -
0355                      (u8 *)dg);
0356             continue;
0357         }
0358 
0359         dg_in_size = VMCI_DG_SIZE_ALIGNED(dg);
0360 
0361         if (dg_in_size <= dg_in_buffer_size) {
0362             int result;
0363 
0364             /*
0365              * If the remaining bytes in the datagram
0366              * buffer don't contain the complete
0367              * datagram, we first make sure we have enough
0368              * room for it and then we read the remainder
0369              * of the datagram and possibly any following
0370              * datagrams.
0371              */
0372             if (dg_in_size > remaining_bytes) {
0373                 if (remaining_bytes !=
0374                     current_dg_in_buffer_size) {
0375 
0376                     /*
0377                      * We move the partial
0378                      * datagram to the front and
0379                      * read the remainder of the
0380                      * datagram, and possibly any
0381                      * following datagrams, into
0382                      * the bytes after it.
0383                      */
0384                     memmove(dg_in_buffer, dg_in_buffer +
0385                         current_dg_in_buffer_size -
0386                         remaining_bytes,
0387                         remaining_bytes);
0388                     dg = (struct vmci_datagram *)
0389                         dg_in_buffer;
0390                 }
0391 
0392                 if (current_dg_in_buffer_size !=
0393                     dg_in_buffer_size)
0394                     current_dg_in_buffer_size =
0395                         dg_in_buffer_size;
0396 
0397                 vmci_read_data(vmci_dev,
0398                            dg_in_buffer +
0399                         remaining_bytes,
0400                            current_dg_in_buffer_size -
0401                         remaining_bytes);
0402             }
0403 
0404             /*
0405              * We special case event datagrams from the
0406              * hypervisor.
0407              */
0408             if (dg->src.context == VMCI_HYPERVISOR_CONTEXT_ID &&
0409                 dg->dst.resource == VMCI_EVENT_HANDLER) {
0410                 result = vmci_event_dispatch(dg);
0411             } else {
0412                 result = vmci_datagram_invoke_guest_handler(dg);
0413             }
0414             if (result < VMCI_SUCCESS)
0415                 dev_dbg(vmci_dev->dev,
0416                     "Datagram with resource (ID=0x%x) failed (err=%d)\n",
0417                      dg->dst.resource, result);
0418 
0419             /* On to the next datagram. */
0420             dg = (struct vmci_datagram *)((u8 *)dg +
0421                               dg_in_size);
0422         } else {
0423             size_t bytes_to_skip;
0424 
0425             /*
0426              * Datagram doesn't fit in datagram buffer of maximal
0427              * size. We drop it.
0428              */
0429             dev_dbg(vmci_dev->dev,
0430                 "Failed to receive datagram (size=%u bytes)\n",
0431                  dg_in_size);
0432 
0433             bytes_to_skip = dg_in_size - remaining_bytes;
0434             if (current_dg_in_buffer_size != dg_in_buffer_size)
0435                 current_dg_in_buffer_size = dg_in_buffer_size;
0436 
0437             for (;;) {
0438                 vmci_read_data(vmci_dev, dg_in_buffer,
0439                            current_dg_in_buffer_size);
0440                 if (bytes_to_skip <= current_dg_in_buffer_size)
0441                     break;
0442 
0443                 bytes_to_skip -= current_dg_in_buffer_size;
0444             }
0445             dg = (struct vmci_datagram *)(dg_in_buffer +
0446                               bytes_to_skip);
0447         }
0448 
0449         remaining_bytes =
0450             (size_t) (dg_in_buffer + current_dg_in_buffer_size -
0451                   (u8 *)dg);
0452 
0453         if (remaining_bytes < VMCI_DG_HEADERSIZE) {
0454             /* Get the next batch of datagrams. */
0455 
0456             vmci_read_data(vmci_dev, dg_in_buffer,
0457                     current_dg_in_buffer_size);
0458             dg = (struct vmci_datagram *)dg_in_buffer;
0459             remaining_bytes = current_dg_in_buffer_size;
0460         }
0461     }
0462 }
0463 
0464 /*
0465  * Scans the notification bitmap for raised flags, clears them
0466  * and handles the notifications.
0467  */
0468 static void vmci_process_bitmap(unsigned long data)
0469 {
0470     struct vmci_guest_device *dev = (struct vmci_guest_device *)data;
0471 
0472     if (!dev->notification_bitmap) {
0473         dev_dbg(dev->dev, "No bitmap present in %s\n", __func__);
0474         return;
0475     }
0476 
0477     vmci_dbell_scan_notification_entries(dev->notification_bitmap);
0478 }
0479 
0480 /*
0481  * Interrupt handler for legacy or MSI interrupt, or for first MSI-X
0482  * interrupt (vector VMCI_INTR_DATAGRAM).
0483  */
0484 static irqreturn_t vmci_interrupt(int irq, void *_dev)
0485 {
0486     struct vmci_guest_device *dev = _dev;
0487 
0488     /*
0489      * If we are using MSI-X with exclusive vectors then we simply schedule
0490      * the datagram tasklet, since we know the interrupt was meant for us.
0491      * Otherwise we must read the ICR to determine what to do.
0492      */
0493 
0494     if (dev->exclusive_vectors) {
0495         tasklet_schedule(&dev->datagram_tasklet);
0496     } else {
0497         unsigned int icr;
0498 
0499         /* Acknowledge interrupt and determine what needs doing. */
0500         icr = vmci_read_reg(dev, VMCI_ICR_ADDR);
0501         if (icr == 0 || icr == ~0)
0502             return IRQ_NONE;
0503 
0504         if (icr & VMCI_ICR_DATAGRAM) {
0505             tasklet_schedule(&dev->datagram_tasklet);
0506             icr &= ~VMCI_ICR_DATAGRAM;
0507         }
0508 
0509         if (icr & VMCI_ICR_NOTIFICATION) {
0510             tasklet_schedule(&dev->bm_tasklet);
0511             icr &= ~VMCI_ICR_NOTIFICATION;
0512         }
0513 
0514 
0515         if (icr & VMCI_ICR_DMA_DATAGRAM) {
0516             wake_up_all(&dev->inout_wq);
0517             icr &= ~VMCI_ICR_DMA_DATAGRAM;
0518         }
0519 
0520         if (icr != 0)
0521             dev_warn(dev->dev,
0522                  "Ignoring unknown interrupt cause (%d)\n",
0523                  icr);
0524     }
0525 
0526     return IRQ_HANDLED;
0527 }
0528 
0529 /*
0530  * Interrupt handler for MSI-X interrupt vector VMCI_INTR_NOTIFICATION,
0531  * which is for the notification bitmap.  Will only get called if we are
0532  * using MSI-X with exclusive vectors.
0533  */
0534 static irqreturn_t vmci_interrupt_bm(int irq, void *_dev)
0535 {
0536     struct vmci_guest_device *dev = _dev;
0537 
0538     /* For MSI-X we can just assume it was meant for us. */
0539     tasklet_schedule(&dev->bm_tasklet);
0540 
0541     return IRQ_HANDLED;
0542 }
0543 
0544 /*
0545  * Interrupt handler for MSI-X interrupt vector VMCI_INTR_DMA_DATAGRAM,
0546  * which is for the completion of a DMA datagram send or receive operation.
0547  * Will only get called if we are using MSI-X with exclusive vectors.
0548  */
0549 static irqreturn_t vmci_interrupt_dma_datagram(int irq, void *_dev)
0550 {
0551     struct vmci_guest_device *dev = _dev;
0552 
0553     wake_up_all(&dev->inout_wq);
0554 
0555     return IRQ_HANDLED;
0556 }
0557 
0558 static void vmci_free_dg_buffers(struct vmci_guest_device *vmci_dev)
0559 {
0560     if (vmci_dev->mmio_base != NULL) {
0561         if (vmci_dev->tx_buffer != NULL)
0562             dma_free_coherent(vmci_dev->dev,
0563                       VMCI_DMA_DG_BUFFER_SIZE,
0564                       vmci_dev->tx_buffer,
0565                       vmci_dev->tx_buffer_base);
0566         if (vmci_dev->data_buffer != NULL)
0567             dma_free_coherent(vmci_dev->dev,
0568                       VMCI_DMA_DG_BUFFER_SIZE,
0569                       vmci_dev->data_buffer,
0570                       vmci_dev->data_buffer_base);
0571     } else {
0572         vfree(vmci_dev->data_buffer);
0573     }
0574 }
0575 
0576 /*
0577  * Most of the initialization at module load time is done here.
0578  */
0579 static int vmci_guest_probe_device(struct pci_dev *pdev,
0580                    const struct pci_device_id *id)
0581 {
0582     struct vmci_guest_device *vmci_dev;
0583     void __iomem *iobase = NULL;
0584     void __iomem *mmio_base = NULL;
0585     unsigned int num_irq_vectors;
0586     unsigned int capabilities;
0587     unsigned int caps_in_use;
0588     unsigned long cmd;
0589     int vmci_err;
0590     int error;
0591 
0592     dev_dbg(&pdev->dev, "Probing for vmci/PCI guest device\n");
0593 
0594     error = pcim_enable_device(pdev);
0595     if (error) {
0596         dev_err(&pdev->dev,
0597             "Failed to enable VMCI device: %d\n", error);
0598         return error;
0599     }
0600 
0601     /*
0602      * A VMCI device with MMIO access to its registers requests 256KB
0603      * for BAR1. If present, the driver will use the new VMCI device
0604      * functionality for register access and datagram send/recv.
0605      */
0606 
0607     if (pci_resource_len(pdev, 1) == VMCI_WITH_MMIO_ACCESS_BAR_SIZE) {
0608         dev_info(&pdev->dev, "MMIO register access is available\n");
0609         mmio_base = pci_iomap_range(pdev, 1, VMCI_MMIO_ACCESS_OFFSET,
0610                         VMCI_MMIO_ACCESS_SIZE);
0611         /* If the map fails, we fall back to IOIO access. */
0612         if (!mmio_base)
0613             dev_warn(&pdev->dev, "Failed to map MMIO register access\n");
0614     }
0615 
0616     if (!mmio_base) {
0617         if (IS_ENABLED(CONFIG_ARM64)) {
0618             dev_err(&pdev->dev, "MMIO base is invalid\n");
0619             return -ENXIO;
0620         }
0621         error = pcim_iomap_regions(pdev, BIT(0), KBUILD_MODNAME);
0622         if (error) {
0623             dev_err(&pdev->dev, "Failed to reserve/map IO regions\n");
0624             return error;
0625         }
0626         iobase = pcim_iomap_table(pdev)[0];
0627     }
0628 
0629     vmci_dev = devm_kzalloc(&pdev->dev, sizeof(*vmci_dev), GFP_KERNEL);
0630     if (!vmci_dev) {
0631         dev_err(&pdev->dev,
0632             "Can't allocate memory for VMCI device\n");
0633         return -ENOMEM;
0634     }
0635 
0636     vmci_dev->dev = &pdev->dev;
0637     vmci_dev->exclusive_vectors = false;
0638     vmci_dev->iobase = iobase;
0639     vmci_dev->mmio_base = mmio_base;
0640 
0641     tasklet_init(&vmci_dev->datagram_tasklet,
0642              vmci_dispatch_dgs, (unsigned long)vmci_dev);
0643     tasklet_init(&vmci_dev->bm_tasklet,
0644              vmci_process_bitmap, (unsigned long)vmci_dev);
0645     init_waitqueue_head(&vmci_dev->inout_wq);
0646 
0647     if (mmio_base != NULL) {
0648         vmci_dev->tx_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE,
0649                              &vmci_dev->tx_buffer_base,
0650                              GFP_KERNEL);
0651         if (!vmci_dev->tx_buffer) {
0652             dev_err(&pdev->dev,
0653                 "Can't allocate memory for datagram tx buffer\n");
0654             return -ENOMEM;
0655         }
0656 
0657         vmci_dev->data_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE,
0658                                &vmci_dev->data_buffer_base,
0659                                GFP_KERNEL);
0660     } else {
0661         vmci_dev->data_buffer = vmalloc(VMCI_MAX_DG_SIZE);
0662     }
0663     if (!vmci_dev->data_buffer) {
0664         dev_err(&pdev->dev,
0665             "Can't allocate memory for datagram buffer\n");
0666         error = -ENOMEM;
0667         goto err_free_data_buffers;
0668     }
0669 
0670     pci_set_master(pdev);   /* To enable queue_pair functionality. */
0671 
0672     /*
0673      * Verify that the VMCI Device supports the capabilities that
0674      * we need. If the device is missing capabilities that we would
0675      * like to use, check for fallback capabilities and use those
0676      * instead (so we can run a new VM on old hosts). Fail the load if
0677      * a required capability is missing and there is no fallback.
0678      *
0679      * Right now, we need datagrams. There are no fallbacks.
0680      */
0681     capabilities = vmci_read_reg(vmci_dev, VMCI_CAPS_ADDR);
0682     if (!(capabilities & VMCI_CAPS_DATAGRAM)) {
0683         dev_err(&pdev->dev, "Device does not support datagrams\n");
0684         error = -ENXIO;
0685         goto err_free_data_buffers;
0686     }
0687     caps_in_use = VMCI_CAPS_DATAGRAM;
0688 
0689     /*
0690      * Use 64-bit PPNs if the device supports them.
0691      *
0692      * There is no check for the return value of dma_set_mask_and_coherent
0693      * since this driver can handle the default mask values if
0694      * dma_set_mask_and_coherent fails.
0695      */
0696     if (capabilities & VMCI_CAPS_PPN64) {
0697         dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
0698         use_ppn64 = true;
0699         caps_in_use |= VMCI_CAPS_PPN64;
0700     } else {
0701         dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(44));
0702         use_ppn64 = false;
0703     }
0704 
0705     /*
0706      * If the hardware supports notifications, we will use that as
0707      * well.
0708      */
0709     if (capabilities & VMCI_CAPS_NOTIFICATIONS) {
0710         vmci_dev->notification_bitmap = dma_alloc_coherent(
0711             &pdev->dev, PAGE_SIZE, &vmci_dev->notification_base,
0712             GFP_KERNEL);
0713         if (!vmci_dev->notification_bitmap)
0714             dev_warn(&pdev->dev,
0715                  "Unable to allocate notification bitmap\n");
0716         else
0717             caps_in_use |= VMCI_CAPS_NOTIFICATIONS;
0718     }
0719 
0720     if (mmio_base != NULL) {
0721         if (capabilities & VMCI_CAPS_DMA_DATAGRAM) {
0722             caps_in_use |= VMCI_CAPS_DMA_DATAGRAM;
0723         } else {
0724             dev_err(&pdev->dev,
0725                 "Missing capability: VMCI_CAPS_DMA_DATAGRAM\n");
0726             error = -ENXIO;
0727             goto err_free_notification_bitmap;
0728         }
0729     }
0730 
0731     dev_info(&pdev->dev, "Using capabilities 0x%x\n", caps_in_use);
0732 
0733     /* Let the host know which capabilities we intend to use. */
0734     vmci_write_reg(vmci_dev, caps_in_use, VMCI_CAPS_ADDR);
0735 
0736     if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) {
0737         /* Let the device know the size for pages passed down. */
0738         vmci_write_reg(vmci_dev, PAGE_SHIFT, VMCI_GUEST_PAGE_SHIFT);
0739 
0740         /* Configure the high order parts of the data in/out buffers. */
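             /*
              * Only the high 32 bits are set here; the low 32 bits of each
              * buffer address are written per transfer in vmci_read_data()
              * and vmci_write_data(), which is what triggers the device to
              * start the DMA.
              */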
0741         vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->data_buffer_base),
0742                    VMCI_DATA_IN_HIGH_ADDR);
0743         vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->tx_buffer_base),
0744                    VMCI_DATA_OUT_HIGH_ADDR);
0745     }
0746 
0747     /* Set up global device so that we can start sending datagrams */
0748     spin_lock_irq(&vmci_dev_spinlock);
0749     vmci_dev_g = vmci_dev;
0750     vmci_pdev = pdev;
0751     spin_unlock_irq(&vmci_dev_spinlock);
0752 
0753     /*
0754      * Register notification bitmap with device if that capability is
0755      * used.
0756      */
0757     if (caps_in_use & VMCI_CAPS_NOTIFICATIONS) {
0758         unsigned long bitmap_ppn =
0759             vmci_dev->notification_base >> PAGE_SHIFT;
0760         if (!vmci_dbell_register_notification_bitmap(bitmap_ppn)) {
0761             dev_warn(&pdev->dev,
0762                  "VMCI device unable to register notification bitmap with PPN 0x%lx\n",
0763                  bitmap_ppn);
0764             error = -ENXIO;
0765             goto err_remove_vmci_dev_g;
0766         }
0767     }
0768 
0769     /* Check host capabilities. */
0770     error = vmci_check_host_caps(pdev);
0771     if (error)
0772         goto err_remove_vmci_dev_g;
0773 
0774     /* Enable device. */
0775 
0776     /*
0777      * We subscribe to the VMCI_EVENT_CTX_ID_UPDATE here so we can
0778      * update the internal context id when needed.
0779      */
0780     vmci_err = vmci_event_subscribe(VMCI_EVENT_CTX_ID_UPDATE,
0781                     vmci_guest_cid_update, NULL,
0782                     &ctx_update_sub_id);
0783     if (vmci_err < VMCI_SUCCESS)
0784         dev_warn(&pdev->dev,
0785              "Failed to subscribe to event (type=%d): %d\n",
0786              VMCI_EVENT_CTX_ID_UPDATE, vmci_err);
0787 
0788     /*
0789      * Enable interrupts.  Try MSI-X first, then MSI, and then fall back on
0790      * legacy interrupts.
0791      */
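         /*
          * With exclusive MSI-X vectors, vector 0 handles incoming
          * datagrams, vector 1 the notification bitmap, and vector 2
          * (MMIO devices only) DMA datagram completions. Otherwise a
          * single shared vector is used and the ICR register is read to
          * work out what happened.
          */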
0792     if (vmci_dev->mmio_base != NULL)
0793         num_irq_vectors = VMCI_MAX_INTRS;
0794     else
0795         num_irq_vectors = VMCI_MAX_INTRS_NOTIFICATION;
0796     error = pci_alloc_irq_vectors(pdev, num_irq_vectors, num_irq_vectors,
0797                       PCI_IRQ_MSIX);
0798     if (error < 0) {
0799         error = pci_alloc_irq_vectors(pdev, 1, 1,
0800                 PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY);
0801         if (error < 0)
0802             goto err_unsubscribe_event;
0803     } else {
0804         vmci_dev->exclusive_vectors = true;
0805     }
0806 
0807     /*
0808      * Request IRQ for legacy or MSI interrupts, or for first
0809      * MSI-X vector.
0810      */
0811     error = request_irq(pci_irq_vector(pdev, 0), vmci_interrupt,
0812                 IRQF_SHARED, KBUILD_MODNAME, vmci_dev);
0813     if (error) {
0814         dev_err(&pdev->dev, "Irq %u in use: %d\n",
0815             pci_irq_vector(pdev, 0), error);
0816         goto err_disable_msi;
0817     }
0818 
0819     /*
0820      * For MSI-X with exclusive vectors we need to request an
0821      * interrupt for each vector so that we get a separate
0822      * interrupt handler routine.  This allows us to distinguish
0823      * between the vectors.
0824      */
0825     if (vmci_dev->exclusive_vectors) {
0826         error = request_irq(pci_irq_vector(pdev, 1),
0827                     vmci_interrupt_bm, 0, KBUILD_MODNAME,
0828                     vmci_dev);
0829         if (error) {
0830             dev_err(&pdev->dev,
0831                 "Failed to allocate irq %u: %d\n",
0832                 pci_irq_vector(pdev, 1), error);
0833             goto err_free_irq;
0834         }
0835         if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) {
0836             error = request_irq(pci_irq_vector(pdev, 2),
0837                         vmci_interrupt_dma_datagram,
0838                         0, KBUILD_MODNAME, vmci_dev);
0839             if (error) {
0840                 dev_err(&pdev->dev,
0841                     "Failed to allocate irq %u: %d\n",
0842                     pci_irq_vector(pdev, 2), error);
0843                 goto err_free_bm_irq;
0844             }
0845         }
0846     }
0847 
0848     dev_dbg(&pdev->dev, "Registered device\n");
0849 
0850     atomic_inc(&vmci_num_guest_devices);
0851 
0852     /* Enable specific interrupt bits. */
0853     cmd = VMCI_IMR_DATAGRAM;
0854     if (caps_in_use & VMCI_CAPS_NOTIFICATIONS)
0855         cmd |= VMCI_IMR_NOTIFICATION;
0856     if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM)
0857         cmd |= VMCI_IMR_DMA_DATAGRAM;
0858     vmci_write_reg(vmci_dev, cmd, VMCI_IMR_ADDR);
0859 
0860     /* Enable interrupts. */
0861     vmci_write_reg(vmci_dev, VMCI_CONTROL_INT_ENABLE, VMCI_CONTROL_ADDR);
0862 
0863     pci_set_drvdata(pdev, vmci_dev);
0864 
0865     vmci_call_vsock_callback(false);
0866     return 0;
0867 
0868 err_free_bm_irq:
0869     if (vmci_dev->exclusive_vectors)
0870         free_irq(pci_irq_vector(pdev, 1), vmci_dev);
0871 
0872 err_free_irq:
0873     free_irq(pci_irq_vector(pdev, 0), vmci_dev);
0874     tasklet_kill(&vmci_dev->datagram_tasklet);
0875     tasklet_kill(&vmci_dev->bm_tasklet);
0876 
0877 err_disable_msi:
0878     pci_free_irq_vectors(pdev);
0879 
0880 err_unsubscribe_event:
0881     vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
0882     if (vmci_err < VMCI_SUCCESS)
0883         dev_warn(&pdev->dev,
0884              "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
0885              VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);
0886 
0887 err_remove_vmci_dev_g:
0888     spin_lock_irq(&vmci_dev_spinlock);
0889     vmci_pdev = NULL;
0890     vmci_dev_g = NULL;
0891     spin_unlock_irq(&vmci_dev_spinlock);
0892 
0893 err_free_notification_bitmap:
0894     if (vmci_dev->notification_bitmap) {
0895         vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR);
0896         dma_free_coherent(&pdev->dev, PAGE_SIZE,
0897                   vmci_dev->notification_bitmap,
0898                   vmci_dev->notification_base);
0899     }
0900 
0901 err_free_data_buffers:
0902     vmci_free_dg_buffers(vmci_dev);
0903 
0904     /* The rest are managed resources and will be freed by PCI core */
0905     return error;
0906 }
0907 
0908 static void vmci_guest_remove_device(struct pci_dev *pdev)
0909 {
0910     struct vmci_guest_device *vmci_dev = pci_get_drvdata(pdev);
0911     int vmci_err;
0912 
0913     dev_dbg(&pdev->dev, "Removing device\n");
0914 
0915     atomic_dec(&vmci_num_guest_devices);
0916 
0917     vmci_qp_guest_endpoints_exit();
0918 
0919     vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
0920     if (vmci_err < VMCI_SUCCESS)
0921         dev_warn(&pdev->dev,
0922              "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
0923              VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);
0924 
0925     spin_lock_irq(&vmci_dev_spinlock);
0926     vmci_dev_g = NULL;
0927     vmci_pdev = NULL;
0928     spin_unlock_irq(&vmci_dev_spinlock);
0929 
0930     dev_dbg(&pdev->dev, "Resetting vmci device\n");
0931     vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR);
0932 
0933     /*
0934      * Free IRQ and then disable MSI/MSI-X as appropriate.  For
0935      * MSI-X, we might have multiple vectors, each with their own
0936      * IRQ, which we must free too.
0937      */
0938     if (vmci_dev->exclusive_vectors) {
0939         free_irq(pci_irq_vector(pdev, 1), vmci_dev);
0940         if (vmci_dev->mmio_base != NULL)
0941             free_irq(pci_irq_vector(pdev, 2), vmci_dev);
0942     }
0943     free_irq(pci_irq_vector(pdev, 0), vmci_dev);
0944     pci_free_irq_vectors(pdev);
0945 
0946     tasklet_kill(&vmci_dev->datagram_tasklet);
0947     tasklet_kill(&vmci_dev->bm_tasklet);
0948 
0949     if (vmci_dev->notification_bitmap) {
0950         /*
0951          * The device reset above cleared the bitmap state of the
0952          * device, so we can safely free it here.
0953          */
0954 
0955         dma_free_coherent(&pdev->dev, PAGE_SIZE,
0956                   vmci_dev->notification_bitmap,
0957                   vmci_dev->notification_base);
0958     }
0959 
0960     vmci_free_dg_buffers(vmci_dev);
0961 
0962     if (vmci_dev->mmio_base != NULL)
0963         pci_iounmap(pdev, vmci_dev->mmio_base);
0964 
0965     /* The rest are managed resources and will be freed by PCI core */
0966 }
0967 
0968 static const struct pci_device_id vmci_ids[] = {
0969     { PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_VMCI), },
0970     { 0 },
0971 };
0972 MODULE_DEVICE_TABLE(pci, vmci_ids);
0973 
0974 static struct pci_driver vmci_guest_driver = {
0975     .name       = KBUILD_MODNAME,
0976     .id_table   = vmci_ids,
0977     .probe      = vmci_guest_probe_device,
0978     .remove     = vmci_guest_remove_device,
0979 };
0980 
0981 int __init vmci_guest_init(void)
0982 {
0983     return pci_register_driver(&vmci_guest_driver);
0984 }
0985 
0986 void __exit vmci_guest_exit(void)
0987 {
0988     pci_unregister_driver(&vmci_guest_driver);
0989 }