Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * Xen PCI Frontend
0004  *
0005  * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
0006  */
0007 #include <linux/module.h>
0008 #include <linux/init.h>
0009 #include <linux/mm.h>
0010 #include <xen/xenbus.h>
0011 #include <xen/events.h>
0012 #include <xen/grant_table.h>
0013 #include <xen/page.h>
0014 #include <linux/spinlock.h>
0015 #include <linux/pci.h>
0016 #include <linux/msi.h>
0017 #include <xen/interface/io/pciif.h>
0018 #include <asm/xen/pci.h>
0019 #include <linux/interrupt.h>
0020 #include <linux/atomic.h>
0021 #include <linux/workqueue.h>
0022 #include <linux/bitops.h>
0023 #include <linux/time.h>
0024 #include <linux/ktime.h>
0025 #include <linux/swiotlb.h>
0026 #include <xen/platform_pci.h>
0027 
0028 #include <asm/xen/swiotlb-xen.h>
0029 
/* Value held in pcifront_device.evtchn while no event channel is allocated. */
#define INVALID_EVTCHN (-1)
0031 
0032 struct pci_bus_entry {
0033     struct list_head list;
0034     struct pci_bus *bus;
0035 };
0036 
/*
 * Bit number and mask within pcifront_device.flags: set while the AER
 * work item is queued or running (see schedule_pcifront_aer_op()).
 */
#define _PDEVB_op_active    (0)
#define PDEVB_op_active     (1 << (_PDEVB_op_active))
0039 
0040 struct pcifront_device {
0041     struct xenbus_device *xdev;
0042     struct list_head root_buses;
0043 
0044     int evtchn;
0045     grant_ref_t gnt_ref;
0046 
0047     int irq;
0048 
0049     /* Lock this when doing any operations in sh_info */
0050     spinlock_t sh_info_lock;
0051     struct xen_pci_sharedinfo *sh_info;
0052     struct work_struct op_work;
0053     unsigned long flags;
0054 
0055 };
0056 
0057 struct pcifront_sd {
0058     struct pci_sysdata sd;
0059     struct pcifront_device *pdev;
0060 };
0061 
0062 static inline struct pcifront_device *
0063 pcifront_get_pdev(struct pcifront_sd *sd)
0064 {
0065     return sd->pdev;
0066 }
0067 
0068 static inline void pcifront_init_sd(struct pcifront_sd *sd,
0069                     unsigned int domain, unsigned int bus,
0070                     struct pcifront_device *pdev)
0071 {
0072     /* Because we do not expose that information via XenBus. */
0073     sd->sd.node = first_online_node;
0074     sd->sd.domain = domain;
0075     sd->pdev = pdev;
0076 }
0077 
/*
 * The single registered frontend, or NULL when none is installed.
 * pcifront_dev_lock guards every read and write of pcifront_dev.
 */
static DEFINE_SPINLOCK(pcifront_dev_lock);
static struct pcifront_device *pcifront_dev;
0080 
0081 static int errno_to_pcibios_err(int errno)
0082 {
0083     switch (errno) {
0084     case XEN_PCI_ERR_success:
0085         return PCIBIOS_SUCCESSFUL;
0086 
0087     case XEN_PCI_ERR_dev_not_found:
0088         return PCIBIOS_DEVICE_NOT_FOUND;
0089 
0090     case XEN_PCI_ERR_invalid_offset:
0091     case XEN_PCI_ERR_op_failed:
0092         return PCIBIOS_BAD_REGISTER_NUMBER;
0093 
0094     case XEN_PCI_ERR_not_implemented:
0095         return PCIBIOS_FUNC_NOT_SUPPORTED;
0096 
0097     case XEN_PCI_ERR_access_denied:
0098         return PCIBIOS_SET_FAILED;
0099     }
0100     return errno;
0101 }
0102 
0103 static inline void schedule_pcifront_aer_op(struct pcifront_device *pdev)
0104 {
0105     if (test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags)
0106         && !test_and_set_bit(_PDEVB_op_active, &pdev->flags)) {
0107         dev_dbg(&pdev->xdev->dev, "schedule aer frontend job\n");
0108         schedule_work(&pdev->op_work);
0109     }
0110 }
0111 
0112 static int do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op)
0113 {
0114     int err = 0;
0115     struct xen_pci_op *active_op = &pdev->sh_info->op;
0116     unsigned long irq_flags;
0117     evtchn_port_t port = pdev->evtchn;
0118     unsigned int irq = pdev->irq;
0119     s64 ns, ns_timeout;
0120 
0121     spin_lock_irqsave(&pdev->sh_info_lock, irq_flags);
0122 
0123     memcpy(active_op, op, sizeof(struct xen_pci_op));
0124 
0125     /* Go */
0126     wmb();
0127     set_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
0128     notify_remote_via_evtchn(port);
0129 
0130     /*
0131      * We set a poll timeout of 3 seconds but give up on return after
0132      * 2 seconds. It is better to time out too late rather than too early
0133      * (in the latter case we end up continually re-executing poll() with a
0134      * timeout in the past). 1s difference gives plenty of slack for error.
0135      */
0136     ns_timeout = ktime_get_ns() + 2 * (s64)NSEC_PER_SEC;
0137 
0138     xen_clear_irq_pending(irq);
0139 
0140     while (test_bit(_XEN_PCIF_active,
0141             (unsigned long *)&pdev->sh_info->flags)) {
0142         xen_poll_irq_timeout(irq, jiffies + 3*HZ);
0143         xen_clear_irq_pending(irq);
0144         ns = ktime_get_ns();
0145         if (ns > ns_timeout) {
0146             dev_err(&pdev->xdev->dev,
0147                 "pciback not responding!!!\n");
0148             clear_bit(_XEN_PCIF_active,
0149                   (unsigned long *)&pdev->sh_info->flags);
0150             err = XEN_PCI_ERR_dev_not_found;
0151             goto out;
0152         }
0153     }
0154 
0155     /*
0156      * We might lose backend service request since we
0157      * reuse same evtchn with pci_conf backend response. So re-schedule
0158      * aer pcifront service.
0159      */
0160     if (test_bit(_XEN_PCIB_active,
0161             (unsigned long *)&pdev->sh_info->flags)) {
0162         dev_err(&pdev->xdev->dev,
0163             "schedule aer pcifront service\n");
0164         schedule_pcifront_aer_op(pdev);
0165     }
0166 
0167     memcpy(op, active_op, sizeof(struct xen_pci_op));
0168 
0169     err = op->err;
0170 out:
0171     spin_unlock_irqrestore(&pdev->sh_info_lock, irq_flags);
0172     return err;
0173 }
0174 
0175 /* Access to this function is spinlocked in drivers/pci/access.c */
0176 static int pcifront_bus_read(struct pci_bus *bus, unsigned int devfn,
0177                  int where, int size, u32 *val)
0178 {
0179     int err = 0;
0180     struct xen_pci_op op = {
0181         .cmd    = XEN_PCI_OP_conf_read,
0182         .domain = pci_domain_nr(bus),
0183         .bus    = bus->number,
0184         .devfn  = devfn,
0185         .offset = where,
0186         .size   = size,
0187     };
0188     struct pcifront_sd *sd = bus->sysdata;
0189     struct pcifront_device *pdev = pcifront_get_pdev(sd);
0190 
0191     dev_dbg(&pdev->xdev->dev,
0192         "read dev=%04x:%02x:%02x.%d - offset %x size %d\n",
0193         pci_domain_nr(bus), bus->number, PCI_SLOT(devfn),
0194         PCI_FUNC(devfn), where, size);
0195 
0196     err = do_pci_op(pdev, &op);
0197 
0198     if (likely(!err)) {
0199         dev_dbg(&pdev->xdev->dev, "read got back value %x\n",
0200             op.value);
0201 
0202         *val = op.value;
0203     } else if (err == -ENODEV) {
0204         /* No device here, pretend that it just returned 0 */
0205         err = 0;
0206         *val = 0;
0207     }
0208 
0209     return errno_to_pcibios_err(err);
0210 }
0211 
0212 /* Access to this function is spinlocked in drivers/pci/access.c */
0213 static int pcifront_bus_write(struct pci_bus *bus, unsigned int devfn,
0214                   int where, int size, u32 val)
0215 {
0216     struct xen_pci_op op = {
0217         .cmd    = XEN_PCI_OP_conf_write,
0218         .domain = pci_domain_nr(bus),
0219         .bus    = bus->number,
0220         .devfn  = devfn,
0221         .offset = where,
0222         .size   = size,
0223         .value  = val,
0224     };
0225     struct pcifront_sd *sd = bus->sysdata;
0226     struct pcifront_device *pdev = pcifront_get_pdev(sd);
0227 
0228     dev_dbg(&pdev->xdev->dev,
0229         "write dev=%04x:%02x:%02x.%d - offset %x size %d val %x\n",
0230         pci_domain_nr(bus), bus->number,
0231         PCI_SLOT(devfn), PCI_FUNC(devfn), where, size, val);
0232 
0233     return errno_to_pcibios_err(do_pci_op(pdev, &op));
0234 }
0235 
0236 static struct pci_ops pcifront_bus_ops = {
0237     .read = pcifront_bus_read,
0238     .write = pcifront_bus_write,
0239 };
0240 
0241 #ifdef CONFIG_PCI_MSI
0242 static int pci_frontend_enable_msix(struct pci_dev *dev,
0243                     int vector[], int nvec)
0244 {
0245     int err;
0246     int i;
0247     struct xen_pci_op op = {
0248         .cmd    = XEN_PCI_OP_enable_msix,
0249         .domain = pci_domain_nr(dev->bus),
0250         .bus = dev->bus->number,
0251         .devfn = dev->devfn,
0252         .value = nvec,
0253     };
0254     struct pcifront_sd *sd = dev->bus->sysdata;
0255     struct pcifront_device *pdev = pcifront_get_pdev(sd);
0256     struct msi_desc *entry;
0257 
0258     if (nvec > SH_INFO_MAX_VEC) {
0259         pci_err(dev, "too many vectors (0x%x) for PCI frontend:"
0260                    " Increase SH_INFO_MAX_VEC\n", nvec);
0261         return -EINVAL;
0262     }
0263 
0264     i = 0;
0265     msi_for_each_desc(entry, &dev->dev, MSI_DESC_NOTASSOCIATED) {
0266         op.msix_entries[i].entry = entry->msi_index;
0267         /* Vector is useless at this point. */
0268         op.msix_entries[i].vector = -1;
0269         i++;
0270     }
0271 
0272     err = do_pci_op(pdev, &op);
0273 
0274     if (likely(!err)) {
0275         if (likely(!op.value)) {
0276             /* we get the result */
0277             for (i = 0; i < nvec; i++) {
0278                 if (op.msix_entries[i].vector <= 0) {
0279                     pci_warn(dev, "MSI-X entry %d is invalid: %d!\n",
0280                         i, op.msix_entries[i].vector);
0281                     err = -EINVAL;
0282                     vector[i] = -1;
0283                     continue;
0284                 }
0285                 vector[i] = op.msix_entries[i].vector;
0286             }
0287         } else {
0288             pr_info("enable msix get value %x\n", op.value);
0289             err = op.value;
0290         }
0291     } else {
0292         pci_err(dev, "enable msix get err %x\n", err);
0293     }
0294     return err;
0295 }
0296 
0297 static void pci_frontend_disable_msix(struct pci_dev *dev)
0298 {
0299     int err;
0300     struct xen_pci_op op = {
0301         .cmd    = XEN_PCI_OP_disable_msix,
0302         .domain = pci_domain_nr(dev->bus),
0303         .bus = dev->bus->number,
0304         .devfn = dev->devfn,
0305     };
0306     struct pcifront_sd *sd = dev->bus->sysdata;
0307     struct pcifront_device *pdev = pcifront_get_pdev(sd);
0308 
0309     err = do_pci_op(pdev, &op);
0310 
0311     /* What should do for error ? */
0312     if (err)
0313         pci_err(dev, "pci_disable_msix get err %x\n", err);
0314 }
0315 
0316 static int pci_frontend_enable_msi(struct pci_dev *dev, int vector[])
0317 {
0318     int err;
0319     struct xen_pci_op op = {
0320         .cmd    = XEN_PCI_OP_enable_msi,
0321         .domain = pci_domain_nr(dev->bus),
0322         .bus = dev->bus->number,
0323         .devfn = dev->devfn,
0324     };
0325     struct pcifront_sd *sd = dev->bus->sysdata;
0326     struct pcifront_device *pdev = pcifront_get_pdev(sd);
0327 
0328     err = do_pci_op(pdev, &op);
0329     if (likely(!err)) {
0330         vector[0] = op.value;
0331         if (op.value <= 0) {
0332             pci_warn(dev, "MSI entry is invalid: %d!\n",
0333                 op.value);
0334             err = -EINVAL;
0335             vector[0] = -1;
0336         }
0337     } else {
0338         pci_err(dev, "pci frontend enable msi failed for dev "
0339                     "%x:%x\n", op.bus, op.devfn);
0340         err = -EINVAL;
0341     }
0342     return err;
0343 }
0344 
0345 static void pci_frontend_disable_msi(struct pci_dev *dev)
0346 {
0347     int err;
0348     struct xen_pci_op op = {
0349         .cmd    = XEN_PCI_OP_disable_msi,
0350         .domain = pci_domain_nr(dev->bus),
0351         .bus = dev->bus->number,
0352         .devfn = dev->devfn,
0353     };
0354     struct pcifront_sd *sd = dev->bus->sysdata;
0355     struct pcifront_device *pdev = pcifront_get_pdev(sd);
0356 
0357     err = do_pci_op(pdev, &op);
0358     if (err == XEN_PCI_ERR_dev_not_found) {
0359         /* XXX No response from backend, what shall we do? */
0360         pr_info("get no response from backend for disable MSI\n");
0361         return;
0362     }
0363     if (err)
0364         /* how can pciback notify us fail? */
0365         pr_info("get fake response from backend\n");
0366 }
0367 
0368 static struct xen_pci_frontend_ops pci_frontend_ops = {
0369     .enable_msi = pci_frontend_enable_msi,
0370     .disable_msi = pci_frontend_disable_msi,
0371     .enable_msix = pci_frontend_enable_msix,
0372     .disable_msix = pci_frontend_disable_msix,
0373 };
0374 
0375 static void pci_frontend_registrar(int enable)
0376 {
0377     if (enable)
0378         xen_pci_frontend = &pci_frontend_ops;
0379     else
0380         xen_pci_frontend = NULL;
0381 };
0382 #else
/* Without CONFIG_PCI_MSI there are no callbacks to register: no-op. */
static inline void pci_frontend_registrar(int enable) { }
0384 #endif /* CONFIG_PCI_MSI */
0385 
0386 /* Claim resources for the PCI frontend as-is, backend won't allow changes */
0387 static int pcifront_claim_resource(struct pci_dev *dev, void *data)
0388 {
0389     struct pcifront_device *pdev = data;
0390     int i;
0391     struct resource *r;
0392 
0393     for (i = 0; i < PCI_NUM_RESOURCES; i++) {
0394         r = &dev->resource[i];
0395 
0396         if (!r->parent && r->start && r->flags) {
0397             dev_info(&pdev->xdev->dev, "claiming resource %s/%d\n",
0398                 pci_name(dev), i);
0399             if (pci_claim_resource(dev, i)) {
0400                 dev_err(&pdev->xdev->dev, "Could not claim resource %s/%d! "
0401                     "Device offline. Try using e820_host=1 in the guest config.\n",
0402                     pci_name(dev), i);
0403             }
0404         }
0405     }
0406 
0407     return 0;
0408 }
0409 
0410 static int pcifront_scan_bus(struct pcifront_device *pdev,
0411                 unsigned int domain, unsigned int bus,
0412                 struct pci_bus *b)
0413 {
0414     struct pci_dev *d;
0415     unsigned int devfn;
0416 
0417     /*
0418      * Scan the bus for functions and add.
0419      * We omit handling of PCI bridge attachment because pciback prevents
0420      * bridges from being exported.
0421      */
0422     for (devfn = 0; devfn < 0x100; devfn++) {
0423         d = pci_get_slot(b, devfn);
0424         if (d) {
0425             /* Device is already known. */
0426             pci_dev_put(d);
0427             continue;
0428         }
0429 
0430         d = pci_scan_single_device(b, devfn);
0431         if (d)
0432             dev_info(&pdev->xdev->dev, "New device on "
0433                  "%04x:%02x:%02x.%d found.\n", domain, bus,
0434                  PCI_SLOT(devfn), PCI_FUNC(devfn));
0435     }
0436 
0437     return 0;
0438 }
0439 
0440 static int pcifront_scan_root(struct pcifront_device *pdev,
0441                  unsigned int domain, unsigned int bus)
0442 {
0443     struct pci_bus *b;
0444     LIST_HEAD(resources);
0445     struct pcifront_sd *sd = NULL;
0446     struct pci_bus_entry *bus_entry = NULL;
0447     int err = 0;
0448     static struct resource busn_res = {
0449         .start = 0,
0450         .end = 255,
0451         .flags = IORESOURCE_BUS,
0452     };
0453 
0454 #ifndef CONFIG_PCI_DOMAINS
0455     if (domain != 0) {
0456         dev_err(&pdev->xdev->dev,
0457             "PCI Root in non-zero PCI Domain! domain=%d\n", domain);
0458         dev_err(&pdev->xdev->dev,
0459             "Please compile with CONFIG_PCI_DOMAINS\n");
0460         err = -EINVAL;
0461         goto err_out;
0462     }
0463 #endif
0464 
0465     dev_info(&pdev->xdev->dev, "Creating PCI Frontend Bus %04x:%02x\n",
0466          domain, bus);
0467 
0468     bus_entry = kzalloc(sizeof(*bus_entry), GFP_KERNEL);
0469     sd = kzalloc(sizeof(*sd), GFP_KERNEL);
0470     if (!bus_entry || !sd) {
0471         err = -ENOMEM;
0472         goto err_out;
0473     }
0474     pci_add_resource(&resources, &ioport_resource);
0475     pci_add_resource(&resources, &iomem_resource);
0476     pci_add_resource(&resources, &busn_res);
0477     pcifront_init_sd(sd, domain, bus, pdev);
0478 
0479     pci_lock_rescan_remove();
0480 
0481     b = pci_scan_root_bus(&pdev->xdev->dev, bus,
0482                   &pcifront_bus_ops, sd, &resources);
0483     if (!b) {
0484         dev_err(&pdev->xdev->dev,
0485             "Error creating PCI Frontend Bus!\n");
0486         err = -ENOMEM;
0487         pci_unlock_rescan_remove();
0488         pci_free_resource_list(&resources);
0489         goto err_out;
0490     }
0491 
0492     bus_entry->bus = b;
0493 
0494     list_add(&bus_entry->list, &pdev->root_buses);
0495 
0496     /*
0497      * pci_scan_root_bus skips devices which do not have a
0498      * devfn==0. The pcifront_scan_bus enumerates all devfn.
0499      */
0500     err = pcifront_scan_bus(pdev, domain, bus, b);
0501 
0502     /* Claim resources before going "live" with our devices */
0503     pci_walk_bus(b, pcifront_claim_resource, pdev);
0504 
0505     /* Create SysFS and notify udev of the devices. Aka: "going live" */
0506     pci_bus_add_devices(b);
0507 
0508     pci_unlock_rescan_remove();
0509     return err;
0510 
0511 err_out:
0512     kfree(bus_entry);
0513     kfree(sd);
0514 
0515     return err;
0516 }
0517 
0518 static int pcifront_rescan_root(struct pcifront_device *pdev,
0519                    unsigned int domain, unsigned int bus)
0520 {
0521     int err;
0522     struct pci_bus *b;
0523 
0524 #ifndef CONFIG_PCI_DOMAINS
0525     if (domain != 0) {
0526         dev_err(&pdev->xdev->dev,
0527             "PCI Root in non-zero PCI Domain! domain=%d\n", domain);
0528         dev_err(&pdev->xdev->dev,
0529             "Please compile with CONFIG_PCI_DOMAINS\n");
0530         return -EINVAL;
0531     }
0532 #endif
0533 
0534     dev_info(&pdev->xdev->dev, "Rescanning PCI Frontend Bus %04x:%02x\n",
0535          domain, bus);
0536 
0537     b = pci_find_bus(domain, bus);
0538     if (!b)
0539         /* If the bus is unknown, create it. */
0540         return pcifront_scan_root(pdev, domain, bus);
0541 
0542     err = pcifront_scan_bus(pdev, domain, bus, b);
0543 
0544     /* Claim resources before going "live" with our devices */
0545     pci_walk_bus(b, pcifront_claim_resource, pdev);
0546 
0547     /* Create SysFS and notify udev of the devices. Aka: "going live" */
0548     pci_bus_add_devices(b);
0549 
0550     return err;
0551 }
0552 
0553 static void free_root_bus_devs(struct pci_bus *bus)
0554 {
0555     struct pci_dev *dev;
0556 
0557     while (!list_empty(&bus->devices)) {
0558         dev = container_of(bus->devices.next, struct pci_dev,
0559                    bus_list);
0560         pci_dbg(dev, "removing device\n");
0561         pci_stop_and_remove_bus_device(dev);
0562     }
0563 }
0564 
0565 static void pcifront_free_roots(struct pcifront_device *pdev)
0566 {
0567     struct pci_bus_entry *bus_entry, *t;
0568 
0569     dev_dbg(&pdev->xdev->dev, "cleaning up root buses\n");
0570 
0571     pci_lock_rescan_remove();
0572     list_for_each_entry_safe(bus_entry, t, &pdev->root_buses, list) {
0573         list_del(&bus_entry->list);
0574 
0575         free_root_bus_devs(bus_entry->bus);
0576 
0577         kfree(bus_entry->bus->sysdata);
0578 
0579         device_unregister(bus_entry->bus->bridge);
0580         pci_remove_bus(bus_entry->bus);
0581 
0582         kfree(bus_entry);
0583     }
0584     pci_unlock_rescan_remove();
0585 }
0586 
0587 static pci_ers_result_t pcifront_common_process(int cmd,
0588                         struct pcifront_device *pdev,
0589                         pci_channel_state_t state)
0590 {
0591     struct pci_driver *pdrv;
0592     int bus = pdev->sh_info->aer_op.bus;
0593     int devfn = pdev->sh_info->aer_op.devfn;
0594     int domain = pdev->sh_info->aer_op.domain;
0595     struct pci_dev *pcidev;
0596 
0597     dev_dbg(&pdev->xdev->dev,
0598         "pcifront AER process: cmd %x (bus:%x, devfn%x)",
0599         cmd, bus, devfn);
0600 
0601     pcidev = pci_get_domain_bus_and_slot(domain, bus, devfn);
0602     if (!pcidev || !pcidev->dev.driver) {
0603         dev_err(&pdev->xdev->dev, "device or AER driver is NULL\n");
0604         pci_dev_put(pcidev);
0605         return PCI_ERS_RESULT_NONE;
0606     }
0607     pdrv = to_pci_driver(pcidev->dev.driver);
0608 
0609     if (pdrv->err_handler && pdrv->err_handler->error_detected) {
0610         pci_dbg(pcidev, "trying to call AER service\n");
0611         switch (cmd) {
0612         case XEN_PCI_OP_aer_detected:
0613             return pdrv->err_handler->error_detected(pcidev, state);
0614         case XEN_PCI_OP_aer_mmio:
0615             return pdrv->err_handler->mmio_enabled(pcidev);
0616         case XEN_PCI_OP_aer_slotreset:
0617             return pdrv->err_handler->slot_reset(pcidev);
0618         case XEN_PCI_OP_aer_resume:
0619             pdrv->err_handler->resume(pcidev);
0620             return PCI_ERS_RESULT_NONE;
0621         default:
0622             dev_err(&pdev->xdev->dev,
0623                 "bad request in aer recovery operation!\n");
0624         }
0625     }
0626 
0627     return PCI_ERS_RESULT_NONE;
0628 }
0629 
0630 
0631 static void pcifront_do_aer(struct work_struct *data)
0632 {
0633     struct pcifront_device *pdev =
0634         container_of(data, struct pcifront_device, op_work);
0635     int cmd = pdev->sh_info->aer_op.cmd;
0636     pci_channel_state_t state =
0637         (pci_channel_state_t)pdev->sh_info->aer_op.err;
0638 
0639     /*
0640      * If a pci_conf op is in progress, we have to wait until it is done
0641      * before service aer op
0642      */
0643     dev_dbg(&pdev->xdev->dev,
0644         "pcifront service aer bus %x devfn %x\n",
0645         pdev->sh_info->aer_op.bus, pdev->sh_info->aer_op.devfn);
0646 
0647     pdev->sh_info->aer_op.err = pcifront_common_process(cmd, pdev, state);
0648 
0649     /* Post the operation to the guest. */
0650     wmb();
0651     clear_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags);
0652     notify_remote_via_evtchn(pdev->evtchn);
0653 
0654     /*in case of we lost an aer request in four lines time_window*/
0655     smp_mb__before_atomic();
0656     clear_bit(_PDEVB_op_active, &pdev->flags);
0657     smp_mb__after_atomic();
0658 
0659     schedule_pcifront_aer_op(pdev);
0660 
0661 }
0662 
0663 static irqreturn_t pcifront_handler_aer(int irq, void *dev)
0664 {
0665     struct pcifront_device *pdev = dev;
0666 
0667     schedule_pcifront_aer_op(pdev);
0668     return IRQ_HANDLED;
0669 }
0670 static int pcifront_connect_and_init_dma(struct pcifront_device *pdev)
0671 {
0672     int err = 0;
0673 
0674     spin_lock(&pcifront_dev_lock);
0675 
0676     if (!pcifront_dev) {
0677         dev_info(&pdev->xdev->dev, "Installing PCI frontend\n");
0678         pcifront_dev = pdev;
0679     } else
0680         err = -EEXIST;
0681 
0682     spin_unlock(&pcifront_dev_lock);
0683 
0684     if (!err && !is_swiotlb_active(&pdev->xdev->dev)) {
0685         err = pci_xen_swiotlb_init_late();
0686         if (err)
0687             dev_err(&pdev->xdev->dev, "Could not setup SWIOTLB!\n");
0688     }
0689     return err;
0690 }
0691 
0692 static void pcifront_disconnect(struct pcifront_device *pdev)
0693 {
0694     spin_lock(&pcifront_dev_lock);
0695 
0696     if (pdev == pcifront_dev) {
0697         dev_info(&pdev->xdev->dev,
0698              "Disconnecting PCI Frontend Buses\n");
0699         pcifront_dev = NULL;
0700     }
0701 
0702     spin_unlock(&pcifront_dev_lock);
0703 }
0704 static struct pcifront_device *alloc_pdev(struct xenbus_device *xdev)
0705 {
0706     struct pcifront_device *pdev;
0707 
0708     pdev = kzalloc(sizeof(struct pcifront_device), GFP_KERNEL);
0709     if (pdev == NULL)
0710         goto out;
0711 
0712     if (xenbus_setup_ring(xdev, GFP_KERNEL, (void **)&pdev->sh_info, 1,
0713                   &pdev->gnt_ref)) {
0714         kfree(pdev);
0715         pdev = NULL;
0716         goto out;
0717     }
0718     pdev->sh_info->flags = 0;
0719 
0720     /*Flag for registering PV AER handler*/
0721     set_bit(_XEN_PCIB_AERHANDLER, (void *)&pdev->sh_info->flags);
0722 
0723     dev_set_drvdata(&xdev->dev, pdev);
0724     pdev->xdev = xdev;
0725 
0726     INIT_LIST_HEAD(&pdev->root_buses);
0727 
0728     spin_lock_init(&pdev->sh_info_lock);
0729 
0730     pdev->evtchn = INVALID_EVTCHN;
0731     pdev->irq = -1;
0732 
0733     INIT_WORK(&pdev->op_work, pcifront_do_aer);
0734 
0735     dev_dbg(&xdev->dev, "Allocated pdev @ 0x%p pdev->sh_info @ 0x%p\n",
0736         pdev, pdev->sh_info);
0737 out:
0738     return pdev;
0739 }
0740 
0741 static void free_pdev(struct pcifront_device *pdev)
0742 {
0743     dev_dbg(&pdev->xdev->dev, "freeing pdev @ 0x%p\n", pdev);
0744 
0745     pcifront_free_roots(pdev);
0746 
0747     cancel_work_sync(&pdev->op_work);
0748 
0749     if (pdev->irq >= 0)
0750         unbind_from_irqhandler(pdev->irq, pdev);
0751 
0752     if (pdev->evtchn != INVALID_EVTCHN)
0753         xenbus_free_evtchn(pdev->xdev, pdev->evtchn);
0754 
0755     xenbus_teardown_ring((void **)&pdev->sh_info, 1, &pdev->gnt_ref);
0756 
0757     dev_set_drvdata(&pdev->xdev->dev, NULL);
0758 
0759     kfree(pdev);
0760 }
0761 
0762 static int pcifront_publish_info(struct pcifront_device *pdev)
0763 {
0764     int err = 0;
0765     struct xenbus_transaction trans;
0766 
0767     err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn);
0768     if (err)
0769         goto out;
0770 
0771     err = bind_evtchn_to_irqhandler(pdev->evtchn, pcifront_handler_aer,
0772         0, "pcifront", pdev);
0773 
0774     if (err < 0)
0775         return err;
0776 
0777     pdev->irq = err;
0778 
0779 do_publish:
0780     err = xenbus_transaction_start(&trans);
0781     if (err) {
0782         xenbus_dev_fatal(pdev->xdev, err,
0783                  "Error writing configuration for backend "
0784                  "(start transaction)");
0785         goto out;
0786     }
0787 
0788     err = xenbus_printf(trans, pdev->xdev->nodename,
0789                 "pci-op-ref", "%u", pdev->gnt_ref);
0790     if (!err)
0791         err = xenbus_printf(trans, pdev->xdev->nodename,
0792                     "event-channel", "%u", pdev->evtchn);
0793     if (!err)
0794         err = xenbus_printf(trans, pdev->xdev->nodename,
0795                     "magic", XEN_PCI_MAGIC);
0796 
0797     if (err) {
0798         xenbus_transaction_end(trans, 1);
0799         xenbus_dev_fatal(pdev->xdev, err,
0800                  "Error writing configuration for backend");
0801         goto out;
0802     } else {
0803         err = xenbus_transaction_end(trans, 0);
0804         if (err == -EAGAIN)
0805             goto do_publish;
0806         else if (err) {
0807             xenbus_dev_fatal(pdev->xdev, err,
0808                      "Error completing transaction "
0809                      "for backend");
0810             goto out;
0811         }
0812     }
0813 
0814     xenbus_switch_state(pdev->xdev, XenbusStateInitialised);
0815 
0816     dev_dbg(&pdev->xdev->dev, "publishing successful!\n");
0817 
0818 out:
0819     return err;
0820 }
0821 
0822 static int pcifront_try_connect(struct pcifront_device *pdev)
0823 {
0824     int err = -EFAULT;
0825     int i, num_roots, len;
0826     char str[64];
0827     unsigned int domain, bus;
0828 
0829 
0830     /* Only connect once */
0831     if (xenbus_read_driver_state(pdev->xdev->nodename) !=
0832         XenbusStateInitialised)
0833         goto out;
0834 
0835     err = pcifront_connect_and_init_dma(pdev);
0836     if (err && err != -EEXIST) {
0837         xenbus_dev_fatal(pdev->xdev, err,
0838                  "Error setting up PCI Frontend");
0839         goto out;
0840     }
0841 
0842     err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend,
0843                "root_num", "%d", &num_roots);
0844     if (err == -ENOENT) {
0845         xenbus_dev_error(pdev->xdev, err,
0846                  "No PCI Roots found, trying 0000:00");
0847         err = pcifront_scan_root(pdev, 0, 0);
0848         if (err) {
0849             xenbus_dev_fatal(pdev->xdev, err,
0850                      "Error scanning PCI root 0000:00");
0851             goto out;
0852         }
0853         num_roots = 0;
0854     } else if (err != 1) {
0855         if (err == 0)
0856             err = -EINVAL;
0857         xenbus_dev_fatal(pdev->xdev, err,
0858                  "Error reading number of PCI roots");
0859         goto out;
0860     }
0861 
0862     for (i = 0; i < num_roots; i++) {
0863         len = snprintf(str, sizeof(str), "root-%d", i);
0864         if (unlikely(len >= (sizeof(str) - 1))) {
0865             err = -ENOMEM;
0866             goto out;
0867         }
0868 
0869         err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str,
0870                    "%x:%x", &domain, &bus);
0871         if (err != 2) {
0872             if (err >= 0)
0873                 err = -EINVAL;
0874             xenbus_dev_fatal(pdev->xdev, err,
0875                      "Error reading PCI root %d", i);
0876             goto out;
0877         }
0878 
0879         err = pcifront_scan_root(pdev, domain, bus);
0880         if (err) {
0881             xenbus_dev_fatal(pdev->xdev, err,
0882                      "Error scanning PCI root %04x:%02x",
0883                      domain, bus);
0884             goto out;
0885         }
0886     }
0887 
0888     err = xenbus_switch_state(pdev->xdev, XenbusStateConnected);
0889 
0890 out:
0891     return err;
0892 }
0893 
0894 static int pcifront_try_disconnect(struct pcifront_device *pdev)
0895 {
0896     int err = 0;
0897     enum xenbus_state prev_state;
0898 
0899 
0900     prev_state = xenbus_read_driver_state(pdev->xdev->nodename);
0901 
0902     if (prev_state >= XenbusStateClosing)
0903         goto out;
0904 
0905     if (prev_state == XenbusStateConnected) {
0906         pcifront_free_roots(pdev);
0907         pcifront_disconnect(pdev);
0908     }
0909 
0910     err = xenbus_switch_state(pdev->xdev, XenbusStateClosed);
0911 
0912 out:
0913 
0914     return err;
0915 }
0916 
0917 static int pcifront_attach_devices(struct pcifront_device *pdev)
0918 {
0919     int err = -EFAULT;
0920     int i, num_roots, len;
0921     unsigned int domain, bus;
0922     char str[64];
0923 
0924     if (xenbus_read_driver_state(pdev->xdev->nodename) !=
0925         XenbusStateReconfiguring)
0926         goto out;
0927 
0928     err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend,
0929                "root_num", "%d", &num_roots);
0930     if (err == -ENOENT) {
0931         xenbus_dev_error(pdev->xdev, err,
0932                  "No PCI Roots found, trying 0000:00");
0933         err = pcifront_rescan_root(pdev, 0, 0);
0934         if (err) {
0935             xenbus_dev_fatal(pdev->xdev, err,
0936                      "Error scanning PCI root 0000:00");
0937             goto out;
0938         }
0939         num_roots = 0;
0940     } else if (err != 1) {
0941         if (err == 0)
0942             err = -EINVAL;
0943         xenbus_dev_fatal(pdev->xdev, err,
0944                  "Error reading number of PCI roots");
0945         goto out;
0946     }
0947 
0948     for (i = 0; i < num_roots; i++) {
0949         len = snprintf(str, sizeof(str), "root-%d", i);
0950         if (unlikely(len >= (sizeof(str) - 1))) {
0951             err = -ENOMEM;
0952             goto out;
0953         }
0954 
0955         err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str,
0956                    "%x:%x", &domain, &bus);
0957         if (err != 2) {
0958             if (err >= 0)
0959                 err = -EINVAL;
0960             xenbus_dev_fatal(pdev->xdev, err,
0961                      "Error reading PCI root %d", i);
0962             goto out;
0963         }
0964 
0965         err = pcifront_rescan_root(pdev, domain, bus);
0966         if (err) {
0967             xenbus_dev_fatal(pdev->xdev, err,
0968                      "Error scanning PCI root %04x:%02x",
0969                      domain, bus);
0970             goto out;
0971         }
0972     }
0973 
0974     xenbus_switch_state(pdev->xdev, XenbusStateConnected);
0975 
0976 out:
0977     return err;
0978 }
0979 
0980 static int pcifront_detach_devices(struct pcifront_device *pdev)
0981 {
0982     int err = 0;
0983     int i, num_devs;
0984     unsigned int domain, bus, slot, func;
0985     struct pci_dev *pci_dev;
0986     char str[64];
0987 
0988     if (xenbus_read_driver_state(pdev->xdev->nodename) !=
0989         XenbusStateConnected)
0990         goto out;
0991 
0992     err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, "num_devs", "%d",
0993                &num_devs);
0994     if (err != 1) {
0995         if (err >= 0)
0996             err = -EINVAL;
0997         xenbus_dev_fatal(pdev->xdev, err,
0998                  "Error reading number of PCI devices");
0999         goto out;
1000     }
1001 
1002     /* Find devices being detached and remove them. */
1003     for (i = 0; i < num_devs; i++) {
1004         int l, state;
1005 
1006         l = snprintf(str, sizeof(str), "state-%d", i);
1007         if (unlikely(l >= (sizeof(str) - 1))) {
1008             err = -ENOMEM;
1009             goto out;
1010         }
1011         state = xenbus_read_unsigned(pdev->xdev->otherend, str,
1012                          XenbusStateUnknown);
1013 
1014         if (state != XenbusStateClosing)
1015             continue;
1016 
1017         /* Remove device. */
1018         l = snprintf(str, sizeof(str), "vdev-%d", i);
1019         if (unlikely(l >= (sizeof(str) - 1))) {
1020             err = -ENOMEM;
1021             goto out;
1022         }
1023         err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str,
1024                    "%x:%x:%x.%x", &domain, &bus, &slot, &func);
1025         if (err != 4) {
1026             if (err >= 0)
1027                 err = -EINVAL;
1028             xenbus_dev_fatal(pdev->xdev, err,
1029                      "Error reading PCI device %d", i);
1030             goto out;
1031         }
1032 
1033         pci_dev = pci_get_domain_bus_and_slot(domain, bus,
1034                 PCI_DEVFN(slot, func));
1035         if (!pci_dev) {
1036             dev_dbg(&pdev->xdev->dev,
1037                 "Cannot get PCI device %04x:%02x:%02x.%d\n",
1038                 domain, bus, slot, func);
1039             continue;
1040         }
1041         pci_lock_rescan_remove();
1042         pci_stop_and_remove_bus_device(pci_dev);
1043         pci_dev_put(pci_dev);
1044         pci_unlock_rescan_remove();
1045 
1046         dev_dbg(&pdev->xdev->dev,
1047             "PCI device %04x:%02x:%02x.%d removed.\n",
1048             domain, bus, slot, func);
1049     }
1050 
1051     err = xenbus_switch_state(pdev->xdev, XenbusStateReconfiguring);
1052 
1053 out:
1054     return err;
1055 }
1056 
1057 static void pcifront_backend_changed(struct xenbus_device *xdev,
1058                           enum xenbus_state be_state)
1059 {
1060     struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev);
1061 
1062     switch (be_state) {
1063     case XenbusStateUnknown:
1064     case XenbusStateInitialising:
1065     case XenbusStateInitWait:
1066     case XenbusStateInitialised:
1067         break;
1068 
1069     case XenbusStateConnected:
1070         pcifront_try_connect(pdev);
1071         break;
1072 
1073     case XenbusStateClosed:
1074         if (xdev->state == XenbusStateClosed)
1075             break;
1076         fallthrough;    /* Missed the backend's CLOSING state */
1077     case XenbusStateClosing:
1078         dev_warn(&xdev->dev, "backend going away!\n");
1079         pcifront_try_disconnect(pdev);
1080         break;
1081 
1082     case XenbusStateReconfiguring:
1083         pcifront_detach_devices(pdev);
1084         break;
1085 
1086     case XenbusStateReconfigured:
1087         pcifront_attach_devices(pdev);
1088         break;
1089     }
1090 }
1091 
1092 static int pcifront_xenbus_probe(struct xenbus_device *xdev,
1093                  const struct xenbus_device_id *id)
1094 {
1095     int err = 0;
1096     struct pcifront_device *pdev = alloc_pdev(xdev);
1097 
1098     if (pdev == NULL) {
1099         err = -ENOMEM;
1100         xenbus_dev_fatal(xdev, err,
1101                  "Error allocating pcifront_device struct");
1102         goto out;
1103     }
1104 
1105     err = pcifront_publish_info(pdev);
1106     if (err)
1107         free_pdev(pdev);
1108 
1109 out:
1110     return err;
1111 }
1112 
1113 static int pcifront_xenbus_remove(struct xenbus_device *xdev)
1114 {
1115     struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev);
1116 
1117     if (pdev)
1118         free_pdev(pdev);
1119 
1120     return 0;
1121 }
1122 
1123 static const struct xenbus_device_id xenpci_ids[] = {
1124     {"pci"},
1125     {""},
1126 };
1127 
1128 static struct xenbus_driver xenpci_driver = {
1129     .name           = "pcifront",
1130     .ids            = xenpci_ids,
1131     .probe          = pcifront_xenbus_probe,
1132     .remove         = pcifront_xenbus_remove,
1133     .otherend_changed   = pcifront_backend_changed,
1134 };
1135 
1136 static int __init pcifront_init(void)
1137 {
1138     if (!xen_pv_domain() || xen_initial_domain())
1139         return -ENODEV;
1140 
1141     if (!xen_has_pv_devices())
1142         return -ENODEV;
1143 
1144     pci_frontend_registrar(1 /* enable */);
1145 
1146     return xenbus_register_frontend(&xenpci_driver);
1147 }
1148 
1149 static void __exit pcifront_cleanup(void)
1150 {
1151     xenbus_unregister_driver(&xenpci_driver);
1152     pci_frontend_registrar(0 /* disable */);
1153 }
1154 module_init(pcifront_init);
1155 module_exit(pcifront_cleanup);
1156 
1157 MODULE_DESCRIPTION("Xen PCI passthrough frontend.");
1158 MODULE_LICENSE("GPL");
1159 MODULE_ALIAS("xen:pci");