Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * PCI Message Signaled Interrupt (MSI)
0004  *
0005  * Copyright (C) 2003-2004 Intel
0006  * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
0007  * Copyright (C) 2016 Christoph Hellwig.
0008  */
0009 #include <linux/err.h>
0010 #include <linux/export.h>
0011 #include <linux/irq.h>
0012 
0013 #include "../pci.h"
0014 #include "msi.h"
0015 
/* Global kill switch: cleared by pci_no_msi() to disable MSI file-wide */
static int pci_msi_enable = 1;
/* Set by Xen to suppress use of the per-vector MSI/MSI-X mask bits */
int pci_msi_ignore_mask;
0018 
0019 static noinline void pci_msi_update_mask(struct msi_desc *desc, u32 clear, u32 set)
0020 {
0021     raw_spinlock_t *lock = &to_pci_dev(desc->dev)->msi_lock;
0022     unsigned long flags;
0023 
0024     if (!desc->pci.msi_attrib.can_mask)
0025         return;
0026 
0027     raw_spin_lock_irqsave(lock, flags);
0028     desc->pci.msi_mask &= ~clear;
0029     desc->pci.msi_mask |= set;
0030     pci_write_config_dword(msi_desc_to_pci_dev(desc), desc->pci.mask_pos,
0031                    desc->pci.msi_mask);
0032     raw_spin_unlock_irqrestore(lock, flags);
0033 }
0034 
0035 static inline void pci_msi_mask(struct msi_desc *desc, u32 mask)
0036 {
0037     pci_msi_update_mask(desc, 0, mask);
0038 }
0039 
0040 static inline void pci_msi_unmask(struct msi_desc *desc, u32 mask)
0041 {
0042     pci_msi_update_mask(desc, mask, 0);
0043 }
0044 
0045 static inline void __iomem *pci_msix_desc_addr(struct msi_desc *desc)
0046 {
0047     return desc->pci.mask_base + desc->msi_index * PCI_MSIX_ENTRY_SIZE;
0048 }
0049 
0050 /*
0051  * This internal function does not flush PCI writes to the device.  All
0052  * users must ensure that they read from the device before either assuming
0053  * that the device state is up to date, or returning out of this file.
0054  * It does not affect the msi_desc::msix_ctrl cache either. Use with care!
0055  */
0056 static void pci_msix_write_vector_ctrl(struct msi_desc *desc, u32 ctrl)
0057 {
0058     void __iomem *desc_addr = pci_msix_desc_addr(desc);
0059 
0060     if (desc->pci.msi_attrib.can_mask)
0061         writel(ctrl, desc_addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
0062 }
0063 
0064 static inline void pci_msix_mask(struct msi_desc *desc)
0065 {
0066     desc->pci.msix_ctrl |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
0067     pci_msix_write_vector_ctrl(desc, desc->pci.msix_ctrl);
0068     /* Flush write to device */
0069     readl(desc->pci.mask_base);
0070 }
0071 
0072 static inline void pci_msix_unmask(struct msi_desc *desc)
0073 {
0074     desc->pci.msix_ctrl &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
0075     pci_msix_write_vector_ctrl(desc, desc->pci.msix_ctrl);
0076 }
0077 
0078 static void __pci_msi_mask_desc(struct msi_desc *desc, u32 mask)
0079 {
0080     if (desc->pci.msi_attrib.is_msix)
0081         pci_msix_mask(desc);
0082     else
0083         pci_msi_mask(desc, mask);
0084 }
0085 
0086 static void __pci_msi_unmask_desc(struct msi_desc *desc, u32 mask)
0087 {
0088     if (desc->pci.msi_attrib.is_msix)
0089         pci_msix_unmask(desc);
0090     else
0091         pci_msi_unmask(desc, mask);
0092 }
0093 
0094 /**
0095  * pci_msi_mask_irq - Generic IRQ chip callback to mask PCI/MSI interrupts
0096  * @data:   pointer to irqdata associated to that interrupt
0097  */
0098 void pci_msi_mask_irq(struct irq_data *data)
0099 {
0100     struct msi_desc *desc = irq_data_get_msi_desc(data);
0101 
0102     __pci_msi_mask_desc(desc, BIT(data->irq - desc->irq));
0103 }
0104 EXPORT_SYMBOL_GPL(pci_msi_mask_irq);
0105 
0106 /**
0107  * pci_msi_unmask_irq - Generic IRQ chip callback to unmask PCI/MSI interrupts
0108  * @data:   pointer to irqdata associated to that interrupt
0109  */
0110 void pci_msi_unmask_irq(struct irq_data *data)
0111 {
0112     struct msi_desc *desc = irq_data_get_msi_desc(data);
0113 
0114     __pci_msi_unmask_desc(desc, BIT(data->irq - desc->irq));
0115 }
0116 EXPORT_SYMBOL_GPL(pci_msi_unmask_irq);
0117 
0118 void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
0119 {
0120     struct pci_dev *dev = msi_desc_to_pci_dev(entry);
0121 
0122     BUG_ON(dev->current_state != PCI_D0);
0123 
0124     if (entry->pci.msi_attrib.is_msix) {
0125         void __iomem *base = pci_msix_desc_addr(entry);
0126 
0127         if (WARN_ON_ONCE(entry->pci.msi_attrib.is_virtual))
0128             return;
0129 
0130         msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR);
0131         msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR);
0132         msg->data = readl(base + PCI_MSIX_ENTRY_DATA);
0133     } else {
0134         int pos = dev->msi_cap;
0135         u16 data;
0136 
0137         pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_LO,
0138                       &msg->address_lo);
0139         if (entry->pci.msi_attrib.is_64) {
0140             pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_HI,
0141                           &msg->address_hi);
0142             pci_read_config_word(dev, pos + PCI_MSI_DATA_64, &data);
0143         } else {
0144             msg->address_hi = 0;
0145             pci_read_config_word(dev, pos + PCI_MSI_DATA_32, &data);
0146         }
0147         msg->data = data;
0148     }
0149 }
0150 
0151 void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
0152 {
0153     struct pci_dev *dev = msi_desc_to_pci_dev(entry);
0154 
0155     if (dev->current_state != PCI_D0 || pci_dev_is_disconnected(dev)) {
0156         /* Don't touch the hardware now */
0157     } else if (entry->pci.msi_attrib.is_msix) {
0158         void __iomem *base = pci_msix_desc_addr(entry);
0159         u32 ctrl = entry->pci.msix_ctrl;
0160         bool unmasked = !(ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT);
0161 
0162         if (entry->pci.msi_attrib.is_virtual)
0163             goto skip;
0164 
0165         /*
0166          * The specification mandates that the entry is masked
0167          * when the message is modified:
0168          *
0169          * "If software changes the Address or Data value of an
0170          * entry while the entry is unmasked, the result is
0171          * undefined."
0172          */
0173         if (unmasked)
0174             pci_msix_write_vector_ctrl(entry, ctrl | PCI_MSIX_ENTRY_CTRL_MASKBIT);
0175 
0176         writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR);
0177         writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR);
0178         writel(msg->data, base + PCI_MSIX_ENTRY_DATA);
0179 
0180         if (unmasked)
0181             pci_msix_write_vector_ctrl(entry, ctrl);
0182 
0183         /* Ensure that the writes are visible in the device */
0184         readl(base + PCI_MSIX_ENTRY_DATA);
0185     } else {
0186         int pos = dev->msi_cap;
0187         u16 msgctl;
0188 
0189         pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
0190         msgctl &= ~PCI_MSI_FLAGS_QSIZE;
0191         msgctl |= entry->pci.msi_attrib.multiple << 4;
0192         pci_write_config_word(dev, pos + PCI_MSI_FLAGS, msgctl);
0193 
0194         pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_LO,
0195                        msg->address_lo);
0196         if (entry->pci.msi_attrib.is_64) {
0197             pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_HI,
0198                            msg->address_hi);
0199             pci_write_config_word(dev, pos + PCI_MSI_DATA_64,
0200                           msg->data);
0201         } else {
0202             pci_write_config_word(dev, pos + PCI_MSI_DATA_32,
0203                           msg->data);
0204         }
0205         /* Ensure that the writes are visible in the device */
0206         pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
0207     }
0208 
0209 skip:
0210     entry->msg = *msg;
0211 
0212     if (entry->write_msi_msg)
0213         entry->write_msi_msg(entry, entry->write_msi_msg_data);
0214 
0215 }
0216 
/* Look up the MSI descriptor for @irq and program @msg into it. */
void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg)
{
	struct msi_desc *entry = irq_get_msi_desc(irq);

	__pci_write_msi_msg(entry, msg);
}
EXPORT_SYMBOL_GPL(pci_write_msi_msg);
0224 
0225 static void free_msi_irqs(struct pci_dev *dev)
0226 {
0227     pci_msi_teardown_msi_irqs(dev);
0228 
0229     if (dev->msix_base) {
0230         iounmap(dev->msix_base);
0231         dev->msix_base = NULL;
0232     }
0233 }
0234 
0235 static void pci_intx_for_msi(struct pci_dev *dev, int enable)
0236 {
0237     if (!(dev->dev_flags & PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG))
0238         pci_intx(dev, enable);
0239 }
0240 
0241 static void pci_msi_set_enable(struct pci_dev *dev, int enable)
0242 {
0243     u16 control;
0244 
0245     pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
0246     control &= ~PCI_MSI_FLAGS_ENABLE;
0247     if (enable)
0248         control |= PCI_MSI_FLAGS_ENABLE;
0249     pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control);
0250 }
0251 
0252 /*
0253  * Architecture override returns true when the PCI MSI message should be
0254  * written by the generic restore function.
0255  */
0256 bool __weak arch_restore_msi_irqs(struct pci_dev *dev)
0257 {
0258     return true;
0259 }
0260 
0261 static void __pci_restore_msi_state(struct pci_dev *dev)
0262 {
0263     struct msi_desc *entry;
0264     u16 control;
0265 
0266     if (!dev->msi_enabled)
0267         return;
0268 
0269     entry = irq_get_msi_desc(dev->irq);
0270 
0271     pci_intx_for_msi(dev, 0);
0272     pci_msi_set_enable(dev, 0);
0273     if (arch_restore_msi_irqs(dev))
0274         __pci_write_msi_msg(entry, &entry->msg);
0275 
0276     pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
0277     pci_msi_update_mask(entry, 0, 0);
0278     control &= ~PCI_MSI_FLAGS_QSIZE;
0279     control |= (entry->pci.msi_attrib.multiple << 4) | PCI_MSI_FLAGS_ENABLE;
0280     pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control);
0281 }
0282 
0283 static void pci_msix_clear_and_set_ctrl(struct pci_dev *dev, u16 clear, u16 set)
0284 {
0285     u16 ctrl;
0286 
0287     pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &ctrl);
0288     ctrl &= ~clear;
0289     ctrl |= set;
0290     pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, ctrl);
0291 }
0292 
0293 static void __pci_restore_msix_state(struct pci_dev *dev)
0294 {
0295     struct msi_desc *entry;
0296     bool write_msg;
0297 
0298     if (!dev->msix_enabled)
0299         return;
0300 
0301     /* route the table */
0302     pci_intx_for_msi(dev, 0);
0303     pci_msix_clear_and_set_ctrl(dev, 0,
0304                 PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL);
0305 
0306     write_msg = arch_restore_msi_irqs(dev);
0307 
0308     msi_lock_descs(&dev->dev);
0309     msi_for_each_desc(entry, &dev->dev, MSI_DESC_ALL) {
0310         if (write_msg)
0311             __pci_write_msi_msg(entry, &entry->msg);
0312         pci_msix_write_vector_ctrl(entry, entry->pci.msix_ctrl);
0313     }
0314     msi_unlock_descs(&dev->dev);
0315 
0316     pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0);
0317 }
0318 
/* Restore both MSI and MSI-X state; each helper checks its own enable flag. */
void pci_restore_msi_state(struct pci_dev *dev)
{
	__pci_restore_msi_state(dev);
	__pci_restore_msix_state(dev);
}
EXPORT_SYMBOL_GPL(pci_restore_msi_state);
0325 
0326 static void pcim_msi_release(void *pcidev)
0327 {
0328     struct pci_dev *dev = pcidev;
0329 
0330     dev->is_msi_managed = false;
0331     pci_free_irq_vectors(dev);
0332 }
0333 
0334 /*
0335  * Needs to be separate from pcim_release to prevent an ordering problem
0336  * vs. msi_device_data_release() in the MSI core code.
0337  */
0338 static int pcim_setup_msi_release(struct pci_dev *dev)
0339 {
0340     int ret;
0341 
0342     if (!pci_is_managed(dev) || dev->is_msi_managed)
0343         return 0;
0344 
0345     ret = devm_add_action(&dev->dev, pcim_msi_release, dev);
0346     if (!ret)
0347         dev->is_msi_managed = true;
0348     return ret;
0349 }
0350 
0351 /*
0352  * Ordering vs. devres: msi device data has to be installed first so that
0353  * pcim_msi_release() is invoked before it on device release.
0354  */
0355 static int pci_setup_msi_context(struct pci_dev *dev)
0356 {
0357     int ret = msi_setup_device_data(&dev->dev);
0358 
0359     if (!ret)
0360         ret = pcim_setup_msi_release(dev);
0361     return ret;
0362 }
0363 
0364 static int msi_setup_msi_desc(struct pci_dev *dev, int nvec,
0365                   struct irq_affinity_desc *masks)
0366 {
0367     struct msi_desc desc;
0368     u16 control;
0369 
0370     /* MSI Entry Initialization */
0371     memset(&desc, 0, sizeof(desc));
0372 
0373     pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
0374     /* Lies, damned lies, and MSIs */
0375     if (dev->dev_flags & PCI_DEV_FLAGS_HAS_MSI_MASKING)
0376         control |= PCI_MSI_FLAGS_MASKBIT;
0377     /* Respect XEN's mask disabling */
0378     if (pci_msi_ignore_mask)
0379         control &= ~PCI_MSI_FLAGS_MASKBIT;
0380 
0381     desc.nvec_used          = nvec;
0382     desc.pci.msi_attrib.is_64   = !!(control & PCI_MSI_FLAGS_64BIT);
0383     desc.pci.msi_attrib.can_mask    = !!(control & PCI_MSI_FLAGS_MASKBIT);
0384     desc.pci.msi_attrib.default_irq = dev->irq;
0385     desc.pci.msi_attrib.multi_cap   = (control & PCI_MSI_FLAGS_QMASK) >> 1;
0386     desc.pci.msi_attrib.multiple    = ilog2(__roundup_pow_of_two(nvec));
0387     desc.affinity           = masks;
0388 
0389     if (control & PCI_MSI_FLAGS_64BIT)
0390         desc.pci.mask_pos = dev->msi_cap + PCI_MSI_MASK_64;
0391     else
0392         desc.pci.mask_pos = dev->msi_cap + PCI_MSI_MASK_32;
0393 
0394     /* Save the initial mask status */
0395     if (desc.pci.msi_attrib.can_mask)
0396         pci_read_config_dword(dev, desc.pci.mask_pos, &desc.pci.msi_mask);
0397 
0398     return msi_add_msi_desc(&dev->dev, &desc);
0399 }
0400 
0401 static int msi_verify_entries(struct pci_dev *dev)
0402 {
0403     struct msi_desc *entry;
0404 
0405     if (!dev->no_64bit_msi)
0406         return 0;
0407 
0408     msi_for_each_desc(entry, &dev->dev, MSI_DESC_ALL) {
0409         if (entry->msg.address_hi) {
0410             pci_err(dev, "arch assigned 64-bit MSI address %#x%08x but device only supports 32 bits\n",
0411                 entry->msg.address_hi, entry->msg.address_lo);
0412             break;
0413         }
0414     }
0415     return !entry ? 0 : -EIO;
0416 }
0417 
0418 /**
0419  * msi_capability_init - configure device's MSI capability structure
0420  * @dev: pointer to the pci_dev data structure of MSI device function
0421  * @nvec: number of interrupts to allocate
0422  * @affd: description of automatic IRQ affinity assignments (may be %NULL)
0423  *
0424  * Setup the MSI capability structure of the device with the requested
0425  * number of interrupts.  A return value of zero indicates the successful
0426  * setup of an entry with the new MSI IRQ.  A negative return value indicates
0427  * an error, and a positive return value indicates the number of interrupts
0428  * which could have been allocated.
0429  */
0430 static int msi_capability_init(struct pci_dev *dev, int nvec,
0431                    struct irq_affinity *affd)
0432 {
0433     struct irq_affinity_desc *masks = NULL;
0434     struct msi_desc *entry;
0435     int ret;
0436 
0437     /*
0438      * Disable MSI during setup in the hardware, but mark it enabled
0439      * so that setup code can evaluate it.
0440      */
0441     pci_msi_set_enable(dev, 0);
0442     dev->msi_enabled = 1;
0443 
0444     if (affd)
0445         masks = irq_create_affinity_masks(nvec, affd);
0446 
0447     msi_lock_descs(&dev->dev);
0448     ret = msi_setup_msi_desc(dev, nvec, masks);
0449     if (ret)
0450         goto fail;
0451 
0452     /* All MSIs are unmasked by default; mask them all */
0453     entry = msi_first_desc(&dev->dev, MSI_DESC_ALL);
0454     pci_msi_mask(entry, msi_multi_mask(entry));
0455 
0456     /* Configure MSI capability structure */
0457     ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI);
0458     if (ret)
0459         goto err;
0460 
0461     ret = msi_verify_entries(dev);
0462     if (ret)
0463         goto err;
0464 
0465     /* Set MSI enabled bits */
0466     pci_intx_for_msi(dev, 0);
0467     pci_msi_set_enable(dev, 1);
0468 
0469     pcibios_free_irq(dev);
0470     dev->irq = entry->irq;
0471     goto unlock;
0472 
0473 err:
0474     pci_msi_unmask(entry, msi_multi_mask(entry));
0475     free_msi_irqs(dev);
0476 fail:
0477     dev->msi_enabled = 0;
0478 unlock:
0479     msi_unlock_descs(&dev->dev);
0480     kfree(masks);
0481     return ret;
0482 }
0483 
0484 static void __iomem *msix_map_region(struct pci_dev *dev,
0485                      unsigned int nr_entries)
0486 {
0487     resource_size_t phys_addr;
0488     u32 table_offset;
0489     unsigned long flags;
0490     u8 bir;
0491 
0492     pci_read_config_dword(dev, dev->msix_cap + PCI_MSIX_TABLE,
0493                   &table_offset);
0494     bir = (u8)(table_offset & PCI_MSIX_TABLE_BIR);
0495     flags = pci_resource_flags(dev, bir);
0496     if (!flags || (flags & IORESOURCE_UNSET))
0497         return NULL;
0498 
0499     table_offset &= PCI_MSIX_TABLE_OFFSET;
0500     phys_addr = pci_resource_start(dev, bir) + table_offset;
0501 
0502     return ioremap(phys_addr, nr_entries * PCI_MSIX_ENTRY_SIZE);
0503 }
0504 
0505 static int msix_setup_msi_descs(struct pci_dev *dev, void __iomem *base,
0506                 struct msix_entry *entries, int nvec,
0507                 struct irq_affinity_desc *masks)
0508 {
0509     int ret = 0, i, vec_count = pci_msix_vec_count(dev);
0510     struct irq_affinity_desc *curmsk;
0511     struct msi_desc desc;
0512     void __iomem *addr;
0513 
0514     memset(&desc, 0, sizeof(desc));
0515 
0516     desc.nvec_used          = 1;
0517     desc.pci.msi_attrib.is_msix = 1;
0518     desc.pci.msi_attrib.is_64   = 1;
0519     desc.pci.msi_attrib.default_irq = dev->irq;
0520     desc.pci.mask_base      = base;
0521 
0522     for (i = 0, curmsk = masks; i < nvec; i++, curmsk++) {
0523         desc.msi_index = entries ? entries[i].entry : i;
0524         desc.affinity = masks ? curmsk : NULL;
0525         desc.pci.msi_attrib.is_virtual = desc.msi_index >= vec_count;
0526         desc.pci.msi_attrib.can_mask = !pci_msi_ignore_mask &&
0527                            !desc.pci.msi_attrib.is_virtual;
0528 
0529         if (!desc.pci.msi_attrib.can_mask) {
0530             addr = pci_msix_desc_addr(&desc);
0531             desc.pci.msix_ctrl = readl(addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
0532         }
0533 
0534         ret = msi_add_msi_desc(&dev->dev, &desc);
0535         if (ret)
0536             break;
0537     }
0538     return ret;
0539 }
0540 
0541 static void msix_update_entries(struct pci_dev *dev, struct msix_entry *entries)
0542 {
0543     struct msi_desc *desc;
0544 
0545     if (entries) {
0546         msi_for_each_desc(desc, &dev->dev, MSI_DESC_ALL) {
0547             entries->vector = desc->irq;
0548             entries++;
0549         }
0550     }
0551 }
0552 
0553 static void msix_mask_all(void __iomem *base, int tsize)
0554 {
0555     u32 ctrl = PCI_MSIX_ENTRY_CTRL_MASKBIT;
0556     int i;
0557 
0558     if (pci_msi_ignore_mask)
0559         return;
0560 
0561     for (i = 0; i < tsize; i++, base += PCI_MSIX_ENTRY_SIZE)
0562         writel(ctrl, base + PCI_MSIX_ENTRY_VECTOR_CTRL);
0563 }
0564 
0565 static int msix_setup_interrupts(struct pci_dev *dev, void __iomem *base,
0566                  struct msix_entry *entries, int nvec,
0567                  struct irq_affinity *affd)
0568 {
0569     struct irq_affinity_desc *masks = NULL;
0570     int ret;
0571 
0572     if (affd)
0573         masks = irq_create_affinity_masks(nvec, affd);
0574 
0575     msi_lock_descs(&dev->dev);
0576     ret = msix_setup_msi_descs(dev, base, entries, nvec, masks);
0577     if (ret)
0578         goto out_free;
0579 
0580     ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX);
0581     if (ret)
0582         goto out_free;
0583 
0584     /* Check if all MSI entries honor device restrictions */
0585     ret = msi_verify_entries(dev);
0586     if (ret)
0587         goto out_free;
0588 
0589     msix_update_entries(dev, entries);
0590     goto out_unlock;
0591 
0592 out_free:
0593     free_msi_irqs(dev);
0594 out_unlock:
0595     msi_unlock_descs(&dev->dev);
0596     kfree(masks);
0597     return ret;
0598 }
0599 
0600 /**
0601  * msix_capability_init - configure device's MSI-X capability
0602  * @dev: pointer to the pci_dev data structure of MSI-X device function
0603  * @entries: pointer to an array of struct msix_entry entries
0604  * @nvec: number of @entries
0605  * @affd: Optional pointer to enable automatic affinity assignment
0606  *
0607  * Setup the MSI-X capability structure of device function with a
0608  * single MSI-X IRQ. A return of zero indicates the successful setup of
0609  * requested MSI-X entries with allocated IRQs or non-zero for otherwise.
0610  **/
0611 static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries,
0612                 int nvec, struct irq_affinity *affd)
0613 {
0614     void __iomem *base;
0615     int ret, tsize;
0616     u16 control;
0617 
0618     /*
0619      * Some devices require MSI-X to be enabled before the MSI-X
0620      * registers can be accessed.  Mask all the vectors to prevent
0621      * interrupts coming in before they're fully set up.
0622      */
0623     pci_msix_clear_and_set_ctrl(dev, 0, PCI_MSIX_FLAGS_MASKALL |
0624                     PCI_MSIX_FLAGS_ENABLE);
0625 
0626     /* Mark it enabled so setup functions can query it */
0627     dev->msix_enabled = 1;
0628 
0629     pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control);
0630     /* Request & Map MSI-X table region */
0631     tsize = msix_table_size(control);
0632     base = msix_map_region(dev, tsize);
0633     if (!base) {
0634         ret = -ENOMEM;
0635         goto out_disable;
0636     }
0637 
0638     dev->msix_base = base;
0639 
0640     ret = msix_setup_interrupts(dev, base, entries, nvec, affd);
0641     if (ret)
0642         goto out_disable;
0643 
0644     /* Disable INTX */
0645     pci_intx_for_msi(dev, 0);
0646 
0647     /*
0648      * Ensure that all table entries are masked to prevent
0649      * stale entries from firing in a crash kernel.
0650      *
0651      * Done late to deal with a broken Marvell NVME device
0652      * which takes the MSI-X mask bits into account even
0653      * when MSI-X is disabled, which prevents MSI delivery.
0654      */
0655     msix_mask_all(base, tsize);
0656     pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0);
0657 
0658     pcibios_free_irq(dev);
0659     return 0;
0660 
0661 out_disable:
0662     dev->msix_enabled = 0;
0663     pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE, 0);
0664 
0665     return ret;
0666 }
0667 
0668 /**
0669  * pci_msi_supported - check whether MSI may be enabled on a device
0670  * @dev: pointer to the pci_dev data structure of MSI device function
0671  * @nvec: how many MSIs have been requested?
0672  *
0673  * Look at global flags, the device itself, and its parent buses
0674  * to determine if MSI/-X are supported for the device. If MSI/-X is
0675  * supported return 1, else return 0.
0676  **/
0677 static int pci_msi_supported(struct pci_dev *dev, int nvec)
0678 {
0679     struct pci_bus *bus;
0680 
0681     /* MSI must be globally enabled and supported by the device */
0682     if (!pci_msi_enable)
0683         return 0;
0684 
0685     if (!dev || dev->no_msi)
0686         return 0;
0687 
0688     /*
0689      * You can't ask to have 0 or less MSIs configured.
0690      *  a) it's stupid ..
0691      *  b) the list manipulation code assumes nvec >= 1.
0692      */
0693     if (nvec < 1)
0694         return 0;
0695 
0696     /*
0697      * Any bridge which does NOT route MSI transactions from its
0698      * secondary bus to its primary bus must set NO_MSI flag on
0699      * the secondary pci_bus.
0700      *
0701      * The NO_MSI flag can either be set directly by:
0702      * - arch-specific PCI host bus controller drivers (deprecated)
0703      * - quirks for specific PCI bridges
0704      *
0705      * or indirectly by platform-specific PCI host bridge drivers by
0706      * advertising the 'msi_domain' property, which results in
0707      * the NO_MSI flag when no MSI domain is found for this bridge
0708      * at probe time.
0709      */
0710     for (bus = dev->bus; bus; bus = bus->parent)
0711         if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
0712             return 0;
0713 
0714     return 1;
0715 }
0716 
0717 /**
0718  * pci_msi_vec_count - Return the number of MSI vectors a device can send
0719  * @dev: device to report about
0720  *
0721  * This function returns the number of MSI vectors a device requested via
0722  * Multiple Message Capable register. It returns a negative errno if the
0723  * device is not capable sending MSI interrupts. Otherwise, the call succeeds
0724  * and returns a power of two, up to a maximum of 2^5 (32), according to the
0725  * MSI specification.
0726  **/
0727 int pci_msi_vec_count(struct pci_dev *dev)
0728 {
0729     int ret;
0730     u16 msgctl;
0731 
0732     if (!dev->msi_cap)
0733         return -EINVAL;
0734 
0735     pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &msgctl);
0736     ret = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1);
0737 
0738     return ret;
0739 }
0740 EXPORT_SYMBOL(pci_msi_vec_count);
0741 
0742 static void pci_msi_shutdown(struct pci_dev *dev)
0743 {
0744     struct msi_desc *desc;
0745 
0746     if (!pci_msi_enable || !dev || !dev->msi_enabled)
0747         return;
0748 
0749     pci_msi_set_enable(dev, 0);
0750     pci_intx_for_msi(dev, 1);
0751     dev->msi_enabled = 0;
0752 
0753     /* Return the device with MSI unmasked as initial states */
0754     desc = msi_first_desc(&dev->dev, MSI_DESC_ALL);
0755     if (!WARN_ON_ONCE(!desc))
0756         pci_msi_unmask(desc, msi_multi_mask(desc));
0757 
0758     /* Restore dev->irq to its default pin-assertion IRQ */
0759     dev->irq = desc->pci.msi_attrib.default_irq;
0760     pcibios_alloc_irq(dev);
0761 }
0762 
0763 void pci_disable_msi(struct pci_dev *dev)
0764 {
0765     if (!pci_msi_enable || !dev || !dev->msi_enabled)
0766         return;
0767 
0768     msi_lock_descs(&dev->dev);
0769     pci_msi_shutdown(dev);
0770     free_msi_irqs(dev);
0771     msi_unlock_descs(&dev->dev);
0772 }
0773 EXPORT_SYMBOL(pci_disable_msi);
0774 
0775 /**
0776  * pci_msix_vec_count - return the number of device's MSI-X table entries
0777  * @dev: pointer to the pci_dev data structure of MSI-X device function
0778  * This function returns the number of device's MSI-X table entries and
0779  * therefore the number of MSI-X vectors device is capable of sending.
0780  * It returns a negative errno if the device is not capable of sending MSI-X
0781  * interrupts.
0782  **/
0783 int pci_msix_vec_count(struct pci_dev *dev)
0784 {
0785     u16 control;
0786 
0787     if (!dev->msix_cap)
0788         return -EINVAL;
0789 
0790     pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control);
0791     return msix_table_size(control);
0792 }
0793 EXPORT_SYMBOL(pci_msix_vec_count);
0794 
0795 static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
0796                  int nvec, struct irq_affinity *affd, int flags)
0797 {
0798     int nr_entries;
0799     int i, j;
0800 
0801     if (!pci_msi_supported(dev, nvec) || dev->current_state != PCI_D0)
0802         return -EINVAL;
0803 
0804     nr_entries = pci_msix_vec_count(dev);
0805     if (nr_entries < 0)
0806         return nr_entries;
0807     if (nvec > nr_entries && !(flags & PCI_IRQ_VIRTUAL))
0808         return nr_entries;
0809 
0810     if (entries) {
0811         /* Check for any invalid entries */
0812         for (i = 0; i < nvec; i++) {
0813             if (entries[i].entry >= nr_entries)
0814                 return -EINVAL;     /* invalid entry */
0815             for (j = i + 1; j < nvec; j++) {
0816                 if (entries[i].entry == entries[j].entry)
0817                     return -EINVAL; /* duplicate entry */
0818             }
0819         }
0820     }
0821 
0822     /* Check whether driver already requested for MSI IRQ */
0823     if (dev->msi_enabled) {
0824         pci_info(dev, "can't enable MSI-X (MSI IRQ already assigned)\n");
0825         return -EINVAL;
0826     }
0827     return msix_capability_init(dev, entries, nvec, affd);
0828 }
0829 
0830 static void pci_msix_shutdown(struct pci_dev *dev)
0831 {
0832     struct msi_desc *desc;
0833 
0834     if (!pci_msi_enable || !dev || !dev->msix_enabled)
0835         return;
0836 
0837     if (pci_dev_is_disconnected(dev)) {
0838         dev->msix_enabled = 0;
0839         return;
0840     }
0841 
0842     /* Return the device with MSI-X masked as initial states */
0843     msi_for_each_desc(desc, &dev->dev, MSI_DESC_ALL)
0844         pci_msix_mask(desc);
0845 
0846     pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0);
0847     pci_intx_for_msi(dev, 1);
0848     dev->msix_enabled = 0;
0849     pcibios_alloc_irq(dev);
0850 }
0851 
0852 void pci_disable_msix(struct pci_dev *dev)
0853 {
0854     if (!pci_msi_enable || !dev || !dev->msix_enabled)
0855         return;
0856 
0857     msi_lock_descs(&dev->dev);
0858     pci_msix_shutdown(dev);
0859     free_msi_irqs(dev);
0860     msi_unlock_descs(&dev->dev);
0861 }
0862 EXPORT_SYMBOL(pci_disable_msix);
0863 
0864 static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
0865                   struct irq_affinity *affd)
0866 {
0867     int nvec;
0868     int rc;
0869 
0870     if (!pci_msi_supported(dev, minvec) || dev->current_state != PCI_D0)
0871         return -EINVAL;
0872 
0873     /* Check whether driver already requested MSI-X IRQs */
0874     if (dev->msix_enabled) {
0875         pci_info(dev, "can't enable MSI (MSI-X already enabled)\n");
0876         return -EINVAL;
0877     }
0878 
0879     if (maxvec < minvec)
0880         return -ERANGE;
0881 
0882     if (WARN_ON_ONCE(dev->msi_enabled))
0883         return -EINVAL;
0884 
0885     nvec = pci_msi_vec_count(dev);
0886     if (nvec < 0)
0887         return nvec;
0888     if (nvec < minvec)
0889         return -ENOSPC;
0890 
0891     if (nvec > maxvec)
0892         nvec = maxvec;
0893 
0894     rc = pci_setup_msi_context(dev);
0895     if (rc)
0896         return rc;
0897 
0898     for (;;) {
0899         if (affd) {
0900             nvec = irq_calc_affinity_vectors(minvec, nvec, affd);
0901             if (nvec < minvec)
0902                 return -ENOSPC;
0903         }
0904 
0905         rc = msi_capability_init(dev, nvec, affd);
0906         if (rc == 0)
0907             return nvec;
0908 
0909         if (rc < 0)
0910             return rc;
0911         if (rc < minvec)
0912             return -ENOSPC;
0913 
0914         nvec = rc;
0915     }
0916 }
0917 
0918 /* deprecated, don't use */
0919 int pci_enable_msi(struct pci_dev *dev)
0920 {
0921     int rc = __pci_enable_msi_range(dev, 1, 1, NULL);
0922     if (rc < 0)
0923         return rc;
0924     return 0;
0925 }
0926 EXPORT_SYMBOL(pci_enable_msi);
0927 
0928 static int __pci_enable_msix_range(struct pci_dev *dev,
0929                    struct msix_entry *entries, int minvec,
0930                    int maxvec, struct irq_affinity *affd,
0931                    int flags)
0932 {
0933     int rc, nvec = maxvec;
0934 
0935     if (maxvec < minvec)
0936         return -ERANGE;
0937 
0938     if (WARN_ON_ONCE(dev->msix_enabled))
0939         return -EINVAL;
0940 
0941     rc = pci_setup_msi_context(dev);
0942     if (rc)
0943         return rc;
0944 
0945     for (;;) {
0946         if (affd) {
0947             nvec = irq_calc_affinity_vectors(minvec, nvec, affd);
0948             if (nvec < minvec)
0949                 return -ENOSPC;
0950         }
0951 
0952         rc = __pci_enable_msix(dev, entries, nvec, affd, flags);
0953         if (rc == 0)
0954             return nvec;
0955 
0956         if (rc < 0)
0957             return rc;
0958         if (rc < minvec)
0959             return -ENOSPC;
0960 
0961         nvec = rc;
0962     }
0963 }
0964 
0965 /**
0966  * pci_enable_msix_range - configure device's MSI-X capability structure
0967  * @dev: pointer to the pci_dev data structure of MSI-X device function
0968  * @entries: pointer to an array of MSI-X entries
0969  * @minvec: minimum number of MSI-X IRQs requested
0970  * @maxvec: maximum number of MSI-X IRQs requested
0971  *
0972  * Setup the MSI-X capability structure of device function with a maximum
0973  * possible number of interrupts in the range between @minvec and @maxvec
0974  * upon its software driver call to request for MSI-X mode enabled on its
0975  * hardware device function. It returns a negative errno if an error occurs.
0976  * If it succeeds, it returns the actual number of interrupts allocated and
0977  * indicates the successful configuration of MSI-X capability structure
0978  * with new allocated MSI-X interrupts.
0979  **/
0980 int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
0981         int minvec, int maxvec)
0982 {
0983     return __pci_enable_msix_range(dev, entries, minvec, maxvec, NULL, 0);
0984 }
0985 EXPORT_SYMBOL(pci_enable_msix_range);
0986 
0987 /**
0988  * pci_alloc_irq_vectors_affinity - allocate multiple IRQs for a device
0989  * @dev:        PCI device to operate on
0990  * @min_vecs:       minimum number of vectors required (must be >= 1)
0991  * @max_vecs:       maximum (desired) number of vectors
0992  * @flags:      flags or quirks for the allocation
0993  * @affd:       optional description of the affinity requirements
0994  *
0995  * Allocate up to @max_vecs interrupt vectors for @dev, using MSI-X or MSI
0996  * vectors if available, and fall back to a single legacy vector
0997  * if neither is available.  Return the number of vectors allocated,
0998  * (which might be smaller than @max_vecs) if successful, or a negative
0999  * error code on error. If less than @min_vecs interrupt vectors are
1000  * available for @dev the function will fail with -ENOSPC.
1001  *
1002  * To get the Linux IRQ number used for a vector that can be passed to
1003  * request_irq() use the pci_irq_vector() helper.
1004  */
1005 int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
1006                    unsigned int max_vecs, unsigned int flags,
1007                    struct irq_affinity *affd)
1008 {
1009     struct irq_affinity msi_default_affd = {0};
1010     int nvecs = -ENOSPC;
1011 
1012     if (flags & PCI_IRQ_AFFINITY) {
1013         if (!affd)
1014             affd = &msi_default_affd;
1015     } else {
1016         if (WARN_ON(affd))
1017             affd = NULL;
1018     }
1019 
1020     if (flags & PCI_IRQ_MSIX) {
1021         nvecs = __pci_enable_msix_range(dev, NULL, min_vecs, max_vecs,
1022                         affd, flags);
1023         if (nvecs > 0)
1024             return nvecs;
1025     }
1026 
1027     if (flags & PCI_IRQ_MSI) {
1028         nvecs = __pci_enable_msi_range(dev, min_vecs, max_vecs, affd);
1029         if (nvecs > 0)
1030             return nvecs;
1031     }
1032 
1033     /* use legacy IRQ if allowed */
1034     if (flags & PCI_IRQ_LEGACY) {
1035         if (min_vecs == 1 && dev->irq) {
1036             /*
1037              * Invoke the affinity spreading logic to ensure that
1038              * the device driver can adjust queue configuration
1039              * for the single interrupt case.
1040              */
1041             if (affd)
1042                 irq_create_affinity_masks(1, affd);
1043             pci_intx(dev, 1);
1044             return 1;
1045         }
1046     }
1047 
1048     return nvecs;
1049 }
1050 EXPORT_SYMBOL(pci_alloc_irq_vectors_affinity);
1051 
1052 /**
1053  * pci_free_irq_vectors - free previously allocated IRQs for a device
1054  * @dev:        PCI device to operate on
1055  *
1056  * Undoes the allocations and enabling in pci_alloc_irq_vectors().
1057  */
1058 void pci_free_irq_vectors(struct pci_dev *dev)
1059 {
1060     pci_disable_msix(dev);
1061     pci_disable_msi(dev);
1062 }
1063 EXPORT_SYMBOL(pci_free_irq_vectors);
1064 
1065 /**
1066  * pci_irq_vector - return Linux IRQ number of a device vector
1067  * @dev:    PCI device to operate on
1068  * @nr:     Interrupt vector index (0-based)
1069  *
1070  * @nr has the following meanings depending on the interrupt mode:
1071  *   MSI-X: The index in the MSI-X vector table
1072  *   MSI:   The index of the enabled MSI vectors
1073  *   INTx:  Must be 0
1074  *
1075  * Return: The Linux interrupt number or -EINVAl if @nr is out of range.
1076  */
1077 int pci_irq_vector(struct pci_dev *dev, unsigned int nr)
1078 {
1079     unsigned int irq;
1080 
1081     if (!dev->msi_enabled && !dev->msix_enabled)
1082         return !nr ? dev->irq : -EINVAL;
1083 
1084     irq = msi_get_virq(&dev->dev, nr);
1085     return irq ? irq : -EINVAL;
1086 }
1087 EXPORT_SYMBOL(pci_irq_vector);
1088 
1089 /**
1090  * pci_irq_get_affinity - return the affinity of a particular MSI vector
1091  * @dev:    PCI device to operate on
1092  * @nr:     device-relative interrupt vector index (0-based).
1093  *
1094  * @nr has the following meanings depending on the interrupt mode:
1095  *   MSI-X: The index in the MSI-X vector table
1096  *   MSI:   The index of the enabled MSI vectors
1097  *   INTx:  Must be 0
1098  *
1099  * Return: A cpumask pointer or NULL if @nr is out of range
1100  */
1101 const struct cpumask *pci_irq_get_affinity(struct pci_dev *dev, int nr)
1102 {
1103     int idx, irq = pci_irq_vector(dev, nr);
1104     struct msi_desc *desc;
1105 
1106     if (WARN_ON_ONCE(irq <= 0))
1107         return NULL;
1108 
1109     desc = irq_get_msi_desc(irq);
1110     /* Non-MSI does not have the information handy */
1111     if (!desc)
1112         return cpu_possible_mask;
1113 
1114     /* MSI[X] interrupts can be allocated without affinity descriptor */
1115     if (!desc->affinity)
1116         return NULL;
1117 
1118     /*
1119      * MSI has a mask array in the descriptor.
1120      * MSI-X has a single mask.
1121      */
1122     idx = dev->msi_enabled ? nr : 0;
1123     return &desc->affinity[idx].mask;
1124 }
1125 EXPORT_SYMBOL(pci_irq_get_affinity);
1126 
/**
 * msi_desc_to_pci_dev - derive the pci_dev that owns an MSI descriptor
 * @desc:   MSI descriptor whose ->dev is embedded in a pci_dev
 *
 * Return: The pci_dev containing @desc->dev.
 */
struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc)
{
    return to_pci_dev(desc->dev);
}
EXPORT_SYMBOL(msi_desc_to_pci_dev);
1132 
/* Globally disable MSI, making pci_msi_enabled() report false. */
void pci_no_msi(void)
{
    pci_msi_enable = 0;
}
1137 
1138 /**
1139  * pci_msi_enabled - is MSI enabled?
1140  *
1141  * Returns true if MSI has not been disabled by the command-line option
1142  * pci=nomsi.
1143  **/
1144 int pci_msi_enabled(void)
1145 {
1146     return pci_msi_enable;
1147 }
1148 EXPORT_SYMBOL(pci_msi_enabled);