Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 
0003 /*
0004  * Irqdomain for Linux to run as the root partition on Microsoft Hypervisor.
0005  *
0006  * Authors:
0007  *  Sunil Muthuswamy <sunilmut@microsoft.com>
0008  *  Wei Liu <wei.liu@kernel.org>
0009  */
0010 
0011 #include <linux/pci.h>
0012 #include <linux/irq.h>
0013 #include <asm/mshyperv.h>
0014 
0015 static int hv_map_interrupt(union hv_device_id device_id, bool level,
0016         int cpu, int vector, struct hv_interrupt_entry *entry)
0017 {
0018     struct hv_input_map_device_interrupt *input;
0019     struct hv_output_map_device_interrupt *output;
0020     struct hv_device_interrupt_descriptor *intr_desc;
0021     unsigned long flags;
0022     u64 status;
0023     int nr_bank, var_size;
0024 
0025     local_irq_save(flags);
0026 
0027     input = *this_cpu_ptr(hyperv_pcpu_input_arg);
0028     output = *this_cpu_ptr(hyperv_pcpu_output_arg);
0029 
0030     intr_desc = &input->interrupt_descriptor;
0031     memset(input, 0, sizeof(*input));
0032     input->partition_id = hv_current_partition_id;
0033     input->device_id = device_id.as_uint64;
0034     intr_desc->interrupt_type = HV_X64_INTERRUPT_TYPE_FIXED;
0035     intr_desc->vector_count = 1;
0036     intr_desc->target.vector = vector;
0037 
0038     if (level)
0039         intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_LEVEL;
0040     else
0041         intr_desc->trigger_mode = HV_INTERRUPT_TRIGGER_MODE_EDGE;
0042 
0043     intr_desc->target.vp_set.valid_bank_mask = 0;
0044     intr_desc->target.vp_set.format = HV_GENERIC_SET_SPARSE_4K;
0045     nr_bank = cpumask_to_vpset(&(intr_desc->target.vp_set), cpumask_of(cpu));
0046     if (nr_bank < 0) {
0047         local_irq_restore(flags);
0048         pr_err("%s: unable to generate VP set\n", __func__);
0049         return EINVAL;
0050     }
0051     intr_desc->target.flags = HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET;
0052 
0053     /*
0054      * var-sized hypercall, var-size starts after vp_mask (thus
0055      * vp_set.format does not count, but vp_set.valid_bank_mask
0056      * does).
0057      */
0058     var_size = nr_bank + 1;
0059 
0060     status = hv_do_rep_hypercall(HVCALL_MAP_DEVICE_INTERRUPT, 0, var_size,
0061             input, output);
0062     *entry = output->interrupt_entry;
0063 
0064     local_irq_restore(flags);
0065 
0066     if (!hv_result_success(status))
0067         pr_err("%s: hypercall failed, status %lld\n", __func__, status);
0068 
0069     return hv_result(status);
0070 }
0071 
0072 static int hv_unmap_interrupt(u64 id, struct hv_interrupt_entry *old_entry)
0073 {
0074     unsigned long flags;
0075     struct hv_input_unmap_device_interrupt *input;
0076     struct hv_interrupt_entry *intr_entry;
0077     u64 status;
0078 
0079     local_irq_save(flags);
0080     input = *this_cpu_ptr(hyperv_pcpu_input_arg);
0081 
0082     memset(input, 0, sizeof(*input));
0083     intr_entry = &input->interrupt_entry;
0084     input->partition_id = hv_current_partition_id;
0085     input->device_id = id;
0086     *intr_entry = *old_entry;
0087 
0088     status = hv_do_hypercall(HVCALL_UNMAP_DEVICE_INTERRUPT, input, NULL);
0089     local_irq_restore(flags);
0090 
0091     return hv_result(status);
0092 }
0093 
0094 #ifdef CONFIG_PCI_MSI
0095 struct rid_data {
0096     struct pci_dev *bridge;
0097     u32 rid;
0098 };
0099 
0100 static int get_rid_cb(struct pci_dev *pdev, u16 alias, void *data)
0101 {
0102     struct rid_data *rd = data;
0103     u8 bus = PCI_BUS_NUM(rd->rid);
0104 
0105     if (pdev->bus->number != bus || PCI_BUS_NUM(alias) != bus) {
0106         rd->bridge = pdev;
0107         rd->rid = alias;
0108     }
0109 
0110     return 0;
0111 }
0112 
0113 static union hv_device_id hv_build_pci_dev_id(struct pci_dev *dev)
0114 {
0115     union hv_device_id dev_id;
0116     struct rid_data data = {
0117         .bridge = NULL,
0118         .rid = PCI_DEVID(dev->bus->number, dev->devfn)
0119     };
0120 
0121     pci_for_each_dma_alias(dev, get_rid_cb, &data);
0122 
0123     dev_id.as_uint64 = 0;
0124     dev_id.device_type = HV_DEVICE_TYPE_PCI;
0125     dev_id.pci.segment = pci_domain_nr(dev->bus);
0126 
0127     dev_id.pci.bdf.bus = PCI_BUS_NUM(data.rid);
0128     dev_id.pci.bdf.device = PCI_SLOT(data.rid);
0129     dev_id.pci.bdf.function = PCI_FUNC(data.rid);
0130     dev_id.pci.source_shadow = HV_SOURCE_SHADOW_NONE;
0131 
0132     if (data.bridge) {
0133         int pos;
0134 
0135         /*
0136          * Microsoft Hypervisor requires a bus range when the bridge is
0137          * running in PCI-X mode.
0138          *
0139          * To distinguish conventional vs PCI-X bridge, we can check
0140          * the bridge's PCI-X Secondary Status Register, Secondary Bus
0141          * Mode and Frequency bits. See PCI Express to PCI/PCI-X Bridge
0142          * Specification Revision 1.0 5.2.2.1.3.
0143          *
0144          * Value zero means it is in conventional mode, otherwise it is
0145          * in PCI-X mode.
0146          */
0147 
0148         pos = pci_find_capability(data.bridge, PCI_CAP_ID_PCIX);
0149         if (pos) {
0150             u16 status;
0151 
0152             pci_read_config_word(data.bridge, pos +
0153                     PCI_X_BRIDGE_SSTATUS, &status);
0154 
0155             if (status & PCI_X_SSTATUS_FREQ) {
0156                 /* Non-zero, PCI-X mode */
0157                 u8 sec_bus, sub_bus;
0158 
0159                 dev_id.pci.source_shadow = HV_SOURCE_SHADOW_BRIDGE_BUS_RANGE;
0160 
0161                 pci_read_config_byte(data.bridge, PCI_SECONDARY_BUS, &sec_bus);
0162                 dev_id.pci.shadow_bus_range.secondary_bus = sec_bus;
0163                 pci_read_config_byte(data.bridge, PCI_SUBORDINATE_BUS, &sub_bus);
0164                 dev_id.pci.shadow_bus_range.subordinate_bus = sub_bus;
0165             }
0166         }
0167     }
0168 
0169     return dev_id;
0170 }
0171 
0172 static int hv_map_msi_interrupt(struct pci_dev *dev, int cpu, int vector,
0173                 struct hv_interrupt_entry *entry)
0174 {
0175     union hv_device_id device_id = hv_build_pci_dev_id(dev);
0176 
0177     return hv_map_interrupt(device_id, false, cpu, vector, entry);
0178 }
0179 
0180 static inline void entry_to_msi_msg(struct hv_interrupt_entry *entry, struct msi_msg *msg)
0181 {
0182     /* High address is always 0 */
0183     msg->address_hi = 0;
0184     msg->address_lo = entry->msi_entry.address.as_uint32;
0185     msg->data = entry->msi_entry.data.as_uint32;
0186 }
0187 
0188 static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry);
0189 static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
0190 {
0191     struct msi_desc *msidesc;
0192     struct pci_dev *dev;
0193     struct hv_interrupt_entry out_entry, *stored_entry;
0194     struct irq_cfg *cfg = irqd_cfg(data);
0195     const cpumask_t *affinity;
0196     int cpu;
0197     u64 status;
0198 
0199     msidesc = irq_data_get_msi_desc(data);
0200     dev = msi_desc_to_pci_dev(msidesc);
0201 
0202     if (!cfg) {
0203         pr_debug("%s: cfg is NULL", __func__);
0204         return;
0205     }
0206 
0207     affinity = irq_data_get_effective_affinity_mask(data);
0208     cpu = cpumask_first_and(affinity, cpu_online_mask);
0209 
0210     if (data->chip_data) {
0211         /*
0212          * This interrupt is already mapped. Let's unmap first.
0213          *
0214          * We don't use retarget interrupt hypercalls here because
0215          * Microsoft Hypervisor doens't allow root to change the vector
0216          * or specify VPs outside of the set that is initially used
0217          * during mapping.
0218          */
0219         stored_entry = data->chip_data;
0220         data->chip_data = NULL;
0221 
0222         status = hv_unmap_msi_interrupt(dev, stored_entry);
0223 
0224         kfree(stored_entry);
0225 
0226         if (status != HV_STATUS_SUCCESS) {
0227             pr_debug("%s: failed to unmap, status %lld", __func__, status);
0228             return;
0229         }
0230     }
0231 
0232     stored_entry = kzalloc(sizeof(*stored_entry), GFP_ATOMIC);
0233     if (!stored_entry) {
0234         pr_debug("%s: failed to allocate chip data\n", __func__);
0235         return;
0236     }
0237 
0238     status = hv_map_msi_interrupt(dev, cpu, cfg->vector, &out_entry);
0239     if (status != HV_STATUS_SUCCESS) {
0240         kfree(stored_entry);
0241         return;
0242     }
0243 
0244     *stored_entry = out_entry;
0245     data->chip_data = stored_entry;
0246     entry_to_msi_msg(&out_entry, msg);
0247 
0248     return;
0249 }
0250 
0251 static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry)
0252 {
0253     return hv_unmap_interrupt(hv_build_pci_dev_id(dev).as_uint64, old_entry);
0254 }
0255 
0256 static void hv_teardown_msi_irq(struct pci_dev *dev, struct irq_data *irqd)
0257 {
0258     struct hv_interrupt_entry old_entry;
0259     struct msi_msg msg;
0260     u64 status;
0261 
0262     if (!irqd->chip_data) {
0263         pr_debug("%s: no chip data\n!", __func__);
0264         return;
0265     }
0266 
0267     old_entry = *(struct hv_interrupt_entry *)irqd->chip_data;
0268     entry_to_msi_msg(&old_entry, &msg);
0269 
0270     kfree(irqd->chip_data);
0271     irqd->chip_data = NULL;
0272 
0273     status = hv_unmap_msi_interrupt(dev, &old_entry);
0274 
0275     if (status != HV_STATUS_SUCCESS)
0276         pr_err("%s: hypercall failed, status %lld\n", __func__, status);
0277 }
0278 
0279 static void hv_msi_free_irq(struct irq_domain *domain,
0280                 struct msi_domain_info *info, unsigned int virq)
0281 {
0282     struct irq_data *irqd = irq_get_irq_data(virq);
0283     struct msi_desc *desc;
0284 
0285     if (!irqd)
0286         return;
0287 
0288     desc = irq_data_get_msi_desc(irqd);
0289     if (!desc || !desc->irq || WARN_ON_ONCE(!dev_is_pci(desc->dev)))
0290         return;
0291 
0292     hv_teardown_msi_irq(to_pci_dev(desc->dev), irqd);
0293 }
0294 
0295 /*
0296  * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
0297  * which implement the MSI or MSI-X Capability Structure.
0298  */
0299 static struct irq_chip hv_pci_msi_controller = {
0300     .name           = "HV-PCI-MSI",
0301     .irq_unmask     = pci_msi_unmask_irq,
0302     .irq_mask       = pci_msi_mask_irq,
0303     .irq_ack        = irq_chip_ack_parent,
0304     .irq_retrigger      = irq_chip_retrigger_hierarchy,
0305     .irq_compose_msi_msg    = hv_irq_compose_msi_msg,
0306     .irq_set_affinity   = msi_domain_set_affinity,
0307     .flags          = IRQCHIP_SKIP_SET_WAKE,
0308 };
0309 
0310 static struct msi_domain_ops pci_msi_domain_ops = {
0311     .msi_free       = hv_msi_free_irq,
0312     .msi_prepare        = pci_msi_prepare,
0313 };
0314 
0315 static struct msi_domain_info hv_pci_msi_domain_info = {
0316     .flags      = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
0317               MSI_FLAG_PCI_MSIX,
0318     .ops        = &pci_msi_domain_ops,
0319     .chip       = &hv_pci_msi_controller,
0320     .handler    = handle_edge_irq,
0321     .handler_name   = "edge",
0322 };
0323 
0324 struct irq_domain * __init hv_create_pci_msi_domain(void)
0325 {
0326     struct irq_domain *d = NULL;
0327     struct fwnode_handle *fn;
0328 
0329     fn = irq_domain_alloc_named_fwnode("HV-PCI-MSI");
0330     if (fn)
0331         d = pci_msi_create_irq_domain(fn, &hv_pci_msi_domain_info, x86_vector_domain);
0332 
0333     /* No point in going further if we can't get an irq domain */
0334     BUG_ON(!d);
0335 
0336     return d;
0337 }
0338 
0339 #endif /* CONFIG_PCI_MSI */
0340 
0341 int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry)
0342 {
0343     union hv_device_id device_id;
0344 
0345     device_id.as_uint64 = 0;
0346     device_id.device_type = HV_DEVICE_TYPE_IOAPIC;
0347     device_id.ioapic.ioapic_id = (u8)ioapic_id;
0348 
0349     return hv_unmap_interrupt(device_id.as_uint64, entry);
0350 }
0351 EXPORT_SYMBOL_GPL(hv_unmap_ioapic_interrupt);
0352 
0353 int hv_map_ioapic_interrupt(int ioapic_id, bool level, int cpu, int vector,
0354         struct hv_interrupt_entry *entry)
0355 {
0356     union hv_device_id device_id;
0357 
0358     device_id.as_uint64 = 0;
0359     device_id.device_type = HV_DEVICE_TYPE_IOAPIC;
0360     device_id.ioapic.ioapic_id = (u8)ioapic_id;
0361 
0362     return hv_map_interrupt(device_id, level, cpu, vector, entry);
0363 }
0364 EXPORT_SYMBOL_GPL(hv_map_ioapic_interrupt);