0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * Copyright (C) 2015, 2016 ARM Ltd.
0004  */
0005 
0006 #include <linux/interrupt.h>
0007 #include <linux/irq.h>
0008 #include <linux/kvm.h>
0009 #include <linux/kvm_host.h>
0010 #include <linux/list_sort.h>
0011 #include <linux/nospec.h>
0012 
0013 #include <asm/kvm_hyp.h>
0014 
0015 #include "vgic.h"
0016 
0017 #define CREATE_TRACE_POINTS
0018 #include "trace.h"
0019 
0020 struct vgic_global kvm_vgic_global_state __ro_after_init = {
0021     .gicv3_cpuif = STATIC_KEY_FALSE_INIT,
0022 };
0023 
0024 /*
0025  * Locking order is always:
0026  * kvm->lock (mutex)
0027  *   its->cmd_lock (mutex)
0028  *     its->its_lock (mutex)
0029  *       vgic_cpu->ap_list_lock     must be taken with IRQs disabled
0030  *         kvm->lpi_list_lock       must be taken with IRQs disabled
0031  *           vgic_irq->irq_lock     must be taken with IRQs disabled
0032  *
0033  * As the ap_list_lock might be taken from the timer interrupt handler,
0034  * we have to disable IRQs before taking this lock and everything lower
0035  * than it.
0036  *
0037  * If you need to take multiple locks, always take the upper lock first,
0038  * then the lower ones, e.g. first take the its_lock, then the irq_lock.
0039  * If you are already holding a lock and need to take a higher one, you
0040  * have to drop the lower ranking lock first and re-acquire it after having
0041  * taken the upper one.
0042  *
0043  * When taking more than one ap_list_lock at the same time, always take the
0044  * lowest numbered VCPU's ap_list_lock first, so:
0045  *   vcpuX->vcpu_id < vcpuY->vcpu_id:
0046  *     raw_spin_lock(vcpuX->arch.vgic_cpu.ap_list_lock);
0047  *     raw_spin_lock(vcpuY->arch.vgic_cpu.ap_list_lock);
0048  *
0049  * Since the VGIC must support injecting virtual interrupts from ISRs, we have
0050  * to use the raw_spin_lock_irqsave/raw_spin_unlock_irqrestore versions of outer
0051  * spinlocks for any lock that may be taken while injecting an interrupt.
0052  */
0053 
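/*
 * Illustrative sketch only (not part of this file): the "drop the lower
 * lock, take the upper one, re-take the lower one" rule above, written out
 * for the irq_lock -> ap_list_lock case. vgic_queue_irq_unlock() and
 * vgic_prune_ap_list() below implement variants of this pattern; the helper
 * name is hypothetical and the re-validation step is only hinted at, so
 * this is a sketch rather than usable code.
 */
static void example_lock_reorder(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
                                 unsigned long flags)
{
    /*
     * The caller holds irq->irq_lock (taken with irqsave), but now needs
     * the ap_list_lock, which ranks above it: drop the lower lock first.
     */
    raw_spin_unlock_irqrestore(&irq->irq_lock, flags);

    raw_spin_lock_irqsave(&vcpu->arch.vgic_cpu.ap_list_lock, flags);
    raw_spin_lock(&irq->irq_lock);

    /*
     * Anything decided under the old critical section must be re-checked
     * here: the IRQ state may have changed while it was unlocked.
     */

    raw_spin_unlock(&irq->irq_lock);
    raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags);
}
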
0054 /*
0055  * Iterate over the VM's list of mapped LPIs to find the one with a
0056  * matching interrupt ID and return a reference to the IRQ structure.
0057  */
0058 static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid)
0059 {
0060     struct vgic_dist *dist = &kvm->arch.vgic;
0061     struct vgic_irq *irq = NULL;
0062     unsigned long flags;
0063 
0064     raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
0065 
0066     list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
0067         if (irq->intid != intid)
0068             continue;
0069 
0070         /*
0071          * This increases the refcount, the caller is expected to
0072          * call vgic_put_irq() later once it's finished with the IRQ.
0073          */
0074         vgic_get_irq_kref(irq);
0075         goto out_unlock;
0076     }
0077     irq = NULL;
0078 
0079 out_unlock:
0080     raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
0081 
0082     return irq;
0083 }
0084 
0085 /*
0086  * This looks up the virtual interrupt ID to get the corresponding
0087  * struct vgic_irq. It also increases the refcount, so any caller is expected
0088  * to call vgic_put_irq() once it's finished with this IRQ.
0089  */
0090 struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
0091                   u32 intid)
0092 {
0093     /* SGIs and PPIs */
0094     if (intid <= VGIC_MAX_PRIVATE) {
0095         intid = array_index_nospec(intid, VGIC_MAX_PRIVATE + 1);
0096         return &vcpu->arch.vgic_cpu.private_irqs[intid];
0097     }
0098 
0099     /* SPIs */
0100     if (intid < (kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)) {
0101         intid = array_index_nospec(intid, kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS);
0102         return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS];
0103     }
0104 
0105     /* LPIs */
0106     if (intid >= VGIC_MIN_LPI)
0107         return vgic_get_lpi(kvm, intid);
0108 
0109     return NULL;
0110 }
0111 
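/*
 * Illustrative sketch only (not part of this file): the reference-counting
 * contract of vgic_get_irq()/vgic_put_irq(). For LPIs the lookup takes a
 * real kref; for private and shared interrupts vgic_put_irq() simply
 * returns. Either way the caller is expected to balance every successful
 * lookup with a put. The helper name, INTID and field accessed are
 * arbitrary examples.
 */
static bool example_peek_enabled(struct kvm *kvm, struct kvm_vcpu *vcpu,
                                 u32 intid)
{
    struct vgic_irq *irq = vgic_get_irq(kvm, vcpu, intid);
    unsigned long flags;
    bool enabled;

    if (!irq)
        return false;

    raw_spin_lock_irqsave(&irq->irq_lock, flags);
    enabled = irq->enabled;
    raw_spin_unlock_irqrestore(&irq->irq_lock, flags);

    /* Drop the reference taken by the lookup. */
    vgic_put_irq(kvm, irq);

    return enabled;
}
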
0112 /*
0113  * We can't do anything in here, because we lack the kvm pointer to
0114  * lock and remove the item from the lpi_list. So we keep this function
0115  * empty and use the return value of kref_put() to trigger the freeing.
0116  */
0117 static void vgic_irq_release(struct kref *ref)
0118 {
0119 }
0120 
0121 /*
0122  * Drop the refcount on the LPI. Must be called with lpi_list_lock held.
0123  */
0124 void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq)
0125 {
0126     struct vgic_dist *dist = &kvm->arch.vgic;
0127 
0128     if (!kref_put(&irq->refcount, vgic_irq_release))
0129         return;
0130 
0131     list_del(&irq->lpi_list);
0132     dist->lpi_list_count--;
0133 
0134     kfree(irq);
0135 }
0136 
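/*
 * Illustrative sketch only (not part of this file): the "empty release"
 * kref pattern used by __vgic_put_lpi_locked() above. Because freeing
 * needs context (here: the kvm pointer and lpi_list_lock) that a kref
 * release callback does not receive, the release function does nothing and
 * the caller acts on kref_put()'s return value instead, which is non-zero
 * only when the final reference was dropped. The struct and function names
 * below are hypothetical.
 */
struct example_obj {
    struct kref refcount;
    struct list_head node;
};

static void example_obj_release(struct kref *ref)
{
    /* Intentionally empty: the put path below does the actual work. */
}

static void example_obj_put(struct example_obj *obj)
{
    /* The caller holds the lock protecting the list, as with lpi_list_lock. */
    if (!kref_put(&obj->refcount, example_obj_release))
        return;     /* other references remain */

    list_del(&obj->node);
    kfree(obj);
}
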
0137 void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
0138 {
0139     struct vgic_dist *dist = &kvm->arch.vgic;
0140     unsigned long flags;
0141 
0142     if (irq->intid < VGIC_MIN_LPI)
0143         return;
0144 
0145     raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
0146     __vgic_put_lpi_locked(kvm, irq);
0147     raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
0148 }
0149 
0150 void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu)
0151 {
0152     struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
0153     struct vgic_irq *irq, *tmp;
0154     unsigned long flags;
0155 
0156     raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);
0157 
0158     list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
0159         if (irq->intid >= VGIC_MIN_LPI) {
0160             raw_spin_lock(&irq->irq_lock);
0161             list_del(&irq->ap_list);
0162             irq->vcpu = NULL;
0163             raw_spin_unlock(&irq->irq_lock);
0164             vgic_put_irq(vcpu->kvm, irq);
0165         }
0166     }
0167 
0168     raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
0169 }
0170 
0171 void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending)
0172 {
0173     WARN_ON(irq_set_irqchip_state(irq->host_irq,
0174                       IRQCHIP_STATE_PENDING,
0175                       pending));
0176 }
0177 
0178 bool vgic_get_phys_line_level(struct vgic_irq *irq)
0179 {
0180     bool line_level;
0181 
0182     BUG_ON(!irq->hw);
0183 
0184     if (irq->ops && irq->ops->get_input_level)
0185         return irq->ops->get_input_level(irq->intid);
0186 
0187     WARN_ON(irq_get_irqchip_state(irq->host_irq,
0188                       IRQCHIP_STATE_PENDING,
0189                       &line_level));
0190     return line_level;
0191 }
0192 
0193 /* Set/Clear the physical active state */
0194 void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active)
0195 {
0196 
0197     BUG_ON(!irq->hw);
0198     WARN_ON(irq_set_irqchip_state(irq->host_irq,
0199                       IRQCHIP_STATE_ACTIVE,
0200                       active));
0201 }
0202 
0203 /**
0204  * vgic_target_oracle - compute the target vcpu for an irq
0205  *
0206  * @irq:    The irq to route. Must be already locked.
0207  *
0208  * Based on the current state of the interrupt (enabled, pending,
0209  * active, vcpu and target_vcpu), compute the next vcpu this should be
0210  * given to. Return NULL if this shouldn't be injected at all.
0211  *
0212  * Requires the IRQ lock to be held.
0213  */
0214 static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq)
0215 {
0216     lockdep_assert_held(&irq->irq_lock);
0217 
0218     /* If the interrupt is active, it must stay on the current vcpu */
0219     if (irq->active)
0220         return irq->vcpu ? : irq->target_vcpu;
0221 
0222     /*
0223      * If the IRQ is not active but enabled and pending, we should direct
0224      * it to its configured target VCPU.
0225      * If the distributor is disabled, pending interrupts shouldn't be
0226      * forwarded.
0227      */
0228     if (irq->enabled && irq_is_pending(irq)) {
0229         if (unlikely(irq->target_vcpu &&
0230                  !irq->target_vcpu->kvm->arch.vgic.enabled))
0231             return NULL;
0232 
0233         return irq->target_vcpu;
0234     }
0235 
0236     /* If neither active nor pending and enabled, then this IRQ should not
0237      * be queued to any VCPU.
0238      */
0239     return NULL;
0240 }
0241 
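/*
 * Worked example for vgic_target_oracle() (values are illustrative):
 *
 *   active, irq->vcpu = vcpuX                   -> vcpuX (stays where it is)
 *   active, irq->vcpu = NULL                    -> irq->target_vcpu
 *   !active, enabled && pending, dist enabled   -> irq->target_vcpu
 *   !active, enabled && pending, dist disabled  -> NULL (not forwarded)
 *   !active, !pending or !enabled               -> NULL (nothing to queue)
 */
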
0242 /*
0243  * The order of items in the ap_lists defines how we'll pack things in LRs as
0244  * well, the first items in the list being the first things populated in the
0245  * LRs.
0246  *
0247  * A hard rule is that active interrupts can never be pushed out of the LRs
0248  * (and therefore take priority) since we cannot reliably trap on deactivation
0249  * of IRQs and therefore they have to be present in the LRs.
0250  *
0251  * Otherwise things should be sorted by the priority field and the GIC
0252  * hardware support will take care of preemption of priority groups etc.
0253  *
0254  * Return negative if "a" sorts before "b", 0 to preserve order, and positive
0255  * to sort "b" before "a".
0256  */
0257 static int vgic_irq_cmp(void *priv, const struct list_head *a,
0258             const struct list_head *b)
0259 {
0260     struct vgic_irq *irqa = container_of(a, struct vgic_irq, ap_list);
0261     struct vgic_irq *irqb = container_of(b, struct vgic_irq, ap_list);
0262     bool penda, pendb;
0263     int ret;
0264 
0265     /*
0266      * list_sort may call this function with the same element when
0267      * the list is fairly long.
0268      */
0269     if (unlikely(irqa == irqb))
0270         return 0;
0271 
0272     raw_spin_lock(&irqa->irq_lock);
0273     raw_spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING);
0274 
0275     if (irqa->active || irqb->active) {
0276         ret = (int)irqb->active - (int)irqa->active;
0277         goto out;
0278     }
0279 
0280     penda = irqa->enabled && irq_is_pending(irqa);
0281     pendb = irqb->enabled && irq_is_pending(irqb);
0282 
0283     if (!penda || !pendb) {
0284         ret = (int)pendb - (int)penda;
0285         goto out;
0286     }
0287 
0288     /* Both pending and enabled, sort by priority */
0289     ret = irqa->priority - irqb->priority;
0290 out:
0291     raw_spin_unlock(&irqb->irq_lock);
0292     raw_spin_unlock(&irqa->irq_lock);
0293     return ret;
0294 }
0295 
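/*
 * Worked example for vgic_irq_cmp() (priorities are illustrative): given an
 * ap_list containing
 *
 *   A: active
 *   B: pending + enabled, priority 0x20
 *   C: pending + enabled, priority 0xa0
 *   D: neither active nor pending
 *
 * list_sort() with this comparator orders the list A, B, C, D: active IRQs
 * first, then pending+enabled ones by ascending priority value (a lower
 * value means a higher priority on the GIC), and everything else last.
 */
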
0296 /* Must be called with the ap_list_lock held */
0297 static void vgic_sort_ap_list(struct kvm_vcpu *vcpu)
0298 {
0299     struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
0300 
0301     lockdep_assert_held(&vgic_cpu->ap_list_lock);
0302 
0303     list_sort(NULL, &vgic_cpu->ap_list_head, vgic_irq_cmp);
0304 }
0305 
0306 /*
0307  * Only valid injection if changing level for level-triggered IRQs or for a
0308  * rising edge, and in-kernel connected IRQ lines can only be controlled by
0309  * their owner.
0310  */
0311 static bool vgic_validate_injection(struct vgic_irq *irq, bool level, void *owner)
0312 {
0313     if (irq->owner != owner)
0314         return false;
0315 
0316     switch (irq->config) {
0317     case VGIC_CONFIG_LEVEL:
0318         return irq->line_level != level;
0319     case VGIC_CONFIG_EDGE:
0320         return level;
0321     }
0322 
0323     return false;
0324 }
0325 
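/*
 * Worked example for vgic_validate_injection() (owner check aside):
 *
 *   level-triggered, line_level=0, level=1  -> valid (rising edge)
 *   level-triggered, line_level=1, level=1  -> ignored (no change)
 *   level-triggered, line_level=1, level=0  -> valid (lowers the line)
 *   edge-triggered,  level=1                -> valid (fires the edge)
 *   edge-triggered,  level=0                -> ignored
 */
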
0326 /*
0327  * Check whether an IRQ needs to (and can) be queued to a VCPU's ap list.
0328  * Do the queuing if necessary, taking the right locks in the right order.
0329  * Returns true when the IRQ was queued, false otherwise.
0330  *
0331  * Needs to be entered with the IRQ lock already held, but will return
0332  * with all locks dropped.
0333  */
0334 bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
0335                unsigned long flags)
0336 {
0337     struct kvm_vcpu *vcpu;
0338 
0339     lockdep_assert_held(&irq->irq_lock);
0340 
0341 retry:
0342     vcpu = vgic_target_oracle(irq);
0343     if (irq->vcpu || !vcpu) {
0344         /*
0345          * If this IRQ is already on a VCPU's ap_list, then it
0346          * cannot be moved or modified and there is no more work for
0347          * us to do.
0348          *
0349          * Otherwise, if the irq is not pending and enabled, it does
0350          * not need to be inserted into an ap_list and there is also
0351          * no more work for us to do.
0352          */
0353         raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
0354 
0355         /*
0356          * We have to kick the VCPU here, because we could be
0357          * queueing an edge-triggered interrupt for which we
0358          * get no EOI maintenance interrupt. In that case,
0359          * while the IRQ is already on the VCPU's AP list, the
0360          * VCPU could have EOI'ed the original interrupt and
0361          * won't see this one until it exits for some other
0362          * reason.
0363          */
0364         if (vcpu) {
0365             kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
0366             kvm_vcpu_kick(vcpu);
0367         }
0368         return false;
0369     }
0370 
0371     /*
0372      * We must unlock the irq lock to take the ap_list_lock where
0373      * we are going to insert this new pending interrupt.
0374      */
0375     raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
0376 
0377     /* someone can do stuff here, which we re-check below */
0378 
0379     raw_spin_lock_irqsave(&vcpu->arch.vgic_cpu.ap_list_lock, flags);
0380     raw_spin_lock(&irq->irq_lock);
0381 
0382     /*
0383      * Did something change behind our backs?
0384      *
0385      * There are two cases:
0386      * 1) The irq lost its pending state or was disabled behind our
0387      *    backs and/or it was queued to another VCPU's ap_list.
0388      * 2) Someone changed the affinity on this irq behind our
0389      *    backs and we are now holding the wrong ap_list_lock.
0390      *
0391      * In both cases, drop the locks and retry.
0392      */
0393 
0394     if (unlikely(irq->vcpu || vcpu != vgic_target_oracle(irq))) {
0395         raw_spin_unlock(&irq->irq_lock);
0396         raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock,
0397                        flags);
0398 
0399         raw_spin_lock_irqsave(&irq->irq_lock, flags);
0400         goto retry;
0401     }
0402 
0403     /*
0404      * Grab a reference to the irq to reflect the fact that it is
0405      * now in the ap_list.
0406      */
0407     vgic_get_irq_kref(irq);
0408     list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head);
0409     irq->vcpu = vcpu;
0410 
0411     raw_spin_unlock(&irq->irq_lock);
0412     raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags);
0413 
0414     kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
0415     kvm_vcpu_kick(vcpu);
0416 
0417     return true;
0418 }
0419 
0420 /**
0421  * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
0422  * @kvm:     The VM structure pointer
0423  * @cpuid:   The CPU for PPIs
0424  * @intid:   The INTID to inject a new state to.
0425  * @level:   Edge-triggered:  true:  to trigger the interrupt
0426  *                            false: to ignore the call
0427  *           Level-sensitive: true:  raise the input signal
0428  *                            false: lower the input signal
0429  * @owner:   The opaque pointer to the owner of the IRQ being raised to verify
0430  *           that the caller is allowed to inject this IRQ.  Userspace
0431  *           injections will have owner == NULL.
0432  *
0433  * The VGIC is not concerned with devices being active-LOW or active-HIGH for
0434  * level-sensitive interrupts.  You can think of the level parameter as 1
0435  * being HIGH and 0 being LOW and all devices being active-HIGH.
0436  */
0437 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
0438             bool level, void *owner)
0439 {
0440     struct kvm_vcpu *vcpu;
0441     struct vgic_irq *irq;
0442     unsigned long flags;
0443     int ret;
0444 
0445     trace_vgic_update_irq_pending(cpuid, intid, level);
0446 
0447     ret = vgic_lazy_init(kvm);
0448     if (ret)
0449         return ret;
0450 
0451     vcpu = kvm_get_vcpu(kvm, cpuid);
0452     if (!vcpu && intid < VGIC_NR_PRIVATE_IRQS)
0453         return -EINVAL;
0454 
0455     irq = vgic_get_irq(kvm, vcpu, intid);
0456     if (!irq)
0457         return -EINVAL;
0458 
0459     raw_spin_lock_irqsave(&irq->irq_lock, flags);
0460 
0461     if (!vgic_validate_injection(irq, level, owner)) {
0462         /* Nothing to see here, move along... */
0463         raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
0464         vgic_put_irq(kvm, irq);
0465         return 0;
0466     }
0467 
0468     if (irq->config == VGIC_CONFIG_LEVEL)
0469         irq->line_level = level;
0470     else
0471         irq->pending_latch = true;
0472 
0473     vgic_queue_irq_unlock(kvm, irq, flags);
0474     vgic_put_irq(kvm, irq);
0475 
0476     return 0;
0477 }
0478 
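/*
 * Illustrative sketch only (not part of this file): how an in-kernel device
 * model might drive a level-sensitive SPI through kvm_vgic_inject_irq().
 * The helper, the owner cookie and INTID 40 are hypothetical; a device that
 * registered an owner via kvm_vgic_set_owner() must pass that same cookie
 * here so vgic_validate_injection() accepts it, while userspace-style
 * injection passes NULL.
 */
static int example_device_set_line(struct kvm *kvm, void *owner, bool asserted)
{
    /* cpuid is only meaningful for private interrupts, so pass 0 here. */
    return kvm_vgic_inject_irq(kvm, 0, 40, asserted, owner);
}
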
0479 /* @irq->irq_lock must be held */
0480 static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
0481                 unsigned int host_irq,
0482                 struct irq_ops *ops)
0483 {
0484     struct irq_desc *desc;
0485     struct irq_data *data;
0486 
0487     /*
0488      * Find the physical IRQ number corresponding to @host_irq
0489      */
0490     desc = irq_to_desc(host_irq);
0491     if (!desc) {
0492         kvm_err("%s: no interrupt descriptor\n", __func__);
0493         return -EINVAL;
0494     }
0495     data = irq_desc_get_irq_data(desc);
0496     while (data->parent_data)
0497         data = data->parent_data;
0498 
0499     irq->hw = true;
0500     irq->host_irq = host_irq;
0501     irq->hwintid = data->hwirq;
0502     irq->ops = ops;
0503     return 0;
0504 }
0505 
0506 /* @irq->irq_lock must be held */
0507 static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq)
0508 {
0509     irq->hw = false;
0510     irq->hwintid = 0;
0511     irq->ops = NULL;
0512 }
0513 
0514 int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
0515               u32 vintid, struct irq_ops *ops)
0516 {
0517     struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
0518     unsigned long flags;
0519     int ret;
0520 
0521     BUG_ON(!irq);
0522 
0523     raw_spin_lock_irqsave(&irq->irq_lock, flags);
0524     ret = kvm_vgic_map_irq(vcpu, irq, host_irq, ops);
0525     raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
0526     vgic_put_irq(vcpu->kvm, irq);
0527 
0528     return ret;
0529 }
0530 
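/*
 * Illustrative sketch only (not part of this file): forwarding a physical
 * interrupt to a virtual one, in the style of the arch timer. The helper
 * names are hypothetical; struct irq_ops and its get_input_level member
 * come from the vgic headers and are consumed by vgic_get_phys_line_level()
 * above, and the exact callback prototype is assumed here.
 */
static bool example_get_line_level(int vintid)
{
    /* Sample the device/hardware state behind this virtual INTID. */
    return false;
}

static struct irq_ops example_irq_ops = {
    .get_input_level = example_get_line_level,
};

static int example_forward_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
                               u32 vintid)
{
    return kvm_vgic_map_phys_irq(vcpu, host_irq, vintid, &example_irq_ops);
}
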
0531 /**
0532  * kvm_vgic_reset_mapped_irq - Reset a mapped IRQ
0533  * @vcpu: The VCPU pointer
0534  * @vintid: The INTID of the interrupt
0535  *
0536  * Reset the active and pending states of a mapped interrupt.  Kernel
0537  * subsystems injecting mapped interrupts should reset their interrupt lines
0538  * when we are doing a reset of the VM.
0539  */
0540 void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid)
0541 {
0542     struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
0543     unsigned long flags;
0544 
0545     if (!irq->hw)
0546         goto out;
0547 
0548     raw_spin_lock_irqsave(&irq->irq_lock, flags);
0549     irq->active = false;
0550     irq->pending_latch = false;
0551     irq->line_level = false;
0552     raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
0553 out:
0554     vgic_put_irq(vcpu->kvm, irq);
0555 }
0556 
0557 int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid)
0558 {
0559     struct vgic_irq *irq;
0560     unsigned long flags;
0561 
0562     if (!vgic_initialized(vcpu->kvm))
0563         return -EAGAIN;
0564 
0565     irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
0566     BUG_ON(!irq);
0567 
0568     raw_spin_lock_irqsave(&irq->irq_lock, flags);
0569     kvm_vgic_unmap_irq(irq);
0570     raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
0571     vgic_put_irq(vcpu->kvm, irq);
0572 
0573     return 0;
0574 }
0575 
0576 /**
0577  * kvm_vgic_set_owner - Set the owner of an interrupt for a VM
0578  *
0579  * @vcpu:   Pointer to the VCPU (used for PPIs)
0580  * @intid:  The virtual INTID identifying the interrupt (PPI or SPI)
0581  * @owner:  Opaque pointer to the owner
0582  *
0583  * Returns 0 if intid is not already used by another in-kernel device and the
0584  * owner is set, otherwise returns an error code.
0585  */
0586 int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner)
0587 {
0588     struct vgic_irq *irq;
0589     unsigned long flags;
0590     int ret = 0;
0591 
0592     if (!vgic_initialized(vcpu->kvm))
0593         return -EAGAIN;
0594 
0595     /* SGIs and LPIs cannot be wired up to any device */
0596     if (!irq_is_ppi(intid) && !vgic_valid_spi(vcpu->kvm, intid))
0597         return -EINVAL;
0598 
0599     irq = vgic_get_irq(vcpu->kvm, vcpu, intid);
0600     raw_spin_lock_irqsave(&irq->irq_lock, flags);
0601     if (irq->owner && irq->owner != owner)
0602         ret = -EEXIST;
0603     else
0604         irq->owner = owner;
0605     raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
0606 
0607     return ret;
0608 }
0609 
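/*
 * Illustrative sketch only (not part of this file): an in-kernel device
 * claiming a PPI for a VCPU at init time, so that later injections carrying
 * the same owner cookie pass the check in vgic_validate_injection().
 * INTID 27 and the owner pointer are purely examples.
 */
static int example_claim_ppi(struct kvm_vcpu *vcpu, void *owner)
{
    /* -EAGAIN: vgic not initialised yet, -EEXIST: already owned by another. */
    return kvm_vgic_set_owner(vcpu, 27, owner);
}
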
0610 /**
0611  * vgic_prune_ap_list - Remove non-relevant interrupts from the list
0612  *
0613  * @vcpu: The VCPU pointer
0614  *
0615  * Go over the list of "interesting" interrupts, and prune those that we
0616  * won't have to consider in the near future.
0617  */
0618 static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
0619 {
0620     struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
0621     struct vgic_irq *irq, *tmp;
0622 
0623     DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
0624 
0625 retry:
0626     raw_spin_lock(&vgic_cpu->ap_list_lock);
0627 
0628     list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
0629         struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB;
0630         bool target_vcpu_needs_kick = false;
0631 
0632         raw_spin_lock(&irq->irq_lock);
0633 
0634         BUG_ON(vcpu != irq->vcpu);
0635 
0636         target_vcpu = vgic_target_oracle(irq);
0637 
0638         if (!target_vcpu) {
0639             /*
0640              * We don't need to process this interrupt any
0641              * further, move it off the list.
0642              */
0643             list_del(&irq->ap_list);
0644             irq->vcpu = NULL;
0645             raw_spin_unlock(&irq->irq_lock);
0646 
0647             /*
0648              * This vgic_put_irq call matches the
0649              * vgic_get_irq_kref in vgic_queue_irq_unlock,
0650              * where we added the LPI to the ap_list. As
0651              * we remove the irq from the list, we also
0652              * drop the refcount.
0653              */
0654             vgic_put_irq(vcpu->kvm, irq);
0655             continue;
0656         }
0657 
0658         if (target_vcpu == vcpu) {
0659             /* We're on the right CPU */
0660             raw_spin_unlock(&irq->irq_lock);
0661             continue;
0662         }
0663 
0664         /* This interrupt looks like it has to be migrated. */
0665 
0666         raw_spin_unlock(&irq->irq_lock);
0667         raw_spin_unlock(&vgic_cpu->ap_list_lock);
0668 
0669         /*
0670          * Ensure locking order by always locking the smallest
0671          * ID first.
0672          */
0673         if (vcpu->vcpu_id < target_vcpu->vcpu_id) {
0674             vcpuA = vcpu;
0675             vcpuB = target_vcpu;
0676         } else {
0677             vcpuA = target_vcpu;
0678             vcpuB = vcpu;
0679         }
0680 
0681         raw_spin_lock(&vcpuA->arch.vgic_cpu.ap_list_lock);
0682         raw_spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock,
0683                       SINGLE_DEPTH_NESTING);
0684         raw_spin_lock(&irq->irq_lock);
0685 
0686         /*
0687          * If the affinity has been preserved, move the
0688          * interrupt around. Otherwise, it means things have
0689          * changed while the interrupt was unlocked, and we
0690          * need to replay this.
0691          *
0692          * In all cases, we cannot trust the list not to have
0693          * changed, so we restart from the beginning.
0694          */
0695         if (target_vcpu == vgic_target_oracle(irq)) {
0696             struct vgic_cpu *new_cpu = &target_vcpu->arch.vgic_cpu;
0697 
0698             list_del(&irq->ap_list);
0699             irq->vcpu = target_vcpu;
0700             list_add_tail(&irq->ap_list, &new_cpu->ap_list_head);
0701             target_vcpu_needs_kick = true;
0702         }
0703 
0704         raw_spin_unlock(&irq->irq_lock);
0705         raw_spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock);
0706         raw_spin_unlock(&vcpuA->arch.vgic_cpu.ap_list_lock);
0707 
0708         if (target_vcpu_needs_kick) {
0709             kvm_make_request(KVM_REQ_IRQ_PENDING, target_vcpu);
0710             kvm_vcpu_kick(target_vcpu);
0711         }
0712 
0713         goto retry;
0714     }
0715 
0716     raw_spin_unlock(&vgic_cpu->ap_list_lock);
0717 }
0718 
0719 static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu)
0720 {
0721     if (kvm_vgic_global_state.type == VGIC_V2)
0722         vgic_v2_fold_lr_state(vcpu);
0723     else
0724         vgic_v3_fold_lr_state(vcpu);
0725 }
0726 
0727 /* Requires the irq_lock to be held. */
0728 static inline void vgic_populate_lr(struct kvm_vcpu *vcpu,
0729                     struct vgic_irq *irq, int lr)
0730 {
0731     lockdep_assert_held(&irq->irq_lock);
0732 
0733     if (kvm_vgic_global_state.type == VGIC_V2)
0734         vgic_v2_populate_lr(vcpu, irq, lr);
0735     else
0736         vgic_v3_populate_lr(vcpu, irq, lr);
0737 }
0738 
0739 static inline void vgic_clear_lr(struct kvm_vcpu *vcpu, int lr)
0740 {
0741     if (kvm_vgic_global_state.type == VGIC_V2)
0742         vgic_v2_clear_lr(vcpu, lr);
0743     else
0744         vgic_v3_clear_lr(vcpu, lr);
0745 }
0746 
0747 static inline void vgic_set_underflow(struct kvm_vcpu *vcpu)
0748 {
0749     if (kvm_vgic_global_state.type == VGIC_V2)
0750         vgic_v2_set_underflow(vcpu);
0751     else
0752         vgic_v3_set_underflow(vcpu);
0753 }
0754 
0755 /* Requires the ap_list_lock to be held. */
0756 static int compute_ap_list_depth(struct kvm_vcpu *vcpu,
0757                  bool *multi_sgi)
0758 {
0759     struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
0760     struct vgic_irq *irq;
0761     int count = 0;
0762 
0763     *multi_sgi = false;
0764 
0765     lockdep_assert_held(&vgic_cpu->ap_list_lock);
0766 
0767     list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
0768         int w;
0769 
0770         raw_spin_lock(&irq->irq_lock);
0771         /* GICv2 SGIs can count for more than one... */
0772         w = vgic_irq_get_lr_count(irq);
0773         raw_spin_unlock(&irq->irq_lock);
0774 
0775         count += w;
0776         *multi_sgi |= (w > 1);
0777     }
0778     return count;
0779 }
0780 
0781 /* Requires the VCPU's ap_list_lock to be held. */
0782 static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
0783 {
0784     struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
0785     struct vgic_irq *irq;
0786     int count;
0787     bool multi_sgi;
0788     u8 prio = 0xff;
0789     int i = 0;
0790 
0791     lockdep_assert_held(&vgic_cpu->ap_list_lock);
0792 
0793     count = compute_ap_list_depth(vcpu, &multi_sgi);
0794     if (count > kvm_vgic_global_state.nr_lr || multi_sgi)
0795         vgic_sort_ap_list(vcpu);
0796 
0797     count = 0;
0798 
0799     list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
0800         raw_spin_lock(&irq->irq_lock);
0801 
0802         /*
0803          * If we have multi-SGIs in the pipeline, we need to
0804          * guarantee that they are all seen before any IRQ of
0805          * lower priority. In that case, we need to filter out
0806          * these interrupts by exiting early. This is easy as
0807          * the AP list has been sorted already.
0808          */
0809         if (multi_sgi && irq->priority > prio) {
0810             _raw_spin_unlock(&irq->irq_lock);
0811             break;
0812         }
0813 
0814         if (likely(vgic_target_oracle(irq) == vcpu)) {
0815             vgic_populate_lr(vcpu, irq, count++);
0816 
0817             if (irq->source)
0818                 prio = irq->priority;
0819         }
0820 
0821         raw_spin_unlock(&irq->irq_lock);
0822 
0823         if (count == kvm_vgic_global_state.nr_lr) {
0824             if (!list_is_last(&irq->ap_list,
0825                       &vgic_cpu->ap_list_head))
0826                 vgic_set_underflow(vcpu);
0827             break;
0828         }
0829     }
0830 
0831     /* Nuke remaining LRs */
0832     for (i = count ; i < kvm_vgic_global_state.nr_lr; i++)
0833         vgic_clear_lr(vcpu, i);
0834 
0835     if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
0836         vcpu->arch.vgic_cpu.vgic_v2.used_lrs = count;
0837     else
0838         vcpu->arch.vgic_cpu.vgic_v3.used_lrs = count;
0839 }
0840 
0841 static inline bool can_access_vgic_from_kernel(void)
0842 {
0843     /*
0844      * GICv2 can always be accessed from the kernel because it is
0845      * memory-mapped, and VHE systems can access GICv3 EL2 system
0846      * registers.
0847      */
0848     return !static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) || has_vhe();
0849 }
0850 
0851 static inline void vgic_save_state(struct kvm_vcpu *vcpu)
0852 {
0853     if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
0854         vgic_v2_save_state(vcpu);
0855     else
0856         __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3);
0857 }
0858 
0859 /* Sync back the hardware VGIC state into our emulation after a guest's run. */
0860 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
0861 {
0862     int used_lrs;
0863 
0864     /* An empty ap_list_head implies used_lrs == 0 */
0865     if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
0866         return;
0867 
0868     if (can_access_vgic_from_kernel())
0869         vgic_save_state(vcpu);
0870 
0871     if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
0872         used_lrs = vcpu->arch.vgic_cpu.vgic_v2.used_lrs;
0873     else
0874         used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs;
0875 
0876     if (used_lrs)
0877         vgic_fold_lr_state(vcpu);
0878     vgic_prune_ap_list(vcpu);
0879 }
0880 
0881 static inline void vgic_restore_state(struct kvm_vcpu *vcpu)
0882 {
0883     if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
0884         vgic_v2_restore_state(vcpu);
0885     else
0886         __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3);
0887 }
0888 
0889 /* Flush our emulation state into the GIC hardware before entering the guest. */
0890 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
0891 {
0892     /*
0893      * If there are no virtual interrupts active or pending for this
0894      * VCPU, then there is no work to do and we can bail out without
0895      * taking any lock.  There is a potential race with someone injecting
0896      * interrupts to the VCPU, but it is a benign race as the VCPU will
0897      * either observe the new interrupt before or after doing this check,
0898      * and introducing additional synchronization mechanism doesn't change
0899      * this.
0900      *
0901      * Note that we still need to go through the whole thing if anything
0902      * can be directly injected (GICv4).
0903      */
0904     if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head) &&
0905         !vgic_supports_direct_msis(vcpu->kvm))
0906         return;
0907 
0908     DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
0909 
0910     if (!list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) {
0911         raw_spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock);
0912         vgic_flush_lr_state(vcpu);
0913         raw_spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
0914     }
0915 
0916     if (can_access_vgic_from_kernel())
0917         vgic_restore_state(vcpu);
0918 
0919     if (vgic_supports_direct_msis(vcpu->kvm))
0920         vgic_v4_commit(vcpu);
0921 }
0922 
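/*
 * Illustrative sketch only (not part of this file): roughly where the two
 * entry points above sit in the per-VCPU run loop (the real loop lives in
 * the arch code and does considerably more). Both are called with
 * interrupts disabled, which is what the DEBUG_SPINLOCK_BUG_ON() checks
 * rely on.
 */
static void example_run_once(struct kvm_vcpu *vcpu)
{
    local_irq_disable();

    kvm_vgic_flush_hwstate(vcpu);   /* ap_list -> list registers */

    /* ... enter the guest and run until it traps or exits ... */

    kvm_vgic_sync_hwstate(vcpu);    /* list registers -> ap_list, prune */

    local_irq_enable();
}
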
0923 void kvm_vgic_load(struct kvm_vcpu *vcpu)
0924 {
0925     if (unlikely(!vgic_initialized(vcpu->kvm)))
0926         return;
0927 
0928     if (kvm_vgic_global_state.type == VGIC_V2)
0929         vgic_v2_load(vcpu);
0930     else
0931         vgic_v3_load(vcpu);
0932 }
0933 
0934 void kvm_vgic_put(struct kvm_vcpu *vcpu)
0935 {
0936     if (unlikely(!vgic_initialized(vcpu->kvm)))
0937         return;
0938 
0939     if (kvm_vgic_global_state.type == VGIC_V2)
0940         vgic_v2_put(vcpu);
0941     else
0942         vgic_v3_put(vcpu);
0943 }
0944 
0945 void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu)
0946 {
0947     if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
0948         return;
0949 
0950     if (kvm_vgic_global_state.type == VGIC_V2)
0951         vgic_v2_vmcr_sync(vcpu);
0952     else
0953         vgic_v3_vmcr_sync(vcpu);
0954 }
0955 
0956 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
0957 {
0958     struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
0959     struct vgic_irq *irq;
0960     bool pending = false;
0961     unsigned long flags;
0962     struct vgic_vmcr vmcr;
0963 
0964     if (!vcpu->kvm->arch.vgic.enabled)
0965         return false;
0966 
0967     if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last)
0968         return true;
0969 
0970     vgic_get_vmcr(vcpu, &vmcr);
0971 
0972     raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);
0973 
0974     list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
0975         raw_spin_lock(&irq->irq_lock);
0976         pending = irq_is_pending(irq) && irq->enabled &&
0977               !irq->active &&
0978               irq->priority < vmcr.pmr;
0979         raw_spin_unlock(&irq->irq_lock);
0980 
0981         if (pending)
0982             break;
0983     }
0984 
0985     raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
0986 
0987     return pending;
0988 }
0989 
0990 void vgic_kick_vcpus(struct kvm *kvm)
0991 {
0992     struct kvm_vcpu *vcpu;
0993     unsigned long c;
0994 
0995     /*
0996      * We've injected an interrupt, time to find out who deserves
0997      * a good kick...
0998      */
0999     kvm_for_each_vcpu(c, vcpu, kvm) {
1000         if (kvm_vgic_vcpu_pending_irq(vcpu)) {
1001             kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
1002             kvm_vcpu_kick(vcpu);
1003         }
1004     }
1005 }
1006 
1007 bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid)
1008 {
1009     struct vgic_irq *irq;
1010     bool map_is_active;
1011     unsigned long flags;
1012 
1013     if (!vgic_initialized(vcpu->kvm))
1014         return false;
1015 
1016     irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
1017     raw_spin_lock_irqsave(&irq->irq_lock, flags);
1018     map_is_active = irq->hw && irq->active;
1019     raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
1020     vgic_put_irq(vcpu->kvm, irq);
1021 
1022     return map_is_active;
1023 }
1024 
1025 /*
1026  * Level-triggered mapped IRQs are special because we only observe rising
1027  * edges as input to the VGIC.
1028  *
1029  * If the guest never acked the interrupt we have to sample the physical
1030  * line and set the line level, because the device state could have changed
1031  * or we simply need to process the still pending interrupt later.
1032  *
1033  * We could also have entered the guest with the interrupt active+pending.
1034  * On the next exit, we need to re-evaluate the pending state, as it could
1035  * otherwise result in a spurious interrupt by injecting a now potentially
1036  * stale pending state.
1037  *
1038  * If this causes us to lower the level, we have to also clear the physical
1039  * active state, since we will otherwise never be told when the interrupt
1040  * becomes asserted again.
1041  *
1042  * Another case is when the interrupt requires a helping hand on
1043  * deactivation (no HW deactivation, for example).
1044  */
1045 void vgic_irq_handle_resampling(struct vgic_irq *irq,
1046                 bool lr_deactivated, bool lr_pending)
1047 {
1048     if (vgic_irq_is_mapped_level(irq)) {
1049         bool resample = false;
1050 
1051         if (unlikely(vgic_irq_needs_resampling(irq))) {
1052             resample = !(irq->active || irq->pending_latch);
1053         } else if (lr_pending || (lr_deactivated && irq->line_level)) {
1054             irq->line_level = vgic_get_phys_line_level(irq);
1055             resample = !irq->line_level;
1056         }
1057 
1058         if (resample)
1059             vgic_irq_set_phys_active(irq, false);
1060     }
1061 }
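
/*
 * Worked example for vgic_irq_handle_resampling() on a mapped level IRQ
 * (values illustrative): the guest EOIs the interrupt, so the LR is seen as
 * deactivated with line_level previously high, and the physical line gets
 * sampled again:
 *
 *   line still high -> line_level stays true, the IRQ remains pending and
 *                      will be offered to the guest again;
 *   line now low    -> line_level becomes false and the physical active
 *                      state is cleared, so a new physical interrupt (and
 *                      hence a new injection) can occur when the device
 *                      asserts the line again.
 */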