0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * Copyright 2012 Michael Ellerman, IBM Corporation.
0004  * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation
0005  */
0006 
0007 #include <linux/kernel.h>
0008 #include <linux/kvm_host.h>
0009 #include <linux/err.h>
0010 #include <linux/kernel_stat.h>
0011 #include <linux/pgtable.h>
0012 
0013 #include <asm/kvm_book3s.h>
0014 #include <asm/kvm_ppc.h>
0015 #include <asm/hvcall.h>
0016 #include <asm/xics.h>
0017 #include <asm/synch.h>
0018 #include <asm/cputhreads.h>
0019 #include <asm/ppc-opcode.h>
0020 #include <asm/pnv-pci.h>
0021 #include <asm/opal.h>
0022 #include <asm/smp.h>
0023 
0024 #include "book3s_xics.h"
0025 
0026 #define DEBUG_PASSUP
0027 
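/*
 * Tunables consumed by HV KVM: h_ipi_redirect gates the host-core IPI
 * redirection used by icp_rm_set_vcpu_irq() below, and kvm_irq_bypass
 * gates the passthrough (IRQ bypass) machinery set up elsewhere in HV KVM.
 */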
0028 int h_ipi_redirect = 1;
0029 EXPORT_SYMBOL(h_ipi_redirect);
0030 int kvm_irq_bypass = 1;
0031 EXPORT_SYMBOL(kvm_irq_bypass);
0032 
0033 static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
0034                 u32 new_irq, bool check_resend);
0035 static int xics_opal_set_server(unsigned int hw_irq, int server_cpu);
0036 
0037 /* -- ICS routines -- */
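/*
 * Walk every interrupt source on this ICS and re-attempt delivery of
 * any source that is flagged for resend.
 */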
0038 static void ics_rm_check_resend(struct kvmppc_xics *xics,
0039                 struct kvmppc_ics *ics, struct kvmppc_icp *icp)
0040 {
0041     int i;
0042 
0043     for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
0044         struct ics_irq_state *state = &ics->irq_state[i];
0045         if (state->resend)
0046             icp_rm_deliver_irq(xics, icp, state->number, true);
0047     }
0048 
0049 }
0050 
0051 /* -- ICP routines -- */
0052 
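/*
 * Post a real-mode host action to the first thread of the given host
 * core and kick it with an IPI; the host-side handler,
 * kvmppc_xics_ipi_action(), picks the request up from rm_core[].
 */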
0053 #ifdef CONFIG_SMP
0054 static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu)
0055 {
0056     int hcpu;
0057 
0058     hcpu = hcore << threads_shift;
0059     kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu;
0060     smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION);
0061     kvmppc_set_host_ipi(hcpu);
0062     smp_mb();
0063     kvmhv_rm_send_ipi(hcpu);
0064 }
0065 #else
0066 static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) { }
0067 #endif
0068 
0069 /*
0070  * We start the search from our current CPU Id in the core map
0071  * and go in a circle until we get back to our ID looking for a
0072  * core that is running in host context and that hasn't already
0073  * been targeted for another rm_host_ops.
0074  *
0075  * In the future, could consider using a fairer algorithm (one
0076  * that distributes the IPIs better)
0077  *
0078  * Returns -1 if no suitable core could be found in the host.
0079  * Otherwise, returns the Id of a core which has been reserved for use.
0080  */
0081 static inline int grab_next_hostcore(int start,
0082         struct kvmppc_host_rm_core *rm_core, int max, int action)
0083 {
0084     bool success;
0085     int core;
0086     union kvmppc_rm_state old, new;
0087 
0088     for (core = start + 1; core < max; core++)  {
0089         old = new = READ_ONCE(rm_core[core].rm_state);
0090 
0091         if (!old.in_host || old.rm_action)
0092             continue;
0093 
0094         /* Try to grab this host core if not taken already. */
0095         new.rm_action = action;
0096 
0097         success = cmpxchg64(&rm_core[core].rm_state.raw,
0098                         old.raw, new.raw) == old.raw;
0099         if (success) {
0100             /*
0101              * Make sure that the store to the rm_action is made
0102              * visible before we return to caller (and the
0103              * subsequent store to rm_data) to synchronize with
0104              * the IPI handler.
0105              */
0106             smp_wmb();
0107             return core;
0108         }
0109     }
0110 
0111     return -1;
0112 }
0113 
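/*
 * Find and reserve a host core for the given action. The circular search
 * starts just after our own core (see grab_next_hostcore() above) and
 * returns -1 if no core is currently available.
 */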
0114 static inline int find_available_hostcore(int action)
0115 {
0116     int core;
0117     int my_core = smp_processor_id() >> threads_shift;
0118     struct kvmppc_host_rm_core *rm_core = kvmppc_host_rm_ops_hv->rm_core;
0119 
0120     core = grab_next_hostcore(my_core, rm_core, cpu_nr_cores(), action);
0121     if (core == -1)
0122         core = grab_next_hostcore(core, rm_core, my_core, action);
0123 
0124     return core;
0125 }
0126 
0127 static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
0128                 struct kvm_vcpu *this_vcpu)
0129 {
0130     struct kvmppc_icp *this_icp = this_vcpu->arch.icp;
0131     int cpu;
0132     int hcore;
0133 
0134     /* Mark the target VCPU as having an interrupt pending */
0135     vcpu->stat.queue_intr++;
0136     set_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions);
0137 
0138     /* Kick self? Just set MER and return */
0139     if (vcpu == this_vcpu) {
0140         mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_MER);
0141         return;
0142     }
0143 
0144     /*
0145      * Check if the target VCPU's core is loaded; if not, find an
0146      * available host core to post a wakeup to; if we can't find one,
0147      * set up state to eventually return H_TOO_HARD.
0148      */
0149     cpu = vcpu->arch.thread_cpu;
0150     if (cpu < 0 || cpu >= nr_cpu_ids) {
0151         hcore = -1;
0152         if (kvmppc_host_rm_ops_hv && h_ipi_redirect)
0153             hcore = find_available_hostcore(XICS_RM_KICK_VCPU);
0154         if (hcore != -1) {
0155             icp_send_hcore_msg(hcore, vcpu);
0156         } else {
0157             this_icp->rm_action |= XICS_RM_KICK_VCPU;
0158             this_icp->rm_kick_target = vcpu;
0159         }
0160         return;
0161     }
0162 
0163     smp_mb();
0164     kvmhv_rm_send_ipi(cpu);
0165 }
0166 
0167 static void icp_rm_clr_vcpu_irq(struct kvm_vcpu *vcpu)
0168 {
0169     /* Note: Only called on self! */
0170     clear_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions);
0171     mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_MER);
0172 }
0173 
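/*
 * Try to atomically swap in the new ICP state via cmpxchg64(). Returns
 * false if the state changed under us; callers re-read the state and
 * retry the whole transaction in that case.
 */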
0174 static inline bool icp_rm_try_update(struct kvmppc_icp *icp,
0175                      union kvmppc_icp_state old,
0176                      union kvmppc_icp_state new)
0177 {
0178     struct kvm_vcpu *this_vcpu = local_paca->kvm_hstate.kvm_vcpu;
0179     bool success;
0180 
0181     /* Calculate new output value */
0182     new.out_ee = (new.xisr && (new.pending_pri < new.cppr));
0183 
0184     /* Attempt atomic update */
0185     success = cmpxchg64(&icp->state.raw, old.raw, new.raw) == old.raw;
0186     if (!success)
0187         goto bail;
0188 
0189     /*
0190      * Check for output state update
0191      *
0192      * Note that this is racy since another processor could be updating
0193      * the state already. This is why we never clear the interrupt output
0194      * here, we only ever set it. The clear only happens prior to doing
0195      * an update and only by the processor itself. Currently we do it
0196      * in Accept (H_XIRR) and Up_Cppr (H_CPPR).
0197      *
0198      * We also do not try to figure out whether the EE state has changed,
0199      * we unconditionally set it if the new state calls for it. The reason
0200      * for that is that we opportunistically remove the pending interrupt
0201      * flag when raising CPPR, so we need to set it back here if an
0202      * interrupt is still pending.
0203      */
0204     if (new.out_ee)
0205         icp_rm_set_vcpu_irq(icp->vcpu, this_vcpu);
0206 
0207     /* Expose the state change for debug purposes */
0208     this_vcpu->arch.icp->rm_dbgstate = new;
0209     this_vcpu->arch.icp->rm_dbgtgt = icp->vcpu;
0210 
0211  bail:
0212     return success;
0213 }
0214 
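/*
 * If real-mode debugging is enabled or a deferred action has been queued
 * for the host, return H_TOO_HARD so the hcall is redone in virtual mode;
 * otherwise the real-mode handling is complete and we return H_SUCCESS.
 */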
0215 static inline int check_too_hard(struct kvmppc_xics *xics,
0216                  struct kvmppc_icp *icp)
0217 {
0218     return (xics->real_mode_dbg || icp->rm_action) ? H_TOO_HARD : H_SUCCESS;
0219 }
0220 
0221 static void icp_rm_check_resend(struct kvmppc_xics *xics,
0222                  struct kvmppc_icp *icp)
0223 {
0224     u32 icsid;
0225 
0226     /* Order this load with the test for need_resend in the caller */
0227     smp_rmb();
0228     for_each_set_bit(icsid, icp->resend_map, xics->max_icsid + 1) {
0229         struct kvmppc_ics *ics = xics->ics[icsid];
0230 
0231         if (!test_and_clear_bit(icsid, icp->resend_map))
0232             continue;
0233         if (!ics)
0234             continue;
0235         ics_rm_check_resend(xics, ics, icp);
0236     }
0237 }
0238 
0239 static bool icp_rm_try_to_deliver(struct kvmppc_icp *icp, u32 irq, u8 priority,
0240                    u32 *reject)
0241 {
0242     union kvmppc_icp_state old_state, new_state;
0243     bool success;
0244 
0245     do {
0246         old_state = new_state = READ_ONCE(icp->state);
0247 
0248         *reject = 0;
0249 
0250         /* See if we can deliver */
0251         success = new_state.cppr > priority &&
0252             new_state.mfrr > priority &&
0253             new_state.pending_pri > priority;
0254 
0255         /*
0256          * If we can, check for a rejection and perform the
0257          * delivery
0258          */
0259         if (success) {
0260             *reject = new_state.xisr;
0261             new_state.xisr = irq;
0262             new_state.pending_pri = priority;
0263         } else {
0264             /*
0265              * If we failed to deliver we set need_resend
0266              * so a subsequent CPPR state change causes us
0267              * to try a new delivery.
0268              */
0269             new_state.need_resend = true;
0270         }
0271 
0272     } while (!icp_rm_try_update(icp, old_state, new_state));
0273 
0274     return success;
0275 }
0276 
0277 static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
0278                 u32 new_irq, bool check_resend)
0279 {
0280     struct ics_irq_state *state;
0281     struct kvmppc_ics *ics;
0282     u32 reject;
0283     u16 src;
0284 
0285     /*
0286      * This is used both for initial delivery of an interrupt and
0287      * for subsequent rejection.
0288      *
0289      * Rejection can be racy vs. resends. We have evaluated the
0290      * rejection in an atomic ICP transaction which is now complete,
0291      * so potentially the ICP can already accept the interrupt again.
0292      *
0293      * So we need to retry the delivery. Essentially the reject path
0294      * boils down to a failed delivery. Always.
0295      *
0296      * Now the interrupt could also have moved to a different target,
0297      * thus we may need to re-do the ICP lookup as well
0298      */
0299 
0300  again:
0301     /* Get the ICS state and lock it */
0302     ics = kvmppc_xics_find_ics(xics, new_irq, &src);
0303     if (!ics) {
0304         /* Unsafe increment, but this does not need to be accurate */
0305         xics->err_noics++;
0306         return;
0307     }
0308     state = &ics->irq_state[src];
0309 
0310     /* Get a lock on the ICS */
0311     arch_spin_lock(&ics->lock);
0312 
0313     /* Get our server */
0314     if (!icp || state->server != icp->server_num) {
0315         icp = kvmppc_xics_find_server(xics->kvm, state->server);
0316         if (!icp) {
0317             /* Unsafe increment again */
0318             xics->err_noicp++;
0319             goto out;
0320         }
0321     }
0322 
0323     if (check_resend)
0324         if (!state->resend)
0325             goto out;
0326 
0327     /* Clear the resend bit of that interrupt */
0328     state->resend = 0;
0329 
0330     /*
0331      * If masked, bail out
0332      *
0333      * Note: PAPR doesn't mention anything about masked pending
0334      * when doing a resend, only when doing a delivery.
0335      *
0336      * However that would have the effect of losing a masked
0337      * interrupt that was rejected and isn't consistent with
0338      * the whole masked_pending business which is about not
0339      * losing interrupts that occur while masked.
0340      *
0341      * We don't differentiate between normal deliveries and resends;
0342      * this implementation will differ from PAPR and not lose such
0343      * interrupts.
0344      */
0345     if (state->priority == MASKED) {
0346         state->masked_pending = 1;
0347         goto out;
0348     }
0349 
0350     /*
0351      * Try the delivery, this will set the need_resend flag
0352      * in the ICP as part of the atomic transaction if the
0353      * delivery is not possible.
0354      *
0355      * Note that if successful, the new delivery might have itself
0356      * rejected an interrupt that was "delivered" before we took the
0357      * ics spin lock.
0358      *
0359      * In this case we do the whole sequence all over again for the
0360      * new guy. We cannot assume that the rejected interrupt is less
0361      * favored than the new one, and thus doesn't need to be delivered,
0362      * because by the time we exit icp_rm_try_to_deliver() the target
0363      * processor may well have already consumed & completed it, and thus
0364      * the rejected interrupt might actually be already acceptable.
0365      */
0366     if (icp_rm_try_to_deliver(icp, new_irq, state->priority, &reject)) {
0367         /*
0368          * Delivery was successful, did we reject somebody else ?
0369          */
0370         if (reject && reject != XICS_IPI) {
0371             arch_spin_unlock(&ics->lock);
0372             icp->n_reject++;
0373             new_irq = reject;
0374             check_resend = 0;
0375             goto again;
0376         }
0377     } else {
0378         /*
0379          * We failed to deliver the interrupt, so we need to set the
0380          * resend map bit and mark the ICS state as needing a resend.
0381          */
0382         state->resend = 1;
0383 
0384         /*
0385          * Make sure state->resend is visible before we set resend_map,
0386          * so a check that sees and clears the bit can't miss the resend.
0387          */
0388         smp_wmb();
0389         set_bit(ics->icsid, icp->resend_map);
0390 
0391         /*
0392          * If the need_resend flag got cleared in the ICP some time
0393          * between icp_rm_try_to_deliver() atomic update and now, then
0394          * we know it might have missed the resend_map bit. So we
0395          * retry
0396          */
0397         smp_mb();
0398         if (!icp->state.need_resend) {
0399             state->resend = 0;
0400             arch_spin_unlock(&ics->lock);
0401             check_resend = 0;
0402             goto again;
0403         }
0404     }
0405  out:
0406     arch_spin_unlock(&ics->lock);
0407 }
0408 
0409 static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
0410                  u8 new_cppr)
0411 {
0412     union kvmppc_icp_state old_state, new_state;
0413     bool resend;
0414 
0415     /*
0416      * This handles several related states in one operation:
0417      *
0418      * ICP State: Down_CPPR
0419      *
0420      * Load CPPR with new value and if the XISR is 0
0421      * then check for resends:
0422      *
0423      * ICP State: Resend
0424      *
0425      * If MFRR is more favored than CPPR, check for IPIs
0426      * and notify ICS of a potential resend. This is done
0427      * asynchronously (when used in real mode, we will have
0428      * to exit here).
0429      *
0430      * We do not handle the complete Check_IPI as documented
0431      * here. In the PAPR, this state will be used for both
0432      * Set_MFRR and Down_CPPR. However, we know that we aren't
0433      * changing the MFRR state here so we don't need to handle
0434      * the case of an MFRR causing a reject of a pending irq,
0435      * this will have been handled when the MFRR was set in the
0436      * first place.
0437      *
0438      * Thus we don't have to handle rejects, only resends.
0439      *
0440      * When implementing real mode for HV KVM, resend will lead to
0441      * a H_TOO_HARD return and the whole transaction will be handled
0442      * in virtual mode.
0443      */
0444     do {
0445         old_state = new_state = READ_ONCE(icp->state);
0446 
0447         /* Down_CPPR */
0448         new_state.cppr = new_cppr;
0449 
0450         /*
0451          * Cut down Resend / Check_IPI / IPI
0452          *
0453          * The logic is that we cannot have a pending interrupt
0454          * trumped by an IPI at this point (see above), so we
0455          * know that the pending interrupt is either already an
0456          * IPI (in which case we don't care to override it), or is
0457          * more favored than us, or non-existent.
0458          */
0459         if (new_state.mfrr < new_cppr &&
0460             new_state.mfrr <= new_state.pending_pri) {
0461             new_state.pending_pri = new_state.mfrr;
0462             new_state.xisr = XICS_IPI;
0463         }
0464 
0465         /* Latch/clear resend bit */
0466         resend = new_state.need_resend;
0467         new_state.need_resend = 0;
0468 
0469     } while (!icp_rm_try_update(icp, old_state, new_state));
0470 
0471     /*
0472      * Now handle resend checks. Those are asynchronous to the ICP
0473      * state update in HW (i.e. bus transactions) so we can handle them
0474      * separately here as well.
0475      */
0476     if (resend) {
0477         icp->n_check_resend++;
0478         icp_rm_check_resend(xics, icp);
0479     }
0480 }
0481 
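/*
 * H_XIRR_X: same as H_XIRR, but additionally returns a timebase value
 * in GPR5 alongside the XIRR in GPR4.
 */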
0482 unsigned long xics_rm_h_xirr_x(struct kvm_vcpu *vcpu)
0483 {
0484     vcpu->arch.regs.gpr[5] = get_tb();
0485     return xics_rm_h_xirr(vcpu);
0486 }
0487 
0488 unsigned long xics_rm_h_xirr(struct kvm_vcpu *vcpu)
0489 {
0490     union kvmppc_icp_state old_state, new_state;
0491     struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
0492     struct kvmppc_icp *icp = vcpu->arch.icp;
0493     u32 xirr;
0494 
0495     if (!xics || !xics->real_mode)
0496         return H_TOO_HARD;
0497 
0498     /* First clear the interrupt */
0499     icp_rm_clr_vcpu_irq(icp->vcpu);
0500 
0501     /*
0502      * ICP State: Accept_Interrupt
0503      *
0504      * Return the pending interrupt (if any) along with the
0505      * current CPPR, then clear the XISR & set CPPR to the
0506      * pending priority
0507      */
0508     do {
0509         old_state = new_state = READ_ONCE(icp->state);
0510 
0511         xirr = old_state.xisr | (((u32)old_state.cppr) << 24);
0512         if (!old_state.xisr)
0513             break;
0514         new_state.cppr = new_state.pending_pri;
0515         new_state.pending_pri = 0xff;
0516         new_state.xisr = 0;
0517 
0518     } while (!icp_rm_try_update(icp, old_state, new_state));
0519 
0520     /* Return the result in GPR4 */
0521     vcpu->arch.regs.gpr[4] = xirr;
0522 
0523     return check_too_hard(xics, icp);
0524 }
0525 
0526 int xics_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
0527           unsigned long mfrr)
0528 {
0529     union kvmppc_icp_state old_state, new_state;
0530     struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
0531     struct kvmppc_icp *icp, *this_icp = vcpu->arch.icp;
0532     u32 reject;
0533     bool resend;
0534     bool local;
0535 
0536     if (!xics || !xics->real_mode)
0537         return H_TOO_HARD;
0538 
0539     local = this_icp->server_num == server;
0540     if (local)
0541         icp = this_icp;
0542     else
0543         icp = kvmppc_xics_find_server(vcpu->kvm, server);
0544     if (!icp)
0545         return H_PARAMETER;
0546 
0547     /*
0548      * ICP state: Set_MFRR
0549      *
0550      * If the CPPR is more favored than the new MFRR, then
0551      * nothing needs to be done as there can be no XISR to
0552      * reject.
0553      *
0554      * ICP state: Check_IPI
0555      *
0556      * If the CPPR is less favored, then we might be replacing
0557      * an interrupt, and thus need to possibly reject it.
0558      *
0559      * ICP State: IPI
0560      *
0561      * Besides rejecting any pending interrupts, we also
0562      * update XISR and pending_pri to mark IPI as pending.
0563      *
0564      * PAPR does not describe this state, but if the MFRR is being
0565      * made less favored than its earlier value, there might be
0566      * a previously-rejected interrupt needing to be resent.
0567      * Ideally, we would want to resend only if
0568      *  prio(pending_interrupt) < mfrr &&
0569      *  prio(pending_interrupt) < cppr
0570      * where pending interrupt is the one that was rejected. But
0571      * we don't have that state, so we simply trigger a resend
0572      * whenever the MFRR is made less favored.
0573      */
0574     do {
0575         old_state = new_state = READ_ONCE(icp->state);
0576 
0577         /* Set_MFRR */
0578         new_state.mfrr = mfrr;
0579 
0580         /* Check_IPI */
0581         reject = 0;
0582         resend = false;
0583         if (mfrr < new_state.cppr) {
0584             /* Reject a pending interrupt if not an IPI */
0585             if (mfrr <= new_state.pending_pri) {
0586                 reject = new_state.xisr;
0587                 new_state.pending_pri = mfrr;
0588                 new_state.xisr = XICS_IPI;
0589             }
0590         }
0591 
0592         if (mfrr > old_state.mfrr) {
0593             resend = new_state.need_resend;
0594             new_state.need_resend = 0;
0595         }
0596     } while (!icp_rm_try_update(icp, old_state, new_state));
0597 
0598     /* Handle reject in real mode */
0599     if (reject && reject != XICS_IPI) {
0600         this_icp->n_reject++;
0601         icp_rm_deliver_irq(xics, icp, reject, false);
0602     }
0603 
0604     /* Handle resends in real mode */
0605     if (resend) {
0606         this_icp->n_check_resend++;
0607         icp_rm_check_resend(xics, icp);
0608     }
0609 
0610     return check_too_hard(xics, this_icp);
0611 }
0612 
0613 int xics_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
0614 {
0615     union kvmppc_icp_state old_state, new_state;
0616     struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
0617     struct kvmppc_icp *icp = vcpu->arch.icp;
0618     u32 reject;
0619 
0620     if (!xics || !xics->real_mode)
0621         return H_TOO_HARD;
0622 
0623     /*
0624      * ICP State: Set_CPPR
0625      *
0626      * We can safely compare the new value with the current
0627      * value outside of the transaction, as the CPPR is only
0628      * ever changed by the owning processor itself.
0629      */
0630     if (cppr > icp->state.cppr) {
0631         icp_rm_down_cppr(xics, icp, cppr);
0632         goto bail;
0633     } else if (cppr == icp->state.cppr)
0634         return H_SUCCESS;
0635 
0636     /*
0637      * ICP State: Up_CPPR
0638      *
0639      * The processor is raising its priority, this can result
0640      * in a rejection of a pending interrupt:
0641      *
0642      * ICP State: Reject_Current
0643      *
0644      * We can remove EE from the current processor, the update
0645      * transaction will set it again if needed
0646      */
0647     icp_rm_clr_vcpu_irq(icp->vcpu);
0648 
0649     do {
0650         old_state = new_state = READ_ONCE(icp->state);
0651 
0652         reject = 0;
0653         new_state.cppr = cppr;
0654 
0655         if (cppr <= new_state.pending_pri) {
0656             reject = new_state.xisr;
0657             new_state.xisr = 0;
0658             new_state.pending_pri = 0xff;
0659         }
0660 
0661     } while (!icp_rm_try_update(icp, old_state, new_state));
0662 
0663     /*
0664      * Check for rejects. They are handled by doing a new delivery
0665      * attempt (see comments in icp_rm_deliver_irq).
0666      */
0667     if (reject && reject != XICS_IPI) {
0668         icp->n_reject++;
0669         icp_rm_deliver_irq(xics, icp, reject, false);
0670     }
0671  bail:
0672     return check_too_hard(xics, icp);
0673 }
0674 
0675 static int ics_rm_eoi(struct kvm_vcpu *vcpu, u32 irq)
0676 {
0677     struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
0678     struct kvmppc_icp *icp = vcpu->arch.icp;
0679     struct kvmppc_ics *ics;
0680     struct ics_irq_state *state;
0681     u16 src;
0682     u32 pq_old, pq_new;
0683 
0684     /*
0685      * ICS EOI handling: For LSI, if P bit is still set, we need to
0686      * resend it.
0687      *
0688      * For MSI, we move Q bit into P (and clear Q). If it is set,
0689      * resend it.
0690      */
0691 
0692     ics = kvmppc_xics_find_ics(xics, irq, &src);
0693     if (!ics)
0694         goto bail;
0695 
0696     state = &ics->irq_state[src];
0697 
0698     if (state->lsi)
0699         pq_new = state->pq_state;
0700     else
0701         do {
0702             pq_old = state->pq_state;
0703             pq_new = pq_old >> 1;
0704         } while (cmpxchg(&state->pq_state, pq_old, pq_new) != pq_old);
0705 
0706     if (pq_new & PQ_PRESENTED)
0707         icp_rm_deliver_irq(xics, NULL, irq, false);
0708 
0709     if (!hlist_empty(&vcpu->kvm->irq_ack_notifier_list)) {
0710         icp->rm_action |= XICS_RM_NOTIFY_EOI;
0711         icp->rm_eoied_irq = irq;
0712     }
0713 
0714     /* Handle passthrough interrupts */
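    /*
     * If the hardware interrupt last arrived on a core other than the
     * one we are running on now, count the bad affinity and retarget
     * the interrupt at this core so future ones are delivered here.
     */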
0715     if (state->host_irq) {
0716         ++vcpu->stat.pthru_all;
0717         if (state->intr_cpu != -1) {
0718             int pcpu = raw_smp_processor_id();
0719 
0720             pcpu = cpu_first_thread_sibling(pcpu);
0721             ++vcpu->stat.pthru_host;
0722             if (state->intr_cpu != pcpu) {
0723                 ++vcpu->stat.pthru_bad_aff;
0724                 xics_opal_set_server(state->host_irq, pcpu);
0725             }
0726             state->intr_cpu = -1;
0727         }
0728     }
0729 
0730  bail:
0731     return check_too_hard(xics, icp);
0732 }
0733 
0734 int xics_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
0735 {
0736     struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
0737     struct kvmppc_icp *icp = vcpu->arch.icp;
0738     u32 irq = xirr & 0x00ffffff;
0739 
0740     if (!xics || !xics->real_mode)
0741         return H_TOO_HARD;
0742 
0743     /*
0744      * ICP State: EOI
0745      *
0746      * Note: If EOI is incorrectly used by SW to lower the CPPR
0747      * value (i.e. make it more favored), we do not check for rejection
0748      * of a pending interrupt; this is a SW error and PAPR specifies
0749      * that we don't have to deal with it.
0750      *
0751      * The sending of an EOI to the ICS is handled after the
0752      * CPPR update.
0753      *
0754      * ICP State: Down_CPPR, which we handle
0755      * in a separate function as it's shared with H_CPPR.
0756      */
0757     icp_rm_down_cppr(xics, icp, xirr >> 24);
0758 
0759     /* IPIs have no EOI */
0760     if (irq == XICS_IPI)
0761         return check_too_hard(xics, icp);
0762 
0763     return ics_rm_eoi(vcpu, irq);
0764 }
0765 
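/*
 * Most recent non-zero return code from pnv_opal_pci_msi_eoi(), stashed
 * for later inspection since we cannot usefully report errors from
 * real mode.
 */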
0766 static unsigned long eoi_rc;
0767 
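/*
 * EOI a passed-through interrupt: first complete the MSI EOI via OPAL,
 * then EOI the XIRR either through the real-mode XICS MMIO path or the
 * OPAL fallback, which may ask the caller (via *again) to retry later.
 */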
0768 static void icp_eoi(struct irq_data *d, u32 hwirq, __be32 xirr, bool *again)
0769 {
0770     void __iomem *xics_phys;
0771     int64_t rc;
0772 
0773     rc = pnv_opal_pci_msi_eoi(d);
0774 
0775     if (rc)
0776         eoi_rc = rc;
0777 
0778     iosync();
0779 
0780     /* EOI it */
0781     xics_phys = local_paca->kvm_hstate.xics_phys;
0782     if (xics_phys) {
0783         __raw_rm_writel(xirr, xics_phys + XICS_XIRR);
0784     } else {
0785         rc = opal_int_eoi(be32_to_cpu(xirr));
0786         *again = rc > 0;
0787     }
0788 }
0789 
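/*
 * Retarget a hardware interrupt at the given host CPU via OPAL; the
 * server number passed to OPAL is the hard CPU id shifted left by 2.
 */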
0790 static int xics_opal_set_server(unsigned int hw_irq, int server_cpu)
0791 {
0792     unsigned int mangle_cpu = get_hard_smp_processor_id(server_cpu) << 2;
0793 
0794     return opal_set_xive(hw_irq, mangle_cpu, DEFAULT_PRIORITY);
0795 }
0796 
0797 /*
0798  * Increment a per-CPU 32-bit unsigned integer variable.
0799  * Safe to call in real-mode. Handles vmalloc'ed addresses
0800  *
0801  * ToDo: Make this work for any integral type
0802  */
0803 
0804 static inline void this_cpu_inc_rm(unsigned int __percpu *addr)
0805 {
0806     unsigned long l;
0807     unsigned int *raddr;
0808     int cpu = smp_processor_id();
0809 
0810     raddr = per_cpu_ptr(addr, cpu);
0811     l = (unsigned long)raddr;
0812 
0813     if (get_region_id(l) == VMALLOC_REGION_ID) {
0814         l = vmalloc_to_phys(raddr);
0815         raddr = (unsigned int *)l;
0816     }
0817     ++*raddr;
0818 }
0819 
0820 /*
0821  * We don't try to update the flags in the irq_desc 'istate' field in
0822  * here as would happen in the normal IRQ handling path for several reasons:
0823  *  - state flags represent internal IRQ state and are not expected to be
0824  *    updated outside the IRQ subsystem
0825  *  - more importantly, these are useful for edge triggered interrupts,
0826  *    IRQ probing, etc., but we are only handling MSI/MSI-X interrupts here
0827  *    and these states shouldn't apply to us.
0828  *
0829  * However, we do update irq_stats - we somewhat duplicate the code in
0830  * kstat_incr_irqs_this_cpu() here, since that function is defined
0831  * in irq/internal.h, which we don't want to include here.
0832  * The only difference is that desc->kstat_irqs is an allocated per CPU
0833  * variable and could have been vmalloc'ed, so we can't directly
0834  * call __this_cpu_inc() on it. The kstat structure is a static
0835  * per CPU variable and it should be accessible by real-mode KVM.
0836  *
0837  */
0838 static void kvmppc_rm_handle_irq_desc(struct irq_desc *desc)
0839 {
0840     this_cpu_inc_rm(desc->kstat_irqs);
0841     __this_cpu_inc(kstat.irqs_sum);
0842 }
0843 
0844 long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu,
0845                  __be32 xirr,
0846                  struct kvmppc_irq_map *irq_map,
0847                  struct kvmppc_passthru_irqmap *pimap,
0848                  bool *again)
0849 {
0850     struct kvmppc_xics *xics;
0851     struct kvmppc_icp *icp;
0852     struct kvmppc_ics *ics;
0853     struct ics_irq_state *state;
0854     u32 irq;
0855     u16 src;
0856     u32 pq_old, pq_new;
0857 
0858     irq = irq_map->v_hwirq;
0859     xics = vcpu->kvm->arch.xics;
0860     icp = vcpu->arch.icp;
0861 
0862     kvmppc_rm_handle_irq_desc(irq_map->desc);
0863 
0864     ics = kvmppc_xics_find_ics(xics, irq, &src);
0865     if (!ics)
0866         return 2;
0867 
0868     state = &ics->irq_state[src];
0869 
0870     /* only MSIs register bypass producers, so it must be MSI here */
0871     do {
0872         pq_old = state->pq_state;
0873         pq_new = ((pq_old << 1) & 3) | PQ_PRESENTED;
0874     } while (cmpxchg(&state->pq_state, pq_old, pq_new) != pq_old);
0875 
0876     /* Test P=1, Q=0, this is the only case where we present */
0877     if (pq_new == PQ_PRESENTED)
0878         icp_rm_deliver_irq(xics, icp, irq, false);
0879 
0880     /* EOI the interrupt */
0881     icp_eoi(irq_desc_get_irq_data(irq_map->desc), irq_map->r_hwirq, xirr, again);
0882 
0883     if (check_too_hard(xics, icp) == H_TOO_HARD)
0884         return 2;
0885     else
0886         return -2;
0887 }
0888 
0889 /*  --- Non-real mode XICS-related built-in routines ---  */
0890 
0891 /*
0892  * Host Operations poked by RM KVM
0893  */
0894 static void rm_host_ipi_action(int action, void *data)
0895 {
0896     switch (action) {
0897     case XICS_RM_KICK_VCPU:
0898         kvmppc_host_rm_ops_hv->vcpu_kick(data);
0899         break;
0900     default:
0901         WARN(1, "Unexpected rm_action=%d data=%p\n", action, data);
0902         break;
0903     }
0904 
0905 }
0906 
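/*
 * Host-side handler for the PPC_MSG_RM_HOST_ACTION IPI sent by
 * icp_send_hcore_msg(): run the action queued in this core's rm_core[]
 * slot, then clear the slot so it can be grabbed again.
 */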
0907 void kvmppc_xics_ipi_action(void)
0908 {
0909     int core;
0910     unsigned int cpu = smp_processor_id();
0911     struct kvmppc_host_rm_core *rm_corep;
0912 
0913     core = cpu >> threads_shift;
0914     rm_corep = &kvmppc_host_rm_ops_hv->rm_core[core];
0915 
0916     if (rm_corep->rm_data) {
0917         rm_host_ipi_action(rm_corep->rm_state.rm_action,
0918                             rm_corep->rm_data);
0919         /* Order these stores against the real mode KVM */
0920         rm_corep->rm_data = NULL;
0921         smp_wmb();
0922         rm_corep->rm_state.rm_action = 0;
0923     }
0924 }