0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  * Copyright 2016,2017 IBM Corporation.
0004  */
0005 
0006 #define pr_fmt(fmt) "xive: " fmt
0007 
0008 #include <linux/types.h>
0009 #include <linux/threads.h>
0010 #include <linux/kernel.h>
0011 #include <linux/irq.h>
0012 #include <linux/irqdomain.h>
0013 #include <linux/debugfs.h>
0014 #include <linux/smp.h>
0015 #include <linux/interrupt.h>
0016 #include <linux/seq_file.h>
0017 #include <linux/init.h>
0018 #include <linux/cpu.h>
0019 #include <linux/of.h>
0020 #include <linux/slab.h>
0021 #include <linux/spinlock.h>
0022 #include <linux/msi.h>
0023 #include <linux/vmalloc.h>
0024 
0025 #include <asm/io.h>
0026 #include <asm/smp.h>
0027 #include <asm/machdep.h>
0028 #include <asm/irq.h>
0029 #include <asm/errno.h>
0030 #include <asm/xive.h>
0031 #include <asm/xive-regs.h>
0032 #include <asm/xmon.h>
0033 
0034 #include "xive-internal.h"
0035 
0036 #undef DEBUG_FLUSH
0037 #undef DEBUG_ALL
0038 
0039 #ifdef DEBUG_ALL
0040 #define DBG_VERBOSE(fmt, ...)   pr_devel("cpu %d - " fmt, \
0041                      smp_processor_id(), ## __VA_ARGS__)
0042 #else
0043 #define DBG_VERBOSE(fmt...) do { } while(0)
0044 #endif
0045 
0046 bool __xive_enabled;
0047 EXPORT_SYMBOL_GPL(__xive_enabled);
0048 bool xive_cmdline_disabled;
0049 
0050 /* We use only one priority for now */
0051 static u8 xive_irq_priority;
0052 
0053 /* TIMA exported to KVM */
0054 void __iomem *xive_tima;
0055 EXPORT_SYMBOL_GPL(xive_tima);
0056 u32 xive_tima_offset;
0057 
0058 /* Backend ops */
0059 static const struct xive_ops *xive_ops;
0060 
0061 /* Our global interrupt domain */
0062 static struct irq_domain *xive_irq_domain;
0063 
0064 #ifdef CONFIG_SMP
0065 /* The IPIs use the same logical irq number when on the same node */
0066 static struct xive_ipi_desc {
0067     unsigned int irq;
0068     char name[16];
0069     atomic_t started;
0070 } *xive_ipis;
0071 
0072 /*
0073  * Use early_cpu_to_node() for hot-plugged CPUs
0074  */
0075 static unsigned int xive_ipi_cpu_to_irq(unsigned int cpu)
0076 {
0077     return xive_ipis[early_cpu_to_node(cpu)].irq;
0078 }
0079 #endif
0080 
0081 /* Xive state for each CPU */
0082 static DEFINE_PER_CPU(struct xive_cpu *, xive_cpu);
0083 
0084 /* An invalid CPU target */
0085 #define XIVE_INVALID_TARGET (-1)
0086 
0087 /*
0088  * Global toggle to switch on/off StoreEOI
0089  */
0090 static bool xive_store_eoi = true;
0091 
0092 static bool xive_is_store_eoi(struct xive_irq_data *xd)
0093 {
0094     return xd->flags & XIVE_IRQ_FLAG_STORE_EOI && xive_store_eoi;
0095 }
0096 
0097 /*
0098  * Read the next entry in a queue, return its content if it's valid
0099  * or 0 if there is no new entry.
0100  *
0101  * The queue pointer is moved forward unless "just_peek" is set
0102  */
0103 static u32 xive_read_eq(struct xive_q *q, bool just_peek)
0104 {
0105     u32 cur;
0106 
0107     if (!q->qpage)
0108         return 0;
0109     cur = be32_to_cpup(q->qpage + q->idx);
0110 
0111     /* Check valid bit (31) vs current toggle polarity */
0112     if ((cur >> 31) == q->toggle)
0113         return 0;
0114 
0115     /* If consuming from the queue ... */
0116     if (!just_peek) {
0117         /* Next entry */
0118         q->idx = (q->idx + 1) & q->msk;
0119 
0120         /* Wrap around: flip valid toggle */
0121         if (q->idx == 0)
0122             q->toggle ^= 1;
0123     }
0124     /* Mask out the valid bit (31) */
0125     return cur & 0x7fffffff;
0126 }
0127 
0128 /*
0129  * Scans all the queues that may have interrupts in them
0130  * (based on "pending_prio") in priority order until an
0131  * interrupt is found or all the queues are empty.
0132  *
0133  * Then updates the CPPR (Current Processor Priority
0134  * Register) based on the most favored interrupt found
0135  * (0xff if none) and returns what was found (0 if none).
0136  *
0137  * If just_peek is set, return the most favored pending
0138  * interrupt if any but don't update the queue pointers.
0139  *
0140  * Note: This function can operate generically on any number
0141  * of queues (up to 8). The current implementation of the XIVE
0142  * driver only uses a single queue however.
0143  *
0144  * Note2: This will also "flush" the "pending_count" of a queue
0145  * into the "count" when that queue is observed to be empty.
0146  * This is used to keep track of the number of interrupts
0147  * targeting a queue. When an interrupt is moved away from
0148  * a queue, we only decrement that queue count once the queue
0149  * has been observed empty to avoid races.
0150  */
0151 static u32 xive_scan_interrupts(struct xive_cpu *xc, bool just_peek)
0152 {
0153     u32 irq = 0;
0154     u8 prio = 0;
0155 
0156     /* Find highest pending priority */
0157     while (xc->pending_prio != 0) {
0158         struct xive_q *q;
0159 
0160         prio = ffs(xc->pending_prio) - 1;
0161         DBG_VERBOSE("scan_irq: trying prio %d\n", prio);
0162 
0163         /* Try to fetch */
0164         irq = xive_read_eq(&xc->queue[prio], just_peek);
0165 
0166         /* Found something ? That's it */
0167         if (irq) {
0168             if (just_peek || irq_to_desc(irq))
0169                 break;
0170             /*
0171              * We should never get here; if we do then we must
0172              * have failed to synchronize the interrupt properly
0173              * when shutting it down.
0174              */
0175             pr_crit("xive: got interrupt %d without descriptor, dropping\n",
0176                 irq);
0177             WARN_ON(1);
0178             continue;
0179         }
0180 
0181         /* Clear pending bits */
0182         xc->pending_prio &= ~(1 << prio);
0183 
0184         /*
0185          * Check if the queue count needs adjusting due to
0186          * interrupts being moved away. See description of
0187          * xive_dec_target_count()
0188          */
0189         q = &xc->queue[prio];
0190         if (atomic_read(&q->pending_count)) {
0191             int p = atomic_xchg(&q->pending_count, 0);
0192             if (p) {
0193                 WARN_ON(p > atomic_read(&q->count));
0194                 atomic_sub(p, &q->count);
0195             }
0196         }
0197     }
0198 
0199     /* If nothing was found, set CPPR to 0xff */
0200     if (irq == 0)
0201         prio = 0xff;
0202 
0203     /* Update HW CPPR to match if necessary */
0204     if (prio != xc->cppr) {
0205         DBG_VERBOSE("scan_irq: adjusting CPPR to %d\n", prio);
0206         xc->cppr = prio;
0207         out_8(xive_tima + xive_tima_offset + TM_CPPR, prio);
0208     }
0209 
0210     return irq;
0211 }
0212 
0213 /*
0214  * This is used to perform the magic loads from an ESB
0215  * described in xive-regs.h
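      *
      * The ESB exposes a 2-bit P/Q state per source: P records that an
      * event notification has been sent to a queue, Q that a further
      * trigger arrived in the meantime. PQ=00 is the enabled/idle
      * ("reset") state and PQ=01 is used as the "off" (masked) state.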
0216  */
0217 static notrace u8 xive_esb_read(struct xive_irq_data *xd, u32 offset)
0218 {
0219     u64 val;
0220 
0221     if (offset == XIVE_ESB_SET_PQ_10 && xive_is_store_eoi(xd))
0222         offset |= XIVE_ESB_LD_ST_MO;
0223 
0224     if ((xd->flags & XIVE_IRQ_FLAG_H_INT_ESB) && xive_ops->esb_rw)
0225         val = xive_ops->esb_rw(xd->hw_irq, offset, 0, 0);
0226     else
0227         val = in_be64(xd->eoi_mmio + offset);
0228 
0229     return (u8)val;
0230 }
0231 
0232 static void xive_esb_write(struct xive_irq_data *xd, u32 offset, u64 data)
0233 {
0234     if ((xd->flags & XIVE_IRQ_FLAG_H_INT_ESB) && xive_ops->esb_rw)
0235         xive_ops->esb_rw(xd->hw_irq, offset, data, 1);
0236     else
0237         out_be64(xd->eoi_mmio + offset, data);
0238 }
0239 
0240 #if defined(CONFIG_XMON) || defined(CONFIG_DEBUG_FS)
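     /*
      * Format a one-line summary of a source for xmon/debugfs: the flag
      * letters (S=StoreEOI, L=LSI, H=H_INT_ESB), the current P/Q bits
      * read from the ESB, and the trigger and EOI page addresses.
      */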
0241 static void xive_irq_data_dump(struct xive_irq_data *xd, char *buffer, size_t size)
0242 {
0243     u64 val = xive_esb_read(xd, XIVE_ESB_GET);
0244 
0245     snprintf(buffer, size, "flags=%c%c%c PQ=%c%c 0x%016llx 0x%016llx",
0246          xive_is_store_eoi(xd) ? 'S' : ' ',
0247          xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ',
0248          xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ',
0249          val & XIVE_ESB_VAL_P ? 'P' : '-',
0250          val & XIVE_ESB_VAL_Q ? 'Q' : '-',
0251          xd->trig_page, xd->eoi_page);
0252 }
0253 #endif
0254 
0255 #ifdef CONFIG_XMON
0256 static notrace void xive_dump_eq(const char *name, struct xive_q *q)
0257 {
0258     u32 i0, i1, idx;
0259 
0260     if (!q->qpage)
0261         return;
0262     idx = q->idx;
0263     i0 = be32_to_cpup(q->qpage + idx);
0264     idx = (idx + 1) & q->msk;
0265     i1 = be32_to_cpup(q->qpage + idx);
0266     xmon_printf("%s idx=%d T=%d %08x %08x ...", name,
0267              q->idx, q->toggle, i0, i1);
0268 }
0269 
0270 notrace void xmon_xive_do_dump(int cpu)
0271 {
0272     struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
0273 
0274     xmon_printf("CPU %d:", cpu);
0275     if (xc) {
0276         xmon_printf("pp=%02x CPPR=%02x ", xc->pending_prio, xc->cppr);
0277 
0278 #ifdef CONFIG_SMP
0279         {
0280             char buffer[128];
0281 
0282             xive_irq_data_dump(&xc->ipi_data, buffer, sizeof(buffer));
0283             xmon_printf("IPI=0x%08x %s", xc->hw_ipi, buffer);
0284         }
0285 #endif
0286         xive_dump_eq("EQ", &xc->queue[xive_irq_priority]);
0287     }
0288     xmon_printf("\n");
0289 }
0290 
0291 static struct irq_data *xive_get_irq_data(u32 hw_irq)
0292 {
0293     unsigned int irq = irq_find_mapping(xive_irq_domain, hw_irq);
0294 
0295     return irq ? irq_get_irq_data(irq) : NULL;
0296 }
0297 
0298 int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d)
0299 {
0300     int rc;
0301     u32 target;
0302     u8 prio;
0303     u32 lirq;
0304 
0305     rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq);
0306     if (rc) {
0307         xmon_printf("IRQ 0x%08x : no config rc=%d\n", hw_irq, rc);
0308         return rc;
0309     }
0310 
0311     xmon_printf("IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ",
0312             hw_irq, target, prio, lirq);
0313 
0314     if (!d)
0315         d = xive_get_irq_data(hw_irq);
0316 
0317     if (d) {
0318         char buffer[128];
0319 
0320         xive_irq_data_dump(irq_data_get_irq_handler_data(d),
0321                    buffer, sizeof(buffer));
0322         xmon_printf("%s", buffer);
0323     }
0324 
0325     xmon_printf("\n");
0326     return 0;
0327 }
0328 
0329 void xmon_xive_get_irq_all(void)
0330 {
0331     unsigned int i;
0332     struct irq_desc *desc;
0333 
0334     for_each_irq_desc(i, desc) {
0335         struct irq_data *d = irq_domain_get_irq_data(xive_irq_domain, i);
0336 
0337         if (d)
0338             xmon_xive_get_irq_config(irqd_to_hwirq(d), d);
0339     }
0340 }
0341 
0342 #endif /* CONFIG_XMON */
0343 
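     /*
      * Main external interrupt entry point, installed as ppc_md.get_irq.
      * Returns the Linux irq number to handle next, or 0 if nothing is
      * pending.
      */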
0344 static unsigned int xive_get_irq(void)
0345 {
0346     struct xive_cpu *xc = __this_cpu_read(xive_cpu);
0347     u32 irq;
0348 
0349     /*
0350      * This can be called either as a result of a HW interrupt or
0351      * as a "replay" because EOI decided there was still something
0352      * in one of the queues.
0353      *
0354      * First we perform an ACK cycle in order to update our mask
0355      * of pending priorities. This will also have the effect of
0356      * updating the CPPR to the most favored pending interrupts.
0357      *
0358      * In the future, if we have a way to differentiate a first
0359      * entry (on HW interrupt) from a replay triggered by EOI,
0360      * we could skip this on replays unless the soft-mask state tells us
0361      * that a new HW interrupt occurred.
0362      */
0363     xive_ops->update_pending(xc);
0364 
0365     DBG_VERBOSE("get_irq: pending=%02x\n", xc->pending_prio);
0366 
0367     /* Scan our queue(s) for interrupts */
0368     irq = xive_scan_interrupts(xc, false);
0369 
0370     DBG_VERBOSE("get_irq: got irq 0x%x, new pending=0x%02x\n",
0371         irq, xc->pending_prio);
0372 
0373     /* Return pending interrupt if any */
0374     if (irq == XIVE_BAD_IRQ)
0375         return 0;
0376     return irq;
0377 }
0378 
0379 /*
0380  * After EOI'ing an interrupt, we need to re-check the queue
0381  * to see if another interrupt is pending since multiple
0382  * interrupts can coalesce into a single notification to the
0383  * CPU.
0384  *
0385  * If we find that there is indeed more in there, we call
0386  * force_external_irq_replay() to make Linux synthesize an
0387  * external interrupt on the next call to local_irq_restore().
0388  */
0389 static void xive_do_queue_eoi(struct xive_cpu *xc)
0390 {
0391     if (xive_scan_interrupts(xc, true) != 0) {
0392         DBG_VERBOSE("eoi: pending=0x%02x\n", xc->pending_prio);
0393         force_external_irq_replay();
0394     }
0395 }
0396 
0397 /*
0398  * EOI an interrupt at the source. There are several methods
0399  * to do this depending on the HW version and source type
0400  */
0401 static void xive_do_source_eoi(struct xive_irq_data *xd)
0402 {
0403     u8 eoi_val;
0404 
0405     xd->stale_p = false;
0406 
0407     /* If the XIVE supports the new "store EOI" facility, use it */
0408     if (xive_is_store_eoi(xd)) {
0409         xive_esb_write(xd, XIVE_ESB_STORE_EOI, 0);
0410         return;
0411     }
0412 
0413     /*
0414      * For LSIs, we use the "EOI cycle" special load rather than
0415      * PQ bits, as they are automatically re-triggered in HW when
0416      * still pending.
0417      */
0418     if (xd->flags & XIVE_IRQ_FLAG_LSI) {
0419         xive_esb_read(xd, XIVE_ESB_LOAD_EOI);
0420         return;
0421     }
0422 
0423     /*
0424      * Otherwise, we use the special MMIO that does a clear of
0425      * both P and Q and returns the old Q. This allows us to then
0426      * do a re-trigger if Q was set rather than synthesizing an
0427      * interrupt in software
0428      */
0429     eoi_val = xive_esb_read(xd, XIVE_ESB_SET_PQ_00);
0430     DBG_VERBOSE("eoi_val=%x\n", eoi_val);
0431 
0432     /* Re-trigger if needed */
0433     if ((eoi_val & XIVE_ESB_VAL_Q) && xd->trig_mmio)
0434         out_be64(xd->trig_mmio, 0);
0435 }
0436 
0437 /* irq_chip eoi callback, called with irq descriptor lock held */
0438 static void xive_irq_eoi(struct irq_data *d)
0439 {
0440     struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
0441     struct xive_cpu *xc = __this_cpu_read(xive_cpu);
0442 
0443     DBG_VERBOSE("eoi_irq: irq=%d [0x%lx] pending=%02x\n",
0444             d->irq, irqd_to_hwirq(d), xc->pending_prio);
0445 
0446     /*
0447      * EOI the source if it hasn't been disabled and hasn't
0448      * been passed-through to a KVM guest
0449      */
0450     if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) &&
0451         !(xd->flags & XIVE_IRQ_FLAG_NO_EOI))
0452         xive_do_source_eoi(xd);
0453     else
0454         xd->stale_p = true;
0455 
0456     /*
0457      * Clear saved_p to indicate that it's no longer occupying
0458      * a queue slot on the target queue
0459      */
0460     xd->saved_p = false;
0461 
0462     /* Check for more work in the queue */
0463     xive_do_queue_eoi(xc);
0464 }
0465 
0466 /*
0467  * Helper used to mask and unmask an interrupt source.
0468  */
0469 static void xive_do_source_set_mask(struct xive_irq_data *xd,
0470                     bool mask)
0471 {
0472     u64 val;
0473 
0474     pr_debug("%s: HW 0x%x %smask\n", __func__, xd->hw_irq, mask ? "" : "un");
0475 
0476     /*
0477      * If the interrupt had P set, it may be in a queue.
0478      *
0479      * We need to make sure we don't re-enable it until it
0480      * has been fetched from that queue and EOId. We keep
0481      * a copy of that P state and use it to restore the
0482      * ESB accordingly on unmask.
0483      */
0484     if (mask) {
0485         val = xive_esb_read(xd, XIVE_ESB_SET_PQ_01);
0486         if (!xd->stale_p && !!(val & XIVE_ESB_VAL_P))
0487             xd->saved_p = true;
0488         xd->stale_p = false;
0489     } else if (xd->saved_p) {
0490         xive_esb_read(xd, XIVE_ESB_SET_PQ_10);
0491         xd->saved_p = false;
0492     } else {
0493         xive_esb_read(xd, XIVE_ESB_SET_PQ_00);
0494         xd->stale_p = false;
0495     }
0496 }
0497 
0498 /*
0499  * Try to choose "cpu" as a new interrupt target. Increments
0500  * the queue accounting for that target if it's not already
0501  * full.
0502  */
0503 static bool xive_try_pick_target(int cpu)
0504 {
0505     struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
0506     struct xive_q *q = &xc->queue[xive_irq_priority];
0507     int max;
0508 
0509     /*
0510      * Calculate max number of interrupts in that queue.
0511      *
0512      * We leave a gap of 1 just in case...
0513      */
0514     max = (q->msk + 1) - 1;
0515     return !!atomic_add_unless(&q->count, 1, max);
0516 }
0517 
0518 /*
0519  * Un-account an interrupt for a target CPU. We don't directly
0520  * decrement q->count since the interrupt might still be present
0521  * in the queue.
0522  *
0523  * Instead increment a separate counter "pending_count" which
0524  * will be subtracted from "count" later when that CPU observes
0525  * the queue to be empty.
0526  */
0527 static void xive_dec_target_count(int cpu)
0528 {
0529     struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
0530     struct xive_q *q = &xc->queue[xive_irq_priority];
0531 
0532     if (WARN_ON(cpu < 0 || !xc)) {
0533         pr_err("%s: cpu=%d xc=%p\n", __func__, cpu, xc);
0534         return;
0535     }
0536 
0537     /*
0538      * We increment the "pending count" which will be used
0539      * to decrement the target queue count whenever it's next
0540      * processed and found empty. This ensures that we don't
0541      * decrement while we still have the interrupt there
0542      * occupying a slot.
0543      */
0544     atomic_inc(&q->pending_count);
0545 }
0546 
0547 /* Find a tentative CPU target in a CPU mask */
0548 static int xive_find_target_in_mask(const struct cpumask *mask,
0549                     unsigned int fuzz)
0550 {
0551     int cpu, first, num, i;
0552 
0553     /* Pick a starting point CPU in the mask based on fuzz */
0554     num = min_t(int, cpumask_weight(mask), nr_cpu_ids);
0555     first = fuzz % num;
0556 
0557     /* Locate it */
0558     cpu = cpumask_first(mask);
0559     for (i = 0; i < first && cpu < nr_cpu_ids; i++)
0560         cpu = cpumask_next(cpu, mask);
0561 
0562     /* Sanity check */
0563     if (WARN_ON(cpu >= nr_cpu_ids))
0564         cpu = cpumask_first(cpu_online_mask);
0565 
0566     /* Remember first one to handle wrap-around */
0567     first = cpu;
0568 
0569     /*
0570      * Now go through the entire mask until we find a valid
0571      * target.
0572      */
0573     do {
0574         /*
0575          * We re-check online as the fallback case passes us
0576          * an untested affinity mask
0577          */
0578         if (cpu_online(cpu) && xive_try_pick_target(cpu))
0579             return cpu;
0580         cpu = cpumask_next(cpu, mask);
0581         /* Wrap around */
0582         if (cpu >= nr_cpu_ids)
0583             cpu = cpumask_first(mask);
0584     } while (cpu != first);
0585 
0586     return -1;
0587 }
0588 
0589 /*
0590  * Pick a target CPU for an interrupt. This is done at
0591  * startup or if the affinity is changed in a way that
0592  * invalidates the current target.
0593  */
0594 static int xive_pick_irq_target(struct irq_data *d,
0595                 const struct cpumask *affinity)
0596 {
0597     static unsigned int fuzz;
0598     struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
0599     cpumask_var_t mask;
0600     int cpu = -1;
0601 
0602     /*
0603      * If we have chip IDs, first we try to build a mask of
0604      * CPUs on the same chip as the source and find a target in there
0605      */
0606     if (xd->src_chip != XIVE_INVALID_CHIP_ID &&
0607         zalloc_cpumask_var(&mask, GFP_ATOMIC)) {
0608         /* Build a mask of matching chip IDs */
0609         for_each_cpu_and(cpu, affinity, cpu_online_mask) {
0610             struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
0611             if (xc->chip_id == xd->src_chip)
0612                 cpumask_set_cpu(cpu, mask);
0613         }
0614         /* Try to find a target */
0615         if (cpumask_empty(mask))
0616             cpu = -1;
0617         else
0618             cpu = xive_find_target_in_mask(mask, fuzz++);
0619         free_cpumask_var(mask);
0620         if (cpu >= 0)
0621             return cpu;
0622         fuzz--;
0623     }
0624 
0625     /* No chip IDs, fallback to using the affinity mask */
0626     return xive_find_target_in_mask(affinity, fuzz++);
0627 }
0628 
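     /*
      * irq_chip startup callback: pick a target CPU, configure the HW
      * routing to that CPU's queue at our single priority, then unmask
      * the source.
      */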
0629 static unsigned int xive_irq_startup(struct irq_data *d)
0630 {
0631     struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
0632     unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
0633     int target, rc;
0634 
0635     xd->saved_p = false;
0636     xd->stale_p = false;
0637 
0638     pr_debug("%s: irq %d [0x%x] data @%p\n", __func__, d->irq, hw_irq, d);
0639 
0640     /* Pick a target */
0641     target = xive_pick_irq_target(d, irq_data_get_affinity_mask(d));
0642     if (target == XIVE_INVALID_TARGET) {
0643         /* Try again breaking affinity */
0644         target = xive_pick_irq_target(d, cpu_online_mask);
0645         if (target == XIVE_INVALID_TARGET)
0646             return -ENXIO;
0647         pr_warn("irq %d started with broken affinity\n", d->irq);
0648     }
0649 
0650     /* Sanity check */
0651     if (WARN_ON(target == XIVE_INVALID_TARGET ||
0652             target >= nr_cpu_ids))
0653         target = smp_processor_id();
0654 
0655     xd->target = target;
0656 
0657     /*
0658      * Configure the logical number to be the Linux IRQ number
0659      * and set the target queue
0660      */
0661     rc = xive_ops->configure_irq(hw_irq,
0662                      get_hard_smp_processor_id(target),
0663                      xive_irq_priority, d->irq);
0664     if (rc)
0665         return rc;
0666 
0667     /* Unmask the ESB */
0668     xive_do_source_set_mask(xd, false);
0669 
0670     return 0;
0671 }
0672 
0673 /* called with irq descriptor lock held */
0674 static void xive_irq_shutdown(struct irq_data *d)
0675 {
0676     struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
0677     unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
0678 
0679     pr_debug("%s: irq %d [0x%x] data @%p\n", __func__, d->irq, hw_irq, d);
0680 
0681     if (WARN_ON(xd->target == XIVE_INVALID_TARGET))
0682         return;
0683 
0684     /* Mask the interrupt at the source */
0685     xive_do_source_set_mask(xd, true);
0686 
0687     /*
0688      * Mask the interrupt in HW in the IVT/EAS and set the number
0689      * to be the "bad" IRQ number
0690      */
0691     xive_ops->configure_irq(hw_irq,
0692                 get_hard_smp_processor_id(xd->target),
0693                 0xff, XIVE_BAD_IRQ);
0694 
0695     xive_dec_target_count(xd->target);
0696     xd->target = XIVE_INVALID_TARGET;
0697 }
0698 
0699 static void xive_irq_unmask(struct irq_data *d)
0700 {
0701     struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
0702 
0703     pr_debug("%s: irq %d data @%p\n", __func__, d->irq, xd);
0704 
0705     xive_do_source_set_mask(xd, false);
0706 }
0707 
0708 static void xive_irq_mask(struct irq_data *d)
0709 {
0710     struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
0711 
0712     pr_debug("%s: irq %d data @%p\n", __func__, d->irq, xd);
0713 
0714     xive_do_source_set_mask(xd, true);
0715 }
0716 
0717 static int xive_irq_set_affinity(struct irq_data *d,
0718                  const struct cpumask *cpumask,
0719                  bool force)
0720 {
0721     struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
0722     unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
0723     u32 target, old_target;
0724     int rc = 0;
0725 
0726     pr_debug("%s: irq %d/0x%x\n", __func__, d->irq, hw_irq);
0727 
0728     /* Is this valid ? */
0729     if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids)
0730         return -EINVAL;
0731 
0732     /*
0733      * If existing target is already in the new mask, and is
0734      * online then do nothing.
0735      */
0736     if (xd->target != XIVE_INVALID_TARGET &&
0737         cpu_online(xd->target) &&
0738         cpumask_test_cpu(xd->target, cpumask))
0739         return IRQ_SET_MASK_OK;
0740 
0741     /* Pick a new target */
0742     target = xive_pick_irq_target(d, cpumask);
0743 
0744     /* No target found */
0745     if (target == XIVE_INVALID_TARGET)
0746         return -ENXIO;
0747 
0748     /* Sanity check */
0749     if (WARN_ON(target >= nr_cpu_ids))
0750         target = smp_processor_id();
0751 
0752     old_target = xd->target;
0753 
0754     /*
0755      * Only configure the irq if it's not currently passed-through to
0756      * a KVM guest
0757      */
0758     if (!irqd_is_forwarded_to_vcpu(d))
0759         rc = xive_ops->configure_irq(hw_irq,
0760                          get_hard_smp_processor_id(target),
0761                          xive_irq_priority, d->irq);
0762     if (rc < 0) {
0763         pr_err("Error %d reconfiguring irq %d\n", rc, d->irq);
0764         return rc;
0765     }
0766 
0767     pr_debug("  target: 0x%x\n", target);
0768     xd->target = target;
0769 
0770     /* Give up previous target */
0771     if (old_target != XIVE_INVALID_TARGET)
0772         xive_dec_target_count(old_target);
0773 
0774     return IRQ_SET_MASK_OK;
0775 }
0776 
0777 static int xive_irq_set_type(struct irq_data *d, unsigned int flow_type)
0778 {
0779     struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
0780 
0781     /*
0782      * We only support these. This has really no effect other than setting
0783      * the corresponding descriptor bits, but those will in turn
0784      * affect the resend function when re-enabling an edge interrupt.
0785      *
0786      * We set the default to edge as explained in map().
0787      */
0788     if (flow_type == IRQ_TYPE_DEFAULT || flow_type == IRQ_TYPE_NONE)
0789         flow_type = IRQ_TYPE_EDGE_RISING;
0790 
0791     if (flow_type != IRQ_TYPE_EDGE_RISING &&
0792         flow_type != IRQ_TYPE_LEVEL_LOW)
0793         return -EINVAL;
0794 
0795     irqd_set_trigger_type(d, flow_type);
0796 
0797     /*
0798      * Double check it matches what the FW thinks
0799      *
0800      * NOTE: We don't know yet if the PAPR interface will provide
0801      * the LSI vs MSI information apart from the device-tree so
0802      * this check might have to move into an optional backend call
0803      * that is specific to the native backend
0804      */
0805     if ((flow_type == IRQ_TYPE_LEVEL_LOW) !=
0806         !!(xd->flags & XIVE_IRQ_FLAG_LSI)) {
0807         pr_warn("Interrupt %d (HW 0x%x) type mismatch, Linux says %s, FW says %s\n",
0808             d->irq, (u32)irqd_to_hwirq(d),
0809             (flow_type == IRQ_TYPE_LEVEL_LOW) ? "Level" : "Edge",
0810             (xd->flags & XIVE_IRQ_FLAG_LSI) ? "Level" : "Edge");
0811     }
0812 
0813     return IRQ_SET_MASK_OK_NOCOPY;
0814 }
0815 
0816 static int xive_irq_retrigger(struct irq_data *d)
0817 {
0818     struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
0819 
0820     /* This should be only for MSIs */
0821     if (WARN_ON(xd->flags & XIVE_IRQ_FLAG_LSI))
0822         return 0;
0823 
0824     /*
0825      * To perform a retrigger, we first set the PQ bits to
0826      * 11, then perform an EOI.
0827      */
0828     xive_esb_read(xd, XIVE_ESB_SET_PQ_11);
0829     xive_do_source_eoi(xd);
0830 
0831     return 1;
0832 }
0833 
0834 /*
0835  * Caller holds the irq descriptor lock, so this won't be called
0836  * concurrently with xive_get_irqchip_state on the same interrupt.
0837  */
0838 static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
0839 {
0840     struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
0841     unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
0842     int rc;
0843     u8 pq;
0844 
0845     /*
0846      * This is called by KVM with state non-NULL for enabling
0847      * pass-through or NULL for disabling it
0848      */
0849     if (state) {
0850         irqd_set_forwarded_to_vcpu(d);
0851 
0852         /* Set it to PQ=10 state to prevent further sends */
0853         pq = xive_esb_read(xd, XIVE_ESB_SET_PQ_10);
0854         if (!xd->stale_p) {
0855             xd->saved_p = !!(pq & XIVE_ESB_VAL_P);
0856             xd->stale_p = !xd->saved_p;
0857         }
0858 
0859         /* No target ? nothing to do */
0860         if (xd->target == XIVE_INVALID_TARGET) {
0861             /*
0862              * An untargeted interrupt should also have
0863              * been masked at the source
0864              */
0865             WARN_ON(xd->saved_p);
0866 
0867             return 0;
0868         }
0869 
0870         /*
0871          * If P was set, adjust state to PQ=11 to indicate
0872          * that a resend is needed for the interrupt to reach
0873          * the guest. Also remember the value of P.
0874          *
0875          * This also tells us that it's in flight to a host queue
0876          * or has already been fetched but hasn't been EOIed yet
0877          * by the host. Thus it's potentially using up a host
0878          * queue slot. This is important to know because as long
0879          * as this is the case, we must not hard-unmask it when
0880          * "returning" that interrupt to the host.
0881          *
0882          * This saved_p is cleared by the host EOI, when we know
0883          * for sure the queue slot is no longer in use.
0884          */
0885         if (xd->saved_p) {
0886             xive_esb_read(xd, XIVE_ESB_SET_PQ_11);
0887 
0888             /*
0889              * Sync the XIVE source HW to ensure the interrupt
0890              * has gone through the EAS before we change its
0891              * target to the guest. That should guarantee us
0892              * that we *will* eventually get an EOI for it on
0893              * the host. Otherwise there would be a small window
0894              * for P to be seen here but the interrupt going
0895              * to the guest queue.
0896              */
0897             if (xive_ops->sync_source)
0898                 xive_ops->sync_source(hw_irq);
0899         }
0900     } else {
0901         irqd_clr_forwarded_to_vcpu(d);
0902 
0903         /* No host target ? hard mask and return */
0904         if (xd->target == XIVE_INVALID_TARGET) {
0905             xive_do_source_set_mask(xd, true);
0906             return 0;
0907         }
0908 
0909         /*
0910          * Sync the XIVE source HW to ensure the interrupt
0911          * has gone through the EAS before we change its
0912          * target to the host.
0913          */
0914         if (xive_ops->sync_source)
0915             xive_ops->sync_source(hw_irq);
0916 
0917         /*
0918          * By convention we are called with the interrupt in
0919          * a PQ=10 or PQ=11 state, i.e., it won't fire and will
0920          * have latched in Q whether there's a pending HW
0921          * interrupt or not.
0922          *
0923          * First reconfigure the target.
0924          */
0925         rc = xive_ops->configure_irq(hw_irq,
0926                          get_hard_smp_processor_id(xd->target),
0927                          xive_irq_priority, d->irq);
0928         if (rc)
0929             return rc;
0930 
0931         /*
0932          * Then if saved_p is not set, effectively re-enable the
0933          * interrupt with an EOI. If it is set, we know there is
0934          * still a message in a host queue somewhere that will be
0935          * EOId eventually.
0936          *
0937          * Note: We don't check irqd_irq_disabled(). Effectively,
0938          * we *will* let the irq get through even if masked if the
0939          * HW is still firing it in order to deal with the whole
0940          * saved_p business properly. If the interrupt triggers
0941          * while masked, the generic code will re-mask it anyway.
0942          */
0943         if (!xd->saved_p)
0944             xive_do_source_eoi(xd);
0945 
0946     }
0947     return 0;
0948 }
0949 
0950 /* Called with irq descriptor lock held. */
0951 static int xive_get_irqchip_state(struct irq_data *data,
0952                   enum irqchip_irq_state which, bool *state)
0953 {
0954     struct xive_irq_data *xd = irq_data_get_irq_handler_data(data);
0955     u8 pq;
0956 
0957     switch (which) {
0958     case IRQCHIP_STATE_ACTIVE:
0959         pq = xive_esb_read(xd, XIVE_ESB_GET);
0960 
0961         /*
0962          * The esb value being all 1's means we couldn't get
0963          * the PQ state of the interrupt through mmio. It may
0964          * happen, for example when querying a PHB interrupt
0965          * while the PHB is in an error state. We consider the
0966          * interrupt to be inactive in that case.
0967          */
0968         *state = (pq != XIVE_ESB_INVALID) && !xd->stale_p &&
0969             (xd->saved_p || (!!(pq & XIVE_ESB_VAL_P) &&
0970              !irqd_irq_disabled(data)));
0971         return 0;
0972     default:
0973         return -EINVAL;
0974     }
0975 }
0976 
0977 static struct irq_chip xive_irq_chip = {
0978     .name = "XIVE-IRQ",
0979     .irq_startup = xive_irq_startup,
0980     .irq_shutdown = xive_irq_shutdown,
0981     .irq_eoi = xive_irq_eoi,
0982     .irq_mask = xive_irq_mask,
0983     .irq_unmask = xive_irq_unmask,
0984     .irq_set_affinity = xive_irq_set_affinity,
0985     .irq_set_type = xive_irq_set_type,
0986     .irq_retrigger = xive_irq_retrigger,
0987     .irq_set_vcpu_affinity = xive_irq_set_vcpu_affinity,
0988     .irq_get_irqchip_state = xive_get_irqchip_state,
0989 };
0990 
0991 bool is_xive_irq(struct irq_chip *chip)
0992 {
0993     return chip == &xive_irq_chip;
0994 }
0995 EXPORT_SYMBOL_GPL(is_xive_irq);
0996 
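     /*
      * Unmap the ESB MMIO pages of a source. The trigger page may share
      * the mapping of the EOI page, in which case it is only unmapped
      * once.
      */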
0997 void xive_cleanup_irq_data(struct xive_irq_data *xd)
0998 {
0999     pr_debug("%s for HW 0x%x\n", __func__, xd->hw_irq);
1000 
1001     if (xd->eoi_mmio) {
1002         iounmap(xd->eoi_mmio);
1003         if (xd->eoi_mmio == xd->trig_mmio)
1004             xd->trig_mmio = NULL;
1005         xd->eoi_mmio = NULL;
1006     }
1007     if (xd->trig_mmio) {
1008         iounmap(xd->trig_mmio);
1009         xd->trig_mmio = NULL;
1010     }
1011 }
1012 EXPORT_SYMBOL_GPL(xive_cleanup_irq_data);
1013 
1014 static int xive_irq_alloc_data(unsigned int virq, irq_hw_number_t hw)
1015 {
1016     struct xive_irq_data *xd;
1017     int rc;
1018 
1019     xd = kzalloc(sizeof(struct xive_irq_data), GFP_KERNEL);
1020     if (!xd)
1021         return -ENOMEM;
1022     rc = xive_ops->populate_irq_data(hw, xd);
1023     if (rc) {
1024         kfree(xd);
1025         return rc;
1026     }
1027     xd->target = XIVE_INVALID_TARGET;
1028     irq_set_handler_data(virq, xd);
1029 
1030     /*
1031      * Turn OFF by default the interrupt being mapped. A side
1032      * effect of this access is to map the ESB page of the
1033      * interrupt in the Linux address space. This prevents page
1034      * fault issues in the crash handler which masks all
1035      * interrupts.
1036      */
1037     xive_esb_read(xd, XIVE_ESB_SET_PQ_01);
1038 
1039     return 0;
1040 }
1041 
1042 void xive_irq_free_data(unsigned int virq)
1043 {
1044     struct xive_irq_data *xd = irq_get_handler_data(virq);
1045 
1046     if (!xd)
1047         return;
1048     irq_set_handler_data(virq, NULL);
1049     xive_cleanup_irq_data(xd);
1050     kfree(xd);
1051 }
1052 EXPORT_SYMBOL_GPL(xive_irq_free_data);
1053 
1054 #ifdef CONFIG_SMP
1055 
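     /* Send an IPI by writing to the trigger page of the target CPU's IPI source */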
1056 static void xive_cause_ipi(int cpu)
1057 {
1058     struct xive_cpu *xc;
1059     struct xive_irq_data *xd;
1060 
1061     xc = per_cpu(xive_cpu, cpu);
1062 
1063     DBG_VERBOSE("IPI CPU %d -> %d (HW IRQ 0x%x)\n",
1064             smp_processor_id(), cpu, xc->hw_ipi);
1065 
1066     xd = &xc->ipi_data;
1067     if (WARN_ON(!xd->trig_mmio))
1068         return;
1069     out_be64(xd->trig_mmio, 0);
1070 }
1071 
1072 static irqreturn_t xive_muxed_ipi_action(int irq, void *dev_id)
1073 {
1074     return smp_ipi_demux();
1075 }
1076 
1077 static void xive_ipi_eoi(struct irq_data *d)
1078 {
1079     struct xive_cpu *xc = __this_cpu_read(xive_cpu);
1080 
1081     /* Handle possible race with unplug and drop stale IPIs */
1082     if (!xc)
1083         return;
1084 
1085     DBG_VERBOSE("IPI eoi: irq=%d [0x%lx] (HW IRQ 0x%x) pending=%02x\n",
1086             d->irq, irqd_to_hwirq(d), xc->hw_ipi, xc->pending_prio);
1087 
1088     xive_do_source_eoi(&xc->ipi_data);
1089     xive_do_queue_eoi(xc);
1090 }
1091 
1092 static void xive_ipi_do_nothing(struct irq_data *d)
1093 {
1094     /*
1095      * Nothing to do, we never mask/unmask IPIs, but the callback
1096      * has to exist for the struct irq_chip.
1097      */
1098 }
1099 
1100 static struct irq_chip xive_ipi_chip = {
1101     .name = "XIVE-IPI",
1102     .irq_eoi = xive_ipi_eoi,
1103     .irq_mask = xive_ipi_do_nothing,
1104     .irq_unmask = xive_ipi_do_nothing,
1105 };
1106 
1107 /*
1108  * IPIs are marked per-cpu. We use separate HW interrupts under the
1109  * hood but associated with the same "linux" interrupt
1110  */
1111 struct xive_ipi_alloc_info {
1112     irq_hw_number_t hwirq;
1113 };
1114 
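     /* Wire each per-node IPI hwirq to the XIVE-IPI chip and the per-cpu flow handler */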
1115 static int xive_ipi_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
1116                      unsigned int nr_irqs, void *arg)
1117 {
1118     struct xive_ipi_alloc_info *info = arg;
1119     int i;
1120 
1121     for (i = 0; i < nr_irqs; i++) {
1122         irq_domain_set_info(domain, virq + i, info->hwirq + i, &xive_ipi_chip,
1123                     domain->host_data, handle_percpu_irq,
1124                     NULL, NULL);
1125     }
1126     return 0;
1127 }
1128 
1129 static const struct irq_domain_ops xive_ipi_irq_domain_ops = {
1130     .alloc  = xive_ipi_irq_domain_alloc,
1131 };
1132 
1133 static int __init xive_init_ipis(void)
1134 {
1135     struct fwnode_handle *fwnode;
1136     struct irq_domain *ipi_domain;
1137     unsigned int node;
1138     int ret = -ENOMEM;
1139 
1140     fwnode = irq_domain_alloc_named_fwnode("XIVE-IPI");
1141     if (!fwnode)
1142         goto out;
1143 
1144     ipi_domain = irq_domain_create_linear(fwnode, nr_node_ids,
1145                           &xive_ipi_irq_domain_ops, NULL);
1146     if (!ipi_domain)
1147         goto out_free_fwnode;
1148 
1149     xive_ipis = kcalloc(nr_node_ids, sizeof(*xive_ipis), GFP_KERNEL | __GFP_NOFAIL);
1150     if (!xive_ipis)
1151         goto out_free_domain;
1152 
1153     for_each_node(node) {
1154         struct xive_ipi_desc *xid = &xive_ipis[node];
1155         struct xive_ipi_alloc_info info = { node };
1156 
1157         /*
1158          * Map one IPI interrupt per node for all cpus of that node.
1159          * Since the HW interrupt number doesn't have any meaning,
1160          * simply use the node number.
1161          */
1162         ret = irq_domain_alloc_irqs(ipi_domain, 1, node, &info);
1163         if (ret < 0)
1164             goto out_free_xive_ipis;
1165         xid->irq = ret;
1166 
1167         snprintf(xid->name, sizeof(xid->name), "IPI-%d", node);
1168     }
1169 
1170     return ret;
1171 
1172 out_free_xive_ipis:
1173     kfree(xive_ipis);
1174 out_free_domain:
1175     irq_domain_remove(ipi_domain);
1176 out_free_fwnode:
1177     irq_domain_free_fwnode(fwnode);
1178 out:
1179     return ret;
1180 }
1181 
1182 static int xive_request_ipi(unsigned int cpu)
1183 {
1184     struct xive_ipi_desc *xid = &xive_ipis[early_cpu_to_node(cpu)];
1185     int ret;
1186 
1187     if (atomic_inc_return(&xid->started) > 1)
1188         return 0;
1189 
1190     ret = request_irq(xid->irq, xive_muxed_ipi_action,
1191               IRQF_NO_DEBUG | IRQF_PERCPU | IRQF_NO_THREAD,
1192               xid->name, NULL);
1193 
1194     WARN(ret < 0, "Failed to request IPI %d: %d\n", xid->irq, ret);
1195     return ret;
1196 }
1197 
1198 static int xive_setup_cpu_ipi(unsigned int cpu)
1199 {
1200     unsigned int xive_ipi_irq = xive_ipi_cpu_to_irq(cpu);
1201     struct xive_cpu *xc;
1202     int rc;
1203 
1204     pr_debug("Setting up IPI for CPU %d\n", cpu);
1205 
1206     xc = per_cpu(xive_cpu, cpu);
1207 
1208     /* Check if we are already setup */
1209     if (xc->hw_ipi != XIVE_BAD_IRQ)
1210         return 0;
1211 
1212     /* Register the IPI */
1213     xive_request_ipi(cpu);
1214 
1215     /* Grab an IPI from the backend, this will populate xc->hw_ipi */
1216     if (xive_ops->get_ipi(cpu, xc))
1217         return -EIO;
1218 
1219     /*
1220      * Populate the IRQ data in the xive_cpu structure and
1221      * configure the HW / enable the IPIs.
1222      */
1223     rc = xive_ops->populate_irq_data(xc->hw_ipi, &xc->ipi_data);
1224     if (rc) {
1225         pr_err("Failed to populate IPI data on CPU %d\n", cpu);
1226         return -EIO;
1227     }
1228     rc = xive_ops->configure_irq(xc->hw_ipi,
1229                      get_hard_smp_processor_id(cpu),
1230                      xive_irq_priority, xive_ipi_irq);
1231     if (rc) {
1232         pr_err("Failed to map IPI CPU %d\n", cpu);
1233         return -EIO;
1234     }
1235     pr_debug("CPU %d HW IPI 0x%x, virq %d, trig_mmio=%p\n", cpu,
1236          xc->hw_ipi, xive_ipi_irq, xc->ipi_data.trig_mmio);
1237 
1238     /* Unmask it */
1239     xive_do_source_set_mask(&xc->ipi_data, false);
1240 
1241     return 0;
1242 }
1243 
1244 noinstr static void xive_cleanup_cpu_ipi(unsigned int cpu, struct xive_cpu *xc)
1245 {
1246     unsigned int xive_ipi_irq = xive_ipi_cpu_to_irq(cpu);
1247 
1248     /* Disable the IPI and free the IRQ data */
1249 
1250     /* Already cleaned up ? */
1251     if (xc->hw_ipi == XIVE_BAD_IRQ)
1252         return;
1253 
1254     /* TODO: clear IPI mapping */
1255 
1256     /* Mask the IPI */
1257     xive_do_source_set_mask(&xc->ipi_data, true);
1258 
1259     /*
1260      * Note: We don't call xive_cleanup_irq_data() to free
1261      * the mappings as this is called from an IPI on kexec
1262      * which is not a safe environment to call iounmap()
1263      */
1264 
1265     /* Deconfigure/mask in the backend */
1266     xive_ops->configure_irq(xc->hw_ipi, hard_smp_processor_id(),
1267                 0xff, xive_ipi_irq);
1268 
1269     /* Free the IPIs in the backend */
1270     xive_ops->put_ipi(cpu, xc);
1271 }
1272 
1273 void __init xive_smp_probe(void)
1274 {
1275     smp_ops->cause_ipi = xive_cause_ipi;
1276 
1277     /* Register the IPI */
1278     xive_init_ipis();
1279 
1280     /* Allocate and setup IPI for the boot CPU */
1281     xive_setup_cpu_ipi(smp_processor_id());
1282 }
1283 
1284 #endif /* CONFIG_SMP */
1285 
1286 static int xive_irq_domain_map(struct irq_domain *h, unsigned int virq,
1287                    irq_hw_number_t hw)
1288 {
1289     int rc;
1290 
1291     /*
1292      * Mark interrupts as edge sensitive by default so that resend
1293      * actually works. Will fix that up below if needed.
1294      */
1295     irq_clear_status_flags(virq, IRQ_LEVEL);
1296 
1297     rc = xive_irq_alloc_data(virq, hw);
1298     if (rc)
1299         return rc;
1300 
1301     irq_set_chip_and_handler(virq, &xive_irq_chip, handle_fasteoi_irq);
1302 
1303     return 0;
1304 }
1305 
1306 static void xive_irq_domain_unmap(struct irq_domain *d, unsigned int virq)
1307 {
1308     xive_irq_free_data(virq);
1309 }
1310 
1311 static int xive_irq_domain_xlate(struct irq_domain *h, struct device_node *ct,
1312                  const u32 *intspec, unsigned int intsize,
1313                  irq_hw_number_t *out_hwirq, unsigned int *out_flags)
1314 
1315 {
1316     *out_hwirq = intspec[0];
1317 
1318     /*
1319      * If intsize is at least 2, we look for the type in the second cell;
1320      * we assume the LSB indicates a level interrupt.
1321      */
1322     if (intsize > 1) {
1323         if (intspec[1] & 1)
1324             *out_flags = IRQ_TYPE_LEVEL_LOW;
1325         else
1326             *out_flags = IRQ_TYPE_EDGE_RISING;
1327     } else
1328         *out_flags = IRQ_TYPE_LEVEL_LOW;
1329 
1330     return 0;
1331 }
1332 
1333 static int xive_irq_domain_match(struct irq_domain *h, struct device_node *node,
1334                  enum irq_domain_bus_token bus_token)
1335 {
1336     return xive_ops->match(node);
1337 }
1338 
1339 #ifdef CONFIG_GENERIC_IRQ_DEBUGFS
1340 static const char * const esb_names[] = { "RESET", "OFF", "PENDING", "QUEUED" };
1341 
1342 static const struct {
1343     u64  mask;
1344     char *name;
1345 } xive_irq_flags[] = {
1346     { XIVE_IRQ_FLAG_STORE_EOI, "STORE_EOI" },
1347     { XIVE_IRQ_FLAG_LSI,       "LSI"       },
1348     { XIVE_IRQ_FLAG_H_INT_ESB, "H_INT_ESB" },
1349     { XIVE_IRQ_FLAG_NO_EOI,    "NO_EOI"    },
1350 };
1351 
1352 static void xive_irq_domain_debug_show(struct seq_file *m, struct irq_domain *d,
1353                        struct irq_data *irqd, int ind)
1354 {
1355     struct xive_irq_data *xd;
1356     u64 val;
1357     int i;
1358 
1359     /* No IRQ domain level information. To be done */
1360     if (!irqd)
1361         return;
1362 
1363     if (!is_xive_irq(irq_data_get_irq_chip(irqd)))
1364         return;
1365 
1366     seq_printf(m, "%*sXIVE:\n", ind, "");
1367     ind++;
1368 
1369     xd = irq_data_get_irq_handler_data(irqd);
1370     if (!xd) {
1371         seq_printf(m, "%*snot assigned\n", ind, "");
1372         return;
1373     }
1374 
1375     val = xive_esb_read(xd, XIVE_ESB_GET);
1376     seq_printf(m, "%*sESB:      %s\n", ind, "", esb_names[val & 0x3]);
1377     seq_printf(m, "%*sPstate:   %s %s\n", ind, "", xd->stale_p ? "stale" : "",
1378            xd->saved_p ? "saved" : "");
1379     seq_printf(m, "%*sTarget:   %d\n", ind, "", xd->target);
1380     seq_printf(m, "%*sChip:     %d\n", ind, "", xd->src_chip);
1381     seq_printf(m, "%*sTrigger:  0x%016llx\n", ind, "", xd->trig_page);
1382     seq_printf(m, "%*sEOI:      0x%016llx\n", ind, "", xd->eoi_page);
1383     seq_printf(m, "%*sFlags:    0x%llx\n", ind, "", xd->flags);
1384     for (i = 0; i < ARRAY_SIZE(xive_irq_flags); i++) {
1385         if (xd->flags & xive_irq_flags[i].mask)
1386             seq_printf(m, "%*s%s\n", ind + 12, "", xive_irq_flags[i].name);
1387     }
1388 }
1389 #endif
1390 
1391 #ifdef  CONFIG_IRQ_DOMAIN_HIERARCHY
1392 static int xive_irq_domain_translate(struct irq_domain *d,
1393                      struct irq_fwspec *fwspec,
1394                      unsigned long *hwirq,
1395                      unsigned int *type)
1396 {
1397     return xive_irq_domain_xlate(d, to_of_node(fwspec->fwnode),
1398                      fwspec->param, fwspec->param_count,
1399                      hwirq, type);
1400 }
1401 
1402 static int xive_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
1403                  unsigned int nr_irqs, void *arg)
1404 {
1405     struct irq_fwspec *fwspec = arg;
1406     irq_hw_number_t hwirq;
1407     unsigned int type = IRQ_TYPE_NONE;
1408     int i, rc;
1409 
1410     rc = xive_irq_domain_translate(domain, fwspec, &hwirq, &type);
1411     if (rc)
1412         return rc;
1413 
1414     pr_debug("%s %d/0x%lx #%d\n", __func__, virq, hwirq, nr_irqs);
1415 
1416     for (i = 0; i < nr_irqs; i++) {
1417         /* TODO: call xive_irq_domain_map() */
1418 
1419         /*
1420          * Mark interrupts as edge sensitive by default so that resend
1421          * actually works. Will fix that up below if needed.
1422          */
1423         irq_clear_status_flags(virq, IRQ_LEVEL);
1424 
1425         /* allocates and sets handler data */
1426         rc = xive_irq_alloc_data(virq + i, hwirq + i);
1427         if (rc)
1428             return rc;
1429 
1430         irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
1431                           &xive_irq_chip, domain->host_data);
1432         irq_set_handler(virq + i, handle_fasteoi_irq);
1433     }
1434 
1435     return 0;
1436 }
1437 
1438 static void xive_irq_domain_free(struct irq_domain *domain,
1439                  unsigned int virq, unsigned int nr_irqs)
1440 {
1441     int i;
1442 
1443     pr_debug("%s %d #%d\n", __func__, virq, nr_irqs);
1444 
1445     for (i = 0; i < nr_irqs; i++)
1446         xive_irq_free_data(virq + i);
1447 }
1448 #endif
1449 
1450 static const struct irq_domain_ops xive_irq_domain_ops = {
1451 #ifdef  CONFIG_IRQ_DOMAIN_HIERARCHY
1452     .alloc  = xive_irq_domain_alloc,
1453     .free   = xive_irq_domain_free,
1454     .translate = xive_irq_domain_translate,
1455 #endif
1456     .match = xive_irq_domain_match,
1457     .map = xive_irq_domain_map,
1458     .unmap = xive_irq_domain_unmap,
1459     .xlate = xive_irq_domain_xlate,
1460 #ifdef CONFIG_GENERIC_IRQ_DEBUGFS
1461     .debug_show = xive_irq_domain_debug_show,
1462 #endif
1463 };
1464 
1465 static void __init xive_init_host(struct device_node *np)
1466 {
1467     xive_irq_domain = irq_domain_add_tree(np, &xive_irq_domain_ops, NULL);
1468     if (WARN_ON(xive_irq_domain == NULL))
1469         return;
1470     irq_set_default_host(xive_irq_domain);
1471 }
1472 
1473 static void xive_cleanup_cpu_queues(unsigned int cpu, struct xive_cpu *xc)
1474 {
1475     if (xc->queue[xive_irq_priority].qpage)
1476         xive_ops->cleanup_queue(cpu, xc, xive_irq_priority);
1477 }
1478 
1479 static int xive_setup_cpu_queues(unsigned int cpu, struct xive_cpu *xc)
1480 {
1481     int rc = 0;
1482 
1483     /* We set up one queue for now, with a 64k page */
1484     if (!xc->queue[xive_irq_priority].qpage)
1485         rc = xive_ops->setup_queue(cpu, xc, xive_irq_priority);
1486 
1487     return rc;
1488 }
1489 
1490 static int xive_prepare_cpu(unsigned int cpu)
1491 {
1492     struct xive_cpu *xc;
1493 
1494     xc = per_cpu(xive_cpu, cpu);
1495     if (!xc) {
1496         xc = kzalloc_node(sizeof(struct xive_cpu),
1497                   GFP_KERNEL, cpu_to_node(cpu));
1498         if (!xc)
1499             return -ENOMEM;
1500         xc->hw_ipi = XIVE_BAD_IRQ;
1501         xc->chip_id = XIVE_INVALID_CHIP_ID;
1502         if (xive_ops->prepare_cpu)
1503             xive_ops->prepare_cpu(cpu, xc);
1504 
1505         per_cpu(xive_cpu, cpu) = xc;
1506     }
1507 
1508     /* Setup EQs if not already */
1509     return xive_setup_cpu_queues(cpu, xc);
1510 }
1511 
1512 static void xive_setup_cpu(void)
1513 {
1514     struct xive_cpu *xc = __this_cpu_read(xive_cpu);
1515 
1516     /* The backend might have additional things to do */
1517     if (xive_ops->setup_cpu)
1518         xive_ops->setup_cpu(smp_processor_id(), xc);
1519 
1520     /* Set CPPR to 0xff to enable flow of interrupts */
1521     xc->cppr = 0xff;
1522     out_8(xive_tima + xive_tima_offset + TM_CPPR, 0xff);
1523 }
1524 
1525 #ifdef CONFIG_SMP
1526 void xive_smp_setup_cpu(void)
1527 {
1528     pr_debug("SMP setup CPU %d\n", smp_processor_id());
1529 
1530     /* This will have already been done on the boot CPU */
1531     if (smp_processor_id() != boot_cpuid)
1532         xive_setup_cpu();
1533 
1534 }
1535 
1536 int xive_smp_prepare_cpu(unsigned int cpu)
1537 {
1538     int rc;
1539 
1540     /* Allocate per-CPU data and queues */
1541     rc = xive_prepare_cpu(cpu);
1542     if (rc)
1543         return rc;
1544 
1545     /* Allocate and setup IPI for the new CPU */
1546     return xive_setup_cpu_ipi(cpu);
1547 }
1548 
1549 #ifdef CONFIG_HOTPLUG_CPU
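     /*
      * Drain the queue of a CPU going offline: anything still found in
      * there has already been re-routed, so EOI it (LSI) or re-trigger
      * it (MSI) so that it fires again on its new target.
      */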
1550 static void xive_flush_cpu_queue(unsigned int cpu, struct xive_cpu *xc)
1551 {
1552     u32 irq;
1553 
1554     /* We assume local irqs are disabled */
1555     WARN_ON(!irqs_disabled());
1556 
1557     /* Check what's already in the CPU queue */
1558     while ((irq = xive_scan_interrupts(xc, false)) != 0) {
1559         /*
1560          * We need to re-route that interrupt to its new destination.
1561          * First get and lock the descriptor
1562          */
1563         struct irq_desc *desc = irq_to_desc(irq);
1564         struct irq_data *d = irq_desc_get_irq_data(desc);
1565         struct xive_irq_data *xd;
1566 
1567         /*
1568          * Ignore anything that isn't a XIVE irq. This includes
1569          * the IPIs, which can just be dropped.
1570          */
1571         if (d->domain != xive_irq_domain)
1572             continue;
1573 
1574         /*
1575          * The IRQ should have already been re-routed, it's just a
1576          * stale entry in the old queue, so re-trigger it in order to
1577          * make it reach its new destination.
1578          */
1579 #ifdef DEBUG_FLUSH
1580         pr_info("CPU %d: Got irq %d while offline, re-sending...\n",
1581             cpu, irq);
1582 #endif
1583         raw_spin_lock(&desc->lock);
1584         xd = irq_desc_get_handler_data(desc);
1585 
1586         /*
1587          * Clear saved_p to indicate that it's no longer pending
1588          */
1589         xd->saved_p = false;
1590 
1591         /*
1592          * For LSIs, we EOI, this will cause a resend if it's
1593          * still asserted. Otherwise do an MSI retrigger.
1594          */
1595         if (xd->flags & XIVE_IRQ_FLAG_LSI)
1596             xive_do_source_eoi(xd);
1597         else
1598             xive_irq_retrigger(d);
1599 
1600         raw_spin_unlock(&desc->lock);
1601     }
1602 }
1603 
1604 void xive_smp_disable_cpu(void)
1605 {
1606     struct xive_cpu *xc = __this_cpu_read(xive_cpu);
1607     unsigned int cpu = smp_processor_id();
1608 
1609     /* Migrate interrupts away from the CPU */
1610     irq_migrate_all_off_this_cpu();
1611 
1612     /* Set CPPR to 0 to disable flow of interrupts */
1613     xc->cppr = 0;
1614     out_8(xive_tima + xive_tima_offset + TM_CPPR, 0);
1615 
1616     /* Flush everything still in the queue */
1617     xive_flush_cpu_queue(cpu, xc);
1618 
1619     /* Re-enable CPPR */
1620     xc->cppr = 0xff;
1621     out_8(xive_tima + xive_tima_offset + TM_CPPR, 0xff);
1622 }
1623 
1624 void xive_flush_interrupt(void)
1625 {
1626     struct xive_cpu *xc = __this_cpu_read(xive_cpu);
1627     unsigned int cpu = smp_processor_id();
1628 
1629     /* Called if an interrupt occurs while the CPU is being hot-unplugged */
1630     xive_flush_cpu_queue(cpu, xc);
1631 }
1632 
1633 #endif /* CONFIG_HOTPLUG_CPU */
1634 
1635 #endif /* CONFIG_SMP */
1636 
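     /*
      * Tear down the XIVE state of this CPU: block interrupts by setting
      * CPPR to 0, let the backend clean up, then release the IPI and the
      * queues.
      */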
1637 noinstr void xive_teardown_cpu(void)
1638 {
1639     struct xive_cpu *xc = __this_cpu_read(xive_cpu);
1640     unsigned int cpu = smp_processor_id();
1641 
1642     /* Set CPPR to 0 to disable flow of interrupts */
1643     xc->cppr = 0;
1644     out_8(xive_tima + xive_tima_offset + TM_CPPR, 0);
1645 
1646     if (xive_ops->teardown_cpu)
1647         xive_ops->teardown_cpu(cpu, xc);
1648 
1649 #ifdef CONFIG_SMP
1650     /* Get rid of IPI */
1651     xive_cleanup_cpu_ipi(cpu, xc);
1652 #endif
1653 
1654     /* Disable and free the queues */
1655     xive_cleanup_cpu_queues(cpu, xc);
1656 }
1657 
1658 void xive_shutdown(void)
1659 {
1660     xive_ops->shutdown();
1661 }
1662 
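     /*
      * Main entry point for the platform backends: record the TIMA
      * mapping, the backend ops and the single priority in use, then
      * set up the IRQ domain and bring up the boot CPU.
      */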
1663 bool __init xive_core_init(struct device_node *np, const struct xive_ops *ops,
1664                void __iomem *area, u32 offset, u8 max_prio)
1665 {
1666     xive_tima = area;
1667     xive_tima_offset = offset;
1668     xive_ops = ops;
1669     xive_irq_priority = max_prio;
1670 
1671     ppc_md.get_irq = xive_get_irq;
1672     __xive_enabled = true;
1673 
1674     pr_debug("Initializing host..\n");
1675     xive_init_host(np);
1676 
1677     pr_debug("Initializing boot CPU..\n");
1678 
1679     /* Allocate per-CPU data and queues */
1680     xive_prepare_cpu(smp_processor_id());
1681 
1682     /* Get ready for interrupts */
1683     xive_setup_cpu();
1684 
1685     pr_info("Interrupt handling initialized with %s backend\n",
1686         xive_ops->name);
1687     pr_info("Using priority %d for all interrupts\n", max_prio);
1688 
1689     return true;
1690 }
1691 
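     /* Allocate a zeroed, node-local buffer of (1 << queue_shift) bytes for an event queue */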
1692 __be32 *xive_queue_page_alloc(unsigned int cpu, u32 queue_shift)
1693 {
1694     unsigned int alloc_order;
1695     struct page *pages;
1696     __be32 *qpage;
1697 
1698     alloc_order = xive_alloc_order(queue_shift);
1699     pages = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, alloc_order);
1700     if (!pages)
1701         return ERR_PTR(-ENOMEM);
1702     qpage = (__be32 *)page_address(pages);
1703     memset(qpage, 0, 1 << queue_shift);
1704 
1705     return qpage;
1706 }
1707 
1708 static int __init xive_off(char *arg)
1709 {
1710     xive_cmdline_disabled = true;
1711     return 1;
1712 }
1713 __setup("xive=off", xive_off);
1714 
1715 static int __init xive_store_eoi_cmdline(char *arg)
1716 {
1717     if (!arg)
1718         return 1;
1719 
1720     if (strncmp(arg, "off", 3) == 0) {
1721         pr_info("StoreEOI disabled on kernel command line\n");
1722         xive_store_eoi = false;
1723     }
1724     return 1;
1725 }
1726 __setup("xive.store-eoi=", xive_store_eoi_cmdline);
1727 
1728 #ifdef CONFIG_DEBUG_FS
1729 static void xive_debug_show_ipi(struct seq_file *m, int cpu)
1730 {
1731     struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
1732 
1733     seq_printf(m, "CPU %d: ", cpu);
1734     if (xc) {
1735         seq_printf(m, "pp=%02x CPPR=%02x ", xc->pending_prio, xc->cppr);
1736 
1737 #ifdef CONFIG_SMP
1738         {
1739             char buffer[128];
1740 
1741             xive_irq_data_dump(&xc->ipi_data, buffer, sizeof(buffer));
1742             seq_printf(m, "IPI=0x%08x %s", xc->hw_ipi, buffer);
1743         }
1744 #endif
1745     }
1746     seq_puts(m, "\n");
1747 }
1748 
1749 static void xive_debug_show_irq(struct seq_file *m, struct irq_data *d)
1750 {
1751     unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
1752     int rc;
1753     u32 target;
1754     u8 prio;
1755     u32 lirq;
1756     char buffer[128];
1757 
1758     rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq);
1759     if (rc) {
1760         seq_printf(m, "IRQ 0x%08x : no config rc=%d\n", hw_irq, rc);
1761         return;
1762     }
1763 
1764     seq_printf(m, "IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ",
1765            hw_irq, target, prio, lirq);
1766 
1767     xive_irq_data_dump(irq_data_get_irq_handler_data(d), buffer, sizeof(buffer));
1768     seq_puts(m, buffer);
1769     seq_puts(m, "\n");
1770 }
1771 
1772 static int xive_irq_debug_show(struct seq_file *m, void *private)
1773 {
1774     unsigned int i;
1775     struct irq_desc *desc;
1776 
1777     for_each_irq_desc(i, desc) {
1778         struct irq_data *d = irq_domain_get_irq_data(xive_irq_domain, i);
1779 
1780         if (d)
1781             xive_debug_show_irq(m, d);
1782     }
1783     return 0;
1784 }
1785 DEFINE_SHOW_ATTRIBUTE(xive_irq_debug);
1786 
1787 static int xive_ipi_debug_show(struct seq_file *m, void *private)
1788 {
1789     int cpu;
1790 
1791     if (xive_ops->debug_show)
1792         xive_ops->debug_show(m, private);
1793 
1794     for_each_online_cpu(cpu)
1795         xive_debug_show_ipi(m, cpu);
1796     return 0;
1797 }
1798 DEFINE_SHOW_ATTRIBUTE(xive_ipi_debug);
1799 
1800 static void xive_eq_debug_show_one(struct seq_file *m, struct xive_q *q, u8 prio)
1801 {
1802     int i;
1803 
1804     seq_printf(m, "EQ%d idx=%d T=%d\n", prio, q->idx, q->toggle);
1805     if (q->qpage) {
1806         for (i = 0; i < q->msk + 1; i++) {
1807             if (!(i % 8))
1808                 seq_printf(m, "%05d ", i);
1809             seq_printf(m, "%08x%s", be32_to_cpup(q->qpage + i),
1810                    (i + 1) % 8 ? " " : "\n");
1811         }
1812     }
1813     seq_puts(m, "\n");
1814 }
1815 
1816 static int xive_eq_debug_show(struct seq_file *m, void *private)
1817 {
1818     int cpu = (long)m->private;
1819     struct xive_cpu *xc = per_cpu(xive_cpu, cpu);
1820 
1821     if (xc)
1822         xive_eq_debug_show_one(m, &xc->queue[xive_irq_priority],
1823                        xive_irq_priority);
1824     return 0;
1825 }
1826 DEFINE_SHOW_ATTRIBUTE(xive_eq_debug);
1827 
1828 static void xive_core_debugfs_create(void)
1829 {
1830     struct dentry *xive_dir;
1831     struct dentry *xive_eq_dir;
1832     long cpu;
1833     char name[16];
1834 
1835     xive_dir = debugfs_create_dir("xive", arch_debugfs_dir);
1836     if (IS_ERR(xive_dir))
1837         return;
1838 
1839     debugfs_create_file("ipis", 0400, xive_dir,
1840                 NULL, &xive_ipi_debug_fops);
1841     debugfs_create_file("interrupts", 0400, xive_dir,
1842                 NULL, &xive_irq_debug_fops);
1843     xive_eq_dir = debugfs_create_dir("eqs", xive_dir);
1844     for_each_possible_cpu(cpu) {
1845         snprintf(name, sizeof(name), "cpu%ld", cpu);
1846         debugfs_create_file(name, 0400, xive_eq_dir, (void *)cpu,
1847                     &xive_eq_debug_fops);
1848     }
1849     debugfs_create_bool("store-eoi", 0600, xive_dir, &xive_store_eoi);
1850 
1851     if (xive_ops->debug_create)
1852         xive_ops->debug_create(xive_dir);
1853 }
1854 #else
1855 static inline void xive_core_debugfs_create(void) { }
1856 #endif /* CONFIG_DEBUG_FS */
1857 
1858 int xive_core_debug_init(void)
1859 {
1860     if (xive_enabled() && IS_ENABLED(CONFIG_DEBUG_FS))
1861         xive_core_debugfs_create();
1862 
1863     return 0;
1864 }