// SPDX-License-Identifier: GPL-2.0-only
/*
 * Driver for the L3 cache PMUs in Qualcomm Technologies chips.
 *
 * The driver supports a distributed cache architecture where the overall
 * cache for a socket is composed of multiple slices, each with its own PMU.
 * Access to each individual PMU is provided even though all CPUs share all
 * the slices. User space needs to aggregate the individual counts to provide
 * a global picture.
 *
 * See Documentation/admin-guide/perf/qcom_l3_pmu.rst for more details.
 *
 * Copyright (c) 2015-2017, The Linux Foundation. All rights reserved.
 */

#include <linux/acpi.h>
#include <linux/bitops.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>

/*
 * General constants
 */

/* Number of counters on each PMU */
#define L3_NUM_COUNTERS  8
/* Mask for the event type field within perf_event_attr.config and EVTYPE reg */
#define L3_EVTYPE_MASK   0xFF
/*
 * Bit position of the 'long counter' flag within perf_event_attr.config.
 * Reserve some space between the event type and this flag to allow expansion
 * in the event type field.
 */
#define L3_EVENT_LC_BIT  32

/*
 * Register offsets
 */

/* Perfmon registers */
#define L3_HML3_PM_CR       0x000
#define L3_HML3_PM_EVCNTR(__cntr) (0x420 + ((__cntr) & 0x7) * 8)
#define L3_HML3_PM_CNTCTL(__cntr) (0x120 + ((__cntr) & 0x7) * 8)
#define L3_HML3_PM_EVTYPE(__cntr) (0x220 + ((__cntr) & 0x7) * 8)
#define L3_HML3_PM_FILTRA   0x300
#define L3_HML3_PM_FILTRB   0x308
#define L3_HML3_PM_FILTRC   0x310
#define L3_HML3_PM_FILTRAM  0x304
#define L3_HML3_PM_FILTRBM  0x30C
#define L3_HML3_PM_FILTRCM  0x314

/* Basic counter registers */
#define L3_M_BC_CR         0x500
#define L3_M_BC_SATROLL_CR 0x504
#define L3_M_BC_CNTENSET   0x508
#define L3_M_BC_CNTENCLR   0x50C
#define L3_M_BC_INTENSET   0x510
#define L3_M_BC_INTENCLR   0x514
#define L3_M_BC_GANG       0x718
#define L3_M_BC_OVSR       0x740
#define L3_M_BC_IRQCTL     0x96C

/*
 * Bit field definitions
 */

/* L3_HML3_PM_CR */
#define PM_CR_RESET           (0)

/* L3_HML3_PM_XCNTCTL/L3_HML3_PM_CNTCTLx */
#define PMCNT_RESET           (0)

/* L3_HML3_PM_EVTYPEx */
#define EVSEL(__val)          ((__val) & L3_EVTYPE_MASK)

/* Reset value for all the filter registers */
#define PM_FLTR_RESET         (0)

/* L3_M_BC_CR */
#define BC_RESET              (1UL << 1)
#define BC_ENABLE             (1UL << 0)

/* L3_M_BC_SATROLL_CR */
#define BC_SATROLL_CR_RESET   (0)

/* L3_M_BC_CNTENSET */
#define PMCNTENSET(__cntr)    (1UL << ((__cntr) & 0x7))

/* L3_M_BC_CNTENCLR */
#define PMCNTENCLR(__cntr)    (1UL << ((__cntr) & 0x7))
#define BC_CNTENCLR_RESET     (0xFF)

/* L3_M_BC_INTENSET */
#define PMINTENSET(__cntr)    (1UL << ((__cntr) & 0x7))

/* L3_M_BC_INTENCLR */
#define PMINTENCLR(__cntr)    (1UL << ((__cntr) & 0x7))
#define BC_INTENCLR_RESET     (0xFF)

/* L3_M_BC_GANG */
#define GANG_EN(__cntr)       (1UL << ((__cntr) & 0x7))
#define BC_GANG_RESET         (0)

/* L3_M_BC_OVSR */
#define PMOVSRCLR(__cntr)     (1UL << ((__cntr) & 0x7))
#define PMOVSRCLR_RESET       (0xFF)

/* L3_M_BC_IRQCTL */
#define PMIRQONMSBEN(__cntr)  (1UL << ((__cntr) & 0x7))
#define BC_IRQCTL_RESET       (0x0)

/*
 * Events
 */

#define L3_EVENT_CYCLES         0x01
#define L3_EVENT_READ_HIT       0x20
#define L3_EVENT_READ_MISS      0x21
#define L3_EVENT_READ_HIT_D     0x22
#define L3_EVENT_READ_MISS_D    0x23
#define L3_EVENT_WRITE_HIT      0x24
#define L3_EVENT_WRITE_MISS     0x25

/*
 * Decoding of settings from perf_event_attr
 *
 * The config format for perf events is:
 * - config: bits 0-7: event type
 *           bit  32:  HW counter size requested, 0: 32 bits, 1: 64 bits
 */
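
/*
 * Example encoding (illustrative, derived from the format above): requesting
 * a 64-bit read-miss count means
 * config = BIT_ULL(L3_EVENT_LC_BIT) | L3_EVENT_READ_MISS (i.e. 0x100000021),
 * which perf tooling can express as, e.g., l3cache_0_0/event=0x21,lc/.
 */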

static inline u32 get_event_type(struct perf_event *event)
{
    return (event->attr.config) & L3_EVTYPE_MASK;
}

static inline bool event_uses_long_counter(struct perf_event *event)
{
    return !!(event->attr.config & BIT_ULL(L3_EVENT_LC_BIT));
}

static inline int event_num_counters(struct perf_event *event)
{
    return event_uses_long_counter(event) ? 2 : 1;
}

/*
 * Main PMU, inherits from the core perf PMU type
 */
struct l3cache_pmu {
    struct pmu          pmu;
    struct hlist_node   node;
    void __iomem        *regs;
    struct perf_event   *events[L3_NUM_COUNTERS];
    unsigned long       used_mask[BITS_TO_LONGS(L3_NUM_COUNTERS)];
    cpumask_t           cpumask;
};

#define to_l3cache_pmu(p) (container_of(p, struct l3cache_pmu, pmu))

/*
 * Type used to group hardware counter operations
 *
 * Used to implement two types of hardware counters, standard (32bits) and
 * long (64bits). The hardware supports counter chaining which we use to
 * implement long counters. This support is exposed via the 'lc' flag field
 * in perf_event_attr.config.
 */
struct l3cache_event_ops {
    /* Called to start event monitoring */
    void (*start)(struct perf_event *event);
    /* Called to stop event monitoring */
    void (*stop)(struct perf_event *event, int flags);
    /* Called to update the perf_event */
    void (*update)(struct perf_event *event);
};

/*
 * Implementation of long counter operations
 *
 * 64bit counters are implemented by chaining two of the 32bit physical
 * counters. The PMU only supports chaining of adjacent even/odd pairs
 * and for simplicity the driver always configures the odd counter to
 * count the overflows of the lower-numbered even counter. Note that since
 * the resulting hardware counter is 64bits no IRQs are required to maintain
 * the software counter which is also 64bits.
 */
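
/*
 * Sketch of the resulting layout (assuming the driver's even/odd pairing,
 * base index 2n): counter 2n+1 accumulates overflows of counter 2n, so the
 * combined value is ((u64)EVCNTR(2n + 1) << 32) | EVCNTR(2n), which is what
 * the update callback below reads back.
 */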

static void qcom_l3_cache__64bit_counter_start(struct perf_event *event)
{
    struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
    int idx = event->hw.idx;
    u32 evsel = get_event_type(event);
    u32 gang;

    /* Set the odd counter to count the overflows of the even counter */
    gang = readl_relaxed(l3pmu->regs + L3_M_BC_GANG);
    gang |= GANG_EN(idx + 1);
    writel_relaxed(gang, l3pmu->regs + L3_M_BC_GANG);

    /* Initialize the hardware counters and reset prev_count */
    local64_set(&event->hw.prev_count, 0);
    writel_relaxed(0, l3pmu->regs + L3_HML3_PM_EVCNTR(idx + 1));
    writel_relaxed(0, l3pmu->regs + L3_HML3_PM_EVCNTR(idx));

    /*
     * Set the event types, the upper half must use zero and the lower
     * half the actual event type
     */
    writel_relaxed(EVSEL(0), l3pmu->regs + L3_HML3_PM_EVTYPE(idx + 1));
    writel_relaxed(EVSEL(evsel), l3pmu->regs + L3_HML3_PM_EVTYPE(idx));

    /* Finally, enable the counters */
    writel_relaxed(PMCNT_RESET, l3pmu->regs + L3_HML3_PM_CNTCTL(idx + 1));
    writel_relaxed(PMCNTENSET(idx + 1), l3pmu->regs + L3_M_BC_CNTENSET);
    writel_relaxed(PMCNT_RESET, l3pmu->regs + L3_HML3_PM_CNTCTL(idx));
    writel_relaxed(PMCNTENSET(idx), l3pmu->regs + L3_M_BC_CNTENSET);
}

static void qcom_l3_cache__64bit_counter_stop(struct perf_event *event,
                          int flags)
{
    struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
    int idx = event->hw.idx;
    u32 gang = readl_relaxed(l3pmu->regs + L3_M_BC_GANG);

    /* Disable the counters */
    writel_relaxed(PMCNTENCLR(idx), l3pmu->regs + L3_M_BC_CNTENCLR);
    writel_relaxed(PMCNTENCLR(idx + 1), l3pmu->regs + L3_M_BC_CNTENCLR);

    /* Disable chaining */
    writel_relaxed(gang & ~GANG_EN(idx + 1), l3pmu->regs + L3_M_BC_GANG);
}

static void qcom_l3_cache__64bit_counter_update(struct perf_event *event)
{
    struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
    int idx = event->hw.idx;
    u32 hi, lo;
    u64 prev, new;

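    /*
     * Snapshot the 64-bit value lock-free: re-read the high word until it is
     * stable so a carry between the two 32-bit reads is not missed, then
     * publish the new value with cmpxchg so concurrent updaters agree on
     * prev_count.
     */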
    do {
        prev = local64_read(&event->hw.prev_count);
        do {
            hi = readl_relaxed(l3pmu->regs + L3_HML3_PM_EVCNTR(idx + 1));
            lo = readl_relaxed(l3pmu->regs + L3_HML3_PM_EVCNTR(idx));
        } while (hi != readl_relaxed(l3pmu->regs + L3_HML3_PM_EVCNTR(idx + 1)));
        new = ((u64)hi << 32) | lo;
    } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);

    local64_add(new - prev, &event->count);
}

static const struct l3cache_event_ops event_ops_long = {
    .start = qcom_l3_cache__64bit_counter_start,
    .stop = qcom_l3_cache__64bit_counter_stop,
    .update = qcom_l3_cache__64bit_counter_update,
};

/*
 * Implementation of standard counter operations
 *
 * 32bit counters use a single physical counter and a hardware feature that
 * asserts the overflow IRQ on the toggling of the most significant bit in
 * the counter. This feature allows the counters to be left free-running
 * without needing the usual reprogramming required to properly handle races
 * during concurrent calls to update.
 */
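
/*
 * Concretely (a reading of the scheme above): the IRQ fires on every MSB
 * toggle, i.e. every 2^31 increments, so prev_count is refreshed before the
 * counter can advance a full 2^32, and the unsigned 32-bit subtraction in
 * the update path below yields the correct delta even across wraparound.
 */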

static void qcom_l3_cache__32bit_counter_start(struct perf_event *event)
{
    struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
    int idx = event->hw.idx;
    u32 evsel = get_event_type(event);
    u32 irqctl = readl_relaxed(l3pmu->regs + L3_M_BC_IRQCTL);

    /* Set the counter to assert the overflow IRQ on MSB toggling */
    writel_relaxed(irqctl | PMIRQONMSBEN(idx), l3pmu->regs + L3_M_BC_IRQCTL);

    /* Initialize the hardware counter and reset prev_count */
    local64_set(&event->hw.prev_count, 0);
    writel_relaxed(0, l3pmu->regs + L3_HML3_PM_EVCNTR(idx));

    /* Set the event type */
    writel_relaxed(EVSEL(evsel), l3pmu->regs + L3_HML3_PM_EVTYPE(idx));

    /* Enable interrupt generation by this counter */
    writel_relaxed(PMINTENSET(idx), l3pmu->regs + L3_M_BC_INTENSET);

    /* Finally, enable the counter */
    writel_relaxed(PMCNT_RESET, l3pmu->regs + L3_HML3_PM_CNTCTL(idx));
    writel_relaxed(PMCNTENSET(idx), l3pmu->regs + L3_M_BC_CNTENSET);
}

static void qcom_l3_cache__32bit_counter_stop(struct perf_event *event,
                          int flags)
{
    struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
    int idx = event->hw.idx;
    u32 irqctl = readl_relaxed(l3pmu->regs + L3_M_BC_IRQCTL);

    /* Disable the counter */
    writel_relaxed(PMCNTENCLR(idx), l3pmu->regs + L3_M_BC_CNTENCLR);

    /* Disable interrupt generation by this counter */
    writel_relaxed(PMINTENCLR(idx), l3pmu->regs + L3_M_BC_INTENCLR);

    /* Set the counter to not assert the overflow IRQ on MSB toggling */
    writel_relaxed(irqctl & ~PMIRQONMSBEN(idx), l3pmu->regs + L3_M_BC_IRQCTL);
}

static void qcom_l3_cache__32bit_counter_update(struct perf_event *event)
{
    struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
    int idx = event->hw.idx;
    u32 prev, new;

    do {
        prev = local64_read(&event->hw.prev_count);
        new = readl_relaxed(l3pmu->regs + L3_HML3_PM_EVCNTR(idx));
    } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);

    local64_add(new - prev, &event->count);
}

static const struct l3cache_event_ops event_ops_std = {
    .start = qcom_l3_cache__32bit_counter_start,
    .stop = qcom_l3_cache__32bit_counter_stop,
    .update = qcom_l3_cache__32bit_counter_update,
};

/* Retrieve the appropriate operations for the given event */
static
const struct l3cache_event_ops *l3cache_event_get_ops(struct perf_event *event)
{
    if (event_uses_long_counter(event))
        return &event_ops_long;
    else
        return &event_ops_std;
}

/*
 * Top level PMU functions.
 */

static inline void qcom_l3_cache__init(struct l3cache_pmu *l3pmu)
{
    int i;

    writel_relaxed(BC_RESET, l3pmu->regs + L3_M_BC_CR);

    /*
     * Use writel for the first programming command to ensure the basic
     * counter unit is stopped before proceeding
     */
    writel(BC_SATROLL_CR_RESET, l3pmu->regs + L3_M_BC_SATROLL_CR);

    writel_relaxed(BC_CNTENCLR_RESET, l3pmu->regs + L3_M_BC_CNTENCLR);
    writel_relaxed(BC_INTENCLR_RESET, l3pmu->regs + L3_M_BC_INTENCLR);
    writel_relaxed(PMOVSRCLR_RESET, l3pmu->regs + L3_M_BC_OVSR);
    writel_relaxed(BC_GANG_RESET, l3pmu->regs + L3_M_BC_GANG);
    writel_relaxed(BC_IRQCTL_RESET, l3pmu->regs + L3_M_BC_IRQCTL);
    writel_relaxed(PM_CR_RESET, l3pmu->regs + L3_HML3_PM_CR);

    for (i = 0; i < L3_NUM_COUNTERS; ++i) {
        writel_relaxed(PMCNT_RESET, l3pmu->regs + L3_HML3_PM_CNTCTL(i));
        writel_relaxed(EVSEL(0), l3pmu->regs + L3_HML3_PM_EVTYPE(i));
    }

    writel_relaxed(PM_FLTR_RESET, l3pmu->regs + L3_HML3_PM_FILTRA);
    writel_relaxed(PM_FLTR_RESET, l3pmu->regs + L3_HML3_PM_FILTRAM);
    writel_relaxed(PM_FLTR_RESET, l3pmu->regs + L3_HML3_PM_FILTRB);
    writel_relaxed(PM_FLTR_RESET, l3pmu->regs + L3_HML3_PM_FILTRBM);
    writel_relaxed(PM_FLTR_RESET, l3pmu->regs + L3_HML3_PM_FILTRC);
    writel_relaxed(PM_FLTR_RESET, l3pmu->regs + L3_HML3_PM_FILTRCM);

    /*
     * Use writel here to ensure all programming commands are done
     * before proceeding
     */
    writel(BC_ENABLE, l3pmu->regs + L3_M_BC_CR);
}

static irqreturn_t qcom_l3_cache__handle_irq(int irq_num, void *data)
{
    struct l3cache_pmu *l3pmu = data;
    /* Read the overflow status register */
    long status = readl_relaxed(l3pmu->regs + L3_M_BC_OVSR);
    int idx;

    if (status == 0)
        return IRQ_NONE;

    /* Clear the bits we read on the overflow status register */
    writel_relaxed(status, l3pmu->regs + L3_M_BC_OVSR);

    for_each_set_bit(idx, &status, L3_NUM_COUNTERS) {
        struct perf_event *event;
        const struct l3cache_event_ops *ops;

        event = l3pmu->events[idx];
        if (!event)
            continue;

        /*
         * Since the IRQ is not enabled for events using long counters
         * we should never see one of those here, however, be consistent
         * and use the ops indirections like in the other operations.
         */

        ops = l3cache_event_get_ops(event);
        ops->update(event);
    }

    return IRQ_HANDLED;
}

/*
 * Implementation of abstract pmu functionality required by
 * the core perf events code.
 */

static void qcom_l3_cache__pmu_enable(struct pmu *pmu)
{
    struct l3cache_pmu *l3pmu = to_l3cache_pmu(pmu);

    /* Ensure the other programming commands are observed before enabling */
    wmb();

    writel_relaxed(BC_ENABLE, l3pmu->regs + L3_M_BC_CR);
}

static void qcom_l3_cache__pmu_disable(struct pmu *pmu)
{
    struct l3cache_pmu *l3pmu = to_l3cache_pmu(pmu);

    writel_relaxed(0, l3pmu->regs + L3_M_BC_CR);

    /* Ensure the basic counter unit is stopped before proceeding */
    wmb();
}

/*
 * We must NOT create groups containing events from multiple hardware PMUs,
 * although mixing different software and hardware PMUs is allowed.
 */
static bool qcom_l3_cache__validate_event_group(struct perf_event *event)
{
    struct perf_event *leader = event->group_leader;
    struct perf_event *sibling;
    int counters = 0;

    if (leader->pmu != event->pmu && !is_software_event(leader))
        return false;

    counters = event_num_counters(event);
    counters += event_num_counters(leader);

    for_each_sibling_event(sibling, leader) {
        if (is_software_event(sibling))
            continue;
        if (sibling->pmu != event->pmu)
            return false;
        counters += event_num_counters(sibling);
    }

    /*
     * If the group requires more counters than the HW has, it
     * cannot ever be scheduled.
     */
    return counters <= L3_NUM_COUNTERS;
}

static int qcom_l3_cache__event_init(struct perf_event *event)
{
    struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
    struct hw_perf_event *hwc = &event->hw;

    /*
     * Is the event for this PMU?
     */
    if (event->attr.type != event->pmu->type)
        return -ENOENT;

    /*
     * Sampling not supported since these events are not core-attributable.
     */
    if (hwc->sample_period)
        return -EINVAL;

    /*
     * Task mode not available, we run the counters as socket counters,
     * not attributable to any CPU and therefore cannot attribute per-task.
     */
    if (event->cpu < 0)
        return -EINVAL;

    /* Validate the group */
    if (!qcom_l3_cache__validate_event_group(event))
        return -EINVAL;

    hwc->idx = -1;

    /*
     * Many perf core operations (eg. events rotation) operate on a
     * single CPU context. This is obvious for CPU PMUs, where one
     * expects the same sets of events being observed on all CPUs,
     * but can lead to issues for off-core PMUs, like this one, where
     * each event could be theoretically assigned to a different CPU.
     * To mitigate this, we enforce CPU assignment to one designated
     * processor (the one described in the "cpumask" attribute exported
     * by the PMU device). perf user space tools honor this and avoid
     * opening more than one copy of the events.
     */
    event->cpu = cpumask_first(&l3pmu->cpumask);

    return 0;
}

static void qcom_l3_cache__event_start(struct perf_event *event, int flags)
{
    struct hw_perf_event *hwc = &event->hw;
    const struct l3cache_event_ops *ops = l3cache_event_get_ops(event);

    hwc->state = 0;
    ops->start(event);
}

static void qcom_l3_cache__event_stop(struct perf_event *event, int flags)
{
    struct hw_perf_event *hwc = &event->hw;
    const struct l3cache_event_ops *ops = l3cache_event_get_ops(event);

    if (hwc->state & PERF_HES_STOPPED)
        return;

    ops->stop(event, flags);
    if (flags & PERF_EF_UPDATE)
        ops->update(event);
    hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
}

static int qcom_l3_cache__event_add(struct perf_event *event, int flags)
{
    struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
    struct hw_perf_event *hwc = &event->hw;
    int order = event_uses_long_counter(event) ? 1 : 0;
    int idx;

    /*
     * Try to allocate a counter.
     */
    idx = bitmap_find_free_region(l3pmu->used_mask, L3_NUM_COUNTERS, order);
    if (idx < 0)
        /* The counters are all in use. */
        return -EAGAIN;

    hwc->idx = idx;
    hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
    l3pmu->events[idx] = event;

    if (flags & PERF_EF_START)
        qcom_l3_cache__event_start(event, 0);

    /* Propagate changes to the userspace mapping. */
    perf_event_update_userpage(event);

    return 0;
}

static void qcom_l3_cache__event_del(struct perf_event *event, int flags)
{
    struct l3cache_pmu *l3pmu = to_l3cache_pmu(event->pmu);
    struct hw_perf_event *hwc = &event->hw;
    int order = event_uses_long_counter(event) ? 1 : 0;

    /* Stop and clean up */
    qcom_l3_cache__event_stop(event, flags | PERF_EF_UPDATE);
    l3pmu->events[hwc->idx] = NULL;
    bitmap_release_region(l3pmu->used_mask, hwc->idx, order);

    /* Propagate changes to the userspace mapping. */
    perf_event_update_userpage(event);
}

static void qcom_l3_cache__event_read(struct perf_event *event)
{
    const struct l3cache_event_ops *ops = l3cache_event_get_ops(event);

    ops->update(event);
}

/*
 * Add sysfs attributes
 *
 * We export:
 * - formats, used by perf user space and other tools to configure events
 * - events, used by perf user space and other tools to create events
 *   symbolically, e.g.:
 *     perf stat -a -e l3cache_0_0/event=read-miss/ ls
 *     perf stat -a -e l3cache_0_0/event=0x21/ ls
 * - cpumask, used by perf user space and other tools to know on which CPUs
 *   to open the events
 */

/* formats */

static ssize_t l3cache_pmu_format_show(struct device *dev,
                       struct device_attribute *attr, char *buf)
{
    struct dev_ext_attribute *eattr;

    eattr = container_of(attr, struct dev_ext_attribute, attr);
    return sysfs_emit(buf, "%s\n", (char *) eattr->var);
}

#define L3CACHE_PMU_FORMAT_ATTR(_name, _config)                   \
    (&((struct dev_ext_attribute[]) {                     \
        { .attr = __ATTR(_name, 0444, l3cache_pmu_format_show, NULL), \
          .var = (void *) _config, }                      \
    })[0].attr.attr)

static struct attribute *qcom_l3_cache_pmu_formats[] = {
    L3CACHE_PMU_FORMAT_ATTR(event, "config:0-7"),
    L3CACHE_PMU_FORMAT_ATTR(lc, "config:" __stringify(L3_EVENT_LC_BIT)),
    NULL,
};

static const struct attribute_group qcom_l3_cache_pmu_format_group = {
    .name = "format",
    .attrs = qcom_l3_cache_pmu_formats,
};
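
/*
 * For reference, each PMU instance then exposes these strings under sysfs,
 * e.g. /sys/bus/event_source/devices/l3cache_0_0/format/event -> "config:0-7"
 * and .../format/lc -> "config:32".
 */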

/* events */

static ssize_t l3cache_pmu_event_show(struct device *dev,
                     struct device_attribute *attr, char *page)
{
    struct perf_pmu_events_attr *pmu_attr;

    pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
    return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id);
}

#define L3CACHE_EVENT_ATTR(_name, _id)                       \
    PMU_EVENT_ATTR_ID(_name, l3cache_pmu_event_show, _id)

static struct attribute *qcom_l3_cache_pmu_events[] = {
    L3CACHE_EVENT_ATTR(cycles, L3_EVENT_CYCLES),
    L3CACHE_EVENT_ATTR(read-hit, L3_EVENT_READ_HIT),
    L3CACHE_EVENT_ATTR(read-miss, L3_EVENT_READ_MISS),
    L3CACHE_EVENT_ATTR(read-hit-d-side, L3_EVENT_READ_HIT_D),
    L3CACHE_EVENT_ATTR(read-miss-d-side, L3_EVENT_READ_MISS_D),
    L3CACHE_EVENT_ATTR(write-hit, L3_EVENT_WRITE_HIT),
    L3CACHE_EVENT_ATTR(write-miss, L3_EVENT_WRITE_MISS),
    NULL
};

static const struct attribute_group qcom_l3_cache_pmu_events_group = {
    .name = "events",
    .attrs = qcom_l3_cache_pmu_events,
};

/* cpumask */

static ssize_t cpumask_show(struct device *dev,
                struct device_attribute *attr, char *buf)
{
    struct l3cache_pmu *l3pmu = to_l3cache_pmu(dev_get_drvdata(dev));

    return cpumap_print_to_pagebuf(true, buf, &l3pmu->cpumask);
}

static DEVICE_ATTR_RO(cpumask);

static struct attribute *qcom_l3_cache_pmu_cpumask_attrs[] = {
    &dev_attr_cpumask.attr,
    NULL,
};

static const struct attribute_group qcom_l3_cache_pmu_cpumask_attr_group = {
    .attrs = qcom_l3_cache_pmu_cpumask_attrs,
};

/*
 * Per PMU device attribute groups
 */
static const struct attribute_group *qcom_l3_cache_pmu_attr_grps[] = {
    &qcom_l3_cache_pmu_format_group,
    &qcom_l3_cache_pmu_events_group,
    &qcom_l3_cache_pmu_cpumask_attr_group,
    NULL,
};

/*
 * Probing functions and data.
 */

static int qcom_l3_cache_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
{
    struct l3cache_pmu *l3pmu = hlist_entry_safe(node, struct l3cache_pmu, node);

    /* If there is not a CPU/PMU association pick this CPU */
    if (cpumask_empty(&l3pmu->cpumask))
        cpumask_set_cpu(cpu, &l3pmu->cpumask);

    return 0;
}

static int qcom_l3_cache_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
{
    struct l3cache_pmu *l3pmu = hlist_entry_safe(node, struct l3cache_pmu, node);
    unsigned int target;

    if (!cpumask_test_and_clear_cpu(cpu, &l3pmu->cpumask))
        return 0;
    target = cpumask_any_but(cpu_online_mask, cpu);
    if (target >= nr_cpu_ids)
        return 0;
    perf_pmu_migrate_context(&l3pmu->pmu, cpu, target);
    cpumask_set_cpu(target, &l3pmu->cpumask);
    return 0;
}

static int qcom_l3_cache_pmu_probe(struct platform_device *pdev)
{
    struct l3cache_pmu *l3pmu;
    struct acpi_device *acpi_dev;
    struct resource *memrc;
    int ret;
    char *name;

    /* Initialize the PMU data structures */

    acpi_dev = ACPI_COMPANION(&pdev->dev);
    if (!acpi_dev)
        return -ENODEV;

    l3pmu = devm_kzalloc(&pdev->dev, sizeof(*l3pmu), GFP_KERNEL);
    name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "l3cache_%s_%s",
              acpi_dev->parent->pnp.unique_id, acpi_dev->pnp.unique_id);
    if (!l3pmu || !name)
        return -ENOMEM;

    l3pmu->pmu = (struct pmu) {
        .task_ctx_nr    = perf_invalid_context,

        .pmu_enable     = qcom_l3_cache__pmu_enable,
        .pmu_disable    = qcom_l3_cache__pmu_disable,
        .event_init     = qcom_l3_cache__event_init,
        .add            = qcom_l3_cache__event_add,
        .del            = qcom_l3_cache__event_del,
        .start          = qcom_l3_cache__event_start,
        .stop           = qcom_l3_cache__event_stop,
        .read           = qcom_l3_cache__event_read,

        .attr_groups    = qcom_l3_cache_pmu_attr_grps,
        .capabilities   = PERF_PMU_CAP_NO_EXCLUDE,
    };

    memrc = platform_get_resource(pdev, IORESOURCE_MEM, 0);
    l3pmu->regs = devm_ioremap_resource(&pdev->dev, memrc);
    if (IS_ERR(l3pmu->regs))
        return PTR_ERR(l3pmu->regs);

    qcom_l3_cache__init(l3pmu);

    ret = platform_get_irq(pdev, 0);
    if (ret <= 0)
        return ret;

    ret = devm_request_irq(&pdev->dev, ret, qcom_l3_cache__handle_irq, 0,
                   name, l3pmu);
    if (ret) {
        dev_err(&pdev->dev, "Request for IRQ failed for slice @%pa\n",
            &memrc->start);
        return ret;
    }

    /* Add this instance to the list used by the offline callback */
    ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE, &l3pmu->node);
    if (ret) {
        dev_err(&pdev->dev, "Error %d registering hotplug", ret);
        return ret;
    }

    ret = perf_pmu_register(&l3pmu->pmu, name, -1);
    if (ret < 0) {
        dev_err(&pdev->dev, "Failed to register L3 cache PMU (%d)\n", ret);
        return ret;
    }

    dev_info(&pdev->dev, "Registered %s, type: %d\n", name, l3pmu->pmu.type);

    return 0;
}

static const struct acpi_device_id qcom_l3_cache_pmu_acpi_match[] = {
    { "QCOM8081", },
    { }
};
MODULE_DEVICE_TABLE(acpi, qcom_l3_cache_pmu_acpi_match);

static struct platform_driver qcom_l3_cache_pmu_driver = {
    .driver = {
        .name = "qcom-l3cache-pmu",
        .acpi_match_table = ACPI_PTR(qcom_l3_cache_pmu_acpi_match),
        .suppress_bind_attrs = true,
    },
    .probe = qcom_l3_cache_pmu_probe,
};

static int __init register_qcom_l3_cache_pmu_driver(void)
{
    int ret;

    /* Install a hook to update the reader CPU in case it goes offline */
    ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE,
                      "perf/qcom/l3cache:online",
                      qcom_l3_cache_pmu_online_cpu,
                      qcom_l3_cache_pmu_offline_cpu);
    if (ret)
        return ret;

    return platform_driver_register(&qcom_l3_cache_pmu_driver);
}
device_initcall(register_qcom_l3_cache_pmu_driver);