// SPDX-License-Identifier: GPL-2.0-only
/*
 * L220/L310 cache controller support
 *
 * Copyright (C) 2016 ARM Limited
 */
#include <linux/errno.h>
#include <linux/hrtimer.h>
#include <linux/io.h>
#include <linux/list.h>
#include <linux/perf_event.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/types.h>

#include <asm/hardware/cache-l2x0.h>

#define PMU_NR_COUNTERS 2

static void __iomem *l2x0_base;
static struct pmu *l2x0_pmu;
static cpumask_t pmu_cpu;

static const char *l2x0_name;

static ktime_t l2x0_pmu_poll_period;
static struct hrtimer l2x0_pmu_hrtimer;

/*
 * The L220/PL310 PMU has two equivalent counters, Counter1 and Counter0.
 * Registers controlling these are laid out in pairs, in descending order, i.e.
 * the register for Counter1 comes first, followed by the register for
 * Counter0.
 * We ensure that idx 0 -> Counter0, and idx 1 -> Counter1.
 */
static struct perf_event *events[PMU_NR_COUNTERS];

/* Find an unused counter */
static int l2x0_pmu_find_idx(void)
{
    int i;

    for (i = 0; i < PMU_NR_COUNTERS; i++) {
        if (!events[i])
            return i;
    }

    return -1;
}

/* How many counters are allocated? */
static int l2x0_pmu_num_active_counters(void)
{
    int i, cnt = 0;

    for (i = 0; i < PMU_NR_COUNTERS; i++) {
        if (events[i])
            cnt++;
    }

    return cnt;
}

static void l2x0_pmu_counter_config_write(int idx, u32 val)
{
    writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT0_CFG - 4 * idx);
}

static u32 l2x0_pmu_counter_read(int idx)
{
    return readl_relaxed(l2x0_base + L2X0_EVENT_CNT0_VAL - 4 * idx);
}

static void l2x0_pmu_counter_write(int idx, u32 val)
{
    writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT0_VAL - 4 * idx);
}
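
/*
 * Illustration of the accessor arithmetic above, following the pairwise,
 * descending register layout described near the top of this file:
 *
 *   idx 0: L2X0_EVENT_CNT0_VAL - 4 * 0 == L2X0_EVENT_CNT0_VAL
 *   idx 1: L2X0_EVENT_CNT0_VAL - 4 * 1 == L2X0_EVENT_CNT1_VAL
 *
 * and likewise for the _CFG registers, so idx N always names CounterN.
 */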

static void __l2x0_pmu_enable(void)
{
    u32 val = readl_relaxed(l2x0_base + L2X0_EVENT_CNT_CTRL);
    val |= L2X0_EVENT_CNT_CTRL_ENABLE;
    writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT_CTRL);
}

static void __l2x0_pmu_disable(void)
{
    u32 val = readl_relaxed(l2x0_base + L2X0_EVENT_CNT_CTRL);
    val &= ~L2X0_EVENT_CNT_CTRL_ENABLE;
    writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT_CTRL);
}

static void l2x0_pmu_enable(struct pmu *pmu)
{
    if (l2x0_pmu_num_active_counters() == 0)
        return;

    __l2x0_pmu_enable();
}

static void l2x0_pmu_disable(struct pmu *pmu)
{
    if (l2x0_pmu_num_active_counters() == 0)
        return;

    __l2x0_pmu_disable();
}

static void warn_if_saturated(u32 count)
{
    if (count != 0xffffffff)
        return;

    pr_warn_ratelimited("L2X0 counter saturated. Poll period too long\n");
}

static void l2x0_pmu_event_read(struct perf_event *event)
{
    struct hw_perf_event *hw = &event->hw;
    u64 prev_count, new_count, mask;

    do {
         prev_count = local64_read(&hw->prev_count);
         new_count = l2x0_pmu_counter_read(hw->idx);
    } while (local64_cmpxchg(&hw->prev_count, prev_count, new_count) != prev_count);

    mask = GENMASK_ULL(31, 0);
    local64_add((new_count - prev_count) & mask, &event->count);

    warn_if_saturated(new_count);
}
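
/*
 * Note on the read path: the cmpxchg() loop retries until prev_count is
 * replaced with new_count without having been changed underneath us (for
 * instance by the hrtimer poll resetting the counter), so the delta is
 * always computed against the snapshot that was actually swapped out. The
 * hardware counters are only 32 bits wide, hence the GENMASK_ULL(31, 0)
 * mask on the delta before it is accumulated into the 64-bit perf count.
 */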

static void l2x0_pmu_event_configure(struct perf_event *event)
{
    struct hw_perf_event *hw = &event->hw;

    /*
     * The L2X0 counters saturate at 0xffffffff rather than wrapping, so we
     * will *always* lose some number of events when a counter saturates,
     * and have no way of detecting how many were lost.
     *
     * To minimize the impact of this, we try to maximize the period by
     * always starting counters at zero. To ensure that group ratios are
     * representative, we poll periodically to avoid counters saturating.
     * See l2x0_pmu_poll().
     */
    local64_set(&hw->prev_count, 0);
    l2x0_pmu_counter_write(hw->idx, 0);
}

static enum hrtimer_restart l2x0_pmu_poll(struct hrtimer *hrtimer)
{
    unsigned long flags;
    int i;

    local_irq_save(flags);
    __l2x0_pmu_disable();

    for (i = 0; i < PMU_NR_COUNTERS; i++) {
        struct perf_event *event = events[i];

        if (!event)
            continue;

        l2x0_pmu_event_read(event);
        l2x0_pmu_event_configure(event);
    }

    __l2x0_pmu_enable();
    local_irq_restore(flags);

    hrtimer_forward_now(hrtimer, l2x0_pmu_poll_period);
    return HRTIMER_RESTART;
}
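
/*
 * The poll callback stops the whole counter block and masks local interrupts
 * while it drains and re-zeroes the counters, so that both counters are read
 * and restarted over the same interval (keeping group ratios representative);
 * counting resumes only once every active counter has been reset.
 */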

static void __l2x0_pmu_event_enable(int idx, u32 event)
{
    u32 val;

    val = event << L2X0_EVENT_CNT_CFG_SRC_SHIFT;
    val |= L2X0_EVENT_CNT_CFG_INT_DISABLED;
    l2x0_pmu_counter_config_write(idx, val);
}

static void l2x0_pmu_event_start(struct perf_event *event, int flags)
{
    struct hw_perf_event *hw = &event->hw;

    if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
        return;

    if (flags & PERF_EF_RELOAD) {
        WARN_ON_ONCE(!(hw->state & PERF_HES_UPTODATE));
        l2x0_pmu_event_configure(event);
    }

    hw->state = 0;

    __l2x0_pmu_event_enable(hw->idx, hw->config_base);
}

static void __l2x0_pmu_event_disable(int idx)
{
    u32 val;

    val = L2X0_EVENT_CNT_CFG_SRC_DISABLED << L2X0_EVENT_CNT_CFG_SRC_SHIFT;
    val |= L2X0_EVENT_CNT_CFG_INT_DISABLED;
    l2x0_pmu_counter_config_write(idx, val);
}

static void l2x0_pmu_event_stop(struct perf_event *event, int flags)
{
    struct hw_perf_event *hw = &event->hw;

    if (WARN_ON_ONCE(event->hw.state & PERF_HES_STOPPED))
        return;

    __l2x0_pmu_event_disable(hw->idx);

    hw->state |= PERF_HES_STOPPED;

    if (flags & PERF_EF_UPDATE) {
        l2x0_pmu_event_read(event);
        hw->state |= PERF_HES_UPTODATE;
    }
}
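
/*
 * The hw.state bits above follow the usual perf core convention:
 * PERF_HES_STOPPED means the counter is not currently counting, and
 * PERF_HES_UPTODATE means event->count already reflects the hardware value,
 * so a later start with PERF_EF_RELOAD may safely reprogram the counter.
 */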

static int l2x0_pmu_event_add(struct perf_event *event, int flags)
{
    struct hw_perf_event *hw = &event->hw;
    int idx = l2x0_pmu_find_idx();

    if (idx == -1)
        return -EAGAIN;

    /*
     * Pin the timer, so that the overflows are handled by the chosen
     * event->cpu (this is the same one as presented in "cpumask"
     * attribute).
     */
    if (l2x0_pmu_num_active_counters() == 0)
        hrtimer_start(&l2x0_pmu_hrtimer, l2x0_pmu_poll_period,
                  HRTIMER_MODE_REL_PINNED);

    events[idx] = event;
    hw->idx = idx;

    l2x0_pmu_event_configure(event);

    hw->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;

    if (flags & PERF_EF_START)
        l2x0_pmu_event_start(event, 0);

    return 0;
}

static void l2x0_pmu_event_del(struct perf_event *event, int flags)
{
    struct hw_perf_event *hw = &event->hw;

    l2x0_pmu_event_stop(event, PERF_EF_UPDATE);

    events[hw->idx] = NULL;
    hw->idx = -1;

    if (l2x0_pmu_num_active_counters() == 0)
        hrtimer_cancel(&l2x0_pmu_hrtimer);
}

static bool l2x0_pmu_group_is_valid(struct perf_event *event)
{
    struct pmu *pmu = event->pmu;
    struct perf_event *leader = event->group_leader;
    struct perf_event *sibling;
    int num_hw = 0;

    if (leader->pmu == pmu)
        num_hw++;
    else if (!is_software_event(leader))
        return false;

    for_each_sibling_event(sibling, leader) {
        if (sibling->pmu == pmu)
            num_hw++;
        else if (!is_software_event(sibling))
            return false;
    }

    return num_hw <= PMU_NR_COUNTERS;
}
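
/*
 * A group is accepted only if every member is either a software event or an
 * event on this PMU, and no more than PMU_NR_COUNTERS of them need a hardware
 * counter; mixing in events from another hardware PMU would make it
 * impossible to schedule the group as a unit.
 */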

static int l2x0_pmu_event_init(struct perf_event *event)
{
    struct hw_perf_event *hw = &event->hw;

    if (event->attr.type != l2x0_pmu->type)
        return -ENOENT;

    if (is_sampling_event(event) ||
        event->attach_state & PERF_ATTACH_TASK)
        return -EINVAL;

    if (event->cpu < 0)
        return -EINVAL;

    if (event->attr.config & ~L2X0_EVENT_CNT_CFG_SRC_MASK)
        return -EINVAL;

    hw->config_base = event->attr.config;

    if (!l2x0_pmu_group_is_valid(event))
        return -EINVAL;

    event->cpu = cpumask_first(&pmu_cpu);

    return 0;
}
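
/*
 * event_init rejects sampling and per-task use because this is an uncore
 * PMU: the counters are not tied to any CPU or task context, and with
 * L2X0_EVENT_CNT_CFG_INT_DISABLED there is no overflow interrupt to drive
 * sampling. All events are instead forced onto the single CPU advertised in
 * the "cpumask" attribute.
 */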

struct l2x0_event_attribute {
    struct device_attribute attr;
    unsigned int config;
    bool pl310_only;
};

#define L2X0_EVENT_ATTR(_name, _config, _pl310_only)                \
    (&((struct l2x0_event_attribute[]) {{                   \
        .attr = __ATTR(_name, S_IRUGO, l2x0_pmu_event_show, NULL),  \
        .config = _config,                      \
        .pl310_only = _pl310_only,                  \
    }})[0].attr.attr)

#define L220_PLUS_EVENT_ATTR(_name, _config)                    \
    L2X0_EVENT_ATTR(_name, _config, false)

#define PL310_EVENT_ATTR(_name, _config)                    \
    L2X0_EVENT_ATTR(_name, _config, true)
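
/*
 * L2X0_EVENT_ATTR() builds each sysfs entry from a compound literal: the
 * anonymous one-element array of struct l2x0_event_attribute has static
 * storage duration at file scope, so taking the address of its embedded
 * struct attribute is valid in the static initializers below, and
 * container_of() can later recover the config value and the pl310_only flag.
 */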

static ssize_t l2x0_pmu_event_show(struct device *dev,
                   struct device_attribute *attr, char *buf)
{
    struct l2x0_event_attribute *lattr;

    lattr = container_of(attr, typeof(*lattr), attr);
    return snprintf(buf, PAGE_SIZE, "config=0x%x\n", lattr->config);
}

static umode_t l2x0_pmu_event_attr_is_visible(struct kobject *kobj,
                          struct attribute *attr,
                          int unused)
{
    struct device *dev = kobj_to_dev(kobj);
    struct pmu *pmu = dev_get_drvdata(dev);
    struct l2x0_event_attribute *lattr;

    lattr = container_of(attr, typeof(*lattr), attr.attr);

    if (!lattr->pl310_only || strcmp("l2c_310", pmu->name) == 0)
        return attr->mode;

    return 0;
}

static struct attribute *l2x0_pmu_event_attrs[] = {
    L220_PLUS_EVENT_ATTR(co,    0x1),
    L220_PLUS_EVENT_ATTR(drhit, 0x2),
    L220_PLUS_EVENT_ATTR(drreq, 0x3),
    L220_PLUS_EVENT_ATTR(dwhit, 0x4),
    L220_PLUS_EVENT_ATTR(dwreq, 0x5),
    L220_PLUS_EVENT_ATTR(dwtreq,    0x6),
    L220_PLUS_EVENT_ATTR(irhit, 0x7),
    L220_PLUS_EVENT_ATTR(irreq, 0x8),
    L220_PLUS_EVENT_ATTR(wa,    0x9),
    PL310_EVENT_ATTR(ipfalloc,  0xa),
    PL310_EVENT_ATTR(epfhit,    0xb),
    PL310_EVENT_ATTR(epfalloc,  0xc),
    PL310_EVENT_ATTR(srrcvd,    0xd),
    PL310_EVENT_ATTR(srconf,    0xe),
    PL310_EVENT_ATTR(epfrcvd,   0xf),
    NULL
};

static struct attribute_group l2x0_pmu_event_attrs_group = {
    .name = "events",
    .attrs = l2x0_pmu_event_attrs,
    .is_visible = l2x0_pmu_event_attr_is_visible,
};

static ssize_t l2x0_pmu_cpumask_show(struct device *dev,
                     struct device_attribute *attr, char *buf)
{
    return cpumap_print_to_pagebuf(true, buf, &pmu_cpu);
}

static struct device_attribute l2x0_pmu_cpumask_attr =
        __ATTR(cpumask, S_IRUGO, l2x0_pmu_cpumask_show, NULL);

static struct attribute *l2x0_pmu_cpumask_attrs[] = {
    &l2x0_pmu_cpumask_attr.attr,
    NULL,
};

static struct attribute_group l2x0_pmu_cpumask_attr_group = {
    .attrs = l2x0_pmu_cpumask_attrs,
};

static const struct attribute_group *l2x0_pmu_attr_groups[] = {
    &l2x0_pmu_event_attrs_group,
    &l2x0_pmu_cpumask_attr_group,
    NULL,
};

static void l2x0_pmu_reset(void)
{
    int i;

    __l2x0_pmu_disable();

    for (i = 0; i < PMU_NR_COUNTERS; i++)
        __l2x0_pmu_event_disable(i);
}

static int l2x0_pmu_offline_cpu(unsigned int cpu)
{
    unsigned int target;

    if (!cpumask_test_and_clear_cpu(cpu, &pmu_cpu))
        return 0;

    target = cpumask_any_but(cpu_online_mask, cpu);
    if (target >= nr_cpu_ids)
        return 0;

    perf_pmu_migrate_context(l2x0_pmu, cpu, target);
    cpumask_set_cpu(target, &pmu_cpu);

    return 0;
}

void l2x0_pmu_suspend(void)
{
    int i;

    if (!l2x0_pmu)
        return;

    l2x0_pmu_disable(l2x0_pmu);

    for (i = 0; i < PMU_NR_COUNTERS; i++) {
        if (events[i])
            l2x0_pmu_event_stop(events[i], PERF_EF_UPDATE);
    }
}

void l2x0_pmu_resume(void)
{
    int i;

    if (!l2x0_pmu)
        return;

    l2x0_pmu_reset();

    for (i = 0; i < PMU_NR_COUNTERS; i++) {
        if (events[i])
            l2x0_pmu_event_start(events[i], PERF_EF_RELOAD);
    }

    l2x0_pmu_enable(l2x0_pmu);
}

void __init l2x0_pmu_register(void __iomem *base, u32 part)
{
    /*
     * Determine whether we support the PMU, and choose the name for sysfs.
     * This is also used by l2x0_pmu_event_attr_is_visible to determine
     * which events to display, as the PL310 PMU supports a superset of
     * L220 events.
     *
     * The L210 PMU has a different programmer's interface, and is not
     * supported by this driver.
     *
     * We must defer registering the PMU until the perf subsystem is up and
     * running, so just stash the name and base, and leave that to another
     * initcall.
     */
    switch (part & L2X0_CACHE_ID_PART_MASK) {
    case L2X0_CACHE_ID_PART_L220:
        l2x0_name = "l2c_220";
        break;
    case L2X0_CACHE_ID_PART_L310:
        l2x0_name = "l2c_310";
        break;
    default:
        return;
    }

    l2x0_base = base;
}
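
/*
 * l2x0_pmu_register() is meant to be called from the L2 cache controller
 * initialisation code once the controller has been probed, with the mapped
 * register base and the raw cache ID value. A hypothetical call site, shown
 * only for illustration, might look like:
 *
 *   l2x0_pmu_register(l2x0_base, readl_relaxed(l2x0_base + L2X0_CACHE_ID));
 *
 * The actual perf registration then happens later, from l2x0_pmu_init()
 * below.
 */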

static __init int l2x0_pmu_init(void)
{
    int ret;

    if (!l2x0_base)
        return 0;

    l2x0_pmu = kzalloc(sizeof(*l2x0_pmu), GFP_KERNEL);
    if (!l2x0_pmu) {
        pr_warn("Unable to allocate L2x0 PMU\n");
        return -ENOMEM;
    }

    *l2x0_pmu = (struct pmu) {
        .task_ctx_nr = perf_invalid_context,
        .pmu_enable = l2x0_pmu_enable,
        .pmu_disable = l2x0_pmu_disable,
        .read = l2x0_pmu_event_read,
        .start = l2x0_pmu_event_start,
        .stop = l2x0_pmu_event_stop,
        .add = l2x0_pmu_event_add,
        .del = l2x0_pmu_event_del,
        .event_init = l2x0_pmu_event_init,
        .attr_groups = l2x0_pmu_attr_groups,
        .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
    };

    l2x0_pmu_reset();

    /*
     * We always use a hrtimer rather than an interrupt.
     * See comments in l2x0_pmu_event_configure and l2x0_pmu_poll.
     *
     * Polling once a second allows the counters to fill up to 1/128th on a
     * quad-core test chip with cores clocked at 400MHz. Hopefully this
     * leaves sufficient headroom to avoid overflow on production silicon
     * at higher frequencies.
     */
    l2x0_pmu_poll_period = ms_to_ktime(1000);
    hrtimer_init(&l2x0_pmu_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
    l2x0_pmu_hrtimer.function = l2x0_pmu_poll;

    cpumask_set_cpu(0, &pmu_cpu);
    ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_L2X0_ONLINE,
                    "perf/arm/l2x0:online", NULL,
                    l2x0_pmu_offline_cpu);
    if (ret)
        goto out_pmu;

    ret = perf_pmu_register(l2x0_pmu, l2x0_name, -1);
    if (ret)
        goto out_cpuhp;

    return 0;

out_cpuhp:
    cpuhp_remove_state_nocalls(CPUHP_AP_PERF_ARM_L2X0_ONLINE);
out_pmu:
    kfree(l2x0_pmu);
    l2x0_pmu = NULL;
    return ret;
}
device_initcall(l2x0_pmu_init);
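
/*
 * Once registered, the PMU appears under /sys/bus/event_source/devices as
 * "l2c_220" or "l2c_310", and the events declared above can be counted
 * system-wide with perf, for example (illustrative command line):
 *
 *   perf stat -a -e l2c_310/drhit/,l2c_310/drreq/ -- sleep 1
 *
 * Sampling (perf record) is rejected by l2x0_pmu_event_init(), so only
 * counting mode is available.
 */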