Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Support cstate residency counters
0003  *
0004  * Copyright (C) 2015, Intel Corp.
0005  * Author: Kan Liang (kan.liang@intel.com)
0006  *
0007  * This library is free software; you can redistribute it and/or
0008  * modify it under the terms of the GNU Library General Public
0009  * License as published by the Free Software Foundation; either
0010  * version 2 of the License, or (at your option) any later version.
0011  *
0012  * This library is distributed in the hope that it will be useful,
0013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
0014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0015  * Library General Public License for more details.
0016  *
0017  */
0018 
0019 /*
0020  * This file exports cstate related free running (read-only) counters
0021  * for perf. These counters may be used simultaneously by other tools,
0022  * such as turbostat. However, it still makes sense to implement them
0023  * in perf, because we can conveniently collect them together with
0024  * other events, and they can be used from tools without special MSR
0025  * access code.
0026  *
0027  * The events only support system-wide mode counting. There is no
0028  * sampling support because it is not supported by the hardware.
0029  *
0030  * According to counters' scope and category, two PMUs are registered
0031  * with the perf_event core subsystem.
0032  *  - 'cstate_core': The counter is available for each physical core.
0033  *    The counters include CORE_C*_RESIDENCY.
0034  *  - 'cstate_pkg': The counter is available for each physical package.
0035  *    The counters include PKG_C*_RESIDENCY.
0036  *
0037  * All of these counters are specified in the Intel® 64 and IA-32
0038  * Architectures Software Developer's Manual Vol3b.
0039  *
0040  * Model specific counters:
0041  *  MSR_CORE_C1_RES: CORE C1 Residency Counter
0042  *           perf code: 0x00
0043  *           Available model: SLM,AMT,GLM,CNL,ICX,TNT,ADL,RPL
0044  *           Scope: Core (each processor core has a MSR)
0045  *  MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
0046  *                 perf code: 0x01
0047  *                 Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,GLM,
0048  *                      CNL,KBL,CML,TNT
0049  *                 Scope: Core
0050  *  MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter
0051  *                 perf code: 0x02
0052  *                 Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
0053  *                      SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX,
0054  *                      TGL,TNT,RKL,ADL,RPL,SPR
0055  *                 Scope: Core
0056  *  MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
0057  *                 perf code: 0x03
0058  *                 Available model: SNB,IVB,HSW,BDW,SKL,CNL,KBL,CML,
0059  *                      ICL,TGL,RKL,ADL,RPL
0060  *                 Scope: Core
0061  *  MSR_PKG_C2_RESIDENCY:  Package C2 Residency Counter.
0062  *                 perf code: 0x00
0063  *                 Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL,
0064  *                      KBL,CML,ICL,ICX,TGL,TNT,RKL,ADL,
0065  *                      RPL,SPR
0066  *                 Scope: Package (physical package)
0067  *  MSR_PKG_C3_RESIDENCY:  Package C3 Residency Counter.
0068  *                 perf code: 0x01
0069  *                 Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL,
0070  *                      GLM,CNL,KBL,CML,ICL,TGL,TNT,RKL,
0071  *                      ADL,RPL
0072  *                 Scope: Package (physical package)
0073  *  MSR_PKG_C6_RESIDENCY:  Package C6 Residency Counter.
0074  *                 perf code: 0x02
0075  *                 Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
0076  *                      SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX,
0077  *                      TGL,TNT,RKL,ADL,RPL,SPR
0078  *                 Scope: Package (physical package)
0079  *  MSR_PKG_C7_RESIDENCY:  Package C7 Residency Counter.
0080  *                 perf code: 0x03
0081  *                 Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL,
0082  *                      KBL,CML,ICL,TGL,RKL,ADL,RPL
0083  *                 Scope: Package (physical package)
0084  *  MSR_PKG_C8_RESIDENCY:  Package C8 Residency Counter.
0085  *                 perf code: 0x04
0086  *                 Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL,
0087  *                      ADL,RPL
0088  *                 Scope: Package (physical package)
0089  *  MSR_PKG_C9_RESIDENCY:  Package C9 Residency Counter.
0090  *                 perf code: 0x05
0091  *                 Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL,
0092  *                      ADL,RPL
0093  *                 Scope: Package (physical package)
0094  *  MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
0095  *                 perf code: 0x06
0096  *                 Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL,
0097  *                      TNT,RKL,ADL,RPL
0098  *                 Scope: Package (physical package)
0099  *
0100  */
0101 
0102 #include <linux/module.h>
0103 #include <linux/slab.h>
0104 #include <linux/perf_event.h>
0105 #include <linux/nospec.h>
0106 #include <asm/cpu_device_id.h>
0107 #include <asm/intel-family.h>
0108 #include "../perf_event.h"
0109 #include "../probe.h"
0110 
0111 MODULE_LICENSE("GPL");
0112 
/*
 * DEFINE_CSTATE_FORMAT_ATTR - generate a read-only sysfs "format" attribute.
 * Expands to a show() routine that prints @_format followed by a newline,
 * plus a struct device_attribute named format_attr_<_var> exposed as @_name
 * with 0444 permissions.
 */
#define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format)		\
static ssize_t __cstate_##_var##_show(struct device *dev,	\
				struct device_attribute *attr,	\
				char *page)			\
{								\
	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);		\
	return sprintf(page, _format "\n");			\
}								\
static struct device_attribute format_attr_##_var =		\
	__ATTR(_name, 0444, __cstate_##_var##_show, NULL)
0123 
/* Forward declaration: "cpumask" show routine shared by both PMUs below */
static ssize_t cstate_get_attr_cpumask(struct device *dev,
				       struct device_attribute *attr,
				       char *buf);

/* Model -> events mapping */
struct cstate_model {
	unsigned long		core_events;	/* BIT() mask over enum perf_cstate_core_events */
	unsigned long		pkg_events;	/* BIT() mask over enum perf_cstate_pkg_events */
	unsigned long		quirks;		/* quirk flags, see below */
};

/* Quirk flags */
#define SLM_PKG_C6_USE_C7_MSR	(1UL << 0)	/* SLM: PKG C6 counted via MSR_PKG_C7_RESIDENCY */
#define KNL_CORE_C6_MSR		(1UL << 1)	/* KNL: C6 counted via MSR_KNL_CORE_C6_RESIDENCY */
0138 
/*
 * NOTE(review): this struct is not referenced anywhere in this file —
 * the core_msr[]/pkg_msr[] tables below use struct perf_msr (from
 * ../probe.h) instead.  Possibly vestigial; confirm before removing.
 */
struct perf_cstate_msr {
	u64	msr;				/* MSR address of the residency counter */
	struct	perf_pmu_events_attr *attr;	/* sysfs event attribute */
};
0143 
0144 
/* cstate_core PMU */
static struct pmu cstate_core_pmu;
static bool has_cstate_core;

/*
 * Perf config values for the core PMU.  Each value indexes core_msr[]
 * and must stay in sync with the "event=0x.." strings below.
 */
enum perf_cstate_core_events {
	PERF_CSTATE_CORE_C1_RES = 0,
	PERF_CSTATE_CORE_C3_RES,
	PERF_CSTATE_CORE_C6_RES,
	PERF_CSTATE_CORE_C7_RES,

	PERF_CSTATE_CORE_EVENT_MAX,
};

/* sysfs event strings: cstate_core/cN-residency/ maps to the config value */
PMU_EVENT_ATTR_STRING(c1-residency, attr_cstate_core_c1, "event=0x00");
PMU_EVENT_ATTR_STRING(c3-residency, attr_cstate_core_c3, "event=0x01");
PMU_EVENT_ATTR_STRING(c6-residency, attr_cstate_core_c6, "event=0x02");
PMU_EVENT_ATTR_STRING(c7-residency, attr_cstate_core_c7, "event=0x03");

/* Bitmask of core events actually present; filled in by cstate_probe() */
static unsigned long core_msr_mask;

/* One "events" attribute group per event; referenced by core_msr[] and core_attr_update[] */
PMU_EVENT_GROUP(events, cstate_core_c1);
PMU_EVENT_GROUP(events, cstate_core_c3);
PMU_EVENT_GROUP(events, cstate_core_c6);
PMU_EVENT_GROUP(events, cstate_core_c7);
0169 
0170 static bool test_msr(int idx, void *data)
0171 {
0172     return test_bit(idx, (unsigned long *) data);
0173 }
0174 
/* Per-event MSR address, sysfs group and presence test, indexed by the enum */
static struct perf_msr core_msr[] = {
	[PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES,		&group_cstate_core_c1,	test_msr },
	[PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY,	&group_cstate_core_c3,	test_msr },
	[PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY,	&group_cstate_core_c6,	test_msr },
	[PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY,	&group_cstate_core_c7,	test_msr },
};
0181 
static struct attribute *attrs_empty[] = {
	NULL,
};

/*
 * There are no default events, but we need to create
 * "events" group (with empty attrs) before updating
 * it with detected events.
 */
static struct attribute_group core_events_attr_group = {
	.name = "events",
	.attrs = attrs_empty,
};

/* The whole 64-bit config word is the event selector */
DEFINE_CSTATE_FORMAT_ATTR(core_event, event, "config:0-63");
static struct attribute *core_format_attrs[] = {
	&format_attr_core_event.attr,
	NULL,
};

static struct attribute_group core_format_attr_group = {
	.name = "format",
	.attrs = core_format_attrs,
};

/* CPUs currently designated to read the core counters (one per core) */
static cpumask_t cstate_core_cpu_mask;
static DEVICE_ATTR(cpumask, S_IRUGO, cstate_get_attr_cpumask, NULL);

/* "cpumask" attribute, shared by both PMUs; see cstate_get_attr_cpumask() */
static struct attribute *cstate_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static struct attribute_group cpumask_attr_group = {
	.attrs = cstate_cpumask_attrs,
};

/* sysfs groups for the cstate_core PMU */
static const struct attribute_group *core_attr_groups[] = {
	&core_events_attr_group,
	&core_format_attr_group,
	&cpumask_attr_group,
	NULL,
};
0225 
/* cstate_pkg PMU */
static struct pmu cstate_pkg_pmu;
static bool has_cstate_pkg;

/*
 * Perf config values for the package PMU.  Each value indexes pkg_msr[]
 * and must stay in sync with the "event=0x.." strings below.
 */
enum perf_cstate_pkg_events {
	PERF_CSTATE_PKG_C2_RES = 0,
	PERF_CSTATE_PKG_C3_RES,
	PERF_CSTATE_PKG_C6_RES,
	PERF_CSTATE_PKG_C7_RES,
	PERF_CSTATE_PKG_C8_RES,
	PERF_CSTATE_PKG_C9_RES,
	PERF_CSTATE_PKG_C10_RES,

	PERF_CSTATE_PKG_EVENT_MAX,
};

/* sysfs event strings: cstate_pkg/cN-residency/ maps to the config value */
PMU_EVENT_ATTR_STRING(c2-residency,  attr_cstate_pkg_c2,  "event=0x00");
PMU_EVENT_ATTR_STRING(c3-residency,  attr_cstate_pkg_c3,  "event=0x01");
PMU_EVENT_ATTR_STRING(c6-residency,  attr_cstate_pkg_c6,  "event=0x02");
PMU_EVENT_ATTR_STRING(c7-residency,  attr_cstate_pkg_c7,  "event=0x03");
PMU_EVENT_ATTR_STRING(c8-residency,  attr_cstate_pkg_c8,  "event=0x04");
PMU_EVENT_ATTR_STRING(c9-residency,  attr_cstate_pkg_c9,  "event=0x05");
PMU_EVENT_ATTR_STRING(c10-residency, attr_cstate_pkg_c10, "event=0x06");

/* Bitmask of package events actually present; filled in by cstate_probe() */
static unsigned long pkg_msr_mask;

/* One "events" attribute group per event; referenced by pkg_msr[] and pkg_attr_update[] */
PMU_EVENT_GROUP(events, cstate_pkg_c2);
PMU_EVENT_GROUP(events, cstate_pkg_c3);
PMU_EVENT_GROUP(events, cstate_pkg_c6);
PMU_EVENT_GROUP(events, cstate_pkg_c7);
PMU_EVENT_GROUP(events, cstate_pkg_c8);
PMU_EVENT_GROUP(events, cstate_pkg_c9);
PMU_EVENT_GROUP(events, cstate_pkg_c10);

/* Per-event MSR address, sysfs group and presence test, indexed by the enum */
static struct perf_msr pkg_msr[] = {
	[PERF_CSTATE_PKG_C2_RES]  = { MSR_PKG_C2_RESIDENCY,	&group_cstate_pkg_c2,	test_msr },
	[PERF_CSTATE_PKG_C3_RES]  = { MSR_PKG_C3_RESIDENCY,	&group_cstate_pkg_c3,	test_msr },
	[PERF_CSTATE_PKG_C6_RES]  = { MSR_PKG_C6_RESIDENCY,	&group_cstate_pkg_c6,	test_msr },
	[PERF_CSTATE_PKG_C7_RES]  = { MSR_PKG_C7_RESIDENCY,	&group_cstate_pkg_c7,	test_msr },
	[PERF_CSTATE_PKG_C8_RES]  = { MSR_PKG_C8_RESIDENCY,	&group_cstate_pkg_c8,	test_msr },
	[PERF_CSTATE_PKG_C9_RES]  = { MSR_PKG_C9_RESIDENCY,	&group_cstate_pkg_c9,	test_msr },
	[PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY,	&group_cstate_pkg_c10,	test_msr },
};

/* As for the core PMU: no default events, start with an empty group */
static struct attribute_group pkg_events_attr_group = {
	.name = "events",
	.attrs = attrs_empty,
};

/* The whole 64-bit config word is the event selector */
DEFINE_CSTATE_FORMAT_ATTR(pkg_event, event, "config:0-63");
static struct attribute *pkg_format_attrs[] = {
	&format_attr_pkg_event.attr,
	NULL,
};
static struct attribute_group pkg_format_attr_group = {
	.name = "format",
	.attrs = pkg_format_attrs,
};

/* CPUs currently designated to read the package counters (one per package/die) */
static cpumask_t cstate_pkg_cpu_mask;

/* sysfs groups for the cstate_pkg PMU; cpumask_attr_group is shared with core */
static const struct attribute_group *pkg_attr_groups[] = {
	&pkg_events_attr_group,
	&pkg_format_attr_group,
	&cpumask_attr_group,
	NULL,
};
0293 
0294 static ssize_t cstate_get_attr_cpumask(struct device *dev,
0295                        struct device_attribute *attr,
0296                        char *buf)
0297 {
0298     struct pmu *pmu = dev_get_drvdata(dev);
0299 
0300     if (pmu == &cstate_core_pmu)
0301         return cpumap_print_to_pagebuf(true, buf, &cstate_core_cpu_mask);
0302     else if (pmu == &cstate_pkg_pmu)
0303         return cpumap_print_to_pagebuf(true, buf, &cstate_pkg_cpu_mask);
0304     else
0305         return 0;
0306 }
0307 
/*
 * Validate and set up a cstate event.
 *
 * Sampling and per-task counting are rejected (see the header comment:
 * system-wide counting only, no sampling support in hardware).  The
 * config value selects the entry in core_msr[]/pkg_msr[], and the event
 * is rerouted to the designated reader CPU of the target core/package.
 */
static int cstate_pmu_event_init(struct perf_event *event)
{
	u64 cfg = event->attr.config;
	int cpu;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/* unsupported modes and filters */
	if (event->attr.sample_period) /* no sampling */
		return -EINVAL;

	/* Only CPU-bound (system-wide) events are supported */
	if (event->cpu < 0)
		return -EINVAL;

	if (event->pmu == &cstate_core_pmu) {
		if (cfg >= PERF_CSTATE_CORE_EVENT_MAX)
			return -EINVAL;
		/* Clamp after the bounds check to block speculative OOB indexing */
		cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_CORE_EVENT_MAX);
		if (!(core_msr_mask & (1 << cfg)))
			return -EINVAL;
		event->hw.event_base = core_msr[cfg].msr;
		/* Reroute to the designated reader of this core */
		cpu = cpumask_any_and(&cstate_core_cpu_mask,
				      topology_sibling_cpumask(event->cpu));
	} else if (event->pmu == &cstate_pkg_pmu) {
		if (cfg >= PERF_CSTATE_PKG_EVENT_MAX)
			return -EINVAL;
		cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX);
		if (!(pkg_msr_mask & (1 << cfg)))
			return -EINVAL;
		event->hw.event_base = pkg_msr[cfg].msr;
		/* Reroute to the designated reader of this package/die */
		cpu = cpumask_any_and(&cstate_pkg_cpu_mask,
				      topology_die_cpumask(event->cpu));
	} else {
		return -ENOENT;
	}

	/* No online designated reader for that core/package */
	if (cpu >= nr_cpu_ids)
		return -ENODEV;

	event->cpu = cpu;
	event->hw.config = cfg;
	event->hw.idx = -1;
	return 0;
}
0353 
/* Read the raw residency MSR selected at event init (hw.event_base) */
static inline u64 cstate_pmu_read_counter(struct perf_event *event)
{
	u64 val;

	rdmsrl(event->hw.event_base, val);
	return val;
}
0361 
/*
 * Fold the current counter value into event->count.
 *
 * The counters are free running, so the delta against the previous
 * snapshot is accumulated.  The cmpxchg retry loop guards against a
 * concurrent update of prev_count between the read of the snapshot
 * and the read of the MSR.
 */
static void cstate_pmu_event_update(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	u64 prev_raw_count, new_raw_count;

again:
	prev_raw_count = local64_read(&hwc->prev_count);
	new_raw_count = cstate_pmu_read_counter(event);

	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
			    new_raw_count) != prev_raw_count)
		goto again;

	local64_add(new_raw_count - prev_raw_count, &event->count);
}
0377 
/* Start: snapshot the free-running counter as the baseline for deltas */
static void cstate_pmu_event_start(struct perf_event *event, int mode)
{
	local64_set(&event->hw.prev_count, cstate_pmu_read_counter(event));
}

/* Stop: nothing to disable in hardware; just fold in the final delta */
static void cstate_pmu_event_stop(struct perf_event *event, int mode)
{
	cstate_pmu_event_update(event);
}

/* Del: same as stop with a forced final update */
static void cstate_pmu_event_del(struct perf_event *event, int mode)
{
	cstate_pmu_event_stop(event, PERF_EF_UPDATE);
}
0392 
0393 static int cstate_pmu_event_add(struct perf_event *event, int mode)
0394 {
0395     if (mode & PERF_EF_START)
0396         cstate_pmu_event_start(event, mode);
0397 
0398     return 0;
0399 }
0400 
/*
 * Check if exiting cpu is the designated reader. If so migrate the
 * events when there is a valid target available
 */
static int cstate_cpu_exit(unsigned int cpu)
{
	unsigned int target;

	if (has_cstate_core &&
	    cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask)) {

		/* Pick any other sibling of this core as the new reader */
		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
		/* Migrate events if there is a valid target */
		if (target < nr_cpu_ids) {
			cpumask_set_cpu(target, &cstate_core_cpu_mask);
			perf_pmu_migrate_context(&cstate_core_pmu, cpu, target);
		}
	}

	if (has_cstate_pkg &&
	    cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask)) {

		/* Pick any other CPU in this package/die as the new reader */
		target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
		/* Migrate events if there is a valid target */
		if (target < nr_cpu_ids) {
			cpumask_set_cpu(target, &cstate_pkg_cpu_mask);
			perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
		}
	}
	return 0;
}
0432 
/*
 * CPU hotplug "starting" callback: elect @cpu as designated reader for
 * its core and/or package if no sibling is already serving that role.
 */
static int cstate_cpu_init(unsigned int cpu)
{
	unsigned int target;

	/*
	 * If this is the first online thread of that core, set it in
	 * the core cpu mask as the designated reader.
	 */
	target = cpumask_any_and(&cstate_core_cpu_mask,
				 topology_sibling_cpumask(cpu));

	if (has_cstate_core && target >= nr_cpu_ids)
		cpumask_set_cpu(cpu, &cstate_core_cpu_mask);

	/*
	 * If this is the first online thread of that package, set it
	 * in the package cpu mask as the designated reader.
	 */
	target = cpumask_any_and(&cstate_pkg_cpu_mask,
				 topology_die_cpumask(cpu));
	if (has_cstate_pkg && target >= nr_cpu_ids)
		cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);

	return 0;
}
0458 
/*
 * attr_update groups, one per event in core_msr[]/pkg_msr[] order; the
 * same group pointers are handed to perf_msr_probe() via those tables.
 */
static const struct attribute_group *core_attr_update[] = {
	&group_cstate_core_c1,
	&group_cstate_core_c3,
	&group_cstate_core_c6,
	&group_cstate_core_c7,
	NULL,
};

static const struct attribute_group *pkg_attr_update[] = {
	&group_cstate_pkg_c2,
	&group_cstate_pkg_c3,
	&group_cstate_pkg_c6,
	&group_cstate_pkg_c7,
	&group_cstate_pkg_c8,
	&group_cstate_pkg_c9,
	&group_cstate_pkg_c10,
	NULL,
};
0477 
/*
 * Core PMU.  perf_invalid_context: no per-task context, system-wide
 * counting only.  PERF_PMU_CAP_NO_INTERRUPT: no sampling (see header
 * comment); PERF_PMU_CAP_NO_EXCLUDE: exclude_* filters not supported.
 */
static struct pmu cstate_core_pmu = {
	.attr_groups	= core_attr_groups,
	.attr_update	= core_attr_update,
	.name		= "cstate_core",
	.task_ctx_nr	= perf_invalid_context,
	.event_init	= cstate_pmu_event_init,
	.add		= cstate_pmu_event_add,
	.del		= cstate_pmu_event_del,
	.start		= cstate_pmu_event_start,
	.stop		= cstate_pmu_event_stop,
	.read		= cstate_pmu_event_update,
	.capabilities	= PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE,
	.module		= THIS_MODULE,
};

/* Package PMU; identical callbacks/capabilities, package-scoped counters */
static struct pmu cstate_pkg_pmu = {
	.attr_groups	= pkg_attr_groups,
	.attr_update	= pkg_attr_update,
	.name		= "cstate_pkg",
	.task_ctx_nr	= perf_invalid_context,
	.event_init	= cstate_pmu_event_init,
	.add		= cstate_pmu_event_add,
	.del		= cstate_pmu_event_del,
	.start		= cstate_pmu_event_start,
	.stop		= cstate_pmu_event_stop,
	.read		= cstate_pmu_event_update,
	.capabilities	= PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE,
	.module		= THIS_MODULE,
};
0507 
/*
 * Per-model event tables, matched against the CPU in intel_cstates_match[]
 * below.  Each bitmask lists the residency counters the model provides;
 * cstate_probe() additionally verifies each MSR before exposing it.
 */

/* Nehalem / Westmere */
static const struct cstate_model nhm_cstates __initconst = {
	.core_events		= BIT(PERF_CSTATE_CORE_C3_RES) |
				  BIT(PERF_CSTATE_CORE_C6_RES),

	.pkg_events		= BIT(PERF_CSTATE_PKG_C3_RES) |
				  BIT(PERF_CSTATE_PKG_C6_RES) |
				  BIT(PERF_CSTATE_PKG_C7_RES),
};

/* Sandy Bridge through Skylake server parts */
static const struct cstate_model snb_cstates __initconst = {
	.core_events		= BIT(PERF_CSTATE_CORE_C3_RES) |
				  BIT(PERF_CSTATE_CORE_C6_RES) |
				  BIT(PERF_CSTATE_CORE_C7_RES),

	.pkg_events		= BIT(PERF_CSTATE_PKG_C2_RES) |
				  BIT(PERF_CSTATE_PKG_C3_RES) |
				  BIT(PERF_CSTATE_PKG_C6_RES) |
				  BIT(PERF_CSTATE_PKG_C7_RES),
};

/* Haswell ULT / Kabylake / Cometlake: adds PKG C8-C10 */
static const struct cstate_model hswult_cstates __initconst = {
	.core_events		= BIT(PERF_CSTATE_CORE_C3_RES) |
				  BIT(PERF_CSTATE_CORE_C6_RES) |
				  BIT(PERF_CSTATE_CORE_C7_RES),

	.pkg_events		= BIT(PERF_CSTATE_PKG_C2_RES) |
				  BIT(PERF_CSTATE_PKG_C3_RES) |
				  BIT(PERF_CSTATE_PKG_C6_RES) |
				  BIT(PERF_CSTATE_PKG_C7_RES) |
				  BIT(PERF_CSTATE_PKG_C8_RES) |
				  BIT(PERF_CSTATE_PKG_C9_RES) |
				  BIT(PERF_CSTATE_PKG_C10_RES),
};

/* Cannonlake: hswult set plus core C1 */
static const struct cstate_model cnl_cstates __initconst = {
	.core_events		= BIT(PERF_CSTATE_CORE_C1_RES) |
				  BIT(PERF_CSTATE_CORE_C3_RES) |
				  BIT(PERF_CSTATE_CORE_C6_RES) |
				  BIT(PERF_CSTATE_CORE_C7_RES),

	.pkg_events		= BIT(PERF_CSTATE_PKG_C2_RES) |
				  BIT(PERF_CSTATE_PKG_C3_RES) |
				  BIT(PERF_CSTATE_PKG_C6_RES) |
				  BIT(PERF_CSTATE_PKG_C7_RES) |
				  BIT(PERF_CSTATE_PKG_C8_RES) |
				  BIT(PERF_CSTATE_PKG_C9_RES) |
				  BIT(PERF_CSTATE_PKG_C10_RES),
};

/* Icelake / Tigerlake / Rocketlake client */
static const struct cstate_model icl_cstates __initconst = {
	.core_events		= BIT(PERF_CSTATE_CORE_C6_RES) |
				  BIT(PERF_CSTATE_CORE_C7_RES),

	.pkg_events		= BIT(PERF_CSTATE_PKG_C2_RES) |
				  BIT(PERF_CSTATE_PKG_C3_RES) |
				  BIT(PERF_CSTATE_PKG_C6_RES) |
				  BIT(PERF_CSTATE_PKG_C7_RES) |
				  BIT(PERF_CSTATE_PKG_C8_RES) |
				  BIT(PERF_CSTATE_PKG_C9_RES) |
				  BIT(PERF_CSTATE_PKG_C10_RES),
};

/* Icelake-X/D and Sapphire Rapids servers */
static const struct cstate_model icx_cstates __initconst = {
	.core_events		= BIT(PERF_CSTATE_CORE_C1_RES) |
				  BIT(PERF_CSTATE_CORE_C6_RES),

	.pkg_events		= BIT(PERF_CSTATE_PKG_C2_RES) |
				  BIT(PERF_CSTATE_PKG_C6_RES),
};

/* Alderlake / Raptorlake */
static const struct cstate_model adl_cstates __initconst = {
	.core_events		= BIT(PERF_CSTATE_CORE_C1_RES) |
				  BIT(PERF_CSTATE_CORE_C6_RES) |
				  BIT(PERF_CSTATE_CORE_C7_RES),

	.pkg_events		= BIT(PERF_CSTATE_PKG_C2_RES) |
				  BIT(PERF_CSTATE_PKG_C3_RES) |
				  BIT(PERF_CSTATE_PKG_C6_RES) |
				  BIT(PERF_CSTATE_PKG_C7_RES) |
				  BIT(PERF_CSTATE_PKG_C8_RES) |
				  BIT(PERF_CSTATE_PKG_C9_RES) |
				  BIT(PERF_CSTATE_PKG_C10_RES),
};

/* Silvermont / Airmont Atoms; PKG C6 lives in the C7 MSR (quirk) */
static const struct cstate_model slm_cstates __initconst = {
	.core_events		= BIT(PERF_CSTATE_CORE_C1_RES) |
				  BIT(PERF_CSTATE_CORE_C6_RES),

	.pkg_events		= BIT(PERF_CSTATE_PKG_C6_RES),
	.quirks			= SLM_PKG_C6_USE_C7_MSR,
};


/* Knights Landing/Mill; C6 uses a KNL-specific MSR (quirk) */
static const struct cstate_model knl_cstates __initconst = {
	.core_events		= BIT(PERF_CSTATE_CORE_C6_RES),

	.pkg_events		= BIT(PERF_CSTATE_PKG_C2_RES) |
				  BIT(PERF_CSTATE_PKG_C3_RES) |
				  BIT(PERF_CSTATE_PKG_C6_RES),
	.quirks			= KNL_CORE_C6_MSR,
};


/* Goldmont / Tremont Atoms */
static const struct cstate_model glm_cstates __initconst = {
	.core_events		= BIT(PERF_CSTATE_CORE_C1_RES) |
				  BIT(PERF_CSTATE_CORE_C3_RES) |
				  BIT(PERF_CSTATE_CORE_C6_RES),

	.pkg_events		= BIT(PERF_CSTATE_PKG_C2_RES) |
				  BIT(PERF_CSTATE_PKG_C3_RES) |
				  BIT(PERF_CSTATE_PKG_C6_RES) |
				  BIT(PERF_CSTATE_PKG_C10_RES),
};
0621 
0622 
/* CPU model -> cstate_model table; consumed by x86_match_cpu() at init */
static const struct x86_cpu_id intel_cstates_match[] __initconst = {
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,		&nhm_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP,		&nhm_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX,		&nhm_cstates),

	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE,		&nhm_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP,		&nhm_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX,		&nhm_cstates),

	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,		&snb_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,	&snb_cstates),

	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,		&snb_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X,		&snb_cstates),

	X86_MATCH_INTEL_FAM6_MODEL(HASWELL,		&snb_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X,		&snb_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G,		&snb_cstates),

	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L,		&hswult_cstates),

	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT,	&slm_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D,	&slm_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT,	&slm_cstates),

	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,		&snb_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D,		&snb_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G,		&snb_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X,		&snb_cstates),

	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L,		&snb_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,		&snb_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,		&snb_cstates),

	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,		&hswult_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,		&hswult_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L,		&hswult_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE,		&hswult_cstates),

	X86_MATCH_INTEL_FAM6_MODEL(CANNONLAKE_L,	&cnl_cstates),

	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,	&knl_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,	&knl_cstates),

	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT,	&glm_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D,	&glm_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS,	&glm_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,	&glm_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT,	&glm_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L,	&glm_cstates),

	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L,		&icl_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE,		&icl_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,		&icx_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,		&icx_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,	&icx_cstates),

	X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L,		&icl_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE,		&icl_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE,		&icl_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,		&adl_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,		&adl_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N,		&adl_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE,		&adl_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P,	&adl_cstates),
	{ },
};
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
0691 
/*
 * Apply model quirks, then probe which residency MSRs are actually
 * usable and fill core_msr_mask/pkg_msr_mask.  Returns -ENODEV when
 * neither PMU has any working counter.
 */
static int __init cstate_probe(const struct cstate_model *cm)
{
	/* SLM has different MSR for PKG C6 */
	if (cm->quirks & SLM_PKG_C6_USE_C7_MSR)
		pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY;

	/*
	 * KNL has different MSR for CORE C6.
	 * NOTE(review): pkg_msr[] is indexed with a *core* enum value here.
	 * PERF_CSTATE_CORE_C6_RES and PERF_CSTATE_PKG_C6_RES are both 2
	 * (see the two enums above), so this retargets the package C6
	 * slot, not core_msr[] — confirm that is the intended behavior.
	 */
	if (cm->quirks & KNL_CORE_C6_MSR)
		pkg_msr[PERF_CSTATE_CORE_C6_RES].msr = MSR_KNL_CORE_C6_RESIDENCY;


	core_msr_mask = perf_msr_probe(core_msr, PERF_CSTATE_CORE_EVENT_MAX,
				       true, (void *) &cm->core_events);

	pkg_msr_mask = perf_msr_probe(pkg_msr, PERF_CSTATE_PKG_EVENT_MAX,
				      true, (void *) &cm->pkg_events);

	has_cstate_core = !!core_msr_mask;
	has_cstate_pkg  = !!pkg_msr_mask;

	return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV;
}
0714 
/*
 * Tear down everything cstate_init() may have set up: hotplug states
 * first, then whichever PMUs were successfully registered.
 */
static inline void cstate_cleanup(void)
{
	cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_ONLINE);
	cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_STARTING);

	if (has_cstate_core)
		perf_pmu_unregister(&cstate_core_pmu);

	if (has_cstate_pkg)
		perf_pmu_unregister(&cstate_pkg_pmu);
}
0726 
/*
 * Register the hotplug callbacks and the PMU(s) detected by
 * cstate_probe().  Any registration failure undoes the prior setup
 * via cstate_cleanup() and propagates the error.
 */
static int __init cstate_init(void)
{
	int err;

	/* NOTE(review): cpuhp_setup_state() return values are not checked */
	cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_STARTING,
			  "perf/x86/cstate:starting", cstate_cpu_init, NULL);
	cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_ONLINE,
			  "perf/x86/cstate:online", NULL, cstate_cpu_exit);

	if (has_cstate_core) {
		err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1);
		if (err) {
			has_cstate_core = false;
			pr_info("Failed to register cstate core pmu\n");
			cstate_cleanup();
			return err;
		}
	}

	if (has_cstate_pkg) {
		/* Multi-die packages expose the PMU as "cstate_die" instead */
		if (topology_max_die_per_package() > 1) {
			err = perf_pmu_register(&cstate_pkg_pmu,
						"cstate_die", -1);
		} else {
			err = perf_pmu_register(&cstate_pkg_pmu,
						cstate_pkg_pmu.name, -1);
		}
		if (err) {
			has_cstate_pkg = false;
			pr_info("Failed to register cstate pkg pmu\n");
			cstate_cleanup();
			return err;
		}
	}
	return 0;
}
0763 
/*
 * Module entry: match the boot CPU against intel_cstates_match[],
 * probe the model's counters and register the PMUs.  Refuses to load
 * under a hypervisor (the residency MSRs are presumably not usable
 * in a guest — the check simply bails with -ENODEV).
 */
static int __init cstate_pmu_init(void)
{
	const struct x86_cpu_id *id;
	int err;

	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
		return -ENODEV;

	id = x86_match_cpu(intel_cstates_match);
	if (!id)
		return -ENODEV;

	err = cstate_probe((const struct cstate_model *) id->driver_data);
	if (err)
		return err;

	return cstate_init();
}
module_init(cstate_pmu_init);
0783 
/* Module exit: undo all registrations done in cstate_init() */
static void __exit cstate_pmu_exit(void)
{
	cstate_cleanup();
}
module_exit(cstate_pmu_exit);