0001
0002 #include <linux/perf_event.h>
0003 #include <linux/sysfs.h>
0004 #include <linux/nospec.h>
0005 #include <asm/intel-family.h>
0006 #include "probe.h"
0007
/*
 * Event identifiers of the "msr" PMU.
 *
 * The value is what user space passes in perf_event_attr.config (see the
 * "event=0xNN" strings below); it is also the index into msr[] and the bit
 * position in msr_mask, so the numbering is spelled out explicitly.
 */
enum perf_msr_id {
	PERF_MSR_TSC		= 0,
	PERF_MSR_APERF		= 1,
	PERF_MSR_MPERF		= 2,
	PERF_MSR_PPERF		= 3,
	PERF_MSR_SMI		= 4,
	PERF_MSR_PTSC		= 5,
	PERF_MSR_IRPERF		= 6,
	PERF_MSR_THERM		= 7,
	PERF_MSR_EVENT_MAX,	/* number of events; keep last */
};
0019
0020 static bool test_aperfmperf(int idx, void *data)
0021 {
0022 return boot_cpu_has(X86_FEATURE_APERFMPERF);
0023 }
0024
0025 static bool test_ptsc(int idx, void *data)
0026 {
0027 return boot_cpu_has(X86_FEATURE_PTSC);
0028 }
0029
0030 static bool test_irperf(int idx, void *data)
0031 {
0032 return boot_cpu_has(X86_FEATURE_IRPERF);
0033 }
0034
0035 static bool test_therm_status(int idx, void *data)
0036 {
0037 return boot_cpu_has(X86_FEATURE_DTHERM);
0038 }
0039
0040 static bool test_intel(int idx, void *data)
0041 {
0042 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
0043 boot_cpu_data.x86 != 6)
0044 return false;
0045
0046 switch (boot_cpu_data.x86_model) {
0047 case INTEL_FAM6_NEHALEM:
0048 case INTEL_FAM6_NEHALEM_G:
0049 case INTEL_FAM6_NEHALEM_EP:
0050 case INTEL_FAM6_NEHALEM_EX:
0051
0052 case INTEL_FAM6_WESTMERE:
0053 case INTEL_FAM6_WESTMERE_EP:
0054 case INTEL_FAM6_WESTMERE_EX:
0055
0056 case INTEL_FAM6_SANDYBRIDGE:
0057 case INTEL_FAM6_SANDYBRIDGE_X:
0058
0059 case INTEL_FAM6_IVYBRIDGE:
0060 case INTEL_FAM6_IVYBRIDGE_X:
0061
0062 case INTEL_FAM6_HASWELL:
0063 case INTEL_FAM6_HASWELL_X:
0064 case INTEL_FAM6_HASWELL_L:
0065 case INTEL_FAM6_HASWELL_G:
0066
0067 case INTEL_FAM6_BROADWELL:
0068 case INTEL_FAM6_BROADWELL_D:
0069 case INTEL_FAM6_BROADWELL_G:
0070 case INTEL_FAM6_BROADWELL_X:
0071 case INTEL_FAM6_SAPPHIRERAPIDS_X:
0072
0073 case INTEL_FAM6_ATOM_SILVERMONT:
0074 case INTEL_FAM6_ATOM_SILVERMONT_D:
0075 case INTEL_FAM6_ATOM_AIRMONT:
0076
0077 case INTEL_FAM6_ATOM_GOLDMONT:
0078 case INTEL_FAM6_ATOM_GOLDMONT_D:
0079 case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
0080 case INTEL_FAM6_ATOM_TREMONT_D:
0081 case INTEL_FAM6_ATOM_TREMONT:
0082 case INTEL_FAM6_ATOM_TREMONT_L:
0083
0084 case INTEL_FAM6_XEON_PHI_KNL:
0085 case INTEL_FAM6_XEON_PHI_KNM:
0086 if (idx == PERF_MSR_SMI)
0087 return true;
0088 break;
0089
0090 case INTEL_FAM6_SKYLAKE_L:
0091 case INTEL_FAM6_SKYLAKE:
0092 case INTEL_FAM6_SKYLAKE_X:
0093 case INTEL_FAM6_KABYLAKE_L:
0094 case INTEL_FAM6_KABYLAKE:
0095 case INTEL_FAM6_COMETLAKE_L:
0096 case INTEL_FAM6_COMETLAKE:
0097 case INTEL_FAM6_ICELAKE_L:
0098 case INTEL_FAM6_ICELAKE:
0099 case INTEL_FAM6_ICELAKE_X:
0100 case INTEL_FAM6_ICELAKE_D:
0101 case INTEL_FAM6_TIGERLAKE_L:
0102 case INTEL_FAM6_TIGERLAKE:
0103 case INTEL_FAM6_ROCKETLAKE:
0104 case INTEL_FAM6_ALDERLAKE:
0105 case INTEL_FAM6_ALDERLAKE_L:
0106 case INTEL_FAM6_ALDERLAKE_N:
0107 case INTEL_FAM6_RAPTORLAKE:
0108 case INTEL_FAM6_RAPTORLAKE_P:
0109 if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
0110 return true;
0111 break;
0112 }
0113
0114 return false;
0115 }
0116
0117 PMU_EVENT_ATTR_STRING(tsc, attr_tsc, "event=0x00" );
0118 PMU_EVENT_ATTR_STRING(aperf, attr_aperf, "event=0x01" );
0119 PMU_EVENT_ATTR_STRING(mperf, attr_mperf, "event=0x02" );
0120 PMU_EVENT_ATTR_STRING(pperf, attr_pperf, "event=0x03" );
0121 PMU_EVENT_ATTR_STRING(smi, attr_smi, "event=0x04" );
0122 PMU_EVENT_ATTR_STRING(ptsc, attr_ptsc, "event=0x05" );
0123 PMU_EVENT_ATTR_STRING(irperf, attr_irperf, "event=0x06" );
0124 PMU_EVENT_ATTR_STRING(cpu_thermal_margin, attr_therm, "event=0x07" );
0125 PMU_EVENT_ATTR_STRING(cpu_thermal_margin.snapshot, attr_therm_snap, "1" );
0126 PMU_EVENT_ATTR_STRING(cpu_thermal_margin.unit, attr_therm_unit, "C" );
0127
/*
 * Bitmask of usable events, one bit per enum perf_msr_id value.
 * Written once by msr_init() and tested by msr_event_init().
 */
static unsigned long msr_mask;

/*
 * Per-event "events" groups; msr[] and attr_update[] refer to them as
 * group_<name>.
 * NOTE(review): PMU_EVENT_GROUP is defined outside this file (presumably
 * in probe.h) - confirm that it wraps the matching attr_<name>.
 */
PMU_EVENT_GROUP(events, aperf);
PMU_EVENT_GROUP(events, mperf);
PMU_EVENT_GROUP(events, pperf);
PMU_EVENT_GROUP(events, smi);
PMU_EVENT_GROUP(events, ptsc);
PMU_EVENT_GROUP(events, irperf);
0136
0137 static struct attribute *attrs_therm[] = {
0138 &attr_therm.attr.attr,
0139 &attr_therm_snap.attr.attr,
0140 &attr_therm_unit.attr.attr,
0141 NULL,
0142 };
0143
0144 static struct attribute_group group_therm = {
0145 .name = "events",
0146 .attrs = attrs_therm,
0147 };
0148
0149 static struct perf_msr msr[] = {
0150 [PERF_MSR_TSC] = { .no_check = true, },
0151 [PERF_MSR_APERF] = { MSR_IA32_APERF, &group_aperf, test_aperfmperf, },
0152 [PERF_MSR_MPERF] = { MSR_IA32_MPERF, &group_mperf, test_aperfmperf, },
0153 [PERF_MSR_PPERF] = { MSR_PPERF, &group_pperf, test_intel, },
0154 [PERF_MSR_SMI] = { MSR_SMI_COUNT, &group_smi, test_intel, },
0155 [PERF_MSR_PTSC] = { MSR_F15H_PTSC, &group_ptsc, test_ptsc, },
0156 [PERF_MSR_IRPERF] = { MSR_F17H_IRPERF, &group_irperf, test_irperf, },
0157 [PERF_MSR_THERM] = { MSR_IA32_THERM_STATUS, &group_therm, test_therm_status, },
0158 };
0159
0160 static struct attribute *events_attrs[] = {
0161 &attr_tsc.attr.attr,
0162 NULL,
0163 };
0164
0165 static struct attribute_group events_attr_group = {
0166 .name = "events",
0167 .attrs = events_attrs,
0168 };
0169
0170 PMU_FORMAT_ATTR(event, "config:0-63");
0171 static struct attribute *format_attrs[] = {
0172 &format_attr_event.attr,
0173 NULL,
0174 };
0175 static struct attribute_group format_attr_group = {
0176 .name = "format",
0177 .attrs = format_attrs,
0178 };
0179
0180 static const struct attribute_group *attr_groups[] = {
0181 &events_attr_group,
0182 &format_attr_group,
0183 NULL,
0184 };
0185
0186 static const struct attribute_group *attr_update[] = {
0187 &group_aperf,
0188 &group_mperf,
0189 &group_pperf,
0190 &group_smi,
0191 &group_ptsc,
0192 &group_irperf,
0193 &group_therm,
0194 NULL,
0195 };
0196
0197 static int msr_event_init(struct perf_event *event)
0198 {
0199 u64 cfg = event->attr.config;
0200
0201 if (event->attr.type != event->pmu->type)
0202 return -ENOENT;
0203
0204
0205 if (event->attr.sample_period)
0206 return -EINVAL;
0207
0208 if (cfg >= PERF_MSR_EVENT_MAX)
0209 return -EINVAL;
0210
0211 cfg = array_index_nospec((unsigned long)cfg, PERF_MSR_EVENT_MAX);
0212
0213 if (!(msr_mask & (1 << cfg)))
0214 return -EINVAL;
0215
0216 event->hw.idx = -1;
0217 event->hw.event_base = msr[cfg].msr;
0218 event->hw.config = cfg;
0219
0220 return 0;
0221 }
0222
0223 static inline u64 msr_read_counter(struct perf_event *event)
0224 {
0225 u64 now;
0226
0227 if (event->hw.event_base)
0228 rdmsrl(event->hw.event_base, now);
0229 else
0230 now = rdtsc_ordered();
0231
0232 return now;
0233 }
0234
0235 static void msr_event_update(struct perf_event *event)
0236 {
0237 u64 prev, now;
0238 s64 delta;
0239
0240
0241 again:
0242 prev = local64_read(&event->hw.prev_count);
0243 now = msr_read_counter(event);
0244
0245 if (local64_cmpxchg(&event->hw.prev_count, prev, now) != prev)
0246 goto again;
0247
0248 delta = now - prev;
0249 if (unlikely(event->hw.event_base == MSR_SMI_COUNT)) {
0250 delta = sign_extend64(delta, 31);
0251 local64_add(delta, &event->count);
0252 } else if (unlikely(event->hw.event_base == MSR_IA32_THERM_STATUS)) {
0253
0254 now = now & (1ULL << 31) ? (now >> 16) & 0x3f : -1;
0255 local64_set(&event->count, now);
0256 } else {
0257 local64_add(delta, &event->count);
0258 }
0259 }
0260
0261 static void msr_event_start(struct perf_event *event, int flags)
0262 {
0263 u64 now = msr_read_counter(event);
0264
0265 local64_set(&event->hw.prev_count, now);
0266 }
0267
/*
 * pmu::stop callback: bring @event->count up to date.  The update is done
 * unconditionally; @flags is not consulted.
 */
static void msr_event_stop(struct perf_event *event, int flags)
{
	msr_event_update(event);
}
0272
0273 static void msr_event_del(struct perf_event *event, int flags)
0274 {
0275 msr_event_stop(event, PERF_EF_UPDATE);
0276 }
0277
0278 static int msr_event_add(struct perf_event *event, int flags)
0279 {
0280 if (flags & PERF_EF_START)
0281 msr_event_start(event, flags);
0282
0283 return 0;
0284 }
0285
0286 static struct pmu pmu_msr = {
0287 .task_ctx_nr = perf_sw_context,
0288 .attr_groups = attr_groups,
0289 .event_init = msr_event_init,
0290 .add = msr_event_add,
0291 .del = msr_event_del,
0292 .start = msr_event_start,
0293 .stop = msr_event_stop,
0294 .read = msr_event_update,
0295 .capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE,
0296 .attr_update = attr_update,
0297 };
0298
0299 static int __init msr_init(void)
0300 {
0301 if (!boot_cpu_has(X86_FEATURE_TSC)) {
0302 pr_cont("no MSR PMU driver.\n");
0303 return 0;
0304 }
0305
0306 msr_mask = perf_msr_probe(msr, PERF_MSR_EVENT_MAX, true, NULL);
0307
0308 perf_pmu_register(&pmu_msr, "msr", -1);
0309
0310 return 0;
0311 }
0312 device_initcall(msr_init);