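/*
 * Zhaoxin PMU; like Intel Architectural PerfMon-v2
 */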

#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/nmi.h>

#include <asm/cpufeature.h>
#include <asm/hardirq.h>
#include <asm/apic.h>

#include "../perf_event.h"
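
/*
 * Zhaoxin PerfMon, used on zxc and later.
 */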
static u64 zx_pmon_event_map[PERF_COUNT_HW_MAX] __read_mostly = {

	[PERF_COUNT_HW_CPU_CYCLES]        = 0x0082,
	[PERF_COUNT_HW_INSTRUCTIONS]      = 0x00c0,
	[PERF_COUNT_HW_CACHE_REFERENCES]  = 0x0515,
	[PERF_COUNT_HW_CACHE_MISSES]      = 0x051a,
	[PERF_COUNT_HW_BUS_CYCLES]        = 0x0083,
};
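
/*
 * The values above use the eventsel encoding exposed by the format
 * attributes further down in this file: bits 0-7 are the event select
 * and bits 8-15 the unit mask. For illustration (a reading of that
 * encoding, not of vendor documentation): 0x0515 is event 0x15 with
 * umask 0x05, and 0x00c0 is event 0xc0 with umask 0.
 */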

static struct event_constraint zxc_event_constraints[] __read_mostly = {

	FIXED_EVENT_CONSTRAINT(0x0082, 1),	/* unhalted core clock cycles */
	EVENT_CONSTRAINT_END
};

static struct event_constraint zxd_event_constraints[] __read_mostly = {

	FIXED_EVENT_CONSTRAINT(0x00c0, 0),	/* retired instructions */
	FIXED_EVENT_CONSTRAINT(0x0082, 1),	/* unhalted core clock cycles */
	FIXED_EVENT_CONSTRAINT(0x0083, 2),	/* unhalted bus clock cycles */
	EVENT_CONSTRAINT_END
};

static __initconst const u64 zxd_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x0042,
		[C(RESULT_MISS)] = 0x0538,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = 0x0043,
		[C(RESULT_MISS)] = 0x0562,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
},
[C(L1I)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x0300,
		[C(RESULT_MISS)] = 0x0301,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = 0x030a,
		[C(RESULT_MISS)] = 0x030b,
	},
},
[C(LL)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
},
[C(DTLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x0042,
		[C(RESULT_MISS)] = 0x052c,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = 0x0043,
		[C(RESULT_MISS)] = 0x0530,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = 0x0564,
		[C(RESULT_MISS)] = 0x0565,
	},
},
[C(ITLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x00c0,
		[C(RESULT_MISS)] = 0x0534,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
},
[C(BPU)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x0700,
		[C(RESULT_MISS)] = 0x0709,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
},
[C(NODE)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
},
};
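
/*
 * In these tables a value of -1 marks an op/result combination the
 * hardware cannot count (the generic x86 code rejects such a request
 * with -EINVAL), while 0 means no event is specified and yields -ENOENT.
 */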

static __initconst const u64 zxe_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x0568,
		[C(RESULT_MISS)] = 0x054b,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = 0x0669,
		[C(RESULT_MISS)] = 0x0562,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
},
[C(L1I)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x0300,
		[C(RESULT_MISS)] = 0x0301,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = 0x030a,
		[C(RESULT_MISS)] = 0x030b,
	},
},
[C(LL)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x0,
		[C(RESULT_MISS)] = 0x0,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = 0x0,
		[C(RESULT_MISS)] = 0x0,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = 0x0,
		[C(RESULT_MISS)] = 0x0,
	},
},
[C(DTLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x0568,
		[C(RESULT_MISS)] = 0x052c,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = 0x0669,
		[C(RESULT_MISS)] = 0x0530,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = 0x0564,
		[C(RESULT_MISS)] = 0x0565,
	},
},
[C(ITLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x00c0,
		[C(RESULT_MISS)] = 0x0534,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
},
[C(BPU)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = 0x0028,
		[C(RESULT_MISS)] = 0x0029,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
},
[C(NODE)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = -1,
		[C(RESULT_MISS)] = -1,
	},
},
};
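
/*
 * As in Intel Architectural PerfMon v2, all counters are gated by
 * MSR_CORE_PERF_GLOBAL_CTRL: writing 0 freezes every counter at once,
 * while writing x86_pmu.intel_ctrl (one bit per implemented general and
 * fixed counter, built in zhaoxin_pmu_init()) re-enables them.
 */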
static void zhaoxin_pmu_disable_all(void)
{
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
}

static void zhaoxin_pmu_enable_all(int added)
{
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
}

static inline u64 zhaoxin_pmu_get_status(void)
{
	u64 status;

	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);

	return status;
}

static inline void zhaoxin_pmu_ack_status(u64 ack)
{
	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
}

static inline void zxc_pmu_ack_status(u64 ack)
{
	/*
	 * ZXC needs global control enabled in order to clear status bits.
	 */
	zhaoxin_pmu_enable_all(0);
	zhaoxin_pmu_ack_status(ack);
	zhaoxin_pmu_disable_all();
}

static void zhaoxin_pmu_disable_fixed(struct hw_perf_event *hwc)
{
	int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
	u64 ctrl_val, mask;

	mask = 0xfULL << (idx * 4);

	rdmsrl(hwc->config_base, ctrl_val);
	ctrl_val &= ~mask;
	wrmsrl(hwc->config_base, ctrl_val);
}

static void zhaoxin_pmu_disable_event(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
		zhaoxin_pmu_disable_fixed(hwc);
		return;
	}

	x86_pmu_disable_event(event);
}

static void zhaoxin_pmu_enable_fixed(struct hw_perf_event *hwc)
{
	int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
	u64 ctrl_val, bits, mask;

	/*
	 * Enable IRQ generation (0x8),
	 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
	 * if requested:
	 */
	bits = 0x8ULL;
	if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
		bits |= 0x2;
	if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
		bits |= 0x1;

	bits <<= (idx * 4);
	mask = 0xfULL << (idx * 4);

	rdmsrl(hwc->config_base, ctrl_val);
	ctrl_val &= ~mask;
	ctrl_val |= bits;
	wrmsrl(hwc->config_base, ctrl_val);
}
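
/*
 * Worked example for the 4-bits-per-counter layout above: enabling fixed
 * counter 1 (idx == 1) with both USR and OS counting requested gives
 *
 *	bits = 0x8 | 0x2 | 0x1 = 0xb;	bits <<= 4;
 *
 * so MSR_ARCH_PERFMON_FIXED_CTR_CTRL bits [7:4] end up as 0xb: PMI on
 * overflow plus ring-3 and ring-0 counting.
 */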

static void zhaoxin_pmu_enable_event(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
		zhaoxin_pmu_enable_fixed(hwc);
		return;
	}

	__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
}

/*
 * This handler is triggered by the local APIC, so the APIC IRQ handling
 * rules apply:
 */
static int zhaoxin_pmu_handle_irq(struct pt_regs *regs)
{
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc;
	int handled = 0;
	u64 status;
	int bit;

	cpuc = this_cpu_ptr(&cpu_hw_events);
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	zhaoxin_pmu_disable_all();
	status = zhaoxin_pmu_get_status();
	if (!status)
		goto done;

again:
	if (x86_pmu.enabled_ack)
		zxc_pmu_ack_status(status);
	else
		zhaoxin_pmu_ack_status(status);

	inc_irq_stat(apic_perf_irqs);

	/*
	 * CondChgd bit 63 doesn't mean any overflow status. Ignore
	 * and clear the bit.
	 */
	if (__test_and_clear_bit(63, (unsigned long *)&status)) {
		if (!status)
			goto done;
	}

	for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
		struct perf_event *event = cpuc->events[bit];

		handled++;

		if (!test_bit(bit, cpuc->active_mask))
			continue;

		x86_perf_event_update(event);
		perf_sample_data_init(&data, 0, event->hw.last_period);

		if (!x86_perf_event_set_period(event))
			continue;

		if (perf_event_overflow(event, &data, regs))
			x86_pmu_stop(event, 0);
	}

	/*
	 * Repeat if there is more work to be done:
	 */
	status = zhaoxin_pmu_get_status();
	if (status)
		goto again;

done:
	zhaoxin_pmu_enable_all(0);
	return handled;
}

static u64 zhaoxin_pmu_event_map(int hw_event)
{
	return zx_pmon_event_map[hw_event];
}

static struct event_constraint *
zhaoxin_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
			      struct perf_event *event)
{
	struct event_constraint *c;

	if (x86_pmu.event_constraints) {
		for_each_event_constraint(c, x86_pmu.event_constraints) {
			if ((event->hw.config & c->cmask) == c->code)
				return c;
		}
	}

	return &unconstrained;
}

PMU_FORMAT_ATTR(event, "config:0-7");
PMU_FORMAT_ATTR(umask, "config:8-15");
PMU_FORMAT_ATTR(edge, "config:18");
PMU_FORMAT_ATTR(inv, "config:23");
PMU_FORMAT_ATTR(cmask, "config:24-31");

static struct attribute *zx_arch_formats_attr[] = {
	&format_attr_event.attr,
	&format_attr_umask.attr,
	&format_attr_edge.attr,
	&format_attr_inv.attr,
	&format_attr_cmask.attr,
	NULL,
};
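
/*
 * These attributes surface the raw config fields in sysfs (under the
 * event source's format/ directory), so a raw event can be requested
 * from userspace. For illustration, counting unhalted core clock cycles
 * per the event map above:
 *
 *	perf stat -e cpu/event=0x82,umask=0x00/ -- <workload>
 */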

static ssize_t zhaoxin_event_sysfs_show(char *page, u64 config)
{
	u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT);

	return x86_event_sysfs_show(page, config, event);
}

static const struct x86_pmu zhaoxin_pmu __initconst = {
	.name			= "zhaoxin",
	.handle_irq		= zhaoxin_pmu_handle_irq,
	.disable_all		= zhaoxin_pmu_disable_all,
	.enable_all		= zhaoxin_pmu_enable_all,
	.enable			= zhaoxin_pmu_enable_event,
	.disable		= zhaoxin_pmu_disable_event,
	.hw_config		= x86_pmu_hw_config,
	.schedule_events	= x86_schedule_events,
	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
	.event_map		= zhaoxin_pmu_event_map,
	.max_events		= ARRAY_SIZE(zx_pmon_event_map),
	.apic			= 1,
	/*
	 * For zxd/zxe, read/write operation for PMCx MSR is 48 bits.
	 */
	.max_period		= (1ULL << 47) - 1,
	.get_event_constraints	= zhaoxin_get_event_constraints,

	.format_attrs		= zx_arch_formats_attr,
	.events_sysfs_show	= zhaoxin_event_sysfs_show,
};

static const struct { int id; char *name; } zx_arch_events_map[] __initconst = {
	{ PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" },
	{ PERF_COUNT_HW_INSTRUCTIONS, "instructions" },
	{ PERF_COUNT_HW_BUS_CYCLES, "bus cycles" },
	{ PERF_COUNT_HW_CACHE_REFERENCES, "cache references" },
	{ PERF_COUNT_HW_CACHE_MISSES, "cache misses" },
	{ PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" },
	{ PERF_COUNT_HW_BRANCH_MISSES, "branch misses" },
};

static __init void zhaoxin_arch_events_quirk(void)
{
	int bit;

	/* disable event that reported as not present by cpuid */
	for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(zx_arch_events_map)) {
		zx_pmon_event_map[zx_arch_events_map[bit].id] = 0;
		pr_warn("CPUID marked event: \'%s\' unavailable\n",
			zx_arch_events_map[bit].name);
	}
}

__init int zhaoxin_pmu_init(void)
{
	union cpuid10_edx edx;
	union cpuid10_eax eax;
	union cpuid10_ebx ebx;
	struct event_constraint *c;
	unsigned int unused;
	int version;

	pr_info("Welcome to zhaoxin pmu!\n");

	/*
	 * Check whether the Architectural PerfMon supports
	 * hw_event or not.
	 */
	cpuid(10, &eax.full, &ebx.full, &unused, &edx.full);

	if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT - 1)
		return -ENODEV;

	version = eax.split.version_id;
	if (version != 2)
		return -ENODEV;

	x86_pmu = zhaoxin_pmu;
	pr_info("Version check pass!\n");

	x86_pmu.version			= version;
	x86_pmu.num_counters		= eax.split.num_counters;
	x86_pmu.cntval_bits		= eax.split.bit_width;
	x86_pmu.cntval_mask		= (1ULL << eax.split.bit_width) - 1;
	x86_pmu.events_maskl		= ebx.full;
	x86_pmu.events_mask_len		= eax.split.mask_length;

	x86_pmu.num_counters_fixed	= edx.split.num_counters_fixed;
	x86_add_quirk(zhaoxin_arch_events_quirk);

	switch (boot_cpu_data.x86) {
	case 0x06:
		if (boot_cpu_data.x86_model == 0x0f || boot_cpu_data.x86_model == 0x19) {

			x86_pmu.max_period = x86_pmu.cntval_mask >> 1;

			/* Clearing status works only if the global control is enable on zxc. */
			x86_pmu.enabled_ack = 1;

			x86_pmu.event_constraints = zxc_event_constraints;
			zx_pmon_event_map[PERF_COUNT_HW_INSTRUCTIONS] = 0;
			zx_pmon_event_map[PERF_COUNT_HW_CACHE_REFERENCES] = 0;
			zx_pmon_event_map[PERF_COUNT_HW_CACHE_MISSES] = 0;
			zx_pmon_event_map[PERF_COUNT_HW_BUS_CYCLES] = 0;

			pr_cont("ZXC events, ");
			break;
		}
		return -ENODEV;

	case 0x07:
		zx_pmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
			X86_CONFIG(.event = 0x01, .umask = 0x01, .inv = 0x01, .cmask = 0x01);

		zx_pmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
			X86_CONFIG(.event = 0x0f, .umask = 0x04, .inv = 0, .cmask = 0);

		switch (boot_cpu_data.x86_model) {
		case 0x1b:
			memcpy(hw_cache_event_ids, zxd_hw_cache_event_ids,
			       sizeof(hw_cache_event_ids));

			x86_pmu.event_constraints = zxd_event_constraints;

			zx_pmon_event_map[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0700;
			zx_pmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x0709;

			pr_cont("ZXD events, ");
			break;
		case 0x3b:
			memcpy(hw_cache_event_ids, zxe_hw_cache_event_ids,
			       sizeof(hw_cache_event_ids));

			x86_pmu.event_constraints = zxd_event_constraints;

			zx_pmon_event_map[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0028;
			zx_pmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x0029;

			pr_cont("ZXE events, ");
			break;
		default:
			return -ENODEV;
		}
		break;

	default:
		return -ENODEV;
	}

	x86_pmu.intel_ctrl = (1 << (x86_pmu.num_counters)) - 1;
	x86_pmu.intel_ctrl |= ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED;

	if (x86_pmu.event_constraints) {
		for_each_event_constraint(c, x86_pmu.event_constraints) {
			c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
			c->weight += x86_pmu.num_counters;
		}
	}

	return 0;
}
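
/*
 * For illustration, with four general counters and three fixed counters
 * reported by CPUID, the global enable mask built above would be
 *
 *	intel_ctrl = ((1 << 4) - 1) | (((1LL << 3) - 1) << 32)
 *		   = 0x000000070000000f
 *
 * since INTEL_PMC_IDX_FIXED (32) places the fixed counters at bit 32.
 */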