#include <linux/types.h>
#include <linux/interrupt.h>

#include <asm/xen/hypercall.h>
#include <xen/xen.h>
#include <xen/page.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>
#include <xen/interface/xenpmu.h>

#include "xen-ops.h"
#include "pmu.h"

/* x86_pmu.handle_irq() is used by the Xen PMU interrupt handler below */
#include "../events/perf_event.h"

#define XENPMU_IRQ_PROCESSING	1
struct xenpmu {
	/* Shared page between hypervisor and domain */
	struct xen_pmu_data *xenpmu_data;

	uint8_t flags;
};
static DEFINE_PER_CPU(struct xenpmu, xenpmu_shared);
#define get_xenpmu_data()	(this_cpu_ptr(&xenpmu_shared)->xenpmu_data)
#define get_xenpmu_flags()	(this_cpu_ptr(&xenpmu_shared)->flags)

/*
 * The Xen PMU context structures store their register arrays as offsets
 * from the start of the structure; this computes the array's address.
 */
#define field_offset(ctxt, field) ((void *)((uintptr_t)ctxt + \
					    (uintptr_t)ctxt->field))

/* AMD PMU */
#define F15H_NUM_COUNTERS	6
#define F10H_NUM_COUNTERS	4

static __read_mostly uint32_t amd_counters_base;
static __read_mostly uint32_t amd_ctrls_base;
static __read_mostly int amd_msr_step;
static __read_mostly int k7_counters_mirrored;
static __read_mostly int amd_num_counters;

/* Intel PMU */
#define MSR_TYPE_COUNTER	0
#define MSR_TYPE_CTRL		1
#define MSR_TYPE_GLOBAL		2
#define MSR_TYPE_ARCH_COUNTER	3
#define MSR_TYPE_ARCH_CTRL	4

/* Number of general-purpose counters: CPUID.0xA:EAX[15:8] */
#define PMU_GENERAL_NR_SHIFT	8
#define PMU_GENERAL_NR_BITS	8
#define PMU_GENERAL_NR_MASK	(((1 << PMU_GENERAL_NR_BITS) - 1) \
				 << PMU_GENERAL_NR_SHIFT)

/* Number of fixed counters: CPUID.0xA:EDX[4:0] */
#define PMU_FIXED_NR_SHIFT	0
#define PMU_FIXED_NR_BITS	5
#define PMU_FIXED_NR_MASK	(((1 << PMU_FIXED_NR_BITS) - 1) \
				 << PMU_FIXED_NR_SHIFT)

/* Alias registers (0x4c1) for full-width writes to PMCs */
#define MSR_PMC_ALIAS_MASK	(~(MSR_IA32_PERFCTR0 ^ MSR_IA32_PMC0))

#define INTEL_PMC_TYPE_SHIFT	30

static __read_mostly int intel_num_arch_counters, intel_num_fixed_counters;

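/*
 * Detect the host PMU layout: on AMD/Hygon record the counter/control MSR
 * base addresses and stride, on Intel read the number of architectural and
 * fixed counters from CPUID leaf 0xA.
 */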
static void xen_pmu_arch_init(void)
{
	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
		switch (boot_cpu_data.x86) {
		case 0x15:
			amd_num_counters = F15H_NUM_COUNTERS;
			amd_counters_base = MSR_F15H_PERF_CTR;
			amd_ctrls_base = MSR_F15H_PERF_CTL;
			amd_msr_step = 2;
			k7_counters_mirrored = 1;
			break;
		case 0x10:
		case 0x12:
		case 0x14:
		case 0x16:
		default:
			amd_num_counters = F10H_NUM_COUNTERS;
			amd_counters_base = MSR_K7_PERFCTR0;
			amd_ctrls_base = MSR_K7_EVNTSEL0;
			amd_msr_step = 1;
			k7_counters_mirrored = 0;
			break;
		}
	} else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
		amd_num_counters = F10H_NUM_COUNTERS;
		amd_counters_base = MSR_K7_PERFCTR0;
		amd_ctrls_base = MSR_K7_EVNTSEL0;
		amd_msr_step = 1;
		k7_counters_mirrored = 0;
	} else {
		uint32_t eax, ebx, ecx, edx;

		cpuid(0xa, &eax, &ebx, &ecx, &edx);

		intel_num_arch_counters = (eax & PMU_GENERAL_NR_MASK) >>
			PMU_GENERAL_NR_SHIFT;
		intel_num_fixed_counters = (edx & PMU_FIXED_NR_MASK) >>
			PMU_FIXED_NR_SHIFT;
	}
}

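/*
 * On Fam15h the legacy K7 counter/event-select MSRs are mirrored onto the
 * MSR_F15H_PERF_CTR/CTL range; translate a K7 address to its alias.
 */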
static inline uint32_t get_fam15h_addr(u32 addr)
{
	switch (addr) {
	case MSR_K7_PERFCTR0:
	case MSR_K7_PERFCTR1:
	case MSR_K7_PERFCTR2:
	case MSR_K7_PERFCTR3:
		return MSR_F15H_PERF_CTR + (addr - MSR_K7_PERFCTR0);
	case MSR_K7_EVNTSEL0:
	case MSR_K7_EVNTSEL1:
	case MSR_K7_EVNTSEL2:
	case MSR_K7_EVNTSEL3:
		return MSR_F15H_PERF_CTL + (addr - MSR_K7_EVNTSEL0);
	default:
		break;
	}

	return addr;
}

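/* True if @msr falls in either the Fam15h or the legacy K7 PMU MSR ranges */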
static inline bool is_amd_pmu_msr(unsigned int msr)
{
	if ((msr >= MSR_F15H_PERF_CTL &&
	     msr < MSR_F15H_PERF_CTR + (amd_num_counters * 2)) ||
	    (msr >= MSR_K7_EVNTSEL0 &&
	     msr < MSR_K7_PERFCTR0 + amd_num_counters))
		return true;

	return false;
}

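/*
 * Classify an Intel PMU MSR: returns true if @msr_index belongs to the PMU
 * and reports its type (control, global, fixed or architectural counter)
 * and, where relevant, the counter index.
 */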
static int is_intel_pmu_msr(u32 msr_index, int *type, int *index)
{
	u32 msr_index_pmc;

	switch (msr_index) {
	case MSR_CORE_PERF_FIXED_CTR_CTRL:
	case MSR_IA32_DS_AREA:
	case MSR_IA32_PEBS_ENABLE:
		*type = MSR_TYPE_CTRL;
		return true;

	case MSR_CORE_PERF_GLOBAL_CTRL:
	case MSR_CORE_PERF_GLOBAL_STATUS:
	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
		*type = MSR_TYPE_GLOBAL;
		return true;

	default:
		if ((msr_index >= MSR_CORE_PERF_FIXED_CTR0) &&
		    (msr_index < MSR_CORE_PERF_FIXED_CTR0 +
				 intel_num_fixed_counters)) {
			*index = msr_index - MSR_CORE_PERF_FIXED_CTR0;
			*type = MSR_TYPE_COUNTER;
			return true;
		}

		if ((msr_index >= MSR_P6_EVNTSEL0) &&
		    (msr_index < MSR_P6_EVNTSEL0 + intel_num_arch_counters)) {
			*index = msr_index - MSR_P6_EVNTSEL0;
			*type = MSR_TYPE_ARCH_CTRL;
			return true;
		}

		msr_index_pmc = msr_index & MSR_PMC_ALIAS_MASK;
		if ((msr_index_pmc >= MSR_IA32_PERFCTR0) &&
		    (msr_index_pmc < MSR_IA32_PERFCTR0 +
				     intel_num_arch_counters)) {
			*type = MSR_TYPE_ARCH_COUNTER;
			*index = msr_index_pmc - MSR_IA32_PERFCTR0;
			return true;
		}
		return false;
	}
}

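/*
 * Emulate an Intel PMU MSR access against the shared Xen PMU context.
 * Only valid while a PMU interrupt is being processed; returns false so
 * the caller can fall back to a real (safe) MSR access otherwise.
 */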
static bool xen_intel_pmu_emulate(unsigned int msr, u64 *val, int type,
				  int index, bool is_read)
{
	uint64_t *reg = NULL;
	struct xen_pmu_intel_ctxt *ctxt;
	uint64_t *fix_counters;
	struct xen_pmu_cntr_pair *arch_cntr_pair;
	struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
	uint8_t xenpmu_flags = get_xenpmu_flags();

	if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING))
		return false;

	ctxt = &xenpmu_data->pmu.c.intel;

	switch (msr) {
	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
		reg = &ctxt->global_ovf_ctrl;
		break;
	case MSR_CORE_PERF_GLOBAL_STATUS:
		reg = &ctxt->global_status;
		break;
	case MSR_CORE_PERF_GLOBAL_CTRL:
		reg = &ctxt->global_ctrl;
		break;
	case MSR_CORE_PERF_FIXED_CTR_CTRL:
		reg = &ctxt->fixed_ctrl;
		break;
	default:
		switch (type) {
		case MSR_TYPE_COUNTER:
			fix_counters = field_offset(ctxt, fixed_counters);
			reg = &fix_counters[index];
			break;
		case MSR_TYPE_ARCH_COUNTER:
			arch_cntr_pair = field_offset(ctxt, arch_counters);
			reg = &arch_cntr_pair[index].counter;
			break;
		case MSR_TYPE_ARCH_CTRL:
			arch_cntr_pair = field_offset(ctxt, arch_counters);
			reg = &arch_cntr_pair[index].control;
			break;
		default:
			return false;
		}
	}

	if (reg) {
		if (is_read)
			*val = *reg;
		else {
			*reg = *val;

			/* Writing GLOBAL_OVF_CTRL clears the status bits set in val */
			if (msr == MSR_CORE_PERF_GLOBAL_OVF_CTRL)
				ctxt->global_status &= (~(*val));
		}
		return true;
	}

	return false;
}

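/*
 * Emulate an AMD PMU MSR access against the shared Xen PMU context, same
 * contract as xen_intel_pmu_emulate() above.
 */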
static bool xen_amd_pmu_emulate(unsigned int msr, u64 *val, bool is_read)
{
	uint64_t *reg = NULL;
	int i, off = 0;
	struct xen_pmu_amd_ctxt *ctxt;
	uint64_t *counter_regs, *ctrl_regs;
	struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
	uint8_t xenpmu_flags = get_xenpmu_flags();

	if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING))
		return false;

	if (k7_counters_mirrored &&
	    ((msr >= MSR_K7_EVNTSEL0) && (msr <= MSR_K7_PERFCTR3)))
		msr = get_fam15h_addr(msr);

	ctxt = &xenpmu_data->pmu.c.amd;
	for (i = 0; i < amd_num_counters; i++) {
		if (msr == amd_ctrls_base + off) {
			ctrl_regs = field_offset(ctxt, ctrls);
			reg = &ctrl_regs[i];
			break;
		} else if (msr == amd_counters_base + off) {
			counter_regs = field_offset(ctxt, counters);
			reg = &counter_regs[i];
			break;
		}
		off += amd_msr_step;
	}

	if (reg) {
		if (is_read)
			*val = *reg;
		else
			*reg = *val;

		return true;
	}
	return false;
}

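/*
 * PMU MSR read path: returns true if @msr is a PMU register.  The value is
 * taken from the shared Xen context when possible, otherwise from hardware.
 */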
bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err)
{
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
		if (is_amd_pmu_msr(msr)) {
			if (!xen_amd_pmu_emulate(msr, val, 1))
				*val = native_read_msr_safe(msr, err);
			return true;
		}
	} else {
		int type, index;

		if (is_intel_pmu_msr(msr, &type, &index)) {
			if (!xen_intel_pmu_emulate(msr, val, type, index, 1))
				*val = native_read_msr_safe(msr, err);
			return true;
		}
	}

	return false;
}

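/* PMU MSR write path: the counterpart of pmu_msr_read() for wrmsr */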
bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err)
{
	uint64_t val = ((uint64_t)high << 32) | low;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
		if (is_amd_pmu_msr(msr)) {
			if (!xen_amd_pmu_emulate(msr, &val, 0))
				*err = native_write_msr_safe(msr, low, high);
			return true;
		}
	} else {
		int type, index;

		if (is_intel_pmu_msr(msr, &type, &index)) {
			if (!xen_intel_pmu_emulate(msr, &val, type, index, 0))
				*err = native_write_msr_safe(msr, low, high);
			return true;
		}
	}

	return false;
}

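/*
 * Counter reads: while a PMU interrupt is being handled the value comes
 * from the shared Xen context, otherwise it is read from the hardware MSR.
 */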
static unsigned long long xen_amd_read_pmc(int counter)
{
	struct xen_pmu_amd_ctxt *ctxt;
	uint64_t *counter_regs;
	struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
	uint8_t xenpmu_flags = get_xenpmu_flags();

	if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) {
		uint32_t msr;
		int err;

		msr = amd_counters_base + (counter * amd_msr_step);
		return native_read_msr_safe(msr, &err);
	}

	ctxt = &xenpmu_data->pmu.c.amd;
	counter_regs = field_offset(ctxt, counters);
	return counter_regs[counter];
}

static unsigned long long xen_intel_read_pmc(int counter)
{
	struct xen_pmu_intel_ctxt *ctxt;
	uint64_t *fixed_counters;
	struct xen_pmu_cntr_pair *arch_cntr_pair;
	struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
	uint8_t xenpmu_flags = get_xenpmu_flags();

	if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) {
		uint32_t msr;
		int err;

		/* Bit 30 set means a fixed counter, low bits are its index */
		if (counter & (1 << INTEL_PMC_TYPE_SHIFT))
			msr = MSR_CORE_PERF_FIXED_CTR0 + (counter & 0xffff);
		else
			msr = MSR_IA32_PERFCTR0 + counter;

		return native_read_msr_safe(msr, &err);
	}

	ctxt = &xenpmu_data->pmu.c.intel;
	if (counter & (1 << INTEL_PMC_TYPE_SHIFT)) {
		fixed_counters = field_offset(ctxt, fixed_counters);
		return fixed_counters[counter & 0xffff];
	}

	arch_cntr_pair = field_offset(ctxt, arch_counters);
	return arch_cntr_pair[counter].counter;
}

unsigned long long xen_read_pmc(int counter)
{
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return xen_amd_read_pmc(counter);
	else
		return xen_intel_read_pmc(counter);
}

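/*
 * Mirror a write to the local APIC LVTPC register into the shared PMU data
 * and, outside of interrupt processing, forward it to the hypervisor.
 */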
int pmu_apic_update(uint32_t val)
{
	int ret;
	struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

	if (!xenpmu_data) {
		pr_warn_once("%s: pmudata not initialized\n", __func__);
		return -EINVAL;
	}

	xenpmu_data->pmu.l.lapic_lvtpc = val;

	if (get_xenpmu_flags() & XENPMU_IRQ_PROCESSING)
		return 0;

	ret = HYPERVISOR_xenpmu_op(XENPMU_lvtpc_set, NULL);

	return ret;
}

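/* perf callbacks: report guest state and IP for samples taken in guests */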
static unsigned int xen_guest_state(void)
{
	const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
	unsigned int state = 0;

	if (!xenpmu_data) {
		pr_warn_once("%s: pmudata not initialized\n", __func__);
		return state;
	}

	if (!xen_initial_domain() || (xenpmu_data->domain_id >= DOMID_SELF))
		return state;

	state |= PERF_GUEST_ACTIVE;

	if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_PV) {
		if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_USER)
			state |= PERF_GUEST_USER;
	} else if (xenpmu_data->pmu.r.regs.cpl & 3) {
		state |= PERF_GUEST_USER;
	}

	return state;
}

static unsigned long xen_get_guest_ip(void)
{
	const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

	if (!xenpmu_data) {
		pr_warn_once("%s: pmudata not initialized\n", __func__);
		return 0;
	}

	return xenpmu_data->pmu.r.regs.ip;
}

static struct perf_guest_info_callbacks xen_guest_cbs = {
	.state  = xen_guest_state,
	.get_ip = xen_get_guest_ip,
};

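/* Build a pt_regs from the register snapshot the hypervisor provided */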
static void xen_convert_regs(const struct xen_pmu_regs *xen_regs,
			     struct pt_regs *regs, uint64_t pmu_flags)
{
	regs->ip = xen_regs->ip;
	regs->cs = xen_regs->cs;
	regs->sp = xen_regs->sp;

	/*
	 * Fake the CS RPL bits so that user_mode(regs) reflects the
	 * privilege level of the sample.
	 */
	if (pmu_flags & PMU_SAMPLE_PV) {
		if (pmu_flags & PMU_SAMPLE_USER)
			regs->cs |= 3;
		else
			regs->cs &= ~3;
	} else {
		if (xen_regs->cpl)
			regs->cs |= 3;
		else
			regs->cs &= ~3;
	}
}

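/*
 * PMU interrupt handler: marks the vcpu as processing PMU data, hands the
 * converted registers to the core x86 PMU code and then tells the
 * hypervisor (XENPMU_flush) that processing is finished.
 */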
irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id)
{
	int err, ret = IRQ_NONE;
	struct pt_regs regs = {0};
	const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
	uint8_t xenpmu_flags = get_xenpmu_flags();

	if (!xenpmu_data) {
		pr_warn_once("%s: pmudata not initialized\n", __func__);
		return ret;
	}

	this_cpu_ptr(&xenpmu_shared)->flags =
		xenpmu_flags | XENPMU_IRQ_PROCESSING;
	xen_convert_regs(&xenpmu_data->pmu.r.regs, &regs,
			 xenpmu_data->pmu.pmu_flags);
	if (x86_pmu.handle_irq(&regs))
		ret = IRQ_HANDLED;

	/* Tell the hypervisor that we are done processing the PMU data */
	err = HYPERVISOR_xenpmu_op(XENPMU_flush, NULL);
	this_cpu_ptr(&xenpmu_shared)->flags = xenpmu_flags;
	if (err) {
		pr_warn_once("%s: failed hypercall, err: %d\n", __func__, err);
		return IRQ_NONE;
	}

	return ret;
}

bool is_xen_pmu;

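/*
 * Register a per-vcpu PMU data page with the hypervisor.  On the first
 * successful registration the perf guest callbacks are installed and the
 * PMU layout is probed.
 */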
void xen_pmu_init(int cpu)
{
	int err;
	struct xen_pmu_params xp;
	unsigned long pfn;
	struct xen_pmu_data *xenpmu_data;

	BUILD_BUG_ON(sizeof(struct xen_pmu_data) > PAGE_SIZE);

	if (xen_hvm_domain() || (cpu != 0 && !is_xen_pmu))
		return;

	xenpmu_data = (struct xen_pmu_data *)get_zeroed_page(GFP_KERNEL);
	if (!xenpmu_data) {
		pr_err("VPMU init: No memory\n");
		return;
	}
	pfn = virt_to_pfn(xenpmu_data);

	xp.val = pfn_to_mfn(pfn);
	xp.vcpu = cpu;
	xp.version.maj = XENPMU_VER_MAJ;
	xp.version.min = XENPMU_VER_MIN;
	err = HYPERVISOR_xenpmu_op(XENPMU_init, &xp);
	if (err)
		goto fail;

	per_cpu(xenpmu_shared, cpu).xenpmu_data = xenpmu_data;
	per_cpu(xenpmu_shared, cpu).flags = 0;

	if (!is_xen_pmu) {
		is_xen_pmu = true;
		perf_register_guest_info_callbacks(&xen_guest_cbs);
		xen_pmu_arch_init();
	}

	return;

fail:
	if (err == -EOPNOTSUPP || err == -ENOSYS)
		pr_info_once("VPMU disabled by hypervisor.\n");
	else
		pr_info_once("Could not initialize VPMU for cpu %d, error %d\n",
			     cpu, err);
	free_pages((unsigned long)xenpmu_data, 0);
}

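/* Unregister the vcpu's PMU data page from the hypervisor and free it */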
void xen_pmu_finish(int cpu)
{
	struct xen_pmu_params xp;

	if (xen_hvm_domain())
		return;

	xp.vcpu = cpu;
	xp.version.maj = XENPMU_VER_MAJ;
	xp.version.min = XENPMU_VER_MIN;

	(void)HYPERVISOR_xenpmu_op(XENPMU_finish, &xp);

	free_pages((unsigned long)per_cpu(xenpmu_shared, cpu).xenpmu_data, 0);
	per_cpu(xenpmu_shared, cpu).xenpmu_data = NULL;
}