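/*
 * KVM x86 PMU emulation: common vPMU logic shared by the Intel and AMD
 * implementations.  Vendor-specific behavior is reached through the
 * kvm_pmu_ops static calls defined below.
 */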
#include <linux/types.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
#include <linux/bsearch.h>
#include <linux/sort.h>
#include <asm/perf_event.h>
#include <asm/cpu_device_id.h>
#include "x86.h"
#include "cpuid.h"
#include "lapic.h"
#include "pmu.h"
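/*
 * Upper bound on the number of events in a single KVM_SET_PMU_EVENT_FILTER
 * filter; large enough to cover the vast majority of currently defined
 * architectural events.
 */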
#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300

struct x86_pmu_capability __read_mostly kvm_pmu_cap;
EXPORT_SYMBOL_GPL(kvm_pmu_cap);
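/*
 * Ice Lake server parts are singled out in pmc_reprogram_counter(): their
 * PEBS events on fixed counter 0 are created with maximum precision
 * (precise_ip == 3).
 */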
static const struct x86_cpu_id vmx_icl_pebs_cpu[] = {
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, NULL),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, NULL),
	{}
};
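/*
 * Each virtual counter is a "struct kvm_pmc".  Counters come in two flavors:
 * general purpose (gp_counters[]) and fixed (fixed_counters[]), both part of
 * "struct kvm_pmu"; AMD implements only gp counters.  A counter can be
 * referenced by its MSR address, by the index passed to RDPMC, or by a
 * global "pmc->idx" in which gp counters come first and fixed counters start
 * at INTEL_PMC_IDX_FIXED.  The kvm_x86_pmu_*_to_pmc() ops translate between
 * these namespaces.
 */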
static struct kvm_pmu_ops kvm_pmu_ops __read_mostly;

#define KVM_X86_PMU_OP(func)					     \
	DEFINE_STATIC_CALL_NULL(kvm_x86_pmu_##func,		     \
				*(((struct kvm_pmu_ops *)0)->func));
#define KVM_X86_PMU_OP_OPTIONAL KVM_X86_PMU_OP
#include <asm/kvm-x86-pmu-ops.h>
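/*
 * Install the vendor's (Intel or AMD) kvm_pmu_ops and patch every op into
 * its static call site; mandatory ops are sanity-checked for presence.
 */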
void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops)
{
	memcpy(&kvm_pmu_ops, pmu_ops, sizeof(kvm_pmu_ops));

#define __KVM_X86_PMU_OP(func) \
	static_call_update(kvm_x86_pmu_##func, kvm_pmu_ops.func);
#define KVM_X86_PMU_OP(func) \
	WARN_ON(!kvm_pmu_ops.func); __KVM_X86_PMU_OP(func)
#define KVM_X86_PMU_OP_OPTIONAL __KVM_X86_PMU_OP
#include <asm/kvm-x86-pmu-ops.h>
#undef __KVM_X86_PMU_OP
}

static inline bool pmc_is_enabled(struct kvm_pmc *pmc)
{
	return static_call(kvm_x86_pmu_pmc_is_enabled)(pmc);
}

static void kvm_pmi_trigger_fn(struct irq_work *irq_work)
{
	struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu, irq_work);
	struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);

	kvm_pmu_deliver_pmi(vcpu);
}

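/*
 * Record a counter overflow for the guest: set the relevant bit in the
 * guest's global status, queue KVM_REQ_PMU so the overflow is processed
 * before the next VM-Enter, and, if the counter has interrupts enabled,
 * arrange for a PMI to be injected.
 */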
static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
{
	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
	bool skip_pmi = false;

	/* Ignore counters that have already been reprogrammed. */
	if (test_and_set_bit(pmc->idx, pmu->reprogram_pmi))
		return;

	if (pmc->perf_event && pmc->perf_event->attr.precise_ip) {
		/* Indicate PEBS overflow to the guest via GLOBAL_STATUS. */
		skip_pmi = __test_and_set_bit(GLOBAL_STATUS_BUFFER_OVF_BIT,
					      (unsigned long *)&pmu->global_status);
	} else {
		__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
	}
	kvm_make_request(KVM_REQ_PMU, pmc->vcpu);

	if (!pmc->intr || skip_pmi)
		return;

	/*
	 * Inject the PMI.  If the overflow NMI did not interrupt the guest,
	 * the vCPU may be blocked and can't safely be kicked from NMI
	 * context, so punt delivery to irq_work.  Otherwise simply request
	 * a PMI, which will be injected before the next VM-Enter.
	 */
	if (in_pmi && !kvm_handling_nmi_from_guest(pmc->vcpu))
		irq_work_queue(&pmc_to_pmu(pmc)->irq_work);
	else
		kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
}

static void kvm_perf_overflow(struct perf_event *perf_event,
			      struct perf_sample_data *data,
			      struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;

	__kvm_perf_overflow(pmc, true);
}

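/*
 * Create (or re-create) the host perf_event that backs a virtual counter,
 * translating the guest's event selector and exclude bits into a
 * perf_event_attr.  On success the counter is live and unpaused.
 */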
static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
				  u64 config, bool exclude_user,
				  bool exclude_kernel, bool intr)
{
	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
	struct perf_event *event;
	struct perf_event_attr attr = {
		.type = type,
		.size = sizeof(attr),
		.pinned = true,
		.exclude_idle = true,
		.exclude_host = 1,
		.exclude_user = exclude_user,
		.exclude_kernel = exclude_kernel,
		.config = config,
	};
	bool pebs = test_bit(pmc->idx, (unsigned long *)&pmu->pebs_enable);

	attr.sample_period = get_sample_period(pmc, pmc->counter);

	if ((attr.config & HSW_IN_TX_CHECKPOINTED) &&
	    guest_cpuid_is_intel(pmc->vcpu)) {
		/*
		 * HSW_IN_TX_CHECKPOINTED is not supported with a non-zero
		 * sample period; clear the period so that creating the
		 * counter at least succeeds.
		 */
		attr.sample_period = 0;
	}
	if (pebs) {
		/*
		 * A non-zero precision level turns the event into a guest
		 * PEBS event so that the host PEBS PMI handler can tell
		 * whether an overflow belongs to the host or the guest.
		 * On Ice Lake, fixed counter 0 additionally uses maximum
		 * precision to match the hardware's precise-distribution
		 * (PDIR) behavior.
		 */
		attr.precise_ip = 1;
		if (x86_match_cpu(vmx_icl_pebs_cpu) && pmc->idx == 32)
			attr.precise_ip = 3;
	}

	event = perf_event_create_kernel_counter(&attr, -1, current,
						 kvm_perf_overflow, pmc);
	if (IS_ERR(event)) {
		pr_debug_ratelimited("kvm_pmu: event creation failed %ld for pmc->idx = %d\n",
				     PTR_ERR(event), pmc->idx);
		return;
	}

	pmc->perf_event = event;
	pmc_to_pmu(pmc)->event_count++;
	clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
	pmc->is_paused = false;
	pmc->intr = intr || pebs;
}

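/*
 * Pause the backing perf_event and fold whatever it accumulated since the
 * last pause/reprogram into the virtual counter value.
 */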
static void pmc_pause_counter(struct kvm_pmc *pmc)
{
	u64 counter = pmc->counter;

	if (!pmc->perf_event || pmc->is_paused)
		return;

	/* Accumulate the count since the last pause and reset it in perf. */
	counter += perf_event_pause(pmc->perf_event, true);
	pmc->counter = counter & pmc_bitmask(pmc);
	pmc->is_paused = true;
}

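/*
 * Try to resume an existing perf_event instead of recreating it.  Returns
 * false if the event can't be reused, e.g. the perf core rejects the new
 * sample period or the counter's PEBS configuration has changed.
 */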
static bool pmc_resume_counter(struct kvm_pmc *pmc)
{
	if (!pmc->perf_event)
		return false;

	/* Recalibrate the sample period and check that perf accepts it. */
	if (perf_event_period(pmc->perf_event,
			      get_sample_period(pmc, pmc->counter)))
		return false;

	/* Don't reuse a PEBS event if PEBS has since been disabled for this counter. */
	if (!test_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->pebs_enable) &&
	    pmc->perf_event->attr.precise_ip)
		return false;

	/* Reuse the existing perf_event. */
	perf_event_enable(pmc->perf_event);
	pmc->is_paused = false;

	clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
	return true;
}

static int cmp_u64(const void *pa, const void *pb)
{
	u64 a = *(u64 *)pa;
	u64 b = *(u64 *)pb;

	return (a > b) - (a < b);
}

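/*
 * Returns true if programming the counter is permitted, i.e. the event is
 * supported by hardware and is not excluded by the VM's PMU event filter
 * (an allow-list or deny-list of raw event selectors and fixed counters
 * installed via KVM_SET_PMU_EVENT_FILTER).
 */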
static bool check_pmu_event_filter(struct kvm_pmc *pmc)
{
	struct kvm_pmu_event_filter *filter;
	struct kvm *kvm = pmc->vcpu->kvm;
	bool allow_event = true;
	__u64 key;
	int idx;

	if (!static_call(kvm_x86_pmu_hw_event_available)(pmc))
		return false;

	filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
	if (!filter)
		goto out;

	if (pmc_is_gp(pmc)) {
		key = pmc->eventsel & AMD64_RAW_EVENT_MASK_NB;
		if (bsearch(&key, filter->events, filter->nevents,
			    sizeof(__u64), cmp_u64))
			allow_event = filter->action == KVM_PMU_EVENT_ALLOW;
		else
			allow_event = filter->action == KVM_PMU_EVENT_DENY;
	} else {
		idx = pmc->idx - INTEL_PMC_IDX_FIXED;
		if (filter->action == KVM_PMU_EVENT_DENY &&
		    test_bit(idx, (ulong *)&filter->fixed_counter_bitmap))
			allow_event = false;
		if (filter->action == KVM_PMU_EVENT_ALLOW &&
		    !test_bit(idx, (ulong *)&filter->fixed_counter_bitmap))
			allow_event = false;
	}

out:
	return allow_event;
}

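/*
 * (Re)program a virtual counter after its configuration may have changed:
 * pause the backing event, bail if the counter is disabled or filtered out,
 * and either resume the existing perf_event or create a new one.
 */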
void reprogram_counter(struct kvm_pmc *pmc)
{
	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
	u64 eventsel = pmc->eventsel;
	u64 new_config = eventsel;
	u8 fixed_ctr_ctrl;

	pmc_pause_counter(pmc);

	if (!pmc_speculative_in_use(pmc) || !pmc_is_enabled(pmc))
		return;

	if (!check_pmu_event_filter(pmc))
		return;

	if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
		printk_once("kvm pmu: pin control bit is ignored\n");

	if (pmc_is_fixed(pmc)) {
		/*
		 * Translate the fixed counter's control field into eventsel
		 * bits: bit 0 counts in ring 0, bit 1 counts in rings > 0,
		 * bit 3 enables the PMI (bit 2, AnyThread, is ignored).
		 */
		fixed_ctr_ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl,
						  pmc->idx - INTEL_PMC_IDX_FIXED);
		if (fixed_ctr_ctrl & 0x1)
			eventsel |= ARCH_PERFMON_EVENTSEL_OS;
		if (fixed_ctr_ctrl & 0x2)
			eventsel |= ARCH_PERFMON_EVENTSEL_USR;
		if (fixed_ctr_ctrl & 0x8)
			eventsel |= ARCH_PERFMON_EVENTSEL_INT;
		new_config = (u64)fixed_ctr_ctrl;
	}

	if (pmc->current_config == new_config && pmc_resume_counter(pmc))
		return;

	pmc_release_perf_event(pmc);

	pmc->current_config = new_config;
	pmc_reprogram_counter(pmc, PERF_TYPE_RAW,
			      (eventsel & pmu->raw_event_mask),
			      !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
			      !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
			      eventsel & ARCH_PERFMON_EVENTSEL_INT);
}
EXPORT_SYMBOL_GPL(reprogram_counter);

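/*
 * Handle pending counter reprogramming (KVM_REQ_PMU), e.g. after a counter
 * overflow, and release unused perf_events if a cleanup was scheduled.
 */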
void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	int bit;

	for_each_set_bit(bit, pmu->reprogram_pmi, X86_PMC_IDX_MAX) {
		struct kvm_pmc *pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, bit);

		if (unlikely(!pmc || !pmc->perf_event)) {
			clear_bit(bit, pmu->reprogram_pmi);
			continue;
		}
		reprogram_counter(pmc);
	}

	/*
	 * Unused perf_events are only released if the corresponding MSRs
	 * weren't accessed during the last vCPU time slice; kvm_arch_sched_in
	 * flags the need for cleanup.
	 */
	if (unlikely(pmu->need_cleanup))
		kvm_pmu_cleanup(vcpu);
}

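/* Check whether the RDPMC index (ECX) is valid for this vCPU's PMU. */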
bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{
	return static_call(kvm_x86_pmu_is_valid_rdpmc_ecx)(vcpu, idx);
}

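/*
 * The VMware backdoor exposes pseudo-PMCs via RDPMC that report time values
 * rather than hardware events; they bypass the normal vPMU entirely.
 */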
bool is_vmware_backdoor_pmc(u32 pmc_idx)
{
	switch (pmc_idx) {
	case VMWARE_BACKDOOR_PMC_HOST_TSC:
	case VMWARE_BACKDOOR_PMC_REAL_TIME:
	case VMWARE_BACKDOOR_PMC_APPARENT_TIME:
		return true;
	}
	return false;
}

static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
{
	u64 ctr_val;

	switch (idx) {
	case VMWARE_BACKDOOR_PMC_HOST_TSC:
		ctr_val = rdtsc();
		break;
	case VMWARE_BACKDOOR_PMC_REAL_TIME:
		ctr_val = ktime_get_boottime_ns();
		break;
	case VMWARE_BACKDOOR_PMC_APPARENT_TIME:
		ctr_val = ktime_get_boottime_ns() +
			vcpu->kvm->arch.kvmclock_offset;
		break;
	default:
		return 1;
	}

	*data = ctr_val;
	return 0;
}

int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
{
	bool fast_mode = idx & (1u << 31);
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	u64 mask = fast_mode ? ~0u : ~0ull;

	if (!pmu->version)
		return 1;

	if (is_vmware_backdoor_pmc(idx))
		return kvm_pmu_rdpmc_vmware(vcpu, idx, data);

	pmc = static_call(kvm_x86_pmu_rdpmc_ecx_to_pmc)(vcpu, idx, &mask);
	if (!pmc)
		return 1;

	/* RDPMC at CPL > 0 in protected mode requires CR4.PCE = 1; fail (#GP) otherwise. */
	if (!(kvm_read_cr4(vcpu) & X86_CR4_PCE) &&
	    (static_call(kvm_x86_get_cpl)(vcpu) != 0) &&
	    (kvm_read_cr0(vcpu) & X86_CR0_PE))
		return 1;

	*data = pmc_read_counter(pmc) & mask;
	return 0;
}

void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
{
	if (lapic_in_kernel(vcpu)) {
		static_call_cond(kvm_x86_pmu_deliver_pmi)(vcpu);
		kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC);
	}
}

bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
{
	return static_call(kvm_x86_pmu_msr_idx_to_pmc)(vcpu, msr) ||
		static_call(kvm_x86_pmu_is_valid_msr)(vcpu, msr);
}

static void kvm_pmu_mark_pmc_in_use(struct kvm_vcpu *vcpu, u32 msr)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc = static_call(kvm_x86_pmu_msr_idx_to_pmc)(vcpu, msr);

	if (pmc)
		__set_bit(pmc->idx, pmu->pmc_in_use);
}

int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	return static_call(kvm_x86_pmu_get_msr)(vcpu, msr_info);
}

int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	kvm_pmu_mark_pmc_in_use(vcpu, msr_info->index);
	return static_call(kvm_x86_pmu_set_msr)(vcpu, msr_info);
}

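/*
 * Refresh the PMU configuration for the vCPU.  Called when the vCPU's
 * capabilities change, e.g. after the guest's CPUID is updated by userspace,
 * which should be rare.
 */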
void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
{
	static_call(kvm_x86_pmu_refresh)(vcpu);
}

void kvm_pmu_reset(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);

	irq_work_sync(&pmu->irq_work);
	static_call(kvm_x86_pmu_reset)(vcpu);
}

void kvm_pmu_init(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);

	memset(pmu, 0, sizeof(*pmu));
	static_call(kvm_x86_pmu_init)(vcpu);
	init_irq_work(&pmu->irq_work, kvm_pmi_trigger_fn);
	pmu->event_count = 0;
	pmu->need_cleanup = false;
	kvm_pmu_refresh(vcpu);
}

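/* Release perf_events for vPMCs that have been unused for a full time slice. */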
void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc = NULL;
	DECLARE_BITMAP(bitmask, X86_PMC_IDX_MAX);
	int i;

	pmu->need_cleanup = false;

	/* Counters that are valid but weren't touched during the last slice. */
	bitmap_andnot(bitmask, pmu->all_valid_pmc_idx,
		      pmu->pmc_in_use, X86_PMC_IDX_MAX);

	for_each_set_bit(i, bitmask, X86_PMC_IDX_MAX) {
		pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);

		if (pmc && pmc->perf_event && !pmc_speculative_in_use(pmc))
			pmc_stop_counter(pmc);
	}

	static_call_cond(kvm_x86_pmu_cleanup)(vcpu);

	bitmap_zero(pmu->pmc_in_use, X86_PMC_IDX_MAX);
}

void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
{
	kvm_pmu_reset(vcpu);
}

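/*
 * Emulate one increment of a counter in software (used when KVM itself
 * emulates an instruction that the guest is counting) and raise an overflow
 * if the counter wraps.
 */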
static void kvm_pmu_incr_counter(struct kvm_pmc *pmc)
{
	u64 prev_count;

	prev_count = pmc->counter;
	pmc->counter = (pmc->counter + 1) & pmc_bitmask(pmc);

	reprogram_counter(pmc);
	/* A decrease relative to the old value means the counter wrapped. */
	if (pmc->counter < prev_count)
		__kvm_perf_overflow(pmc, false);
}

static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc,
					     unsigned int perf_hw_id)
{
	return !((pmc->eventsel ^ perf_get_hw_event_config(perf_hw_id)) &
		AMD64_RAW_EVENT_MASK_NB);
}

static inline bool cpl_is_matched(struct kvm_pmc *pmc)
{
	bool select_os, select_user;
	u64 config = pmc->current_config;

	if (pmc_is_gp(pmc)) {
		select_os = config & ARCH_PERFMON_EVENTSEL_OS;
		select_user = config & ARCH_PERFMON_EVENTSEL_USR;
	} else {
		/* Fixed counter control: bit 0 counts ring 0, bit 1 counts rings > 0. */
		select_os = config & 0x1;
		select_user = config & 0x2;
	}

	return (static_call(kvm_x86_get_cpl)(pmc->vcpu) == 0) ? select_os : select_user;
}

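/*
 * Increment every enabled counter that is programmed to count the given
 * generic perf hardware event at the current privilege level.  Used to
 * account events (e.g. instructions retired) that KVM emulates on the
 * guest's behalf.
 */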
void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	int i;

	for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
		pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);

		if (!pmc || !pmc_is_enabled(pmc) || !pmc_speculative_in_use(pmc))
			continue;

		/* Ignore checks for edge detect, pin control, invert and CMASK bits. */
		if (eventsel_match_perf_hw_id(pmc, perf_hw_id) && cpl_is_matched(pmc))
			kvm_pmu_incr_counter(pmc);
	}
}
EXPORT_SYMBOL_GPL(kvm_pmu_trigger_event);

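/*
 * KVM_SET_PMU_EVENT_FILTER: install (or replace) the per-VM PMU event
 * filter.  The event list is copied from userspace, sorted so that
 * check_pmu_event_filter() can binary-search it, and published under SRCU.
 */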
int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
{
	struct kvm_pmu_event_filter tmp, *filter;
	size_t size;
	int r;

	if (copy_from_user(&tmp, argp, sizeof(tmp)))
		return -EFAULT;

	if (tmp.action != KVM_PMU_EVENT_ALLOW &&
	    tmp.action != KVM_PMU_EVENT_DENY)
		return -EINVAL;

	if (tmp.flags != 0)
		return -EINVAL;

	if (tmp.nevents > KVM_PMU_EVENT_FILTER_MAX_EVENTS)
		return -E2BIG;

	size = struct_size(filter, events, tmp.nevents);
	filter = kmalloc(size, GFP_KERNEL_ACCOUNT);
	if (!filter)
		return -ENOMEM;

	r = -EFAULT;
	if (copy_from_user(filter, argp, size))
		goto cleanup;

	/* Ensure nevents can't be changed between the two copies from userspace. */
	*filter = tmp;

	/* Sort the in-kernel list so that check_pmu_event_filter() can use bsearch(). */
	sort(&filter->events, filter->nevents, sizeof(__u64), cmp_u64, NULL);

	mutex_lock(&kvm->lock);
	filter = rcu_replace_pointer(kvm->arch.pmu_event_filter, filter,
				     mutex_is_locked(&kvm->lock));
	mutex_unlock(&kvm->lock);

	synchronize_srcu_expedited(&kvm->srcu);
	r = 0;
cleanup:
	kfree(filter);
	return r;
}