0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * Copyright (C) 2015 Linaro Ltd.
0004  * Author: Shannon Zhao <shannon.zhao@linaro.org>
0005  */
0006 
0007 #include <linux/cpu.h>
0008 #include <linux/kvm.h>
0009 #include <linux/kvm_host.h>
0010 #include <linux/list.h>
0011 #include <linux/perf_event.h>
0012 #include <linux/perf/arm_pmu.h>
0013 #include <linux/uaccess.h>
0014 #include <asm/kvm_emulate.h>
0015 #include <kvm/arm_pmu.h>
0016 #include <kvm/arm_vgic.h>
0017 
0018 DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);
0019 
0020 static LIST_HEAD(arm_pmus);
0021 static DEFINE_MUTEX(arm_pmus_lock);
0022 
0023 static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
0024 static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx);
0025 static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);
0026 
0027 #define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1
0028 
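     /**
      * kvm_pmu_event_mask - get the mask of valid guest event numbers
      * @kvm: The kvm pointer
      *
      * PMUv3 for Armv8.0 implements 10-bit event numbers; Armv8.1 and later
      * widen the event number field to 16 bits.
      */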
0029 static u32 kvm_pmu_event_mask(struct kvm *kvm)
0030 {
0031     unsigned int pmuver;
0032 
0033     pmuver = kvm->arch.arm_pmu->pmuver;
0034 
0035     switch (pmuver) {
0036     case ID_AA64DFR0_PMUVER_8_0:
0037         return GENMASK(9, 0);
0038     case ID_AA64DFR0_PMUVER_8_1:
0039     case ID_AA64DFR0_PMUVER_8_4:
0040     case ID_AA64DFR0_PMUVER_8_5:
0041     case ID_AA64DFR0_PMUVER_8_7:
0042         return GENMASK(15, 0);
0043     default:        /* Shouldn't be here, just for sanity */
0044         WARN_ONCE(1, "Unknown PMU version %d\n", pmuver);
0045         return 0;
0046     }
0047 }
0048 
0049 /**
0050  * kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter
0051  * @vcpu: The vcpu pointer
0052  * @select_idx: The counter index
0053  */
0054 static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
0055 {
0056     return (select_idx == ARMV8_PMU_CYCLE_IDX &&
0057         __vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC);
0058 }
0059 
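     /**
      * kvm_pmc_to_vcpu - get the vcpu owning a PMU counter
      * @pmc: The PMU counter pointer
      *
      * Walk back from the counter to pmc[0], then use container_of() to
      * reach the enclosing kvm_pmu, kvm_vcpu_arch and finally the vcpu.
      */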
0060 static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
0061 {
0062     struct kvm_pmu *pmu;
0063     struct kvm_vcpu_arch *vcpu_arch;
0064 
0065     pmc -= pmc->idx;
0066     pmu = container_of(pmc, struct kvm_pmu, pmc[0]);
0067     vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu);
0068     return container_of(vcpu_arch, struct kvm_vcpu, arch);
0069 }
0070 
0071 /**
0072  * kvm_pmu_pmc_is_chained - determine if the pmc is chained
0073  * @pmc: The PMU counter pointer
0074  */
0075 static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc)
0076 {
0077     struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
0078 
0079     return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
0080 }
0081 
0082 /**
0083  * kvm_pmu_idx_is_high_counter - determine if select_idx is a high (odd) counter
0084  * @select_idx: The counter index
0085  */
0086 static bool kvm_pmu_idx_is_high_counter(u64 select_idx)
0087 {
0088     return select_idx & 0x1;
0089 }
0090 
0091 /**
0092  * kvm_pmu_get_canonical_pmc - obtain the canonical pmc
0093  * @pmc: The PMU counter pointer
0094  *
0095  * When a pair of PMCs are chained together we use the low counter (canonical)
0096  * to hold the underlying perf event.
0097  */
0098 static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc)
0099 {
0100     if (kvm_pmu_pmc_is_chained(pmc) &&
0101         kvm_pmu_idx_is_high_counter(pmc->idx))
0102         return pmc - 1;
0103 
0104     return pmc;
0105 }
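
     /**
      * kvm_pmu_get_alternate_pmc - get the other counter of a chained pair
      * @pmc: The PMU counter pointer
      *
      * Return the low counter when given the high (odd) one and vice versa.
      */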
0106 static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc)
0107 {
0108     if (kvm_pmu_idx_is_high_counter(pmc->idx))
0109         return pmc - 1;
0110     else
0111         return pmc + 1;
0112 }
0113 
0114 /**
0115  * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain
0116  * @vcpu: The vcpu pointer
0117  * @select_idx: The counter index
0118  */
0119 static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx)
0120 {
0121     u64 eventsel, reg;
0122 
0123     select_idx |= 0x1;
0124 
0125     if (select_idx == ARMV8_PMU_CYCLE_IDX)
0126         return false;
0127 
0128     reg = PMEVTYPER0_EL0 + select_idx;
0129     eventsel = __vcpu_sys_reg(vcpu, reg) & kvm_pmu_event_mask(vcpu->kvm);
0130 
0131     return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN;
0132 }
0133 
0134 /**
0135  * kvm_pmu_get_pair_counter_value - get the value of a counter, or of a chained pair
0136  * @vcpu: The vcpu pointer
0137  * @pmc: The PMU counter pointer
0138  */
0139 static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu,
0140                       struct kvm_pmc *pmc)
0141 {
0142     u64 counter, counter_high, reg, enabled, running;
0143 
0144     if (kvm_pmu_pmc_is_chained(pmc)) {
0145         pmc = kvm_pmu_get_canonical_pmc(pmc);
0146         reg = PMEVCNTR0_EL0 + pmc->idx;
0147 
0148         counter = __vcpu_sys_reg(vcpu, reg);
0149         counter_high = __vcpu_sys_reg(vcpu, reg + 1);
0150 
0151         counter = lower_32_bits(counter) | (counter_high << 32);
0152     } else {
0153         reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
0154               ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
0155         counter = __vcpu_sys_reg(vcpu, reg);
0156     }
0157 
0158     /*
0159      * The real counter value is equal to the value of the counter register
0160      * plus the value that the perf event counts.
0161      */
0162     if (pmc->perf_event)
0163         counter += perf_event_read_value(pmc->perf_event, &enabled,
0164                          &running);
0165 
0166     return counter;
0167 }
0168 
0169 /**
0170  * kvm_pmu_get_counter_value - get PMU counter value
0171  * @vcpu: The vcpu pointer
0172  * @select_idx: The counter index
0173  */
0174 u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
0175 {
0176     u64 counter;
0177     struct kvm_pmu *pmu = &vcpu->arch.pmu;
0178     struct kvm_pmc *pmc = &pmu->pmc[select_idx];
0179 
0180     if (!kvm_vcpu_has_pmu(vcpu))
0181         return 0;
0182 
0183     counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
0184 
0185     if (kvm_pmu_pmc_is_chained(pmc) &&
0186         kvm_pmu_idx_is_high_counter(select_idx))
0187         counter = upper_32_bits(counter);
0188     else if (select_idx != ARMV8_PMU_CYCLE_IDX)
0189         counter = lower_32_bits(counter);
0190 
0191     return counter;
0192 }
0193 
0194 /**
0195  * kvm_pmu_set_counter_value - set PMU counter value
0196  * @vcpu: The vcpu pointer
0197  * @select_idx: The counter index
0198  * @val: The counter value
0199  */
0200 void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
0201 {
0202     u64 reg;
0203 
0204     if (!kvm_vcpu_has_pmu(vcpu))
0205         return;
0206 
0207     reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
0208           ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
0209     __vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);
0210 
0211     /* Recreate the perf event to reflect the updated sample_period */
0212     kvm_pmu_create_perf_event(vcpu, select_idx);
0213 }
0214 
0215 /**
0216  * kvm_pmu_release_perf_event - remove the perf event
0217  * @pmc: The PMU counter pointer
0218  */
0219 static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
0220 {
0221     pmc = kvm_pmu_get_canonical_pmc(pmc);
0222     if (pmc->perf_event) {
0223         perf_event_disable(pmc->perf_event);
0224         perf_event_release_kernel(pmc->perf_event);
0225         pmc->perf_event = NULL;
0226     }
0227 }
0228 
0229 /**
0230  * kvm_pmu_stop_counter - stop PMU counter
0231  * @pmc: The PMU counter pointer
0232  *
0233  * If this counter has been configured to monitor some event, release it here.
0234  */
0235 static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
0236 {
0237     u64 counter, reg, val;
0238 
0239     pmc = kvm_pmu_get_canonical_pmc(pmc);
0240     if (!pmc->perf_event)
0241         return;
0242 
0243     counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
0244 
0245     if (pmc->idx == ARMV8_PMU_CYCLE_IDX) {
0246         reg = PMCCNTR_EL0;
0247         val = counter;
0248     } else {
0249         reg = PMEVCNTR0_EL0 + pmc->idx;
0250         val = lower_32_bits(counter);
0251     }
0252 
0253     __vcpu_sys_reg(vcpu, reg) = val;
0254 
0255     if (kvm_pmu_pmc_is_chained(pmc))
0256         __vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);
0257 
0258     kvm_pmu_release_perf_event(pmc);
0259 }
0260 
0261 /**
0262  * kvm_pmu_vcpu_init - assign pmu counter idx for cpu
0263  * @vcpu: The vcpu pointer
0264  *
0265  */
0266 void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
0267 {
0268     int i;
0269     struct kvm_pmu *pmu = &vcpu->arch.pmu;
0270 
0271     for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
0272         pmu->pmc[i].idx = i;
0273 }
0274 
0275 /**
0276  * kvm_pmu_vcpu_reset - reset pmu state for cpu
0277  * @vcpu: The vcpu pointer
0278  *
0279  */
0280 void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
0281 {
0282     unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
0283     struct kvm_pmu *pmu = &vcpu->arch.pmu;
0284     int i;
0285 
0286     for_each_set_bit(i, &mask, 32)
0287         kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);
0288 
0289     bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
0290 }
0291 
0292 /**
0293  * kvm_pmu_vcpu_destroy - free perf event of PMU for cpu
0294  * @vcpu: The vcpu pointer
0295  *
0296  */
0297 void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
0298 {
0299     int i;
0300     struct kvm_pmu *pmu = &vcpu->arch.pmu;
0301 
0302     for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
0303         kvm_pmu_release_perf_event(&pmu->pmc[i]);
0304     irq_work_sync(&vcpu->arch.pmu.overflow_work);
0305 }
0306 
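     /**
      * kvm_pmu_valid_counter_mask - bitmap of counters implemented for the guest
      * @vcpu: The vcpu pointer
      *
      * PMCR_EL0.N gives the number of event counters; the cycle counter
      * (bit 31) is always included.
      */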
0307 u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
0308 {
0309     u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;
0310 
0311     val &= ARMV8_PMU_PMCR_N_MASK;
0312     if (val == 0)
0313         return BIT(ARMV8_PMU_CYCLE_IDX);
0314     else
0315         return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX);
0316 }
0317 
0318 /**
0319  * kvm_pmu_enable_counter_mask - enable selected PMU counters
0320  * @vcpu: The vcpu pointer
0321  * @val: the value guest writes to PMCNTENSET register
0322  *
0323  * Call perf_event_enable to start counting the perf event
0324  */
0325 void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
0326 {
0327     int i;
0328     struct kvm_pmu *pmu = &vcpu->arch.pmu;
0329     struct kvm_pmc *pmc;
0330 
0331     if (!kvm_vcpu_has_pmu(vcpu))
0332         return;
0333 
0334     if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
0335         return;
0336 
0337     for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
0338         if (!(val & BIT(i)))
0339             continue;
0340 
0341         pmc = &pmu->pmc[i];
0342 
0343         /* A change in the enable state may affect the chain state */
0344         kvm_pmu_update_pmc_chained(vcpu, i);
0345         kvm_pmu_create_perf_event(vcpu, i);
0346 
0347         /* At this point, pmc must be the canonical */
0348         if (pmc->perf_event) {
0349             perf_event_enable(pmc->perf_event);
0350             if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
0351                 kvm_debug("failed to enable perf event\n");
0352         }
0353     }
0354 }
0355 
0356 /**
0357  * kvm_pmu_disable_counter_mask - disable selected PMU counters
0358  * @vcpu: The vcpu pointer
0359  * @val: the value guest writes to PMCNTENCLR register
0360  *
0361  * Call perf_event_disable to stop counting the perf event
0362  */
0363 void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
0364 {
0365     int i;
0366     struct kvm_pmu *pmu = &vcpu->arch.pmu;
0367     struct kvm_pmc *pmc;
0368 
0369     if (!kvm_vcpu_has_pmu(vcpu) || !val)
0370         return;
0371 
0372     for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
0373         if (!(val & BIT(i)))
0374             continue;
0375 
0376         pmc = &pmu->pmc[i];
0377 
0378         /* A change in the enable state may affect the chain state */
0379         kvm_pmu_update_pmc_chained(vcpu, i);
0380         kvm_pmu_create_perf_event(vcpu, i);
0381 
0382         /* At this point, pmc must be the canonical */
0383         if (pmc->perf_event)
0384             perf_event_disable(pmc->perf_event);
0385     }
0386 }
0387 
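     /*
      * Compute the set of counters currently asserting an overflow interrupt:
      * overflow flag set, counter enabled, interrupt enabled, and the PMU
      * globally enabled via PMCR_EL0.E.
      */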
0388 static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
0389 {
0390     u64 reg = 0;
0391 
0392     if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
0393         reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
0394         reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
0395         reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
0396     }
0397 
0398     return reg;
0399 }
0400 
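     /*
      * Fold the guest overflow status into the PMU interrupt line and, when
      * using an in-kernel irqchip, propagate any change to the vgic.
      */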
0401 static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
0402 {
0403     struct kvm_pmu *pmu = &vcpu->arch.pmu;
0404     bool overflow;
0405 
0406     if (!kvm_vcpu_has_pmu(vcpu))
0407         return;
0408 
0409     overflow = !!kvm_pmu_overflow_status(vcpu);
0410     if (pmu->irq_level == overflow)
0411         return;
0412 
0413     pmu->irq_level = overflow;
0414 
0415     if (likely(irqchip_in_kernel(vcpu->kvm))) {
0416         int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
0417                           pmu->irq_num, overflow, pmu);
0418         WARN_ON(ret);
0419     }
0420 }
0421 
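     /*
      * With a userspace irqchip, report whether the PMU interrupt level has
      * changed since it was last synced into the kvm_run structure.
      */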
0422 bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
0423 {
0424     struct kvm_pmu *pmu = &vcpu->arch.pmu;
0425     struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
0426     bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU;
0427 
0428     if (likely(irqchip_in_kernel(vcpu->kvm)))
0429         return false;
0430 
0431     return pmu->irq_level != run_level;
0432 }
0433 
0434 /*
0435  * Reflect the PMU overflow interrupt output level into the kvm_run structure
0436  */
0437 void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
0438 {
0439     struct kvm_sync_regs *regs = &vcpu->run->s.regs;
0440 
0441     /* Populate the PMU overflow bit for user space */
0442     regs->device_irq_level &= ~KVM_ARM_DEV_PMU;
0443     if (vcpu->arch.pmu.irq_level)
0444         regs->device_irq_level |= KVM_ARM_DEV_PMU;
0445 }
0446 
0447 /**
0448  * kvm_pmu_flush_hwstate - flush pmu state to cpu
0449  * @vcpu: The vcpu pointer
0450  *
0451  * Check if the PMU has overflowed while we were running in the host, and inject
0452  * an interrupt if that was the case.
0453  */
0454 void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu)
0455 {
0456     kvm_pmu_update_state(vcpu);
0457 }
0458 
0459 /**
0460  * kvm_pmu_sync_hwstate - sync pmu state from cpu
0461  * @vcpu: The vcpu pointer
0462  *
0463  * Check if the PMU has overflowed while we were running in the guest, and
0464  * inject an interrupt if that was the case.
0465  */
0466 void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
0467 {
0468     kvm_pmu_update_state(vcpu);
0469 }
0470 
0471 /*
0472  * When the perf interrupt is an NMI, we cannot safely notify the vcpu
0473  * corresponding to the event. This is why we need a callback to do it
0474  * once outside of the NMI context.
0475  */
0476 static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
0477 {
0478     struct kvm_vcpu *vcpu;
0479     struct kvm_pmu *pmu;
0480 
0481     pmu = container_of(work, struct kvm_pmu, overflow_work);
0482     vcpu = kvm_pmc_to_vcpu(pmu->pmc);
0483 
0484     kvm_vcpu_kick(vcpu);
0485 }
0486 
0487 /*
0488  * When the perf event overflows, set the overflow status and inform the vcpu.
0489  */
0490 static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
0491                   struct perf_sample_data *data,
0492                   struct pt_regs *regs)
0493 {
0494     struct kvm_pmc *pmc = perf_event->overflow_handler_context;
0495     struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
0496     struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
0497     int idx = pmc->idx;
0498     u64 period;
0499 
0500     cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);
0501 
0502     /*
0503      * Reset the sample period to the architectural limit,
0504      * i.e. the point where the counter overflows.
0505      */
0506     period = -(local64_read(&perf_event->count));
0507 
0508     if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
0509         period &= GENMASK(31, 0);
0510 
0511     local64_set(&perf_event->hw.period_left, 0);
0512     perf_event->attr.sample_period = period;
0513     perf_event->hw.sample_period = period;
0514 
0515     __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);
0516 
0517     if (kvm_pmu_overflow_status(vcpu)) {
0518         kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
0519 
0520         if (!in_nmi())
0521             kvm_vcpu_kick(vcpu);
0522         else
0523             irq_work_queue(&vcpu->arch.pmu.overflow_work);
0524     }
0525 
0526     cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
0527 }
0528 
0529 /**
0530  * kvm_pmu_software_increment - do software increment
0531  * @vcpu: The vcpu pointer
0532  * @val: the value guest writes to PMSWINC register
0533  */
0534 void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
0535 {
0536     struct kvm_pmu *pmu = &vcpu->arch.pmu;
0537     int i;
0538 
0539     if (!kvm_vcpu_has_pmu(vcpu))
0540         return;
0541 
0542     if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
0543         return;
0544 
0545     /* Weed out disabled counters */
0546     val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
0547 
0548     for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
0549         u64 type, reg;
0550 
0551         if (!(val & BIT(i)))
0552             continue;
0553 
0554         /* PMSWINC only applies to ... SW_INC! */
0555         type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i);
0556         type &= kvm_pmu_event_mask(vcpu->kvm);
0557         if (type != ARMV8_PMUV3_PERFCTR_SW_INCR)
0558             continue;
0559 
0560         /* increment this counter, which counts SW_INC events */
0561         reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
0562         reg = lower_32_bits(reg);
0563         __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
0564 
0565         if (reg) /* no overflow on the low part */
0566             continue;
0567 
0568         if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) {
0569             /* increment the high counter */
0570             reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1;
0571             reg = lower_32_bits(reg);
0572             __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
0573             if (!reg) /* mark overflow on the high counter */
0574                 __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
0575         } else {
0576             /* mark overflow on low counter */
0577             __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
0578         }
0579     }
0580 }
0581 
0582 /**
0583  * kvm_pmu_handle_pmcr - handle PMCR register
0584  * @vcpu: The vcpu pointer
0585  * @val: the value guest writes to PMCR register
0586  */
0587 void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
0588 {
0589     int i;
0590 
0591     if (!kvm_vcpu_has_pmu(vcpu))
0592         return;
0593 
0594     if (val & ARMV8_PMU_PMCR_E) {
0595         kvm_pmu_enable_counter_mask(vcpu,
0596                __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
0597     } else {
0598         kvm_pmu_disable_counter_mask(vcpu,
0599                __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
0600     }
0601 
0602     if (val & ARMV8_PMU_PMCR_C)
0603         kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);
0604 
0605     if (val & ARMV8_PMU_PMCR_P) {
0606         unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
0607         mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
0608         for_each_set_bit(i, &mask, 32)
0609             kvm_pmu_set_counter_value(vcpu, i, 0);
0610     }
0611 }
0612 
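     /*
      * A counter only counts when the PMU is globally enabled (PMCR_EL0.E)
      * and its bit is set in PMCNTENSET_EL0.
      */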
0613 static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
0614 {
0615     return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
0616            (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx));
0617 }
0618 
0619 /**
0620  * kvm_pmu_create_perf_event - create a perf event for a counter
0621  * @vcpu: The vcpu pointer
0622  * @select_idx: The number of selected counter
0623  */
0624 static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
0625 {
0626     struct arm_pmu *arm_pmu = vcpu->kvm->arch.arm_pmu;
0627     struct kvm_pmu *pmu = &vcpu->arch.pmu;
0628     struct kvm_pmc *pmc;
0629     struct perf_event *event;
0630     struct perf_event_attr attr;
0631     u64 eventsel, counter, reg, data;
0632 
0633     /*
0634      * For chained counters the event type and filtering attributes are
0635      * obtained from the low/even counter. We also use this counter to
0636      * determine if the event is enabled/disabled.
0637      */
0638     pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]);
0639 
0640     reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
0641           ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx;
0642     data = __vcpu_sys_reg(vcpu, reg);
0643 
0644     kvm_pmu_stop_counter(vcpu, pmc);
0645     if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
0646         eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
0647     else
0648         eventsel = data & kvm_pmu_event_mask(vcpu->kvm);
0649 
0650     /* Software increment event doesn't need to be backed by a perf event */
0651     if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR)
0652         return;
0653 
0654     /*
0655      * If we have a filter in place and the event isn't allowed, do
0656      * not install a perf event either.
0657      */
0658     if (vcpu->kvm->arch.pmu_filter &&
0659         !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
0660         return;
0661 
0662     memset(&attr, 0, sizeof(struct perf_event_attr));
0663     attr.type = arm_pmu->pmu.type;
0664     attr.size = sizeof(attr);
0665     attr.pinned = 1;
0666     attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx);
0667     attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
0668     attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
0669     attr.exclude_hv = 1; /* Don't count EL2 events */
0670     attr.exclude_host = 1; /* Don't count host events */
0671     attr.config = eventsel;
0672 
0673     counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
0674 
0675     if (kvm_pmu_pmc_is_chained(pmc)) {
0676         /*
0677          * The initial sample period (overflow count) of an event. For
0678          * chained counters we only support overflow interrupts on the
0679          * high counter.
0680          */
0681         attr.sample_period = (-counter) & GENMASK(63, 0);
0682         attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;
0683 
0684         event = perf_event_create_kernel_counter(&attr, -1, current,
0685                              kvm_pmu_perf_overflow,
0686                              pmc + 1);
0687     } else {
0688         /* The initial sample period (overflow count) of an event. */
0689         if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
0690             attr.sample_period = (-counter) & GENMASK(63, 0);
0691         else
0692             attr.sample_period = (-counter) & GENMASK(31, 0);
0693 
0694         event = perf_event_create_kernel_counter(&attr, -1, current,
0695                          kvm_pmu_perf_overflow, pmc);
0696     }
0697 
0698     if (IS_ERR(event)) {
0699         pr_err_once("kvm: pmu event creation failed %ld\n",
0700                 PTR_ERR(event));
0701         return;
0702     }
0703 
0704     pmc->perf_event = event;
0705 }
0706 
0707 /**
0708  * kvm_pmu_update_pmc_chained - update chained bitmap
0709  * @vcpu: The vcpu pointer
0710  * @select_idx: The number of selected counter
0711  *
0712  * Update the chained bitmap based on the event type written in the
0713  * typer register and the enable state of the odd register.
0714  */
0715 static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx)
0716 {
0717     struct kvm_pmu *pmu = &vcpu->arch.pmu;
0718     struct kvm_pmc *pmc = &pmu->pmc[select_idx], *canonical_pmc;
0719     bool new_state, old_state;
0720 
0721     old_state = kvm_pmu_pmc_is_chained(pmc);
0722     new_state = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) &&
0723             kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1);
0724 
0725     if (old_state == new_state)
0726         return;
0727 
0728     canonical_pmc = kvm_pmu_get_canonical_pmc(pmc);
0729     kvm_pmu_stop_counter(vcpu, canonical_pmc);
0730     if (new_state) {
0731         /*
0732          * During promotion from !chained to chained we must ensure
0733          * the adjacent counter is stopped and its event destroyed
0734          */
0735         kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc));
0736         set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
0737         return;
0738     }
0739     clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
0740 }
0741 
0742 /**
0743  * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
0744  * @vcpu: The vcpu pointer
0745  * @data: The data guest writes to PMXEVTYPER_EL0
0746  * @select_idx: The number of selected counter
0747  *
0748  * When OS accesses PMXEVTYPER_EL0, that means it wants to set a PMC to count an
0749  * event with given hardware event number. Here we call perf_event API to
0750  * emulate this action and create a kernel perf event for it.
0751  */
0752 void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
0753                     u64 select_idx)
0754 {
0755     u64 reg, mask;
0756 
0757     if (!kvm_vcpu_has_pmu(vcpu))
0758         return;
0759 
0760     mask  =  ARMV8_PMU_EVTYPE_MASK;
0761     mask &= ~ARMV8_PMU_EVTYPE_EVENT;
0762     mask |= kvm_pmu_event_mask(vcpu->kvm);
0763 
0764     reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
0765           ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx;
0766 
0767     __vcpu_sys_reg(vcpu, reg) = data & mask;
0768 
0769     kvm_pmu_update_pmc_chained(vcpu, select_idx);
0770     kvm_pmu_create_perf_event(vcpu, select_idx);
0771 }
0772 
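     /*
      * Add a host PMU to the arm_pmus list (unless it is absent or
      * IMPLEMENTATION DEFINED) and enable the kvm_arm_pmu_available static
      * key when the first entry is added.
      */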
0773 void kvm_host_pmu_init(struct arm_pmu *pmu)
0774 {
0775     struct arm_pmu_entry *entry;
0776 
0777     if (pmu->pmuver == 0 || pmu->pmuver == ID_AA64DFR0_PMUVER_IMP_DEF)
0778         return;
0779 
0780     mutex_lock(&arm_pmus_lock);
0781 
0782     entry = kmalloc(sizeof(*entry), GFP_KERNEL);
0783     if (!entry)
0784         goto out_unlock;
0785 
0786     entry->arm_pmu = pmu;
0787     list_add_tail(&entry->entry, &arm_pmus);
0788 
0789     if (list_is_singular(&arm_pmus))
0790         static_branch_enable(&kvm_arm_pmu_available);
0791 
0792 out_unlock:
0793     mutex_unlock(&arm_pmus_lock);
0794 }
0795 
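     /*
      * Probe for a default host PMU by creating, and immediately tearing
      * down, a dummy cycle-counting perf event bound to the current task.
      */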
0796 static struct arm_pmu *kvm_pmu_probe_armpmu(void)
0797 {
0798     struct perf_event_attr attr = { };
0799     struct perf_event *event;
0800     struct arm_pmu *pmu = NULL;
0801 
0802     /*
0803      * Create a dummy event that only counts user cycles. As we'll never
0804      * leave this function with the event being live, it will never
0805      * count anything. But it allows us to probe some of the PMU
0806      * details. Yes, this is terrible.
0807      */
0808     attr.type = PERF_TYPE_RAW;
0809     attr.size = sizeof(attr);
0810     attr.pinned = 1;
0811     attr.disabled = 0;
0812     attr.exclude_user = 0;
0813     attr.exclude_kernel = 1;
0814     attr.exclude_hv = 1;
0815     attr.exclude_host = 1;
0816     attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
0817     attr.sample_period = GENMASK(63, 0);
0818 
0819     event = perf_event_create_kernel_counter(&attr, -1, current,
0820                          kvm_pmu_perf_overflow, &attr);
0821 
0822     if (IS_ERR(event)) {
0823         pr_err_once("kvm: pmu event creation failed %ld\n",
0824                 PTR_ERR(event));
0825         return NULL;
0826     }
0827 
0828     if (event->pmu) {
0829         pmu = to_arm_pmu(event->pmu);
0830         if (pmu->pmuver == 0 ||
0831             pmu->pmuver == ID_AA64DFR0_PMUVER_IMP_DEF)
0832             pmu = NULL;
0833     }
0834 
0835     perf_event_disable(event);
0836     perf_event_release_kernel(event);
0837 
0838     return pmu;
0839 }
0840 
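     /*
      * Build the guest view of PMCEID0_EL0/PMCEID1_EL0: start from the host
      * register and hide the events excluded by the PMU event filter, if any.
      */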
0841 u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
0842 {
0843     unsigned long *bmap = vcpu->kvm->arch.pmu_filter;
0844     u64 val, mask = 0;
0845     int base, i, nr_events;
0846 
0847     if (!kvm_vcpu_has_pmu(vcpu))
0848         return 0;
0849 
0850     if (!pmceid1) {
0851         val = read_sysreg(pmceid0_el0);
0852         base = 0;
0853     } else {
0854         val = read_sysreg(pmceid1_el0);
0855         /*
0856          * Don't advertise STALL_SLOT, as PMMIR_EL0 is handled
0857          * as RAZ
0858          */
0859         if (vcpu->kvm->arch.arm_pmu->pmuver >= ID_AA64DFR0_PMUVER_8_4)
0860             val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32);
0861         base = 32;
0862     }
0863 
0864     if (!bmap)
0865         return val;
0866 
0867     nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;
0868 
0869     for (i = 0; i < 32; i += 8) {
0870         u64 byte;
0871 
0872         byte = bitmap_get_value8(bmap, base + i);
0873         mask |= byte << i;
0874         if (nr_events >= (0x4000 + base + 32)) {
0875             byte = bitmap_get_value8(bmap, 0x4000 + base + i);
0876             mask |= byte << (32 + i);
0877         }
0878     }
0879 
0880     return val & mask;
0881 }
0882 
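     /*
      * Final PMU sanity checks before the vcpu is first run: the PMU must
      * have been initialized, and its interrupt configuration must match the
      * irqchip setup (a valid PPI/SPI with an in-kernel GIC, or no IRQ at
      * all without one).
      */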
0883 int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
0884 {
0885     if (!kvm_vcpu_has_pmu(vcpu))
0886         return 0;
0887 
0888     if (!vcpu->arch.pmu.created)
0889         return -EINVAL;
0890 
0891     /*
0892      * A valid interrupt configuration for the PMU is either to have a
0893      * properly configured interrupt number and using an in-kernel
0894      * irqchip, or to not have an in-kernel GIC and not set an IRQ.
0895      */
0896     if (irqchip_in_kernel(vcpu->kvm)) {
0897         int irq = vcpu->arch.pmu.irq_num;
0898         /*
0899          * If we are using an in-kernel vgic, at this point we know
0900          * the vgic will be initialized, so we can check the PMU irq
0901          * number against the dimensions of the vgic and make sure
0902          * it's valid.
0903          */
0904         if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
0905             return -EINVAL;
0906     } else if (kvm_arm_pmu_irq_initialized(vcpu)) {
0907            return -EINVAL;
0908     }
0909 
0910     /* One-off reload of the PMU on first run */
0911     kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);
0912 
0913     return 0;
0914 }
0915 
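     /*
      * KVM_ARM_VCPU_PMU_V3_INIT: register the PMU interrupt with the vgic
      * (if there is one), set up the overflow irq_work and mark the PMU as
      * created.
      */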
0916 static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
0917 {
0918     if (irqchip_in_kernel(vcpu->kvm)) {
0919         int ret;
0920 
0921         /*
0922          * If using the PMU with an in-kernel virtual GIC
0923          * implementation, we require the GIC to be already
0924          * initialized when initializing the PMU.
0925          */
0926         if (!vgic_initialized(vcpu->kvm))
0927             return -ENODEV;
0928 
0929         if (!kvm_arm_pmu_irq_initialized(vcpu))
0930             return -ENXIO;
0931 
0932         ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
0933                      &vcpu->arch.pmu);
0934         if (ret)
0935             return ret;
0936     }
0937 
0938     init_irq_work(&vcpu->arch.pmu.overflow_work,
0939               kvm_pmu_perf_overflow_notify_vcpu);
0940 
0941     vcpu->arch.pmu.created = true;
0942     return 0;
0943 }
0944 
0945 /*
0946  * For one VM the interrupt type must be the same for each vcpu.
0947  * As a PPI, the interrupt number is the same for all vcpus,
0948  * while as an SPI it must be a separate number per vcpu.
0949  */
0950 static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
0951 {
0952     unsigned long i;
0953     struct kvm_vcpu *vcpu;
0954 
0955     kvm_for_each_vcpu(i, vcpu, kvm) {
0956         if (!kvm_arm_pmu_irq_initialized(vcpu))
0957             continue;
0958 
0959         if (irq_is_ppi(irq)) {
0960             if (vcpu->arch.pmu.irq_num != irq)
0961                 return false;
0962         } else {
0963             if (vcpu->arch.pmu.irq_num == irq)
0964                 return false;
0965         }
0966     }
0967 
0968     return true;
0969 }
0970 
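     /*
      * KVM_ARM_VCPU_PMU_V3_SET_PMU: bind the VM to the host PMU whose perf
      * type matches pmu_id. Refused once the VM has run, or if an event
      * filter was installed against a different PMU.
      */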
0971 static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id)
0972 {
0973     struct kvm *kvm = vcpu->kvm;
0974     struct arm_pmu_entry *entry;
0975     struct arm_pmu *arm_pmu;
0976     int ret = -ENXIO;
0977 
0978     mutex_lock(&kvm->lock);
0979     mutex_lock(&arm_pmus_lock);
0980 
0981     list_for_each_entry(entry, &arm_pmus, entry) {
0982         arm_pmu = entry->arm_pmu;
0983         if (arm_pmu->pmu.type == pmu_id) {
0984             if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags) ||
0985                 (kvm->arch.pmu_filter && kvm->arch.arm_pmu != arm_pmu)) {
0986                 ret = -EBUSY;
0987                 break;
0988             }
0989 
0990             kvm->arch.arm_pmu = arm_pmu;
0991             cpumask_copy(kvm->arch.supported_cpus, &arm_pmu->supported_cpus);
0992             ret = 0;
0993             break;
0994         }
0995     }
0996 
0997     mutex_unlock(&arm_pmus_lock);
0998     mutex_unlock(&kvm->lock);
0999     return ret;
1000 }
1001 
1002 int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
1003 {
1004     struct kvm *kvm = vcpu->kvm;
1005 
1006     if (!kvm_vcpu_has_pmu(vcpu))
1007         return -ENODEV;
1008 
1009     if (vcpu->arch.pmu.created)
1010         return -EBUSY;
1011 
1012     mutex_lock(&kvm->lock);
1013     if (!kvm->arch.arm_pmu) {
1014         /* No PMU set, get the default one */
1015         kvm->arch.arm_pmu = kvm_pmu_probe_armpmu();
1016         if (!kvm->arch.arm_pmu) {
1017             mutex_unlock(&kvm->lock);
1018             return -ENODEV;
1019         }
1020     }
1021     mutex_unlock(&kvm->lock);
1022 
1023     switch (attr->attr) {
1024     case KVM_ARM_VCPU_PMU_V3_IRQ: {
1025         int __user *uaddr = (int __user *)(long)attr->addr;
1026         int irq;
1027 
1028         if (!irqchip_in_kernel(kvm))
1029             return -EINVAL;
1030 
1031         if (get_user(irq, uaddr))
1032             return -EFAULT;
1033 
1034         /* The PMU overflow interrupt can be a PPI or a valid SPI. */
1035         if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
1036             return -EINVAL;
1037 
1038         if (!pmu_irq_is_valid(kvm, irq))
1039             return -EINVAL;
1040 
1041         if (kvm_arm_pmu_irq_initialized(vcpu))
1042             return -EBUSY;
1043 
1044         kvm_debug("Set kvm ARM PMU irq: %d\n", irq);
1045         vcpu->arch.pmu.irq_num = irq;
1046         return 0;
1047     }
1048     case KVM_ARM_VCPU_PMU_V3_FILTER: {
1049         struct kvm_pmu_event_filter __user *uaddr;
1050         struct kvm_pmu_event_filter filter;
1051         int nr_events;
1052 
1053         nr_events = kvm_pmu_event_mask(kvm) + 1;
1054 
1055         uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;
1056 
1057         if (copy_from_user(&filter, uaddr, sizeof(filter)))
1058             return -EFAULT;
1059 
1060         if (((u32)filter.base_event + filter.nevents) > nr_events ||
1061             (filter.action != KVM_PMU_EVENT_ALLOW &&
1062              filter.action != KVM_PMU_EVENT_DENY))
1063             return -EINVAL;
1064 
1065         mutex_lock(&kvm->lock);
1066 
1067         if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags)) {
1068             mutex_unlock(&kvm->lock);
1069             return -EBUSY;
1070         }
1071 
1072         if (!kvm->arch.pmu_filter) {
1073             kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT);
1074             if (!kvm->arch.pmu_filter) {
1075                 mutex_unlock(&kvm->lock);
1076                 return -ENOMEM;
1077             }
1078 
1079             /*
1080              * The default depends on the first applied filter.
1081              * If it allows events, the default is to deny.
1082              * Conversely, if the first filter denies a set of
1083              * events, the default is to allow.
1084              */
1085             if (filter.action == KVM_PMU_EVENT_ALLOW)
1086                 bitmap_zero(kvm->arch.pmu_filter, nr_events);
1087             else
1088                 bitmap_fill(kvm->arch.pmu_filter, nr_events);
1089         }
1090 
1091         if (filter.action == KVM_PMU_EVENT_ALLOW)
1092             bitmap_set(kvm->arch.pmu_filter, filter.base_event, filter.nevents);
1093         else
1094             bitmap_clear(kvm->arch.pmu_filter, filter.base_event, filter.nevents);
1095 
1096         mutex_unlock(&kvm->lock);
1097 
1098         return 0;
1099     }
1100     case KVM_ARM_VCPU_PMU_V3_SET_PMU: {
1101         int __user *uaddr = (int __user *)(long)attr->addr;
1102         int pmu_id;
1103 
1104         if (get_user(pmu_id, uaddr))
1105             return -EFAULT;
1106 
1107         return kvm_arm_pmu_v3_set_pmu(vcpu, pmu_id);
1108     }
1109     case KVM_ARM_VCPU_PMU_V3_INIT:
1110         return kvm_arm_pmu_v3_init(vcpu);
1111     }
1112 
1113     return -ENXIO;
1114 }
1115 
1116 int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
1117 {
1118     switch (attr->attr) {
1119     case KVM_ARM_VCPU_PMU_V3_IRQ: {
1120         int __user *uaddr = (int __user *)(long)attr->addr;
1121         int irq;
1122 
1123         if (!irqchip_in_kernel(vcpu->kvm))
1124             return -EINVAL;
1125 
1126         if (!kvm_vcpu_has_pmu(vcpu))
1127             return -ENODEV;
1128 
1129         if (!kvm_arm_pmu_irq_initialized(vcpu))
1130             return -ENXIO;
1131 
1132         irq = vcpu->arch.pmu.irq_num;
1133         return put_user(irq, uaddr);
1134     }
1135     }
1136 
1137     return -ENXIO;
1138 }
1139 
1140 int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
1141 {
1142     switch (attr->attr) {
1143     case KVM_ARM_VCPU_PMU_V3_IRQ:
1144     case KVM_ARM_VCPU_PMU_V3_INIT:
1145     case KVM_ARM_VCPU_PMU_V3_FILTER:
1146     case KVM_ARM_VCPU_PMU_V3_SET_PMU:
1147         if (kvm_vcpu_has_pmu(vcpu))
1148             return 0;
1149     }
1150 
1151     return -ENXIO;
1152 }