0001 // SPDX-License-Identifier: GPL-2.0-only
0002 
0003 /*
0004  * Local APIC virtualization
0005  *
0006  * Copyright (C) 2006 Qumranet, Inc.
0007  * Copyright (C) 2007 Novell
0008  * Copyright (C) 2007 Intel
0009  * Copyright 2009 Red Hat, Inc. and/or its affiliates.
0010  *
0011  * Authors:
0012  *   Dor Laor <dor.laor@qumranet.com>
0013  *   Gregory Haskins <ghaskins@novell.com>
0014  *   Yaozu (Eddie) Dong <eddie.dong@intel.com>
0015  *
0016  * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
0017  */
0018 
0019 #include <linux/kvm_host.h>
0020 #include <linux/kvm.h>
0021 #include <linux/mm.h>
0022 #include <linux/highmem.h>
0023 #include <linux/smp.h>
0024 #include <linux/hrtimer.h>
0025 #include <linux/io.h>
0026 #include <linux/export.h>
0027 #include <linux/math64.h>
0028 #include <linux/slab.h>
0029 #include <asm/processor.h>
0030 #include <asm/mce.h>
0031 #include <asm/msr.h>
0032 #include <asm/page.h>
0033 #include <asm/current.h>
0034 #include <asm/apicdef.h>
0035 #include <asm/delay.h>
0036 #include <linux/atomic.h>
0037 #include <linux/jump_label.h>
0038 #include "kvm_cache_regs.h"
0039 #include "irq.h"
0040 #include "ioapic.h"
0041 #include "trace.h"
0042 #include "x86.h"
0043 #include "cpuid.h"
0044 #include "hyperv.h"
0045 
0046 #ifndef CONFIG_X86_64
0047 #define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
0048 #else
0049 #define mod_64(x, y) ((x) % (y))
0050 #endif
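/*
 * Note on mod_64 (illustrative, not from the original source): 32-bit kernels
 * cannot use a plain 64-bit "%" (it would pull in a libgcc helper), so the
 * remainder is rebuilt from div64_u64().  Both forms agree, e.g.
 * mod_64(10, 4) == 10 - 4 * div64_u64(10, 4) == 2.
 */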
0051 
0052 #define PRId64 "d"
0053 #define PRIx64 "llx"
0054 #define PRIu64 "u"
0055 #define PRIo64 "o"
0056 
0057 /* 14 is the version for Xeon and Pentium 8.4.8 */
0058 #define APIC_VERSION            0x14UL
0059 #define LAPIC_MMIO_LENGTH       (1 << 12)
0060 /* the following defines are not in apicdef.h */
0061 #define MAX_APIC_VECTOR         256
0062 #define APIC_VECTORS_PER_REG        32
0063 
0064 static bool lapic_timer_advance_dynamic __read_mostly;
0065 #define LAPIC_TIMER_ADVANCE_ADJUST_MIN  100 /* clock cycles */
0066 #define LAPIC_TIMER_ADVANCE_ADJUST_MAX  10000   /* clock cycles */
0067 #define LAPIC_TIMER_ADVANCE_NS_INIT 1000
0068 #define LAPIC_TIMER_ADVANCE_NS_MAX     5000
0069 /* step-by-step approximation to mitigate fluctuation */
0070 #define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
0071 static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data);
0072 static int kvm_lapic_msr_write(struct kvm_lapic *apic, u32 reg, u64 data);
0073 
0074 static inline void __kvm_lapic_set_reg(char *regs, int reg_off, u32 val)
0075 {
0076     *((u32 *) (regs + reg_off)) = val;
0077 }
0078 
0079 static inline void kvm_lapic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
0080 {
0081     __kvm_lapic_set_reg(apic->regs, reg_off, val);
0082 }
0083 
0084 static __always_inline u64 __kvm_lapic_get_reg64(char *regs, int reg)
0085 {
0086     BUILD_BUG_ON(reg != APIC_ICR);
0087     return *((u64 *) (regs + reg));
0088 }
0089 
0090 static __always_inline u64 kvm_lapic_get_reg64(struct kvm_lapic *apic, int reg)
0091 {
0092     return __kvm_lapic_get_reg64(apic->regs, reg);
0093 }
0094 
0095 static __always_inline void __kvm_lapic_set_reg64(char *regs, int reg, u64 val)
0096 {
0097     BUILD_BUG_ON(reg != APIC_ICR);
0098     *((u64 *) (regs + reg)) = val;
0099 }
0100 
0101 static __always_inline void kvm_lapic_set_reg64(struct kvm_lapic *apic,
0102                         int reg, u64 val)
0103 {
0104     __kvm_lapic_set_reg64(apic->regs, reg, val);
0105 }
0106 
0107 static inline int apic_test_vector(int vec, void *bitmap)
0108 {
0109     return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
0110 }
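/*
 * Worked example, assuming the usual helpers from lapic.h
 * (VEC_POS(v) == (v & 31), REG_POS(v) == ((v >> 5) << 4)):
 * vector 0x31 maps to bit 17 of the 32-bit register at offset 0x10,
 * i.e. the second word of the 256-bit IRR/ISR/TMR array.
 */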
0111 
0112 bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
0113 {
0114     struct kvm_lapic *apic = vcpu->arch.apic;
0115 
0116     return apic_test_vector(vector, apic->regs + APIC_ISR) ||
0117         apic_test_vector(vector, apic->regs + APIC_IRR);
0118 }
0119 
0120 static inline int __apic_test_and_set_vector(int vec, void *bitmap)
0121 {
0122     return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
0123 }
0124 
0125 static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
0126 {
0127     return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
0128 }
0129 
0130 __read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_hw_disabled, HZ);
0131 __read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_sw_disabled, HZ);
0132 
0133 static inline int apic_enabled(struct kvm_lapic *apic)
0134 {
0135     return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic);
0136 }
0137 
0138 #define LVT_MASK    \
0139     (APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)
0140 
0141 #define LINT_MASK   \
0142     (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
0143      APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
0144 
0145 static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
0146 {
0147     return apic->vcpu->vcpu_id;
0148 }
0149 
0150 static bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu)
0151 {
0152     return pi_inject_timer && kvm_vcpu_apicv_active(vcpu) &&
0153         (kvm_mwait_in_guest(vcpu->kvm) || kvm_hlt_in_guest(vcpu->kvm));
0154 }
0155 
0156 bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu)
0157 {
0158     return kvm_x86_ops.set_hv_timer
0159            && !(kvm_mwait_in_guest(vcpu->kvm) ||
0160             kvm_can_post_timer_interrupt(vcpu));
0161 }
0162 EXPORT_SYMBOL_GPL(kvm_can_use_hv_timer);
0163 
0164 static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu)
0165 {
0166     return kvm_can_post_timer_interrupt(vcpu) && vcpu->mode == IN_GUEST_MODE;
0167 }
0168 
0169 static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
0170         u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
0171     switch (map->mode) {
0172     case KVM_APIC_MODE_X2APIC: {
0173         u32 offset = (dest_id >> 16) * 16;
0174         u32 max_apic_id = map->max_apic_id;
0175 
0176         if (offset <= max_apic_id) {
0177             u8 cluster_size = min(max_apic_id - offset + 1, 16U);
0178 
0179             offset = array_index_nospec(offset, map->max_apic_id + 1);
0180             *cluster = &map->phys_map[offset];
0181             *mask = dest_id & (0xffff >> (16 - cluster_size));
0182         } else {
0183             *mask = 0;
0184         }
0185 
0186         return true;
0187         }
0188     case KVM_APIC_MODE_XAPIC_FLAT:
0189         *cluster = map->xapic_flat_map;
0190         *mask = dest_id & 0xff;
0191         return true;
0192     case KVM_APIC_MODE_XAPIC_CLUSTER:
0193         *cluster = map->xapic_cluster_map[(dest_id >> 4) & 0xf];
0194         *mask = dest_id & 0xf;
0195         return true;
0196     default:
0197         /* Not optimized. */
0198         return false;
0199     }
0200 }
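/*
 * Example of the decoding above (assuming max_apic_id is large enough for a
 * full 16-entry cluster): an x2APIC logical ID of 0x00020008 selects cluster
 * 2, so *cluster points at &phys_map[2 * 16] and *mask == 0x8, i.e. bit 3 of
 * that cluster.  In xAPIC flat mode a destination of 0x03 uses the flat map
 * with *mask == 0x3, i.e. bits 0 and 1.
 */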
0201 
0202 static void kvm_apic_map_free(struct rcu_head *rcu)
0203 {
0204     struct kvm_apic_map *map = container_of(rcu, struct kvm_apic_map, rcu);
0205 
0206     kvfree(map);
0207 }
0208 
0209 /*
0210  * CLEAN -> DIRTY and UPDATE_IN_PROGRESS -> DIRTY changes happen without a lock.
0211  *
0212  * DIRTY -> UPDATE_IN_PROGRESS and UPDATE_IN_PROGRESS -> CLEAN happen with
0213  * kvm->arch.apic_map_lock held.
0214  */
0215 enum {
0216     CLEAN,
0217     UPDATE_IN_PROGRESS,
0218     DIRTY
0219 };
0220 
0221 void kvm_recalculate_apic_map(struct kvm *kvm)
0222 {
0223     struct kvm_apic_map *new, *old = NULL;
0224     struct kvm_vcpu *vcpu;
0225     unsigned long i;
0226     u32 max_id = 255; /* enough space for any xAPIC ID */
0227 
0228     /* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map.  */
0229     if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN)
0230         return;
0231 
0232     WARN_ONCE(!irqchip_in_kernel(kvm),
0233           "Dirty APIC map without an in-kernel local APIC");
0234 
0235     mutex_lock(&kvm->arch.apic_map_lock);
0236     /*
0237      * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map
0238      * (if clean) or the APIC registers (if dirty).
0239      */
0240     if (atomic_cmpxchg_acquire(&kvm->arch.apic_map_dirty,
0241                    DIRTY, UPDATE_IN_PROGRESS) == CLEAN) {
0242         /* Someone else has updated the map. */
0243         mutex_unlock(&kvm->arch.apic_map_lock);
0244         return;
0245     }
0246 
0247     kvm_for_each_vcpu(i, vcpu, kvm)
0248         if (kvm_apic_present(vcpu))
0249             max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));
0250 
0251     new = kvzalloc(sizeof(struct kvm_apic_map) +
0252                        sizeof(struct kvm_lapic *) * ((u64)max_id + 1),
0253                GFP_KERNEL_ACCOUNT);
0254 
0255     if (!new)
0256         goto out;
0257 
0258     new->max_apic_id = max_id;
0259 
0260     kvm_for_each_vcpu(i, vcpu, kvm) {
0261         struct kvm_lapic *apic = vcpu->arch.apic;
0262         struct kvm_lapic **cluster;
0263         u16 mask;
0264         u32 ldr;
0265         u8 xapic_id;
0266         u32 x2apic_id;
0267 
0268         if (!kvm_apic_present(vcpu))
0269             continue;
0270 
0271         xapic_id = kvm_xapic_id(apic);
0272         x2apic_id = kvm_x2apic_id(apic);
0273 
0274         /* Hotplug hack: see kvm_apic_match_physical_addr(), ... */
0275         if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) &&
0276                 x2apic_id <= new->max_apic_id)
0277             new->phys_map[x2apic_id] = apic;
0278         /*
0279          * ... the xAPIC ID of vCPUs with an APIC ID > 0xff will wrap around;
0280          * prevent them from masking vCPUs with an APIC ID <= 0xff.
0281          */
0282         if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id])
0283             new->phys_map[xapic_id] = apic;
0284 
0285         if (!kvm_apic_sw_enabled(apic))
0286             continue;
0287 
0288         ldr = kvm_lapic_get_reg(apic, APIC_LDR);
0289 
0290         if (apic_x2apic_mode(apic)) {
0291             new->mode |= KVM_APIC_MODE_X2APIC;
0292         } else if (ldr) {
0293             ldr = GET_APIC_LOGICAL_ID(ldr);
0294             if (kvm_lapic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT)
0295                 new->mode |= KVM_APIC_MODE_XAPIC_FLAT;
0296             else
0297                 new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER;
0298         }
0299 
0300         if (!kvm_apic_map_get_logical_dest(new, ldr, &cluster, &mask))
0301             continue;
0302 
0303         if (mask)
0304             cluster[ffs(mask) - 1] = apic;
0305     }
0306 out:
0307     old = rcu_dereference_protected(kvm->arch.apic_map,
0308             lockdep_is_held(&kvm->arch.apic_map_lock));
0309     rcu_assign_pointer(kvm->arch.apic_map, new);
0310     /*
0311      * Write kvm->arch.apic_map before clearing kvm->arch.apic_map_dirty.
0312      * If another update has come in, leave it DIRTY.
0313      */
0314     atomic_cmpxchg_release(&kvm->arch.apic_map_dirty,
0315                    UPDATE_IN_PROGRESS, CLEAN);
0316     mutex_unlock(&kvm->arch.apic_map_lock);
0317 
0318     if (old)
0319         call_rcu(&old->rcu, kvm_apic_map_free);
0320 
0321     kvm_make_scan_ioapic_request(kvm);
0322 }
0323 
0324 static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
0325 {
0326     bool enabled = val & APIC_SPIV_APIC_ENABLED;
0327 
0328     kvm_lapic_set_reg(apic, APIC_SPIV, val);
0329 
0330     if (enabled != apic->sw_enabled) {
0331         apic->sw_enabled = enabled;
0332         if (enabled)
0333             static_branch_slow_dec_deferred(&apic_sw_disabled);
0334         else
0335             static_branch_inc(&apic_sw_disabled.key);
0336 
0337         atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
0338     }
0339 
0340     /* Check if there are APF page ready requests pending */
0341     if (enabled)
0342         kvm_make_request(KVM_REQ_APF_READY, apic->vcpu);
0343 }
0344 
0345 static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id)
0346 {
0347     kvm_lapic_set_reg(apic, APIC_ID, id << 24);
0348     atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
0349 }
0350 
0351 static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
0352 {
0353     kvm_lapic_set_reg(apic, APIC_LDR, id);
0354     atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
0355 }
0356 
0357 static inline void kvm_apic_set_dfr(struct kvm_lapic *apic, u32 val)
0358 {
0359     kvm_lapic_set_reg(apic, APIC_DFR, val);
0360     atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
0361 }
0362 
0363 static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
0364 {
0365     return ((id >> 4) << 16) | (1 << (id & 0xf));
0366 }
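/*
 * Example: x2APIC ID 0x23 yields LDR 0x00020008, i.e. logical cluster 2
 * (0x23 >> 4) with bit 3 (0x23 & 0xf) set, matching the fixed cluster:bit
 * encoding that x2APIC mandates for the LDR.
 */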
0367 
0368 static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
0369 {
0370     u32 ldr = kvm_apic_calc_x2apic_ldr(id);
0371 
0372     WARN_ON_ONCE(id != apic->vcpu->vcpu_id);
0373 
0374     kvm_lapic_set_reg(apic, APIC_ID, id);
0375     kvm_lapic_set_reg(apic, APIC_LDR, ldr);
0376     atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
0377 }
0378 
0379 static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
0380 {
0381     return !(kvm_lapic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
0382 }
0383 
0384 static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
0385 {
0386     return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT;
0387 }
0388 
0389 static inline int apic_lvtt_period(struct kvm_lapic *apic)
0390 {
0391     return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC;
0392 }
0393 
0394 static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
0395 {
0396     return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
0397 }
0398 
0399 static inline int apic_lvt_nmi_mode(u32 lvt_val)
0400 {
0401     return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
0402 }
0403 
0404 static inline bool kvm_lapic_lvt_supported(struct kvm_lapic *apic, int lvt_index)
0405 {
0406     return apic->nr_lvt_entries > lvt_index;
0407 }
0408 
0409 static inline int kvm_apic_calc_nr_lvt_entries(struct kvm_vcpu *vcpu)
0410 {
0411     return KVM_APIC_MAX_NR_LVT_ENTRIES - !(vcpu->arch.mcg_cap & MCG_CMCI_P);
0412 }
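/*
 * Assuming KVM_APIC_MAX_NR_LVT_ENTRIES covers exactly the seven entries
 * initialized in apic_lvt_mask[] below, this yields 7 when the vCPU
 * advertises CMCI (MCG_CMCI_P) and 6 otherwise; LVT_CMCI is the only
 * optional entry.
 */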
0413 
0414 void kvm_apic_set_version(struct kvm_vcpu *vcpu)
0415 {
0416     struct kvm_lapic *apic = vcpu->arch.apic;
0417     u32 v = 0;
0418 
0419     if (!lapic_in_kernel(vcpu))
0420         return;
0421 
0422     v = APIC_VERSION | ((apic->nr_lvt_entries - 1) << 16);
0423 
0424     /*
0425      * KVM's in-kernel IOAPIC implementation emulates the 82093AA datasheet,
0426      * which doesn't have an EOI register.  Some buggy OSes (e.g. Windows
0427      * with the Hyper-V role) disable EOI broadcast in the LAPIC without
0428      * checking the IOAPIC version first, so level-triggered interrupts
0429      * would never get EOIed in the IOAPIC.
0430      */
0431     if (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) &&
0432         !ioapic_in_kernel(vcpu->kvm))
0433         v |= APIC_LVR_DIRECTED_EOI;
0434     kvm_lapic_set_reg(apic, APIC_LVR, v);
0435 }
0436 
0437 void kvm_apic_after_set_mcg_cap(struct kvm_vcpu *vcpu)
0438 {
0439     int nr_lvt_entries = kvm_apic_calc_nr_lvt_entries(vcpu);
0440     struct kvm_lapic *apic = vcpu->arch.apic;
0441     int i;
0442 
0443     if (!lapic_in_kernel(vcpu) || nr_lvt_entries == apic->nr_lvt_entries)
0444         return;
0445 
0446     /* Initialize/mask any "new" LVT entries. */
0447     for (i = apic->nr_lvt_entries; i < nr_lvt_entries; i++)
0448         kvm_lapic_set_reg(apic, APIC_LVTx(i), APIC_LVT_MASKED);
0449 
0450     apic->nr_lvt_entries = nr_lvt_entries;
0451 
0452     /* The number of LVT entries is reflected in the version register. */
0453     kvm_apic_set_version(vcpu);
0454 }
0455 
0456 static const unsigned int apic_lvt_mask[KVM_APIC_MAX_NR_LVT_ENTRIES] = {
0457     [LVT_TIMER] = LVT_MASK,      /* timer mode mask added at runtime */
0458     [LVT_THERMAL_MONITOR] = LVT_MASK | APIC_MODE_MASK,
0459     [LVT_PERFORMANCE_COUNTER] = LVT_MASK | APIC_MODE_MASK,
0460     [LVT_LINT0] = LINT_MASK,
0461     [LVT_LINT1] = LINT_MASK,
0462     [LVT_ERROR] = LVT_MASK,
0463     [LVT_CMCI] = LVT_MASK | APIC_MODE_MASK
0464 };
0465 
0466 static int find_highest_vector(void *bitmap)
0467 {
0468     int vec;
0469     u32 *reg;
0470 
0471     for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
0472          vec >= 0; vec -= APIC_VECTORS_PER_REG) {
0473         reg = bitmap + REG_POS(vec);
0474         if (*reg)
0475             return __fls(*reg) + vec;
0476     }
0477 
0478     return -1;
0479 }
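/*
 * Example: the scan starts at vec == 224 (the top 32-vector word) and walks
 * down in steps of 32.  If the only non-zero word is the one for vectors
 * 32-63 and it holds 0x00010000, the result is __fls(0x00010000) + 32 == 48,
 * i.e. vector 0x30.
 */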
0480 
0481 static u8 count_vectors(void *bitmap)
0482 {
0483     int vec;
0484     u32 *reg;
0485     u8 count = 0;
0486 
0487     for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) {
0488         reg = bitmap + REG_POS(vec);
0489         count += hweight32(*reg);
0490     }
0491 
0492     return count;
0493 }
0494 
0495 bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr)
0496 {
0497     u32 i, vec;
0498     u32 pir_val, irr_val, prev_irr_val;
0499     int max_updated_irr;
0500 
0501     max_updated_irr = -1;
0502     *max_irr = -1;
0503 
0504     for (i = vec = 0; i <= 7; i++, vec += 32) {
0505         pir_val = READ_ONCE(pir[i]);
0506         irr_val = *((u32 *)(regs + APIC_IRR + i * 0x10));
0507         if (pir_val) {
0508             prev_irr_val = irr_val;
0509             irr_val |= xchg(&pir[i], 0);
0510             *((u32 *)(regs + APIC_IRR + i * 0x10)) = irr_val;
0511             if (prev_irr_val != irr_val) {
0512                 max_updated_irr =
0513                     __fls(irr_val ^ prev_irr_val) + vec;
0514             }
0515         }
0516         if (irr_val)
0517             *max_irr = __fls(irr_val) + vec;
0518     }
0519 
0520     return ((max_updated_irr != -1) &&
0521         (max_updated_irr == *max_irr));
0522 }
0523 EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
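/*
 * Example of the merge above: if pir[1] == 0x4 and the guest's IRR word for
 * vectors 32-63 already holds 0x1, the new IRR word becomes 0x5,
 * max_updated_irr == __fls(0x4) + 32 == 34 and *max_irr == __fls(0x5) + 32 ==
 * 34, so the function returns true (the freshly posted vector 34 is also the
 * highest pending one).
 */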
0524 
0525 bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr)
0526 {
0527     struct kvm_lapic *apic = vcpu->arch.apic;
0528 
0529     return __kvm_apic_update_irr(pir, apic->regs, max_irr);
0530 }
0531 EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
0532 
0533 static inline int apic_search_irr(struct kvm_lapic *apic)
0534 {
0535     return find_highest_vector(apic->regs + APIC_IRR);
0536 }
0537 
0538 static inline int apic_find_highest_irr(struct kvm_lapic *apic)
0539 {
0540     int result;
0541 
0542     /*
0543      * Note that irr_pending is just a hint. It will always be
0544      * true with virtual interrupt delivery enabled.
0545      */
0546     if (!apic->irr_pending)
0547         return -1;
0548 
0549     result = apic_search_irr(apic);
0550     ASSERT(result == -1 || result >= 16);
0551 
0552     return result;
0553 }
0554 
0555 static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
0556 {
0557     if (unlikely(apic->apicv_active)) {
0558         /* need to update RVI */
0559         kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
0560         static_call_cond(kvm_x86_hwapic_irr_update)(apic->vcpu,
0561                                 apic_find_highest_irr(apic));
0562     } else {
0563         apic->irr_pending = false;
0564         kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
0565         if (apic_search_irr(apic) != -1)
0566             apic->irr_pending = true;
0567     }
0568 }
0569 
0570 void kvm_apic_clear_irr(struct kvm_vcpu *vcpu, int vec)
0571 {
0572     apic_clear_irr(vec, vcpu->arch.apic);
0573 }
0574 EXPORT_SYMBOL_GPL(kvm_apic_clear_irr);
0575 
0576 static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
0577 {
0578     if (__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
0579         return;
0580 
0581     /*
0582      * With APIC virtualization enabled, all caching is disabled
0583      * because the processor can modify ISR under the hood.  Instead
0584      * just set SVI.
0585      */
0586     if (unlikely(apic->apicv_active))
0587         static_call_cond(kvm_x86_hwapic_isr_update)(vec);
0588     else {
0589         ++apic->isr_count;
0590         BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
0591         /*
0592          * An ISR (in-service register) bit is set when an interrupt is
0593          * injected. Only the highest-priority vector is injected, so the
0594          * most recently set bit is also the highest bit in the ISR.
0595          */
0596         apic->highest_isr_cache = vec;
0597     }
0598 }
0599 
0600 static inline int apic_find_highest_isr(struct kvm_lapic *apic)
0601 {
0602     int result;
0603 
0604     /*
0605      * Note that isr_count is always 1, and highest_isr_cache
0606      * is always -1, with APIC virtualization enabled.
0607      */
0608     if (!apic->isr_count)
0609         return -1;
0610     if (likely(apic->highest_isr_cache != -1))
0611         return apic->highest_isr_cache;
0612 
0613     result = find_highest_vector(apic->regs + APIC_ISR);
0614     ASSERT(result == -1 || result >= 16);
0615 
0616     return result;
0617 }
0618 
0619 static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
0620 {
0621     if (!__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
0622         return;
0623 
0624     /*
0625      * We do get here with APIC virtualization enabled if the guest
0626      * uses the Hyper-V APIC enlightenment.  In this case we may need
0627      * to trigger a new interrupt delivery by writing the SVI field;
0628      * on the other hand isr_count and highest_isr_cache are unused
0629      * and must be left alone.
0630      */
0631     if (unlikely(apic->apicv_active))
0632         static_call_cond(kvm_x86_hwapic_isr_update)(apic_find_highest_isr(apic));
0633     else {
0634         --apic->isr_count;
0635         BUG_ON(apic->isr_count < 0);
0636         apic->highest_isr_cache = -1;
0637     }
0638 }
0639 
0640 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
0641 {
0642     /* This may race with the setting of the IRR in __apic_accept_irq() and
0643      * the value returned may be stale, but kvm_vcpu_kick() in
0644      * __apic_accept_irq() will cause an immediate vmexit and the value will
0645      * be recalculated on the next vmentry.
0646      */
0647     return apic_find_highest_irr(vcpu->arch.apic);
0648 }
0649 EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);
0650 
0651 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
0652                  int vector, int level, int trig_mode,
0653                  struct dest_map *dest_map);
0654 
0655 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
0656              struct dest_map *dest_map)
0657 {
0658     struct kvm_lapic *apic = vcpu->arch.apic;
0659 
0660     return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
0661             irq->level, irq->trig_mode, dest_map);
0662 }
0663 
0664 static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map,
0665              struct kvm_lapic_irq *irq, u32 min)
0666 {
0667     int i, count = 0;
0668     struct kvm_vcpu *vcpu;
0669 
0670     if (min > map->max_apic_id)
0671         return 0;
0672 
0673     for_each_set_bit(i, ipi_bitmap,
0674         min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
0675         if (map->phys_map[min + i]) {
0676             vcpu = map->phys_map[min + i]->vcpu;
0677             count += kvm_apic_set_irq(vcpu, irq, NULL);
0678         }
0679     }
0680 
0681     return count;
0682 }
0683 
0684 int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
0685             unsigned long ipi_bitmap_high, u32 min,
0686             unsigned long icr, int op_64_bit)
0687 {
0688     struct kvm_apic_map *map;
0689     struct kvm_lapic_irq irq = {0};
0690     int cluster_size = op_64_bit ? 64 : 32;
0691     int count;
0692 
0693     if (icr & (APIC_DEST_MASK | APIC_SHORT_MASK))
0694         return -KVM_EINVAL;
0695 
0696     irq.vector = icr & APIC_VECTOR_MASK;
0697     irq.delivery_mode = icr & APIC_MODE_MASK;
0698     irq.level = (icr & APIC_INT_ASSERT) != 0;
0699     irq.trig_mode = icr & APIC_INT_LEVELTRIG;
0700 
0701     rcu_read_lock();
0702     map = rcu_dereference(kvm->arch.apic_map);
0703 
0704     count = -EOPNOTSUPP;
0705     if (likely(map)) {
0706         count = __pv_send_ipi(&ipi_bitmap_low, map, &irq, min);
0707         min += cluster_size;
0708         count += __pv_send_ipi(&ipi_bitmap_high, map, &irq, min);
0709     }
0710 
0711     rcu_read_unlock();
0712     return count;
0713 }
0714 
0715 static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
0716 {
0717 
0718     return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
0719                       sizeof(val));
0720 }
0721 
0722 static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
0723 {
0724 
0725     return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
0726                       sizeof(*val));
0727 }
0728 
0729 static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
0730 {
0731     return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
0732 }
0733 
0734 static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
0735 {
0736     if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0)
0737         return;
0738 
0739     __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
0740 }
0741 
0742 static bool pv_eoi_test_and_clr_pending(struct kvm_vcpu *vcpu)
0743 {
0744     u8 val;
0745 
0746     if (pv_eoi_get_user(vcpu, &val) < 0)
0747         return false;
0748 
0749     val &= KVM_PV_EOI_ENABLED;
0750 
0751     if (val && pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0)
0752         return false;
0753 
0754     /*
0755      * Clear pending bit in any case: it will be set again on vmentry.
0756      * While this might not be ideal from a performance point of view,
0757      * it makes sure PV EOI is only enabled when we know it's safe.
0758      */
0759     __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
0760 
0761     return val;
0762 }
0763 
0764 static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
0765 {
0766     int highest_irr;
0767     if (kvm_x86_ops.sync_pir_to_irr)
0768         highest_irr = static_call(kvm_x86_sync_pir_to_irr)(apic->vcpu);
0769     else
0770         highest_irr = apic_find_highest_irr(apic);
0771     if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr)
0772         return -1;
0773     return highest_irr;
0774 }
0775 
0776 static bool __apic_update_ppr(struct kvm_lapic *apic, u32 *new_ppr)
0777 {
0778     u32 tpr, isrv, ppr, old_ppr;
0779     int isr;
0780 
0781     old_ppr = kvm_lapic_get_reg(apic, APIC_PROCPRI);
0782     tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI);
0783     isr = apic_find_highest_isr(apic);
0784     isrv = (isr != -1) ? isr : 0;
0785 
0786     if ((tpr & 0xf0) >= (isrv & 0xf0))
0787         ppr = tpr & 0xff;
0788     else
0789         ppr = isrv & 0xf0;
0790 
0791     *new_ppr = ppr;
0792     if (old_ppr != ppr)
0793         kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr);
0794 
0795     return ppr < old_ppr;
0796 }
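/*
 * Example of the PPR rule above: with TPR == 0x30 and highest in-service
 * vector 0x51, the ISR class (0x50) wins and PPR becomes 0x50; with TPR ==
 * 0x61 and the same ISR, the TPR wins and PPR becomes 0x61.
 */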
0797 
0798 static void apic_update_ppr(struct kvm_lapic *apic)
0799 {
0800     u32 ppr;
0801 
0802     if (__apic_update_ppr(apic, &ppr) &&
0803         apic_has_interrupt_for_ppr(apic, ppr) != -1)
0804         kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
0805 }
0806 
0807 void kvm_apic_update_ppr(struct kvm_vcpu *vcpu)
0808 {
0809     apic_update_ppr(vcpu->arch.apic);
0810 }
0811 EXPORT_SYMBOL_GPL(kvm_apic_update_ppr);
0812 
0813 static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
0814 {
0815     kvm_lapic_set_reg(apic, APIC_TASKPRI, tpr);
0816     apic_update_ppr(apic);
0817 }
0818 
0819 static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda)
0820 {
0821     return mda == (apic_x2apic_mode(apic) ?
0822             X2APIC_BROADCAST : APIC_BROADCAST);
0823 }
0824 
0825 static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
0826 {
0827     if (kvm_apic_broadcast(apic, mda))
0828         return true;
0829 
0830     /*
0831      * Hotplug hack: Accept interrupts for vCPUs in xAPIC mode as if they
0832      * were in x2APIC mode if the target APIC ID can't be encoded as an
0833      * xAPIC ID.  This allows unique addressing of hotplugged vCPUs (which
0834      * start in xAPIC mode) with an APIC ID that is unaddressable in xAPIC
0835      * mode.  Match the x2APIC ID if and only if the target APIC ID can't
0836      * be encoded in xAPIC to avoid spurious matches against a vCPU that
0837      * changed its (addressable) xAPIC ID (which is writable).
0838      */
0839     if (apic_x2apic_mode(apic) || mda > 0xff)
0840         return mda == kvm_x2apic_id(apic);
0841 
0842     return mda == kvm_xapic_id(apic);
0843 }
0844 
0845 static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
0846 {
0847     u32 logical_id;
0848 
0849     if (kvm_apic_broadcast(apic, mda))
0850         return true;
0851 
0852     logical_id = kvm_lapic_get_reg(apic, APIC_LDR);
0853 
0854     if (apic_x2apic_mode(apic))
0855         return ((logical_id >> 16) == (mda >> 16))
0856                && (logical_id & mda & 0xffff) != 0;
0857 
0858     logical_id = GET_APIC_LOGICAL_ID(logical_id);
0859 
0860     switch (kvm_lapic_get_reg(apic, APIC_DFR)) {
0861     case APIC_DFR_FLAT:
0862         return (logical_id & mda) != 0;
0863     case APIC_DFR_CLUSTER:
0864         return ((logical_id >> 4) == (mda >> 4))
0865                && (logical_id & mda & 0xf) != 0;
0866     default:
0867         return false;
0868     }
0869 }
0870 
0871 /* The KVM local APIC implementation has two quirks:
0872  *
0873  *  - Real hardware delivers interrupts destined to x2APIC ID > 0xff to LAPICs
0874  *    in xAPIC mode if the "destination & 0xff" matches its xAPIC ID.
0875  *    KVM doesn't do that aliasing.
0876  *
0877  *  - in-kernel IOAPIC messages have to be delivered directly to
0878  *    x2APIC, because the kernel does not support interrupt remapping.
0879  *    In order to support broadcast without interrupt remapping, x2APIC
0880  *    rewrites the destination of non-IPI messages from APIC_BROADCAST
0881  *    to X2APIC_BROADCAST.
0882  *
0883  * The broadcast quirk can be disabled with KVM_CAP_X2APIC_API.  This is
0884  * important when userspace wants to use x2APIC-format MSIs, because
0885  * APIC_BROADCAST (0xff) is a legal route for "cluster 0, CPUs 0-7".
0886  */
0887 static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id,
0888         struct kvm_lapic *source, struct kvm_lapic *target)
0889 {
0890     bool ipi = source != NULL;
0891 
0892     if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled &&
0893         !ipi && dest_id == APIC_BROADCAST && apic_x2apic_mode(target))
0894         return X2APIC_BROADCAST;
0895 
0896     return dest_id;
0897 }
0898 
0899 bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
0900                int shorthand, unsigned int dest, int dest_mode)
0901 {
0902     struct kvm_lapic *target = vcpu->arch.apic;
0903     u32 mda = kvm_apic_mda(vcpu, dest, source, target);
0904 
0905     ASSERT(target);
0906     switch (shorthand) {
0907     case APIC_DEST_NOSHORT:
0908         if (dest_mode == APIC_DEST_PHYSICAL)
0909             return kvm_apic_match_physical_addr(target, mda);
0910         else
0911             return kvm_apic_match_logical_addr(target, mda);
0912     case APIC_DEST_SELF:
0913         return target == source;
0914     case APIC_DEST_ALLINC:
0915         return true;
0916     case APIC_DEST_ALLBUT:
0917         return target != source;
0918     default:
0919         return false;
0920     }
0921 }
0922 EXPORT_SYMBOL_GPL(kvm_apic_match_dest);
0923 
0924 int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
0925                const unsigned long *bitmap, u32 bitmap_size)
0926 {
0927     u32 mod;
0928     int i, idx = -1;
0929 
0930     mod = vector % dest_vcpus;
0931 
0932     for (i = 0; i <= mod; i++) {
0933         idx = find_next_bit(bitmap, bitmap_size, idx + 1);
0934         BUG_ON(idx == bitmap_size);
0935     }
0936 
0937     return idx;
0938 }
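/*
 * Example: vector 0x23 (35) with 3 destination vCPUs gives mod == 35 % 3 == 2,
 * so the function returns the index of the third set bit in the bitmap, i.e.
 * the same vector always hashes to the same destination.
 */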
0939 
0940 static void kvm_apic_disabled_lapic_found(struct kvm *kvm)
0941 {
0942     if (!kvm->arch.disabled_lapic_found) {
0943         kvm->arch.disabled_lapic_found = true;
0944         printk(KERN_INFO
0945                "Disabled LAPIC found during irq injection\n");
0946     }
0947 }
0948 
0949 static bool kvm_apic_is_broadcast_dest(struct kvm *kvm, struct kvm_lapic **src,
0950         struct kvm_lapic_irq *irq, struct kvm_apic_map *map)
0951 {
0952     if (kvm->arch.x2apic_broadcast_quirk_disabled) {
0953         if ((irq->dest_id == APIC_BROADCAST &&
0954                 map->mode != KVM_APIC_MODE_X2APIC))
0955             return true;
0956         if (irq->dest_id == X2APIC_BROADCAST)
0957             return true;
0958     } else {
0959         bool x2apic_ipi = src && *src && apic_x2apic_mode(*src);
0960         if (irq->dest_id == (x2apic_ipi ?
0961                              X2APIC_BROADCAST : APIC_BROADCAST))
0962             return true;
0963     }
0964 
0965     return false;
0966 }
0967 
0968 /* Return true if the interrupt can be handled by using *bitmap as an index
0969  * mask for valid destinations in the *dst array.
0970  * Return false if kvm_apic_map_get_dest_lapic did nothing useful.
0971  * Note: we may have zero kvm_lapic destinations when we return true, which
0972  * means that the interrupt should be dropped.  In this case, *bitmap would be
0973  * zero and *dst undefined.
0974  */
0975 static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
0976         struct kvm_lapic **src, struct kvm_lapic_irq *irq,
0977         struct kvm_apic_map *map, struct kvm_lapic ***dst,
0978         unsigned long *bitmap)
0979 {
0980     int i, lowest;
0981 
0982     if (irq->shorthand == APIC_DEST_SELF && src) {
0983         *dst = src;
0984         *bitmap = 1;
0985         return true;
0986     } else if (irq->shorthand)
0987         return false;
0988 
0989     if (!map || kvm_apic_is_broadcast_dest(kvm, src, irq, map))
0990         return false;
0991 
0992     if (irq->dest_mode == APIC_DEST_PHYSICAL) {
0993         if (irq->dest_id > map->max_apic_id) {
0994             *bitmap = 0;
0995         } else {
0996             u32 dest_id = array_index_nospec(irq->dest_id, map->max_apic_id + 1);
0997             *dst = &map->phys_map[dest_id];
0998             *bitmap = 1;
0999         }
1000         return true;
1001     }
1002 
1003     *bitmap = 0;
1004     if (!kvm_apic_map_get_logical_dest(map, irq->dest_id, dst,
1005                 (u16 *)bitmap))
1006         return false;
1007 
1008     if (!kvm_lowest_prio_delivery(irq))
1009         return true;
1010 
1011     if (!kvm_vector_hashing_enabled()) {
1012         lowest = -1;
1013         for_each_set_bit(i, bitmap, 16) {
1014             if (!(*dst)[i])
1015                 continue;
1016             if (lowest < 0)
1017                 lowest = i;
1018             else if (kvm_apic_compare_prio((*dst)[i]->vcpu,
1019                         (*dst)[lowest]->vcpu) < 0)
1020                 lowest = i;
1021         }
1022     } else {
1023         if (!*bitmap)
1024             return true;
1025 
1026         lowest = kvm_vector_to_index(irq->vector, hweight16(*bitmap),
1027                 bitmap, 16);
1028 
1029         if (!(*dst)[lowest]) {
1030             kvm_apic_disabled_lapic_found(kvm);
1031             *bitmap = 0;
1032             return true;
1033         }
1034     }
1035 
1036     *bitmap = (lowest >= 0) ? 1 << lowest : 0;
1037 
1038     return true;
1039 }
1040 
1041 bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
1042         struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map)
1043 {
1044     struct kvm_apic_map *map;
1045     unsigned long bitmap;
1046     struct kvm_lapic **dst = NULL;
1047     int i;
1048     bool ret;
1049 
1050     *r = -1;
1051 
1052     if (irq->shorthand == APIC_DEST_SELF) {
1053         if (KVM_BUG_ON(!src, kvm)) {
1054             *r = 0;
1055             return true;
1056         }
1057         *r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
1058         return true;
1059     }
1060 
1061     rcu_read_lock();
1062     map = rcu_dereference(kvm->arch.apic_map);
1063 
1064     ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap);
1065     if (ret) {
1066         *r = 0;
1067         for_each_set_bit(i, &bitmap, 16) {
1068             if (!dst[i])
1069                 continue;
1070             *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
1071         }
1072     }
1073 
1074     rcu_read_unlock();
1075     return ret;
1076 }
1077 
1078 /*
1079  * This routine tries to handle interrupts in posted mode, here is how
1080  * it deals with different cases:
1081  * - For single-destination interrupts, handle it in posted mode
1082  * - Else if vector hashing is enabled and it is a lowest-priority
1083  *   interrupt, handle it in posted mode and use the following mechanism
1084  *   to find the destination vCPU.
1085  *  1. For lowest-priority interrupts, store all the possible
1086  *     destination vCPUs in an array.
1087  *  2. Use "guest vector % max number of destination vCPUs" to find
1088  *     the right destination vCPU in the array for the lowest-priority
1089  *     interrupt.
1090  * - Otherwise, use remapped mode to inject the interrupt.
1091  */
1092 bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
1093             struct kvm_vcpu **dest_vcpu)
1094 {
1095     struct kvm_apic_map *map;
1096     unsigned long bitmap;
1097     struct kvm_lapic **dst = NULL;
1098     bool ret = false;
1099 
1100     if (irq->shorthand)
1101         return false;
1102 
1103     rcu_read_lock();
1104     map = rcu_dereference(kvm->arch.apic_map);
1105 
1106     if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) &&
1107             hweight16(bitmap) == 1) {
1108         unsigned long i = find_first_bit(&bitmap, 16);
1109 
1110         if (dst[i]) {
1111             *dest_vcpu = dst[i]->vcpu;
1112             ret = true;
1113         }
1114     }
1115 
1116     rcu_read_unlock();
1117     return ret;
1118 }
1119 
1120 /*
1121  * Add a pending IRQ into lapic.
1122  * Return 1 if successfully added and 0 if discarded.
1123  */
1124 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
1125                  int vector, int level, int trig_mode,
1126                  struct dest_map *dest_map)
1127 {
1128     int result = 0;
1129     struct kvm_vcpu *vcpu = apic->vcpu;
1130 
1131     trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
1132                   trig_mode, vector);
1133     switch (delivery_mode) {
1134     case APIC_DM_LOWEST:
1135         vcpu->arch.apic_arb_prio++;
1136         fallthrough;
1137     case APIC_DM_FIXED:
1138         if (unlikely(trig_mode && !level))
1139             break;
1140 
1141         /* FIXME add logic for vcpu on reset */
1142         if (unlikely(!apic_enabled(apic)))
1143             break;
1144 
1145         result = 1;
1146 
1147         if (dest_map) {
1148             __set_bit(vcpu->vcpu_id, dest_map->map);
1149             dest_map->vectors[vcpu->vcpu_id] = vector;
1150         }
1151 
1152         if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) {
1153             if (trig_mode)
1154                 kvm_lapic_set_vector(vector,
1155                              apic->regs + APIC_TMR);
1156             else
1157                 kvm_lapic_clear_vector(vector,
1158                                apic->regs + APIC_TMR);
1159         }
1160 
1161         static_call(kvm_x86_deliver_interrupt)(apic, delivery_mode,
1162                                trig_mode, vector);
1163         break;
1164 
1165     case APIC_DM_REMRD:
1166         result = 1;
1167         vcpu->arch.pv.pv_unhalted = 1;
1168         kvm_make_request(KVM_REQ_EVENT, vcpu);
1169         kvm_vcpu_kick(vcpu);
1170         break;
1171 
1172     case APIC_DM_SMI:
1173         result = 1;
1174         kvm_make_request(KVM_REQ_SMI, vcpu);
1175         kvm_vcpu_kick(vcpu);
1176         break;
1177 
1178     case APIC_DM_NMI:
1179         result = 1;
1180         kvm_inject_nmi(vcpu);
1181         kvm_vcpu_kick(vcpu);
1182         break;
1183 
1184     case APIC_DM_INIT:
1185         if (!trig_mode || level) {
1186             result = 1;
1187             /* assumes that there are only KVM_APIC_INIT/SIPI */
1188             apic->pending_events = (1UL << KVM_APIC_INIT);
1189             kvm_make_request(KVM_REQ_EVENT, vcpu);
1190             kvm_vcpu_kick(vcpu);
1191         }
1192         break;
1193 
1194     case APIC_DM_STARTUP:
1195         result = 1;
1196         apic->sipi_vector = vector;
1197         /* make sure sipi_vector is visible for the receiver */
1198         smp_wmb();
1199         set_bit(KVM_APIC_SIPI, &apic->pending_events);
1200         kvm_make_request(KVM_REQ_EVENT, vcpu);
1201         kvm_vcpu_kick(vcpu);
1202         break;
1203 
1204     case APIC_DM_EXTINT:
1205         /*
1206          * Should only be called by kvm_apic_local_deliver() with LVT0,
1207          * before NMI watchdog was enabled. Already handled by
1208          * kvm_apic_accept_pic_intr().
1209          */
1210         break;
1211 
1212     default:
1213         printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
1214                delivery_mode);
1215         break;
1216     }
1217     return result;
1218 }
1219 
1220 /*
1221  * This routine identifies the mask of destination vcpus meant to receive an
1222  * IOAPIC interrupt. It either uses kvm_apic_map_get_dest_lapic() to find the
1223  * destination vcpu array and set the bitmap, or it walks every available
1224  * vcpu and checks whether it matches the destination.
1225  */
1226 void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq,
1227                   unsigned long *vcpu_bitmap)
1228 {
1229     struct kvm_lapic **dest_vcpu = NULL;
1230     struct kvm_lapic *src = NULL;
1231     struct kvm_apic_map *map;
1232     struct kvm_vcpu *vcpu;
1233     unsigned long bitmap, i;
1234     int vcpu_idx;
1235     bool ret;
1236 
1237     rcu_read_lock();
1238     map = rcu_dereference(kvm->arch.apic_map);
1239 
1240     ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dest_vcpu,
1241                       &bitmap);
1242     if (ret) {
1243         for_each_set_bit(i, &bitmap, 16) {
1244             if (!dest_vcpu[i])
1245                 continue;
1246             vcpu_idx = dest_vcpu[i]->vcpu->vcpu_idx;
1247             __set_bit(vcpu_idx, vcpu_bitmap);
1248         }
1249     } else {
1250         kvm_for_each_vcpu(i, vcpu, kvm) {
1251             if (!kvm_apic_present(vcpu))
1252                 continue;
1253             if (!kvm_apic_match_dest(vcpu, NULL,
1254                          irq->shorthand,
1255                          irq->dest_id,
1256                          irq->dest_mode))
1257                 continue;
1258             __set_bit(i, vcpu_bitmap);
1259         }
1260     }
1261     rcu_read_unlock();
1262 }
1263 
1264 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
1265 {
1266     return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
1267 }
1268 
1269 static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector)
1270 {
1271     return test_bit(vector, apic->vcpu->arch.ioapic_handled_vectors);
1272 }
1273 
1274 static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
1275 {
1276     int trigger_mode;
1277 
1278     /* Eoi the ioapic only if the ioapic doesn't own the vector. */
1279     if (!kvm_ioapic_handles_vector(apic, vector))
1280         return;
1281 
1282     /* Request a KVM exit to inform the userspace IOAPIC. */
1283     if (irqchip_split(apic->vcpu->kvm)) {
1284         apic->vcpu->arch.pending_ioapic_eoi = vector;
1285         kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu);
1286         return;
1287     }
1288 
1289     if (apic_test_vector(vector, apic->regs + APIC_TMR))
1290         trigger_mode = IOAPIC_LEVEL_TRIG;
1291     else
1292         trigger_mode = IOAPIC_EDGE_TRIG;
1293 
1294     kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
1295 }
1296 
1297 static int apic_set_eoi(struct kvm_lapic *apic)
1298 {
1299     int vector = apic_find_highest_isr(apic);
1300 
1301     trace_kvm_eoi(apic, vector);
1302 
1303     /*
1304      * Not every EOI write has a corresponding ISR bit set; one example
1305      * is when the kernel checks the timer during setup_IO_APIC().
1306      */
1307     if (vector == -1)
1308         return vector;
1309 
1310     apic_clear_isr(vector, apic);
1311     apic_update_ppr(apic);
1312 
1313     if (to_hv_vcpu(apic->vcpu) &&
1314         test_bit(vector, to_hv_synic(apic->vcpu)->vec_bitmap))
1315         kvm_hv_synic_send_eoi(apic->vcpu, vector);
1316 
1317     kvm_ioapic_send_eoi(apic, vector);
1318     kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
1319     return vector;
1320 }
1321 
1322 /*
1323  * This interface assumes a trap-like exit, which has already finished the
1324  * desired side effects, including the vISR and vPPR updates.
1325  */
1326 void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
1327 {
1328     struct kvm_lapic *apic = vcpu->arch.apic;
1329 
1330     trace_kvm_eoi(apic, vector);
1331 
1332     kvm_ioapic_send_eoi(apic, vector);
1333     kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
1334 }
1335 EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
1336 
1337 void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
1338 {
1339     struct kvm_lapic_irq irq;
1340 
1341     /* KVM has no delay and should always clear the BUSY/PENDING flag. */
1342     WARN_ON_ONCE(icr_low & APIC_ICR_BUSY);
1343 
1344     irq.vector = icr_low & APIC_VECTOR_MASK;
1345     irq.delivery_mode = icr_low & APIC_MODE_MASK;
1346     irq.dest_mode = icr_low & APIC_DEST_MASK;
1347     irq.level = (icr_low & APIC_INT_ASSERT) != 0;
1348     irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
1349     irq.shorthand = icr_low & APIC_SHORT_MASK;
1350     irq.msi_redir_hint = false;
1351     if (apic_x2apic_mode(apic))
1352         irq.dest_id = icr_high;
1353     else
1354         irq.dest_id = GET_XAPIC_DEST_FIELD(icr_high);
1355 
1356     trace_kvm_apic_ipi(icr_low, irq.dest_id);
1357 
1358     kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
1359 }
1360 EXPORT_SYMBOL_GPL(kvm_apic_send_ipi);
1361 
1362 static u32 apic_get_tmcct(struct kvm_lapic *apic)
1363 {
1364     ktime_t remaining, now;
1365     s64 ns;
1366     u32 tmcct;
1367 
1368     ASSERT(apic != NULL);
1369 
1370     /* if initial count is 0, current count should also be 0 */
1371     if (kvm_lapic_get_reg(apic, APIC_TMICT) == 0 ||
1372         apic->lapic_timer.period == 0)
1373         return 0;
1374 
1375     now = ktime_get();
1376     remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
1377     if (ktime_to_ns(remaining) < 0)
1378         remaining = 0;
1379 
1380     ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
1381     tmcct = div64_u64(ns,
1382              (APIC_BUS_CYCLE_NS * apic->divide_count));
1383 
1384     return tmcct;
1385 }
1386 
1387 static void __report_tpr_access(struct kvm_lapic *apic, bool write)
1388 {
1389     struct kvm_vcpu *vcpu = apic->vcpu;
1390     struct kvm_run *run = vcpu->run;
1391 
1392     kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu);
1393     run->tpr_access.rip = kvm_rip_read(vcpu);
1394     run->tpr_access.is_write = write;
1395 }
1396 
1397 static inline void report_tpr_access(struct kvm_lapic *apic, bool write)
1398 {
1399     if (apic->vcpu->arch.tpr_access_reporting)
1400         __report_tpr_access(apic, write);
1401 }
1402 
1403 static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
1404 {
1405     u32 val = 0;
1406 
1407     if (offset >= LAPIC_MMIO_LENGTH)
1408         return 0;
1409 
1410     switch (offset) {
1411     case APIC_ARBPRI:
1412         break;
1413 
1414     case APIC_TMCCT:    /* Timer CCR */
1415         if (apic_lvtt_tscdeadline(apic))
1416             return 0;
1417 
1418         val = apic_get_tmcct(apic);
1419         break;
1420     case APIC_PROCPRI:
1421         apic_update_ppr(apic);
1422         val = kvm_lapic_get_reg(apic, offset);
1423         break;
1424     case APIC_TASKPRI:
1425         report_tpr_access(apic, false);
1426         fallthrough;
1427     default:
1428         val = kvm_lapic_get_reg(apic, offset);
1429         break;
1430     }
1431 
1432     return val;
1433 }
1434 
1435 static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
1436 {
1437     return container_of(dev, struct kvm_lapic, dev);
1438 }
1439 
1440 #define APIC_REG_MASK(reg)  (1ull << ((reg) >> 4))
1441 #define APIC_REGS_MASK(first, count) \
1442     (APIC_REG_MASK(first) * ((1ull << (count)) - 1))
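/*
 * Example, using the offsets from apicdef.h (APIC_SPIV == 0xF0,
 * APIC_IRR == 0x200): APIC_REG_MASK(APIC_SPIV) sets bit 15, and
 * APIC_REGS_MASK(APIC_IRR, 8) sets bits 32-39, one bit per 16-byte register.
 */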
1443 
1444 static int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
1445                   void *data)
1446 {
1447     unsigned char alignment = offset & 0xf;
1448     u32 result;
1449     /* this bitmask has a bit cleared for each reserved register */
1450     u64 valid_reg_mask =
1451         APIC_REG_MASK(APIC_ID) |
1452         APIC_REG_MASK(APIC_LVR) |
1453         APIC_REG_MASK(APIC_TASKPRI) |
1454         APIC_REG_MASK(APIC_PROCPRI) |
1455         APIC_REG_MASK(APIC_LDR) |
1456         APIC_REG_MASK(APIC_DFR) |
1457         APIC_REG_MASK(APIC_SPIV) |
1458         APIC_REGS_MASK(APIC_ISR, APIC_ISR_NR) |
1459         APIC_REGS_MASK(APIC_TMR, APIC_ISR_NR) |
1460         APIC_REGS_MASK(APIC_IRR, APIC_ISR_NR) |
1461         APIC_REG_MASK(APIC_ESR) |
1462         APIC_REG_MASK(APIC_ICR) |
1463         APIC_REG_MASK(APIC_LVTT) |
1464         APIC_REG_MASK(APIC_LVTTHMR) |
1465         APIC_REG_MASK(APIC_LVTPC) |
1466         APIC_REG_MASK(APIC_LVT0) |
1467         APIC_REG_MASK(APIC_LVT1) |
1468         APIC_REG_MASK(APIC_LVTERR) |
1469         APIC_REG_MASK(APIC_TMICT) |
1470         APIC_REG_MASK(APIC_TMCCT) |
1471         APIC_REG_MASK(APIC_TDCR);
1472 
1473     if (kvm_lapic_lvt_supported(apic, LVT_CMCI))
1474         valid_reg_mask |= APIC_REG_MASK(APIC_LVTCMCI);
1475 
1476     /*
1477      * ARBPRI and ICR2 are not valid in x2APIC mode.  WARN if KVM reads ICR
1478      * in x2APIC mode as it's an 8-byte register in x2APIC and needs to be
1479      * manually handled by the caller.
1480      */
1481     if (!apic_x2apic_mode(apic))
1482         valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI) |
1483                   APIC_REG_MASK(APIC_ICR2);
1484     else
1485         WARN_ON_ONCE(offset == APIC_ICR);
1486 
1487     if (alignment + len > 4)
1488         return 1;
1489 
1490     if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset)))
1491         return 1;
1492 
1493     result = __apic_read(apic, offset & ~0xf);
1494 
1495     trace_kvm_apic_read(offset, result);
1496 
1497     switch (len) {
1498     case 1:
1499     case 2:
1500     case 4:
1501         memcpy(data, (char *)&result + alignment, len);
1502         break;
1503     default:
1504         printk(KERN_ERR "Local APIC read with len = %x, "
1505                "should be 1, 2, or 4 instead\n", len);
1506         break;
1507     }
1508     return 0;
1509 }
1510 
1511 static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
1512 {
1513     return addr >= apic->base_address &&
1514         addr < apic->base_address + LAPIC_MMIO_LENGTH;
1515 }
1516 
1517 static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
1518                gpa_t address, int len, void *data)
1519 {
1520     struct kvm_lapic *apic = to_lapic(this);
1521     u32 offset = address - apic->base_address;
1522 
1523     if (!apic_mmio_in_range(apic, address))
1524         return -EOPNOTSUPP;
1525 
1526     if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
1527         if (!kvm_check_has_quirk(vcpu->kvm,
1528                      KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
1529             return -EOPNOTSUPP;
1530 
1531         memset(data, 0xff, len);
1532         return 0;
1533     }
1534 
1535     kvm_lapic_reg_read(apic, offset, len, data);
1536 
1537     return 0;
1538 }
1539 
1540 static void update_divide_count(struct kvm_lapic *apic)
1541 {
1542     u32 tmp1, tmp2, tdcr;
1543 
1544     tdcr = kvm_lapic_get_reg(apic, APIC_TDCR);
1545     tmp1 = tdcr & 0xf;
1546     tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
1547     apic->divide_count = 0x1 << (tmp2 & 0x7);
1548 }
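/*
 * Example of the TDCR decode above: a divide configuration of 0b1011 gives
 * tmp2 == (0b11 | 0b100) + 1 == 8 and a divide_count of 1 << (8 & 7) == 1
 * (divide by 1), while 0b0000 gives tmp2 == 1 and a divide_count of 2, as the
 * SDM's divide-configuration encoding specifies.
 */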
1549 
1550 static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
1551 {
1552     /*
1553      * Do not allow the guest to program periodic timers with a small
1554      * interval, since the hrtimers are not throttled by the host
1555      * scheduler.
1556      */
1557     if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
1558         s64 min_period = min_timer_period_us * 1000LL;
1559 
1560         if (apic->lapic_timer.period < min_period) {
1561             pr_info_ratelimited(
1562                 "kvm: vcpu %i: requested %lld ns "
1563                 "lapic timer period limited to %lld ns\n",
1564                 apic->vcpu->vcpu_id,
1565                 apic->lapic_timer.period, min_period);
1566             apic->lapic_timer.period = min_period;
1567         }
1568     }
1569 }
1570 
1571 static void cancel_hv_timer(struct kvm_lapic *apic);
1572 
1573 static void cancel_apic_timer(struct kvm_lapic *apic)
1574 {
1575     hrtimer_cancel(&apic->lapic_timer.timer);
1576     preempt_disable();
1577     if (apic->lapic_timer.hv_timer_in_use)
1578         cancel_hv_timer(apic);
1579     preempt_enable();
1580     atomic_set(&apic->lapic_timer.pending, 0);
1581 }
1582 
1583 static void apic_update_lvtt(struct kvm_lapic *apic)
1584 {
1585     u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
1586             apic->lapic_timer.timer_mode_mask;
1587 
1588     if (apic->lapic_timer.timer_mode != timer_mode) {
1589         if (apic_lvtt_tscdeadline(apic) != (timer_mode ==
1590                 APIC_LVT_TIMER_TSCDEADLINE)) {
1591             cancel_apic_timer(apic);
1592             kvm_lapic_set_reg(apic, APIC_TMICT, 0);
1593             apic->lapic_timer.period = 0;
1594             apic->lapic_timer.tscdeadline = 0;
1595         }
1596         apic->lapic_timer.timer_mode = timer_mode;
1597         limit_periodic_timer_frequency(apic);
1598     }
1599 }
1600 
1601 /*
1602  * On APICv, this test will cause a busy wait
1603  * during a higher-priority task.
1604  */
1605 
1606 static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
1607 {
1608     struct kvm_lapic *apic = vcpu->arch.apic;
1609     u32 reg = kvm_lapic_get_reg(apic, APIC_LVTT);
1610 
1611     if (kvm_apic_hw_enabled(apic)) {
1612         int vec = reg & APIC_VECTOR_MASK;
1613         void *bitmap = apic->regs + APIC_ISR;
1614 
1615         if (apic->apicv_active)
1616             bitmap = apic->regs + APIC_IRR;
1617 
1618         if (apic_test_vector(vec, bitmap))
1619             return true;
1620     }
1621     return false;
1622 }
1623 
1624 static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles)
1625 {
1626     u64 timer_advance_ns = vcpu->arch.apic->lapic_timer.timer_advance_ns;
1627 
1628     /*
1629      * If the guest TSC is running at a different ratio than the host, then
1630      * convert the delay to nanoseconds to achieve an accurate delay.  Note
1631      * that __delay() uses delay_tsc whenever the hardware has TSC, thus
1632      * always for VMX enabled hardware.
1633      */
1634     if (vcpu->arch.tsc_scaling_ratio == kvm_caps.default_tsc_scaling_ratio) {
1635         __delay(min(guest_cycles,
1636             nsec_to_cycles(vcpu, timer_advance_ns)));
1637     } else {
1638         u64 delay_ns = guest_cycles * 1000000ULL;
1639         do_div(delay_ns, vcpu->arch.virtual_tsc_khz);
1640         ndelay(min_t(u32, delay_ns, timer_advance_ns));
1641     }
1642 }
1643 
1644 static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
1645                           s64 advance_expire_delta)
1646 {
1647     struct kvm_lapic *apic = vcpu->arch.apic;
1648     u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
1649     u64 ns;
1650 
1651     /* Do not adjust for tiny fluctuations or large random spikes. */
1652     if (abs(advance_expire_delta) > LAPIC_TIMER_ADVANCE_ADJUST_MAX ||
1653         abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_MIN)
1654         return;
1655 
1656     /* too early */
1657     if (advance_expire_delta < 0) {
1658         ns = -advance_expire_delta * 1000000ULL;
1659         do_div(ns, vcpu->arch.virtual_tsc_khz);
1660         timer_advance_ns -= ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
1661     } else {
1662     /* too late */
1663         ns = advance_expire_delta * 1000000ULL;
1664         do_div(ns, vcpu->arch.virtual_tsc_khz);
1665         timer_advance_ns += ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
1666     }
1667 
1668     if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_NS_MAX))
1669         timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
1670     apic->lapic_timer.timer_advance_ns = timer_advance_ns;
1671 }
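/*
 * Example of the adjustment above: if the timer fired 800 TSC cycles early on
 * a 2 GHz guest (virtual_tsc_khz == 2000000), the error is 800 * 1000000 /
 * 2000000 == 400 ns and timer_advance_ns is reduced by 400 / 8 == 50 ns, so
 * the advance converges gradually rather than oscillating.
 */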
1672 
1673 static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
1674 {
1675     struct kvm_lapic *apic = vcpu->arch.apic;
1676     u64 guest_tsc, tsc_deadline;
1677 
1678     tsc_deadline = apic->lapic_timer.expired_tscdeadline;
1679     apic->lapic_timer.expired_tscdeadline = 0;
1680     guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1681     trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
1682 
1683     if (lapic_timer_advance_dynamic) {
1684         adjust_lapic_timer_advance(vcpu, guest_tsc - tsc_deadline);
1685         /*
1686          * If the timer fired early, reread the TSC to account for the
1687          * overhead of the above adjustment to avoid waiting longer
1688          * than is necessary.
1689          */
1690         if (guest_tsc < tsc_deadline)
1691             guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1692     }
1693 
1694     if (guest_tsc < tsc_deadline)
1695         __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
1696 }
1697 
1698 void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
1699 {
1700     if (lapic_in_kernel(vcpu) &&
1701         vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
1702         vcpu->arch.apic->lapic_timer.timer_advance_ns &&
1703         lapic_timer_int_injected(vcpu))
1704         __kvm_wait_lapic_expire(vcpu);
1705 }
1706 EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);
1707 
1708 static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic)
1709 {
1710     struct kvm_timer *ktimer = &apic->lapic_timer;
1711 
1712     kvm_apic_local_deliver(apic, APIC_LVTT);
1713     if (apic_lvtt_tscdeadline(apic)) {
1714         ktimer->tscdeadline = 0;
1715     } else if (apic_lvtt_oneshot(apic)) {
1716         ktimer->tscdeadline = 0;
1717         ktimer->target_expiration = 0;
1718     }
1719 }
1720 
1721 static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn)
1722 {
1723     struct kvm_vcpu *vcpu = apic->vcpu;
1724     struct kvm_timer *ktimer = &apic->lapic_timer;
1725 
1726     if (atomic_read(&apic->lapic_timer.pending))
1727         return;
1728 
1729     if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
1730         ktimer->expired_tscdeadline = ktimer->tscdeadline;
1731 
1732     if (!from_timer_fn && apic->apicv_active) {
1733         WARN_ON(kvm_get_running_vcpu() != vcpu);
1734         kvm_apic_inject_pending_timer_irqs(apic);
1735         return;
1736     }
1737 
1738     if (kvm_use_posted_timer_interrupt(apic->vcpu)) {
1739         /*
1740          * Ensure the guest's timer has truly expired before posting an
1741          * interrupt.  Open code the relevant checks to avoid querying
1742          * lapic_timer_int_injected(), which will be false since the
1743          * interrupt isn't yet injected.  Waiting until after injecting
1744          * is not an option since that won't help a posted interrupt.
1745          */
1746         if (vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
1747             vcpu->arch.apic->lapic_timer.timer_advance_ns)
1748             __kvm_wait_lapic_expire(vcpu);
1749         kvm_apic_inject_pending_timer_irqs(apic);
1750         return;
1751     }
1752 
1753     atomic_inc(&apic->lapic_timer.pending);
1754     kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
1755     if (from_timer_fn)
1756         kvm_vcpu_kick(vcpu);
1757 }
1758 
1759 static void start_sw_tscdeadline(struct kvm_lapic *apic)
1760 {
1761     struct kvm_timer *ktimer = &apic->lapic_timer;
1762     u64 guest_tsc, tscdeadline = ktimer->tscdeadline;
1763     u64 ns = 0;
1764     ktime_t expire;
1765     struct kvm_vcpu *vcpu = apic->vcpu;
1766     unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
1767     unsigned long flags;
1768     ktime_t now;
1769 
1770     if (unlikely(!tscdeadline || !this_tsc_khz))
1771         return;
1772 
1773     local_irq_save(flags);
1774 
1775     now = ktime_get();
1776     guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1777 
1778     ns = (tscdeadline - guest_tsc) * 1000000ULL;
1779     do_div(ns, this_tsc_khz);
1780 
1781     if (likely(tscdeadline > guest_tsc) &&
1782         likely(ns > apic->lapic_timer.timer_advance_ns)) {
1783         expire = ktime_add_ns(now, ns);
1784         expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
1785         hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_HARD);
1786     } else
1787         apic_timer_expired(apic, false);
1788 
1789     local_irq_restore(flags);
1790 }
1791 
1792 static inline u64 tmict_to_ns(struct kvm_lapic *apic, u32 tmict)
1793 {
1794     return (u64)tmict * APIC_BUS_CYCLE_NS * (u64)apic->divide_count;
1795 }
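/*
 * Illustrative example (not part of the upstream file): a hedged worked
 * example of tmict_to_ns(), assuming APIC_BUS_CYCLE_NS == 1 (KVM models
 * a 1 GHz APIC bus; the constant is defined outside this excerpt):
 *
 *     TMICT        = 100,000
 *     divide_count = 16
 *     period       = 100,000 * 1 * 16 = 1,600,000 ns = 1.6 ms
 *
 * i.e. each initial-count tick is one bus cycle stretched by the divide
 * configuration register.
 */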
1796 
1797 static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor)
1798 {
1799     ktime_t now, remaining;
1800     u64 ns_remaining_old, ns_remaining_new;
1801 
1802     apic->lapic_timer.period =
1803             tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT));
1804     limit_periodic_timer_frequency(apic);
1805 
1806     now = ktime_get();
1807     remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
1808     if (ktime_to_ns(remaining) < 0)
1809         remaining = 0;
1810 
1811     ns_remaining_old = ktime_to_ns(remaining);
1812     ns_remaining_new = mul_u64_u32_div(ns_remaining_old,
1813                                        apic->divide_count, old_divisor);
1814 
1815     apic->lapic_timer.tscdeadline +=
1816         nsec_to_cycles(apic->vcpu, ns_remaining_new) -
1817         nsec_to_cycles(apic->vcpu, ns_remaining_old);
1818     apic->lapic_timer.target_expiration = ktime_add_ns(now, ns_remaining_new);
1819 }
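/*
 * Illustrative example (not part of the upstream file): a hedged worked
 * example of the rescale above when the guest reprograms the divide
 * configuration mid-countdown:
 *
 *     old_divisor      = 2, new apic->divide_count = 8
 *     ns_remaining_old = 300,000 ns
 *     ns_remaining_new = 300,000 * 8 / 2 = 1,200,000 ns
 *
 * Quadrupling the divider stretches the remaining time by the same
 * factor; tscdeadline is then shifted by the difference of the two
 * values converted to guest TSC cycles.
 */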
1820 
1821 static bool set_target_expiration(struct kvm_lapic *apic, u32 count_reg)
1822 {
1823     ktime_t now;
1824     u64 tscl = rdtsc();
1825     s64 deadline;
1826 
1827     now = ktime_get();
1828     apic->lapic_timer.period =
1829             tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT));
1830 
1831     if (!apic->lapic_timer.period) {
1832         apic->lapic_timer.tscdeadline = 0;
1833         return false;
1834     }
1835 
1836     limit_periodic_timer_frequency(apic);
1837     deadline = apic->lapic_timer.period;
1838 
1839     if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
1840         if (unlikely(count_reg != APIC_TMICT)) {
1841             deadline = tmict_to_ns(apic,
1842                      kvm_lapic_get_reg(apic, count_reg));
1843             if (unlikely(deadline <= 0))
1844                 deadline = apic->lapic_timer.period;
1845             else if (unlikely(deadline > apic->lapic_timer.period)) {
1846                 pr_info_ratelimited(
1847                     "kvm: vcpu %i: requested lapic timer restore with "
1848                     "starting count register %#x=%u (%lld ns) > initial count (%lld ns). "
1849                     "Using initial count to start timer.\n",
1850                     apic->vcpu->vcpu_id,
1851                     count_reg,
1852                     kvm_lapic_get_reg(apic, count_reg),
1853                     deadline, apic->lapic_timer.period);
1854                 kvm_lapic_set_reg(apic, count_reg, 0);
1855                 deadline = apic->lapic_timer.period;
1856             }
1857         }
1858     }
1859 
1860     apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
1861         nsec_to_cycles(apic->vcpu, deadline);
1862     apic->lapic_timer.target_expiration = ktime_add_ns(now, deadline);
1863 
1864     return true;
1865 }
1866 
1867 static void advance_periodic_target_expiration(struct kvm_lapic *apic)
1868 {
1869     ktime_t now = ktime_get();
1870     u64 tscl = rdtsc();
1871     ktime_t delta;
1872 
1873     /*
1874      * Synchronize both deadlines to the same time source or
1875      * differences in the periods (caused by differences in the
1876      * underlying clocks or numerical approximation errors) will
1877      * cause the two to drift apart over time as the errors
1878      * accumulate.
1879      */
1880     apic->lapic_timer.target_expiration =
1881         ktime_add_ns(apic->lapic_timer.target_expiration,
1882                 apic->lapic_timer.period);
1883     delta = ktime_sub(apic->lapic_timer.target_expiration, now);
1884     apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
1885         nsec_to_cycles(apic->vcpu, delta);
1886 }
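/*
 * Illustrative note (not part of the upstream file): both deadlines are
 * re-derived from the same (now, tscl) snapshot each period instead of
 * being advanced independently.  As a hedged illustration, with a
 * 1,000,000 ns period and a 2,999,999 kHz guest TSC, nsec_to_cycles()
 * rounds the period to a whole number of cycles; bumping tscdeadline by
 * that rounded amount every period would let the rounding error
 * accumulate, while recomputing it from target_expiration - now keeps
 * the ktime and TSC deadlines pinned together.
 */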
1887 
1888 static void start_sw_period(struct kvm_lapic *apic)
1889 {
1890     if (!apic->lapic_timer.period)
1891         return;
1892 
1893     if (ktime_after(ktime_get(),
1894             apic->lapic_timer.target_expiration)) {
1895         apic_timer_expired(apic, false);
1896 
1897         if (apic_lvtt_oneshot(apic))
1898             return;
1899 
1900         advance_periodic_target_expiration(apic);
1901     }
1902 
1903     hrtimer_start(&apic->lapic_timer.timer,
1904         apic->lapic_timer.target_expiration,
1905         HRTIMER_MODE_ABS_HARD);
1906 }
1907 
1908 bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
1909 {
1910     if (!lapic_in_kernel(vcpu))
1911         return false;
1912 
1913     return vcpu->arch.apic->lapic_timer.hv_timer_in_use;
1914 }
1915 EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
1916 
1917 static void cancel_hv_timer(struct kvm_lapic *apic)
1918 {
1919     WARN_ON(preemptible());
1920     WARN_ON(!apic->lapic_timer.hv_timer_in_use);
1921     static_call(kvm_x86_cancel_hv_timer)(apic->vcpu);
1922     apic->lapic_timer.hv_timer_in_use = false;
1923 }
1924 
1925 static bool start_hv_timer(struct kvm_lapic *apic)
1926 {
1927     struct kvm_timer *ktimer = &apic->lapic_timer;
1928     struct kvm_vcpu *vcpu = apic->vcpu;
1929     bool expired;
1930 
1931     WARN_ON(preemptible());
1932     if (!kvm_can_use_hv_timer(vcpu))
1933         return false;
1934 
1935     if (!ktimer->tscdeadline)
1936         return false;
1937 
1938     if (static_call(kvm_x86_set_hv_timer)(vcpu, ktimer->tscdeadline, &expired))
1939         return false;
1940 
1941     ktimer->hv_timer_in_use = true;
1942     hrtimer_cancel(&ktimer->timer);
1943 
1944     /*
1945      * To simplify handling the periodic timer, leave the hv timer running
1946      * even if the deadline timer has expired, i.e. rely on the resulting
1947      * VM-Exit to recompute the periodic timer's target expiration.
1948      */
1949     if (!apic_lvtt_period(apic)) {
1950         /*
1951          * Cancel the hv timer if the sw timer fired while the hv timer
1952          * was being programmed, or if the hv timer itself expired.
1953          */
1954         if (atomic_read(&ktimer->pending)) {
1955             cancel_hv_timer(apic);
1956         } else if (expired) {
1957             apic_timer_expired(apic, false);
1958             cancel_hv_timer(apic);
1959         }
1960     }
1961 
1962     trace_kvm_hv_timer_state(vcpu->vcpu_id, ktimer->hv_timer_in_use);
1963 
1964     return true;
1965 }
1966 
1967 static void start_sw_timer(struct kvm_lapic *apic)
1968 {
1969     struct kvm_timer *ktimer = &apic->lapic_timer;
1970 
1971     WARN_ON(preemptible());
1972     if (apic->lapic_timer.hv_timer_in_use)
1973         cancel_hv_timer(apic);
1974     if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending))
1975         return;
1976 
1977     if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
1978         start_sw_period(apic);
1979     else if (apic_lvtt_tscdeadline(apic))
1980         start_sw_tscdeadline(apic);
1981     trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, false);
1982 }
1983 
1984 static void restart_apic_timer(struct kvm_lapic *apic)
1985 {
1986     preempt_disable();
1987 
1988     if (!apic_lvtt_period(apic) && atomic_read(&apic->lapic_timer.pending))
1989         goto out;
1990 
1991     if (!start_hv_timer(apic))
1992         start_sw_timer(apic);
1993 out:
1994     preempt_enable();
1995 }
1996 
1997 void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
1998 {
1999     struct kvm_lapic *apic = vcpu->arch.apic;
2000 
2001     preempt_disable();
2002     /* If the preempt notifier has already run, it also called apic_timer_expired */
2003     if (!apic->lapic_timer.hv_timer_in_use)
2004         goto out;
2005     WARN_ON(kvm_vcpu_is_blocking(vcpu));
2006     apic_timer_expired(apic, false);
2007     cancel_hv_timer(apic);
2008 
2009     if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
2010         advance_periodic_target_expiration(apic);
2011         restart_apic_timer(apic);
2012     }
2013 out:
2014     preempt_enable();
2015 }
2016 EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
2017 
2018 void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
2019 {
2020     restart_apic_timer(vcpu->arch.apic);
2021 }
2022 
2023 void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
2024 {
2025     struct kvm_lapic *apic = vcpu->arch.apic;
2026 
2027     preempt_disable();
2028     /* Possibly the TSC deadline timer is not enabled yet */
2029     if (apic->lapic_timer.hv_timer_in_use)
2030         start_sw_timer(apic);
2031     preempt_enable();
2032 }
2033 
2034 void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu)
2035 {
2036     struct kvm_lapic *apic = vcpu->arch.apic;
2037 
2038     WARN_ON(!apic->lapic_timer.hv_timer_in_use);
2039     restart_apic_timer(apic);
2040 }
2041 
2042 static void __start_apic_timer(struct kvm_lapic *apic, u32 count_reg)
2043 {
2044     atomic_set(&apic->lapic_timer.pending, 0);
2045 
2046     if ((apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
2047         && !set_target_expiration(apic, count_reg))
2048         return;
2049 
2050     restart_apic_timer(apic);
2051 }
2052 
2053 static void start_apic_timer(struct kvm_lapic *apic)
2054 {
2055     __start_apic_timer(apic, APIC_TMICT);
2056 }
2057 
2058 static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
2059 {
2060     bool lvt0_in_nmi_mode = apic_lvt_nmi_mode(lvt0_val);
2061 
2062     if (apic->lvt0_in_nmi_mode != lvt0_in_nmi_mode) {
2063         apic->lvt0_in_nmi_mode = lvt0_in_nmi_mode;
2064         if (lvt0_in_nmi_mode) {
2065             atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
2066         } else
2067             atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
2068     }
2069 }
2070 
2071 static void kvm_lapic_xapic_id_updated(struct kvm_lapic *apic)
2072 {
2073     struct kvm *kvm = apic->vcpu->kvm;
2074 
2075     if (KVM_BUG_ON(apic_x2apic_mode(apic), kvm))
2076         return;
2077 
2078     if (kvm_xapic_id(apic) == apic->vcpu->vcpu_id)
2079         return;
2080 
2081     kvm_set_apicv_inhibit(apic->vcpu->kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED);
2082 }
2083 
2084 static int get_lvt_index(u32 reg)
2085 {
2086     if (reg == APIC_LVTCMCI)
2087         return LVT_CMCI;
2088     if (reg < APIC_LVTT || reg > APIC_LVTERR)
2089         return -1;
2090     return array_index_nospec(
2091             (reg - APIC_LVTT) >> 4, KVM_APIC_MAX_NR_LVT_ENTRIES);
2092 }
2093 
2094 static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
2095 {
2096     int ret = 0;
2097 
2098     trace_kvm_apic_write(reg, val);
2099 
2100     switch (reg) {
2101     case APIC_ID:       /* Local APIC ID */
2102         if (!apic_x2apic_mode(apic)) {
2103             kvm_apic_set_xapic_id(apic, val >> 24);
2104             kvm_lapic_xapic_id_updated(apic);
2105         } else {
2106             ret = 1;
2107         }
2108         break;
2109 
2110     case APIC_TASKPRI:
2111         report_tpr_access(apic, true);
2112         apic_set_tpr(apic, val & 0xff);
2113         break;
2114 
2115     case APIC_EOI:
2116         apic_set_eoi(apic);
2117         break;
2118 
2119     case APIC_LDR:
2120         if (!apic_x2apic_mode(apic))
2121             kvm_apic_set_ldr(apic, val & APIC_LDR_MASK);
2122         else
2123             ret = 1;
2124         break;
2125 
2126     case APIC_DFR:
2127         if (!apic_x2apic_mode(apic))
2128             kvm_apic_set_dfr(apic, val | 0x0FFFFFFF);
2129         else
2130             ret = 1;
2131         break;
2132 
2133     case APIC_SPIV: {
2134         u32 mask = 0x3ff;
2135         if (kvm_lapic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
2136             mask |= APIC_SPIV_DIRECTED_EOI;
2137         apic_set_spiv(apic, val & mask);
2138         if (!(val & APIC_SPIV_APIC_ENABLED)) {
2139             int i;
2140 
2141             for (i = 0; i < apic->nr_lvt_entries; i++) {
2142                 kvm_lapic_set_reg(apic, APIC_LVTx(i),
2143                     kvm_lapic_get_reg(apic, APIC_LVTx(i)) | APIC_LVT_MASKED);
2144             }
2145             apic_update_lvtt(apic);
2146             atomic_set(&apic->lapic_timer.pending, 0);
2147 
2148         }
2149         break;
2150     }
2151     case APIC_ICR:
2152         WARN_ON_ONCE(apic_x2apic_mode(apic));
2153 
2154         /* No delay here, so we always clear the pending bit */
2155         val &= ~APIC_ICR_BUSY;
2156         kvm_apic_send_ipi(apic, val, kvm_lapic_get_reg(apic, APIC_ICR2));
2157         kvm_lapic_set_reg(apic, APIC_ICR, val);
2158         break;
2159     case APIC_ICR2:
2160         if (apic_x2apic_mode(apic))
2161             ret = 1;
2162         else
2163             kvm_lapic_set_reg(apic, APIC_ICR2, val & 0xff000000);
2164         break;
2165 
2166     case APIC_LVT0:
2167         apic_manage_nmi_watchdog(apic, val);
2168         fallthrough;
2169     case APIC_LVTTHMR:
2170     case APIC_LVTPC:
2171     case APIC_LVT1:
2172     case APIC_LVTERR:
2173     case APIC_LVTCMCI: {
2174         u32 index = get_lvt_index(reg);
2175         if (!kvm_lapic_lvt_supported(apic, index)) {
2176             ret = 1;
2177             break;
2178         }
2179         if (!kvm_apic_sw_enabled(apic))
2180             val |= APIC_LVT_MASKED;
2181         val &= apic_lvt_mask[index];
2182         kvm_lapic_set_reg(apic, reg, val);
2183         break;
2184     }
2185 
2186     case APIC_LVTT:
2187         if (!kvm_apic_sw_enabled(apic))
2188             val |= APIC_LVT_MASKED;
2189         val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
2190         kvm_lapic_set_reg(apic, APIC_LVTT, val);
2191         apic_update_lvtt(apic);
2192         break;
2193 
2194     case APIC_TMICT:
2195         if (apic_lvtt_tscdeadline(apic))
2196             break;
2197 
2198         cancel_apic_timer(apic);
2199         kvm_lapic_set_reg(apic, APIC_TMICT, val);
2200         start_apic_timer(apic);
2201         break;
2202 
2203     case APIC_TDCR: {
2204         uint32_t old_divisor = apic->divide_count;
2205 
2206         kvm_lapic_set_reg(apic, APIC_TDCR, val & 0xb);
2207         update_divide_count(apic);
2208         if (apic->divide_count != old_divisor &&
2209                 apic->lapic_timer.period) {
2210             hrtimer_cancel(&apic->lapic_timer.timer);
2211             update_target_expiration(apic, old_divisor);
2212             restart_apic_timer(apic);
2213         }
2214         break;
2215     }
2216     case APIC_ESR:
2217         if (apic_x2apic_mode(apic) && val != 0)
2218             ret = 1;
2219         break;
2220 
2221     case APIC_SELF_IPI:
2222         if (apic_x2apic_mode(apic))
2223             kvm_apic_send_ipi(apic, APIC_DEST_SELF | (val & APIC_VECTOR_MASK), 0);
2224         else
2225             ret = 1;
2226         break;
2227     default:
2228         ret = 1;
2229         break;
2230     }
2231 
2232     /*
2233      * Recalculate APIC maps if necessary, e.g. if the software enable bit
2234      * was toggled, the APIC ID changed, etc...   The maps are marked dirty
2235      * on relevant changes, i.e. this is a nop for most writes.
2236      */
2237     kvm_recalculate_apic_map(apic->vcpu->kvm);
2238 
2239     return ret;
2240 }
2241 
2242 static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
2243                 gpa_t address, int len, const void *data)
2244 {
2245     struct kvm_lapic *apic = to_lapic(this);
2246     unsigned int offset = address - apic->base_address;
2247     u32 val;
2248 
2249     if (!apic_mmio_in_range(apic, address))
2250         return -EOPNOTSUPP;
2251 
2252     if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
2253         if (!kvm_check_has_quirk(vcpu->kvm,
2254                      KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
2255             return -EOPNOTSUPP;
2256 
2257         return 0;
2258     }
2259 
2260     /*
2261      * APIC registers must be aligned on a 128-bit boundary.
2262      * 32/64/128-bit registers must be accessed through 32-bit reads/writes.
2263      * Refer to SDM Section 8.4.1.
2264      */
2265     if (len != 4 || (offset & 0xf))
2266         return 0;
2267 
2268     val = *(u32*)data;
2269 
2270     kvm_lapic_reg_write(apic, offset & 0xff0, val);
2271 
2272     return 0;
2273 }
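/*
 * Illustrative example (not part of the upstream file): a hedged example
 * of the access rules enforced above, assuming the default APIC base of
 * 0xfee00000:
 *
 *     4-byte write to 0xfee00380 -> offset 0x380, reg = 0x380 & 0xff0
 *                                   = APIC_TMICT, handled normally
 *     4-byte write to 0xfee00384 -> offset & 0xf != 0, silently dropped
 *     2-byte write to 0xfee00380 -> len != 4, silently dropped
 *
 * Only aligned 32-bit accesses reach kvm_lapic_reg_write().
 */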
2274 
2275 void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
2276 {
2277     kvm_lapic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
2278 }
2279 EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
2280 
2281 /* emulate APIC access in a trap manner */
2282 void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
2283 {
2284     struct kvm_lapic *apic = vcpu->arch.apic;
2285     u64 val;
2286 
2287     if (apic_x2apic_mode(apic)) {
2288         if (KVM_BUG_ON(kvm_lapic_msr_read(apic, offset, &val), vcpu->kvm))
2289             return;
2290     } else {
2291         val = kvm_lapic_get_reg(apic, offset);
2292     }
2293 
2294     /*
2295      * ICR is a single 64-bit register when x2APIC is enabled.  For legacy
2296      * xAPIC, ICR writes need to go down the common (slightly slower) path
2297      * to get the upper half from ICR2.
2298      */
2299     if (apic_x2apic_mode(apic) && offset == APIC_ICR) {
2300         kvm_apic_send_ipi(apic, (u32)val, (u32)(val >> 32));
2301         trace_kvm_apic_write(APIC_ICR, val);
2302     } else {
2303         /* TODO: optimize to just emulate side effect w/o one more write */
2304         kvm_lapic_reg_write(apic, offset, (u32)val);
2305     }
2306 }
2307 EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
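/*
 * Illustrative example (not part of the upstream file): a hedged example
 * of the 64-bit ICR split above in x2APIC mode.  For a fixed interrupt
 * with vector 0xd0 sent to the APIC with ID 5:
 *
 *     val            = ((u64)5 << 32) | 0xd0 = 0x00000005000000d0
 *     (u32)val       = 0x000000d0   (vector plus delivery/trigger bits)
 *     (u32)(val>>32) = 0x00000005   (destination x2APIC ID)
 *
 * which are exactly the two arguments handed to kvm_apic_send_ipi().
 */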
2308 
2309 void kvm_free_lapic(struct kvm_vcpu *vcpu)
2310 {
2311     struct kvm_lapic *apic = vcpu->arch.apic;
2312 
2313     if (!vcpu->arch.apic)
2314         return;
2315 
2316     hrtimer_cancel(&apic->lapic_timer.timer);
2317 
2318     if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
2319         static_branch_slow_dec_deferred(&apic_hw_disabled);
2320 
2321     if (!apic->sw_enabled)
2322         static_branch_slow_dec_deferred(&apic_sw_disabled);
2323 
2324     if (apic->regs)
2325         free_page((unsigned long)apic->regs);
2326 
2327     kfree(apic);
2328 }
2329 
2330 /*
2331  *----------------------------------------------------------------------
2332  * LAPIC interface
2333  *----------------------------------------------------------------------
2334  */
2335 u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
2336 {
2337     struct kvm_lapic *apic = vcpu->arch.apic;
2338 
2339     if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic))
2340         return 0;
2341 
2342     return apic->lapic_timer.tscdeadline;
2343 }
2344 
2345 void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
2346 {
2347     struct kvm_lapic *apic = vcpu->arch.apic;
2348 
2349     if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic))
2350         return;
2351 
2352     hrtimer_cancel(&apic->lapic_timer.timer);
2353     apic->lapic_timer.tscdeadline = data;
2354     start_apic_timer(apic);
2355 }
2356 
2357 void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
2358 {
2359     apic_set_tpr(vcpu->arch.apic, (cr8 & 0x0f) << 4);
2360 }
2361 
2362 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
2363 {
2364     u64 tpr;
2365 
2366     tpr = (u64) kvm_lapic_get_reg(vcpu->arch.apic, APIC_TASKPRI);
2367 
2368     return (tpr & 0xf0) >> 4;
2369 }
2370 
2371 void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
2372 {
2373     u64 old_value = vcpu->arch.apic_base;
2374     struct kvm_lapic *apic = vcpu->arch.apic;
2375 
2376     vcpu->arch.apic_base = value;
2377 
2378     if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE)
2379         kvm_update_cpuid_runtime(vcpu);
2380 
2381     if (!apic)
2382         return;
2383 
2384     /* update jump label if enable bit changes */
2385     if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) {
2386         if (value & MSR_IA32_APICBASE_ENABLE) {
2387             kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
2388             static_branch_slow_dec_deferred(&apic_hw_disabled);
2389             /* Check if there are APF page ready requests pending */
2390             kvm_make_request(KVM_REQ_APF_READY, vcpu);
2391         } else {
2392             static_branch_inc(&apic_hw_disabled.key);
2393             atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
2394         }
2395     }
2396 
2397     if (((old_value ^ value) & X2APIC_ENABLE) && (value & X2APIC_ENABLE))
2398         kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
2399 
2400     if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) {
2401         kvm_vcpu_update_apicv(vcpu);
2402         static_call_cond(kvm_x86_set_virtual_apic_mode)(vcpu);
2403     }
2404 
2405     apic->base_address = apic->vcpu->arch.apic_base &
2406                  MSR_IA32_APICBASE_BASE;
2407 
2408     if ((value & MSR_IA32_APICBASE_ENABLE) &&
2409          apic->base_address != APIC_DEFAULT_PHYS_BASE) {
2410         kvm_set_apicv_inhibit(apic->vcpu->kvm,
2411                       APICV_INHIBIT_REASON_APIC_BASE_MODIFIED);
2412     }
2413 }
2414 
2415 void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
2416 {
2417     struct kvm_lapic *apic = vcpu->arch.apic;
2418 
2419     if (apic->apicv_active) {
2420         /* irr_pending is always true when apicv is activated. */
2421         apic->irr_pending = true;
2422         apic->isr_count = 1;
2423     } else {
2424         /*
2425          * Don't clear irr_pending, searching the IRR can race with
2426          * updates from the CPU as APICv is still active from hardware's
2427          * perspective.  The flag will be cleared as appropriate when
2428          * KVM injects the interrupt.
2429          */
2430         apic->isr_count = count_vectors(apic->regs + APIC_ISR);
2431     }
2432 }
2433 EXPORT_SYMBOL_GPL(kvm_apic_update_apicv);
2434 
2435 void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
2436 {
2437     struct kvm_lapic *apic = vcpu->arch.apic;
2438     u64 msr_val;
2439     int i;
2440 
2441     if (!init_event) {
2442         msr_val = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE;
2443         if (kvm_vcpu_is_reset_bsp(vcpu))
2444             msr_val |= MSR_IA32_APICBASE_BSP;
2445         kvm_lapic_set_base(vcpu, msr_val);
2446     }
2447 
2448     if (!apic)
2449         return;
2450 
2451     /* Stop the timer in case it's a reset to an active apic */
2452     hrtimer_cancel(&apic->lapic_timer.timer);
2453 
2454     /* The xAPIC ID is set at RESET even if the APIC was already enabled. */
2455     if (!init_event)
2456         kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
2457     kvm_apic_set_version(apic->vcpu);
2458 
2459     for (i = 0; i < apic->nr_lvt_entries; i++)
2460         kvm_lapic_set_reg(apic, APIC_LVTx(i), APIC_LVT_MASKED);
2461     apic_update_lvtt(apic);
2462     if (kvm_vcpu_is_reset_bsp(vcpu) &&
2463         kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
2464         kvm_lapic_set_reg(apic, APIC_LVT0,
2465                  SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
2466     apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
2467 
2468     kvm_apic_set_dfr(apic, 0xffffffffU);
2469     apic_set_spiv(apic, 0xff);
2470     kvm_lapic_set_reg(apic, APIC_TASKPRI, 0);
2471     if (!apic_x2apic_mode(apic))
2472         kvm_apic_set_ldr(apic, 0);
2473     kvm_lapic_set_reg(apic, APIC_ESR, 0);
2474     if (!apic_x2apic_mode(apic)) {
2475         kvm_lapic_set_reg(apic, APIC_ICR, 0);
2476         kvm_lapic_set_reg(apic, APIC_ICR2, 0);
2477     } else {
2478         kvm_lapic_set_reg64(apic, APIC_ICR, 0);
2479     }
2480     kvm_lapic_set_reg(apic, APIC_TDCR, 0);
2481     kvm_lapic_set_reg(apic, APIC_TMICT, 0);
2482     for (i = 0; i < 8; i++) {
2483         kvm_lapic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
2484         kvm_lapic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
2485         kvm_lapic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
2486     }
2487     kvm_apic_update_apicv(vcpu);
2488     apic->highest_isr_cache = -1;
2489     update_divide_count(apic);
2490     atomic_set(&apic->lapic_timer.pending, 0);
2491 
2492     vcpu->arch.pv_eoi.msr_val = 0;
2493     apic_update_ppr(apic);
2494     if (apic->apicv_active) {
2495         static_call_cond(kvm_x86_apicv_post_state_restore)(vcpu);
2496         static_call_cond(kvm_x86_hwapic_irr_update)(vcpu, -1);
2497         static_call_cond(kvm_x86_hwapic_isr_update)(-1);
2498     }
2499 
2500     vcpu->arch.apic_arb_prio = 0;
2501     vcpu->arch.apic_attention = 0;
2502 
2503     kvm_recalculate_apic_map(vcpu->kvm);
2504 }
2505 
2506 /*
2507  *----------------------------------------------------------------------
2508  * timer interface
2509  *----------------------------------------------------------------------
2510  */
2511 
2512 static bool lapic_is_periodic(struct kvm_lapic *apic)
2513 {
2514     return apic_lvtt_period(apic);
2515 }
2516 
2517 int apic_has_pending_timer(struct kvm_vcpu *vcpu)
2518 {
2519     struct kvm_lapic *apic = vcpu->arch.apic;
2520 
2521     if (apic_enabled(apic) && apic_lvt_enabled(apic, APIC_LVTT))
2522         return atomic_read(&apic->lapic_timer.pending);
2523 
2524     return 0;
2525 }
2526 
2527 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
2528 {
2529     u32 reg = kvm_lapic_get_reg(apic, lvt_type);
2530     int vector, mode, trig_mode;
2531 
2532     if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
2533         vector = reg & APIC_VECTOR_MASK;
2534         mode = reg & APIC_MODE_MASK;
2535         trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
2536         return __apic_accept_irq(apic, mode, vector, 1, trig_mode,
2537                     NULL);
2538     }
2539     return 0;
2540 }
2541 
2542 void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
2543 {
2544     struct kvm_lapic *apic = vcpu->arch.apic;
2545 
2546     if (apic)
2547         kvm_apic_local_deliver(apic, APIC_LVT0);
2548 }
2549 
2550 static const struct kvm_io_device_ops apic_mmio_ops = {
2551     .read     = apic_mmio_read,
2552     .write    = apic_mmio_write,
2553 };
2554 
2555 static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
2556 {
2557     struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
2558     struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
2559 
2560     apic_timer_expired(apic, true);
2561 
2562     if (lapic_is_periodic(apic)) {
2563         advance_periodic_target_expiration(apic);
2564         hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
2565         return HRTIMER_RESTART;
2566     } else
2567         return HRTIMER_NORESTART;
2568 }
2569 
2570 int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
2571 {
2572     struct kvm_lapic *apic;
2573 
2574     ASSERT(vcpu != NULL);
2575 
2576     apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT);
2577     if (!apic)
2578         goto nomem;
2579 
2580     vcpu->arch.apic = apic;
2581 
2582     apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
2583     if (!apic->regs) {
2584         printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
2585                vcpu->vcpu_id);
2586         goto nomem_free_apic;
2587     }
2588     apic->vcpu = vcpu;
2589 
2590     apic->nr_lvt_entries = kvm_apic_calc_nr_lvt_entries(vcpu);
2591 
2592     hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
2593              HRTIMER_MODE_ABS_HARD);
2594     apic->lapic_timer.timer.function = apic_timer_fn;
2595     if (timer_advance_ns == -1) {
2596         apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
2597         lapic_timer_advance_dynamic = true;
2598     } else {
2599         apic->lapic_timer.timer_advance_ns = timer_advance_ns;
2600         lapic_timer_advance_dynamic = false;
2601     }
2602 
2603     /*
2604      * Stuff the APIC ENABLE bit in lieu of temporarily incrementing
2605      * apic_hw_disabled; the full RESET value is set by kvm_lapic_reset().
2606      */
2607     vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
2608     static_branch_inc(&apic_sw_disabled.key); /* sw disabled at reset */
2609     kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
2610 
2611     return 0;
2612 nomem_free_apic:
2613     kfree(apic);
2614     vcpu->arch.apic = NULL;
2615 nomem:
2616     return -ENOMEM;
2617 }
2618 
2619 int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
2620 {
2621     struct kvm_lapic *apic = vcpu->arch.apic;
2622     u32 ppr;
2623 
2624     if (!kvm_apic_present(vcpu))
2625         return -1;
2626 
2627     __apic_update_ppr(apic, &ppr);
2628     return apic_has_interrupt_for_ppr(apic, ppr);
2629 }
2630 EXPORT_SYMBOL_GPL(kvm_apic_has_interrupt);
2631 
2632 int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
2633 {
2634     u32 lvt0 = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LVT0);
2635 
2636     if (!kvm_apic_hw_enabled(vcpu->arch.apic))
2637         return 1;
2638     if ((lvt0 & APIC_LVT_MASKED) == 0 &&
2639         GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
2640         return 1;
2641     return 0;
2642 }
2643 
2644 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
2645 {
2646     struct kvm_lapic *apic = vcpu->arch.apic;
2647 
2648     if (atomic_read(&apic->lapic_timer.pending) > 0) {
2649         kvm_apic_inject_pending_timer_irqs(apic);
2650         atomic_set(&apic->lapic_timer.pending, 0);
2651     }
2652 }
2653 
2654 int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
2655 {
2656     int vector = kvm_apic_has_interrupt(vcpu);
2657     struct kvm_lapic *apic = vcpu->arch.apic;
2658     u32 ppr;
2659 
2660     if (vector == -1)
2661         return -1;
2662 
2663     /*
2664      * We get here even with APIC virtualization enabled, if doing
2665      * nested virtualization and L1 runs with the "acknowledge interrupt
2666      * on exit" mode.  Then we cannot inject the interrupt via RVI,
2667      * because the process would deliver it through the IDT.
2668      */
2669 
2670     apic_clear_irr(vector, apic);
2671     if (to_hv_vcpu(vcpu) && test_bit(vector, to_hv_synic(vcpu)->auto_eoi_bitmap)) {
2672         /*
2673          * For auto-EOI interrupts, there might be another pending
2674          * interrupt above PPR, so check whether to raise another
2675          * KVM_REQ_EVENT.
2676          */
2677         apic_update_ppr(apic);
2678     } else {
2679         /*
2680          * For normal interrupts, PPR has been raised and there cannot
2681          * be a higher-priority pending interrupt---except if there was
2682          * a concurrent interrupt injection, but that would have
2683          * triggered KVM_REQ_EVENT already.
2684          */
2685         apic_set_isr(vector, apic);
2686         __apic_update_ppr(apic, &ppr);
2687     }
2688 
2689     return vector;
2690 }
2691 
2692 static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
2693         struct kvm_lapic_state *s, bool set)
2694 {
2695     if (apic_x2apic_mode(vcpu->arch.apic)) {
2696         u32 *id = (u32 *)(s->regs + APIC_ID);
2697         u32 *ldr = (u32 *)(s->regs + APIC_LDR);
2698         u64 icr;
2699 
2700         if (vcpu->kvm->arch.x2apic_format) {
2701             if (*id != vcpu->vcpu_id)
2702                 return -EINVAL;
2703         } else {
2704             if (set)
2705                 *id >>= 24;
2706             else
2707                 *id <<= 24;
2708         }
2709 
2710         /*
2711          * In x2APIC mode, the LDR is fixed and based on the id.  And
2712          * ICR is internally a single 64-bit register, but needs to be
2713          * split to ICR+ICR2 in userspace for backwards compatibility.
2714          */
2715         if (set) {
2716             *ldr = kvm_apic_calc_x2apic_ldr(*id);
2717 
2718             icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) |
2719                   (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32;
2720             __kvm_lapic_set_reg64(s->regs, APIC_ICR, icr);
2721         } else {
2722             icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR);
2723             __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32);
2724         }
2725     } else {
2726         kvm_lapic_xapic_id_updated(vcpu->arch.apic);
2727     }
2728 
2729     return 0;
2730 }
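/*
 * Illustrative note (not part of the upstream file): the LDR fixup above
 * relies on the architectural x2APIC mapping, which (as a hedged sketch
 * mirroring kvm_apic_calc_x2apic_ldr(), defined outside this excerpt)
 * derives the logical ID from the x2APIC ID as:
 *
 *     cluster = id >> 4;
 *     ldr     = (cluster << 16) | (1 << (id & 0xf));
 *
 * e.g. id 0x23 -> cluster 2, bit 3 -> LDR 0x00020008.  The ICR handling
 * likewise merges ICR and ICR2 into one 64-bit value on set and splits
 * it back into two 32-bit halves for userspace on get.
 */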
2731 
2732 int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
2733 {
2734     memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s));
2735 
2736     /*
2737      * Get calculated timer current count for remaining timer period (if
2738      * any) and store it in the returned register set.
2739      */
2740     __kvm_lapic_set_reg(s->regs, APIC_TMCCT,
2741                 __apic_read(vcpu->arch.apic, APIC_TMCCT));
2742 
2743     return kvm_apic_state_fixup(vcpu, s, false);
2744 }
2745 
2746 int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
2747 {
2748     struct kvm_lapic *apic = vcpu->arch.apic;
2749     int r;
2750 
2751     kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
2752     /* set SPIV separately to get count of SW disabled APICs right */
2753     apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
2754 
2755     r = kvm_apic_state_fixup(vcpu, s, true);
2756     if (r) {
2757         kvm_recalculate_apic_map(vcpu->kvm);
2758         return r;
2759     }
2760     memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));
2761 
2762     atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
2763     kvm_recalculate_apic_map(vcpu->kvm);
2764     kvm_apic_set_version(vcpu);
2765 
2766     apic_update_ppr(apic);
2767     cancel_apic_timer(apic);
2768     apic->lapic_timer.expired_tscdeadline = 0;
2769     apic_update_lvtt(apic);
2770     apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
2771     update_divide_count(apic);
2772     __start_apic_timer(apic, APIC_TMCCT);
2773     kvm_lapic_set_reg(apic, APIC_TMCCT, 0);
2774     kvm_apic_update_apicv(vcpu);
2775     apic->highest_isr_cache = -1;
2776     if (apic->apicv_active) {
2777         static_call_cond(kvm_x86_apicv_post_state_restore)(vcpu);
2778         static_call_cond(kvm_x86_hwapic_irr_update)(vcpu, apic_find_highest_irr(apic));
2779         static_call_cond(kvm_x86_hwapic_isr_update)(apic_find_highest_isr(apic));
2780     }
2781     kvm_make_request(KVM_REQ_EVENT, vcpu);
2782     if (ioapic_in_kernel(vcpu->kvm))
2783         kvm_rtc_eoi_tracking_restore_one(vcpu);
2784 
2785     vcpu->arch.apic_arb_prio = 0;
2786 
2787     return 0;
2788 }
2789 
2790 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
2791 {
2792     struct hrtimer *timer;
2793 
2794     if (!lapic_in_kernel(vcpu) ||
2795         kvm_can_post_timer_interrupt(vcpu))
2796         return;
2797 
2798     timer = &vcpu->arch.apic->lapic_timer.timer;
2799     if (hrtimer_cancel(timer))
2800         hrtimer_start_expires(timer, HRTIMER_MODE_ABS_HARD);
2801 }
2802 
2803 /*
2804  * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt
2805  *
2806  * Detect whether guest triggered PV EOI since the
2807  * last entry. If yes, set EOI on the guest's behalf.
2808  * Clear PV EOI in guest memory in any case.
2809  */
2810 static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
2811                     struct kvm_lapic *apic)
2812 {
2813     int vector;
2814     /*
2815      * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host
2816      * and KVM_PV_EOI_ENABLED in guest memory as follows:
2817      *
2818      * KVM_APIC_PV_EOI_PENDING is unset:
2819      *  -> host disabled PV EOI.
2820      * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set:
2821      *  -> host enabled PV EOI, guest did not execute EOI yet.
2822      * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset:
2823      *  -> host enabled PV EOI, guest executed EOI.
2824      */
2825     BUG_ON(!pv_eoi_enabled(vcpu));
2826 
2827     if (pv_eoi_test_and_clr_pending(vcpu))
2828         return;
2829     vector = apic_set_eoi(apic);
2830     trace_kvm_pv_eoi(apic, vector);
2831 }
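/*
 * Illustrative sketch (not part of the upstream file): the hypothetical
 * enum and helper below only restate the three-state table documented in
 * the comment above, mapping the (host pending bit, guest enabled bit)
 * combinations to named states.
 */
enum pv_eoi_example_state {
    PV_EOI_EXAMPLE_DISABLED,    /* host did not arm PV EOI */
    PV_EOI_EXAMPLE_ARMED,       /* armed, guest has not executed EOI yet */
    PV_EOI_EXAMPLE_COMPLETED,   /* armed, guest already executed EOI */
};

static inline enum pv_eoi_example_state
pv_eoi_example_classify(bool host_pending, bool guest_enabled)
{
    if (!host_pending)
        return PV_EOI_EXAMPLE_DISABLED;
    return guest_enabled ? PV_EOI_EXAMPLE_ARMED : PV_EOI_EXAMPLE_COMPLETED;
}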
2832 
2833 void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
2834 {
2835     u32 data;
2836 
2837     if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention))
2838         apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic);
2839 
2840     if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
2841         return;
2842 
2843     if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
2844                   sizeof(u32)))
2845         return;
2846 
2847     apic_set_tpr(vcpu->arch.apic, data & 0xff);
2848 }
2849 
2850 /*
2851  * apic_sync_pv_eoi_to_guest - called before vmentry
2852  *
2853  * Detect whether it's safe to enable PV EOI and
2854  * if yes do so.
2855  */
2856 static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
2857                     struct kvm_lapic *apic)
2858 {
2859     if (!pv_eoi_enabled(vcpu) ||
2860         /* IRR set or many bits in ISR: could be nested. */
2861         apic->irr_pending ||
2862         /* Cache not set: could be safe but we don't bother. */
2863         apic->highest_isr_cache == -1 ||
2864         /* Need EOI to update ioapic. */
2865         kvm_ioapic_handles_vector(apic, apic->highest_isr_cache)) {
2866         /*
2867          * PV EOI was disabled by apic_sync_pv_eoi_from_guest
2868          * so we need not do anything here.
2869          */
2870         return;
2871     }
2872 
2873     pv_eoi_set_pending(apic->vcpu);
2874 }
2875 
2876 void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
2877 {
2878     u32 data, tpr;
2879     int max_irr, max_isr;
2880     struct kvm_lapic *apic = vcpu->arch.apic;
2881 
2882     apic_sync_pv_eoi_to_guest(vcpu, apic);
2883 
2884     if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
2885         return;
2886 
2887     tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI) & 0xff;
2888     max_irr = apic_find_highest_irr(apic);
2889     if (max_irr < 0)
2890         max_irr = 0;
2891     max_isr = apic_find_highest_isr(apic);
2892     if (max_isr < 0)
2893         max_isr = 0;
2894     data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);
2895 
2896     kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
2897                 sizeof(u32));
2898 }
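/*
 * Illustrative example (not part of the upstream file): a hedged example
 * of the vAPIC word packed above:
 *
 *     tpr = 0x40, max_isr = 0x51, max_irr = 0x61
 *     data = 0x40 | ((0x51 & 0xf0) << 8) | (0x61 << 24) = 0x61005040
 *
 * i.e. bits 7:0 carry the TPR, bits 15:8 the highest in-service vector
 * masked down to its priority class, and bits 31:24 the highest pending
 * (IRR) vector.
 */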
2899 
2900 int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
2901 {
2902     if (vapic_addr) {
2903         if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
2904                     &vcpu->arch.apic->vapic_cache,
2905                     vapic_addr, sizeof(u32)))
2906             return -EINVAL;
2907         __set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
2908     } else {
2909         __clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
2910     }
2911 
2912     vcpu->arch.apic->vapic_addr = vapic_addr;
2913     return 0;
2914 }
2915 
2916 int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data)
2917 {
2918     data &= ~APIC_ICR_BUSY;
2919 
2920     kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32));
2921     kvm_lapic_set_reg64(apic, APIC_ICR, data);
2922     trace_kvm_apic_write(APIC_ICR, data);
2923     return 0;
2924 }
2925 
2926 static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data)
2927 {
2928     u32 low;
2929 
2930     if (reg == APIC_ICR) {
2931         *data = kvm_lapic_get_reg64(apic, APIC_ICR);
2932         return 0;
2933     }
2934 
2935     if (kvm_lapic_reg_read(apic, reg, 4, &low))
2936         return 1;
2937 
2938     *data = low;
2939 
2940     return 0;
2941 }
2942 
2943 static int kvm_lapic_msr_write(struct kvm_lapic *apic, u32 reg, u64 data)
2944 {
2945     /*
2946      * ICR is a 64-bit register in x2APIC mode (and Hyper-V PV vAPIC) and
2947      * can be written as such, all other registers remain accessible only
2948      * through 32-bit reads/writes.
2949      */
2950     if (reg == APIC_ICR)
2951         return kvm_x2apic_icr_write(apic, data);
2952 
2953     return kvm_lapic_reg_write(apic, reg, (u32)data);
2954 }
2955 
2956 int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
2957 {
2958     struct kvm_lapic *apic = vcpu->arch.apic;
2959     u32 reg = (msr - APIC_BASE_MSR) << 4;
2960 
2961     if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
2962         return 1;
2963 
2964     return kvm_lapic_msr_write(apic, reg, data);
2965 }
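/*
 * Illustrative example (not part of the upstream file): a hedged example
 * of the MSR-to-register mapping above, assuming the usual x2APIC MSR
 * base of 0x800 (APIC_BASE_MSR):
 *
 *     MSR 0x80b -> (0x80b - 0x800) << 4 = 0x0b0 = APIC_EOI
 *     MSR 0x838 -> (0x838 - 0x800) << 4 = 0x380 = APIC_TMICT
 *
 * so every x2APIC MSR lands on the matching xAPIC MMIO register offset
 * before being routed through kvm_lapic_msr_write().
 */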
2966 
2967 int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
2968 {
2969     struct kvm_lapic *apic = vcpu->arch.apic;
2970     u32 reg = (msr - APIC_BASE_MSR) << 4;
2971 
2972     if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
2973         return 1;
2974 
2975     if (reg == APIC_DFR)
2976         return 1;
2977 
2978     return kvm_lapic_msr_read(apic, reg, data);
2979 }
2980 
2981 int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
2982 {
2983     if (!lapic_in_kernel(vcpu))
2984         return 1;
2985 
2986     return kvm_lapic_msr_write(vcpu->arch.apic, reg, data);
2987 }
2988 
2989 int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
2990 {
2991     if (!lapic_in_kernel(vcpu))
2992         return 1;
2993 
2994     return kvm_lapic_msr_read(vcpu->arch.apic, reg, data);
2995 }
2996 
2997 int kvm_lapic_set_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
2998 {
2999     u64 addr = data & ~KVM_MSR_ENABLED;
3000     struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data;
3001     unsigned long new_len;
3002     int ret;
3003 
3004     if (!IS_ALIGNED(addr, 4))
3005         return 1;
3006 
3007     if (data & KVM_MSR_ENABLED) {
3008         if (addr == ghc->gpa && len <= ghc->len)
3009             new_len = ghc->len;
3010         else
3011             new_len = len;
3012 
3013         ret = kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
3014         if (ret)
3015             return ret;
3016     }
3017 
3018     vcpu->arch.pv_eoi.msr_val = data;
3019 
3020     return 0;
3021 }
3022 
3023 int kvm_apic_accept_events(struct kvm_vcpu *vcpu)
3024 {
3025     struct kvm_lapic *apic = vcpu->arch.apic;
3026     u8 sipi_vector;
3027     int r;
3028     unsigned long pe;
3029 
3030     if (!lapic_in_kernel(vcpu))
3031         return 0;
3032 
3033     /*
3034      * Read pending events before calling the check_events
3035      * callback.
3036      */
3037     pe = smp_load_acquire(&apic->pending_events);
3038     if (!pe)
3039         return 0;
3040 
3041     if (is_guest_mode(vcpu)) {
3042         r = kvm_check_nested_events(vcpu);
3043         if (r < 0)
3044             return r == -EBUSY ? 0 : r;
3045         /*
3046          * If an event has happened and caused a vmexit,
3047          * we know INITs are latched and therefore
3048          * we will not incorrectly deliver an APIC
3049          * event instead of a vmexit.
3050          */
3051     }
3052 
3053     /*
3054      * INITs are latched while CPU is in specific states
3055      * (SMM, VMX root mode, SVM with GIF=0).
3056      * Because a CPU cannot be in these states immediately
3057      * after it has processed an INIT signal (and thus in
3058      * KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs
3059      * and leave the INIT pending.
3060      */
3061     if (kvm_vcpu_latch_init(vcpu)) {
3062         WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
3063         if (test_bit(KVM_APIC_SIPI, &pe))
3064             clear_bit(KVM_APIC_SIPI, &apic->pending_events);
3065         return 0;
3066     }
3067 
3068     if (test_bit(KVM_APIC_INIT, &pe)) {
3069         clear_bit(KVM_APIC_INIT, &apic->pending_events);
3070         kvm_vcpu_reset(vcpu, true);
3071         if (kvm_vcpu_is_bsp(apic->vcpu))
3072             vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
3073         else
3074             vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
3075     }
3076     if (test_bit(KVM_APIC_SIPI, &pe)) {
3077         clear_bit(KVM_APIC_SIPI, &apic->pending_events);
3078         if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
3079             /* evaluate pending_events before reading the vector */
3080             smp_rmb();
3081             sipi_vector = apic->sipi_vector;
3082             static_call(kvm_x86_vcpu_deliver_sipi_vector)(vcpu, sipi_vector);
3083             vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
3084         }
3085     }
3086     return 0;
3087 }
3088 
3089 void kvm_lapic_exit(void)
3090 {
3091     static_key_deferred_flush(&apic_hw_disabled);
3092     WARN_ON(static_branch_unlikely(&apic_hw_disabled.key));
3093     static_key_deferred_flush(&apic_sw_disabled);
3094     WARN_ON(static_branch_unlikely(&apic_sw_disabled.key));
3095 }