Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*  Copyright(c) 2021 Intel Corporation. */
0003 
0004 #include <asm/sgx.h>
0005 
0006 #include "cpuid.h"
0007 #include "kvm_cache_regs.h"
0008 #include "nested.h"
0009 #include "sgx.h"
0010 #include "vmx.h"
0011 #include "x86.h"
0012 
0013 bool __read_mostly enable_sgx = 1;
0014 module_param_named(sgx, enable_sgx, bool, 0444);
0015 
0016 /* Initial value of guest's virtual SGX_LEPUBKEYHASHn MSRs */
0017 static u64 sgx_pubkey_hash[4] __ro_after_init;
0018 
0019 /*
0020  * ENCLS's memory operands use a fixed segment (DS) and a fixed
0021  * address size based on the mode.  Related prefixes are ignored.
0022  */
0023 static int sgx_get_encls_gva(struct kvm_vcpu *vcpu, unsigned long offset,
0024                  int size, int alignment, gva_t *gva)
0025 {
0026     struct kvm_segment s;
0027     bool fault;
0028 
0029     /* Skip vmcs.GUEST_DS retrieval for 64-bit mode to avoid VMREADs. */
0030     *gva = offset;
0031     if (!is_long_mode(vcpu)) {
0032         vmx_get_segment(vcpu, &s, VCPU_SREG_DS);
0033         *gva += s.base;
0034     }
0035 
0036     if (!IS_ALIGNED(*gva, alignment)) {
0037         fault = true;
0038     } else if (likely(is_long_mode(vcpu))) {
0039         fault = is_noncanonical_address(*gva, vcpu);
0040     } else {
0041         *gva &= 0xffffffff;
0042         fault = (s.unusable) ||
0043             (s.type != 2 && s.type != 3) ||
0044             (*gva > s.limit) ||
0045             ((s.base != 0 || s.limit != 0xffffffff) &&
0046             (((u64)*gva + size - 1) > s.limit + 1));
0047     }
0048     if (fault)
0049         kvm_inject_gp(vcpu, 0);
0050     return fault ? -EINVAL : 0;
0051 }
0052 
0053 static void sgx_handle_emulation_failure(struct kvm_vcpu *vcpu, u64 addr,
0054                      unsigned int size)
0055 {
0056     uint64_t data[2] = { addr, size };
0057 
0058     __kvm_prepare_emulation_failure_exit(vcpu, data, ARRAY_SIZE(data));
0059 }
0060 
0061 static int sgx_read_hva(struct kvm_vcpu *vcpu, unsigned long hva, void *data,
0062             unsigned int size)
0063 {
0064     if (__copy_from_user(data, (void __user *)hva, size)) {
0065         sgx_handle_emulation_failure(vcpu, hva, size);
0066         return -EFAULT;
0067     }
0068 
0069     return 0;
0070 }
0071 
0072 static int sgx_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t gva, bool write,
0073               gpa_t *gpa)
0074 {
0075     struct x86_exception ex;
0076 
0077     if (write)
0078         *gpa = kvm_mmu_gva_to_gpa_write(vcpu, gva, &ex);
0079     else
0080         *gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, &ex);
0081 
0082     if (*gpa == INVALID_GPA) {
0083         kvm_inject_emulated_page_fault(vcpu, &ex);
0084         return -EFAULT;
0085     }
0086 
0087     return 0;
0088 }
0089 
0090 static int sgx_gpa_to_hva(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long *hva)
0091 {
0092     *hva = kvm_vcpu_gfn_to_hva(vcpu, PFN_DOWN(gpa));
0093     if (kvm_is_error_hva(*hva)) {
0094         sgx_handle_emulation_failure(vcpu, gpa, 1);
0095         return -EFAULT;
0096     }
0097 
0098     *hva |= gpa & ~PAGE_MASK;
0099 
0100     return 0;
0101 }
0102 
0103 static int sgx_inject_fault(struct kvm_vcpu *vcpu, gva_t gva, int trapnr)
0104 {
0105     struct x86_exception ex;
0106 
0107     /*
0108      * A non-EPCM #PF indicates a bad userspace HVA.  This *should* check
0109      * for PFEC.SGX and not assume any #PF on SGX2 originated in the EPC,
0110      * but the error code isn't (yet) plumbed through the ENCLS helpers.
0111      */
0112     if (trapnr == PF_VECTOR && !boot_cpu_has(X86_FEATURE_SGX2)) {
0113         kvm_prepare_emulation_failure_exit(vcpu);
0114         return 0;
0115     }
0116 
0117     /*
0118      * If the guest thinks it's running on SGX2 hardware, inject an SGX
0119      * #PF if the fault matches an EPCM fault signature (#GP on SGX1,
0120      * #PF on SGX2).  The assumption is that EPCM faults are much more
0121      * likely than a bad userspace address.
0122      */
0123     if ((trapnr == PF_VECTOR || !boot_cpu_has(X86_FEATURE_SGX2)) &&
0124         guest_cpuid_has(vcpu, X86_FEATURE_SGX2)) {
0125         memset(&ex, 0, sizeof(ex));
0126         ex.vector = PF_VECTOR;
0127         ex.error_code = PFERR_PRESENT_MASK | PFERR_WRITE_MASK |
0128                 PFERR_SGX_MASK;
0129         ex.address = gva;
0130         ex.error_code_valid = true;
0131         ex.nested_page_fault = false;
0132         kvm_inject_page_fault(vcpu, &ex);
0133     } else {
0134         kvm_inject_gp(vcpu, 0);
0135     }
0136     return 1;
0137 }
0138 
0139 static int __handle_encls_ecreate(struct kvm_vcpu *vcpu,
0140                   struct sgx_pageinfo *pageinfo,
0141                   unsigned long secs_hva,
0142                   gva_t secs_gva)
0143 {
0144     struct sgx_secs *contents = (struct sgx_secs *)pageinfo->contents;
0145     struct kvm_cpuid_entry2 *sgx_12_0, *sgx_12_1;
0146     u64 attributes, xfrm, size;
0147     u32 miscselect;
0148     u8 max_size_log2;
0149     int trapnr, ret;
0150 
0151     sgx_12_0 = kvm_find_cpuid_entry_index(vcpu, 0x12, 0);
0152     sgx_12_1 = kvm_find_cpuid_entry_index(vcpu, 0x12, 1);
0153     if (!sgx_12_0 || !sgx_12_1) {
0154         kvm_prepare_emulation_failure_exit(vcpu);
0155         return 0;
0156     }
0157 
0158     miscselect = contents->miscselect;
0159     attributes = contents->attributes;
0160     xfrm = contents->xfrm;
0161     size = contents->size;
0162 
0163     /* Enforce restriction of access to the PROVISIONKEY. */
0164     if (!vcpu->kvm->arch.sgx_provisioning_allowed &&
0165         (attributes & SGX_ATTR_PROVISIONKEY)) {
0166         if (sgx_12_1->eax & SGX_ATTR_PROVISIONKEY)
0167             pr_warn_once("KVM: SGX PROVISIONKEY advertised but not allowed\n");
0168         kvm_inject_gp(vcpu, 0);
0169         return 1;
0170     }
0171 
0172     /* Enforce CPUID restrictions on MISCSELECT, ATTRIBUTES and XFRM. */
0173     if ((u32)miscselect & ~sgx_12_0->ebx ||
0174         (u32)attributes & ~sgx_12_1->eax ||
0175         (u32)(attributes >> 32) & ~sgx_12_1->ebx ||
0176         (u32)xfrm & ~sgx_12_1->ecx ||
0177         (u32)(xfrm >> 32) & ~sgx_12_1->edx) {
0178         kvm_inject_gp(vcpu, 0);
0179         return 1;
0180     }
0181 
0182     /* Enforce CPUID restriction on max enclave size. */
0183     max_size_log2 = (attributes & SGX_ATTR_MODE64BIT) ? sgx_12_0->edx >> 8 :
0184                                 sgx_12_0->edx;
0185     if (size >= BIT_ULL(max_size_log2))
0186         kvm_inject_gp(vcpu, 0);
0187 
0188     /*
0189      * sgx_virt_ecreate() returns:
0190      *  1) 0:   ECREATE was successful
0191      *  2) -EFAULT: ECREATE was run but faulted, and trapnr was set to the
0192      *      exception number.
0193      *  3) -EINVAL: access_ok() on @secs_hva failed. This should never
0194      *      happen as KVM checks host addresses at memslot creation.
0195      *      sgx_virt_ecreate() has already warned in this case.
0196      */
0197     ret = sgx_virt_ecreate(pageinfo, (void __user *)secs_hva, &trapnr);
0198     if (!ret)
0199         return kvm_skip_emulated_instruction(vcpu);
0200     if (ret == -EFAULT)
0201         return sgx_inject_fault(vcpu, secs_gva, trapnr);
0202 
0203     return ret;
0204 }
0205 
0206 static int handle_encls_ecreate(struct kvm_vcpu *vcpu)
0207 {
0208     gva_t pageinfo_gva, secs_gva;
0209     gva_t metadata_gva, contents_gva;
0210     gpa_t metadata_gpa, contents_gpa, secs_gpa;
0211     unsigned long metadata_hva, contents_hva, secs_hva;
0212     struct sgx_pageinfo pageinfo;
0213     struct sgx_secs *contents;
0214     struct x86_exception ex;
0215     int r;
0216 
0217     if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 32, 32, &pageinfo_gva) ||
0218         sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva))
0219         return 1;
0220 
0221     /*
0222      * Copy the PAGEINFO to local memory, its pointers need to be
0223      * translated, i.e. we need to do a deep copy/translate.
0224      */
0225     r = kvm_read_guest_virt(vcpu, pageinfo_gva, &pageinfo,
0226                 sizeof(pageinfo), &ex);
0227     if (r == X86EMUL_PROPAGATE_FAULT) {
0228         kvm_inject_emulated_page_fault(vcpu, &ex);
0229         return 1;
0230     } else if (r != X86EMUL_CONTINUE) {
0231         sgx_handle_emulation_failure(vcpu, pageinfo_gva,
0232                          sizeof(pageinfo));
0233         return 0;
0234     }
0235 
0236     if (sgx_get_encls_gva(vcpu, pageinfo.metadata, 64, 64, &metadata_gva) ||
0237         sgx_get_encls_gva(vcpu, pageinfo.contents, 4096, 4096,
0238                   &contents_gva))
0239         return 1;
0240 
0241     /*
0242      * Translate the SECINFO, SOURCE and SECS pointers from GVA to GPA.
0243      * Resume the guest on failure to inject a #PF.
0244      */
0245     if (sgx_gva_to_gpa(vcpu, metadata_gva, false, &metadata_gpa) ||
0246         sgx_gva_to_gpa(vcpu, contents_gva, false, &contents_gpa) ||
0247         sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa))
0248         return 1;
0249 
0250     /*
0251      * ...and then to HVA.  The order of accesses isn't architectural, i.e.
0252      * KVM doesn't have to fully process one address at a time.  Exit to
0253      * userspace if a GPA is invalid.
0254      */
0255     if (sgx_gpa_to_hva(vcpu, metadata_gpa, &metadata_hva) ||
0256         sgx_gpa_to_hva(vcpu, contents_gpa, &contents_hva) ||
0257         sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva))
0258         return 0;
0259 
0260     /*
0261      * Copy contents into kernel memory to prevent TOCTOU attack. E.g. the
0262      * guest could do ECREATE w/ SECS.SGX_ATTR_PROVISIONKEY=0, and
0263      * simultaneously set SGX_ATTR_PROVISIONKEY to bypass the check to
0264      * enforce restriction of access to the PROVISIONKEY.
0265      */
0266     contents = (struct sgx_secs *)__get_free_page(GFP_KERNEL_ACCOUNT);
0267     if (!contents)
0268         return -ENOMEM;
0269 
0270     /* Exit to userspace if copying from a host userspace address fails. */
0271     if (sgx_read_hva(vcpu, contents_hva, (void *)contents, PAGE_SIZE)) {
0272         free_page((unsigned long)contents);
0273         return 0;
0274     }
0275 
0276     pageinfo.metadata = metadata_hva;
0277     pageinfo.contents = (u64)contents;
0278 
0279     r = __handle_encls_ecreate(vcpu, &pageinfo, secs_hva, secs_gva);
0280 
0281     free_page((unsigned long)contents);
0282 
0283     return r;
0284 }
0285 
0286 static int handle_encls_einit(struct kvm_vcpu *vcpu)
0287 {
0288     unsigned long sig_hva, secs_hva, token_hva, rflags;
0289     struct vcpu_vmx *vmx = to_vmx(vcpu);
0290     gva_t sig_gva, secs_gva, token_gva;
0291     gpa_t sig_gpa, secs_gpa, token_gpa;
0292     int ret, trapnr;
0293 
0294     if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 1808, 4096, &sig_gva) ||
0295         sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva) ||
0296         sgx_get_encls_gva(vcpu, kvm_rdx_read(vcpu), 304, 512, &token_gva))
0297         return 1;
0298 
0299     /*
0300      * Translate the SIGSTRUCT, SECS and TOKEN pointers from GVA to GPA.
0301      * Resume the guest on failure to inject a #PF.
0302      */
0303     if (sgx_gva_to_gpa(vcpu, sig_gva, false, &sig_gpa) ||
0304         sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa) ||
0305         sgx_gva_to_gpa(vcpu, token_gva, false, &token_gpa))
0306         return 1;
0307 
0308     /*
0309      * ...and then to HVA.  The order of accesses isn't architectural, i.e.
0310      * KVM doesn't have to fully process one address at a time.  Exit to
0311      * userspace if a GPA is invalid.  Note, all structures are aligned and
0312      * cannot split pages.
0313      */
0314     if (sgx_gpa_to_hva(vcpu, sig_gpa, &sig_hva) ||
0315         sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva) ||
0316         sgx_gpa_to_hva(vcpu, token_gpa, &token_hva))
0317         return 0;
0318 
0319     ret = sgx_virt_einit((void __user *)sig_hva, (void __user *)token_hva,
0320                  (void __user *)secs_hva,
0321                  vmx->msr_ia32_sgxlepubkeyhash, &trapnr);
0322 
0323     if (ret == -EFAULT)
0324         return sgx_inject_fault(vcpu, secs_gva, trapnr);
0325 
0326     /*
0327      * sgx_virt_einit() returns -EINVAL when access_ok() fails on @sig_hva,
0328      * @token_hva or @secs_hva. This should never happen as KVM checks host
0329      * addresses at memslot creation. sgx_virt_einit() has already warned
0330      * in this case, so just return.
0331      */
0332     if (ret < 0)
0333         return ret;
0334 
0335     rflags = vmx_get_rflags(vcpu) & ~(X86_EFLAGS_CF | X86_EFLAGS_PF |
0336                       X86_EFLAGS_AF | X86_EFLAGS_SF |
0337                       X86_EFLAGS_OF);
0338     if (ret)
0339         rflags |= X86_EFLAGS_ZF;
0340     else
0341         rflags &= ~X86_EFLAGS_ZF;
0342     vmx_set_rflags(vcpu, rflags);
0343 
0344     kvm_rax_write(vcpu, ret);
0345     return kvm_skip_emulated_instruction(vcpu);
0346 }
0347 
0348 static inline bool encls_leaf_enabled_in_guest(struct kvm_vcpu *vcpu, u32 leaf)
0349 {
0350     if (!enable_sgx || !guest_cpuid_has(vcpu, X86_FEATURE_SGX))
0351         return false;
0352 
0353     if (leaf >= ECREATE && leaf <= ETRACK)
0354         return guest_cpuid_has(vcpu, X86_FEATURE_SGX1);
0355 
0356     if (leaf >= EAUG && leaf <= EMODT)
0357         return guest_cpuid_has(vcpu, X86_FEATURE_SGX2);
0358 
0359     return false;
0360 }
0361 
0362 static inline bool sgx_enabled_in_guest_bios(struct kvm_vcpu *vcpu)
0363 {
0364     const u64 bits = FEAT_CTL_SGX_ENABLED | FEAT_CTL_LOCKED;
0365 
0366     return (to_vmx(vcpu)->msr_ia32_feature_control & bits) == bits;
0367 }
0368 
0369 int handle_encls(struct kvm_vcpu *vcpu)
0370 {
0371     u32 leaf = (u32)kvm_rax_read(vcpu);
0372 
0373     if (!encls_leaf_enabled_in_guest(vcpu, leaf)) {
0374         kvm_queue_exception(vcpu, UD_VECTOR);
0375     } else if (!sgx_enabled_in_guest_bios(vcpu)) {
0376         kvm_inject_gp(vcpu, 0);
0377     } else {
0378         if (leaf == ECREATE)
0379             return handle_encls_ecreate(vcpu);
0380         if (leaf == EINIT)
0381             return handle_encls_einit(vcpu);
0382         WARN(1, "KVM: unexpected exit on ENCLS[%u]", leaf);
0383         vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
0384         vcpu->run->hw.hardware_exit_reason = EXIT_REASON_ENCLS;
0385         return 0;
0386     }
0387     return 1;
0388 }
0389 
0390 void setup_default_sgx_lepubkeyhash(void)
0391 {
0392     /*
0393      * Use Intel's default value for Skylake hardware if Launch Control is
0394      * not supported, i.e. Intel's hash is hardcoded into silicon, or if
0395      * Launch Control is supported and enabled, i.e. mimic the reset value
0396      * and let the guest write the MSRs at will.  If Launch Control is
0397      * supported but disabled, then use the current MSR values as the hash
0398      * MSRs exist but are read-only (locked and not writable).
0399      */
0400     if (!enable_sgx || boot_cpu_has(X86_FEATURE_SGX_LC) ||
0401         rdmsrl_safe(MSR_IA32_SGXLEPUBKEYHASH0, &sgx_pubkey_hash[0])) {
0402         sgx_pubkey_hash[0] = 0xa6053e051270b7acULL;
0403         sgx_pubkey_hash[1] = 0x6cfbe8ba8b3b413dULL;
0404         sgx_pubkey_hash[2] = 0xc4916d99f2b3735dULL;
0405         sgx_pubkey_hash[3] = 0xd4f8c05909f9bb3bULL;
0406     } else {
0407         /* MSR_IA32_SGXLEPUBKEYHASH0 is read above */
0408         rdmsrl(MSR_IA32_SGXLEPUBKEYHASH1, sgx_pubkey_hash[1]);
0409         rdmsrl(MSR_IA32_SGXLEPUBKEYHASH2, sgx_pubkey_hash[2]);
0410         rdmsrl(MSR_IA32_SGXLEPUBKEYHASH3, sgx_pubkey_hash[3]);
0411     }
0412 }
0413 
0414 void vcpu_setup_sgx_lepubkeyhash(struct kvm_vcpu *vcpu)
0415 {
0416     struct vcpu_vmx *vmx = to_vmx(vcpu);
0417 
0418     memcpy(vmx->msr_ia32_sgxlepubkeyhash, sgx_pubkey_hash,
0419            sizeof(sgx_pubkey_hash));
0420 }
0421 
0422 /*
0423  * ECREATE must be intercepted to enforce MISCSELECT, ATTRIBUTES and XFRM
0424  * restrictions if the guest's allowed-1 settings diverge from hardware.
0425  */
0426 static bool sgx_intercept_encls_ecreate(struct kvm_vcpu *vcpu)
0427 {
0428     struct kvm_cpuid_entry2 *guest_cpuid;
0429     u32 eax, ebx, ecx, edx;
0430 
0431     if (!vcpu->kvm->arch.sgx_provisioning_allowed)
0432         return true;
0433 
0434     guest_cpuid = kvm_find_cpuid_entry_index(vcpu, 0x12, 0);
0435     if (!guest_cpuid)
0436         return true;
0437 
0438     cpuid_count(0x12, 0, &eax, &ebx, &ecx, &edx);
0439     if (guest_cpuid->ebx != ebx || guest_cpuid->edx != edx)
0440         return true;
0441 
0442     guest_cpuid = kvm_find_cpuid_entry_index(vcpu, 0x12, 1);
0443     if (!guest_cpuid)
0444         return true;
0445 
0446     cpuid_count(0x12, 1, &eax, &ebx, &ecx, &edx);
0447     if (guest_cpuid->eax != eax || guest_cpuid->ebx != ebx ||
0448         guest_cpuid->ecx != ecx || guest_cpuid->edx != edx)
0449         return true;
0450 
0451     return false;
0452 }
0453 
0454 void vmx_write_encls_bitmap(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
0455 {
0456     /*
0457      * There is no software enable bit for SGX that is virtualized by
0458      * hardware, e.g. there's no CR4.SGXE, so when SGX is disabled in the
0459      * guest (either by the host or by the guest's BIOS) but enabled in the
0460      * host, trap all ENCLS leafs and inject #UD/#GP as needed to emulate
0461      * the expected system behavior for ENCLS.
0462      */
0463     u64 bitmap = -1ull;
0464 
0465     /* Nothing to do if hardware doesn't support SGX */
0466     if (!cpu_has_vmx_encls_vmexit())
0467         return;
0468 
0469     if (guest_cpuid_has(vcpu, X86_FEATURE_SGX) &&
0470         sgx_enabled_in_guest_bios(vcpu)) {
0471         if (guest_cpuid_has(vcpu, X86_FEATURE_SGX1)) {
0472             bitmap &= ~GENMASK_ULL(ETRACK, ECREATE);
0473             if (sgx_intercept_encls_ecreate(vcpu))
0474                 bitmap |= (1 << ECREATE);
0475         }
0476 
0477         if (guest_cpuid_has(vcpu, X86_FEATURE_SGX2))
0478             bitmap &= ~GENMASK_ULL(EMODT, EAUG);
0479 
0480         /*
0481          * Trap and execute EINIT if launch control is enabled in the
0482          * host using the guest's values for launch control MSRs, even
0483          * if the guest's values are fixed to hardware default values.
0484          * The MSRs are not loaded/saved on VM-Enter/VM-Exit as writing
0485          * the MSRs is extraordinarily expensive.
0486          */
0487         if (boot_cpu_has(X86_FEATURE_SGX_LC))
0488             bitmap |= (1 << EINIT);
0489 
0490         if (!vmcs12 && is_guest_mode(vcpu))
0491             vmcs12 = get_vmcs12(vcpu);
0492         if (vmcs12 && nested_cpu_has_encls_exit(vmcs12))
0493             bitmap |= vmcs12->encls_exiting_bitmap;
0494     }
0495     vmcs_write64(ENCLS_EXITING_BITMAP, bitmap);
0496 }