// SPDX-License-Identifier: GPL-2.0
/*
 * AMD Encrypted Register State Support
 *
 * Author: Joerg Roedel <jroedel@suse.de>
 *
 * This file is not compiled stand-alone. It contains code shared
 * between the pre-decompression boot code and the running Linux kernel
 * and is included directly into both code-bases.
 */

#ifndef __BOOT_COMPRESSED
#define error(v)    pr_err(v)
#define has_cpuflag(f)  boot_cpu_has(f)
#endif

/* I/O parameters for CPUID-related helpers */
struct cpuid_leaf {
    u32 fn;
    u32 subfn;
    u32 eax;
    u32 ebx;
    u32 ecx;
    u32 edx;
};

/*
 * Individual entries of the SNP CPUID table, as defined by the SNP
 * Firmware ABI, Revision 0.9, Section 7.1, Table 14.
 */
struct snp_cpuid_fn {
    u32 eax_in;
    u32 ecx_in;
    u64 xcr0_in;
    u64 xss_in;
    u32 eax;
    u32 ebx;
    u32 ecx;
    u32 edx;
    u64 __reserved;
} __packed;

/*
 * SNP CPUID table, as defined by the SNP Firmware ABI, Revision 0.9,
 * Section 8.14.2.6. Also noted there is the SNP firmware-enforced limit
 * of 64 entries per CPUID table.
 */
#define SNP_CPUID_COUNT_MAX 64

struct snp_cpuid_table {
    u32 count;
    u32 __reserved1;
    u64 __reserved2;
    struct snp_cpuid_fn fn[SNP_CPUID_COUNT_MAX];
} __packed;
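
/*
 * Note: with the layout above each entry is 48 bytes, so even a fully
 * populated table is 16 + 64 * 48 = 3088 bytes and fits comfortably in
 * the single page that setup_cpuid_table() below expects the firmware
 * to provide.
 */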

/*
 * Since feature-negotiation-related variables are set early in the boot
 * process, they must reside in the .data section so as not to be zeroed
 * out when the .bss section is later cleared.
 *
 * GHCB protocol version negotiated with the hypervisor.
 */
static u16 ghcb_version __ro_after_init;

/* Copy of the SNP firmware's CPUID page. */
static struct snp_cpuid_table cpuid_table_copy __ro_after_init;

/*
 * These will be initialized based on the CPUID table so that non-present
 * all-zero leaves (for sparse tables) can be differentiated from
 * invalid/out-of-range leaves. This is needed since all-zero leaves
 * still need to be post-processed.
 */
static u32 cpuid_std_range_max __ro_after_init;
static u32 cpuid_hyp_range_max __ro_after_init;
static u32 cpuid_ext_range_max __ro_after_init;

static bool __init sev_es_check_cpu_features(void)
{
    if (!has_cpuflag(X86_FEATURE_RDRAND)) {
        error("RDRAND instruction not supported - no trusted source of randomness available\n");
        return false;
    }

    return true;
}

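/*
 * Guest termination uses the GHCB MSR protocol: the request code
 * (GHCB_MSR_TERM_REQ) sits in the low bits of the MSR value and, per
 * the GHCB spec, GHCB_SEV_TERM_REASON() shifts the reason-code set and
 * reason code into the GHCBData bits above it. The HLT loop is a
 * fallback in case the hypervisor ignores the termination request.
 */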
static void __noreturn sev_es_terminate(unsigned int set, unsigned int reason)
{
    u64 val = GHCB_MSR_TERM_REQ;

    /* Tell the hypervisor what went wrong. */
    val |= GHCB_SEV_TERM_REASON(set, reason);

    /* Request guest termination from the hypervisor. */
    sev_es_wr_ghcb_msr(val);
    VMGEXIT();

    while (true)
        asm volatile("hlt\n" : : : "memory");
}

/*
 * The hypervisor features are available from GHCB version 2 onward.
 */
static u64 get_hv_features(void)
{
    u64 val;

    if (ghcb_version < 2)
        return 0;

    sev_es_wr_ghcb_msr(GHCB_MSR_HV_FT_REQ);
    VMGEXIT();

    val = sev_es_rd_ghcb_msr();
    if (GHCB_RESP_CODE(val) != GHCB_MSR_HV_FT_RESP)
        return 0;

    return GHCB_MSR_HV_FT_RESP_VAL(val);
}

static void snp_register_ghcb_early(unsigned long paddr)
{
    unsigned long pfn = paddr >> PAGE_SHIFT;
    u64 val;

    sev_es_wr_ghcb_msr(GHCB_MSR_REG_GPA_REQ_VAL(pfn));
    VMGEXIT();

    val = sev_es_rd_ghcb_msr();

    /* If the response GPA is not ours, terminate the guest. */
    if ((GHCB_RESP_CODE(val) != GHCB_MSR_REG_GPA_RESP) ||
        (GHCB_MSR_REG_GPA_RESP_VAL(val) != pfn))
        sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_REGISTER);
}

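/*
 * Per the GHCB spec, the SEV information response carries the
 * hypervisor's supported protocol range, extracted below via
 * GHCB_MSR_PROTO_MIN()/GHCB_MSR_PROTO_MAX(). Negotiation succeeds when
 * that range overlaps [GHCB_PROTOCOL_MIN, GHCB_PROTOCOL_MAX], settling
 * on the highest version both sides support.
 */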
static bool sev_es_negotiate_protocol(void)
{
    u64 val;

    /* Do the GHCB protocol version negotiation */
    sev_es_wr_ghcb_msr(GHCB_MSR_SEV_INFO_REQ);
    VMGEXIT();
    val = sev_es_rd_ghcb_msr();

    if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP)
        return false;

    if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTOCOL_MIN ||
        GHCB_MSR_PROTO_MIN(val) > GHCB_PROTOCOL_MAX)
        return false;

    ghcb_version = min_t(size_t, GHCB_MSR_PROTO_MAX(val), GHCB_PROTOCOL_MAX);

    return true;
}

static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb)
{
    ghcb->save.sw_exit_code = 0;
    __builtin_memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
}

static bool vc_decoding_needed(unsigned long exit_code)
{
    /* Exceptions don't require decoding of the instruction. */
    return !(exit_code >= SVM_EXIT_EXCP_BASE &&
         exit_code <= SVM_EXIT_LAST_EXCP);
}

static enum es_result vc_init_em_ctxt(struct es_em_ctxt *ctxt,
                      struct pt_regs *regs,
                      unsigned long exit_code)
{
    enum es_result ret = ES_OK;

    memset(ctxt, 0, sizeof(*ctxt));
    ctxt->regs = regs;

    if (vc_decoding_needed(exit_code))
        ret = vc_decode_insn(ctxt);

    return ret;
}

static void vc_finish_insn(struct es_em_ctxt *ctxt)
{
    ctxt->regs->ip += ctxt->insn.length;
}

static enum es_result verify_exception_info(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
    u32 ret;

    ret = ghcb->save.sw_exit_info_1 & GENMASK_ULL(31, 0);
    if (!ret)
        return ES_OK;

    if (ret == 1) {
        u64 info = ghcb->save.sw_exit_info_2;
        unsigned long v = info & SVM_EVTINJ_VEC_MASK;

        /* Check if exception information from the hypervisor is sane. */
        if ((info & SVM_EVTINJ_VALID) &&
            ((v == X86_TRAP_GP) || (v == X86_TRAP_UD)) &&
            ((info & SVM_EVTINJ_TYPE_MASK) == SVM_EVTINJ_TYPE_EXEPT)) {
            ctxt->fi.vector = v;

            if (info & SVM_EVTINJ_VALID_ERR)
                ctxt->fi.error_code = info >> 32;

            return ES_EXCEPTION;
        }
    }

    return ES_VMM_ERROR;
}

static enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
                      struct es_em_ctxt *ctxt,
                      u64 exit_code, u64 exit_info_1,
                      u64 exit_info_2)
{
    /* Fill in protocol and format specifiers */
    ghcb->protocol_version = ghcb_version;
    ghcb->ghcb_usage       = GHCB_DEFAULT_USAGE;

    ghcb_set_sw_exit_code(ghcb, exit_code);
    ghcb_set_sw_exit_info_1(ghcb, exit_info_1);
    ghcb_set_sw_exit_info_2(ghcb, exit_info_2);

    sev_es_wr_ghcb_msr(__pa(ghcb));
    VMGEXIT();

    return verify_exception_info(ghcb, ctxt);
}

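/*
 * CPUID via the GHCB MSR protocol: each request names the CPUID
 * function and which output register is wanted (GHCB_CPUID_REQ_EAX and
 * friends), and the hypervisor returns that one register's value in
 * the upper 32 bits of the response - hence the "val >> 32" below and
 * the four round-trips in sev_cpuid_hv().
 */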
static int __sev_cpuid_hv(u32 fn, int reg_idx, u32 *reg)
{
    u64 val;

    sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, reg_idx));
    VMGEXIT();
    val = sev_es_rd_ghcb_msr();
    if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
        return -EIO;

    *reg = (val >> 32);

    return 0;
}

static int sev_cpuid_hv(struct cpuid_leaf *leaf)
{
    int ret;

    /*
     * The MSR protocol does not support fetching non-zero subfunctions,
     * but is sufficient to handle current early-boot cases. Should that
     * change, make sure to report an error rather than ignoring the index
     * and grabbing random values. If this issue arises in the future,
     * handling can be added here to use the GHCB-page protocol for cases
     * that occur late enough in boot that the GHCB page is available.
     */
    if (cpuid_function_is_indexed(leaf->fn) && leaf->subfn)
        return -EINVAL;

    ret =         __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EAX, &leaf->eax);
    ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EBX, &leaf->ebx);
    ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_ECX, &leaf->ecx);
    ret = ret ? : __sev_cpuid_hv(leaf->fn, GHCB_CPUID_REQ_EDX, &leaf->edx);

    return ret;
}

/*
 * This may be called early while still running on the initial identity
 * mapping. Use RIP-relative addressing to obtain the correct address
 * while running with the initial identity mapping as well as the
 * switch-over to kernel virtual addresses later.
 */
static const struct snp_cpuid_table *snp_cpuid_get_table(void)
{
    void *ptr;

    asm ("lea cpuid_table_copy(%%rip), %0"
         : "=r" (ptr)
         : "p" (&cpuid_table_copy));

    return ptr;
}

/*
 * The SNP Firmware ABI, Revision 0.9, Section 7.1, details the use of
 * XCR0_IN and XSS_IN to encode multiple versions of 0xD subfunctions 0
 * and 1 based on the corresponding features enabled by a particular
 * combination of XCR0 and XSS registers so that a guest can look up the
 * version corresponding to the features currently enabled in its XCR0/XSS
 * registers. The only values that differ between these versions/table
 * entries are the enabled XSAVE area sizes advertised via EBX.
 *
 * While hypervisors may choose to make use of this support, it is more
 * robust/secure for a guest to simply find the entry corresponding to the
 * base/legacy XSAVE area size (XCR0=1 or XCR0=3), and then calculate the
 * XSAVE area size using subfunctions 2 through 64, as documented in APM
 * Volume 3, Rev 3.31, Appendix E.3.8, which is what is done here.
 *
 * Since the base/legacy XSAVE area size is documented as 0x240, use that
 * value directly rather than relying on the base size in the CPUID table.
 *
 * Return: XSAVE area size on success, 0 otherwise.
 */
static u32 snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted)
{
    const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
    u64 xfeatures_found = 0;
    u32 xsave_size = 0x240;
    int i;

    for (i = 0; i < cpuid_table->count; i++) {
        const struct snp_cpuid_fn *e = &cpuid_table->fn[i];

        if (!(e->eax_in == 0xD && e->ecx_in > 1 && e->ecx_in < 64))
            continue;
        if (!(xfeatures_en & (BIT_ULL(e->ecx_in))))
            continue;
        if (xfeatures_found & (BIT_ULL(e->ecx_in)))
            continue;

        xfeatures_found |= (BIT_ULL(e->ecx_in));

        if (compacted)
            xsave_size += e->eax;
        else
            xsave_size = max(xsave_size, e->eax + e->ebx);
    }

    /*
     * Either the guest set unsupported XCR0/XSS bits, or the corresponding
     * entries in the CPUID table were not present. This is not a valid
     * state to be in.
     */
    if (xfeatures_found != (xfeatures_en & GENMASK_ULL(63, 2)))
        return 0;

    return xsave_size;
}
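
/*
 * Illustrative example (typical hardware values, not mandated by the
 * spec): with xfeatures_en == 0x7 (x87/SSE/AVX), only the AVX entry
 * (ecx_in == 2) passes the loop's filters above. With the usual AVX
 * state size of 0x100 at offset 0x240, both the compacted and
 * non-compacted formats come out to 0x240 + 0x100 = 0x340 bytes.
 */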

static bool
snp_cpuid_get_validated_func(struct cpuid_leaf *leaf)
{
    const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
    int i;

    for (i = 0; i < cpuid_table->count; i++) {
        const struct snp_cpuid_fn *e = &cpuid_table->fn[i];

        if (e->eax_in != leaf->fn)
            continue;

        if (cpuid_function_is_indexed(leaf->fn) && e->ecx_in != leaf->subfn)
            continue;

        /*
         * For 0xD subfunctions 0 and 1, only use the entry corresponding
         * to the base/legacy XSAVE area size (XCR0=1 or XCR0=3, XSS=0).
         * See the comments above snp_cpuid_calc_xsave_size() for more
         * details.
         */
        if (e->eax_in == 0xD && (e->ecx_in == 0 || e->ecx_in == 1))
            if (!(e->xcr0_in == 1 || e->xcr0_in == 3) || e->xss_in)
                continue;

        leaf->eax = e->eax;
        leaf->ebx = e->ebx;
        leaf->ecx = e->ecx;
        leaf->edx = e->edx;

        return true;
    }

    return false;
}

static void snp_cpuid_hv(struct cpuid_leaf *leaf)
{
    if (sev_cpuid_hv(leaf))
        sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID_HV);
}

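/*
 * The CPUID table is a single static template shared by all vCPUs, so
 * the post-processing below fills in the fields that are genuinely
 * per-CPU or OS-controlled: APIC and node IDs are re-fetched from the
 * (untrusted) hypervisor, while OSXSAVE/OSPKE and the XSAVE area sizes
 * are derived from the guest's own CR4/XCR0/XSS state.
 */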
static int snp_cpuid_postprocess(struct cpuid_leaf *leaf)
{
    struct cpuid_leaf leaf_hv = *leaf;

    switch (leaf->fn) {
    case 0x1:
        snp_cpuid_hv(&leaf_hv);

        /* initial APIC ID */
        leaf->ebx = (leaf_hv.ebx & GENMASK(31, 24)) | (leaf->ebx & GENMASK(23, 0));
        /* APIC enabled bit */
        leaf->edx = (leaf_hv.edx & BIT(9)) | (leaf->edx & ~BIT(9));

        /* OSXSAVE enabled bit */
        if (native_read_cr4() & X86_CR4_OSXSAVE)
            leaf->ecx |= BIT(27);
        break;
    case 0x7:
        /* OSPKE enabled bit */
        leaf->ecx &= ~BIT(4);
        if (native_read_cr4() & X86_CR4_PKE)
            leaf->ecx |= BIT(4);
        break;
    case 0xB:
        leaf_hv.subfn = 0;
        snp_cpuid_hv(&leaf_hv);

        /* extended APIC ID */
        leaf->edx = leaf_hv.edx;
        break;
    case 0xD: {
        bool compacted = false;
        u64 xcr0 = 1, xss = 0;
        u32 xsave_size;

        if (leaf->subfn != 0 && leaf->subfn != 1)
            return 0;

        if (native_read_cr4() & X86_CR4_OSXSAVE)
            xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
        if (leaf->subfn == 1) {
            /* Get XSS value if XSAVES is enabled. */
            if (leaf->eax & BIT(3)) {
                unsigned long lo, hi;

                asm volatile("rdmsr" : "=a" (lo), "=d" (hi)
                             : "c" (MSR_IA32_XSS));
                xss = (hi << 32) | lo;
            }

            /*
             * The PPR and APM aren't clear on what size should be
             * encoded in 0xD:0x1:EBX when compaction is not enabled
             * by either XSAVEC (feature bit 1) or XSAVES (feature
             * bit 3) since SNP-capable hardware has these feature
             * bits fixed as 1. KVM sets it to 0 in this case, but
             * to avoid this becoming an issue it's safer to simply
             * treat this as unsupported for SNP guests.
             */
            if (!(leaf->eax & (BIT(1) | BIT(3))))
                return -EINVAL;

            compacted = true;
        }

        xsave_size = snp_cpuid_calc_xsave_size(xcr0 | xss, compacted);
        if (!xsave_size)
            return -EINVAL;

        leaf->ebx = xsave_size;
        }
        break;
    case 0x8000001E:
        snp_cpuid_hv(&leaf_hv);

        /* extended APIC ID */
        leaf->eax = leaf_hv.eax;
        /* compute ID */
        leaf->ebx = (leaf->ebx & GENMASK(31, 8)) | (leaf_hv.ebx & GENMASK(7, 0));
        /* node ID */
        leaf->ecx = (leaf->ecx & GENMASK(31, 8)) | (leaf_hv.ecx & GENMASK(7, 0));
        break;
    default:
        /* No fix-ups needed, use values as-is. */
        break;
    }

    return 0;
}

/*
 * Returns -EOPNOTSUPP if the feature is not enabled. Any other non-zero
 * return value should be treated as fatal by the caller.
 */
static int snp_cpuid(struct cpuid_leaf *leaf)
{
    const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();

    if (!cpuid_table->count)
        return -EOPNOTSUPP;

    if (!snp_cpuid_get_validated_func(leaf)) {
        /*
         * Some hypervisors will avoid keeping track of CPUID entries
         * where all values are zero, since they can be handled the
         * same as out-of-range values (all-zero). This is useful here
         * as well as it allows virtually all guest configurations to
         * work using a single SNP CPUID table.
         *
         * To allow for this, there is a need to distinguish between
         * out-of-range entries and in-range zero entries, since the
         * CPUID table entries are only a template that may need to be
         * augmented with additional values for things like
         * CPU-specific information during post-processing. So if it's
         * not in the table, set the values to zero. Then, if they are
         * within a valid CPUID range, proceed with post-processing
         * using zeros as the initial values. Otherwise, skip
         * post-processing and just return zeros immediately.
         */
        leaf->eax = leaf->ebx = leaf->ecx = leaf->edx = 0;

        /* Skip post-processing for out-of-range zero leaves. */
        if (!(leaf->fn <= cpuid_std_range_max ||
              (leaf->fn >= 0x40000000 && leaf->fn <= cpuid_hyp_range_max) ||
              (leaf->fn >= 0x80000000 && leaf->fn <= cpuid_ext_range_max)))
            return 0;
    }

    return snp_cpuid_postprocess(leaf);
}

/*
 * Boot VC Handler - This is the first VC handler during boot; there is no
 * GHCB page yet, so it only supports MSR-based communication with the
 * hypervisor and only the CPUID exit-code.
 */
void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
{
    unsigned int subfn = lower_bits(regs->cx, 32);
    unsigned int fn = lower_bits(regs->ax, 32);
    struct cpuid_leaf leaf;
    int ret;

    /* Only CPUID is supported via the MSR protocol */
    if (exit_code != SVM_EXIT_CPUID)
        goto fail;

    leaf.fn = fn;
    leaf.subfn = subfn;

    ret = snp_cpuid(&leaf);
    if (!ret)
        goto cpuid_done;

    if (ret != -EOPNOTSUPP)
        goto fail;

    if (sev_cpuid_hv(&leaf))
        goto fail;

cpuid_done:
    regs->ax = leaf.eax;
    regs->bx = leaf.ebx;
    regs->cx = leaf.ecx;
    regs->dx = leaf.edx;

    /*
     * This is a VC handler and the #VC is only raised when SEV-ES is
     * active, which means SEV must be active too. Do sanity checks on the
     * CPUID results to make sure the hypervisor does not trick the kernel
     * into the no-sev path. This could map sensitive data unencrypted and
     * make it accessible to the hypervisor.
     *
     * In particular, check for:
     *  - Availability of CPUID leaf 0x8000001f
     *  - SEV CPUID bit.
     *
     * The hypervisor might still report the wrong C-bit position, but this
     * can't be checked here.
     */

    if (fn == 0x80000000 && (regs->ax < 0x8000001f))
        /* SEV leaf check */
        goto fail;
    else if ((fn == 0x8000001f && !(regs->ax & BIT(1))))
        /* SEV bit */
        goto fail;

    /* Skip over the CPUID two-byte opcode */
    regs->ip += 2;

    return;

fail:
    /* Terminate the guest */
    sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
}

static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt,
                      void *src, char *buf,
                      unsigned int data_size,
                      unsigned int count,
                      bool backwards)
{
    int i, b = backwards ? -1 : 1;
    enum es_result ret = ES_OK;

    for (i = 0; i < count; i++) {
        void *s = src + (i * data_size * b);
        char *d = buf + (i * data_size);

        ret = vc_read_mem(ctxt, s, d, data_size);
        if (ret != ES_OK)
            break;
    }

    return ret;
}

static enum es_result vc_insn_string_write(struct es_em_ctxt *ctxt,
                       void *dst, char *buf,
                       unsigned int data_size,
                       unsigned int count,
                       bool backwards)
{
    int i, s = backwards ? -1 : 1;
    enum es_result ret = ES_OK;

    for (i = 0; i < count; i++) {
        void *d = dst + (i * data_size * s);
        char *b = buf + (i * data_size);

        ret = vc_write_mem(ctxt, d, b, data_size);
        if (ret != ES_OK)
            break;
    }

    return ret;
}

#define IOIO_TYPE_STR  BIT(2)
#define IOIO_TYPE_IN   1
#define IOIO_TYPE_INS  (IOIO_TYPE_IN | IOIO_TYPE_STR)
#define IOIO_TYPE_OUT  0
#define IOIO_TYPE_OUTS (IOIO_TYPE_OUT | IOIO_TYPE_STR)

#define IOIO_REP       BIT(3)

#define IOIO_ADDR_64   BIT(9)
#define IOIO_ADDR_32   BIT(8)
#define IOIO_ADDR_16   BIT(7)

#define IOIO_DATA_32   BIT(6)
#define IOIO_DATA_16   BIT(5)
#define IOIO_DATA_8    BIT(4)

#define IOIO_SEG_ES    (0 << 10)
#define IOIO_SEG_DS    (3 << 10)

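/*
 * The defines above mirror the SVM IOIO intercept EXITINFO1 layout
 * (APM Vol. 2): bit 0 is the direction (1 = IN), bit 2 marks string
 * instructions, bit 3 a REP prefix, bits 4-6 the operand size, bits
 * 7-9 the address size, bits 10-12 the effective segment, and bits
 * 16-31 the port number.
 */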
static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
{
    struct insn *insn = &ctxt->insn;
    *exitinfo = 0;

    switch (insn->opcode.bytes[0]) {
    /* INS opcodes */
    case 0x6c:
    case 0x6d:
        *exitinfo |= IOIO_TYPE_INS;
        *exitinfo |= IOIO_SEG_ES;
        *exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
        break;

    /* OUTS opcodes */
    case 0x6e:
    case 0x6f:
        *exitinfo |= IOIO_TYPE_OUTS;
        *exitinfo |= IOIO_SEG_DS;
        *exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
        break;

    /* IN immediate opcodes */
    case 0xe4:
    case 0xe5:
        *exitinfo |= IOIO_TYPE_IN;
        *exitinfo |= (u8)insn->immediate.value << 16;
        break;

    /* OUT immediate opcodes */
    case 0xe6:
    case 0xe7:
        *exitinfo |= IOIO_TYPE_OUT;
        *exitinfo |= (u8)insn->immediate.value << 16;
        break;

    /* IN register opcodes */
    case 0xec:
    case 0xed:
        *exitinfo |= IOIO_TYPE_IN;
        *exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
        break;

    /* OUT register opcodes */
    case 0xee:
    case 0xef:
        *exitinfo |= IOIO_TYPE_OUT;
        *exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
        break;

    default:
        return ES_DECODE_FAILED;
    }

    switch (insn->opcode.bytes[0]) {
    case 0x6c:
    case 0x6e:
    case 0xe4:
    case 0xe6:
    case 0xec:
    case 0xee:
        /* Single byte opcodes */
        *exitinfo |= IOIO_DATA_8;
        break;
    default:
        /* Length determined by instruction parsing */
        *exitinfo |= (insn->opnd_bytes == 2) ? IOIO_DATA_16
                             : IOIO_DATA_32;
    }
    switch (insn->addr_bytes) {
    case 2:
        *exitinfo |= IOIO_ADDR_16;
        break;
    case 4:
        *exitinfo |= IOIO_ADDR_32;
        break;
    case 8:
        *exitinfo |= IOIO_ADDR_64;
        break;
    }

    if (insn_has_rep_prefix(insn))
        *exitinfo |= IOIO_REP;

    return ES_OK;
}

static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
    struct pt_regs *regs = ctxt->regs;
    u64 exit_info_1, exit_info_2;
    enum es_result ret;

    ret = vc_ioio_exitinfo(ctxt, &exit_info_1);
    if (ret != ES_OK)
        return ret;

    if (exit_info_1 & IOIO_TYPE_STR) {

        /* (REP) INS/OUTS */

        bool df = ((regs->flags & X86_EFLAGS_DF) == X86_EFLAGS_DF);
        unsigned int io_bytes, exit_bytes;
        unsigned int ghcb_count, op_count;
        unsigned long es_base;
        u64 sw_scratch;

        /*
         * For the string variants with a rep prefix, the number of in/out
         * operations per #VC exception is limited so that the kernel
         * has a chance to take interrupts and re-schedule while the
         * instruction is emulated.
         */
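        /* IOIO_DATA_* occupy bits 4-6, so this yields 1, 2 or 4 bytes. */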
        io_bytes   = (exit_info_1 >> 4) & 0x7;
        ghcb_count = sizeof(ghcb->shared_buffer) / io_bytes;

        op_count    = (exit_info_1 & IOIO_REP) ? regs->cx : 1;
        exit_info_2 = min(op_count, ghcb_count);
        exit_bytes  = exit_info_2 * io_bytes;

        es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES);

        /* Read bytes of OUTS into the shared buffer */
        if (!(exit_info_1 & IOIO_TYPE_IN)) {
            ret = vc_insn_string_read(ctxt,
                           (void *)(es_base + regs->si),
                           ghcb->shared_buffer, io_bytes,
                           exit_info_2, df);
            if (ret)
                return ret;
        }

        /*
         * Issue a VMGEXIT to the HV to consume the bytes from the
         * shared buffer or to have it write them into the shared buffer
         * depending on the instruction: OUTS or INS.
         */
        sw_scratch = __pa(ghcb) + offsetof(struct ghcb, shared_buffer);
        ghcb_set_sw_scratch(ghcb, sw_scratch);
        ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO,
                      exit_info_1, exit_info_2);
        if (ret != ES_OK)
            return ret;

        /* Read bytes from shared buffer into the guest's destination. */
        if (exit_info_1 & IOIO_TYPE_IN) {
            ret = vc_insn_string_write(ctxt,
                           (void *)(es_base + regs->di),
                           ghcb->shared_buffer, io_bytes,
                           exit_info_2, df);
            if (ret)
                return ret;

            if (df)
                regs->di -= exit_bytes;
            else
                regs->di += exit_bytes;
        } else {
            if (df)
                regs->si -= exit_bytes;
            else
                regs->si += exit_bytes;
        }

        if (exit_info_1 & IOIO_REP)
            regs->cx -= exit_info_2;

        ret = regs->cx ? ES_RETRY : ES_OK;

    } else {

        /* IN/OUT into/from rAX */

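        /*
         * IOIO_DATA_* encode the operand size in bytes as (bytes << 4),
         * so shifting right by one yields bytes * 8, i.e. the width in
         * bits (e.g. IOIO_DATA_16 == 0x20 -> 16).
         */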
        int bits = (exit_info_1 & 0x70) >> 1;
        u64 rax = 0;

        if (!(exit_info_1 & IOIO_TYPE_IN))
            rax = lower_bits(regs->ax, bits);

        ghcb_set_rax(ghcb, rax);

        ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO, exit_info_1, 0);
        if (ret != ES_OK)
            return ret;

        if (exit_info_1 & IOIO_TYPE_IN) {
            if (!ghcb_rax_is_valid(ghcb))
                return ES_VMM_ERROR;
            regs->ax = lower_bits(ghcb->save.rax, bits);
        }
    }

    return ret;
}

static int vc_handle_cpuid_snp(struct pt_regs *regs)
{
    struct cpuid_leaf leaf;
    int ret;

    leaf.fn = regs->ax;
    leaf.subfn = regs->cx;
    ret = snp_cpuid(&leaf);
    if (!ret) {
        regs->ax = leaf.eax;
        regs->bx = leaf.ebx;
        regs->cx = leaf.ecx;
        regs->dx = leaf.edx;
    }

    return ret;
}

static enum es_result vc_handle_cpuid(struct ghcb *ghcb,
                      struct es_em_ctxt *ctxt)
{
    struct pt_regs *regs = ctxt->regs;
    u32 cr4 = native_read_cr4();
    enum es_result ret;
    int snp_cpuid_ret;

    snp_cpuid_ret = vc_handle_cpuid_snp(regs);
    if (!snp_cpuid_ret)
        return ES_OK;
    if (snp_cpuid_ret != -EOPNOTSUPP)
        return ES_VMM_ERROR;

    ghcb_set_rax(ghcb, regs->ax);
    ghcb_set_rcx(ghcb, regs->cx);

    if (cr4 & X86_CR4_OSXSAVE)
        /* Safe to read xcr0 */
        ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK));
    else
        /* xgetbv will cause #GP - use reset value for xcr0 */
        ghcb_set_xcr0(ghcb, 1);

    ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0);
    if (ret != ES_OK)
        return ret;

    if (!(ghcb_rax_is_valid(ghcb) &&
          ghcb_rbx_is_valid(ghcb) &&
          ghcb_rcx_is_valid(ghcb) &&
          ghcb_rdx_is_valid(ghcb)))
        return ES_VMM_ERROR;

    regs->ax = ghcb->save.rax;
    regs->bx = ghcb->save.rbx;
    regs->cx = ghcb->save.rcx;
    regs->dx = ghcb->save.rdx;

    return ES_OK;
}

static enum es_result vc_handle_rdtsc(struct ghcb *ghcb,
                      struct es_em_ctxt *ctxt,
                      unsigned long exit_code)
{
    bool rdtscp = (exit_code == SVM_EXIT_RDTSCP);
    enum es_result ret;

    ret = sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, 0, 0);
    if (ret != ES_OK)
        return ret;

    if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb) &&
         (!rdtscp || ghcb_rcx_is_valid(ghcb))))
        return ES_VMM_ERROR;

    ctxt->regs->ax = ghcb->save.rax;
    ctxt->regs->dx = ghcb->save.rdx;
    if (rdtscp)
        ctxt->regs->cx = ghcb->save.rcx;

    return ES_OK;
}

struct cc_setup_data {
    struct setup_data header;
    u32 cc_blob_address;
};

/*
 * Search for a Confidential Computing blob passed in as a setup_data entry
 * via the Linux Boot Protocol.
 */
static struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp)
{
    struct cc_setup_data *sd = NULL;
    struct setup_data *hdr;

    hdr = (struct setup_data *)bp->hdr.setup_data;

    while (hdr) {
        if (hdr->type == SETUP_CC_BLOB) {
            sd = (struct cc_setup_data *)hdr;
            return (struct cc_blob_sev_info *)(unsigned long)sd->cc_blob_address;
        }
        hdr = (struct setup_data *)hdr->next;
    }

    return NULL;
}

/*
 * Initialize the kernel's copy of the SNP CPUID table, and set up the
 * pointer that will be used to access it.
 *
 * Maintaining a direct mapping of the SNP CPUID table used by firmware would
 * be possible as an alternative, but the approach is brittle since the
 * mapping needs to be updated in sync with all the changes to virtual memory
 * layout and related mapping facilities throughout the boot process.
 */
static void __init setup_cpuid_table(const struct cc_blob_sev_info *cc_info)
{
    const struct snp_cpuid_table *cpuid_table_fw, *cpuid_table;
    int i;

    if (!cc_info || !cc_info->cpuid_phys || cc_info->cpuid_len < PAGE_SIZE)
        sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID);

    cpuid_table_fw = (const struct snp_cpuid_table *)cc_info->cpuid_phys;
    if (!cpuid_table_fw->count || cpuid_table_fw->count > SNP_CPUID_COUNT_MAX)
        sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID);

    cpuid_table = snp_cpuid_get_table();
    memcpy((void *)cpuid_table, cpuid_table_fw, sizeof(*cpuid_table));

    /* Initialize CPUID ranges for range-checking. */
    for (i = 0; i < cpuid_table->count; i++) {
        const struct snp_cpuid_fn *fn = &cpuid_table->fn[i];

        if (fn->eax_in == 0x0)
            cpuid_std_range_max = fn->eax;
        else if (fn->eax_in == 0x40000000)
            cpuid_hyp_range_max = fn->eax;
        else if (fn->eax_in == 0x80000000)
            cpuid_ext_range_max = fn->eax;
    }
}