0001 /* SPDX-License-Identifier: GPL-2.0 */
0002 /*
0003  *  linux/boot/head.S
0004  *
0005  *  Copyright (C) 1991, 1992, 1993  Linus Torvalds
0006  */
0007 
0008 /*
0009  *  head.S contains the 32-bit startup code.
0010  *
0011  * NOTE!!! Startup happens at absolute address 0x00001000, which is also where
0012  * the page directory will exist. The startup code will be overwritten by
0013  * the page directory. [According to comments etc elsewhere on a compressed
0014  * kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC]
0015  *
0016  * Page 0 is deliberately kept safe, since System Management Mode code in 
0017  * laptops may need to access the BIOS data stored there.  This is also
0018  * useful for future device drivers that access the BIOS via VM86
0019  * mode.
0020  */
0021 
0022 /*
0023  * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
0024  */
0025     .code32
0026     .text
0027 
0028 #include <linux/init.h>
0029 #include <linux/linkage.h>
0030 #include <asm/segment.h>
0031 #include <asm/boot.h>
0032 #include <asm/msr.h>
0033 #include <asm/processor-flags.h>
0034 #include <asm/asm-offsets.h>
0035 #include <asm/bootparam.h>
0036 #include <asm/desc_defs.h>
0037 #include <asm/trapnr.h>
0038 #include "pgtable.h"
0039 
0040 /*
0041  * Locally defined symbols should be marked hidden:
0042  */
0043     .hidden _bss
0044     .hidden _ebss
0045     .hidden _end
0046 
0047     __HEAD
0048 
0049 /*
0050  * This macro gives the relative virtual address of X, i.e. the offset of X
0051  * from startup_32. This is the same as the link-time virtual address of X,
0052  * since startup_32 is at 0, but defining it this way tells the
0053  * assembler/linker that we do not want the actual run-time address of X. This
0054  * prevents the linker from trying to create unwanted run-time relocation
0055  * entries for the reference when the compressed kernel is linked as PIE.
0056  *
0057  * A reference X(%reg) will result in the link-time VA of X being stored with
0058  * the instruction, and a run-time R_X86_64_RELATIVE relocation entry that
0059  * adds the 64-bit base address where the kernel is loaded.
0060  *
0061  * Replacing it with (X-startup_32)(%reg) results in the offset being stored,
0062  * and no run-time relocation.
0063  *
0064  * The macro should be used as a displacement with a base register containing
0065  * the run-time address of startup_32 [i.e. rva(X)(%reg)], or as an immediate
0066  * [$ rva(X)].
0067  *
0068  * This macro can only be used from within the .head.text section, since the
0069  * expression requires startup_32 to be in the same section as the code being
0070  * assembled.
0071  */
0072 #define rva(X) ((X) - startup_32)
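/*
 * As a rough illustration, assume a symbol foo linked at offset 0x40 and a
 * hypothetical run-time load address of 0x3000000 held in %ebp:
 *
 *   movl foo(%ebp), %eax       - stores the link-time VA of foo and needs an
 *                                R_X86_64_RELATIVE fixup when linked as PIE
 *   movl rva(foo)(%ebp), %eax  - stores only the offset 0x40; with %ebp
 *                                holding 0x3000000 it reads 0x3000040, with
 *                                no relocation entry at all
 */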
0073 
0074     .code32
0075 SYM_FUNC_START(startup_32)
0076     /*
0077      * 32bit entry is 0 and it is ABI so immutable!
0078      * If we come here directly from a bootloader,
0079      * kernel(text+data+bss+brk), ramdisk, zero_page, command line
0080      * all need to be under the 4G limit.
0081      */
0082     cld
0083     cli
0084 
0085 /*
0086  * Calculate the delta between where we were compiled to run
0087  * at and where we were actually loaded at.  This can only be done
0088  * with a short local call on x86.  Nothing else will tell us what
0089  * address we are running at.  The reserved chunk of the real-mode
0090  * data at 0x1e4 (defined as a scratch field) is used as the stack
0091  * for this calculation. Only 4 bytes are needed.
0092  */
0093     leal    (BP_scratch+4)(%esi), %esp
0094     call    1f
0095 1:  popl    %ebp
0096     subl    $ rva(1b), %ebp
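    /*
     * A short worked example: if the image was linked with startup_32 at 0
     * but the bootloader placed it at a hypothetical 0x1000000, the call
     * pushes the run-time address of label 1, i.e. 0x1000000 + rva(1b).
     * The popl/subl pair then leaves %ebp = 0x1000000, the run-time
     * address of startup_32.
     */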
0097 
0098     /* Load new GDT with the 64bit segments using 32bit descriptor */
0099     leal    rva(gdt)(%ebp), %eax
0100     movl    %eax, 2(%eax)
0101     lgdt    (%eax)
0102 
0103     /* Load segment registers with our descriptors */
0104     movl    $__BOOT_DS, %eax
0105     movl    %eax, %ds
0106     movl    %eax, %es
0107     movl    %eax, %fs
0108     movl    %eax, %gs
0109     movl    %eax, %ss
0110 
0111     /* Setup a stack and load CS from current GDT */
0112     leal    rva(boot_stack_end)(%ebp), %esp
0113 
0114     pushl   $__KERNEL32_CS
0115     leal    rva(1f)(%ebp), %eax
0116     pushl   %eax
0117     lretl
0118 1:
0119 
0120     /* Setup Exception handling for SEV-ES */
0121     call    startup32_load_idt
0122 
0123     /* Make sure cpu supports long mode. */
0124     call    verify_cpu
0125     testl   %eax, %eax
0126     jnz .Lno_longmode
0127 
0128 /*
0129  * Compute the delta between where we were compiled to run at
0130  * and where the code will actually run at.
0131  *
0132  * %ebp contains the address we are loaded at by the boot loader and %ebx
0133  * contains the address where we should move the kernel image temporarily
0134  * for safe in-place decompression.
0135  */
0136 
0137 #ifdef CONFIG_RELOCATABLE
0138     movl    %ebp, %ebx
0139 
0140 #ifdef CONFIG_EFI_STUB
0141 /*
0142  * If we were loaded via the EFI LoadImage service, startup_32 will be at an
0143  * offset to the start of the space allocated for the image. efi_pe_entry will
0144  * set up image_offset to tell us where the image actually starts, so that we
0145  * can use the full available buffer.
0146  *  image_offset = startup_32 - image_base
0147  * Otherwise image_offset will be zero and has no effect on the calculations.
0148  */
0149     subl    rva(image_offset)(%ebp), %ebx
0150 #endif
0151 
0152     movl    BP_kernel_alignment(%esi), %eax
0153     decl    %eax
0154     addl    %eax, %ebx
0155     notl    %eax
0156     andl    %eax, %ebx
0157     cmpl    $LOAD_PHYSICAL_ADDR, %ebx
0158     jae 1f
0159 #endif
0160     movl    $LOAD_PHYSICAL_ADDR, %ebx
0161 1:
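    /*
     * The decl/addl/notl/andl sequence above is the usual round-up idiom,
     * ebx = (ebx + align - 1) & ~(align - 1). As a worked example with a
     * hypothetical load address of 0x1234000 and kernel_alignment of
     * 0x200000: 0x1234000 + 0x1fffff = 0x1433fff, and masking with
     * ~0x1fffff yields 0x1400000.
     */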
0162 
0163     /* Target address to relocate to for decompression */
0164     addl    BP_init_size(%esi), %ebx
0165     subl    $ rva(_end), %ebx
0166 
0167 /*
0168  * Prepare for entering 64 bit mode
0169  */
0170 
0171     /* Enable PAE mode */
0172     movl    %cr4, %eax
0173     orl $X86_CR4_PAE, %eax
0174     movl    %eax, %cr4
0175 
0176     /*
0177      * Build early 4G boot pagetable
0178      */
0179     /*
0180      * If SEV is active then set the encryption mask in the page tables.
0181      * This ensures that when the kernel is copied and decompressed,
0182      * it is done so with encryption in place.
0183      */
0184     call    get_sev_encryption_bit
0185     xorl    %edx, %edx
0186 #ifdef  CONFIG_AMD_MEM_ENCRYPT
0187     testl   %eax, %eax
0188     jz  1f
0189     subl    $32, %eax   /* Encryption bit is always above bit 31 */
0190     bts %eax, %edx  /* Set encryption mask for page tables */
0191     /*
0192      * Set MSR_AMD64_SEV_ENABLED_BIT in sev_status so that
0193      * startup32_check_sev_cbit() will do a check. sev_enable() will
0194      * initialize sev_status with all the bits reported by
0195      * the MSR_AMD64_SEV status MSR later, but only MSR_AMD64_SEV_ENABLED_BIT
0196      * needs to be set for now.
0197      */
0198     movl    $1, rva(sev_status)(%ebp)
0199 1:
0200 #endif
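    /*
     * A worked example with a hypothetical C-bit position of 51: %eax becomes
     * 51 - 32 = 19 and bts sets bit 19 of %edx. Since %edx is later added into
     * the upper 32 bits of each 64-bit page table entry, that corresponds to
     * bit 51 of the entry, i.e. the encryption bit.
     */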
0201 
0202     /* Initialize Page tables to 0 */
0203     leal    rva(pgtable)(%ebx), %edi
0204     xorl    %eax, %eax
0205     movl    $(BOOT_INIT_PGT_SIZE/4), %ecx
0206     rep stosl
0207 
0208     /* Build Level 4 */
0209     leal    rva(pgtable + 0)(%ebx), %edi
0210     leal    0x1007 (%edi), %eax
0211     movl    %eax, 0(%edi)
0212     addl    %edx, 4(%edi)
0213 
0214     /* Build Level 3 */
0215     leal    rva(pgtable + 0x1000)(%ebx), %edi
0216     leal    0x1007(%edi), %eax
0217     movl    $4, %ecx
0218 1:  movl    %eax, 0x00(%edi)
0219     addl    %edx, 0x04(%edi)
0220     addl    $0x00001000, %eax
0221     addl    $8, %edi
0222     decl    %ecx
0223     jnz 1b
0224 
0225     /* Build Level 2 */
0226     leal    rva(pgtable + 0x2000)(%ebx), %edi
0227     movl    $0x00000183, %eax
0228     movl    $2048, %ecx
0229 1:  movl    %eax, 0(%edi)
0230     addl    %edx, 4(%edi)
0231     addl    $0x00200000, %eax
0232     addl    $8, %edi
0233     decl    %ecx
0234     jnz 1b
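    /*
     * A rough C sketch of the page table layout built above (enc_mask stands
     * for the SEV encryption mask from %edx, zero when SEV is not active):
     *
     *   uint64_t *pgt = pgtable;                       // level 4 (PML4)
     *   pgt[0] = (uint64_t)&pgt[512] | 0x7 | enc_mask;
     *   for (int i = 0; i < 4; i++)                    // level 3: 4 GB total
     *           pgt[512 + i] = (uint64_t)&pgt[1024 + 512 * i] | 0x7 | enc_mask;
     *   for (int i = 0; i < 2048; i++)                 // level 2: 2 MB pages
     *           pgt[1024 + i] = (uint64_t)i * 0x200000 | 0x183 | enc_mask;
     *
     * 0x7 marks the table entries present/writable/user, 0x183 marks the leaf
     * entries present/writable/global with the 2 MB page-size bit set.
     */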
0235 
0236     /* Enable the boot page tables */
0237     leal    rva(pgtable)(%ebx), %eax
0238     movl    %eax, %cr3
0239 
0240     /* Enable Long mode in EFER (Extended Feature Enable Register) */
0241     movl    $MSR_EFER, %ecx
0242     rdmsr
0243     btsl    $_EFER_LME, %eax
0244     wrmsr
0245 
0246     /* After gdt is loaded */
0247     xorl    %eax, %eax
0248     lldt    %ax
0249     movl    $__BOOT_TSS, %eax
0250     ltr %ax
0251 
0252     /*
0253      * Setup for the jump to 64bit mode
0254      *
0255      * When the jump is performed we will be in long mode but
0256      * in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1
0257      * (and in turn EFER.LMA = 1).  To jump into 64bit mode we use
0258      * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
0259      * We place all of the values on our mini stack so lret can
0260      * be used to perform that far jump.
0261      */
0262     leal    rva(startup_64)(%ebp), %eax
0263 #ifdef CONFIG_EFI_MIXED
0264     movl    rva(efi32_boot_args)(%ebp), %edi
0265     testl   %edi, %edi
0266     jz  1f
0267     leal    rva(efi64_stub_entry)(%ebp), %eax
0268     movl    rva(efi32_boot_args+4)(%ebp), %esi
0269     movl    rva(efi32_boot_args+8)(%ebp), %edx  // saved bootparams pointer
0270     testl   %edx, %edx
0271     jnz 1f
0272     /*
0273      * efi_pe_entry uses MS calling convention, which requires 32 bytes of
0274      * shadow space on the stack even if all arguments are passed in
0275      * registers. We also need an additional 8 bytes for the space that
0276      * would be occupied by the return address, and this also results in
0277      * the correct stack alignment for entry.
0278      */
0279     subl    $40, %esp
0280     leal    rva(efi_pe_entry)(%ebp), %eax
0281     movl    %edi, %ecx          // MS calling convention
0282     movl    %esi, %edx
0283 1:
0284 #endif
0285     /* Check if the C-bit position is correct when SEV is active */
0286     call    startup32_check_sev_cbit
0287 
0288     pushl   $__KERNEL_CS
0289     pushl   %eax
0290 
0291     /* Enter paged protected Mode, activating Long Mode */
0292     movl    $CR0_STATE, %eax
0293     movl    %eax, %cr0
0294 
0295     /* Jump from 32bit compatibility mode into 64bit mode. */
0296     lret
0297 SYM_FUNC_END(startup_32)
0298 
0299 #ifdef CONFIG_EFI_MIXED
0300     .org 0x190
0301 SYM_FUNC_START(efi32_stub_entry)
0302     add $0x4, %esp      /* Discard return address */
0303     popl    %ecx
0304     popl    %edx
0305     popl    %esi
0306 
0307     call    1f
0308 1:  pop %ebp
0309     subl    $ rva(1b), %ebp
0310 
0311     movl    %esi, rva(efi32_boot_args+8)(%ebp)
0312 SYM_INNER_LABEL(efi32_pe_stub_entry, SYM_L_LOCAL)
0313     movl    %ecx, rva(efi32_boot_args)(%ebp)
0314     movl    %edx, rva(efi32_boot_args+4)(%ebp)
0315     movb    $0, rva(efi_is64)(%ebp)
0316 
0317     /* Save firmware GDTR and code/data selectors */
0318     sgdtl   rva(efi32_boot_gdt)(%ebp)
0319     movw    %cs, rva(efi32_boot_cs)(%ebp)
0320     movw    %ds, rva(efi32_boot_ds)(%ebp)
0321 
0322     /* Store firmware IDT descriptor */
0323     sidtl   rva(efi32_boot_idt)(%ebp)
0324 
0325     /* Disable paging */
0326     movl    %cr0, %eax
0327     btrl    $X86_CR0_PG_BIT, %eax
0328     movl    %eax, %cr0
0329 
0330     jmp startup_32
0331 SYM_FUNC_END(efi32_stub_entry)
0332 #endif
0333 
0334     .code64
0335     .org 0x200
0336 SYM_CODE_START(startup_64)
0337     /*
0338      * 64bit entry is 0x200 and it is ABI so immutable!
0339      * We come here either from startup_32 or directly from a
0340      * 64bit bootloader.
0341      * If we come here from a bootloader, kernel(text+data+bss+brk),
0342      * ramdisk, zero_page, command line could be above 4G.
0343      * We depend on an identity mapped page table being provided
0344      * that maps our entire kernel(text+data+bss+brk), zero page
0345      * and command line.
0346      */
0347 
0348     cld
0349     cli
0350 
0351     /* Setup data segments. */
0352     xorl    %eax, %eax
0353     movl    %eax, %ds
0354     movl    %eax, %es
0355     movl    %eax, %ss
0356     movl    %eax, %fs
0357     movl    %eax, %gs
0358 
0359     /*
0360      * Compute the decompressed kernel start address.  It is where
0361      * we were loaded at aligned to a 2M boundary. %rbp contains the
0362      * decompressed kernel start address.
0363      *
0364      * If it is a relocatable kernel then decompress and run the kernel
0365      * from the load address aligned to a 2MB boundary, otherwise decompress
0366      * and run the kernel from LOAD_PHYSICAL_ADDR.
0367      *
0368      * We cannot rely on the calculation done in 32-bit mode, since we
0369      * may have been invoked via the 64-bit entry point.
0370      */
0371 
0372     /* Start with the delta to where the kernel will run at. */
0373 #ifdef CONFIG_RELOCATABLE
0374     leaq    startup_32(%rip) /* - $startup_32 */, %rbp
0375 
0376 #ifdef CONFIG_EFI_STUB
0377 /*
0378  * If we were loaded via the EFI LoadImage service, startup_32 will be at an
0379  * offset to the start of the space allocated for the image. efi_pe_entry will
0380  * set up image_offset to tell us where the image actually starts, so that we
0381  * can use the full available buffer.
0382  *  image_offset = startup_32 - image_base
0383  * Otherwise image_offset will be zero and has no effect on the calculations.
0384  */
0385     movl    image_offset(%rip), %eax
0386     subq    %rax, %rbp
0387 #endif
0388 
0389     movl    BP_kernel_alignment(%rsi), %eax
0390     decl    %eax
0391     addq    %rax, %rbp
0392     notq    %rax
0393     andq    %rax, %rbp
0394     cmpq    $LOAD_PHYSICAL_ADDR, %rbp
0395     jae 1f
0396 #endif
0397     movq    $LOAD_PHYSICAL_ADDR, %rbp
0398 1:
0399 
0400     /* Target address to relocate to for decompression */
0401     movl    BP_init_size(%rsi), %ebx
0402     subl    $ rva(_end), %ebx
0403     addq    %rbp, %rbx
0404 
0405     /* Set up the stack */
0406     leaq    rva(boot_stack_end)(%rbx), %rsp
0407 
0408     /*
0409      * At this point we are in long mode with 4-level paging enabled,
0410      * but we might want to enable 5-level paging or vice versa.
0411      *
0412      * The problem is that we cannot do it directly. Setting or clearing
0413      * CR4.LA57 in long mode would trigger #GP. So we need to switch off
0414      * long mode and paging first.
0415      *
0416      * We also need a trampoline in lower memory to switch over from
0417      * 4- to 5-level paging for cases when the bootloader puts the kernel
0418      * above 4G, but didn't enable 5-level paging for us.
0419      *
0420      * The same trampoline can be used to switch from 5- to 4-level paging
0421      * mode, for example when starting a 4-level paging kernel via kexec()
0422      * from an original kernel that ran in 5-level paging mode.
0423      *
0424      * For the trampoline, we need the top page table to reside in lower
0425      * memory as we don't have a way to load 64-bit values into CR3 in
0426      * 32-bit mode.
0427      *
0428      * We go through the trampoline even if we don't have to: even if we're
0429      * already in the desired paging mode. This way the trampoline code gets
0430      * tested on every boot.
0431      */
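    /*
     * In outline, the trampoline performs the following steps (a sketch of
     * the code further below, constrained by the fact that CR4.LA57 cannot
     * be toggled while long mode and paging are active):
     *
     *   1. far-return into a 32-bit code segment (compatibility mode)
     *   2. clear CR0.PG, which also clears EFER.LMA
     *   3. point CR3 at a page table of the desired depth (below 4 GB)
     *   4. set or clear CR4.LA57 while keeping CR4.PAE, ensure EFER.LME
     *   5. set CR0.PG again and far-return into the 64-bit code segment
     */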
0432 
0433     /* Make sure we have GDT with 32-bit code segment */
0434     leaq    gdt64(%rip), %rax
0435     addq    %rax, 2(%rax)
0436     lgdt    (%rax)
0437 
0438     /* Reload CS so IRET returns to a CS actually in the GDT */
0439     pushq   $__KERNEL_CS
0440     leaq    .Lon_kernel_cs(%rip), %rax
0441     pushq   %rax
0442     lretq
0443 
0444 .Lon_kernel_cs:
0445 
0446     pushq   %rsi
0447     call    load_stage1_idt
0448     popq    %rsi
0449 
0450 #ifdef CONFIG_AMD_MEM_ENCRYPT
0451     /*
0452      * Now that the stage1 interrupt handlers are set up, #VC exceptions from
0453      * CPUID instructions can be properly handled for SEV-ES guests.
0454      *
0455      * For SEV-SNP, the CPUID table also needs to be set up in advance of any
0456      * CPUID instructions being issued, so go ahead and do that now via
0457      * sev_enable(), which will also handle the rest of the SEV-related
0458      * detection/setup so that it is complete before any dependent code
0459      * runs.
0460      */
0461     pushq   %rsi
0462     movq    %rsi, %rdi      /* real mode address */
0463     call    sev_enable
0464     popq    %rsi
0465 #endif
0466 
0467     /*
0468      * paging_prepare() sets up the trampoline and checks if we need to
0469      * enable 5-level paging.
0470      *
0471      * paging_prepare() returns a two-quadword structure which lands
0472      * in RDX:RAX:
0473      *   - The address of the trampoline is returned in RAX.
0474      *   - A non-zero RDX means the trampoline needs to enable 5-level
0475      *     paging.
0476      *
0477      * RSI holds real mode data and needs to be preserved across
0478      * this function call.
0479      */
0480     pushq   %rsi
0481     movq    %rsi, %rdi      /* real mode address */
0482     call    paging_prepare
0483     popq    %rsi
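    /*
     * In C terms the return value corresponds to a small two-member struct
     * passed back by value (a sketch, field names illustrative):
     *
     *   struct paging_config {
     *           unsigned long trampoline_start;    // lands in RAX
     *           unsigned long l5_required;         // lands in RDX
     *   };
     *   struct paging_config paging_prepare(void *rmode);
     *
     * The SysV x86-64 ABI returns such a 16-byte structure in RAX:RDX, which
     * is why the code below reads the two fields straight from the registers.
     */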
0484 
0485     /* Save the trampoline address in RCX */
0486     movq    %rax, %rcx
0487 
0488     /*
0489      * Load the address of trampoline_return() into RDI.
0490      * It will be used by the trampoline to return to the main code.
0491      */
0492     leaq    trampoline_return(%rip), %rdi
0493 
0494     /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
0495     pushq   $__KERNEL32_CS
0496     leaq    TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax
0497     pushq   %rax
0498     lretq
0499 trampoline_return:
0500     /* Restore the stack; the 32-bit trampoline uses its own stack */
0501     leaq    rva(boot_stack_end)(%rbx), %rsp
0502 
0503     /*
0504      * cleanup_trampoline() restores the trampoline memory.
0505      *
0506      * RDI is address of the page table to use instead of page table
0507      * in trampoline memory (if required).
0508      *
0509      * RSI holds real mode data and needs to be preserved across
0510      * this function call.
0511      */
0512     pushq   %rsi
0513     leaq    rva(top_pgtable)(%rbx), %rdi
0514     call    cleanup_trampoline
0515     popq    %rsi
0516 
0517     /* Zero EFLAGS */
0518     pushq   $0
0519     popfq
0520 
0521 /*
0522  * Copy the compressed kernel to the end of our buffer
0523  * where decompression in place becomes safe.
0524  */
0525     pushq   %rsi
0526     leaq    (_bss-8)(%rip), %rsi
0527     leaq    rva(_bss-8)(%rbx), %rdi
0528     movl    $(_bss - startup_32), %ecx
0529     shrl    $3, %ecx
0530     std
0531     rep movsq
0532     cld
0533     popq    %rsi
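    /*
     * A rough C equivalent of the copy above (names illustrative). The copy
     * runs backwards because the source and destination ranges may overlap,
     * with the destination at the higher address:
     *
     *   size_t qwords = (_bss - startup_32) / 8;
     *   const uint64_t *src = (const uint64_t *)_bss - 1;       // last qword
     *   uint64_t *dst = (uint64_t *)(dest_base + rva(_bss)) - 1;
     *   while (qwords--)
     *           *dst-- = *src--;
     *
     * i.e. the same effect as a memmove() of the whole compressed image.
     */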
0534 
0535     /*
0536      * The GDT may get overwritten either during the copy we just did or
0537      * during extract_kernel below. To avoid any issues, repoint the GDTR
0538      * to the new copy of the GDT.
0539      */
0540     leaq    rva(gdt64)(%rbx), %rax
0541     leaq    rva(gdt)(%rbx), %rdx
0542     movq    %rdx, 2(%rax)
0543     lgdt    (%rax)
0544 
0545 /*
0546  * Jump to the relocated address.
0547  */
0548     leaq    rva(.Lrelocated)(%rbx), %rax
0549     jmp *%rax
0550 SYM_CODE_END(startup_64)
0551 
0552 #ifdef CONFIG_EFI_STUB
0553     .org 0x390
0554 SYM_FUNC_START(efi64_stub_entry)
0555     and $~0xf, %rsp         /* realign the stack */
0556     movq    %rdx, %rbx          /* save boot_params pointer */
0557     call    efi_main
0558     movq    %rbx,%rsi
0559     leaq    rva(startup_64)(%rax), %rax
0560     jmp *%rax
0561 SYM_FUNC_END(efi64_stub_entry)
0562 SYM_FUNC_ALIAS(efi_stub_entry, efi64_stub_entry)
0563 #endif
0564 
0565     .text
0566 SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
0567 
0568 /*
0569  * Clear BSS (stack is currently empty)
0570  */
0571     xorl    %eax, %eax
0572     leaq    _bss(%rip), %rdi
0573     leaq    _ebss(%rip), %rcx
0574     subq    %rdi, %rcx
0575     shrq    $3, %rcx
0576     rep stosq
0577 
0578     pushq   %rsi
0579     call    load_stage2_idt
0580 
0581     /* Pass boot_params to initialize_identity_maps() */
0582     movq    (%rsp), %rdi
0583     call    initialize_identity_maps
0584     popq    %rsi
0585 
0586 /*
0587  * Do the extraction, and jump to the new kernel.
0588  */
0589     pushq   %rsi            /* Save the real mode argument */
0590     movq    %rsi, %rdi      /* real mode address */
0591     leaq    boot_heap(%rip), %rsi   /* malloc area for uncompression */
0592     leaq    input_data(%rip), %rdx  /* input_data */
0593     movl    input_len(%rip), %ecx   /* input_len */
0594     movq    %rbp, %r8       /* output target address */
0595     movl    output_len(%rip), %r9d  /* decompressed length, end of relocs */
0596     call    extract_kernel      /* returns kernel location in %rax */
0597     popq    %rsi
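/*
 * The register setup above follows the SysV calling convention for a C call
 * roughly of this shape (a sketch; the exact prototype lives in the
 * decompressor's C code and varies between kernel versions):
 *
 *   void *extract_kernel(void *rmode,                 // RDI
 *                        void *heap,                  // RSI
 *                        unsigned char *input_data,   // RDX
 *                        unsigned long input_len,     // RCX
 *                        unsigned char *output,       // R8
 *                        unsigned long output_len);   // R9
 *
 * The entry point of the decompressed kernel comes back in RAX and is jumped
 * to below.
 */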
0598 
0599 /*
0600  * Jump to the decompressed kernel.
0601  */
0602     jmp *%rax
0603 SYM_FUNC_END(.Lrelocated)
0604 
0605     .code32
0606 /*
0607  * This is the 32-bit trampoline that will be copied over to low memory.
0608  *
0609  * RDI contains the return address (might be above 4G).
0610  * ECX contains the base address of the trampoline memory.
0611  * A non-zero RDX means the trampoline needs to enable 5-level paging.
0612  */
0613 SYM_CODE_START(trampoline_32bit_src)
0614     /* Set up data and stack segments */
0615     movl    $__KERNEL_DS, %eax
0616     movl    %eax, %ds
0617     movl    %eax, %ss
0618 
0619     /* Set up new stack */
0620     leal    TRAMPOLINE_32BIT_STACK_END(%ecx), %esp
0621 
0622     /* Disable paging */
0623     movl    %cr0, %eax
0624     btrl    $X86_CR0_PG_BIT, %eax
0625     movl    %eax, %cr0
0626 
0627     /* Check what paging mode we want to be in after the trampoline */
0628     testl   %edx, %edx
0629     jz  1f
0630 
0631     /* We want 5-level paging: don't touch CR3 if it already points to 5-level page tables */
0632     movl    %cr4, %eax
0633     testl   $X86_CR4_LA57, %eax
0634     jnz 3f
0635     jmp 2f
0636 1:
0637     /* We want 4-level paging: don't touch CR3 if it already points to 4-level page tables */
0638     movl    %cr4, %eax
0639     testl   $X86_CR4_LA57, %eax
0640     jz  3f
0641 2:
0642     /* Point CR3 to the trampoline's new top level page table */
0643     leal    TRAMPOLINE_32BIT_PGTABLE_OFFSET(%ecx), %eax
0644     movl    %eax, %cr3
0645 3:
0646     /* Set EFER.LME=1 as a precaution in case the hypervisor pulls the rug */
0647     pushl   %ecx
0648     pushl   %edx
0649     movl    $MSR_EFER, %ecx
0650     rdmsr
0651     btsl    $_EFER_LME, %eax
0652     /* Avoid writing EFER if no change was made (for TDX guest) */
0653     jc  1f
0654     wrmsr
0655 1:  popl    %edx
0656     popl    %ecx
0657 
0658 #ifdef CONFIG_X86_MCE
0659     /*
0660      * Preserve CR4.MCE if the kernel will enable #MC support.
0661      * Clearing MCE may fault in some environments (that also force #MC
0662      * support). Any machine check that occurs before #MC support is fully
0663      * configured will crash the system regardless of the CR4.MCE value set
0664      * here.
0665      */
0666     movl    %cr4, %eax
0667     andl    $X86_CR4_MCE, %eax
0668 #else
0669     movl    $0, %eax
0670 #endif
0671 
0672     /* Enable PAE and LA57 (if required) paging modes */
0673     orl $X86_CR4_PAE, %eax
0674     testl   %edx, %edx
0675     jz  1f
0676     orl $X86_CR4_LA57, %eax
0677 1:
0678     movl    %eax, %cr4
0679 
0680     /* Calculate address of paging_enabled() once we are executing in the trampoline */
0681     leal    .Lpaging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax
0682 
0683     /* Prepare the stack for far return to Long Mode */
0684     pushl   $__KERNEL_CS
0685     pushl   %eax
0686 
0687     /* Enable paging again. */
0688     movl    %cr0, %eax
0689     btsl    $X86_CR0_PG_BIT, %eax
0690     movl    %eax, %cr0
0691 
0692     lret
0693 SYM_CODE_END(trampoline_32bit_src)
0694 
0695     .code64
0696 SYM_FUNC_START_LOCAL_NOALIGN(.Lpaging_enabled)
0697     /* Return from the trampoline */
0698     jmp *%rdi
0699 SYM_FUNC_END(.Lpaging_enabled)
0700 
0701     /*
0702      * The trampoline code has a size limit.
0703      * Make sure we fail to compile if the trampoline code grows
0704      * beyond TRAMPOLINE_32BIT_CODE_SIZE bytes.
0705      */
0706     .org    trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE
0707 
0708     .code32
0709 SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode)
0710     /* This isn't an x86-64 CPU, so hang intentionally; we cannot continue */
0711 1:
0712     hlt
0713     jmp     1b
0714 SYM_FUNC_END(.Lno_longmode)
0715 
0716 #include "../../kernel/verify_cpu.S"
0717 
0718     .data
0719 SYM_DATA_START_LOCAL(gdt64)
0720     .word   gdt_end - gdt - 1
0721     .quad   gdt - gdt64
0722 SYM_DATA_END(gdt64)
0723     .balign 8
0724 SYM_DATA_START_LOCAL(gdt)
0725     .word   gdt_end - gdt - 1
0726     .long   0
0727     .word   0
0728     .quad   0x00cf9a000000ffff  /* __KERNEL32_CS */
0729     .quad   0x00af9a000000ffff  /* __KERNEL_CS */
0730     .quad   0x00cf92000000ffff  /* __KERNEL_DS */
0731     .quad   0x0080890000000000  /* TS descriptor */
0732     .quad   0x0000000000000000  /* TS continued */
0733 SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end)
0734 
0735 SYM_DATA_START(boot_idt_desc)
0736     .word   boot_idt_end - boot_idt - 1
0737     .quad   0
0738 SYM_DATA_END(boot_idt_desc)
0739     .balign 8
0740 SYM_DATA_START(boot_idt)
0741     .rept   BOOT_IDT_ENTRIES
0742     .quad   0
0743     .quad   0
0744     .endr
0745 SYM_DATA_END_LABEL(boot_idt, SYM_L_GLOBAL, boot_idt_end)
0746 
0747 #ifdef CONFIG_AMD_MEM_ENCRYPT
0748 SYM_DATA_START(boot32_idt_desc)
0749     .word   boot32_idt_end - boot32_idt - 1
0750     .long   0
0751 SYM_DATA_END(boot32_idt_desc)
0752     .balign 8
0753 SYM_DATA_START(boot32_idt)
0754     .rept 32
0755     .quad 0
0756     .endr
0757 SYM_DATA_END_LABEL(boot32_idt, SYM_L_GLOBAL, boot32_idt_end)
0758 #endif
0759 
0760 #ifdef CONFIG_EFI_STUB
0761 SYM_DATA(image_offset, .long 0)
0762 #endif
0763 #ifdef CONFIG_EFI_MIXED
0764 SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0, 0)
0765 SYM_DATA(efi_is64, .byte 1)
0766 
0767 #define ST32_boottime       60 // offsetof(efi_system_table_32_t, boottime)
0768 #define BS32_handle_protocol    88 // offsetof(efi_boot_services_32_t, handle_protocol)
0769 #define LI32_image_base     32 // offsetof(efi_loaded_image_32_t, image_base)
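/*
 * Expressed in C, these hard-coded offsets would amount to compile-time
 * assertions along these lines (a sketch, for illustration only):
 *
 *   _Static_assert(offsetof(efi_system_table_32_t, boottime) == 60, "");
 *   _Static_assert(offsetof(efi_boot_services_32_t, handle_protocol) == 88, "");
 *   _Static_assert(offsetof(efi_loaded_image_32_t, image_base) == 32, "");
 *
 * They are spelled out as numbers because this 32-bit assembly path does not
 * include the EFI C headers.
 */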
0770 
0771     __HEAD
0772     .code32
0773 SYM_FUNC_START(efi32_pe_entry)
0774 /*
0775  * efi_status_t efi32_pe_entry(efi_handle_t image_handle,
0776  *                 efi_system_table_32_t *sys_table)
0777  */
0778 
0779     pushl   %ebp
0780     movl    %esp, %ebp
0781     pushl   %eax                // dummy push to allocate loaded_image
0782 
0783     pushl   %ebx                // save callee-save registers
0784     pushl   %edi
0785 
0786     call    verify_cpu          // check for long mode support
0787     testl   %eax, %eax
0788     movl    $0x80000003, %eax       // EFI_UNSUPPORTED
0789     jnz 2f
0790 
0791     call    1f
0792 1:  pop %ebx
0793     subl    $ rva(1b), %ebx
0794 
0795     /* Get the loaded image protocol pointer from the image handle */
0796     leal    -4(%ebp), %eax
0797     pushl   %eax                // &loaded_image
0798     leal    rva(loaded_image_proto)(%ebx), %eax
0799     pushl   %eax                // pass the GUID address
0800     pushl   8(%ebp)             // pass the image handle
0801 
0802     /*
0803      * Note the alignment of the stack frame.
0804      *   sys_table
0805      *   handle             <-- 16-byte aligned on entry by ABI
0806      *   return address
0807      *   frame pointer
0808      *   loaded_image       <-- local variable
0809      *   saved %ebx     <-- 16-byte aligned here
0810      *   saved %edi
0811      *   &loaded_image
0812      *   &loaded_image_proto
0813      *   handle             <-- 16-byte aligned for call to handle_protocol
0814      */
0815 
0816     movl    12(%ebp), %eax          // sys_table
0817     movl    ST32_boottime(%eax), %eax   // sys_table->boottime
0818     call    *BS32_handle_protocol(%eax) // sys_table->boottime->handle_protocol
0819     addl    $12, %esp           // restore argument space
0820     testl   %eax, %eax
0821     jnz 2f
0822 
0823     movl    8(%ebp), %ecx           // image_handle
0824     movl    12(%ebp), %edx          // sys_table
0825     movl    -4(%ebp), %esi          // loaded_image
0826     movl    LI32_image_base(%esi), %esi // loaded_image->image_base
0827     movl    %ebx, %ebp          // startup_32 for efi32_pe_stub_entry
0828     /*
0829      * We need to set the image_offset variable here since startup_32() will
0830      * use it before we get to the 64-bit efi_pe_entry() in C code.
0831      */
0832     subl    %esi, %ebx
0833     movl    %ebx, rva(image_offset)(%ebp)   // save image_offset
0834     jmp efi32_pe_stub_entry
0835 
0836 2:  popl    %edi                // restore callee-save registers
0837     popl    %ebx
0838     leave
0839     RET
0840 SYM_FUNC_END(efi32_pe_entry)
0841 
0842     .section ".rodata"
0843     /* EFI loaded image protocol GUID */
0844     .balign 4
0845 SYM_DATA_START_LOCAL(loaded_image_proto)
0846     .long   0x5b1b31a1
0847     .word   0x9562, 0x11d2
0848     .byte   0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b
0849 SYM_DATA_END(loaded_image_proto)
0850 #endif
0851 
0852 #ifdef CONFIG_AMD_MEM_ENCRYPT
0853     __HEAD
0854     .code32
0855 /*
0856  * Write an IDT entry into boot32_idt
0857  *
0858  * Parameters:
0859  *
0860  * %eax:    Handler address
0861  * %edx:    Vector number
0862  *
0863  * Physical offset is expected in %ebp
0864  */
0865 SYM_FUNC_START(startup32_set_idt_entry)
0866     push    %ebx
0867     push    %ecx
0868 
0869     /* IDT entry address to %ebx */
0870     leal    rva(boot32_idt)(%ebp), %ebx
0871     shl $3, %edx
0872     addl    %edx, %ebx
0873 
0874     /* Build IDT entry, lower 4 bytes */
0875     movl    %eax, %edx
0876     andl    $0x0000ffff, %edx   # Target code segment offset [15:0]
0877     movl    $__KERNEL32_CS, %ecx    # Target code segment selector
0878     shl     $16, %ecx
0879     orl     %ecx, %edx
0880 
0881     /* Store lower 4 bytes to IDT */
0882     movl    %edx, (%ebx)
0883 
0884     /* Build IDT entry, upper 4 bytes */
0885     movl    %eax, %edx
0886     andl    $0xffff0000, %edx   # Target code segment offset [31:16]
0887     orl     $0x00008e00, %edx   # Present, Type 32-bit Interrupt Gate
0888 
0889     /* Store upper 4 bytes to IDT */
0890     movl    %edx, 4(%ebx)
0891 
0892     pop     %ecx
0893     pop     %ebx
0894     RET
0895 SYM_FUNC_END(startup32_set_idt_entry)
0896 #endif
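/*
 * The two 32-bit words written by startup32_set_idt_entry form a classic
 * 32-bit interrupt gate. A C sketch of the same layout (field names
 * illustrative):
 *
 *   struct idt_gate32 {
 *           uint16_t offset_low;    // handler address [15:0]
 *           uint16_t selector;      // __KERNEL32_CS
 *           uint8_t  reserved;      // always 0
 *           uint8_t  type_attr;     // 0x8e: present, DPL 0, 32-bit int gate
 *           uint16_t offset_high;   // handler address [31:16]
 *   };
 *
 * which matches the 0x00008e00 attribute value and the two andl/orl splits of
 * the handler address above.
 */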
0897 
0898 SYM_FUNC_START(startup32_load_idt)
0899 #ifdef CONFIG_AMD_MEM_ENCRYPT
0900     /* #VC handler */
0901     leal    rva(startup32_vc_handler)(%ebp), %eax
0902     movl    $X86_TRAP_VC, %edx
0903     call    startup32_set_idt_entry
0904 
0905     /* Load IDT */
0906     leal    rva(boot32_idt)(%ebp), %eax
0907     movl    %eax, rva(boot32_idt_desc+2)(%ebp)
0908     lidt    rva(boot32_idt_desc)(%ebp)
0909 #endif
0910     RET
0911 SYM_FUNC_END(startup32_load_idt)
0912 
0913 /*
0914  * Check for the correct C-bit position when the startup_32 boot-path is used.
0915  *
0916  * The check makes use of the fact that all memory is encrypted when paging is
0917  * disabled. The function creates 64 bits of random data using the RDRAND
0918  * instruction. RDRAND is mandatory for SEV guests, so it is always available.
0919  * If the hypervisor violates that, the kernel will crash right here.
0920  *
0921  * The 64 bits of random data are stored to a memory location and at the same
0922  * time kept in the %eax and %ebx registers. Since encryption is always active
0923  * when paging is off the random data will be stored encrypted in main memory.
0924  *
0925  * Then paging is enabled. When the C-bit position is correct all memory is
0926  * still mapped encrypted and comparing the register values with memory will
0927  * succeed. An incorrect C-bit position will map all memory unencrypted, so that
0928  * the compare will use the encrypted random data and fail.
0929  */
0930 SYM_FUNC_START(startup32_check_sev_cbit)
0931 #ifdef CONFIG_AMD_MEM_ENCRYPT
0932     pushl   %eax
0933     pushl   %ebx
0934     pushl   %ecx
0935     pushl   %edx
0936 
0937     /* Check for non-zero sev_status */
0938     movl    rva(sev_status)(%ebp), %eax
0939     testl   %eax, %eax
0940     jz  4f
0941 
0942     /*
0943      * Get two 32-bit random values. Don't bail out if RDRAND fails;
0944      * it is better to prevent forward progress than to continue without
0945      * a random value.
0946      */
0947 1:  rdrand  %eax
0948     jnc 1b
0949 2:  rdrand  %ebx
0950     jnc 2b
0951 
0952     /* Store to memory and keep it in the registers */
0953     movl    %eax, rva(sev_check_data)(%ebp)
0954     movl    %ebx, rva(sev_check_data+4)(%ebp)
0955 
0956     /* Enable paging to see if encryption is active */
0957     movl    %cr0, %edx           /* Backup %cr0 in %edx */
0958     movl    $(X86_CR0_PG | X86_CR0_PE), %ecx /* Enable Paging and Protected mode */
0959     movl    %ecx, %cr0
0960 
0961     cmpl    %eax, rva(sev_check_data)(%ebp)
0962     jne 3f
0963     cmpl    %ebx, rva(sev_check_data+4)(%ebp)
0964     jne 3f
0965 
0966     movl    %edx, %cr0  /* Restore previous %cr0 */
0967 
0968     jmp 4f
0969 
0970 3:  /* Check failed - hlt the machine */
0971     hlt
0972     jmp 3b
0973 
0974 4:
0975     popl    %edx
0976     popl    %ecx
0977     popl    %ebx
0978     popl    %eax
0979 #endif
0980     RET
0981 SYM_FUNC_END(startup32_check_sev_cbit)
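/*
 * In C-like pseudocode the check above amounts to (a sketch, helper names
 * illustrative):
 *
 *   if (sev_status) {
 *           u64 rnd = rdrand64_retry();    // loops until RDRAND succeeds
 *           sev_check_data = rnd;          // written with paging off, so encrypted
 *           enable_paging();               // CR0.PG | CR0.PE
 *           if (sev_check_data != rnd)     // wrong C-bit: we read ciphertext back
 *                   halt_forever();
 *           restore_cr0();
 *   }
 */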
0982 
0983 /*
0984  * Stack and heap for uncompression
0985  */
0986     .bss
0987     .balign 4
0988 SYM_DATA_LOCAL(boot_heap,   .fill BOOT_HEAP_SIZE, 1, 0)
0989 
0990 SYM_DATA_START_LOCAL(boot_stack)
0991     .fill BOOT_STACK_SIZE, 1, 0
0992     .balign 16
0993 SYM_DATA_END_LABEL(boot_stack, SYM_L_LOCAL, boot_stack_end)
0994 
0995 /*
0996  * Space for page tables (not in .bss so not zeroed)
0997  */
0998     .section ".pgtable","aw",@nobits
0999     .balign 4096
1000 SYM_DATA_LOCAL(pgtable,     .fill BOOT_PGT_SIZE, 1, 0)
1001 
1002 /*
1003  * This page table is used in place of the page table located in the
1004  * trampoline memory.
1005  */
1006 SYM_DATA_LOCAL(top_pgtable, .fill PAGE_SIZE, 1, 0)