/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/jump_label.h>
#include <asm/unwind_hints.h>
#include <asm/cpufeatures.h>
#include <asm/page_types.h>
#include <asm/percpu.h>
#include <asm/asm-offsets.h>
#include <asm/processor-flags.h>
#include <asm/ptrace-abi.h>
#include <asm/msr.h>
#include <asm/nospec-branch.h>

/*

 x86 function call convention, 64-bit:
 -------------------------------------
  arguments           |  callee-saved      | extra caller-saved | return
 [callee-clobbered]   |                    | [callee-clobbered] |
 ---------------------------------------------------------------------------
 rdi rsi rdx rcx r8-9 | rbx rbp [*] r12-15 | r10-11             | rax, rdx [**]

 ( rsp is obviously invariant across normal function calls. (gcc can 'merge'
   functions when it sees tail-call optimization possibilities.) rflags is
   clobbered. Leftover arguments are passed on the stack. )

 [*]  In the frame-pointers case rbp is fixed to the stack frame.

 [**] For struct return values wider than 64 bits the return convention is a
      bit more complex: structures up to 128 bits wide are returned directly
      in rax, rdx. For structures larger than that (3 words or larger) the
      caller puts a pointer to an on-stack return struct
      [allocated in the caller's stack frame] into the first argument - i.e.
      into rdi. All other arguments shift up by one in this case.
      Fortunately this case is rare in the kernel.

For 32-bit we have the following conventions - the kernel is built with
-mregparm=3 and -freg-struct-return:

 x86 function calling convention, 32-bit:
 ----------------------------------------
  arguments         | callee-saved        | extra caller-saved | return
 [callee-clobbered] |                     | [callee-clobbered] |
 -------------------------------------------------------------------------
 eax edx ecx        | ebx edi esi ebp [*] | <none>             | eax, edx [**]

 ( here too esp is obviously invariant across normal function calls. eflags
   is clobbered. Leftover arguments are passed on the stack. )

 [*]  In the frame-pointers case ebp is fixed to the stack frame.

 [**] We build with -freg-struct-return, which on 32-bit means similar
      semantics as on 64-bit: edx can be used for a second return value
      (i.e. covering integer and structure sizes up to 64 bits) - after that
      it gets more complex and more expensive: 3-word or larger struct returns
      get done in the caller's frame and the pointer to the return struct goes
      into regparm0, i.e. eax - the other arguments shift up and the
      function's register parameters degenerate to regparm=2 in essence.

*/
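
/*
 * Illustrative sketch (not part of this header; the function, arguments and
 * stack offset are hypothetical): for a C call `struct big r = make_big(a, b);`
 * where struct big is three words or larger, the caller-side code under the
 * 64-bit convention looks roughly like:
 *
 *	leaq	-48(%rsp), %rdi		# hidden pointer to the on-stack return slot
 *	movl	$1, %esi		# visible argument 'a' shifts up into rsi
 *	movl	$2, %edx		# visible argument 'b' shifts up into rdx
 *	call	make_big		# hypothetical function
 *
 * whereas a structure up to 128 bits wide would come back directly in rax, rdx.
 */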

#ifdef CONFIG_X86_64

/*
 * 64-bit system call stack frame layout defines and helpers,
 * for assembly code:
 */

.macro PUSH_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0
    .if \save_ret
    pushq   %rsi        /* pt_regs->si */
    movq    8(%rsp), %rsi   /* temporarily store the return address in %rsi */
    movq    %rdi, 8(%rsp)   /* pt_regs->di (overwriting original return address) */
    .else
    pushq   %rdi        /* pt_regs->di */
    pushq   %rsi        /* pt_regs->si */
    .endif
    pushq   \rdx        /* pt_regs->dx */
    pushq   \rcx        /* pt_regs->cx */
    pushq   \rax        /* pt_regs->ax */
    pushq   %r8         /* pt_regs->r8 */
    pushq   %r9         /* pt_regs->r9 */
    pushq   %r10        /* pt_regs->r10 */
    pushq   %r11        /* pt_regs->r11 */
    pushq   %rbx        /* pt_regs->rbx */
    pushq   %rbp        /* pt_regs->rbp */
    pushq   %r12        /* pt_regs->r12 */
    pushq   %r13        /* pt_regs->r13 */
    pushq   %r14        /* pt_regs->r14 */
    pushq   %r15        /* pt_regs->r15 */
    UNWIND_HINT_REGS

    .if \save_ret
    pushq   %rsi        /* return address on top of stack */
    .endif
.endm

.macro CLEAR_REGS
    /*
     * Sanitize registers of values that a speculation attack might
     * otherwise want to exploit. The lower registers are likely clobbered
     * well before they could be put to use in a speculative execution
     * gadget.
     */
    xorl    %esi,  %esi     /* nospec si  */
    xorl    %edx,  %edx     /* nospec dx  */
    xorl    %ecx,  %ecx     /* nospec cx  */
    xorl    %r8d,  %r8d     /* nospec r8  */
    xorl    %r9d,  %r9d     /* nospec r9  */
    xorl    %r10d, %r10d    /* nospec r10 */
    xorl    %r11d, %r11d    /* nospec r11 */
    xorl    %ebx,  %ebx     /* nospec rbx */
    xorl    %ebp,  %ebp     /* nospec rbp */
    xorl    %r12d, %r12d    /* nospec r12 */
    xorl    %r13d, %r13d    /* nospec r13 */
    xorl    %r14d, %r14d    /* nospec r14 */
    xorl    %r15d, %r15d    /* nospec r15 */
.endm

.macro PUSH_AND_CLEAR_REGS rdx=%rdx rcx=%rcx rax=%rax save_ret=0
    PUSH_REGS rdx=\rdx, rcx=\rcx, rax=\rax, save_ret=\save_ret
    CLEAR_REGS
.endm

.macro POP_REGS pop_rdi=1
    popq %r15
    popq %r14
    popq %r13
    popq %r12
    popq %rbp
    popq %rbx
    popq %r11
    popq %r10
    popq %r9
    popq %r8
    popq %rax
    popq %rcx
    popq %rdx
    popq %rsi
    .if \pop_rdi
    popq %rdi
    .endif
.endm
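
/*
 * Illustrative usage sketch (a hypothetical entry stub, not actual kernel
 * code): a path that has already pushed the hardware exception frame can
 * complete the pt_regs layout, call into C, and unwind again:
 *
 *	PUSH_AND_CLEAR_REGS
 *	movq	%rsp, %rdi		# pt_regs pointer is the C argument
 *	call	some_c_handler		# hypothetical handler
 *	POP_REGS
 *
 * With save_ret=1, PUSH_REGS instead expects a return address on top of the
 * stack on entry and leaves it on top of the newly built register frame.
 */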

#ifdef CONFIG_PAGE_TABLE_ISOLATION

/*
 * PAGE_TABLE_ISOLATION PGDs are 8k.  Flip bit 12 to switch between the two
 * halves:
 */
#define PTI_USER_PGTABLE_BIT        PAGE_SHIFT
#define PTI_USER_PGTABLE_MASK       (1 << PTI_USER_PGTABLE_BIT)
#define PTI_USER_PCID_BIT           X86_CR3_PTI_PCID_USER_BIT
#define PTI_USER_PCID_MASK          (1 << PTI_USER_PCID_BIT)
#define PTI_USER_PGTABLE_AND_PCID_MASK  (PTI_USER_PCID_MASK | PTI_USER_PGTABLE_MASK)
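
/*
 * Worked example (illustrative; assumes 4K pages, i.e. PAGE_SHIFT == 12, and
 * X86_CR3_PTI_PCID_USER_BIT == 11): PTI_USER_PGTABLE_MASK is then 0x1000 and
 * PTI_USER_PCID_MASK is 0x800.  Switching a CR3 value to the user half of the
 * 8k PGD and to the user PCID ORs in 0x1800; switching back to the kernel
 * view masks the same bits off again:
 *
 *	orq	$0x1800, %rdi		# kernel CR3 -> user CR3 (illustrative constant)
 *	andq	$~0x1800, %rdi		# user CR3 -> kernel CR3 (illustrative constant)
 */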

.macro SET_NOFLUSH_BIT  reg:req
    bts $X86_CR3_PCID_NOFLUSH_BIT, \reg
.endm

.macro ADJUST_KERNEL_CR3 reg:req
    ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID
    /* Clear the PCID and PAGE_TABLE_ISOLATION bits, point CR3 at kernel pagetables: */
    andq    $(~PTI_USER_PGTABLE_AND_PCID_MASK), \reg
.endm

.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
    ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
    mov %cr3, \scratch_reg
    ADJUST_KERNEL_CR3 \scratch_reg
    mov \scratch_reg, %cr3
.Lend_\@:
.endm

#define THIS_CPU_user_pcid_flush_mask   \
    PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask

.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
    ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
    mov %cr3, \scratch_reg

    ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID

    /*
     * Test if the ASID needs a flush.
     */
    movq    \scratch_reg, \scratch_reg2
    andq    $(0x7FF), \scratch_reg      /* mask ASID */
    bt  \scratch_reg, THIS_CPU_user_pcid_flush_mask
    jnc .Lnoflush_\@

    /* Flush needed, clear the bit */
    btr \scratch_reg, THIS_CPU_user_pcid_flush_mask
    movq    \scratch_reg2, \scratch_reg
    jmp .Lwrcr3_pcid_\@

.Lnoflush_\@:
    movq    \scratch_reg2, \scratch_reg
    SET_NOFLUSH_BIT \scratch_reg

.Lwrcr3_pcid_\@:
    /* Flip the ASID to the user version */
    orq $(PTI_USER_PCID_MASK), \scratch_reg

.Lwrcr3_\@:
    /* Flip the PGD to the user version */
    orq $(PTI_USER_PGTABLE_MASK), \scratch_reg
    mov \scratch_reg, %cr3
.Lend_\@:
.endm

.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
    pushq   %rax
    SWITCH_TO_USER_CR3_NOSTACK scratch_reg=\scratch_reg scratch_reg2=%rax
    popq    %rax
.endm
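
/*
 * Illustrative usage sketch (hypothetical return-to-user path, not actual
 * kernel code): the ASID test above keys off the per-CPU user_pcid_flush_mask
 * bit for the current ASID (CR3 bits 0-10); if a flush is pending the bit is
 * cleared and a flushing CR3 write is done, otherwise the NOFLUSH bit is set.
 * A caller typically switches back to the user page tables as the very last
 * step before leaving the kernel:
 *
 *	SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
 *	swapgs
 *	... iretq or sysretq
 *
 * The _NOSTACK variant is for paths that have a second register free and
 * want to avoid pushing %rax.
 */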

.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
    ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_PTI
    movq    %cr3, \scratch_reg
    movq    \scratch_reg, \save_reg
    /*
     * Test the user pagetable bit. If set, then the user page tables
     * are active. If clear, CR3 already points at the kernel page
     * tables.
     */
    bt  $PTI_USER_PGTABLE_BIT, \scratch_reg
    jnc .Ldone_\@

    ADJUST_KERNEL_CR3 \scratch_reg
    movq    \scratch_reg, %cr3

.Ldone_\@:
.endm

.macro RESTORE_CR3 scratch_reg:req save_reg:req
    ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI

    ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID

    /*
     * KERNEL pages can always resume with NOFLUSH as we do
     * explicit flushes.
     */
    bt  $PTI_USER_PGTABLE_BIT, \save_reg
    jnc .Lnoflush_\@

    /*
     * Check if there's a pending flush for the user ASID we're
     * about to set.
     */
    movq    \save_reg, \scratch_reg
    andq    $(0x7FF), \scratch_reg
    bt  \scratch_reg, THIS_CPU_user_pcid_flush_mask
    jnc .Lnoflush_\@

    btr \scratch_reg, THIS_CPU_user_pcid_flush_mask
    jmp .Lwrcr3_\@

.Lnoflush_\@:
    SET_NOFLUSH_BIT \save_reg

.Lwrcr3_\@:
    /*
     * The CR3 write could be avoided when not changing its value,
     * but would require a CR3 read *and* a scratch register.
     */
    movq    \save_reg, %cr3
.Lend_\@:
.endm
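
/*
 * Illustrative pairing sketch (a hypothetical paranoid-style entry, not
 * actual kernel code): CR3 is saved and switched unconditionally on entry,
 * and the saved value is written back on the way out regardless of which
 * page tables were live when the event hit:
 *
 *	SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
 *	...				# handler runs on kernel page tables
 *	RESTORE_CR3 scratch_reg=%rax save_reg=%r14
 */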

#else /* CONFIG_PAGE_TABLE_ISOLATION=n: */

.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
.endm
.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
.endm
.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
.endm
.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
.endm
.macro RESTORE_CR3 scratch_reg:req save_reg:req
.endm

#endif

/*
 * IBRS kernel mitigation for Spectre_v2.
 *
 * Assumes full context is established (PUSH_REGS, CR3 and GS) and clobbers
 * the regs it uses (AX, CX, DX). Must be called before the first RET
 * instruction (NOTE! UNTRAIN_RET includes a RET instruction).
 *
 * The optional argument is used to save/restore the current value,
 * which is used on the paranoid paths.
 *
 * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set.
 */
.macro IBRS_ENTER save_reg
#ifdef CONFIG_CPU_IBRS_ENTRY
    ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
    movl    $MSR_IA32_SPEC_CTRL, %ecx

.ifnb \save_reg
    rdmsr
    shl $32, %rdx
    or  %rdx, %rax
    mov %rax, \save_reg
    test    $SPEC_CTRL_IBRS, %eax
    jz  .Ldo_wrmsr_\@
    lfence
    jmp .Lend_\@
.Ldo_wrmsr_\@:
.endif

    movq    PER_CPU_VAR(x86_spec_ctrl_current), %rdx
    movl    %edx, %eax
    shr $32, %rdx
    wrmsr
.Lend_\@:
#endif
.endm

/*
 * Similar to IBRS_ENTER, requires KERNEL GS and CR3, and clobbers the regs
 * it uses (AX, CX, DX). Must be called after the last RET.
 */
.macro IBRS_EXIT save_reg
#ifdef CONFIG_CPU_IBRS_ENTRY
    ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
    movl    $MSR_IA32_SPEC_CTRL, %ecx

.ifnb \save_reg
    mov \save_reg, %rdx
.else
    movq    PER_CPU_VAR(x86_spec_ctrl_current), %rdx
    andl    $(~SPEC_CTRL_IBRS), %edx
.endif

    movl    %edx, %eax
    shr $32, %rdx
    wrmsr
.Lend_\@:
#endif
.endm
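
/*
 * Illustrative pairing sketch (hypothetical entry/exit paths, not actual
 * kernel code): ordinary entries simply turn IBRS on and later off again,
 * while paranoid paths preserve whatever SPEC_CTRL value was live:
 *
 *	IBRS_ENTER			# write SPEC_CTRL with IBRS set
 *	...
 *	IBRS_EXIT			# drop IBRS before returning
 *
 *	IBRS_ENTER save_reg=%r15	# paranoid: remember the old MSR value
 *	...
 *	IBRS_EXIT save_reg=%r15		# paranoid: write the old value back
 */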

/*
 * Mitigate Spectre v1 for conditional swapgs code paths.
 *
 * FENCE_SWAPGS_USER_ENTRY is used in the user entry swapgs code path, to
 * prevent a speculative swapgs when coming from kernel space.
 *
 * FENCE_SWAPGS_KERNEL_ENTRY is used in the kernel entry non-swapgs code path,
 * to prevent the swapgs from getting speculatively skipped when coming from
 * user space.
 */
.macro FENCE_SWAPGS_USER_ENTRY
    ALTERNATIVE "", "lfence", X86_FEATURE_FENCE_SWAPGS_USER
.endm
.macro FENCE_SWAPGS_KERNEL_ENTRY
    ALTERNATIVE "", "lfence", X86_FEATURE_FENCE_SWAPGS_KERNEL
.endm
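
/*
 * Illustrative sketch of the conditional-swapgs pattern the two fences
 * protect (hypothetical entry code; the CS(%rsp) offset is assumed):
 *
 *	testb	$3, CS(%rsp)		# did the event come from user mode?
 *	jz	.Lfrom_kernel
 *	swapgs
 *	FENCE_SWAPGS_USER_ENTRY		# a kernel-origin entry must not
 *					# speculatively reach the swapgs above
 *	jmp	.Lgs_is_kernel
 * .Lfrom_kernel:
 *	FENCE_SWAPGS_KERNEL_ENTRY	# a user-origin entry must not
 *					# speculatively skip the swapgs
 * .Lgs_is_kernel:
 */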

.macro STACKLEAK_ERASE_NOCLOBBER
#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
    PUSH_AND_CLEAR_REGS
    call stackleak_erase
    POP_REGS
#endif
.endm

.macro SAVE_AND_SET_GSBASE scratch_reg:req save_reg:req
    rdgsbase \save_reg
    GET_PERCPU_BASE \scratch_reg
    wrgsbase \scratch_reg
.endm
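
/*
 * Illustrative pairing sketch (a hypothetical FSGSBASE-aware paranoid path,
 * not actual kernel code): the interrupted context's GSBASE is saved here
 * and written straight back on the way out:
 *
 *	SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx
 *	...
 *	wrgsbase %rbx			# restore the previous GSBASE
 */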

#else /* CONFIG_X86_64 */
# undef     UNWIND_HINT_IRET_REGS
# define    UNWIND_HINT_IRET_REGS
#endif /* !CONFIG_X86_64 */

.macro STACKLEAK_ERASE
#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
    call stackleak_erase
#endif
.endm

#ifdef CONFIG_SMP

/*
 * The CPU/node NR is loaded from the limit (size) field of a special segment
 * descriptor entry in the GDT.
 */
.macro LOAD_CPU_AND_NODE_SEG_LIMIT reg:req
    movq    $__CPUNODE_SEG, \reg
    lsl \reg, \reg
.endm
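
/*
 * Illustrative decoding sketch (assumes the vdso's VDSO_CPUNODE_BITS/MASK
 * layout): the segment limit packs the node number above the CPU number,
 * so the two can be separated like this:
 *
 *	LOAD_CPU_AND_NODE_SEG_LIMIT %rax
 *	movq	%rax, %rdx
 *	andq	$VDSO_CPUNODE_MASK, %rax	# CPU number
 *	shrq	$VDSO_CPUNODE_BITS, %rdx	# NUMA node number
 */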

/*
 * Fetch the per-CPU GSBASE value for this processor and put it in @reg.
 * We normally use %gs for accessing per-CPU data, but we are setting up
 * %gs here and obviously cannot use %gs itself to access per-CPU data.
 *
 * Do not use RDPID, because KVM loads the guest's TSC_AUX on vm-entry and
 * may not restore the host's value until the CPU returns to userspace.
 * Thus the kernel would consume a guest's TSC_AUX if an NMI arrives
 * while running KVM's run loop.
 */
.macro GET_PERCPU_BASE reg:req
    LOAD_CPU_AND_NODE_SEG_LIMIT \reg
    andq    $VDSO_CPUNODE_MASK, \reg
    movq    __per_cpu_offset(, \reg, 8), \reg
.endm

#else

.macro GET_PERCPU_BASE reg:req
    movq    pcpu_unit_offsets(%rip), \reg
.endm

#endif /* CONFIG_SMP */