// SPDX-License-Identifier: GPL-2.0
/*
 * This code is used on x86_64 to create page table identity mappings on
 * demand by building up a new set of page tables (or appending to the
 * existing ones), and then switching over to them when ready.
 *
 * Copyright (C) 2015-2016  Yinghai Lu
 * Copyright (C)      2016  Kees Cook
 */

/*
 * Since we're dealing with identity mappings, physical and virtual
 * addresses are the same, so override these defines which are ultimately
 * used by the headers in misc.h.
 */
#define __pa(x)  ((unsigned long)(x))
#define __va(x)  ((void *)((unsigned long)(x)))

/* No PAGE_TABLE_ISOLATION support needed either: */
#undef CONFIG_PAGE_TABLE_ISOLATION

#include "error.h"
#include "misc.h"

/* These actually do the work of building the kernel identity maps. */
#include <linux/pgtable.h>
#include <asm/cmpxchg.h>
#include <asm/trap_pf.h>
#include <asm/trapnr.h>
#include <asm/init.h>
/* Use the static base for this part of the boot process */
#undef __PAGE_OFFSET
#define __PAGE_OFFSET __PAGE_OFFSET_BASE
#include "../../mm/ident_map.c"

#define _SETUP
#include <asm/setup.h>  /* For COMMAND_LINE_SIZE */
#undef _SETUP

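/* Returns the address of the kernel command line, read from boot_params. */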
extern unsigned long get_cmd_line_ptr(void);

/* Used by PAGE_KERN* macros: */
pteval_t __default_kernel_pte_mask __read_mostly = ~0;

/* Used to track our page table allocation area. */
struct alloc_pgt_data {
    unsigned char *pgt_buf;
    unsigned long pgt_buf_size;
    unsigned long pgt_buf_offset;
};

/*
 * Allocates space for a new page-table page, using struct alloc_pgt_data
 * above. Besides the local callers, this is used as the allocation
 * callback in mapping_info below.
 */
static void *alloc_pgt_page(void *context)
{
    struct alloc_pgt_data *pages = (struct alloc_pgt_data *)context;
    unsigned char *entry;

    /* Validate there is space available for a new page. */
    if (pages->pgt_buf_offset >= pages->pgt_buf_size) {
        debug_putstr("out of pgt_buf in " __FILE__ "!?\n");
        debug_putaddr(pages->pgt_buf_offset);
        debug_putaddr(pages->pgt_buf_size);
        return NULL;
    }

    entry = pages->pgt_buf + pages->pgt_buf_offset;
    pages->pgt_buf_offset += PAGE_SIZE;

    return entry;
}

/* Used to track our allocated page tables. */
static struct alloc_pgt_data pgt_data;

/* The top level page table entry pointer. */
static unsigned long top_level_pgt;

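/*
 * Mask of valid physical address bits; the SME encryption bit is
 * excluded from it in initialize_identity_maps() below.
 */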
phys_addr_t physical_mask = (1ULL << __PHYSICAL_MASK_SHIFT) - 1;

/*
 * Mapping information structure passed to kernel_ident_mapping_init().
 * Due to relocation, pointers must be assigned at run time, not build time.
 */
static struct x86_mapping_info mapping_info;

/*
 * Adds the specified range to the identity mappings.
 */
void kernel_add_identity_map(unsigned long start, unsigned long end)
{
    int ret;

    /* Align boundary to 2M. */
    start = round_down(start, PMD_SIZE);
    end = round_up(end, PMD_SIZE);
    if (start >= end)
        return;

    /* Build the mapping. */
    ret = kernel_ident_mapping_init(&mapping_info, (pgd_t *)top_level_pgt, start, end);
    if (ret)
        error("Error: kernel_ident_mapping_init() failed\n");
}

/* Locates and clears a region for a new top level page table. */
void initialize_identity_maps(void *rmode)
{
    unsigned long cmdline;
    struct setup_data *sd;

    /* Exclude the encryption mask from __PHYSICAL_MASK */
    physical_mask &= ~sme_me_mask;

    /* Init mapping_info with run-time function/buffer pointers. */
    mapping_info.alloc_pgt_page = alloc_pgt_page;
    mapping_info.context = &pgt_data;
    mapping_info.page_flag = __PAGE_KERNEL_LARGE_EXEC | sme_me_mask;
    mapping_info.kernpg_flag = _KERNPG_TABLE;

    /*
     * It should be impossible for this not to already be true,
     * but since calling this a second time would rewind the other
     * counters, let's just make sure this is reset too.
     */
    pgt_data.pgt_buf_offset = 0;

    /*
     * If we came here via startup_32(), cr3 will be _pgtable already
     * and we must append to the existing area instead of entirely
     * overwriting it.
     *
     * With 5-level paging, we use '_pgtable' to allocate the p4d page table;
     * the top-level page table is allocated separately.
     *
     * p4d_offset(top_level_pgt, 0) would cover both the 4- and 5-level
     * cases. On 4-level paging it's equal to 'top_level_pgt'.
     */
    top_level_pgt = read_cr3_pa();
    if (p4d_offset((pgd_t *)top_level_pgt, 0) == (p4d_t *)_pgtable) {
        pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE;
        pgt_data.pgt_buf_size = BOOT_PGT_SIZE - BOOT_INIT_PGT_SIZE;
        memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
    } else {
        pgt_data.pgt_buf = _pgtable;
        pgt_data.pgt_buf_size = BOOT_PGT_SIZE;
        memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
        top_level_pgt = (unsigned long)alloc_pgt_page(&pgt_data);
    }

    /*
     * New page-table is set up - map the kernel image, boot_params and the
     * command line. The uncompressed kernel requires boot_params and the
     * command line to be mapped in the identity mapping. Map them
     * explicitly here in case the compressed kernel does not touch them,
     * or does not touch all the pages covering them.
     */
    kernel_add_identity_map((unsigned long)_head, (unsigned long)_end);
    boot_params = rmode;
    kernel_add_identity_map((unsigned long)boot_params, (unsigned long)(boot_params + 1));
    cmdline = get_cmd_line_ptr();
    kernel_add_identity_map(cmdline, cmdline + COMMAND_LINE_SIZE);

    /*
     * Also map the setup_data entries passed via boot_params in case they
     * need to be accessed by the uncompressed kernel via the identity mapping.
     */
    sd = (struct setup_data *)boot_params->hdr.setup_data;
    while (sd) {
        unsigned long sd_addr = (unsigned long)sd;

        kernel_add_identity_map(sd_addr, sd_addr + sizeof(*sd) + sd->len);
        sd = (struct setup_data *)sd->next;
    }

    sev_prep_identity_maps(top_level_pgt);

    /* Load the new page-table. */
    write_cr3(top_level_pgt);
}

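/*
 * Split the large (2M) PMD that maps '__address' into a page table of 4K
 * PTEs with the same attributes and return the PTE covering '__address'.
 */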
static pte_t *split_large_pmd(struct x86_mapping_info *info,
                  pmd_t *pmdp, unsigned long __address)
{
    unsigned long page_flags;
    unsigned long address;
    pte_t *pte;
    pmd_t pmd;
    int i;

    pte = (pte_t *)info->alloc_pgt_page(info->context);
    if (!pte)
        return NULL;

    address     = __address & PMD_MASK;
    /* No large page - clear PSE flag */
    page_flags  = info->page_flag & ~_PAGE_PSE;

    /* Populate the PTEs */
    for (i = 0; i < PTRS_PER_PMD; i++) {
        set_pte(&pte[i], __pte(address | page_flags));
        address += PAGE_SIZE;
    }

    /*
     * Ideally we need to clear the large PMD first and do a TLB
     * flush before we write the new PMD. But the 2M range of the
     * PMD might contain the code we execute and/or the stack
     * we are on, so we can't do that. But that should be safe here
     * because we are going from large to small mappings and we are
     * also the only user of the page-table, so there is no chance
     * of a TLB multihit.
     */
    pmd = __pmd((unsigned long)pte | info->kernpg_flag);
    set_pmd(pmdp, pmd);
    /* Flush TLB to establish the new PMD */
    write_cr3(top_level_pgt);

    return pte + pte_index(__address);
}

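/* Flush every cache line of the 4K page containing 'address'. */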
static void clflush_page(unsigned long address)
{
    unsigned int flush_size;
    char *cl, *start, *end;

    /*
     * Hardcode cl-size to 64 - CPUID can't be used here because that might
     * cause another #VC exception and the GHCB is not ready to use yet.
     */
    flush_size = 64;
    start      = (char *)(address & PAGE_MASK);
    end        = start + PAGE_SIZE;

    /*
     * First make sure there are no pending writes on the cache-lines to
     * flush.
     */
    asm volatile("mfence" : : : "memory");

    for (cl = start; cl != end; cl += flush_size)
        clflush(cl);
}

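/*
 * Set and/or clear the given flags on the PTE that maps 'address',
 * splitting a large PMD mapping into PTEs first when needed.
 */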
static int set_clr_page_flags(struct x86_mapping_info *info,
                  unsigned long address,
                  pteval_t set, pteval_t clr)
{
    pgd_t *pgdp = (pgd_t *)top_level_pgt;
    p4d_t *p4dp;
    pud_t *pudp;
    pmd_t *pmdp;
    pte_t *ptep, pte;

    /*
     * First make sure there is a PMD mapping for 'address'.
     * It should already exist, but keep things generic.
     *
     * To map the page just read from it and fault it in if there is no
     * mapping yet. kernel_add_identity_map() can't be called here because
     * that would unconditionally map the address on PMD level, destroying
     * any PTE-level mappings that might already exist. Use assembly here
     * so the access won't be optimized away.
     */
    asm volatile("mov %[address], %%r9"
             :: [address] "g" (*(unsigned long *)address)
             : "r9", "memory");

    /*
     * The page is mapped at least with PMD size - so skip checks and walk
     * directly to the PMD.
     */
    p4dp = p4d_offset(pgdp, address);
    pudp = pud_offset(p4dp, address);
    pmdp = pmd_offset(pudp, address);

    if (pmd_large(*pmdp))
        ptep = split_large_pmd(info, pmdp, address);
    else
        ptep = pte_offset_kernel(pmdp, address);

    if (!ptep)
        return -ENOMEM;

    /*
     * Changing encryption attributes of a page requires flushing it from
     * the caches.
     */
    if ((set | clr) & _PAGE_ENC) {
        clflush_page(address);

        /*
         * If the encryption attribute is being cleared, change the page state
         * to shared in the RMP table.
         */
        if (clr)
            snp_set_page_shared(__pa(address & PAGE_MASK));
    }

    /* Update PTE */
    pte = *ptep;
    pte = pte_set_flags(pte, set);
    pte = pte_clear_flags(pte, clr);
    set_pte(ptep, pte);

    /*
     * If the encryption attribute is being set, then change the page state to
     * private in the RMP entry. The page state change must be done after the PTE
     * is updated.
     */
    if (set & _PAGE_ENC)
        snp_set_page_private(__pa(address & PAGE_MASK));

    /* Flush TLB after changing encryption attribute */
    write_cr3(top_level_pgt);

    return 0;
}

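/* Clear the encryption bit on the page that maps 'address'. */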
int set_page_decrypted(unsigned long address)
{
    return set_clr_page_flags(&mapping_info, address, 0, _PAGE_ENC);
}

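/* Set the encryption bit on the page that maps 'address'. */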
int set_page_encrypted(unsigned long address)
{
    return set_clr_page_flags(&mapping_info, address, _PAGE_ENC, 0);
}

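/* Clear the present bit on the page that maps 'address'. */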
int set_page_non_present(unsigned long address)
{
    return set_clr_page_flags(&mapping_info, address, 0, _PAGE_PRESENT);
}

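/* Report an unrecoverable page fault and stop the boot. */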
static void do_pf_error(const char *msg, unsigned long error_code,
            unsigned long address, unsigned long ip)
{
    error_putstr(msg);

    error_putstr("\nError Code: ");
    error_puthex(error_code);
    error_putstr("\nCR2: 0x");
    error_puthex(address);
    error_putstr("\nRIP relative to _head: 0x");
    error_puthex(ip - (unsigned long)_head);
    error_putstr("\n");

    error("Stopping.\n");
}

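/*
 * Page-fault handler used during decompression: identity-map the 2M region
 * around the faulting address on demand.
 */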
void do_boot_page_fault(struct pt_regs *regs, unsigned long error_code)
{
    unsigned long address = native_read_cr2();
    unsigned long end;
    bool ghcb_fault;

    ghcb_fault = sev_es_check_ghcb_fault(address);

    address   &= PMD_MASK;
    end        = address + PMD_SIZE;

    /*
     * Check for unexpected error codes. Unexpected are:
     *  - Faults on present pages
     *  - User faults
     *  - Reserved bits set
     */
    if (error_code & (X86_PF_PROT | X86_PF_USER | X86_PF_RSVD))
        do_pf_error("Unexpected page-fault:", error_code, address, regs->ip);
    else if (ghcb_fault)
        do_pf_error("Page-fault on GHCB page:", error_code, address, regs->ip);

    /*
     * Error code is sane - now identity map the 2M region around
     * the faulting address.
     */
    kernel_add_identity_map(address, end);
}