// SPDX-License-Identifier: GPL-2.0

/*
 * Transitional page tables for kexec and hibernate
 *
 * This file derived from: arch/arm64/kernel/hibernate.c
 *
 * Copyright (c) 2021, Microsoft Corporation.
 * Pasha Tatashin <pasha.tatashin@soleen.com>
 *
 */

/*
 * Transitional tables are used while the system is transferring from one world
 * to another, such as during hibernate restore and kexec reboots. During these
 * phases one cannot rely on the page tables not being overwritten, because
 * hibernate and kexec can overwrite the current page tables during the
 * transition.
 */

#include <asm/trans_pgd.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <linux/suspend.h>
#include <linux/bug.h>
#include <linux/mm.h>
#include <linux/mmzone.h>

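/*
 * Allocate one page for the transitional tables using the caller-provided
 * allocator. The callback is expected to return a single zeroed page that is
 * safe to use across the transition; if it fails, the callers below propagate
 * -ENOMEM.
 */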
static void *trans_alloc(struct trans_pgd_info *info)
{
    return info->trans_alloc_page(info->trans_alloc_arg);
}

static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
{
    pte_t pte = READ_ONCE(*src_ptep);

    if (pte_valid(pte)) {
        /*
         * Resume will overwrite areas that may be marked
         * read only (code, rodata). Clear the RDONLY bit from
         * the temporary mappings we use during restore.
         */
        set_pte(dst_ptep, pte_mkwrite(pte));
    } else if (debug_pagealloc_enabled() && !pte_none(pte)) {
        /*
         * debug_pagealloc will have removed the PTE_VALID bit if
         * the page isn't in use by the resume kernel. It may have
         * been in use by the original kernel, in which case we need
         * to put it back in our copy to do the restore.
         *
         * Before marking this entry valid, check that the pfn
         * should be mapped.
         */
        BUG_ON(!pfn_valid(pte_pfn(pte)));

        set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(pte)));
    }
}

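/*
 * Copy one PTE-level table: allocate a fresh table from the transitional
 * allocator, hook it into *dst_pmdp, and copy every source entry for
 * [start, end) via _copy_pte().
 */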
static int copy_pte(struct trans_pgd_info *info, pmd_t *dst_pmdp,
            pmd_t *src_pmdp, unsigned long start, unsigned long end)
{
    pte_t *src_ptep;
    pte_t *dst_ptep;
    unsigned long addr = start;

    dst_ptep = trans_alloc(info);
    if (!dst_ptep)
        return -ENOMEM;
    pmd_populate_kernel(NULL, dst_pmdp, dst_ptep);
    dst_ptep = pte_offset_kernel(dst_pmdp, start);

    src_ptep = pte_offset_kernel(src_pmdp, start);
    do {
        _copy_pte(dst_ptep, src_ptep, addr);
    } while (dst_ptep++, src_ptep++, addr += PAGE_SIZE, addr != end);

    return 0;
}

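/*
 * Walk one PMD range. The destination PMD table is allocated on demand; table
 * entries recurse into copy_pte(), while section (block) mappings are copied
 * directly with the read-only attribute cleared.
 */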
static int copy_pmd(struct trans_pgd_info *info, pud_t *dst_pudp,
            pud_t *src_pudp, unsigned long start, unsigned long end)
{
    pmd_t *src_pmdp;
    pmd_t *dst_pmdp;
    unsigned long next;
    unsigned long addr = start;

    if (pud_none(READ_ONCE(*dst_pudp))) {
        dst_pmdp = trans_alloc(info);
        if (!dst_pmdp)
            return -ENOMEM;
        pud_populate(NULL, dst_pudp, dst_pmdp);
    }
    dst_pmdp = pmd_offset(dst_pudp, start);

    src_pmdp = pmd_offset(src_pudp, start);
    do {
        pmd_t pmd = READ_ONCE(*src_pmdp);

        next = pmd_addr_end(addr, end);
        if (pmd_none(pmd))
            continue;
        if (pmd_table(pmd)) {
            if (copy_pte(info, dst_pmdp, src_pmdp, addr, next))
                return -ENOMEM;
        } else {
            set_pmd(dst_pmdp,
                __pmd(pmd_val(pmd) & ~PMD_SECT_RDONLY));
        }
    } while (dst_pmdp++, src_pmdp++, addr = next, addr != end);

    return 0;
}

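/*
 * Walk one PUD range. The destination PUD table is allocated on demand; table
 * entries recurse into copy_pmd(), while huge PUD (block) mappings are copied
 * directly with the read-only attribute cleared.
 */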
static int copy_pud(struct trans_pgd_info *info, p4d_t *dst_p4dp,
            p4d_t *src_p4dp, unsigned long start,
            unsigned long end)
{
    pud_t *dst_pudp;
    pud_t *src_pudp;
    unsigned long next;
    unsigned long addr = start;

    if (p4d_none(READ_ONCE(*dst_p4dp))) {
        dst_pudp = trans_alloc(info);
        if (!dst_pudp)
            return -ENOMEM;
        p4d_populate(NULL, dst_p4dp, dst_pudp);
    }
    dst_pudp = pud_offset(dst_p4dp, start);

    src_pudp = pud_offset(src_p4dp, start);
    do {
        pud_t pud = READ_ONCE(*src_pudp);

        next = pud_addr_end(addr, end);
        if (pud_none(pud))
            continue;
        if (pud_table(pud)) {
            if (copy_pmd(info, dst_pudp, src_pudp, addr, next))
                return -ENOMEM;
        } else {
            set_pud(dst_pudp,
                __pud(pud_val(pud) & ~PUD_SECT_RDONLY));
        }
    } while (dst_pudp++, src_pudp++, addr = next, addr != end);

    return 0;
}

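/*
 * Walk one P4D range, descending into copy_pud() for each populated source
 * entry.
 */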
static int copy_p4d(struct trans_pgd_info *info, pgd_t *dst_pgdp,
            pgd_t *src_pgdp, unsigned long start,
            unsigned long end)
{
    p4d_t *dst_p4dp;
    p4d_t *src_p4dp;
    unsigned long next;
    unsigned long addr = start;

    dst_p4dp = p4d_offset(dst_pgdp, start);
    src_p4dp = p4d_offset(src_pgdp, start);
    do {
        next = p4d_addr_end(addr, end);
        if (p4d_none(READ_ONCE(*src_p4dp)))
            continue;
        if (copy_pud(info, dst_p4dp, src_p4dp, addr, next))
            return -ENOMEM;
    } while (dst_p4dp++, src_p4dp++, addr = next, addr != end);

    return 0;
}

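/*
 * Copy the live kernel page tables for [start, end), starting from
 * swapper_pg_dir (via pgd_offset_k()), into the transitional tables rooted
 * at dst_pgdp.
 */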
static int copy_page_tables(struct trans_pgd_info *info, pgd_t *dst_pgdp,
                unsigned long start, unsigned long end)
{
    unsigned long next;
    unsigned long addr = start;
    pgd_t *src_pgdp = pgd_offset_k(start);

    dst_pgdp = pgd_offset_pgd(dst_pgdp, start);
    do {
        next = pgd_addr_end(addr, end);
        if (pgd_none(READ_ONCE(*src_pgdp)))
            continue;
        if (copy_p4d(info, dst_pgdp, src_pgdp, addr, next))
            return -ENOMEM;
    } while (dst_pgdp++, src_pgdp++, addr = next, addr != end);

    return 0;
}

/*
 * Create trans_pgd and copy the linear map.
 * info:    contains the allocator and its argument
 * dst_pgdp:    the new page table that is created, and to which the map is copied.
 * start:   Start of the interval (inclusive).
 * end:     End of the interval (exclusive).
 *
 * Returns 0 on success, and -ENOMEM on failure.
 */
int trans_pgd_create_copy(struct trans_pgd_info *info, pgd_t **dst_pgdp,
              unsigned long start, unsigned long end)
{
    int rc;
    pgd_t *trans_pgd = trans_alloc(info);

    if (!trans_pgd) {
        pr_err("Failed to allocate memory for temporary page tables.\n");
        return -ENOMEM;
    }

    rc = copy_page_tables(info, trans_pgd, start, end);
    if (!rc)
        *dst_pgdp = trans_pgd;

    return rc;
}
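
/*
 * Illustrative caller sketch (not part of this file): a user such as the
 * hibernate code provides an allocator that returns zeroed pages which are
 * guaranteed to survive the transition, then asks for a copy of the linear
 * map. The helper name safe_page_alloc() below is an assumption for the
 * example only.
 *
 *    static void *safe_page_alloc(void *arg)
 *    {
 *        return (void *)get_safe_page((__force gfp_t)(unsigned long)arg);
 *    }
 *
 *    struct trans_pgd_info info = {
 *        .trans_alloc_page    = safe_page_alloc,
 *        .trans_alloc_arg    = (__force void *)GFP_ATOMIC,
 *    };
 *    pgd_t *tmp_pg_dir;
 *    int rc;
 *
 *    rc = trans_pgd_create_copy(&info, &tmp_pg_dir, PAGE_OFFSET, PAGE_END);
 */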

/*
 * The page we want to idmap may be outside the range covered by VA_BITS that
 * can be built using the kernel's p?d_populate() helpers. As a one-off, for a
 * single page, we build these page tables bottom up and just assume that it
 * will need the maximum T0SZ.
 *
 * Returns 0 on success, and -ENOMEM on failure.
 * On success trans_ttbr0 contains the page table with the idmapped page, and
 * t0sz is set to the maximum T0SZ for this page.
 */
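/*
 * Worked example (assuming 4K pages and a destination page below 1 << 48, so
 * max_msb = 47): bits_mapped is 8 because GENMASK() is inclusive, giving nine
 * index bits per level. The loop first writes the PAGE_KERNEL_ROX leaf entry
 * at level 3 (VA bits [20:12]), then installs a table descriptor pointing at
 * it at level 2 (bits [29:21]), level 1 (bits [38:30]) and finally level 0
 * (bits [47:39]), where level_msb reaches max_msb and the walk stops. The
 * returned t0sz then describes a 48-bit (max_msb + 1) address space.
 */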
int trans_pgd_idmap_page(struct trans_pgd_info *info, phys_addr_t *trans_ttbr0,
             unsigned long *t0sz, void *page)
{
    phys_addr_t dst_addr = virt_to_phys(page);
    unsigned long pfn = __phys_to_pfn(dst_addr);
    int max_msb = (dst_addr & GENMASK(52, 48)) ? 51 : 47;
    int bits_mapped = PAGE_SHIFT - 4;
    unsigned long level_mask, prev_level_entry, *levels[4];
    int this_level, index, level_lsb, level_msb;

    dst_addr &= PAGE_MASK;
    prev_level_entry = pte_val(pfn_pte(pfn, PAGE_KERNEL_ROX));

    for (this_level = 3; this_level >= 0; this_level--) {
        levels[this_level] = trans_alloc(info);
        if (!levels[this_level])
            return -ENOMEM;

        level_lsb = ARM64_HW_PGTABLE_LEVEL_SHIFT(this_level);
        level_msb = min(level_lsb + bits_mapped, max_msb);
        level_mask = GENMASK_ULL(level_msb, level_lsb);

        index = (dst_addr & level_mask) >> level_lsb;
        *(levels[this_level] + index) = prev_level_entry;

        pfn = virt_to_pfn(levels[this_level]);
        prev_level_entry = pte_val(pfn_pte(pfn,
                           __pgprot(PMD_TYPE_TABLE)));

        if (level_msb == max_msb)
            break;
    }

    *trans_ttbr0 = phys_to_ttbr(__pfn_to_phys(pfn));
    *t0sz = TCR_T0SZ(max_msb + 1);

    return 0;
}

/*
 * Create a copy of the vector table so we can call HVC_SET_VECTORS or
 * HVC_SOFT_RESTART from contexts where the table may be overwritten.
 */
int trans_pgd_copy_el2_vectors(struct trans_pgd_info *info,
                   phys_addr_t *el2_vectors)
{
    void *hyp_stub = trans_alloc(info);

    if (!hyp_stub)
        return -ENOMEM;
    *el2_vectors = virt_to_phys(hyp_stub);
    memcpy(hyp_stub, &trans_pgd_stub_vectors, ARM64_VECTOR_TABLE_LEN);
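    /*
     * The copy may be fetched at EL2, possibly with the MMU and caches off:
     * clean to the PoU for I/D coherency and to the PoC so the vectors are
     * visible in memory during the transition.
     */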
    caches_clean_inval_pou((unsigned long)hyp_stub,
                   (unsigned long)hyp_stub +
                   ARM64_VECTOR_TABLE_LEN);
    dcache_clean_inval_poc((unsigned long)hyp_stub,
                   (unsigned long)hyp_stub +
                   ARM64_VECTOR_TABLE_LEN);

    return 0;
}