// SPDX-License-Identifier: GPL-2.0

/*
 * Xen leaves the responsibility for maintaining p2m mappings to the
 * guests themselves, but it must also access and update the p2m array
 * during suspend/resume when all the pages are reallocated.
 *
 * The logical flat p2m table is mapped to a linear kernel memory area.
 * For accesses by Xen a three-level tree, linked via mfns only, is set up to
 * allow the address space to be sparse.
 *
 *               Xen
 *                |
 *          p2m_top_mfn
 *              /   \
 * p2m_mid_mfn p2m_mid_mfn
 *         /           /
 *  p2m p2m p2m ...
 *
 * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p.
 *
 * The p2m_top_mfn level is limited to 1 page, so the maximum representable
 * pseudo-physical address space is:
 *  P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages
 *
 * P2M_PER_PAGE depends on the architecture, as an mfn is always
 * unsigned long (8 bytes on 64-bit, 4 bytes on 32-bit), leading to
 * 512 and 1024 entries respectively.
 *
 * In short, these structures contain the Machine Frame Number (MFN) of the PFN.
 *
 * However, not all entries are filled with MFNs: for any leaf, middle, or top
 * entry that is void we assume it is "missing". So (for example)
 *  pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY.
 * We have a dedicated page p2m_missing with all entries being
 * INVALID_P2M_ENTRY. This page may be referenced multiple times in the p2m
 * list/tree in case there are multiple areas with P2M_PER_PAGE invalid pfns.
 *
 * We also have the possibility of setting 1-1 mappings on certain regions, so
 * that:
 *  pfn_to_mfn(0xc0000)=0xc0000
 *
 * The benefit of this is that, for non-RAM regions (think PCI BARs or ACPI
 * spaces), we can create mappings easily because the PFN value matches the
 * MFN.
 *
 * For this to work efficiently we have one new page p2m_identity. All entries
 * in p2m_identity are set to INVALID_P2M_ENTRY type (the Xen toolstack only
 * recognizes that and MFNs, no other fancy values).
 *
 * On lookup we spot that the entry points to p2m_identity and return the
 * identity value instead of dereferencing and returning INVALID_P2M_ENTRY.
 * If the entry points to an allocated page, we just proceed as before and
 * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in
 * the appropriate functions (pfn_to_mfn).
 *
 * The reason for having the IDENTITY_FRAME_BIT instead of just returning the
 * PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a
 * non-identity pfn. To protect ourselves against that, we elect to set (and
 * get) the IDENTITY_FRAME_BIT on all identity-mapped PFNs.
 */
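
/*
 * Concrete illustration of the three kinds of entry described above (an
 * illustrative sketch only; the sample pfns reuse the examples from the
 * comment, and pfn_to_mfn() itself is implemented outside this file):
 *
 *   missing:   pfn_to_mfn(0x90909090)       == INVALID_P2M_ENTRY
 *   identity:  get_phys_to_machine(0xc0000) == IDENTITY_FRAME(0xc0000)
 *              pfn_to_mfn(0xc0000)          == 0xc0000  (bit unmasked)
 *   normal:    pfn_to_mfn(pfn)              == the MFN stored in the leaf page
 */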

#include <linux/init.h>
#include <linux/export.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/memblock.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#include <asm/cache.h>
#include <asm/setup.h>
#include <linux/uaccess.h>

#include <asm/xen/page.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
#include <xen/balloon.h>
#include <xen/grant_table.h>

#include "multicalls.h"
#include "xen-ops.h"

#define P2M_MID_PER_PAGE    (PAGE_SIZE / sizeof(unsigned long *))
#define P2M_TOP_PER_PAGE    (PAGE_SIZE / sizeof(unsigned long **))

#define MAX_P2M_PFN (P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE)

#define PMDS_PER_MID_PAGE   (P2M_MID_PER_PAGE / PTRS_PER_PTE)

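/*
 * Worked example of the sizing above; a sketch assuming x86-64 with 4 KiB
 * pages and 8-byte unsigned longs (the numbers are illustrative, not used by
 * the code):
 *
 *   P2M_PER_PAGE = P2M_MID_PER_PAGE = P2M_TOP_PER_PAGE = 4096 / 8 = 512
 *   MAX_P2M_PFN  = 512 * 512 * 512 = 0x8000000 pfns
 *
 * i.e. the three-level tree can describe 0x8000000 * 4 KiB = 512 GiB of
 * pseudo-physical address space, and PMDS_PER_MID_PAGE is 512 / 512 = 1.
 */
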
unsigned long *xen_p2m_addr __read_mostly;
EXPORT_SYMBOL_GPL(xen_p2m_addr);
unsigned long xen_p2m_size __read_mostly;
EXPORT_SYMBOL_GPL(xen_p2m_size);
unsigned long xen_max_p2m_pfn __read_mostly;
EXPORT_SYMBOL_GPL(xen_max_p2m_pfn);

#ifdef CONFIG_XEN_MEMORY_HOTPLUG_LIMIT
#define P2M_LIMIT CONFIG_XEN_MEMORY_HOTPLUG_LIMIT
#else
#define P2M_LIMIT 0
#endif
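/*
 * Assumed unit, based on the conversion in xen_vmalloc_p2m_tree() below:
 * P2M_LIMIT is a size in GiB and is turned into a number of p2m entries
 * via P2M_LIMIT * 1024 * 1024 * 1024 / PAGE_SIZE.
 */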

static DEFINE_SPINLOCK(p2m_update_lock);

static unsigned long *p2m_mid_missing_mfn;
static unsigned long *p2m_top_mfn;
static unsigned long **p2m_top_mfn_p;
static unsigned long *p2m_missing;
static unsigned long *p2m_identity;
static pte_t *p2m_missing_pte;
static pte_t *p2m_identity_pte;

/*
 * Hint at last populated PFN.
 *
 * Used to set HYPERVISOR_shared_info->arch.max_pfn so the toolstack
 * can avoid scanning the whole P2M (which may be sized to account for
 * hotplugged memory).
 */
static unsigned long xen_p2m_last_pfn;

static inline unsigned p2m_top_index(unsigned long pfn)
{
    BUG_ON(pfn >= MAX_P2M_PFN);
    return pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE);
}

static inline unsigned p2m_mid_index(unsigned long pfn)
{
    return (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE;
}

static inline unsigned p2m_index(unsigned long pfn)
{
    return pfn % P2M_PER_PAGE;
}

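/*
 * Example decomposition, a sketch assuming 64-bit (P2M_PER_PAGE ==
 * P2M_MID_PER_PAGE == 512); the pfn is purely illustrative:
 *
 *   pfn = 0x123456
 *   p2m_top_index(pfn) = 0x123456 / (512 * 512)  = 4
 *   p2m_mid_index(pfn) = (0x123456 / 512) % 512  = 0x11a
 *   p2m_index(pfn)     = 0x123456 % 512          = 0x56
 *
 * so Xen would locate the mfn for this pfn via p2m_top_mfn[4], then entry
 * 0x11a of the referenced mid page, and finally entry 0x56 of the leaf page.
 */
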
static void p2m_top_mfn_init(unsigned long *top)
{
    unsigned i;

    for (i = 0; i < P2M_TOP_PER_PAGE; i++)
        top[i] = virt_to_mfn(p2m_mid_missing_mfn);
}

static void p2m_top_mfn_p_init(unsigned long **top)
{
    unsigned i;

    for (i = 0; i < P2M_TOP_PER_PAGE; i++)
        top[i] = p2m_mid_missing_mfn;
}

static void p2m_mid_mfn_init(unsigned long *mid, unsigned long *leaf)
{
    unsigned i;

    for (i = 0; i < P2M_MID_PER_PAGE; i++)
        mid[i] = virt_to_mfn(leaf);
}

static void p2m_init(unsigned long *p2m)
{
    unsigned i;

    for (i = 0; i < P2M_PER_PAGE; i++)
        p2m[i] = INVALID_P2M_ENTRY;
}

static void p2m_init_identity(unsigned long *p2m, unsigned long pfn)
{
    unsigned i;

    for (i = 0; i < P2M_PER_PAGE; i++)
        p2m[i] = IDENTITY_FRAME(pfn + i);
}

static void * __ref alloc_p2m_page(void)
{
    if (unlikely(!slab_is_available())) {
        void *ptr = memblock_alloc(PAGE_SIZE, PAGE_SIZE);

        if (!ptr)
            panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
                  __func__, PAGE_SIZE, PAGE_SIZE);

        return ptr;
    }

    return (void *)__get_free_page(GFP_KERNEL);
}

static void __ref free_p2m_page(void *p)
{
    if (unlikely(!slab_is_available())) {
        memblock_free(p, PAGE_SIZE);
        return;
    }

    free_page((unsigned long)p);
}

/*
 * Build the parallel p2m_top_mfn and p2m_mid_mfn structures
 *
 * This is called both at boot time, and after resuming from suspend:
 * - At boot time we're called rather early, and must use memblock to
 *   allocate memory.
 *
 * - After resume we're called from within stop_machine, but the mfn
 *   tree should already be completely allocated.
 */
void __ref xen_build_mfn_list_list(void)
{
    unsigned long pfn, mfn;
    pte_t *ptep;
    unsigned int level, topidx, mididx;
    unsigned long *mid_mfn_p;

    if (xen_start_info->flags & SIF_VIRT_P2M_4TOOLS)
        return;

    /* Pre-initialize p2m_top_mfn to be completely missing */
    if (p2m_top_mfn == NULL) {
        p2m_mid_missing_mfn = alloc_p2m_page();
        p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing);

        p2m_top_mfn_p = alloc_p2m_page();
        p2m_top_mfn_p_init(p2m_top_mfn_p);

        p2m_top_mfn = alloc_p2m_page();
        p2m_top_mfn_init(p2m_top_mfn);
    } else {
        /* Reinitialise: MFNs all change after migration */
        p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing);
    }

    for (pfn = 0; pfn < xen_max_p2m_pfn && pfn < MAX_P2M_PFN;
         pfn += P2M_PER_PAGE) {
        topidx = p2m_top_index(pfn);
        mididx = p2m_mid_index(pfn);

        mid_mfn_p = p2m_top_mfn_p[topidx];
        ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn),
                      &level);
        BUG_ON(!ptep || level != PG_LEVEL_4K);
        mfn = pte_mfn(*ptep);
        ptep = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1));

        /*
         * Don't bother allocating any mfn mid levels if they're just
         * missing; just update the stored mfn, since all could have
         * changed over a migrate.
         */
        if (ptep == p2m_missing_pte || ptep == p2m_identity_pte) {
            BUG_ON(mididx);
            BUG_ON(mid_mfn_p != p2m_mid_missing_mfn);
            p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn);
            pfn += (P2M_MID_PER_PAGE - 1) * P2M_PER_PAGE;
            continue;
        }

        if (mid_mfn_p == p2m_mid_missing_mfn) {
            mid_mfn_p = alloc_p2m_page();
            p2m_mid_mfn_init(mid_mfn_p, p2m_missing);

            p2m_top_mfn_p[topidx] = mid_mfn_p;
        }

        p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p);
        mid_mfn_p[mididx] = mfn;
    }
}

void xen_setup_mfn_list_list(void)
{
    BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);

    if (xen_start_info->flags & SIF_VIRT_P2M_4TOOLS)
        HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = ~0UL;
    else
        HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
            virt_to_mfn(p2m_top_mfn);
    HYPERVISOR_shared_info->arch.max_pfn = xen_p2m_last_pfn;
    HYPERVISOR_shared_info->arch.p2m_generation = 0;
    HYPERVISOR_shared_info->arch.p2m_vaddr = (unsigned long)xen_p2m_addr;
    HYPERVISOR_shared_info->arch.p2m_cr3 =
        xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
}

/* Set up xen_p2m_addr to point to the domain-builder provided p2m pages */
void __init xen_build_dynamic_phys_to_machine(void)
{
    unsigned long pfn;

    xen_p2m_addr = (unsigned long *)xen_start_info->mfn_list;
    xen_p2m_size = ALIGN(xen_start_info->nr_pages, P2M_PER_PAGE);

    for (pfn = xen_start_info->nr_pages; pfn < xen_p2m_size; pfn++)
        xen_p2m_addr[pfn] = INVALID_P2M_ENTRY;

    xen_max_p2m_pfn = xen_p2m_size;
}

#define P2M_TYPE_IDENTITY   0
#define P2M_TYPE_MISSING    1
#define P2M_TYPE_PFN        2
#define P2M_TYPE_UNKNOWN    3

static int xen_p2m_elem_type(unsigned long pfn)
{
    unsigned long mfn;

    if (pfn >= xen_p2m_size)
        return P2M_TYPE_IDENTITY;

    mfn = xen_p2m_addr[pfn];

    if (mfn == INVALID_P2M_ENTRY)
        return P2M_TYPE_MISSING;

    if (mfn & IDENTITY_FRAME_BIT)
        return P2M_TYPE_IDENTITY;

    return P2M_TYPE_PFN;
}

static void __init xen_rebuild_p2m_list(unsigned long *p2m)
{
    unsigned int i, chunk;
    unsigned long pfn;
    unsigned long *mfns;
    pte_t *ptep;
    pmd_t *pmdp;
    int type;

    p2m_missing = alloc_p2m_page();
    p2m_init(p2m_missing);
    p2m_identity = alloc_p2m_page();
    p2m_init(p2m_identity);

    p2m_missing_pte = alloc_p2m_page();
    paravirt_alloc_pte(&init_mm, __pa(p2m_missing_pte) >> PAGE_SHIFT);
    p2m_identity_pte = alloc_p2m_page();
    paravirt_alloc_pte(&init_mm, __pa(p2m_identity_pte) >> PAGE_SHIFT);
    for (i = 0; i < PTRS_PER_PTE; i++) {
        set_pte(p2m_missing_pte + i,
            pfn_pte(PFN_DOWN(__pa(p2m_missing)), PAGE_KERNEL_RO));
        set_pte(p2m_identity_pte + i,
            pfn_pte(PFN_DOWN(__pa(p2m_identity)), PAGE_KERNEL_RO));
    }

    for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += chunk) {
        /*
         * Try to map missing/identity PMDs or p2m-pages if possible.
         * We have to respect the structure of the mfn_list_list
         * which will be built just afterwards.
         * The chunk size to test is one p2m page if we are in the middle
         * of an mfn_list_list mid page, and the complete mid page area
         * if we are at index 0 of the mid page. Please note that a
         * mid page might cover more than one PMD, e.g. on 32-bit PAE
         * kernels.
         */
        chunk = (pfn & (P2M_PER_PAGE * P2M_MID_PER_PAGE - 1)) ?
            P2M_PER_PAGE : P2M_PER_PAGE * P2M_MID_PER_PAGE;

        type = xen_p2m_elem_type(pfn);
        i = 0;
        if (type != P2M_TYPE_PFN)
            for (i = 1; i < chunk; i++)
                if (xen_p2m_elem_type(pfn + i) != type)
                    break;
        if (i < chunk)
            /* Reset to minimal chunk size. */
            chunk = P2M_PER_PAGE;

        if (type == P2M_TYPE_PFN || i < chunk) {
            /* Use initial p2m page contents. */
            mfns = alloc_p2m_page();
            copy_page(mfns, xen_p2m_addr + pfn);
            ptep = populate_extra_pte((unsigned long)(p2m + pfn));
            set_pte(ptep,
                pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL));
            continue;
        }

        if (chunk == P2M_PER_PAGE) {
            /* Map complete missing or identity p2m-page. */
            mfns = (type == P2M_TYPE_MISSING) ?
                p2m_missing : p2m_identity;
            ptep = populate_extra_pte((unsigned long)(p2m + pfn));
            set_pte(ptep,
                pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL_RO));
            continue;
        }

        /* Complete missing or identity PMD(s) can be mapped. */
        ptep = (type == P2M_TYPE_MISSING) ?
            p2m_missing_pte : p2m_identity_pte;
        for (i = 0; i < PMDS_PER_MID_PAGE; i++) {
            pmdp = populate_extra_pmd(
                (unsigned long)(p2m + pfn) + i * PMD_SIZE);
            set_pmd(pmdp, __pmd(__pa(ptep) | _KERNPG_TABLE));
        }
    }
}

void __init xen_vmalloc_p2m_tree(void)
{
    static struct vm_struct vm;
    unsigned long p2m_limit;

    xen_p2m_last_pfn = xen_max_p2m_pfn;

    p2m_limit = (phys_addr_t)P2M_LIMIT * 1024 * 1024 * 1024 / PAGE_SIZE;
    vm.flags = VM_ALLOC;
    vm.size = ALIGN(sizeof(unsigned long) * max(xen_max_p2m_pfn, p2m_limit),
            PMD_SIZE * PMDS_PER_MID_PAGE);
    vm_area_register_early(&vm, PMD_SIZE * PMDS_PER_MID_PAGE);
    pr_notice("p2m virtual area at %p, size is %lx\n", vm.addr, vm.size);

    xen_max_p2m_pfn = vm.size / sizeof(unsigned long);

    xen_rebuild_p2m_list(vm.addr);

    xen_p2m_addr = vm.addr;
    xen_p2m_size = xen_max_p2m_pfn;

    xen_inv_extra_mem();
}

unsigned long get_phys_to_machine(unsigned long pfn)
{
    pte_t *ptep;
    unsigned int level;

    if (unlikely(pfn >= xen_p2m_size)) {
        if (pfn < xen_max_p2m_pfn)
            return xen_chk_extra_mem(pfn);

        return IDENTITY_FRAME(pfn);
    }

    ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), &level);
    BUG_ON(!ptep || level != PG_LEVEL_4K);

    /*
     * The INVALID_P2M_ENTRY is filled in both p2m_*identity
     * and in p2m_*missing, so returning the INVALID_P2M_ENTRY
     * would be wrong.
     */
    if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_identity)))
        return IDENTITY_FRAME(pfn);

    return xen_p2m_addr[pfn];
}
EXPORT_SYMBOL_GPL(get_phys_to_machine);

/*
 * Allocate new pmd(s). It is checked whether the old pmd is still in place.
 * If not, nothing is changed. This is okay as the only reason for allocating
 * a new pmd is to replace p2m_missing_pte or p2m_identity_pte by an
 * individual pmd.
 */
static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *pte_pg)
{
    pte_t *ptechk;
    pte_t *pte_newpg[PMDS_PER_MID_PAGE];
    pmd_t *pmdp;
    unsigned int level;
    unsigned long flags;
    unsigned long vaddr;
    int i;

    /* Do all allocations first to bail out in error case. */
    for (i = 0; i < PMDS_PER_MID_PAGE; i++) {
        pte_newpg[i] = alloc_p2m_page();
        if (!pte_newpg[i]) {
            for (i--; i >= 0; i--)
                free_p2m_page(pte_newpg[i]);

            return NULL;
        }
    }

    vaddr = addr & ~(PMD_SIZE * PMDS_PER_MID_PAGE - 1);

    for (i = 0; i < PMDS_PER_MID_PAGE; i++) {
        copy_page(pte_newpg[i], pte_pg);
        paravirt_alloc_pte(&init_mm, __pa(pte_newpg[i]) >> PAGE_SHIFT);

        pmdp = lookup_pmd_address(vaddr);
        BUG_ON(!pmdp);

        spin_lock_irqsave(&p2m_update_lock, flags);

        ptechk = lookup_address(vaddr, &level);
        if (ptechk == pte_pg) {
            HYPERVISOR_shared_info->arch.p2m_generation++;
            wmb(); /* Tools are synchronizing via p2m_generation. */
            set_pmd(pmdp,
                __pmd(__pa(pte_newpg[i]) | _KERNPG_TABLE));
            wmb(); /* Tools are synchronizing via p2m_generation. */
            HYPERVISOR_shared_info->arch.p2m_generation++;
            pte_newpg[i] = NULL;
        }

        spin_unlock_irqrestore(&p2m_update_lock, flags);

        if (pte_newpg[i]) {
            paravirt_release_pte(__pa(pte_newpg[i]) >> PAGE_SHIFT);
            free_p2m_page(pte_newpg[i]);
        }

        vaddr += PMD_SIZE;
    }

    return lookup_address(addr, &level);
}

/*
 * Fully allocate the p2m structure for a given pfn.  We need to check
 * that both the top and mid levels are allocated, and make sure the
 * parallel mfn tree is kept in sync.  We may race with other cpus, so
 * the new pages are installed with cmpxchg; if we lose the race then
 * simply free the page we allocated and use the one that's there.
 */
int xen_alloc_p2m_entry(unsigned long pfn)
{
    unsigned topidx;
    unsigned long *top_mfn_p, *mid_mfn;
    pte_t *ptep, *pte_pg;
    unsigned int level;
    unsigned long flags;
    unsigned long addr = (unsigned long)(xen_p2m_addr + pfn);
    unsigned long p2m_pfn;

    ptep = lookup_address(addr, &level);
    BUG_ON(!ptep || level != PG_LEVEL_4K);
    pte_pg = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1));

    if (pte_pg == p2m_missing_pte || pte_pg == p2m_identity_pte) {
        /* PMD level is missing, allocate a new one */
        ptep = alloc_p2m_pmd(addr, pte_pg);
        if (!ptep)
            return -ENOMEM;
    }

    if (p2m_top_mfn && pfn < MAX_P2M_PFN) {
        topidx = p2m_top_index(pfn);
        top_mfn_p = &p2m_top_mfn[topidx];
        mid_mfn = READ_ONCE(p2m_top_mfn_p[topidx]);

        BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p);

        if (mid_mfn == p2m_mid_missing_mfn) {
            /* Separately check the mid mfn level */
            unsigned long missing_mfn;
            unsigned long mid_mfn_mfn;
            unsigned long old_mfn;

            mid_mfn = alloc_p2m_page();
            if (!mid_mfn)
                return -ENOMEM;

            p2m_mid_mfn_init(mid_mfn, p2m_missing);

            missing_mfn = virt_to_mfn(p2m_mid_missing_mfn);
            mid_mfn_mfn = virt_to_mfn(mid_mfn);
            old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn);
            if (old_mfn != missing_mfn) {
                free_p2m_page(mid_mfn);
                mid_mfn = mfn_to_virt(old_mfn);
            } else {
                p2m_top_mfn_p[topidx] = mid_mfn;
            }
        }
    } else {
        mid_mfn = NULL;
    }

    p2m_pfn = pte_pfn(READ_ONCE(*ptep));
    if (p2m_pfn == PFN_DOWN(__pa(p2m_identity)) ||
        p2m_pfn == PFN_DOWN(__pa(p2m_missing))) {
        /* p2m leaf page is missing */
        unsigned long *p2m;

        p2m = alloc_p2m_page();
        if (!p2m)
            return -ENOMEM;

        if (p2m_pfn == PFN_DOWN(__pa(p2m_missing)))
            p2m_init(p2m);
        else
            p2m_init_identity(p2m, pfn & ~(P2M_PER_PAGE - 1));

        spin_lock_irqsave(&p2m_update_lock, flags);

        if (pte_pfn(*ptep) == p2m_pfn) {
            HYPERVISOR_shared_info->arch.p2m_generation++;
            wmb(); /* Tools are synchronizing via p2m_generation. */
            set_pte(ptep,
                pfn_pte(PFN_DOWN(__pa(p2m)), PAGE_KERNEL));
            wmb(); /* Tools are synchronizing via p2m_generation. */
            HYPERVISOR_shared_info->arch.p2m_generation++;
            if (mid_mfn)
                mid_mfn[p2m_mid_index(pfn)] = virt_to_mfn(p2m);
            p2m = NULL;
        }

        spin_unlock_irqrestore(&p2m_update_lock, flags);

        if (p2m)
            free_p2m_page(p2m);
    }

    /* Expanded the p2m? */
    if (pfn >= xen_p2m_last_pfn) {
        xen_p2m_last_pfn = ALIGN(pfn + 1, P2M_PER_PAGE);
        HYPERVISOR_shared_info->arch.max_pfn = xen_p2m_last_pfn;
    }

    return 0;
}
EXPORT_SYMBOL(xen_alloc_p2m_entry);

unsigned long __init set_phys_range_identity(unsigned long pfn_s,
                      unsigned long pfn_e)
{
    unsigned long pfn;

    if (unlikely(pfn_s >= xen_p2m_size))
        return 0;

    if (pfn_s > pfn_e)
        return 0;

    if (pfn_e > xen_p2m_size)
        pfn_e = xen_p2m_size;

    for (pfn = pfn_s; pfn < pfn_e; pfn++)
        xen_p2m_addr[pfn] = IDENTITY_FRAME(pfn);

    return pfn - pfn_s;
}

bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
    pte_t *ptep;
    unsigned int level;

    /* Only invalid entries allowed above the highest p2m covered frame. */
    if (unlikely(pfn >= xen_p2m_size))
        return mfn == INVALID_P2M_ENTRY;

    /*
     * The interface requires atomic updates on p2m elements.
     * xen_safe_write_ulong() is using an atomic store via asm().
     */
    if (likely(!xen_safe_write_ulong(xen_p2m_addr + pfn, mfn)))
        return true;

    ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), &level);
    BUG_ON(!ptep || level != PG_LEVEL_4K);

    if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_missing)))
        return mfn == INVALID_P2M_ENTRY;

    if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_identity)))
        return mfn == IDENTITY_FRAME(pfn);

    return false;
}

bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
    if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
        int ret;

        ret = xen_alloc_p2m_entry(pfn);
        if (ret < 0)
            return false;

        return __set_phys_to_machine(pfn, mfn);
    }

    return true;
}

int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
                struct gnttab_map_grant_ref *kmap_ops,
                struct page **pages, unsigned int count)
{
    int i, ret = 0;
    pte_t *pte;

    if (xen_feature(XENFEAT_auto_translated_physmap))
        return 0;

    if (kmap_ops) {
        ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
                        kmap_ops, count);
        if (ret)
            goto out;
    }

    for (i = 0; i < count; i++) {
        unsigned long mfn, pfn;
        struct gnttab_unmap_grant_ref unmap[2];
        int rc;

        /* Do not add to override if the map failed. */
        if (map_ops[i].status != GNTST_okay ||
            (kmap_ops && kmap_ops[i].status != GNTST_okay))
            continue;

        if (map_ops[i].flags & GNTMAP_contains_pte) {
            pte = (pte_t *)(mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) +
                (map_ops[i].host_addr & ~PAGE_MASK));
            mfn = pte_mfn(*pte);
        } else {
            mfn = PFN_DOWN(map_ops[i].dev_bus_addr);
        }
        pfn = page_to_pfn(pages[i]);

        WARN(pfn_to_mfn(pfn) != INVALID_P2M_ENTRY, "page must be ballooned");

        if (likely(set_phys_to_machine(pfn, FOREIGN_FRAME(mfn))))
            continue;

        /*
         * Signal an error for this slot. This in turn requires
         * immediate unmapping.
         */
        map_ops[i].status = GNTST_general_error;
        unmap[0].host_addr = map_ops[i].host_addr;
        unmap[0].handle = map_ops[i].handle;
        map_ops[i].handle = INVALID_GRANT_HANDLE;
        if (map_ops[i].flags & GNTMAP_device_map)
            unmap[0].dev_bus_addr = map_ops[i].dev_bus_addr;
        else
            unmap[0].dev_bus_addr = 0;

        if (kmap_ops) {
            kmap_ops[i].status = GNTST_general_error;
            unmap[1].host_addr = kmap_ops[i].host_addr;
            unmap[1].handle = kmap_ops[i].handle;
            kmap_ops[i].handle = INVALID_GRANT_HANDLE;
            if (kmap_ops[i].flags & GNTMAP_device_map)
                unmap[1].dev_bus_addr = kmap_ops[i].dev_bus_addr;
            else
                unmap[1].dev_bus_addr = 0;
        }

        /*
         * Pre-populate both status fields, to be recognizable in
         * the log message below.
         */
        unmap[0].status = 1;
        unmap[1].status = 1;

        rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
                           unmap, 1 + !!kmap_ops);
        if (rc || unmap[0].status != GNTST_okay ||
            unmap[1].status != GNTST_okay)
            pr_err_once("gnttab unmap failed: rc=%d st0=%d st1=%d\n",
                    rc, unmap[0].status, unmap[1].status);
    }

out:
    return ret;
}

int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
                  struct gnttab_unmap_grant_ref *kunmap_ops,
                  struct page **pages, unsigned int count)
{
    int i, ret = 0;

    if (xen_feature(XENFEAT_auto_translated_physmap))
        return 0;

    for (i = 0; i < count; i++) {
        unsigned long mfn = __pfn_to_mfn(page_to_pfn(pages[i]));
        unsigned long pfn = page_to_pfn(pages[i]);

        if (mfn != INVALID_P2M_ENTRY && (mfn & FOREIGN_FRAME_BIT))
            set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
        else
            ret = -EINVAL;
    }
    if (kunmap_ops)
        ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
                        kunmap_ops, count) ?: ret;

    return ret;
}

#ifdef CONFIG_XEN_DEBUG_FS
#include <linux/debugfs.h>
#include "debugfs.h"
static int p2m_dump_show(struct seq_file *m, void *v)
{
    static const char * const type_name[] = {
                [P2M_TYPE_IDENTITY] = "identity",
                [P2M_TYPE_MISSING] = "missing",
                [P2M_TYPE_PFN] = "pfn",
                [P2M_TYPE_UNKNOWN] = "abnormal"};
    unsigned long pfn, first_pfn;
    int type, prev_type;

    prev_type = xen_p2m_elem_type(0);
    first_pfn = 0;

    for (pfn = 0; pfn < xen_p2m_size; pfn++) {
        type = xen_p2m_elem_type(pfn);
        if (type != prev_type) {
            seq_printf(m, " [0x%lx->0x%lx] %s\n", first_pfn, pfn,
                   type_name[prev_type]);
            prev_type = type;
            first_pfn = pfn;
        }
    }
    seq_printf(m, " [0x%lx->0x%lx] %s\n", first_pfn, pfn,
           type_name[prev_type]);
    return 0;
}
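
/*
 * Sketch of the output produced above (hypothetical pfn ranges, shown only
 * to illustrate the format): reading the "p2m" debugfs file created below
 * yields one line per run of identically-typed entries, e.g.
 *
 *    [0x0->0xa0] pfn
 *    [0xa0->0x100] identity
 *    [0x100->0x20000] pfn
 */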

DEFINE_SHOW_ATTRIBUTE(p2m_dump);

static struct dentry *d_mmu_debug;

static int __init xen_p2m_debugfs(void)
{
    struct dentry *d_xen = xen_init_debugfs();

    d_mmu_debug = debugfs_create_dir("mmu", d_xen);

    debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops);
    return 0;
}
fs_initcall(xen_p2m_debugfs);
#endif /* CONFIG_XEN_DEBUG_FS */