// SPDX-License-Identifier: GPL-2.0

/*
 * Xen PV MMU operations.
 *
 * This file provides the paravirt MMU hooks used by Xen PV guests.
 * Page table updates are routed through hypercalls (batched via
 * multicalls where possible), page table pages are pinned read-only
 * while the hypervisor is using them, and pte/pmd/pud/p4d/pgd values
 * are translated between the guest's pseudo-physical frame numbers
 * (pfns) and the machine frame numbers (mfns) used by Xen.
 */
#include <linux/sched/mm.h>
#include <linux/debugfs.h>
#include <linux/bug.h>
#include <linux/vmalloc.h>
#include <linux/export.h>
#include <linux/init.h>
#include <linux/gfp.h>
#include <linux/memblock.h>
#include <linux/seq_file.h>
#include <linux/crash_dump.h>
#include <linux/pgtable.h>
#ifdef CONFIG_KEXEC_CORE
#include <linux/kexec.h>
#endif

#include <trace/events/xen.h>

#include <asm/tlbflush.h>
#include <asm/fixmap.h>
#include <asm/mmu_context.h>
#include <asm/setup.h>
#include <asm/paravirt.h>
#include <asm/e820/api.h>
#include <asm/linkage.h>
#include <asm/page.h>
#include <asm/init.h>
#include <asm/memtype.h>
#include <asm/smp.h>
#include <asm/tlb.h>

#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>

#include <xen/xen.h>
#include <xen/page.h>
#include <xen/interface/xen.h>
#include <xen/interface/hvm/hvm_op.h>
#include <xen/interface/version.h>
#include <xen/interface/memory.h>
#include <xen/hvc-console.h>
#include <xen/swiotlb-xen.h>

#include "multicalls.h"
#include "mmu.h"
#include "debugfs.h"

#ifdef CONFIG_X86_VSYSCALL_EMULATION
/* l3 pud for userspace vsyscall mapping */
static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
#endif

/*
 * Protects atomic reservation decrease/increase against concurrent increases.
 * Also protects non-atomic updates of current_pages and balloon lists.
 */
static DEFINE_SPINLOCK(xen_reservation_lock);

/*
 * Note about cr3 (pagetable base) values:
 *
 * xen_cr3 contains the current logical cr3 value; it contains the
 * last set cr3.  This may not be the current effective cr3, because
 * its update may be being lazily deferred.  However, a vcpu looking
 * at its own cr3 can use this value knowing that everything will be
 * self-consistent.
 *
 * xen_current_cr3 contains the actual vcpu cr3; it is set once the
 * hypercall to set the vcpu cr3 is complete (so it may be a little
 * out of date, but it will never be set early).  If one vcpu is
 * looking at another vcpu's cr3 value, it should use this variable.
 */
DEFINE_PER_CPU(unsigned long, xen_cr3);		/* cr3 stored as physaddr */
DEFINE_PER_CPU(unsigned long, xen_current_cr3);	/* actual vcpu cr3 */

static phys_addr_t xen_pt_base, xen_pt_size __initdata;

static DEFINE_STATIC_KEY_FALSE(xen_struct_pages_ready);

/*
 * Just beyond the highest usermode address.  STACK_TOP_MAX has a
 * redzone above it, so round it up to a PGD boundary.
 */
#define USER_LIMIT	((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK)

void make_lowmem_page_readonly(void *vaddr)
{
	pte_t *pte, ptev;
	unsigned long address = (unsigned long)vaddr;
	unsigned int level;

	pte = lookup_address(address, &level);
	if (pte == NULL)
		return;		/* vaddr missing */

	ptev = pte_wrprotect(*pte);

	if (HYPERVISOR_update_va_mapping(address, ptev, 0))
		BUG();
}

void make_lowmem_page_readwrite(void *vaddr)
{
	pte_t *pte, ptev;
	unsigned long address = (unsigned long)vaddr;
	unsigned int level;

	pte = lookup_address(address, &level);
	if (pte == NULL)
		return;		/* vaddr missing */

	ptev = pte_mkwrite(*pte);

	if (HYPERVISOR_update_va_mapping(address, ptev, 0))
		BUG();
}

/*
 * During early boot all page table pages are pinned, but we do not have
 * struct pages, so return true until struct pages are ready.
 */
static bool xen_page_pinned(void *ptr)
{
	if (static_branch_likely(&xen_struct_pages_ready)) {
		struct page *page = virt_to_page(ptr);

		return PagePinned(page);
	}
	return true;
}
0173
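/*
 * Helpers for accumulating mmu_update and mmuext_op requests in the
 * per-cpu multicall buffer.  If the previous multicall entry is already
 * the same hypercall, its argument list is extended in place (bumping the
 * count argument); otherwise a fresh multicall entry is started.  The
 * queued requests reach the hypervisor when the batch is issued.
 */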
static void xen_extend_mmu_update(const struct mmu_update *update)
{
	struct multicall_space mcs;
	struct mmu_update *u;

	mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u));

	if (mcs.mc != NULL) {
		mcs.mc->args[1]++;
	} else {
		mcs = __xen_mc_entry(sizeof(*u));
		MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
	}

	u = mcs.args;
	*u = *update;
}

static void xen_extend_mmuext_op(const struct mmuext_op *op)
{
	struct multicall_space mcs;
	struct mmuext_op *u;

	mcs = xen_mc_extend_args(__HYPERVISOR_mmuext_op, sizeof(*u));

	if (mcs.mc != NULL) {
		mcs.mc->args[1]++;
	} else {
		mcs = __xen_mc_entry(sizeof(*u));
		MULTI_mmuext_op(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
	}

	u = mcs.args;
	*u = *op;
}
0209
static void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
{
	struct mmu_update u;

	preempt_disable();

	xen_mc_batch();

	/* ptr may be ioremapped for 64-bit pagetable setup */
	u.ptr = arbitrary_virt_to_machine(ptr).maddr;
	u.val = pmd_val_ma(val);
	xen_extend_mmu_update(&u);

	xen_mc_issue(PARAVIRT_LAZY_MMU);

	preempt_enable();
}

static void xen_set_pmd(pmd_t *ptr, pmd_t val)
{
	trace_xen_mmu_set_pmd(ptr, val);

	/*
	 * If the page is not pinned, we can just update the entry
	 * directly.
	 */
	if (!xen_page_pinned(ptr)) {
		*ptr = val;
		return;
	}

	xen_set_pmd_hyper(ptr, val);
}

/*
 * Associate a virtual page frame with a given physical page frame
 * and protection flags for that frame.
 */
void __init set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
{
	if (HYPERVISOR_update_va_mapping(vaddr, mfn_pte(mfn, flags),
					 UVMF_INVLPG))
		BUG();
}
0252
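/*
 * Try to queue a pte write into the current lazy-MMU multicall batch.
 * Returns false when we are not in lazy MMU mode, in which case the
 * caller falls back to an immediate hypercall.
 */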
static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval)
{
	struct mmu_update u;

	if (paravirt_get_lazy_mode() != PARAVIRT_LAZY_MMU)
		return false;

	xen_mc_batch();

	u.ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE;
	u.val = pte_val_ma(pteval);
	xen_extend_mmu_update(&u);

	xen_mc_issue(PARAVIRT_LAZY_MMU);

	return true;
}

static inline void __xen_set_pte(pte_t *ptep, pte_t pteval)
{
	if (!xen_batched_set_pte(ptep, pteval)) {
		/*
		 * Could call native_set_pte() here and trap and
		 * emulate the PTE write, but a hypercall is much
		 * cheaper.
		 */
		struct mmu_update u;

		u.ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE;
		u.val = pte_val_ma(pteval);
		HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF);
	}
}

static void xen_set_pte(pte_t *ptep, pte_t pteval)
{
	trace_xen_mmu_set_pte(ptep, pteval);
	__xen_set_pte(ptep, pteval);
}

pte_t xen_ptep_modify_prot_start(struct vm_area_struct *vma,
				 unsigned long addr, pte_t *ptep)
{
	/* Just return the pte as-is.  We preserve the bits on commit. */
	trace_xen_mmu_ptep_modify_prot_start(vma->vm_mm, addr, ptep, *ptep);
	return *ptep;
}

void xen_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
				 pte_t *ptep, pte_t pte)
{
	struct mmu_update u;

	trace_xen_mmu_ptep_modify_prot_commit(vma->vm_mm, addr, ptep, pte);
	xen_mc_batch();

	u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
	u.val = pte_val_ma(pte);
	xen_extend_mmu_update(&u);

	xen_mc_issue(PARAVIRT_LAZY_MMU);
}
0314
/* Assume pteval_t is equivalent to all the other *val_t types. */
static pteval_t pte_mfn_to_pfn(pteval_t val)
{
	if (val & _PAGE_PRESENT) {
		unsigned long mfn = (val & XEN_PTE_MFN_MASK) >> PAGE_SHIFT;
		unsigned long pfn = mfn_to_pfn(mfn);

		pteval_t flags = val & PTE_FLAGS_MASK;
		if (unlikely(pfn == ~0))
			val = flags & ~_PAGE_PRESENT;
		else
			val = ((pteval_t)pfn << PAGE_SHIFT) | flags;
	}

	return val;
}

static pteval_t pte_pfn_to_mfn(pteval_t val)
{
	if (val & _PAGE_PRESENT) {
		unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
		pteval_t flags = val & PTE_FLAGS_MASK;
		unsigned long mfn;

		mfn = __pfn_to_mfn(pfn);

		/*
		 * If there's no mfn for the pfn, then just create an
		 * empty non-present pte.  Unfortunately this loses
		 * information about the original pfn, so
		 * pte_mfn_to_pfn is asymmetric.
		 */
		if (unlikely(mfn == INVALID_P2M_ENTRY)) {
			mfn = 0;
			flags = 0;
		} else
			mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);
		val = ((pteval_t)mfn << PAGE_SHIFT) | flags;
	}

	return val;
}
0357
__visible pteval_t xen_pte_val(pte_t pte)
{
	pteval_t pteval = pte.pte;

	return pte_mfn_to_pfn(pteval);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);

__visible pgdval_t xen_pgd_val(pgd_t pgd)
{
	return pte_mfn_to_pfn(pgd.pgd);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val);

__visible pte_t xen_make_pte(pteval_t pte)
{
	pte = pte_pfn_to_mfn(pte);

	return native_make_pte(pte);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);

__visible pgd_t xen_make_pgd(pgdval_t pgd)
{
	pgd = pte_pfn_to_mfn(pgd);
	return native_make_pgd(pgd);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pgd);

__visible pmdval_t xen_pmd_val(pmd_t pmd)
{
	return pte_mfn_to_pfn(pmd.pmd);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_pmd_val);
0392
static void xen_set_pud_hyper(pud_t *ptr, pud_t val)
{
	struct mmu_update u;

	preempt_disable();

	xen_mc_batch();

	/* ptr may be ioremapped for 64-bit pagetable setup */
	u.ptr = arbitrary_virt_to_machine(ptr).maddr;
	u.val = pud_val_ma(val);
	xen_extend_mmu_update(&u);

	xen_mc_issue(PARAVIRT_LAZY_MMU);

	preempt_enable();
}

static void xen_set_pud(pud_t *ptr, pud_t val)
{
	trace_xen_mmu_set_pud(ptr, val);

	/*
	 * If the page is not pinned, we can just update the entry
	 * directly.
	 */
	if (!xen_page_pinned(ptr)) {
		*ptr = val;
		return;
	}

	xen_set_pud_hyper(ptr, val);
}
0424
__visible pmd_t xen_make_pmd(pmdval_t pmd)
{
	pmd = pte_pfn_to_mfn(pmd);
	return native_make_pmd(pmd);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd);

__visible pudval_t xen_pud_val(pud_t pud)
{
	return pte_mfn_to_pfn(pud.pud);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_pud_val);

__visible pud_t xen_make_pud(pudval_t pud)
{
	pud = pte_pfn_to_mfn(pud);

	return native_make_pud(pud);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pud);
0445
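/*
 * 64-bit PV guests keep a separate user-mode top level pagetable.  The
 * kernel pgd page stores a pointer to it in page->private; entries below
 * USER_LIMIT are mirrored into it (see xen_set_p4d() and xen_pgd_alloc()).
 */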
static pgd_t *xen_get_user_pgd(pgd_t *pgd)
{
	pgd_t *pgd_page = (pgd_t *)(((unsigned long)pgd) & PAGE_MASK);
	unsigned offset = pgd - pgd_page;
	pgd_t *user_ptr = NULL;

	if (offset < pgd_index(USER_LIMIT)) {
		struct page *page = virt_to_page(pgd_page);
		user_ptr = (pgd_t *)page->private;
		if (user_ptr)
			user_ptr += offset;
	}

	return user_ptr;
}
0461
static void __xen_set_p4d_hyper(p4d_t *ptr, p4d_t val)
{
	struct mmu_update u;

	u.ptr = virt_to_machine(ptr).maddr;
	u.val = p4d_val_ma(val);
	xen_extend_mmu_update(&u);
}

/*
 * Raw hypercall-based set_p4d, intended for use in early boot before
 * there's a page structure.  This implies:
 *  1. The only existing pagetable is the kernel's
 *  2. It is always pinned
 *  3. It has no user pagetable attached to it
 */
static void __init xen_set_p4d_hyper(p4d_t *ptr, p4d_t val)
{
	preempt_disable();

	xen_mc_batch();

	__xen_set_p4d_hyper(ptr, val);

	xen_mc_issue(PARAVIRT_LAZY_MMU);

	preempt_enable();
}

static void xen_set_p4d(p4d_t *ptr, p4d_t val)
{
	pgd_t *user_ptr = xen_get_user_pgd((pgd_t *)ptr);
	pgd_t pgd_val;

	trace_xen_mmu_set_p4d(ptr, (p4d_t *)user_ptr, val);

	/*
	 * If the page is not pinned, we can just update the entry
	 * directly.
	 */
	if (!xen_page_pinned(ptr)) {
		*ptr = val;
		if (user_ptr) {
			WARN_ON(xen_page_pinned(user_ptr));
			pgd_val.pgd = p4d_val_ma(val);
			*user_ptr = pgd_val;
		}
		return;
	}

	/*
	 * If it's pinned, we can at least batch the kernel and user
	 * updates together.
	 */
	xen_mc_batch();

	__xen_set_p4d_hyper(ptr, val);
	if (user_ptr)
		__xen_set_p4d_hyper((p4d_t *)user_ptr, val);

	xen_mc_issue(PARAVIRT_LAZY_MMU);
}
0520
#if CONFIG_PGTABLE_LEVELS >= 5
__visible p4dval_t xen_p4d_val(p4d_t p4d)
{
	return pte_mfn_to_pfn(p4d.p4d);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_p4d_val);

__visible p4d_t xen_make_p4d(p4dval_t p4d)
{
	p4d = pte_pfn_to_mfn(p4d);

	return native_make_p4d(p4d);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_p4d);
#endif
0536
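/*
 * Helpers for __xen_pgd_walk() below: walk one pmd/pud/p4d level and
 * invoke the callback on each page table page found, limiting the walk
 * at the final entry to the addresses below 'limit'.
 */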
static void xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd,
		void (*func)(struct mm_struct *mm, struct page *,
			     enum pt_level),
		bool last, unsigned long limit)
{
	int i, nr;

	nr = last ? pmd_index(limit) + 1 : PTRS_PER_PMD;
	for (i = 0; i < nr; i++) {
		if (!pmd_none(pmd[i]))
			(*func)(mm, pmd_page(pmd[i]), PT_PTE);
	}
}

static void xen_pud_walk(struct mm_struct *mm, pud_t *pud,
		void (*func)(struct mm_struct *mm, struct page *,
			     enum pt_level),
		bool last, unsigned long limit)
{
	int i, nr;

	nr = last ? pud_index(limit) + 1 : PTRS_PER_PUD;
	for (i = 0; i < nr; i++) {
		pmd_t *pmd;

		if (pud_none(pud[i]))
			continue;

		pmd = pmd_offset(&pud[i], 0);
		if (PTRS_PER_PMD > 1)
			(*func)(mm, virt_to_page(pmd), PT_PMD);
		xen_pmd_walk(mm, pmd, func, last && i == nr - 1, limit);
	}
}

static void xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d,
		void (*func)(struct mm_struct *mm, struct page *,
			     enum pt_level),
		bool last, unsigned long limit)
{
	pud_t *pud;

	if (p4d_none(*p4d))
		return;

	pud = pud_offset(p4d, 0);
	if (PTRS_PER_PUD > 1)
		(*func)(mm, virt_to_page(pud), PT_PUD);
	xen_pud_walk(mm, pud, func, last, limit);
}
0587 }
/*
 * (Yet another) pagetable walker.  This one is intended for pinning a
 * pagetable.  This means that it walks a pagetable and calls the
 * callback function on each page it finds making up the page table,
 * at every level.  It walks the entire pagetable, but it only bothers
 * pinning pte pages which are below limit.  In the normal case this
 * will be STACK_TOP_MAX, but at boot we need to pin up to
 * FIXADDR_TOP.
 *
 * We must skip the Xen hole in the middle of the address space, just
 * after the big x86-64 virtual hole.
 */
static void __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
			   void (*func)(struct mm_struct *mm, struct page *,
					enum pt_level),
			   unsigned long limit)
{
	int i, nr;
	unsigned hole_low = 0, hole_high = 0;

	/* The limit is the last byte to be touched */
	limit--;
	BUG_ON(limit >= FIXADDR_TOP);

	/*
	 * 64-bit has a great big hole in the middle of the address
	 * space, which contains the Xen code, data, and hypercall
	 * tables.  Skip it.
	 */
	hole_low = pgd_index(GUARD_HOLE_BASE_ADDR);
	hole_high = pgd_index(GUARD_HOLE_END_ADDR);

	nr = pgd_index(limit) + 1;
	for (i = 0; i < nr; i++) {
		p4d_t *p4d;

		if (i >= hole_low && i < hole_high)
			continue;

		if (pgd_none(pgd[i]))
			continue;

		p4d = p4d_offset(&pgd[i], 0);
		xen_p4d_walk(mm, p4d, func, i == nr - 1, limit);
	}

	/* Do the top level last, so that the callbacks can use it as
	   a cue to do final things like tlb flushes. */
	(*func)(mm, virt_to_page(pgd), PT_PGD);
}
0638
static void xen_pgd_walk(struct mm_struct *mm,
			 void (*func)(struct mm_struct *mm, struct page *,
				      enum pt_level),
			 unsigned long limit)
{
	__xen_pgd_walk(mm, mm->pgd, func, limit);
}

/*
 * If we're using split pte locks, then take the page's lock and
 * return a pointer to it.  Otherwise return NULL.
 */
static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm)
{
	spinlock_t *ptl = NULL;

#if USE_SPLIT_PTE_PTLOCKS
	ptl = ptlock_ptr(page);
	spin_lock_nest_lock(ptl, &mm->page_table_lock);
#endif

	return ptl;
}

static void xen_pte_unlock(void *v)
{
	spinlock_t *ptl = v;
	spin_unlock(ptl);
}

static void xen_do_pin(unsigned level, unsigned long pfn)
{
	struct mmuext_op op;

	op.cmd = level;
	op.arg1.mfn = pfn_to_mfn(pfn);

	xen_extend_mmuext_op(&op);
}
0676
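/*
 * Pin callback: remap a page table page read-only and, for split-ptlock
 * PTE pages, issue the L1 pin immediately (see the comment in the body
 * below for the locking rationale).
 */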
static void xen_pin_page(struct mm_struct *mm, struct page *page,
			 enum pt_level level)
{
	unsigned pgfl = TestSetPagePinned(page);

	if (!pgfl) {
		void *pt = lowmem_page_address(page);
		unsigned long pfn = page_to_pfn(page);
		struct multicall_space mcs = __xen_mc_entry(0);
		spinlock_t *ptl;

		/*
		 * We need to hold the pagetable lock between the time
		 * we make the pagetable RO and when we actually pin
		 * it.  If we don't, then other users may come in and
		 * attempt to update the pagetable by writing it,
		 * which will fail because the memory is RO but not
		 * pinned, so Xen won't do the trap'n'emulate.
		 *
		 * If we're using split pte locks, we can't hold the
		 * entire pagetable's worth of locks during the
		 * traverse, because we may wrap the preempt count (8
		 * bits).  The solution is to mark RO and pin each PTE
		 * page while holding the lock.  This means the number
		 * of locks we end up holding is never more than a
		 * batch size (~32 entries, at present).
		 *
		 * If we're not using split pte locks, we needn't pin
		 * the PTE pages independently, because all the
		 * pagetable pages will end up pinned implicitly when
		 * the whole pagetable is pinned.
		 */
		ptl = NULL;
		if (level == PT_PTE)
			ptl = xen_pte_lock(page, mm);

		MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
					pfn_pte(pfn, PAGE_KERNEL_RO),
					level == PT_PGD ? UVMF_TLB_FLUSH : 0);

		if (ptl) {
			xen_do_pin(MMUEXT_PIN_L1_TABLE, pfn);

			/* Queue a deferred unlock for when this batch
			   is completed. */
			xen_mc_callback(xen_pte_unlock, ptl);
		}
	}
}

/*
 * This is called just after a mm has been created, but it has not
 * been used yet.  We need to make sure that its pagetable is all
 * read-only, and can be pinned.
 */
static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
{
	pgd_t *user_pgd = xen_get_user_pgd(pgd);

	trace_xen_mmu_pgd_pin(mm, pgd);

	xen_mc_batch();

	__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT);

	xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd)));

	if (user_pgd) {
		xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD);
		xen_do_pin(MMUEXT_PIN_L4_TABLE,
			   PFN_DOWN(__pa(user_pgd)));
	}

	xen_mc_issue(0);
}

static void xen_pgd_pin(struct mm_struct *mm)
{
	__xen_pgd_pin(mm, mm->pgd);
}

/*
 * On save, we need to pin all pagetables to make sure they get their
 * mfns turned into pfns.  Search the list for any unpinned pgds and pin
 * them (unpinned pgds are not currently in use, probably because the
 * process is under construction or destruction).
 *
 * Expected to be called in stop_machine() ("equivalent to taking
 * every spinlock in the system"), so the locking doesn't really
 * matter all that much.
 */
void xen_mm_pin_all(void)
{
	struct page *page;

	spin_lock(&pgd_lock);

	list_for_each_entry(page, &pgd_list, lru) {
		if (!PagePinned(page)) {
			__xen_pgd_pin(&init_mm, (pgd_t *)page_address(page));
			SetPageSavePinned(page);
		}
	}

	spin_unlock(&pgd_lock);
}

static void __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
				   enum pt_level level)
{
	SetPagePinned(page);
}

/*
 * The init_mm pagetable is really pinned as soon as its created, but
 * that's before we have page structures to store the bits.  So do all
 * the book-keeping now once struct pages for allocated pages are
 * initialized. This happens only after memblock_free_all() is called.
 */
static void __init xen_after_bootmem(void)
{
	static_branch_enable(&xen_struct_pages_ready);
#ifdef CONFIG_X86_VSYSCALL_EMULATION
	SetPagePinned(virt_to_page(level3_user_vsyscall));
#endif
	xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP);
}
0801
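/* Unpin callback: undo xen_pin_page(), remapping the page read-write. */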
static void xen_unpin_page(struct mm_struct *mm, struct page *page,
			   enum pt_level level)
{
	unsigned pgfl = TestClearPagePinned(page);

	if (pgfl) {
		void *pt = lowmem_page_address(page);
		unsigned long pfn = page_to_pfn(page);
		spinlock_t *ptl = NULL;
		struct multicall_space mcs;

		/*
		 * Do the converse to pinning: take the pte lock (if
		 * split pte locks are in use), unpin the PTE page
		 * while it is still read-only, and only then remap it
		 * read-write.
		 */
		if (level == PT_PTE) {
			ptl = xen_pte_lock(page, mm);

			if (ptl)
				xen_do_pin(MMUEXT_UNPIN_TABLE, pfn);
		}

		mcs = __xen_mc_entry(0);

		MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
					pfn_pte(pfn, PAGE_KERNEL),
					level == PT_PGD ? UVMF_TLB_FLUSH : 0);

		if (ptl) {
			/* unlock when batch completed */
			xen_mc_callback(xen_pte_unlock, ptl);
		}
	}
}
0839
0840
static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd)
{
	pgd_t *user_pgd = xen_get_user_pgd(pgd);

	trace_xen_mmu_pgd_unpin(mm, pgd);

	xen_mc_batch();

	xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));

	if (user_pgd) {
		xen_do_pin(MMUEXT_UNPIN_TABLE,
			   PFN_DOWN(__pa(user_pgd)));
		xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD);
	}

	__xen_pgd_walk(mm, pgd, xen_unpin_page, USER_LIMIT);

	xen_mc_issue(0);
}

static void xen_pgd_unpin(struct mm_struct *mm)
{
	__xen_pgd_unpin(mm, mm->pgd);
}

/*
 * On resume, undo any pinning done at save, so that the rest of the
 * kernel doesn't see any unexpected pinned pagetables.
 */
void xen_mm_unpin_all(void)
{
	struct page *page;

	spin_lock(&pgd_lock);

	list_for_each_entry(page, &pgd_list, lru) {
		if (PageSavePinned(page)) {
			BUG_ON(!PagePinned(page));
			__xen_pgd_unpin(&init_mm, (pgd_t *)page_address(page));
			ClearPageSavePinned(page);
		}
	}

	spin_unlock(&pgd_lock);
}

static void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
{
	spin_lock(&next->page_table_lock);
	xen_pgd_pin(next);
	spin_unlock(&next->page_table_lock);
}

static void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
{
	spin_lock(&mm->page_table_lock);
	xen_pgd_pin(mm);
	spin_unlock(&mm->page_table_lock);
}
0901
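/*
 * Make sure this cpu is no longer referencing the given mm: switch away
 * from it if it is loaded, and flush any pending hypercalls that still
 * reference its cr3.
 */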
static void drop_mm_ref_this_cpu(void *info)
{
	struct mm_struct *mm = info;

	if (this_cpu_read(cpu_tlbstate.loaded_mm) == mm)
		leave_mm(smp_processor_id());

	/*
	 * If this cpu still has a stale cr3 reference, then make sure
	 * it has been flushed.
	 */
	if (this_cpu_read(xen_current_cr3) == __pa(mm->pgd))
		xen_mc_flush();
}

#ifdef CONFIG_SMP
/*
 * Another cpu may still have their %cr3 pointing at the pagetable, so
 * we need to repoint it somewhere else before we can unpin it.
 */
static void xen_drop_mm_ref(struct mm_struct *mm)
{
	cpumask_var_t mask;
	unsigned cpu;

	drop_mm_ref_this_cpu(mm);

	/* Get the "official" set of cpus referring to our pagetable. */
	if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) {
		for_each_online_cpu(cpu) {
			if (per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
				continue;
			smp_call_function_single(cpu, drop_mm_ref_this_cpu, mm, 1);
		}
		return;
	}

	/*
	 * It's possible that a vcpu may have a stale reference to our
	 * cr3, because it's in lazy mode and hasn't yet flushed its
	 * set of pending hypercalls.  In this case, we can look at
	 * its actual current cr3 value, and force it to flush if
	 * needed.
	 */
	cpumask_clear(mask);
	for_each_online_cpu(cpu) {
		if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
			cpumask_set_cpu(cpu, mask);
	}

	smp_call_function_many(mask, drop_mm_ref_this_cpu, mm, 1);
	free_cpumask_var(mask);
}
#else
static void xen_drop_mm_ref(struct mm_struct *mm)
{
	drop_mm_ref_this_cpu(mm);
}
#endif
0961
/*
 * While a process runs, Xen pins its pagetable, which means that the
 * hypervisor forces it to be read-only, and it controls all updates
 * to it.  This means that all pagetable updates have to go via the
 * hypervisor, which is moderately expensive.
 *
 * Since we're pulling the pagetable down, we switch to use init_mm,
 * unpin the old process pagetable and mark it all read-write, which
 * allows further operations on it to be simple memory accesses.
 *
 * We only need to do this in two cases: when an mm is exiting, and
 * when a pagetable is being shared between processes.
 */
static void xen_exit_mmap(struct mm_struct *mm)
{
	get_cpu();		/* make sure we don't move around */
	xen_drop_mm_ref(mm);
	put_cpu();

	spin_lock(&mm->page_table_lock);

	/* pgd may not be pinned in the error exit path of execve */
	if (xen_page_pinned(mm->pgd))
		xen_pgd_unpin(mm);

	spin_unlock(&mm->page_table_lock);
}
0990
0991 static void xen_post_allocator_init(void);
0992
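/* Issue a single (un)pin mmuext hypercall for the page table page at pfn. */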
0993 static void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
0994 {
0995 struct mmuext_op op;
0996
0997 op.cmd = cmd;
0998 op.arg1.mfn = pfn_to_mfn(pfn);
0999 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
1000 BUG();
1001 }
1002
1003 static void __init xen_cleanhighmap(unsigned long vaddr,
1004 unsigned long vaddr_end)
1005 {
1006 unsigned long kernel_end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1;
1007 pmd_t *pmd = level2_kernel_pgt + pmd_index(vaddr);
1008
1009
1010
1011 for (; vaddr <= vaddr_end && (pmd < (level2_kernel_pgt + PTRS_PER_PMD));
1012 pmd++, vaddr += PMD_SIZE) {
1013 if (pmd_none(*pmd))
1014 continue;
1015 if (vaddr < (unsigned long) _text || vaddr > kernel_end)
1016 set_pmd(pmd, __pmd(0));
1017 }
1018
1019
1020 xen_mc_flush();
1021 }
1022
/*
 * Make a page range writeable and free it.
 */
1026 static void __init xen_free_ro_pages(unsigned long paddr, unsigned long size)
1027 {
1028 void *vaddr = __va(paddr);
1029 void *vaddr_end = vaddr + size;
1030
1031 for (; vaddr < vaddr_end; vaddr += PAGE_SIZE)
1032 make_lowmem_page_readwrite(vaddr);
1033
1034 memblock_phys_free(paddr, size);
1035 }
1036
1037 static void __init xen_cleanmfnmap_free_pgtbl(void *pgtbl, bool unpin)
1038 {
1039 unsigned long pa = __pa(pgtbl) & PHYSICAL_PAGE_MASK;
1040
1041 if (unpin)
1042 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(pa));
1043 ClearPagePinned(virt_to_page(__va(pa)));
1044 xen_free_ro_pages(pa, PAGE_SIZE);
1045 }
1046
1047 static void __init xen_cleanmfnmap_pmd(pmd_t *pmd, bool unpin)
1048 {
1049 unsigned long pa;
1050 pte_t *pte_tbl;
1051 int i;
1052
1053 if (pmd_large(*pmd)) {
1054 pa = pmd_val(*pmd) & PHYSICAL_PAGE_MASK;
1055 xen_free_ro_pages(pa, PMD_SIZE);
1056 return;
1057 }
1058
1059 pte_tbl = pte_offset_kernel(pmd, 0);
1060 for (i = 0; i < PTRS_PER_PTE; i++) {
1061 if (pte_none(pte_tbl[i]))
1062 continue;
1063 pa = pte_pfn(pte_tbl[i]) << PAGE_SHIFT;
1064 xen_free_ro_pages(pa, PAGE_SIZE);
1065 }
1066 set_pmd(pmd, __pmd(0));
1067 xen_cleanmfnmap_free_pgtbl(pte_tbl, unpin);
1068 }
1069
1070 static void __init xen_cleanmfnmap_pud(pud_t *pud, bool unpin)
1071 {
1072 unsigned long pa;
1073 pmd_t *pmd_tbl;
1074 int i;
1075
1076 if (pud_large(*pud)) {
1077 pa = pud_val(*pud) & PHYSICAL_PAGE_MASK;
1078 xen_free_ro_pages(pa, PUD_SIZE);
1079 return;
1080 }
1081
1082 pmd_tbl = pmd_offset(pud, 0);
1083 for (i = 0; i < PTRS_PER_PMD; i++) {
1084 if (pmd_none(pmd_tbl[i]))
1085 continue;
1086 xen_cleanmfnmap_pmd(pmd_tbl + i, unpin);
1087 }
1088 set_pud(pud, __pud(0));
1089 xen_cleanmfnmap_free_pgtbl(pmd_tbl, unpin);
1090 }
1091
1092 static void __init xen_cleanmfnmap_p4d(p4d_t *p4d, bool unpin)
1093 {
1094 unsigned long pa;
1095 pud_t *pud_tbl;
1096 int i;
1097
1098 if (p4d_large(*p4d)) {
1099 pa = p4d_val(*p4d) & PHYSICAL_PAGE_MASK;
1100 xen_free_ro_pages(pa, P4D_SIZE);
1101 return;
1102 }
1103
1104 pud_tbl = pud_offset(p4d, 0);
1105 for (i = 0; i < PTRS_PER_PUD; i++) {
1106 if (pud_none(pud_tbl[i]))
1107 continue;
1108 xen_cleanmfnmap_pud(pud_tbl + i, unpin);
1109 }
1110 set_p4d(p4d, __p4d(0));
1111 xen_cleanmfnmap_free_pgtbl(pud_tbl, unpin);
1112 }
1113
/*
 * Since it is well isolated we can (and since it is perhaps large we should)
 * also free the page tables mapping the initial P2M table.
 */
1118 static void __init xen_cleanmfnmap(unsigned long vaddr)
1119 {
1120 pgd_t *pgd;
1121 p4d_t *p4d;
1122 bool unpin;
1123
1124 unpin = (vaddr == 2 * PGDIR_SIZE);
1125 vaddr &= PMD_MASK;
1126 pgd = pgd_offset_k(vaddr);
1127 p4d = p4d_offset(pgd, 0);
1128 if (!p4d_none(*p4d))
1129 xen_cleanmfnmap_p4d(p4d, unpin);
1130 }
1131
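/*
 * Free the memory holding the early p2m list supplied by the hypervisor,
 * once the p2m has been copied into the kernel's own vmalloc-based tree
 * (see xen_pagetable_p2m_setup() below).
 */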
1132 static void __init xen_pagetable_p2m_free(void)
1133 {
1134 unsigned long size;
1135 unsigned long addr;
1136
1137 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
1138
1139
1140 if ((unsigned long)xen_p2m_addr == xen_start_info->mfn_list)
1141 return;
1142
1143
1144 memset((void *)xen_start_info->mfn_list, 0xff, size);
1145
1146 addr = xen_start_info->mfn_list;
1147
1148
1149
1150
1151
1152
1153
1154 size = roundup(size, PMD_SIZE);
1155
1156 if (addr >= __START_KERNEL_map) {
1157 xen_cleanhighmap(addr, addr + size);
1158 size = PAGE_ALIGN(xen_start_info->nr_pages *
1159 sizeof(unsigned long));
1160 memblock_free((void *)addr, size);
1161 } else {
1162 xen_cleanmfnmap(addr);
1163 }
1164 }
1165
1166 static void __init xen_pagetable_cleanhighmap(void)
1167 {
1168 unsigned long size;
1169 unsigned long addr;
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180 addr = xen_start_info->pt_base;
1181 size = xen_start_info->nr_pt_frames * PAGE_SIZE;
1182
1183 xen_cleanhighmap(addr, roundup(addr + size, PMD_SIZE * 2));
1184 xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base));
1185 }
1186
1187 static void __init xen_pagetable_p2m_setup(void)
1188 {
1189 xen_vmalloc_p2m_tree();
1190
1191 xen_pagetable_p2m_free();
1192
1193 xen_pagetable_cleanhighmap();
1194
1195
1196 xen_start_info->mfn_list = (unsigned long)xen_p2m_addr;
1197 }
1198
static void __init xen_pagetable_init(void)
{
	/*
	 * The majority of further PTE writes is to pagetables already
	 * announced as such to Xen. Hence it is more efficient to use
	 * hypercalls for these updates.
	 */
	pv_ops.mmu.set_pte = __xen_set_pte;

	paging_init();
	xen_post_allocator_init();

	xen_pagetable_p2m_setup();

	/* Allocate and initialize top and mid mfn levels for p2m structure */
	xen_build_mfn_list_list();

	/* Remap memory freed due to conflicts with E820 map */
	xen_remap_memory();
	xen_setup_mfn_list_list();
}
1220
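/* Page faults are handled by Xen; just mirror cr2 into the shared vcpu info. */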
1221 static noinstr void xen_write_cr2(unsigned long cr2)
1222 {
1223 this_cpu_read(xen_vcpu)->arch.cr2 = cr2;
1224 }
1225
1226 static noinline void xen_flush_tlb(void)
1227 {
1228 struct mmuext_op *op;
1229 struct multicall_space mcs;
1230
1231 preempt_disable();
1232
1233 mcs = xen_mc_entry(sizeof(*op));
1234
1235 op = mcs.args;
1236 op->cmd = MMUEXT_TLB_FLUSH_LOCAL;
1237 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
1238
1239 xen_mc_issue(PARAVIRT_LAZY_MMU);
1240
1241 preempt_enable();
1242 }
1243
1244 static void xen_flush_tlb_one_user(unsigned long addr)
1245 {
1246 struct mmuext_op *op;
1247 struct multicall_space mcs;
1248
1249 trace_xen_mmu_flush_tlb_one_user(addr);
1250
1251 preempt_disable();
1252
1253 mcs = xen_mc_entry(sizeof(*op));
1254 op = mcs.args;
1255 op->cmd = MMUEXT_INVLPG_LOCAL;
1256 op->arg1.linear_addr = addr & PAGE_MASK;
1257 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
1258
1259 xen_mc_issue(PARAVIRT_LAZY_MMU);
1260
1261 preempt_enable();
1262 }
1263
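/*
 * Remote TLB flush: hand the target cpumask to the hypervisor in a single
 * MMUEXT_TLB_FLUSH_MULTI (or MMUEXT_INVLPG_MULTI for a single page)
 * operation instead of sending IPIs ourselves.
 */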
1264 static void xen_flush_tlb_multi(const struct cpumask *cpus,
1265 const struct flush_tlb_info *info)
1266 {
1267 struct {
1268 struct mmuext_op op;
1269 DECLARE_BITMAP(mask, NR_CPUS);
1270 } *args;
1271 struct multicall_space mcs;
1272 const size_t mc_entry_size = sizeof(args->op) +
1273 sizeof(args->mask[0]) * BITS_TO_LONGS(num_possible_cpus());
1274
1275 trace_xen_mmu_flush_tlb_multi(cpus, info->mm, info->start, info->end);
1276
1277 if (cpumask_empty(cpus))
1278 return;
1279
1280 mcs = xen_mc_entry(mc_entry_size);
1281 args = mcs.args;
1282 args->op.arg2.vcpumask = to_cpumask(args->mask);
1283
1284
1285 cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask);
1286
1287 args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
1288 if (info->end != TLB_FLUSH_ALL &&
1289 (info->end - info->start) <= PAGE_SIZE) {
1290 args->op.cmd = MMUEXT_INVLPG_MULTI;
1291 args->op.arg1.linear_addr = info->start;
1292 }
1293
1294 MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF);
1295
1296 xen_mc_issue(PARAVIRT_LAZY_MMU);
1297 }
1298
1299 static unsigned long xen_read_cr3(void)
1300 {
1301 return this_cpu_read(xen_cr3);
1302 }
1303
1304 static void set_current_cr3(void *v)
1305 {
1306 this_cpu_write(xen_current_cr3, (unsigned long)v);
1307 }
1308
1309 static void __xen_write_cr3(bool kernel, unsigned long cr3)
1310 {
1311 struct mmuext_op op;
1312 unsigned long mfn;
1313
1314 trace_xen_mmu_write_cr3(kernel, cr3);
1315
1316 if (cr3)
1317 mfn = pfn_to_mfn(PFN_DOWN(cr3));
1318 else
1319 mfn = 0;
1320
1321 WARN_ON(mfn == 0 && kernel);
1322
1323 op.cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR;
1324 op.arg1.mfn = mfn;
1325
1326 xen_extend_mmuext_op(&op);
1327
1328 if (kernel) {
1329 this_cpu_write(xen_cr3, cr3);
1330
1331
1332
1333 xen_mc_callback(set_current_cr3, (void *)cr3);
1334 }
1335 }
1336 static void xen_write_cr3(unsigned long cr3)
1337 {
1338 pgd_t *user_pgd = xen_get_user_pgd(__va(cr3));
1339
1340 BUG_ON(preemptible());
1341
1342 xen_mc_batch();
1343
1344
1345
1346 this_cpu_write(xen_cr3, cr3);
1347
1348 __xen_write_cr3(true, cr3);
1349
1350 if (user_pgd)
1351 __xen_write_cr3(false, __pa(user_pgd));
1352 else
1353 __xen_write_cr3(false, 0);
1354
1355 xen_mc_issue(PARAVIRT_LAZY_CPU);
1356 }
1357
/*
 * At the start of the day - when Xen launches a guest, it has already
 * built pagetables for the guest.  We diligently look over them in
 * xen_setup_kernel_pagetable() and graft them into init_top_pgt and
 * its friends, then load the new init_top_pgt and continue on.
 *
 * The generic code starts (start_kernel) and 'init_mem_mapping' sets
 * up the rest of the pagetables; when it has finished it loads cr3.
 * At that point there is still no user pagetable, so this early
 * variant of write_cr3 only loads the kernel baseptr.  The full
 * xen_write_cr3(), which also handles the user pagetable, is
 * installed later from xen_post_allocator_init().
 */
static void __init xen_write_cr3_init(unsigned long cr3)
{
	BUG_ON(preemptible());

	xen_mc_batch();

	/* Update while interrupts are disabled, so it's atomic with
	   respect to ipis */
	this_cpu_write(xen_cr3, cr3);

	__xen_write_cr3(true, cr3);

	xen_mc_issue(PARAVIRT_LAZY_CPU);
}
1392
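/*
 * Allocate the user-mode top level pagetable that accompanies each kernel
 * pgd on 64-bit PV and stash it in page->private, where xen_get_user_pgd()
 * finds it.  Only the vsyscall slot is pre-populated.
 */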
1393 static int xen_pgd_alloc(struct mm_struct *mm)
1394 {
1395 pgd_t *pgd = mm->pgd;
1396 struct page *page = virt_to_page(pgd);
1397 pgd_t *user_pgd;
1398 int ret = -ENOMEM;
1399
1400 BUG_ON(PagePinned(virt_to_page(pgd)));
1401 BUG_ON(page->private != 0);
1402
1403 user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
1404 page->private = (unsigned long)user_pgd;
1405
1406 if (user_pgd != NULL) {
1407 #ifdef CONFIG_X86_VSYSCALL_EMULATION
1408 user_pgd[pgd_index(VSYSCALL_ADDR)] =
1409 __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
1410 #endif
1411 ret = 0;
1412 }
1413
1414 BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
1415
1416 return ret;
1417 }
1418
1419 static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
1420 {
1421 pgd_t *user_pgd = xen_get_user_pgd(pgd);
1422
1423 if (user_pgd)
1424 free_page((unsigned long)user_pgd);
1425 }
1426
/*
 * Init-time set_pte while constructing initial pagetables, which
 * doesn't allow RO page table pages to be remapped RW.
 *
 * If there is no MFN for this PFN then this page is initially
 * ballooned out so clear the PTE (as in decrease_reservation() in
 * drivers/xen/balloon.c).
 *
 * Many of these PTE updates are done on unpinned and writable pages
 * and doing a hypercall for these is unnecessary and expensive.  At
 * this point it is rarely possible to tell if a page is pinned, so
 * mostly write the PTE directly and rely on Xen trapping and
 * emulating any updates as necessary.
 */
static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
{
	if (unlikely(is_early_ioremap_ptep(ptep)))
		__xen_set_pte(ptep, pte);
	else
		native_set_pte(ptep, pte);
}
1448
__visible pte_t xen_make_pte_init(pteval_t pte)
{
	unsigned long pfn;

	/*
	 * Pages belonging to the initial p2m list mapped outside the default
	 * address range must be mapped read-only. This region contains the
	 * page tables for mapping the p2m list, too, and page tables MUST be
	 * mapped read-only.
	 */
	pfn = (pte & PTE_PFN_MASK) >> PAGE_SHIFT;
	if (xen_start_info->mfn_list < __START_KERNEL_map &&
	    pfn >= xen_start_info->first_p2m_pfn &&
	    pfn < xen_start_info->first_p2m_pfn + xen_start_info->nr_p2m_frames)
		pte &= ~_PAGE_RW;

	pte = pte_pfn_to_mfn(pte);
	return native_make_pte(pte);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_init);
1469
/* Early in boot, while setting up the initial pagetable, assume
   everything is pinned. */
1472 static void __init xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
1473 {
1474 #ifdef CONFIG_FLATMEM
1475 BUG_ON(mem_map);
1476 #endif
1477 make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
1478 pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
1479 }
1480
/* Used for pmd and pud */
1482 static void __init xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn)
1483 {
1484 #ifdef CONFIG_FLATMEM
1485 BUG_ON(mem_map);
1486 #endif
1487 make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
1488 }
1489
/*
 * Early release_pte assumes that all pts are pinned, since there's
 * only init_mm and anything attached to that is pinned.
 */
1492 static void __init xen_release_pte_init(unsigned long pfn)
1493 {
1494 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
1495 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
1496 }
1497
1498 static void __init xen_release_pmd_init(unsigned long pfn)
1499 {
1500 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
1501 }
1502
1503 static inline void __pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
1504 {
1505 struct multicall_space mcs;
1506 struct mmuext_op *op;
1507
1508 mcs = __xen_mc_entry(sizeof(*op));
1509 op = mcs.args;
1510 op->cmd = cmd;
1511 op->arg1.mfn = pfn_to_mfn(pfn);
1512
1513 MULTI_mmuext_op(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
1514 }
1515
1516 static inline void __set_pfn_prot(unsigned long pfn, pgprot_t prot)
1517 {
1518 struct multicall_space mcs;
1519 unsigned long addr = (unsigned long)__va(pfn << PAGE_SHIFT);
1520
1521 mcs = __xen_mc_entry(0);
1522 MULTI_update_va_mapping(mcs.mc, (unsigned long)addr,
1523 pfn_pte(pfn, prot), 0);
1524 }
1525
/*
 * This needs to make sure the new pte page is pinned iff it's being
 * attached to a pinned pagetable.
 */
1528 static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn,
1529 unsigned level)
1530 {
1531 bool pinned = xen_page_pinned(mm->pgd);
1532
1533 trace_xen_mmu_alloc_ptpage(mm, pfn, level, pinned);
1534
1535 if (pinned) {
1536 struct page *page = pfn_to_page(pfn);
1537
1538 pinned = false;
1539 if (static_branch_likely(&xen_struct_pages_ready)) {
1540 pinned = PagePinned(page);
1541 SetPagePinned(page);
1542 }
1543
1544 xen_mc_batch();
1545
1546 __set_pfn_prot(pfn, PAGE_KERNEL_RO);
1547
1548 if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS && !pinned)
1549 __pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
1550
1551 xen_mc_issue(PARAVIRT_LAZY_MMU);
1552 }
1553 }
1554
1555 static void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn)
1556 {
1557 xen_alloc_ptpage(mm, pfn, PT_PTE);
1558 }
1559
1560 static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
1561 {
1562 xen_alloc_ptpage(mm, pfn, PT_PMD);
1563 }
1564
1565
1566 static inline void xen_release_ptpage(unsigned long pfn, unsigned level)
1567 {
1568 struct page *page = pfn_to_page(pfn);
1569 bool pinned = PagePinned(page);
1570
1571 trace_xen_mmu_release_ptpage(pfn, level, pinned);
1572
1573 if (pinned) {
1574 xen_mc_batch();
1575
1576 if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS)
1577 __pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
1578
1579 __set_pfn_prot(pfn, PAGE_KERNEL);
1580
1581 xen_mc_issue(PARAVIRT_LAZY_MMU);
1582
1583 ClearPagePinned(page);
1584 }
1585 }
1586
1587 static void xen_release_pte(unsigned long pfn)
1588 {
1589 xen_release_ptpage(pfn, PT_PTE);
1590 }
1591
1592 static void xen_release_pmd(unsigned long pfn)
1593 {
1594 xen_release_ptpage(pfn, PT_PMD);
1595 }
1596
1597 static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
1598 {
1599 xen_alloc_ptpage(mm, pfn, PT_PUD);
1600 }
1601
1602 static void xen_release_pud(unsigned long pfn)
1603 {
1604 xen_release_ptpage(pfn, PT_PUD);
1605 }
1606
/*
 * Like __va(), but returns address in the kernel mapping (which is
 * all we have until the physical memory mapping has been set up).
 */
static void * __init __ka(phys_addr_t paddr)
{
	return (void *)(paddr + __START_KERNEL_map);
}

/* Convert a machine address to physical address */
static unsigned long __init m2p(phys_addr_t maddr)
{
	phys_addr_t paddr;

	maddr &= XEN_PTE_MFN_MASK;
	paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT;

	return paddr;
}

/* Convert a machine address to kernel virtual */
static void * __init m2v(phys_addr_t maddr)
{
	return __ka(m2p(maddr));
}
1632
1633
1634 static void __init set_page_prot_flags(void *addr, pgprot_t prot,
1635 unsigned long flags)
1636 {
1637 unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
1638 pte_t pte = pfn_pte(pfn, prot);
1639
1640 if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags))
1641 BUG();
1642 }
1643 static void __init set_page_prot(void *addr, pgprot_t prot)
1644 {
1645 return set_page_prot_flags(addr, prot, UVMF_NONE);
1646 }
1647
1648 void __init xen_setup_machphys_mapping(void)
1649 {
1650 struct xen_machphys_mapping mapping;
1651
1652 if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
1653 machine_to_phys_mapping = (unsigned long *)mapping.v_start;
1654 machine_to_phys_nr = mapping.max_mfn + 1;
1655 } else {
1656 machine_to_phys_nr = MACH2PHYS_NR_ENTRIES;
1657 }
1658 }
1659
1660 static void __init convert_pfn_mfn(void *v)
1661 {
1662 pte_t *pte = v;
1663 int i;
1664
1665
1666
1667 for (i = 0; i < PTRS_PER_PTE; i++)
1668 pte[i] = xen_make_pte(pte[i].pte);
1669 }
1670 static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
1671 unsigned long addr)
1672 {
1673 if (*pt_base == PFN_DOWN(__pa(addr))) {
1674 set_page_prot_flags((void *)addr, PAGE_KERNEL, UVMF_INVLPG);
1675 clear_page((void *)addr);
1676 (*pt_base)++;
1677 }
1678 if (*pt_end == PFN_DOWN(__pa(addr))) {
1679 set_page_prot_flags((void *)addr, PAGE_KERNEL, UVMF_INVLPG);
1680 clear_page((void *)addr);
1681 (*pt_end)--;
1682 }
1683 }
1684
/*
 * Set up the initial kernel pagetable.
 *
 * We can construct this by grafting the Xen provided pagetable into
 * head_64.S's preconstructed pagetables.  We copy the Xen L2's into
 * level2_ident_pgt, and level2_kernel_pgt.  This means that only the
 * kernel has a physical mapping to start with - but that's enough to
 * get __va working.  We need to fill in the rest of the physical
 * mapping once some sort of allocator has been set up.
 */
void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
{
	pud_t *l3;
	pmd_t *l2;
	unsigned long addr[3];
	unsigned long pt_base, pt_end;
	unsigned i;

	/*
	 * max_pfn_mapped is the last pfn mapped in the initial memory
	 * mappings. Considering that on Xen after the kernel mappings we
	 * have the mappings of some pages that don't exist in pfn space,
	 * we set max_pfn_mapped to the last real pfn mapped.
	 */
	if (xen_start_info->mfn_list < __START_KERNEL_map)
		max_pfn_mapped = xen_start_info->first_p2m_pfn;
	else
		max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list));

	pt_base = PFN_DOWN(__pa(xen_start_info->pt_base));
	pt_end = pt_base + xen_start_info->nr_pt_frames;
1713
1714
1715 init_top_pgt[0] = __pgd(0);
1716
1717
1718
1719
1720 convert_pfn_mfn(init_top_pgt);
1721
1722
1723 convert_pfn_mfn(level3_ident_pgt);
1724
1725
1726 convert_pfn_mfn(level3_kernel_pgt);
1727
1728
1729 convert_pfn_mfn(level2_fixmap_pgt);
1730
1731
1732 l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
1733 l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
1734
1735 addr[0] = (unsigned long)pgd;
1736 addr[1] = (unsigned long)l3;
1737 addr[2] = (unsigned long)l2;
1738
1739
1740
1741
1742
1743
1744 copy_page(level2_ident_pgt, l2);
1745
1746 copy_page(level2_kernel_pgt, l2);
1747
1748
1749
1750
1751
1752 if (__supported_pte_mask & _PAGE_NX) {
1753 for (i = 0; i < PTRS_PER_PMD; ++i) {
1754 if (pmd_none(level2_ident_pgt[i]))
1755 continue;
1756 level2_ident_pgt[i] = pmd_set_flags(level2_ident_pgt[i], _PAGE_NX);
1757 }
1758 }
1759
1760
1761 i = pgd_index(xen_start_info->mfn_list);
1762 if (i && i < pgd_index(__START_KERNEL_map))
1763 init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i];
1764
1765
1766 set_page_prot(init_top_pgt, PAGE_KERNEL_RO);
1767 set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
1768 set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
1769 set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
1770 set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
1771 set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
1772
1773 for (i = 0; i < FIXMAP_PMD_NUM; i++) {
1774 set_page_prot(level1_fixmap_pgt + i * PTRS_PER_PTE,
1775 PAGE_KERNEL_RO);
1776 }
1777
1778
1779 pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
1780 PFN_DOWN(__pa_symbol(init_top_pgt)));
1781
1782
1783 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
1784
1785 #ifdef CONFIG_X86_VSYSCALL_EMULATION
1786
1787 set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
1788 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE,
1789 PFN_DOWN(__pa_symbol(level3_user_vsyscall)));
1790 #endif
1791
1792
1793
1794
1795
1796 xen_mc_batch();
1797 __xen_write_cr3(true, __pa(init_top_pgt));
1798 xen_mc_issue(PARAVIRT_LAZY_CPU);
1799
1800
1801
1802
1803
1804
1805
1806 for (i = 0; i < ARRAY_SIZE(addr); i++)
1807 check_pt_base(&pt_base, &pt_end, addr[i]);
1808
1809
1810 xen_pt_base = PFN_PHYS(pt_base);
1811 xen_pt_size = (pt_end - pt_base) * PAGE_SIZE;
1812 memblock_reserve(xen_pt_base, xen_pt_size);
1813
1814
1815 xen_start_info = (struct start_info *)__va(__pa(xen_start_info));
1816 }
1817
1818
1819
1820
1821 static unsigned long __init xen_read_phys_ulong(phys_addr_t addr)
1822 {
1823 unsigned long *vaddr;
1824 unsigned long val;
1825
1826 vaddr = early_memremap_ro(addr, sizeof(val));
1827 val = *vaddr;
1828 early_memunmap(vaddr, sizeof(val));
1829 return val;
1830 }
1831
1832
1833
1834
1835
1836
1837 static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr)
1838 {
1839 phys_addr_t pa;
1840 pgd_t pgd;
1841 pud_t pud;
1842 pmd_t pmd;
1843 pte_t pte;
1844
1845 pa = read_cr3_pa();
1846 pgd = native_make_pgd(xen_read_phys_ulong(pa + pgd_index(vaddr) *
1847 sizeof(pgd)));
1848 if (!pgd_present(pgd))
1849 return 0;
1850
1851 pa = pgd_val(pgd) & PTE_PFN_MASK;
1852 pud = native_make_pud(xen_read_phys_ulong(pa + pud_index(vaddr) *
1853 sizeof(pud)));
1854 if (!pud_present(pud))
1855 return 0;
1856 pa = pud_val(pud) & PTE_PFN_MASK;
1857 if (pud_large(pud))
1858 return pa + (vaddr & ~PUD_MASK);
1859
1860 pmd = native_make_pmd(xen_read_phys_ulong(pa + pmd_index(vaddr) *
1861 sizeof(pmd)));
1862 if (!pmd_present(pmd))
1863 return 0;
1864 pa = pmd_val(pmd) & PTE_PFN_MASK;
1865 if (pmd_large(pmd))
1866 return pa + (vaddr & ~PMD_MASK);
1867
1868 pte = native_make_pte(xen_read_phys_ulong(pa + pte_index(vaddr) *
1869 sizeof(pte)));
1870 if (!pte_present(pte))
1871 return 0;
1872 pa = pte_pfn(pte) << PAGE_SHIFT;
1873
1874 return pa | (vaddr & ~PAGE_MASK);
1875 }
1876
/*
 * Find a new area for the hypervisor supplied p2m list and relocate the
 * p2m to this area.
 */
1881 void __init xen_relocate_p2m(void)
1882 {
1883 phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys;
1884 unsigned long p2m_pfn, p2m_pfn_end, n_frames, pfn, pfn_end;
1885 int n_pte, n_pt, n_pmd, n_pud, idx_pte, idx_pt, idx_pmd, idx_pud;
1886 pte_t *pt;
1887 pmd_t *pmd;
1888 pud_t *pud;
1889 pgd_t *pgd;
1890 unsigned long *new_p2m;
1891
1892 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
1893 n_pte = roundup(size, PAGE_SIZE) >> PAGE_SHIFT;
1894 n_pt = roundup(size, PMD_SIZE) >> PMD_SHIFT;
1895 n_pmd = roundup(size, PUD_SIZE) >> PUD_SHIFT;
1896 n_pud = roundup(size, P4D_SIZE) >> P4D_SHIFT;
1897 n_frames = n_pte + n_pt + n_pmd + n_pud;
1898
1899 new_area = xen_find_free_area(PFN_PHYS(n_frames));
1900 if (!new_area) {
1901 xen_raw_console_write("Can't find new memory area for p2m needed due to E820 map conflict\n");
1902 BUG();
1903 }
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913 pud_phys = new_area;
1914 pmd_phys = pud_phys + PFN_PHYS(n_pud);
1915 pt_phys = pmd_phys + PFN_PHYS(n_pmd);
1916 p2m_pfn = PFN_DOWN(pt_phys) + n_pt;
1917
1918 pgd = __va(read_cr3_pa());
1919 new_p2m = (unsigned long *)(2 * PGDIR_SIZE);
1920 for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
1921 pud = early_memremap(pud_phys, PAGE_SIZE);
1922 clear_page(pud);
1923 for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD);
1924 idx_pmd++) {
1925 pmd = early_memremap(pmd_phys, PAGE_SIZE);
1926 clear_page(pmd);
1927 for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD);
1928 idx_pt++) {
1929 pt = early_memremap(pt_phys, PAGE_SIZE);
1930 clear_page(pt);
1931 for (idx_pte = 0;
1932 idx_pte < min(n_pte, PTRS_PER_PTE);
1933 idx_pte++) {
1934 pt[idx_pte] = pfn_pte(p2m_pfn,
1935 PAGE_KERNEL);
1936 p2m_pfn++;
1937 }
1938 n_pte -= PTRS_PER_PTE;
1939 early_memunmap(pt, PAGE_SIZE);
1940 make_lowmem_page_readonly(__va(pt_phys));
1941 pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
1942 PFN_DOWN(pt_phys));
1943 pmd[idx_pt] = __pmd(_PAGE_TABLE | pt_phys);
1944 pt_phys += PAGE_SIZE;
1945 }
1946 n_pt -= PTRS_PER_PMD;
1947 early_memunmap(pmd, PAGE_SIZE);
1948 make_lowmem_page_readonly(__va(pmd_phys));
1949 pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE,
1950 PFN_DOWN(pmd_phys));
1951 pud[idx_pmd] = __pud(_PAGE_TABLE | pmd_phys);
1952 pmd_phys += PAGE_SIZE;
1953 }
1954 n_pmd -= PTRS_PER_PUD;
1955 early_memunmap(pud, PAGE_SIZE);
1956 make_lowmem_page_readonly(__va(pud_phys));
1957 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys));
1958 set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
1959 pud_phys += PAGE_SIZE;
1960 }
1961
1962
1963 memcpy(new_p2m, xen_p2m_addr, size);
1964 xen_p2m_addr = new_p2m;
1965
1966
1967 p2m_pfn = PFN_DOWN(xen_early_virt_to_phys(xen_start_info->mfn_list));
1968 BUG_ON(!p2m_pfn);
1969 p2m_pfn_end = p2m_pfn + PFN_DOWN(size);
1970
1971 if (xen_start_info->mfn_list < __START_KERNEL_map) {
1972 pfn = xen_start_info->first_p2m_pfn;
1973 pfn_end = xen_start_info->first_p2m_pfn +
1974 xen_start_info->nr_p2m_frames;
1975 set_pgd(pgd + 1, __pgd(0));
1976 } else {
1977 pfn = p2m_pfn;
1978 pfn_end = p2m_pfn_end;
1979 }
1980
1981 memblock_phys_free(PFN_PHYS(pfn), PAGE_SIZE * (pfn_end - pfn));
1982 while (pfn < pfn_end) {
1983 if (pfn == p2m_pfn) {
1984 pfn = p2m_pfn_end;
1985 continue;
1986 }
1987 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
1988 pfn++;
1989 }
1990
1991 xen_start_info->mfn_list = (unsigned long)xen_p2m_addr;
1992 xen_start_info->first_p2m_pfn = PFN_DOWN(new_area);
1993 xen_start_info->nr_p2m_frames = n_frames;
1994 }
1995
1996 void __init xen_reserve_special_pages(void)
1997 {
1998 phys_addr_t paddr;
1999
2000 memblock_reserve(__pa(xen_start_info), PAGE_SIZE);
2001 if (xen_start_info->store_mfn) {
2002 paddr = PFN_PHYS(mfn_to_pfn(xen_start_info->store_mfn));
2003 memblock_reserve(paddr, PAGE_SIZE);
2004 }
2005 if (!xen_initial_domain()) {
2006 paddr = PFN_PHYS(mfn_to_pfn(xen_start_info->console.domU.mfn));
2007 memblock_reserve(paddr, PAGE_SIZE);
2008 }
2009 }
2010
2011 void __init xen_pt_check_e820(void)
2012 {
2013 if (xen_is_e820_reserved(xen_pt_base, xen_pt_size)) {
2014 xen_raw_console_write("Xen hypervisor allocated page table memory conflicts with E820 map\n");
2015 BUG();
2016 }
2017 }
2018
2019 static unsigned char dummy_mapping[PAGE_SIZE] __page_aligned_bss;
2020
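/*
 * Paravirt fixmap hook: most fixmap slots map machine frames directly,
 * while the (IO-)APIC slots are pointed at a harmless dummy page since a
 * PV guest has no access to the real APIC.
 */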
2021 static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
2022 {
2023 pte_t pte;
2024 unsigned long vaddr;
2025
2026 phys >>= PAGE_SHIFT;
2027
2028 switch (idx) {
2029 case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
2030 #ifdef CONFIG_X86_VSYSCALL_EMULATION
2031 case VSYSCALL_PAGE:
2032 #endif
2033
2034 pte = pfn_pte(phys, prot);
2035 break;
2036
2037 #ifdef CONFIG_X86_LOCAL_APIC
2038 case FIX_APIC_BASE:
2039 pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL);
2040 break;
2041 #endif
2042
2043 #ifdef CONFIG_X86_IO_APIC
2044 case FIX_IO_APIC_BASE_0 ... FIX_IO_APIC_BASE_END:
2045
2046
2047
2048
2049 pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL);
2050 break;
2051 #endif
2052
2053 case FIX_PARAVIRT_BOOTMAP:
2054
2055
2056 pte = mfn_pte(phys, prot);
2057 break;
2058
2059 default:
2060
2061 pte = mfn_pte(phys, prot);
2062 break;
2063 }
2064
2065 vaddr = __fix_to_virt(idx);
2066 if (HYPERVISOR_update_va_mapping(vaddr, pte, UVMF_INVLPG))
2067 BUG();
2068
2069 #ifdef CONFIG_X86_VSYSCALL_EMULATION
2070
2071
2072 if (idx == VSYSCALL_PAGE)
2073 set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
2074 #endif
2075 }
2076
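/*
 * Once the normal page allocator is up, switch from the init-time
 * variants above to the final pv_ops implementations.
 */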
2077 static void __init xen_post_allocator_init(void)
2078 {
2079 pv_ops.mmu.set_pte = xen_set_pte;
2080 pv_ops.mmu.set_pmd = xen_set_pmd;
2081 pv_ops.mmu.set_pud = xen_set_pud;
2082 pv_ops.mmu.set_p4d = xen_set_p4d;
2083
2084
2085
2086 pv_ops.mmu.alloc_pte = xen_alloc_pte;
2087 pv_ops.mmu.alloc_pmd = xen_alloc_pmd;
2088 pv_ops.mmu.release_pte = xen_release_pte;
2089 pv_ops.mmu.release_pmd = xen_release_pmd;
2090 pv_ops.mmu.alloc_pud = xen_alloc_pud;
2091 pv_ops.mmu.release_pud = xen_release_pud;
2092 pv_ops.mmu.make_pte = PV_CALLEE_SAVE(xen_make_pte);
2093
2094 pv_ops.mmu.write_cr3 = &xen_write_cr3;
2095 }
2096
2097 static void xen_leave_lazy_mmu(void)
2098 {
2099 preempt_disable();
2100 xen_mc_flush();
2101 paravirt_leave_lazy_mmu();
2102 preempt_enable();
2103 }
2104
2105 static const typeof(pv_ops) xen_mmu_ops __initconst = {
2106 .mmu = {
2107 .read_cr2 = __PV_IS_CALLEE_SAVE(xen_read_cr2),
2108 .write_cr2 = xen_write_cr2,
2109
2110 .read_cr3 = xen_read_cr3,
2111 .write_cr3 = xen_write_cr3_init,
2112
2113 .flush_tlb_user = xen_flush_tlb,
2114 .flush_tlb_kernel = xen_flush_tlb,
2115 .flush_tlb_one_user = xen_flush_tlb_one_user,
2116 .flush_tlb_multi = xen_flush_tlb_multi,
2117 .tlb_remove_table = tlb_remove_table,
2118
2119 .pgd_alloc = xen_pgd_alloc,
2120 .pgd_free = xen_pgd_free,
2121
2122 .alloc_pte = xen_alloc_pte_init,
2123 .release_pte = xen_release_pte_init,
2124 .alloc_pmd = xen_alloc_pmd_init,
2125 .release_pmd = xen_release_pmd_init,
2126
2127 .set_pte = xen_set_pte_init,
2128 .set_pmd = xen_set_pmd_hyper,
2129
2130 .ptep_modify_prot_start = xen_ptep_modify_prot_start,
2131 .ptep_modify_prot_commit = xen_ptep_modify_prot_commit,
2132
2133 .pte_val = PV_CALLEE_SAVE(xen_pte_val),
2134 .pgd_val = PV_CALLEE_SAVE(xen_pgd_val),
2135
2136 .make_pte = PV_CALLEE_SAVE(xen_make_pte_init),
2137 .make_pgd = PV_CALLEE_SAVE(xen_make_pgd),
2138
2139 .set_pud = xen_set_pud_hyper,
2140
2141 .make_pmd = PV_CALLEE_SAVE(xen_make_pmd),
2142 .pmd_val = PV_CALLEE_SAVE(xen_pmd_val),
2143
2144 .pud_val = PV_CALLEE_SAVE(xen_pud_val),
2145 .make_pud = PV_CALLEE_SAVE(xen_make_pud),
2146 .set_p4d = xen_set_p4d_hyper,
2147
2148 .alloc_pud = xen_alloc_pmd_init,
2149 .release_pud = xen_release_pmd_init,
2150
2151 #if CONFIG_PGTABLE_LEVELS >= 5
2152 .p4d_val = PV_CALLEE_SAVE(xen_p4d_val),
2153 .make_p4d = PV_CALLEE_SAVE(xen_make_p4d),
2154 #endif
2155
2156 .activate_mm = xen_activate_mm,
2157 .dup_mmap = xen_dup_mmap,
2158 .exit_mmap = xen_exit_mmap,
2159
2160 .lazy_mode = {
2161 .enter = paravirt_enter_lazy_mmu,
2162 .leave = xen_leave_lazy_mmu,
2163 .flush = paravirt_flush_lazy_mmu,
2164 },
2165
2166 .set_fixmap = xen_set_fixmap,
2167 },
2168 };
2169
2170 void __init xen_init_mmu_ops(void)
2171 {
2172 x86_init.paging.pagetable_init = xen_pagetable_init;
2173 x86_init.hyper.init_after_bootmem = xen_after_bootmem;
2174
2175 pv_ops.mmu = xen_mmu_ops.mmu;
2176
2177 memset(dummy_mapping, 0xff, PAGE_SIZE);
2178 }
2179
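/*
 * Machinery for creating machine-contiguous DMA regions: the pfns of an
 * existing allocation are handed back to the hypervisor and exchanged for
 * a contiguous run of mfns (see xen_exchange_memory() below).
 */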
2180
2181 #define MAX_CONTIG_ORDER 9
2182 static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
2183
2184 #define VOID_PTE (mfn_pte(0, __pgprot(0)))
2185 static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
2186 unsigned long *in_frames,
2187 unsigned long *out_frames)
2188 {
2189 int i;
2190 struct multicall_space mcs;
2191
2192 xen_mc_batch();
2193 for (i = 0; i < (1UL<<order); i++, vaddr += PAGE_SIZE) {
2194 mcs = __xen_mc_entry(0);
2195
2196 if (in_frames)
2197 in_frames[i] = virt_to_mfn(vaddr);
2198
2199 MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0);
2200 __set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY);
2201
2202 if (out_frames)
2203 out_frames[i] = virt_to_pfn(vaddr);
2204 }
2205 xen_mc_issue(0);
2206 }
2207
/*
 * Update the pfn-to-mfn mappings for a virtual address range, either to
 * point to an array of mfns, or contiguously from a single starting
 * mfn.
 */
2213 static void xen_remap_exchanged_ptes(unsigned long vaddr, int order,
2214 unsigned long *mfns,
2215 unsigned long first_mfn)
2216 {
2217 unsigned i, limit;
2218 unsigned long mfn;
2219
2220 xen_mc_batch();
2221
2222 limit = 1u << order;
2223 for (i = 0; i < limit; i++, vaddr += PAGE_SIZE) {
2224 struct multicall_space mcs;
2225 unsigned flags;
2226
2227 mcs = __xen_mc_entry(0);
2228 if (mfns)
2229 mfn = mfns[i];
2230 else
2231 mfn = first_mfn + i;
2232
2233 if (i < (limit - 1))
2234 flags = 0;
2235 else {
2236 if (order == 0)
2237 flags = UVMF_INVLPG | UVMF_ALL;
2238 else
2239 flags = UVMF_TLB_FLUSH | UVMF_ALL;
2240 }
2241
2242 MULTI_update_va_mapping(mcs.mc, vaddr,
2243 mfn_pte(mfn, PAGE_KERNEL), flags);
2244
2245 set_phys_to_machine(virt_to_pfn(vaddr), mfn);
2246 }
2247
2248 xen_mc_issue(0);
2249 }
2250
/*
 * Perform the hypercall to exchange a region of our pfns to point to
 * memory with the required contiguous alignment.  Takes the pfns as
 * input, and populates mfns as output.
 *
 * Returns a success code indicating whether the hypervisor was able to
 * satisfy the request or not.
 */
2259 static int xen_exchange_memory(unsigned long extents_in, unsigned int order_in,
2260 unsigned long *pfns_in,
2261 unsigned long extents_out,
2262 unsigned int order_out,
2263 unsigned long *mfns_out,
2264 unsigned int address_bits)
2265 {
2266 long rc;
2267 int success;
2268
2269 struct xen_memory_exchange exchange = {
2270 .in = {
2271 .nr_extents = extents_in,
2272 .extent_order = order_in,
2273 .extent_start = pfns_in,
2274 .domid = DOMID_SELF
2275 },
2276 .out = {
2277 .nr_extents = extents_out,
2278 .extent_order = order_out,
2279 .extent_start = mfns_out,
2280 .address_bits = address_bits,
2281 .domid = DOMID_SELF
2282 }
2283 };
2284
2285 BUG_ON(extents_in << order_in != extents_out << order_out);
2286
2287 rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
2288 success = (exchange.nr_exchanged == extents_in);
2289
2290 BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
2291 BUG_ON(success && (rc != 0));
2292
2293 return success;
2294 }
2295
2296 int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
2297 unsigned int address_bits,
2298 dma_addr_t *dma_handle)
2299 {
2300 unsigned long *in_frames = discontig_frames, out_frame;
2301 unsigned long flags;
2302 int success;
2303 unsigned long vstart = (unsigned long)phys_to_virt(pstart);
2304
2305
2306
2307
2308
2309
2310
2311 if (unlikely(order > MAX_CONTIG_ORDER))
2312 return -ENOMEM;
2313
2314 memset((void *) vstart, 0, PAGE_SIZE << order);
2315
2316 spin_lock_irqsave(&xen_reservation_lock, flags);
2317
2318
2319 xen_zap_pfn_range(vstart, order, in_frames, NULL);
2320
2321
2322 out_frame = virt_to_pfn(vstart);
2323 success = xen_exchange_memory(1UL << order, 0, in_frames,
2324 1, order, &out_frame,
2325 address_bits);
2326
2327
2328 if (success)
2329 xen_remap_exchanged_ptes(vstart, order, NULL, out_frame);
2330 else
2331 xen_remap_exchanged_ptes(vstart, order, in_frames, 0);
2332
2333 spin_unlock_irqrestore(&xen_reservation_lock, flags);
2334
2335 *dma_handle = virt_to_machine(vstart).maddr;
2336 return success ? 0 : -ENOMEM;
2337 }
2338
2339 void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
2340 {
2341 unsigned long *out_frames = discontig_frames, in_frame;
2342 unsigned long flags;
2343 int success;
2344 unsigned long vstart;
2345
2346 if (unlikely(order > MAX_CONTIG_ORDER))
2347 return;
2348
2349 vstart = (unsigned long)phys_to_virt(pstart);
2350 memset((void *) vstart, 0, PAGE_SIZE << order);
2351
2352 spin_lock_irqsave(&xen_reservation_lock, flags);
2353
2354
2355 in_frame = virt_to_mfn(vstart);
2356
2357
2358 xen_zap_pfn_range(vstart, order, NULL, out_frames);
2359
2360
2361 success = xen_exchange_memory(1, order, &in_frame, 1UL << order,
2362 0, out_frames, 0);
2363
2364
2365 if (success)
2366 xen_remap_exchanged_ptes(vstart, order, out_frames, 0);
2367 else
2368 xen_remap_exchanged_ptes(vstart, order, NULL, in_frame);
2369
2370 spin_unlock_irqrestore(&xen_reservation_lock, flags);
2371 }
2372
2373 static noinline void xen_flush_tlb_all(void)
2374 {
2375 struct mmuext_op *op;
2376 struct multicall_space mcs;
2377
2378 preempt_disable();
2379
2380 mcs = xen_mc_entry(sizeof(*op));
2381
2382 op = mcs.args;
2383 op->cmd = MMUEXT_TLB_FLUSH_ALL;
2384 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
2385
2386 xen_mc_issue(PARAVIRT_LAZY_MMU);
2387
2388 preempt_enable();
2389 }
2390
2391 #define REMAP_BATCH_SIZE 16
2392
2393 struct remap_data {
2394 xen_pfn_t *pfn;
2395 bool contiguous;
2396 bool no_translate;
2397 pgprot_t prot;
2398 struct mmu_update *mmu_update;
2399 };
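/*
 * apply_to_page_range() callback: build one mmu_update entry per pte,
 * mapping the next (or, for contiguous mappings, the same incremented)
 * target frame.
 */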
2400
2401 static int remap_area_pfn_pte_fn(pte_t *ptep, unsigned long addr, void *data)
2402 {
2403 struct remap_data *rmd = data;
2404 pte_t pte = pte_mkspecial(mfn_pte(*rmd->pfn, rmd->prot));
2405
2406
2407
2408
2409
2410 if (rmd->contiguous)
2411 (*rmd->pfn)++;
2412 else
2413 rmd->pfn++;
2414
2415 rmd->mmu_update->ptr = virt_to_machine(ptep).maddr;
2416 rmd->mmu_update->ptr |= rmd->no_translate ?
2417 MMU_PT_UPDATE_NO_TRANSLATE :
2418 MMU_NORMAL_PT_UPDATE;
2419 rmd->mmu_update->val = pte_val_ma(pte);
2420 rmd->mmu_update++;
2421
2422 return 0;
2423 }
2424
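/*
 * Map a range of foreign or device frames into a VMA in REMAP_BATCH_SIZE
 * chunks, reporting per-page errors through err_ptr when supplied.
 */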
2425 int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr,
2426 xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot,
2427 unsigned int domid, bool no_translate)
2428 {
2429 int err = 0;
2430 struct remap_data rmd;
2431 struct mmu_update mmu_update[REMAP_BATCH_SIZE];
2432 unsigned long range;
2433 int mapped = 0;
2434
2435 BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
2436
2437 rmd.pfn = pfn;
2438 rmd.prot = prot;
2439
2440
2441
2442
2443 rmd.contiguous = !err_ptr;
2444 rmd.no_translate = no_translate;
2445
2446 while (nr) {
2447 int index = 0;
2448 int done = 0;
2449 int batch = min(REMAP_BATCH_SIZE, nr);
2450 int batch_left = batch;
2451
2452 range = (unsigned long)batch << PAGE_SHIFT;
2453
2454 rmd.mmu_update = mmu_update;
2455 err = apply_to_page_range(vma->vm_mm, addr, range,
2456 remap_area_pfn_pte_fn, &rmd);
2457 if (err)
2458 goto out;
2459
2460
2461
2462
2463
2464 do {
2465 int i;
2466
2467 err = HYPERVISOR_mmu_update(&mmu_update[index],
2468 batch_left, &done, domid);
2469
2470
2471
2472
2473
2474
2475 if (err_ptr) {
2476 for (i = index; i < index + done; i++)
2477 err_ptr[i] = 0;
2478 }
2479 if (err < 0) {
2480 if (!err_ptr)
2481 goto out;
2482 err_ptr[i] = err;
2483 done++;
2484 } else
2485 mapped += done;
2486 batch_left -= done;
2487 index += done;
2488 } while (batch_left);
2489
2490 nr -= batch;
2491 addr += range;
2492 if (err_ptr)
2493 err_ptr += batch;
2494 cond_resched();
2495 }
2496 out:
2497
2498 xen_flush_tlb_all();
2499
2500 return err < 0 ? err : mapped;
2501 }
2502 EXPORT_SYMBOL_GPL(xen_remap_pfn);
2503
2504 #ifdef CONFIG_KEXEC_CORE
2505 phys_addr_t paddr_vmcoreinfo_note(void)
2506 {
2507 if (xen_pv_domain())
2508 return virt_to_machine(vmcoreinfo_note).maddr;
2509 else
2510 return __pa(vmcoreinfo_note);
2511 }
2512 #endif