#include <linux/sched.h>
#include <linux/mm_types.h>
#include <linux/mm.h>
#include <linux/stop_machine.h>

#include <asm/sections.h>
#include <asm/mmu.h>
#include <asm/tlb.h>
#include <asm/firmware.h>

#include <mm/mmu_decl.h>

#include <trace/events/thp.h>

#if H_PGTABLE_RANGE > (USER_VSID_RANGE * (TASK_SIZE_USER64 / TASK_CONTEXT_SIZE))
#warning Limited user VSID range means pagetable space is wasted
#endif

#ifdef CONFIG_SPARSEMEM_VMEMMAP
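/*
 * Create a bolted hash page table mapping for the vmemmap range
 * [start, start + page_size) backed by physical address phys. If bolting
 * fails, any partially inserted mapping is torn down before returning.
 */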
int __meminit hash__vmemmap_create_mapping(unsigned long start,
                                           unsigned long page_size,
                                           unsigned long phys)
{
        int rc;

        if ((start + page_size) >= H_VMEMMAP_END) {
                pr_warn("Outside the supported range\n");
                return -1;
        }

        rc = htab_bolt_mapping(start, start + page_size, phys,
                               pgprot_val(PAGE_KERNEL),
                               mmu_vmemmap_psize, mmu_kernel_ssize);
        if (rc < 0) {
                int rc2 = htab_remove_mapping(start, start + page_size,
                                              mmu_vmemmap_psize,
                                              mmu_kernel_ssize);
                BUG_ON(rc2 && (rc2 != -ENOENT));
        }
        return rc;
}

#ifdef CONFIG_MEMORY_HOTPLUG
void hash__vmemmap_remove_mapping(unsigned long start,
                                  unsigned long page_size)
{
        int rc = htab_remove_mapping(start, start + page_size,
                                     mmu_vmemmap_psize,
                                     mmu_kernel_ssize);
        BUG_ON((rc < 0) && (rc != -ENOENT));
        WARN_ON(rc == -ENOENT);
}
#endif
#endif /* CONFIG_SPARSEMEM_VMEMMAP */

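/*
 * Map a kernel virtual address to a physical address. Once the slab
 * allocator is up this inserts a Linux page table entry; during early
 * boot it instead bolts the mapping directly into the hash page table.
 */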
int hash__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
{
        pgd_t *pgdp;
        p4d_t *p4dp;
        pud_t *pudp;
        pmd_t *pmdp;
        pte_t *ptep;

        BUILD_BUG_ON(TASK_SIZE_USER64 > H_PGTABLE_RANGE);
        if (slab_is_available()) {
                pgdp = pgd_offset_k(ea);
                p4dp = p4d_offset(pgdp, ea);
                pudp = pud_alloc(&init_mm, p4dp, ea);
                if (!pudp)
                        return -ENOMEM;
                pmdp = pmd_alloc(&init_mm, pudp, ea);
                if (!pmdp)
                        return -ENOMEM;
                ptep = pte_alloc_kernel(pmdp, ea);
                if (!ptep)
                        return -ENOMEM;
                set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, prot));
        } else {
                /*
                 * The page table allocators are not up yet, so we cannot
                 * create a Linux page table entry for this mapping. Bolt
                 * an entry directly into the hash page table instead.
                 */
                if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, pgprot_val(prot),
                                      mmu_io_psize, mmu_kernel_ssize)) {
                        printk(KERN_ERR "Failed to do bolted mapping IO "
                               "memory at %016lx !\n", pa);
                        return -ENOMEM;
                }
        }

        smp_wmb();
        return 0;
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE

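/*
 * Atomically update a huge PMD entry: clear the bits in 'clr', set the
 * bits in 'set', and honour the H_PAGE_BUSY lock bit. If the old entry
 * had hash PTEs (H_PAGE_HASHPTE), flush them from the hash page table.
 */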
unsigned long hash__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
                                        pmd_t *pmdp, unsigned long clr,
                                        unsigned long set)
{
        __be64 old_be, tmp;
        unsigned long old;

#ifdef CONFIG_DEBUG_VM
        WARN_ON(!hash__pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
        assert_spin_locked(pmd_lockptr(mm, pmdp));
#endif

        __asm__ __volatile__(
        "1:     ldarx   %0,0,%3\n\
                and.    %1,%0,%6\n\
                bne-    1b \n\
                andc    %1,%0,%4 \n\
                or      %1,%1,%7\n\
                stdcx.  %1,0,%3 \n\
                bne-    1b"
        : "=&r" (old_be), "=&r" (tmp), "=m" (*pmdp)
        : "r" (pmdp), "r" (cpu_to_be64(clr)), "m" (*pmdp),
          "r" (cpu_to_be64(H_PAGE_BUSY)), "r" (cpu_to_be64(set))
        : "cc" );

        old = be64_to_cpu(old_be);

        trace_hugepage_update(addr, old, clr, set);
        if (old & H_PAGE_HASHPTE)
                hpte_do_hugepage_flush(mm, addr, pmdp, old);
        return old;
}

pmd_t hash__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
                                pmd_t *pmdp)
{
        pmd_t pmd;

        VM_BUG_ON(address & ~HPAGE_PMD_MASK);
        VM_BUG_ON(pmd_trans_huge(*pmdp));
        VM_BUG_ON(pmd_devmap(*pmdp));

        pmd = *pmdp;
        pmd_clear(pmdp);

        /*
         * A lockless page table walk (such as a parallel hash fault) may
         * still hold a pointer to the old page tables and could insert
         * new hash PTEs for the small pages being collapsed. Wait for any
         * such walkers to finish before invalidating the hash entries.
         */
        serialize_against_pte_lookup(vma->vm_mm);

        /*
         * Now flush the hash PTEs covering the range of the cleared PMD,
         * so the collapsed hugepage starts from a clean slate.
         */
        flush_tlb_pmd_range(vma->vm_mm, &pmd, address);
        return pmd;
}

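/*
 * The page table deposited for a THP PMD is stashed in the second half
 * of the PMD page and is used to track the base page size hash PTEs.
 */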
void hash__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
                                      pgtable_t pgtable)
{
        pgtable_t *pgtable_slot;

        assert_spin_locked(pmd_lockptr(mm, pmdp));
        /*
         * The deposited page table is stored in the second half of the
         * PMD page.
         */
        pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
        *pgtable_slot = pgtable;
        /*
         * Make the deposited pgtable visible to other CPUs before a
         * hugepage PTE is established at the PMD level.
         */
        smp_wmb();
}

pgtable_t hash__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
        pgtable_t pgtable;
        pgtable_t *pgtable_slot;

        assert_spin_locked(pmd_lockptr(mm, pmdp));

        pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
        pgtable = *pgtable_slot;
        /*
         * Once the page table has been withdrawn, mark the slot NULL.
         */
        *pgtable_slot = NULL;
        /*
         * The deposited PTE fragment carries hash PTE slot information,
         * so zero it out on withdraw.
         */
        memset(pgtable, 0, PTE_FRAG_SIZE);
        return pgtable;
}

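/*
 * A Linux hugepage PMD has changed; flush the corresponding hash page
 * table entries.
 */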
void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
                            pmd_t *pmdp, unsigned long old_pmd)
{
        int ssize;
        unsigned int psize;
        unsigned long vsid;
        unsigned long flags = 0;

        /* Work out the base page size, VSID and segment size. */
#ifdef CONFIG_DEBUG_VM
        psize = get_slice_psize(mm, addr);
        BUG_ON(psize == MMU_PAGE_16M);
#endif
        if (old_pmd & H_PAGE_COMBO)
                psize = MMU_PAGE_4K;
        else
                psize = MMU_PAGE_64K;

        if (!is_kernel_addr(addr)) {
                ssize = user_segment_size(addr);
                vsid = get_user_vsid(&mm->context, addr, ssize);
                WARN_ON(vsid == 0);
        } else {
                vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
                ssize = mmu_kernel_ssize;
        }

        if (mm_is_thread_local(mm))
                flags |= HPTE_LOCAL_UPDATE;

        return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize, flags);
}

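/*
 * Clear a huge PMD and return the old value. The deposited page table
 * fragment is zeroed, because the hash slot information it tracked is no
 * longer valid once the PMD is gone.
 */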
pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm,
                                    unsigned long addr, pmd_t *pmdp)
{
        pmd_t old_pmd;
        pgtable_t pgtable;
        unsigned long old;
        pgtable_t *pgtable_slot;

        old = pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
        old_pmd = __pmd(old);
        /*
         * The PMD is now none and we hold the PMD lock, so it is safe to
         * go and clear the hash index information in the deposited
         * pgtable.
         */
        pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
        pgtable = *pgtable_slot;
        /*
         * Zero out the old valid and hash index details.
         */
        memset(pgtable, 0, PTE_FRAG_SIZE);
        return old_pmd;
}

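/*
 * THP on hash requires 16MB pages, a PMD size of 16MB, and a valid 16MB
 * penc encoding for the hash base page size in use (64K or 4K).
 */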
int hash__has_transparent_hugepage(void)
{
        /* The hardware must support 16MB pages. */
        if (!mmu_has_feature(MMU_FTR_16M_PAGE))
                return 0;

        /*
         * THP is only supported if PMD_SIZE is 16MB.
         */
        if (mmu_psize_defs[MMU_PAGE_16M].shift != PMD_SHIFT)
                return 0;

        /*
         * We need a 16MB hugepage encoding in a segment with base page
         * size 64K or 4K. If 64K hash PTEs are available they are used
         * by default, so check those first.
         */
        if (mmu_psize_defs[MMU_PAGE_64K].shift &&
            (mmu_psize_defs[MMU_PAGE_64K].penc[MMU_PAGE_16M] == -1))
                return 0;

        /*
         * Otherwise we only have 4K hash PTEs; check their encoding.
         */
        if (mmu_psize_defs[MMU_PAGE_4K].penc[MMU_PAGE_16M] == -1)
                return 0;

        return 1;
}
EXPORT_SYMBOL_GPL(hash__has_transparent_hugepage);

#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifdef CONFIG_STRICT_KERNEL_RWX

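/*
 * Parameters handed to the stop_machine() callback that rewrites the
 * protection bits on bolted hash PTEs for a range of kernel addresses.
 */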
struct change_memory_parms {
        unsigned long start, end, newpp;
        unsigned int step, nr_cpus, master_cpu;
        atomic_t cpu_counter;
};

/* Shared with change_memory_range_fn() when it runs under stop_machine(). */
static struct change_memory_parms chmem_parms;

/* Serializes updates of chmem_parms. */
static DEFINE_MUTEX(chmem_lock);

static void change_memory_range(unsigned long start, unsigned long end,
                                unsigned int step, unsigned long newpp)
{
        unsigned long idx;

        pr_debug("Changing page protection on range 0x%lx-0x%lx, to 0x%lx, step 0x%x\n",
                 start, end, newpp, step);

        for (idx = start; idx < end; idx += step)
                /* Update the protection bits of each bolted HPTE in the range. */
                mmu_hash_ops.hpte_updateboltedpp(newpp, idx, mmu_linear_psize,
                                                 mmu_kernel_ssize);
}

static int notrace chmem_secondary_loop(struct change_memory_parms *parms)
{
        unsigned long msr, tmp, flags;
        int *p;

        p = &parms->cpu_counter.counter;

        local_irq_save(flags);
        hard_irq_disable();

        asm volatile (
        /* Switch to real mode and leave interrupts off */
        "mfmsr  %[msr]                  ;"
        "li     %[tmp], %[MSR_IR_DR]    ;"
        "andc   %[tmp], %[msr], %[tmp]  ;"
        "mtmsrd %[tmp]                  ;"

        /* Tell the master we are in real mode by decrementing the counter */
        "1:                             "
        "lwarx  %[tmp], 0, %[p]         ;"
        "addic  %[tmp], %[tmp], -1      ;"
        "stwcx. %[tmp], 0, %[p]         ;"
        "bne-   1b                      ;"

        /* Spin until the counter drops to zero, i.e. the master is done */
        "2:                             ;"
        "lwz    %[tmp], 0(%[p])         ;"
        "cmpwi  %[tmp], 0               ;"
        "bne-   2b                      ;"

        /* Switch back to virtual mode */
        "mtmsrd %[msr]                  ;"

        : /* outputs */
          [msr] "=&r" (msr), [tmp] "=&b" (tmp), "+m" (*p)
        : /* inputs */
          [p] "b" (p), [MSR_IR_DR] "i" (MSR_IR | MSR_DR)
        : /* clobbers */
          "cc", "xer"
        );

        local_irq_restore(flags);

        return 0;
}

static int change_memory_range_fn(void *data)
{
        struct change_memory_parms *parms = data;

        /* Secondary CPUs drop to real mode and wait for the master. */
        if (parms->master_cpu != smp_processor_id())
                return chmem_secondary_loop(parms);

        /* Wait for all but one CPU (this one) to call in. */
        while (atomic_read(&parms->cpu_counter) > 1)
                barrier();

        change_memory_range(parms->start, parms->end, parms->step, parms->newpp);

        mb();

        /* Signal the other CPUs that we're done. */
        atomic_dec(&parms->cpu_counter);

        return 0;
}

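/*
 * Change the protection of the bolted linear mapping covering
 * [start, end). Under an LPAR this is done via stop_machine(), with the
 * other CPUs parked in real mode while the master updates the HPTEs;
 * otherwise the range is updated directly.
 */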
static bool hash__change_memory_range(unsigned long start, unsigned long end,
                                      unsigned long newpp)
{
        unsigned int step, shift;

        shift = mmu_psize_defs[mmu_linear_psize].shift;
        step = 1 << shift;

        start = ALIGN_DOWN(start, step);
        end = ALIGN(end, step);

        if (start >= end)
                return false;

        if (firmware_has_feature(FW_FEATURE_LPAR)) {
                mutex_lock(&chmem_lock);

                chmem_parms.start = start;
                chmem_parms.end = end;
                chmem_parms.step = step;
                chmem_parms.newpp = newpp;
                chmem_parms.master_cpu = smp_processor_id();

                cpus_read_lock();

                atomic_set(&chmem_parms.cpu_counter, num_online_cpus());

                /* Ensure the parameters are visible before calling the other CPUs. */
                mb();

                stop_machine_cpuslocked(change_memory_range_fn, &chmem_parms,
                                        cpu_online_mask);

                cpus_read_unlock();
                mutex_unlock(&chmem_lock);
        } else
                change_memory_range(start, end, step, newpp);

        return true;
}

void hash__mark_rodata_ro(void)
{
        unsigned long start, end, pp;

        start = (unsigned long)_stext;
        end = (unsigned long)__init_begin;

        pp = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL_ROX), HPTE_USE_KERNEL_KEY);

        WARN_ON(!hash__change_memory_range(start, end, pp));
}

void hash__mark_initmem_nx(void)
{
        unsigned long start, end, pp;

        start = (unsigned long)__init_begin;
        end = (unsigned long)__init_end;

        pp = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL), HPTE_USE_KERNEL_KEY);

        WARN_ON(!hash__change_memory_range(start, end, pp));
}
#endif /* CONFIG_STRICT_KERNEL_RWX */