// SPDX-License-Identifier: GPL-2.0
/*
 * SPARC64 Huge TLB page support.
 *
 * Copyright (C) 2002, 2003, 2006 David S. Miller (davem@davemloft.net)
 */

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/sched/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/sysctl.h>

#include <asm/mman.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>

/* Slightly simplified from the non-hugepage variant because by
 * definition we don't have to worry about any page coloring stuff.
 */

static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp,
                                                        unsigned long addr,
                                                        unsigned long len,
                                                        unsigned long pgoff,
                                                        unsigned long flags)
{
        struct hstate *h = hstate_file(filp);
        unsigned long task_size = TASK_SIZE;
        struct vm_unmapped_area_info info;

        if (test_thread_flag(TIF_32BIT))
                task_size = STACK_TOP32;

        info.flags = 0;
        info.length = len;
        info.low_limit = TASK_UNMAPPED_BASE;
        info.high_limit = min(task_size, VA_EXCLUDE_START);
        info.align_mask = PAGE_MASK & ~huge_page_mask(h);
        info.align_offset = 0;
        addr = vm_unmapped_area(&info);

        if ((addr & ~PAGE_MASK) && task_size > VA_EXCLUDE_END) {
                VM_BUG_ON(addr != -ENOMEM);
                info.low_limit = VA_EXCLUDE_END;
                info.high_limit = task_size;
                addr = vm_unmapped_area(&info);
        }

        return addr;
}

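/* Top-down variant of the search above, used for 32-bit tasks whose mm
 * lays out mappings top-down.  Falls back to a bottom-up search if the
 * top-down one fails.
 */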
static unsigned long
hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
                                  const unsigned long len,
                                  const unsigned long pgoff,
                                  const unsigned long flags)
{
        struct hstate *h = hstate_file(filp);
        struct mm_struct *mm = current->mm;
        unsigned long addr = addr0;
        struct vm_unmapped_area_info info;

        /* This should only ever run for 32-bit processes.  */
        BUG_ON(!test_thread_flag(TIF_32BIT));

        info.flags = VM_UNMAPPED_AREA_TOPDOWN;
        info.length = len;
        info.low_limit = PAGE_SIZE;
        info.high_limit = mm->mmap_base;
        info.align_mask = PAGE_MASK & ~huge_page_mask(h);
        info.align_offset = 0;
        addr = vm_unmapped_area(&info);

        /*
         * A failed mmap() very likely causes application failure,
         * so fall back to the bottom-up function here.  This scenario
         * can happen with large stack limits and large mmap()
         * allocations.
         */
        if (addr & ~PAGE_MASK) {
                VM_BUG_ON(addr != -ENOMEM);
                info.flags = 0;
                info.low_limit = TASK_UNMAPPED_BASE;
                info.high_limit = STACK_TOP32;
                addr = vm_unmapped_area(&info);
        }

        return addr;
}

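/* arch hook for hugetlb mmap(): validate the length, honour MAP_FIXED
 * and address hints, then dispatch to the search matching the mm's
 * layout.
 */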
unsigned long
hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
                unsigned long len, unsigned long pgoff, unsigned long flags)
{
        struct hstate *h = hstate_file(file);
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long task_size = TASK_SIZE;

        if (test_thread_flag(TIF_32BIT))
                task_size = STACK_TOP32;

        if (len & ~huge_page_mask(h))
                return -EINVAL;
        if (len > task_size)
                return -ENOMEM;

        if (flags & MAP_FIXED) {
                if (prepare_hugepage_range(file, addr, len))
                        return -EINVAL;
                return addr;
        }

        if (addr) {
                addr = ALIGN(addr, huge_page_size(h));
                vma = find_vma(mm, addr);
                if (task_size - len >= addr &&
                    (!vma || addr + len <= vm_start_gap(vma)))
                        return addr;
        }
        if (mm->get_unmapped_area == arch_get_unmapped_area)
                return hugetlb_get_unmapped_area_bottomup(file, addr, len,
                                pgoff, flags);
        else
                return hugetlb_get_unmapped_area_topdown(file, addr, len,
                                pgoff, flags);
}

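/* sun4u huge TTEs use the default hugepage size bits, which pte_mkhuge()
 * has already set, so there is nothing to translate here.
 */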
static pte_t sun4u_hugepage_shift_to_tte(pte_t entry, unsigned int shift)
{
        return entry;
}

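/* sun4v TTEs encode the page size explicitly: translate the hugepage
 * shift into the matching _PAGE_SZ*_4V bits and flag PMD/PUD-level leaf
 * entries so the page table walkers recognize them.
 */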
static pte_t sun4v_hugepage_shift_to_tte(pte_t entry, unsigned int shift)
{
        unsigned long hugepage_size = _PAGE_SZ4MB_4V;

        pte_val(entry) &= ~_PAGE_SZALL_4V;

        switch (shift) {
        case HPAGE_16GB_SHIFT:
                hugepage_size = _PAGE_SZ16GB_4V;
                pte_val(entry) |= _PAGE_PUD_HUGE;
                break;
        case HPAGE_2GB_SHIFT:
                hugepage_size = _PAGE_SZ2GB_4V;
                pte_val(entry) |= _PAGE_PMD_HUGE;
                break;
        case HPAGE_256MB_SHIFT:
                hugepage_size = _PAGE_SZ256MB_4V;
                pte_val(entry) |= _PAGE_PMD_HUGE;
                break;
        case HPAGE_SHIFT:
                pte_val(entry) |= _PAGE_PMD_HUGE;
                break;
        case HPAGE_64K_SHIFT:
                hugepage_size = _PAGE_SZ64K_4V;
                break;
        default:
                WARN_ONCE(1, "unsupported hugepage shift=%u\n", shift);
        }

        pte_val(entry) |= hugepage_size;
        return entry;
}

static pte_t hugepage_shift_to_tte(pte_t entry, unsigned int shift)
{
        if (tlb_type == hypervisor)
                return sun4v_hugepage_shift_to_tte(entry, shift);
        else
                return sun4u_hugepage_shift_to_tte(entry, shift);
}

pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags)
{
        pte_t pte;

        entry = pte_mkhuge(entry);
        pte = hugepage_shift_to_tte(entry, shift);

#ifdef CONFIG_SPARC64
        /* If this vma has ADI enabled on it, turn on TTE.mcd */
        if (flags & VM_SPARC_ADI)
                return pte_mkmcd(pte);
        else
                return pte_mknotmcd(pte);
#else
        return pte;
#endif
}
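/* The reverse mapping: recover the hugepage shift from the size bits
 * of a huge TTE.
 */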
static unsigned int sun4v_huge_tte_to_shift(pte_t entry)
{
        unsigned long tte_szbits = pte_val(entry) & _PAGE_SZALL_4V;
        unsigned int shift;

        switch (tte_szbits) {
        case _PAGE_SZ16GB_4V:
                shift = HPAGE_16GB_SHIFT;
                break;
        case _PAGE_SZ2GB_4V:
                shift = HPAGE_2GB_SHIFT;
                break;
        case _PAGE_SZ256MB_4V:
                shift = HPAGE_256MB_SHIFT;
                break;
        case _PAGE_SZ4MB_4V:
                shift = REAL_HPAGE_SHIFT;
                break;
        case _PAGE_SZ64K_4V:
                shift = HPAGE_64K_SHIFT;
                break;
        default:
                shift = PAGE_SHIFT;
                break;
        }
        return shift;
}

static unsigned int sun4u_huge_tte_to_shift(pte_t entry)
{
        unsigned long tte_szbits = pte_val(entry) & _PAGE_SZALL_4U;
        unsigned int shift;

        switch (tte_szbits) {
        case _PAGE_SZ256MB_4U:
                shift = HPAGE_256MB_SHIFT;
                break;
        case _PAGE_SZ4MB_4U:
                shift = REAL_HPAGE_SHIFT;
                break;
        case _PAGE_SZ64K_4U:
                shift = HPAGE_64K_SHIFT;
                break;
        default:
                shift = PAGE_SHIFT;
                break;
        }
        return shift;
}

static unsigned long tte_to_shift(pte_t entry)
{
        if (tlb_type == hypervisor)
                return sun4v_huge_tte_to_shift(entry);

        return sun4u_huge_tte_to_shift(entry);
}

static unsigned int huge_tte_to_shift(pte_t entry)
{
        unsigned long shift = tte_to_shift(entry);

        if (shift == PAGE_SHIFT)
                WARN_ONCE(1, "huge_tte_to_shift: invalid hugepage tte=0x%lx\n",
                          pte_val(entry));

        return shift;
}

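/* An HPAGE_SIZE hugepage is backed by two contiguous REAL_HPAGE_SIZE
 * hardware TTEs, so a TTE encoding REAL_HPAGE_SIZE is reported as
 * HPAGE_SIZE here.
 */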
static unsigned long huge_tte_to_size(pte_t pte)
{
        unsigned long size = 1UL << huge_tte_to_shift(pte);

        if (size == REAL_HPAGE_SIZE)
                size = HPAGE_SIZE;
        return size;
}

unsigned long pud_leaf_size(pud_t pud) { return 1UL << tte_to_shift(*(pte_t *)&pud); }
unsigned long pmd_leaf_size(pmd_t pmd) { return 1UL << tte_to_shift(*(pte_t *)&pmd); }
unsigned long pte_leaf_size(pte_t pte) { return 1UL << tte_to_shift(pte); }

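/* Return a pointer at the level the hugepage is mapped at: the PUD
 * itself for sizes of at least PUD_SIZE, the PMD for at least PMD_SIZE,
 * otherwise a normal PTE.
 */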
pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
                      unsigned long addr, unsigned long sz)
{
        pgd_t *pgd;
        p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;

        pgd = pgd_offset(mm, addr);
        p4d = p4d_offset(pgd, addr);
        pud = pud_alloc(mm, p4d, addr);
        if (!pud)
                return NULL;
        if (sz >= PUD_SIZE)
                return (pte_t *)pud;
        pmd = pmd_alloc(mm, pud, addr);
        if (!pmd)
                return NULL;
        if (sz >= PMD_SIZE)
                return (pte_t *)pmd;
        return pte_alloc_map(mm, pmd, addr);
}

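/* Lookup-only counterpart of huge_pte_alloc(): walk without allocating
 * and return the entry at whatever level the huge mapping lives.
 */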
pte_t *huge_pte_offset(struct mm_struct *mm,
                       unsigned long addr, unsigned long sz)
{
        pgd_t *pgd;
        p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;

        pgd = pgd_offset(mm, addr);
        if (pgd_none(*pgd))
                return NULL;
        p4d = p4d_offset(pgd, addr);
        if (p4d_none(*p4d))
                return NULL;
        pud = pud_offset(p4d, addr);
        if (pud_none(*pud))
                return NULL;
        if (is_hugetlb_pud(*pud))
                return (pte_t *)pud;
        pmd = pmd_offset(pud, addr);
        if (pmd_none(*pmd))
                return NULL;
        if (is_hugetlb_pmd(*pmd))
                return (pte_t *)pmd;
        return pte_offset_map(pmd, addr);
}

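/* Install a huge mapping by replicating the TTE into every page table
 * slot the hugepage spans, advancing the physical address encoded in
 * each successive copy.
 */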
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
                     pte_t *ptep, pte_t entry)
{
        unsigned int nptes, orig_shift, shift;
        unsigned long i, size;
        pte_t orig;

        size = huge_tte_to_size(entry);

        if (size >= PUD_SIZE)
                shift = PUD_SHIFT;
        else if (size >= PMD_SIZE)
                shift = PMD_SHIFT;
        else
                shift = PAGE_SHIFT;

        nptes = size >> shift;

        if (!pte_present(*ptep) && pte_present(entry))
                mm->context.hugetlb_pte_count += nptes;

        addr &= ~(size - 1);
        orig = *ptep;
        orig_shift = pte_none(orig) ? PAGE_SHIFT : huge_tte_to_shift(orig);

        for (i = 0; i < nptes; i++)
                ptep[i] = __pte(pte_val(entry) + (i << shift));

        maybe_tlb_batch_add(mm, addr, ptep, orig, 0, orig_shift);

        /* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */
        if (size == HPAGE_SIZE)
                maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, orig, 0,
                                    orig_shift);
}

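/* Teardown counterpart of set_huge_pte_at(): clear every slot the
 * hugepage spans and queue the matching TLB flushes.
 */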
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
                              pte_t *ptep)
{
        unsigned int i, nptes, orig_shift, shift;
        unsigned long size;
        pte_t entry;

        entry = *ptep;
        size = huge_tte_to_size(entry);

        if (size >= PUD_SIZE)
                shift = PUD_SHIFT;
        else if (size >= PMD_SIZE)
                shift = PMD_SHIFT;
        else
                shift = PAGE_SHIFT;

        nptes = size >> shift;
        orig_shift = pte_none(entry) ? PAGE_SHIFT : huge_tte_to_shift(entry);

        if (pte_present(entry))
                mm->context.hugetlb_pte_count -= nptes;

        addr &= ~(size - 1);
        for (i = 0; i < nptes; i++)
                ptep[i] = __pte(0UL);

        maybe_tlb_batch_add(mm, addr, ptep, entry, 0, orig_shift);

        /* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */
        if (size == HPAGE_SIZE)
                maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, entry, 0,
                                    orig_shift);

        return entry;
}

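/* A huge PMD/PUD is any non-none entry that is not simply _PAGE_VALID:
 * either the huge bit is set, or the entry is a non-present huge entry
 * (e.g. one under migration).
 */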
int pmd_huge(pmd_t pmd)
{
        return !pmd_none(pmd) &&
                (pmd_val(pmd) & (_PAGE_VALID|_PAGE_PMD_HUGE)) != _PAGE_VALID;
}

int pud_huge(pud_t pud)
{
        return !pud_none(pud) &&
                (pud_val(pud) & (_PAGE_VALID|_PAGE_PUD_HUGE)) != _PAGE_VALID;
}

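/* Page table teardown for hugetlb ranges, mirroring the generic
 * free_p*_range() helpers: floor and ceiling bound how far page table
 * pages shared with neighbouring mappings may be freed.
 */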
static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
                                   unsigned long addr)
{
        pgtable_t token = pmd_pgtable(*pmd);

        pmd_clear(pmd);
        pte_free_tlb(tlb, token, addr);
        mm_dec_nr_ptes(tlb->mm);
}

static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
                                   unsigned long addr, unsigned long end,
                                   unsigned long floor, unsigned long ceiling)
{
        pmd_t *pmd;
        unsigned long next;
        unsigned long start;

        start = addr;
        pmd = pmd_offset(pud, addr);
        do {
                next = pmd_addr_end(addr, end);
                if (pmd_none(*pmd))
                        continue;
                if (is_hugetlb_pmd(*pmd))
                        pmd_clear(pmd);
                else
                        hugetlb_free_pte_range(tlb, pmd, addr);
        } while (pmd++, addr = next, addr != end);

        start &= PUD_MASK;
        if (start < floor)
                return;
        if (ceiling) {
                ceiling &= PUD_MASK;
                if (!ceiling)
                        return;
        }
        if (end - 1 > ceiling - 1)
                return;

        pmd = pmd_offset(pud, start);
        pud_clear(pud);
        pmd_free_tlb(tlb, pmd, start);
        mm_dec_nr_pmds(tlb->mm);
}

static void hugetlb_free_pud_range(struct mmu_gather *tlb, p4d_t *p4d,
                                   unsigned long addr, unsigned long end,
                                   unsigned long floor, unsigned long ceiling)
{
        pud_t *pud;
        unsigned long next;
        unsigned long start;

        start = addr;
        pud = pud_offset(p4d, addr);
        do {
                next = pud_addr_end(addr, end);
                if (pud_none_or_clear_bad(pud))
                        continue;
                if (is_hugetlb_pud(*pud))
                        pud_clear(pud);
                else
                        hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
                                               ceiling);
        } while (pud++, addr = next, addr != end);

        start &= PGDIR_MASK;
        if (start < floor)
                return;
        if (ceiling) {
                ceiling &= PGDIR_MASK;
                if (!ceiling)
                        return;
        }
        if (end - 1 > ceiling - 1)
                return;

        pud = pud_offset(p4d, start);
        p4d_clear(p4d);
        pud_free_tlb(tlb, pud, start);
        mm_dec_nr_puds(tlb->mm);
}

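/* Entry point for hugetlb page table teardown: trim the range to PMD
 * granularity, then walk the P4D entries and free what lies underneath,
 * honouring the same floor/ceiling rules as free_pgd_range().
 */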
void hugetlb_free_pgd_range(struct mmu_gather *tlb,
                            unsigned long addr, unsigned long end,
                            unsigned long floor, unsigned long ceiling)
{
        pgd_t *pgd;
        p4d_t *p4d;
        unsigned long next;

        addr &= PMD_MASK;
        if (addr < floor) {
                addr += PMD_SIZE;
                if (!addr)
                        return;
        }
        if (ceiling) {
                ceiling &= PMD_MASK;
                if (!ceiling)
                        return;
        }
        if (end - 1 > ceiling - 1)
                end -= PMD_SIZE;
        if (addr > end - 1)
                return;

        pgd = pgd_offset(tlb->mm, addr);
        p4d = p4d_offset(pgd, addr);
        do {
                next = p4d_addr_end(addr, end);
                if (p4d_none_or_clear_bad(p4d))
                        continue;
                hugetlb_free_pud_range(tlb, p4d, addr, next, floor, ceiling);
        } while (p4d++, addr = next, addr != end);
}