0001
0002
0003
0004
0005
0006
0007
0008
0009
0010 #include <linux/init.h>
0011 #include <linux/fs.h>
0012 #include <linux/mm.h>
0013 #include <linux/hugetlb.h>
0014 #include <linux/pagemap.h>
0015 #include <linux/err.h>
0016 #include <linux/sysctl.h>
0017 #include <asm/mman.h>
0018 #include <asm/tlb.h>
0019 #include <asm/tlbflush.h>
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
#ifdef CONFIG_CMA
/*
 * Reserve CMA for gigantic hugetlb pages at boot.
 *
 * The reservation order is chosen to match the largest huge page size
 * this configuration can map: a PUD-sized block when PUD section
 * mappings are supported, otherwise a contiguous-PMD sized block.
 */
void __init arm64_hugetlb_cma_reserve(void)
{
	int order;

	if (pud_sect_supported())
		order = PUD_SHIFT - PAGE_SHIFT;
	else
		order = CONT_PMD_SHIFT - PAGE_SHIFT;

	/*
	 * CMA is only needed for allocations that exceed what the buddy
	 * allocator can provide (order > MAX_ORDER); warn if a config
	 * ever breaks that assumption rather than silently reserving
	 * memory that would not be used this way.
	 */
	WARN_ON(order <= MAX_ORDER);
	hugetlb_cma_reserve(order);
}
#endif /* CONFIG_CMA */
0058
0059 static bool __hugetlb_valid_size(unsigned long size)
0060 {
0061 switch (size) {
0062 #ifndef __PAGETABLE_PMD_FOLDED
0063 case PUD_SIZE:
0064 return pud_sect_supported();
0065 #endif
0066 case CONT_PMD_SIZE:
0067 case PMD_SIZE:
0068 case CONT_PTE_SIZE:
0069 return true;
0070 }
0071
0072 return false;
0073 }
0074
#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
/*
 * Any huge page size this architecture actually supports can be
 * migrated; anything else is flagged loudly and refused.
 */
bool arch_hugetlb_migration_supported(struct hstate *h)
{
	size_t sz = huge_page_size(h);

	if (__hugetlb_valid_size(sz))
		return true;

	pr_warn("%s: unrecognized huge page size 0x%lx\n", __func__, sz);
	return false;
}
#endif
0088
0089 int pmd_huge(pmd_t pmd)
0090 {
0091 return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT);
0092 }
0093
0094 int pud_huge(pud_t pud)
0095 {
0096 #ifndef __PAGETABLE_PMD_FOLDED
0097 return pud_val(pud) && !(pud_val(pud) & PUD_TABLE_BIT);
0098 #else
0099 return 0;
0100 #endif
0101 }
0102
/*
 * Determine the contiguous-mapping geometry for the entry at @ptep,
 * which must belong to a contiguous hugetlb mapping covering @addr.
 *
 * The level is inferred by walking the page table to @addr and
 * comparing pointers: if @ptep is the PMD entry itself, the mapping is
 * a contiguous-PMD range; otherwise it is a contiguous-PTE range.
 * @pgsize receives the size mapped by one constituent entry, and the
 * number of entries in the contiguous range is returned.
 */
static int find_num_contig(struct mm_struct *mm, unsigned long addr,
			   pte_t *ptep, size_t *pgsize)
{
	pgd_t *pgdp = pgd_offset(mm, addr);
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp;

	*pgsize = PAGE_SIZE;
	p4dp = p4d_offset(pgdp, addr);
	pudp = pud_offset(p4dp, addr);
	pmdp = pmd_offset(pudp, addr);
	/* @ptep is the PMD entry itself: contiguous-PMD mapping. */
	if ((pte_t *)pmdp == ptep) {
		*pgsize = PMD_SIZE;
		return CONT_PMDS;
	}
	return CONT_PTES;
}
0121
0122 static inline int num_contig_ptes(unsigned long size, size_t *pgsize)
0123 {
0124 int contig_ptes = 0;
0125
0126 *pgsize = size;
0127
0128 switch (size) {
0129 #ifndef __PAGETABLE_PMD_FOLDED
0130 case PUD_SIZE:
0131 if (pud_sect_supported())
0132 contig_ptes = 1;
0133 break;
0134 #endif
0135 case PMD_SIZE:
0136 contig_ptes = 1;
0137 break;
0138 case CONT_PMD_SIZE:
0139 *pgsize = PMD_SIZE;
0140 contig_ptes = CONT_PMDS;
0141 break;
0142 case CONT_PTE_SIZE:
0143 *pgsize = PAGE_SIZE;
0144 contig_ptes = CONT_PTES;
0145 break;
0146 }
0147
0148 return contig_ptes;
0149 }
0150
/*
 * Read the logical PTE for a hugetlb mapping.
 *
 * For a contiguous mapping the hardware may set the dirty/young bits
 * on any constituent entry, so fold the dirty and young bits of every
 * entry in the range into the value read from the first one. @ptep is
 * assumed to point at the first entry of the contiguous range.
 */
pte_t huge_ptep_get(pte_t *ptep)
{
	int ncontig, i;
	size_t pgsize;
	pte_t orig_pte = ptep_get(ptep);

	/* Non-present or non-contiguous entries need no folding. */
	if (!pte_present(orig_pte) || !pte_cont(orig_pte))
		return orig_pte;

	ncontig = num_contig_ptes(page_size(pte_page(orig_pte)), &pgsize);
	for (i = 0; i < ncontig; i++, ptep++) {
		pte_t pte = ptep_get(ptep);

		if (pte_dirty(pte))
			orig_pte = pte_mkdirty(orig_pte);

		if (pte_young(pte))
			orig_pte = pte_mkyoung(orig_pte);
	}
	return orig_pte;
}
0172
0173
0174
0175
0176
0177
0178
0179
0180
/*
 * Clear all @ncontig entries of a contiguous mapping starting at
 * @addr/@ptep and return the first entry's value with the dirty and
 * young bits of the whole range folded in.
 *
 * Clearing is done with ptep_get_and_clear() per entry so that
 * hardware updates to the access/dirty bits racing with this function
 * are captured rather than lost. The caller is responsible for any
 * required TLB invalidation (see get_clear_contig_flush()).
 */
static pte_t get_clear_contig(struct mm_struct *mm,
			      unsigned long addr,
			      pte_t *ptep,
			      unsigned long pgsize,
			      unsigned long ncontig)
{
	pte_t orig_pte = ptep_get(ptep);
	unsigned long i;

	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) {
		pte_t pte = ptep_get_and_clear(mm, addr, ptep);

		/*
		 * If HW_AFDBM (or software tracking) dirtied or aged any
		 * entry in the range, reflect that in the returned PTE.
		 */
		if (pte_dirty(pte))
			orig_pte = pte_mkdirty(orig_pte);

		if (pte_young(pte))
			orig_pte = pte_mkyoung(orig_pte);
	}
	return orig_pte;
}
0206
0207 static pte_t get_clear_contig_flush(struct mm_struct *mm,
0208 unsigned long addr,
0209 pte_t *ptep,
0210 unsigned long pgsize,
0211 unsigned long ncontig)
0212 {
0213 pte_t orig_pte = get_clear_contig(mm, addr, ptep, pgsize, ncontig);
0214 struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
0215
0216 flush_tlb_range(&vma, addr, addr + (pgsize * ncontig));
0217 return orig_pte;
0218 }
0219
0220
0221
0222
0223
0224
0225
0226
0227
0228
/*
 * Clear every entry of a contiguous range and invalidate the TLB for
 * it, without caring about the old values.
 *
 * This implements the "break" half of the break-before-make sequence
 * required by the architecture before installing a contiguous mapping
 * over entries that may already be live (see set_huge_pte_at()).
 */
static void clear_flush(struct mm_struct *mm,
			unsigned long addr,
			pte_t *ptep,
			unsigned long pgsize,
			unsigned long ncontig)
{
	struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
	unsigned long i, saddr = addr;

	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
		pte_clear(mm, addr, ptep);

	flush_tlb_range(&vma, saddr, addr);
}
0243
/*
 * Resolve the folio referenced by a hugetlb swap entry. Only
 * migration and hwpoison entries carry a real pfn here; anything else
 * indicates a bug in the caller.
 */
static inline struct folio *hugetlb_swap_entry_to_folio(swp_entry_t entry)
{
	VM_BUG_ON(!is_migration_entry(entry) && !is_hwpoison_entry(entry));

	return page_folio(pfn_to_page(swp_offset(entry)));
}
0250
/*
 * Install @pte as a huge mapping at @addr.
 *
 * Non-present (migration/hwpoison) entries are replicated across every
 * constituent slot, sized from the folio the swap entry refers to.
 * Non-contiguous present entries are a plain set_pte_at(). Contiguous
 * mappings must honour the architecture's break-before-make rule: the
 * old entries are cleared and flushed first, then each slot is written
 * with a pfn advanced by one constituent page size.
 */
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t pte)
{
	size_t pgsize;
	int i;
	int ncontig;
	unsigned long pfn, dpfn;
	pgprot_t hugeprot;

	if (!pte_present(pte)) {
		struct folio *folio;

		folio = hugetlb_swap_entry_to_folio(pte_to_swp_entry(pte));
		ncontig = num_contig_ptes(folio_size(folio), &pgsize);

		/* Swap entries carry no pfn to step; copy verbatim. */
		for (i = 0; i < ncontig; i++, ptep++)
			set_pte_at(mm, addr, ptep, pte);
		return;
	}

	if (!pte_cont(pte)) {
		set_pte_at(mm, addr, ptep, pte);
		return;
	}

	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
	pfn = pte_pfn(pte);
	dpfn = pgsize >> PAGE_SHIFT;
	hugeprot = pte_pgprot(pte);

	/* Break-before-make: clear + TLB flush before writing new entries. */
	clear_flush(mm, addr, ptep, pgsize, ncontig);

	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
		set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
}
0286
/*
 * Allocate (or locate) the page table entry that will map a huge page
 * of size @sz at @addr, allocating intermediate levels as needed.
 *
 * Returns a pointer cast to pte_t* at the level appropriate for @sz:
 * the PUD entry for PUD_SIZE, the PMD entry for PMD_SIZE and
 * CONT_PMD_SIZE, or a real PTE for CONT_PTE_SIZE. Returns NULL on
 * allocation failure or for unsupported sizes.
 */
pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
		      unsigned long addr, unsigned long sz)
{
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp;
	pte_t *ptep = NULL;

	pgdp = pgd_offset(mm, addr);
	p4dp = p4d_offset(pgdp, addr);
	pudp = pud_alloc(mm, p4dp, addr);
	if (!pudp)
		return NULL;

	if (sz == PUD_SIZE) {
		/* Mapped directly by a PUD block entry. */
		ptep = (pte_t *)pudp;
	} else if (sz == (CONT_PTE_SIZE)) {
		pmdp = pmd_alloc(mm, pudp, addr);
		if (!pmdp)
			return NULL;

		/* Caller must pass an address aligned to the full range. */
		WARN_ON(addr & (sz - 1));
		/*
		 * NOTE(review): returns a mapped PTE; on CONFIG_HIGHPTE
		 * configurations the caller is presumably expected to
		 * handle the kmap lifetime — confirm against callers.
		 */
		ptep = pte_alloc_map(mm, pmdp, addr);
	} else if (sz == PMD_SIZE) {
		/* Share the PMD with other mappings of the file if possible. */
		if (want_pmd_share(vma, addr) && pud_none(READ_ONCE(*pudp)))
			ptep = huge_pmd_share(mm, vma, addr, pudp);
		else
			ptep = (pte_t *)pmd_alloc(mm, pudp, addr);
	} else if (sz == (CONT_PMD_SIZE)) {
		pmdp = pmd_alloc(mm, pudp, addr);
		WARN_ON(addr & (sz - 1));
		return (pte_t *)pmdp;
	}

	return ptep;
}
0331
/*
 * Look up the page table entry mapping a huge page of size @sz at
 * @addr, without allocating anything.
 *
 * The walk uses READ_ONCE() on each level because it may run without
 * the page table lock; none/present checks are made on the snapshot.
 * Non-present (e.g. migration) entries at PUD/PMD level are returned
 * rather than skipped so callers can act on them. Returns NULL when no
 * entry of the expected level exists.
 */
pte_t *huge_pte_offset(struct mm_struct *mm,
		       unsigned long addr, unsigned long sz)
{
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp, pud;
	pmd_t *pmdp, pmd;

	pgdp = pgd_offset(mm, addr);
	if (!pgd_present(READ_ONCE(*pgdp)))
		return NULL;

	p4dp = p4d_offset(pgdp, addr);
	if (!p4d_present(READ_ONCE(*p4dp)))
		return NULL;

	pudp = pud_offset(p4dp, addr);
	pud = READ_ONCE(*pudp);
	/* For PUD_SIZE even a none entry is interesting to the caller. */
	if (sz != PUD_SIZE && pud_none(pud))
		return NULL;

	/* Block entry or non-present (swap) entry at PUD level. */
	if (pud_huge(pud) || !pud_present(pud))
		return (pte_t *)pudp;

	/* A contiguous-PMD range is addressed via its first PMD entry. */
	if (sz == CONT_PMD_SIZE)
		addr &= CONT_PMD_MASK;

	pmdp = pmd_offset(pudp, addr);
	pmd = READ_ONCE(*pmdp);
	if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) &&
	    pmd_none(pmd))
		return NULL;
	if (pmd_huge(pmd) || !pmd_present(pmd))
		return (pte_t *)pmdp;

	/* A contiguous-PTE range is addressed via its first PTE. */
	if (sz == CONT_PTE_SIZE)
		return pte_offset_kernel(pmdp, (addr & CONT_PTE_MASK));

	return NULL;
}
0373
0374 unsigned long hugetlb_mask_last_page(struct hstate *h)
0375 {
0376 unsigned long hp_size = huge_page_size(h);
0377
0378 switch (hp_size) {
0379 #ifndef __PAGETABLE_PMD_FOLDED
0380 case PUD_SIZE:
0381 return PGDIR_SIZE - PUD_SIZE;
0382 #endif
0383 case CONT_PMD_SIZE:
0384 return PUD_SIZE - CONT_PMD_SIZE;
0385 case PMD_SIZE:
0386 return PUD_SIZE - PMD_SIZE;
0387 case CONT_PTE_SIZE:
0388 return PMD_SIZE - CONT_PTE_SIZE;
0389 default:
0390 break;
0391 }
0392
0393 return 0UL;
0394 }
0395
0396 pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags)
0397 {
0398 size_t pagesize = 1UL << shift;
0399
0400 entry = pte_mkhuge(entry);
0401 if (pagesize == CONT_PTE_SIZE) {
0402 entry = pte_mkcont(entry);
0403 } else if (pagesize == CONT_PMD_SIZE) {
0404 entry = pmd_pte(pmd_mkcont(pte_pmd(entry)));
0405 } else if (pagesize != PUD_SIZE && pagesize != PMD_SIZE) {
0406 pr_warn("%s: unrecognized huge page size 0x%lx\n",
0407 __func__, pagesize);
0408 }
0409 return entry;
0410 }
0411
0412 void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
0413 pte_t *ptep, unsigned long sz)
0414 {
0415 int i, ncontig;
0416 size_t pgsize;
0417
0418 ncontig = num_contig_ptes(sz, &pgsize);
0419
0420 for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
0421 pte_clear(mm, addr, ptep);
0422 }
0423
0424 pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
0425 unsigned long addr, pte_t *ptep)
0426 {
0427 int ncontig;
0428 size_t pgsize;
0429 pte_t orig_pte = ptep_get(ptep);
0430
0431 if (!pte_cont(orig_pte))
0432 return ptep_get_and_clear(mm, addr, ptep);
0433
0434 ncontig = find_num_contig(mm, addr, ptep, &pgsize);
0435
0436 return get_clear_contig(mm, addr, ptep, pgsize, ncontig);
0437 }
0438
0439
0440
0441
0442
0443
0444
0445
0446
0447
0448 static int __cont_access_flags_changed(pte_t *ptep, pte_t pte, int ncontig)
0449 {
0450 int i;
0451
0452 if (pte_write(pte) != pte_write(ptep_get(ptep)))
0453 return 1;
0454
0455 for (i = 0; i < ncontig; i++) {
0456 pte_t orig_pte = ptep_get(ptep + i);
0457
0458 if (pte_dirty(pte) != pte_dirty(orig_pte))
0459 return 1;
0460
0461 if (pte_young(pte) != pte_young(orig_pte))
0462 return 1;
0463 }
0464
0465 return 0;
0466 }
0467
/*
 * Update the access flags of a huge mapping to those of @pte.
 *
 * Non-contiguous entries go through the generic helper. For a
 * contiguous range the whole range must be rewritten: clear and flush
 * it (break-before-make), merge any dirty/young bits the hardware set
 * while the old entries were live, then repaint every entry with the
 * new permissions. Returns 1 if the entries were changed, 0 if the
 * requested flags were already in effect.
 */
int huge_ptep_set_access_flags(struct vm_area_struct *vma,
			       unsigned long addr, pte_t *ptep,
			       pte_t pte, int dirty)
{
	int ncontig, i;
	size_t pgsize = 0;
	unsigned long pfn = pte_pfn(pte), dpfn;
	struct mm_struct *mm = vma->vm_mm;
	pgprot_t hugeprot;
	pte_t orig_pte;

	if (!pte_cont(pte))
		return ptep_set_access_flags(vma, addr, ptep, pte, dirty);

	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
	dpfn = pgsize >> PAGE_SHIFT;

	/* Nothing to do if no flag actually differs across the range. */
	if (!__cont_access_flags_changed(ptep, pte, ncontig))
		return 0;

	orig_pte = get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig);

	/* Preserve dirty/young state accumulated in the old entries. */
	if (pte_dirty(orig_pte))
		pte = pte_mkdirty(pte);

	if (pte_young(orig_pte))
		pte = pte_mkyoung(pte);

	hugeprot = pte_pgprot(pte);
	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
		set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));

	return 1;
}
0503
/*
 * Write-protect a huge mapping at @addr.
 *
 * A non-contiguous entry uses the generic per-entry helper. A
 * contiguous range is cleared and flushed first (break-before-make),
 * the returned PTE — with the range's accumulated dirty/young bits —
 * is write-protected, and every entry is rewritten with the new
 * protections, stepping the pfn by one constituent page per slot.
 */
void huge_ptep_set_wrprotect(struct mm_struct *mm,
			     unsigned long addr, pte_t *ptep)
{
	unsigned long pfn, dpfn;
	pgprot_t hugeprot;
	int ncontig, i;
	size_t pgsize;
	pte_t pte;

	if (!pte_cont(READ_ONCE(*ptep))) {
		ptep_set_wrprotect(mm, addr, ptep);
		return;
	}

	ncontig = find_num_contig(mm, addr, ptep, &pgsize);
	dpfn = pgsize >> PAGE_SHIFT;

	pte = get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig);
	pte = pte_wrprotect(pte);

	hugeprot = pte_pgprot(pte);
	pfn = pte_pfn(pte);

	for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
		set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
}
0530
0531 pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
0532 unsigned long addr, pte_t *ptep)
0533 {
0534 struct mm_struct *mm = vma->vm_mm;
0535 size_t pgsize;
0536 int ncontig;
0537
0538 if (!pte_cont(READ_ONCE(*ptep)))
0539 return ptep_clear_flush(vma, addr, ptep);
0540
0541 ncontig = find_num_contig(mm, addr, ptep, &pgsize);
0542 return get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig);
0543 }
0544
/*
 * Register an hstate for every huge page size this configuration
 * supports. PUD-sized pages are only registered when the hardware can
 * map sections at PUD level.
 */
static int __init hugetlbpage_init(void)
{
	if (pud_sect_supported())
		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);

	hugetlb_add_hstate(CONT_PMD_SHIFT - PAGE_SHIFT);
	hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
	hugetlb_add_hstate(CONT_PTE_SHIFT - PAGE_SHIFT);

	return 0;
}
arch_initcall(hugetlbpage_init);
0557
/*
 * Boot-time hook for the "hugepagesz=" command line parameter: accept
 * exactly the sizes __hugetlb_valid_size() knows about.
 */
bool __init arch_hugetlb_valid_size(unsigned long size)
{
	return __hugetlb_valid_size(size);
}