// SPDX-License-Identifier: GPL-2.0-only
/*
 * arch/arm64/mm/hugetlbpage.c
 *
 * Copyright (C) 2013 Linaro Ltd.
 *
 * Based on arch/x86/mm/hugetlbpage.c.
 */

#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>

/*
 * HugeTLB Support Matrix
 *
 * ---------------------------------------------------
 * | Page Size | CONT PTE |  PMD  | CONT PMD |  PUD  |
 * ---------------------------------------------------
 * |     4K    |   64K    |   2M  |    32M   |   1G  |
 * |    16K    |    2M    |  32M  |     1G   |       |
 * |    64K    |    2M    | 512M  |    16G   |       |
 * ---------------------------------------------------
 */

/*
 * Reserve CMA areas for the largest supported gigantic
 * huge page when requested. Any other smaller gigantic
 * huge pages could still be served from those areas.
 */
#ifdef CONFIG_CMA
void __init arm64_hugetlb_cma_reserve(void)
{
    int order;

    if (pud_sect_supported())
        order = PUD_SHIFT - PAGE_SHIFT;
    else
        order = CONT_PMD_SHIFT - PAGE_SHIFT;

    /*
     * HugeTLB CMA reservation is required for gigantic
     * huge pages which could not be allocated via the
     * page allocator. Just warn if there is any change
     * breaking this assumption.
     */
    WARN_ON(order <= MAX_ORDER);
    hugetlb_cma_reserve(order);
}
#endif /* CONFIG_CMA */

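/*
 * Check whether a given huge page size is supported by this kernel
 * configuration (see the support matrix above). PUD_SIZE is only
 * valid when section mappings at the PUD level are available.
 */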
static bool __hugetlb_valid_size(unsigned long size)
{
    switch (size) {
#ifndef __PAGETABLE_PMD_FOLDED
    case PUD_SIZE:
        return pud_sect_supported();
#endif
    case CONT_PMD_SIZE:
    case PMD_SIZE:
    case CONT_PTE_SIZE:
        return true;
    }

    return false;
}

#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
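/*
 * Huge page migration is supported for any huge page size this
 * configuration can actually map; unrecognized sizes are rejected
 * with a warning.
 */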
bool arch_hugetlb_migration_supported(struct hstate *h)
{
    size_t pagesize = huge_page_size(h);

    if (!__hugetlb_valid_size(pagesize)) {
        pr_warn("%s: unrecognized huge page size 0x%lx\n",
            __func__, pagesize);
        return false;
    }
    return true;
}
#endif

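/*
 * A non-zero PMD/PUD entry with the table bit clear is a block
 * (section) mapping, i.e. a huge page at that level; a valid entry
 * with the table bit set points to the next level of table.
 */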
int pmd_huge(pmd_t pmd)
{
    return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT);
}

int pud_huge(pud_t pud)
{
#ifndef __PAGETABLE_PMD_FOLDED
    return pud_val(pud) && !(pud_val(pud) & PUD_TABLE_BIT);
#else
    return 0;
#endif
}

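/*
 * Work out how many contiguous entries back this huge page by
 * checking which page table level ptep lives at: if it points at the
 * PMD entry for addr, the mapping is a set of CONT_PMDS block
 * entries; otherwise it is a set of CONT_PTES page entries.
 */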
static int find_num_contig(struct mm_struct *mm, unsigned long addr,
               pte_t *ptep, size_t *pgsize)
{
    pgd_t *pgdp = pgd_offset(mm, addr);
    p4d_t *p4dp;
    pud_t *pudp;
    pmd_t *pmdp;

    *pgsize = PAGE_SIZE;
    p4dp = p4d_offset(pgdp, addr);
    pudp = pud_offset(p4dp, addr);
    pmdp = pmd_offset(pudp, addr);
    if ((pte_t *)pmdp == ptep) {
        *pgsize = PMD_SIZE;
        return CONT_PMDS;
    }
    return CONT_PTES;
}

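/*
 * Translate a huge page size into the size covered by each entry
 * backing it (*pgsize) and the number of entries that must be
 * written. Returns 0 for an unsupported size.
 */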
static inline int num_contig_ptes(unsigned long size, size_t *pgsize)
{
    int contig_ptes = 0;

    *pgsize = size;

    switch (size) {
#ifndef __PAGETABLE_PMD_FOLDED
    case PUD_SIZE:
        if (pud_sect_supported())
            contig_ptes = 1;
        break;
#endif
    case PMD_SIZE:
        contig_ptes = 1;
        break;
    case CONT_PMD_SIZE:
        *pgsize = PMD_SIZE;
        contig_ptes = CONT_PMDS;
        break;
    case CONT_PTE_SIZE:
        *pgsize = PAGE_SIZE;
        contig_ptes = CONT_PTES;
        break;
    }

    return contig_ptes;
}

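/*
 * For a contiguous range, hardware may have set the dirty or young
 * bit on any entry in the set, so fold the state of all entries into
 * the value returned for the first one.
 */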
pte_t huge_ptep_get(pte_t *ptep)
{
    int ncontig, i;
    size_t pgsize;
    pte_t orig_pte = ptep_get(ptep);

    if (!pte_present(orig_pte) || !pte_cont(orig_pte))
        return orig_pte;

    ncontig = num_contig_ptes(page_size(pte_page(orig_pte)), &pgsize);
    for (i = 0; i < ncontig; i++, ptep++) {
        pte_t pte = ptep_get(ptep);

        if (pte_dirty(pte))
            orig_pte = pte_mkdirty(orig_pte);

        if (pte_young(pte))
            orig_pte = pte_mkyoung(orig_pte);
    }
    return orig_pte;
}

/*
 * Changing some bits of contiguous entries requires us to follow a
 * Break-Before-Make approach, breaking the whole contiguous set
 * before we can change any entries. See ARM DDI 0487A.k_iss10775,
 * "Misprogramming of the Contiguous bit", page D4-1762.
 *
 * This helper performs the break step.
 */
static pte_t get_clear_contig(struct mm_struct *mm,
                 unsigned long addr,
                 pte_t *ptep,
                 unsigned long pgsize,
                 unsigned long ncontig)
{
    pte_t orig_pte = ptep_get(ptep);
    unsigned long i;

    for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) {
        pte_t pte = ptep_get_and_clear(mm, addr, ptep);

        /*
         * If HW_AFDBM is enabled, then the HW could turn on
         * the dirty or accessed bit for any page in the set,
         * so check them all.
         */
        if (pte_dirty(pte))
            orig_pte = pte_mkdirty(orig_pte);

        if (pte_young(pte))
            orig_pte = pte_mkyoung(orig_pte);
    }
    return orig_pte;
}

static pte_t get_clear_contig_flush(struct mm_struct *mm,
                    unsigned long addr,
                    pte_t *ptep,
                    unsigned long pgsize,
                    unsigned long ncontig)
{
    pte_t orig_pte = get_clear_contig(mm, addr, ptep, pgsize, ncontig);
    struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);

    flush_tlb_range(&vma, addr, addr + (pgsize * ncontig));
    return orig_pte;
}

/*
 * Changing some bits of contiguous entries requires us to follow a
 * Break-Before-Make approach, breaking the whole contiguous set
 * before we can change any entries. See ARM DDI 0487A.k_iss10775,
 * "Misprogramming of the Contiguous bit", page D4-1762.
 *
 * This helper performs the break step for use cases where the
 * original pte is not needed.
 */
static void clear_flush(struct mm_struct *mm,
             unsigned long addr,
             pte_t *ptep,
             unsigned long pgsize,
             unsigned long ncontig)
{
    struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
    unsigned long i, saddr = addr;

    for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
        pte_clear(mm, addr, ptep);

    flush_tlb_range(&vma, saddr, addr);
}

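/*
 * Only migration and hwpoison entries are expected here; both encode
 * the pfn in the swap offset, from which the folio can be recovered.
 */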
static inline struct folio *hugetlb_swap_entry_to_folio(swp_entry_t entry)
{
    VM_BUG_ON(!is_migration_entry(entry) && !is_hwpoison_entry(entry));

    return page_folio(pfn_to_page(swp_offset(entry)));
}

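/*
 * Install a huge page mapping. Swap entries (migration/hwpoison) are
 * simply replicated across the contiguous range; present contiguous
 * mappings must first be broken and flushed (break-before-make), then
 * all ncontig entries are rewritten with the contiguous attributes.
 */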
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
            pte_t *ptep, pte_t pte)
{
    size_t pgsize;
    int i;
    int ncontig;
    unsigned long pfn, dpfn;
    pgprot_t hugeprot;

    if (!pte_present(pte)) {
        struct folio *folio;

        folio = hugetlb_swap_entry_to_folio(pte_to_swp_entry(pte));
        ncontig = num_contig_ptes(folio_size(folio), &pgsize);

        for (i = 0; i < ncontig; i++, ptep++)
            set_pte_at(mm, addr, ptep, pte);
        return;
    }

    if (!pte_cont(pte)) {
        set_pte_at(mm, addr, ptep, pte);
        return;
    }

    ncontig = find_num_contig(mm, addr, ptep, &pgsize);
    pfn = pte_pfn(pte);
    dpfn = pgsize >> PAGE_SHIFT;
    hugeprot = pte_pgprot(pte);

    clear_flush(mm, addr, ptep, pgsize, ncontig);

    for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
        set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
}

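/*
 * Allocate the page table entries needed to map a huge page of size
 * sz at addr, returning a pointer to the first entry the caller
 * should populate. PMD-sized pages may share page table pages with
 * other mappings via huge_pmd_share().
 */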
pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
              unsigned long addr, unsigned long sz)
{
    pgd_t *pgdp;
    p4d_t *p4dp;
    pud_t *pudp;
    pmd_t *pmdp;
    pte_t *ptep = NULL;

    pgdp = pgd_offset(mm, addr);
    p4dp = p4d_offset(pgdp, addr);
    pudp = pud_alloc(mm, p4dp, addr);
    if (!pudp)
        return NULL;

    if (sz == PUD_SIZE) {
        ptep = (pte_t *)pudp;
    } else if (sz == (CONT_PTE_SIZE)) {
        pmdp = pmd_alloc(mm, pudp, addr);
        if (!pmdp)
            return NULL;

        WARN_ON(addr & (sz - 1));
        /*
         * Note that if this code were ever ported to the
         * 32-bit arm platform then it will cause trouble in
         * the case where CONFIG_HIGHPTE is set, since there
         * will be no pte_unmap() to correspond with this
         * pte_alloc_map().
         */
        ptep = pte_alloc_map(mm, pmdp, addr);
    } else if (sz == PMD_SIZE) {
        if (want_pmd_share(vma, addr) && pud_none(READ_ONCE(*pudp)))
            ptep = huge_pmd_share(mm, vma, addr, pudp);
        else
            ptep = (pte_t *)pmd_alloc(mm, pudp, addr);
    } else if (sz == (CONT_PMD_SIZE)) {
        pmdp = pmd_alloc(mm, pudp, addr);
        WARN_ON(addr & (sz - 1));
        return (pte_t *)pmdp;
    }

    return ptep;
}

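/*
 * Walk the page tables and return the entry mapping a huge page of
 * size sz at addr, or NULL if none is present. Non-present entries
 * (e.g. migration or hwpoison swap entries) are returned as well as
 * present block mappings.
 */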
pte_t *huge_pte_offset(struct mm_struct *mm,
               unsigned long addr, unsigned long sz)
{
    pgd_t *pgdp;
    p4d_t *p4dp;
    pud_t *pudp, pud;
    pmd_t *pmdp, pmd;

    pgdp = pgd_offset(mm, addr);
    if (!pgd_present(READ_ONCE(*pgdp)))
        return NULL;

    p4dp = p4d_offset(pgdp, addr);
    if (!p4d_present(READ_ONCE(*p4dp)))
        return NULL;

    pudp = pud_offset(p4dp, addr);
    pud = READ_ONCE(*pudp);
    if (sz != PUD_SIZE && pud_none(pud))
        return NULL;
    /* hugepage or swap? */
    if (pud_huge(pud) || !pud_present(pud))
        return (pte_t *)pudp;
    /* table; check the next level */

    if (sz == CONT_PMD_SIZE)
        addr &= CONT_PMD_MASK;

    pmdp = pmd_offset(pudp, addr);
    pmd = READ_ONCE(*pmdp);
    if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) &&
        pmd_none(pmd))
        return NULL;
    if (pmd_huge(pmd) || !pmd_present(pmd))
        return (pte_t *)pmdp;

    if (sz == CONT_PTE_SIZE)
        return pte_offset_kernel(pmdp, (addr & CONT_PTE_MASK));

    return NULL;
}

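/*
 * Return a mask that can be used to advance an address to the last
 * huge page mapped by a single page table page, allowing generic
 * code to skip non-present entries when scanning address ranges.
 */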
unsigned long hugetlb_mask_last_page(struct hstate *h)
{
    unsigned long hp_size = huge_page_size(h);

    switch (hp_size) {
#ifndef __PAGETABLE_PMD_FOLDED
    case PUD_SIZE:
        return PGDIR_SIZE - PUD_SIZE;
#endif
    case CONT_PMD_SIZE:
        return PUD_SIZE - CONT_PMD_SIZE;
    case PMD_SIZE:
        return PUD_SIZE - PMD_SIZE;
    case CONT_PTE_SIZE:
        return PMD_SIZE - CONT_PTE_SIZE;
    default:
        break;
    }

    return 0UL;
}

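/*
 * Mark the pte as a huge page of the requested size, setting the
 * contiguous bit for the CONT_PTE/CONT_PMD cases and warning on any
 * unrecognized size.
 */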
pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags)
{
    size_t pagesize = 1UL << shift;

    entry = pte_mkhuge(entry);
    if (pagesize == CONT_PTE_SIZE) {
        entry = pte_mkcont(entry);
    } else if (pagesize == CONT_PMD_SIZE) {
        entry = pmd_pte(pmd_mkcont(pte_pmd(entry)));
    } else if (pagesize != PUD_SIZE && pagesize != PMD_SIZE) {
        pr_warn("%s: unrecognized huge page size 0x%lx\n",
            __func__, pagesize);
    }
    return entry;
}

void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
            pte_t *ptep, unsigned long sz)
{
    int i, ncontig;
    size_t pgsize;

    ncontig = num_contig_ptes(sz, &pgsize);

    for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
        pte_clear(mm, addr, ptep);
}

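/*
 * Clear the (possibly contiguous) huge page mapping, returning the
 * original entry with dirty/young state gathered from the whole set.
 */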
pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
                  unsigned long addr, pte_t *ptep)
{
    int ncontig;
    size_t pgsize;
    pte_t orig_pte = ptep_get(ptep);

    if (!pte_cont(orig_pte))
        return ptep_get_and_clear(mm, addr, ptep);

    ncontig = find_num_contig(mm, addr, ptep, &pgsize);

    return get_clear_contig(mm, addr, ptep, pgsize, ncontig);
}

/*
 * huge_ptep_set_access_flags will update access flags (dirty, accessed)
 * and write permission.
 *
 * For a contiguous huge pte range, write permission only needs to be
 * checked against the first pte in the set. Dirty and young, however,
 * may differ between entries, so they are checked on every pte.
 */
static int __cont_access_flags_changed(pte_t *ptep, pte_t pte, int ncontig)
{
    int i;

    if (pte_write(pte) != pte_write(ptep_get(ptep)))
        return 1;

    for (i = 0; i < ncontig; i++) {
        pte_t orig_pte = ptep_get(ptep + i);

        if (pte_dirty(pte) != pte_dirty(orig_pte))
            return 1;

        if (pte_young(pte) != pte_young(orig_pte))
            return 1;
    }

    return 0;
}

int huge_ptep_set_access_flags(struct vm_area_struct *vma,
                   unsigned long addr, pte_t *ptep,
                   pte_t pte, int dirty)
{
    int ncontig, i;
    size_t pgsize = 0;
    unsigned long pfn = pte_pfn(pte), dpfn;
    struct mm_struct *mm = vma->vm_mm;
    pgprot_t hugeprot;
    pte_t orig_pte;

    if (!pte_cont(pte))
        return ptep_set_access_flags(vma, addr, ptep, pte, dirty);

    ncontig = find_num_contig(mm, addr, ptep, &pgsize);
    dpfn = pgsize >> PAGE_SHIFT;

    if (!__cont_access_flags_changed(ptep, pte, ncontig))
        return 0;

    orig_pte = get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig);

    /* Make sure we don't lose the dirty or young state */
    if (pte_dirty(orig_pte))
        pte = pte_mkdirty(pte);

    if (pte_young(orig_pte))
        pte = pte_mkyoung(pte);

    hugeprot = pte_pgprot(pte);
    for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
        set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));

    return 1;
}

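/*
 * Write-protect a huge page mapping. Contiguous ranges must go
 * through a full break-before-make cycle: clear and flush all the
 * entries, then rewrite them read-only, preserving any dirty/young
 * state gathered from the set.
 */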
void huge_ptep_set_wrprotect(struct mm_struct *mm,
                 unsigned long addr, pte_t *ptep)
{
    unsigned long pfn, dpfn;
    pgprot_t hugeprot;
    int ncontig, i;
    size_t pgsize;
    pte_t pte;

    if (!pte_cont(READ_ONCE(*ptep))) {
        ptep_set_wrprotect(mm, addr, ptep);
        return;
    }

    ncontig = find_num_contig(mm, addr, ptep, &pgsize);
    dpfn = pgsize >> PAGE_SHIFT;

    pte = get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig);
    pte = pte_wrprotect(pte);

    hugeprot = pte_pgprot(pte);
    pfn = pte_pfn(pte);

    for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
        set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
}

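/*
 * Clear a huge page mapping and flush the TLB, handling both the
 * single-entry and the contiguous cases.
 */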
pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
                unsigned long addr, pte_t *ptep)
{
    struct mm_struct *mm = vma->vm_mm;
    size_t pgsize;
    int ncontig;

    if (!pte_cont(READ_ONCE(*ptep)))
        return ptep_clear_flush(vma, addr, ptep);

    ncontig = find_num_contig(mm, addr, ptep, &pgsize);
    return get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig);
}

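/*
 * Register an hstate for every huge page size this kernel
 * configuration supports (see the support matrix above).
 */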
static int __init hugetlbpage_init(void)
{
    if (pud_sect_supported())
        hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);

    hugetlb_add_hstate(CONT_PMD_SHIFT - PAGE_SHIFT);
    hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
    hugetlb_add_hstate(CONT_PTE_SHIFT - PAGE_SHIFT);

    return 0;
}
arch_initcall(hugetlbpage_init);

bool __init arch_hugetlb_valid_size(unsigned long size)
{
    return __hugetlb_valid_size(size);
}