// SPDX-License-Identifier: GPL-2.0
/*
 * SPARC64 Huge TLB page support.
 *
 * Copyright (C) 2002, 2003, 2006 David S. Miller (davem@davemloft.net)
 */

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/sched/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/sysctl.h>

#include <asm/mman.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>

/* Slightly simplified from the non-hugepage variant because by
 * definition we don't have to worry about any page coloring stuff
 */

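/*
 * Bottom-up search for a free, hugepage-aligned range. Alignment is
 * requested via info.align_mask, so vm_unmapped_area() only returns
 * addresses aligned to this hstate's huge page size. sparc64 has a
 * hole in its virtual address space; if the search below the hole
 * fails, retry in the region above it (VA_EXCLUDE_END..task_size).
 */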
static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp,
                            unsigned long addr,
                            unsigned long len,
                            unsigned long pgoff,
                            unsigned long flags)
{
    struct hstate *h = hstate_file(filp);
    unsigned long task_size = TASK_SIZE;
    struct vm_unmapped_area_info info;

    if (test_thread_flag(TIF_32BIT))
        task_size = STACK_TOP32;

    info.flags = 0;
    info.length = len;
    info.low_limit = TASK_UNMAPPED_BASE;
    info.high_limit = min(task_size, VA_EXCLUDE_START);
    info.align_mask = PAGE_MASK & ~huge_page_mask(h);
    info.align_offset = 0;
    addr = vm_unmapped_area(&info);

    if ((addr & ~PAGE_MASK) && task_size > VA_EXCLUDE_END) {
        VM_BUG_ON(addr != -ENOMEM);
        info.low_limit = VA_EXCLUDE_END;
        info.high_limit = task_size;
        addr = vm_unmapped_area(&info);
    }

    return addr;
}

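/*
 * Top-down variant for 32-bit tasks, which allocate mmaps downward
 * from mm->mmap_base. If the top-down search fails, fall back to a
 * bottom-up search so a large request can still succeed.
 */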
static unsigned long
hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
                  const unsigned long len,
                  const unsigned long pgoff,
                  const unsigned long flags)
{
    struct hstate *h = hstate_file(filp);
    struct mm_struct *mm = current->mm;
    unsigned long addr = addr0;
    struct vm_unmapped_area_info info;

    /* This should only ever run for 32-bit processes.  */
    BUG_ON(!test_thread_flag(TIF_32BIT));

    info.flags = VM_UNMAPPED_AREA_TOPDOWN;
    info.length = len;
    info.low_limit = PAGE_SIZE;
    info.high_limit = mm->mmap_base;
    info.align_mask = PAGE_MASK & ~huge_page_mask(h);
    info.align_offset = 0;
    addr = vm_unmapped_area(&info);

    /*
     * A failed mmap() very likely causes application failure,
     * so fall back to the bottom-up function here. This scenario
     * can happen with large stack limits and large mmap()
     * allocations.
     */
    if (addr & ~PAGE_MASK) {
        VM_BUG_ON(addr != -ENOMEM);
        info.flags = 0;
        info.low_limit = TASK_UNMAPPED_BASE;
        info.high_limit = STACK_TOP32;
        addr = vm_unmapped_area(&info);
    }

    return addr;
}

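/*
 * Arch hook behind get_unmapped_area() for hugetlb mappings: validate
 * the length, honor MAP_FIXED and any address hint when possible, then
 * dispatch to the bottom-up or top-down helper matching this mm's
 * normal mmap layout.
 */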
unsigned long
hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
        unsigned long len, unsigned long pgoff, unsigned long flags)
{
    struct hstate *h = hstate_file(file);
    struct mm_struct *mm = current->mm;
    struct vm_area_struct *vma;
    unsigned long task_size = TASK_SIZE;

    if (test_thread_flag(TIF_32BIT))
        task_size = STACK_TOP32;

    if (len & ~huge_page_mask(h))
        return -EINVAL;
    if (len > task_size)
        return -ENOMEM;

    if (flags & MAP_FIXED) {
        if (prepare_hugepage_range(file, addr, len))
            return -EINVAL;
        return addr;
    }

    if (addr) {
        addr = ALIGN(addr, huge_page_size(h));
        vma = find_vma(mm, addr);
        if (task_size - len >= addr &&
            (!vma || addr + len <= vm_start_gap(vma)))
            return addr;
    }
    if (mm->get_unmapped_area == arch_get_unmapped_area)
        return hugetlb_get_unmapped_area_bottomup(file, addr, len,
                pgoff, flags);
    else
        return hugetlb_get_unmapped_area_topdown(file, addr, len,
                pgoff, flags);
}

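/*
 * sun4u: the TTE size bits are assumed to have been set already (e.g.
 * by pte_mkhuge()), so there is nothing to translate here.
 */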
static pte_t sun4u_hugepage_shift_to_tte(pte_t entry, unsigned int shift)
{
    return entry;
}

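/*
 * Encode the huge page size into a sun4v TTE: clear the old size field,
 * then set the size bits for the requested shift. Mappings installed at
 * the pmd or pud level also get _PAGE_PMD_HUGE/_PAGE_PUD_HUGE so page
 * table walkers can recognize the huge leaf entry.
 */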
static pte_t sun4v_hugepage_shift_to_tte(pte_t entry, unsigned int shift)
{
    unsigned long hugepage_size = _PAGE_SZ4MB_4V;

    pte_val(entry) = pte_val(entry) & ~_PAGE_SZALL_4V;

    switch (shift) {
    case HPAGE_16GB_SHIFT:
        hugepage_size = _PAGE_SZ16GB_4V;
        pte_val(entry) |= _PAGE_PUD_HUGE;
        break;
    case HPAGE_2GB_SHIFT:
        hugepage_size = _PAGE_SZ2GB_4V;
        pte_val(entry) |= _PAGE_PMD_HUGE;
        break;
    case HPAGE_256MB_SHIFT:
        hugepage_size = _PAGE_SZ256MB_4V;
        pte_val(entry) |= _PAGE_PMD_HUGE;
        break;
    case HPAGE_SHIFT:
        pte_val(entry) |= _PAGE_PMD_HUGE;
        break;
    case HPAGE_64K_SHIFT:
        hugepage_size = _PAGE_SZ64K_4V;
        break;
    default:
        WARN_ONCE(1, "unsupported hugepage shift=%u\n", shift);
    }

    pte_val(entry) = pte_val(entry) | hugepage_size;
    return entry;
}

static pte_t hugepage_shift_to_tte(pte_t entry, unsigned int shift)
{
    if (tlb_type == hypervisor)
        return sun4v_hugepage_shift_to_tte(entry, shift);
    else
        return sun4u_hugepage_shift_to_tte(entry, shift);
}

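/*
 * Build the final huge TTE for a mapping: mark the pte huge, encode its
 * size, and propagate the vma's ADI (Application Data Integrity) state
 * into TTE.mcd on configurations that support it.
 */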
pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags)
{
    pte_t pte;

    entry = pte_mkhuge(entry);
    pte = hugepage_shift_to_tte(entry, shift);

#ifdef CONFIG_SPARC64
    /* If this vma has ADI enabled on it, turn on TTE.mcd */
    if (flags & VM_SPARC_ADI)
        return pte_mkmcd(pte);
    else
        return pte_mknotmcd(pte);
#else
    return pte;
#endif
}

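/*
 * The reverse direction: decode the size field of a huge TTE back into
 * a page shift. PAGE_SHIFT is returned for anything that is not a
 * recognized huge page encoding, which callers treat as an error.
 */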
static unsigned int sun4v_huge_tte_to_shift(pte_t entry)
{
    unsigned long tte_szbits = pte_val(entry) & _PAGE_SZALL_4V;
    unsigned int shift;

    switch (tte_szbits) {
    case _PAGE_SZ16GB_4V:
        shift = HPAGE_16GB_SHIFT;
        break;
    case _PAGE_SZ2GB_4V:
        shift = HPAGE_2GB_SHIFT;
        break;
    case _PAGE_SZ256MB_4V:
        shift = HPAGE_256MB_SHIFT;
        break;
    case _PAGE_SZ4MB_4V:
        shift = REAL_HPAGE_SHIFT;
        break;
    case _PAGE_SZ64K_4V:
        shift = HPAGE_64K_SHIFT;
        break;
    default:
        shift = PAGE_SHIFT;
        break;
    }
    return shift;
}

static unsigned int sun4u_huge_tte_to_shift(pte_t entry)
{
    unsigned long tte_szbits = pte_val(entry) & _PAGE_SZALL_4U;
    unsigned int shift;

    switch (tte_szbits) {
    case _PAGE_SZ256MB_4U:
        shift = HPAGE_256MB_SHIFT;
        break;
    case _PAGE_SZ4MB_4U:
        shift = REAL_HPAGE_SHIFT;
        break;
    case _PAGE_SZ64K_4U:
        shift = HPAGE_64K_SHIFT;
        break;
    default:
        shift = PAGE_SHIFT;
        break;
    }
    return shift;
}

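/*
 * Note that a 4MB TTE decodes to REAL_HPAGE_SHIFT: the default 8MB
 * HPAGE_SIZE page is backed by two adjacent 4MB REAL_HPAGE_SIZE TTEs,
 * and huge_tte_to_size() reports HPAGE_SIZE for it.
 */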
static unsigned long tte_to_shift(pte_t entry)
{
    if (tlb_type == hypervisor)
        return sun4v_huge_tte_to_shift(entry);

    return sun4u_huge_tte_to_shift(entry);
}

static unsigned int huge_tte_to_shift(pte_t entry)
{
    unsigned long shift = tte_to_shift(entry);

    if (shift == PAGE_SHIFT)
        WARN_ONCE(1, "huge_tte_to_shift: invalid hugepage tte=0x%lx\n",
              pte_val(entry));

    return shift;
}

static unsigned long huge_tte_to_size(pte_t pte)
{
    unsigned long size = 1UL << huge_tte_to_shift(pte);

    if (size == REAL_HPAGE_SIZE)
        size = HPAGE_SIZE;
    return size;
}

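/* Leaf size reporting for generic page table walkers (e.g. perf). */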
unsigned long pud_leaf_size(pud_t pud) { return 1UL << tte_to_shift(*(pte_t *)&pud); }
unsigned long pmd_leaf_size(pmd_t pmd) { return 1UL << tte_to_shift(*(pte_t *)&pmd); }
unsigned long pte_leaf_size(pte_t pte) { return 1UL << tte_to_shift(pte); }

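/*
 * Allocate page table levels down to the one that maps the huge page:
 * a PUD-sized page is mapped by the pud entry itself, a PMD-sized page
 * by the pmd entry, and smaller huge pages live in a normal pte page.
 */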
pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
            unsigned long addr, unsigned long sz)
{
    pgd_t *pgd;
    p4d_t *p4d;
    pud_t *pud;
    pmd_t *pmd;

    pgd = pgd_offset(mm, addr);
    p4d = p4d_offset(pgd, addr);
    pud = pud_alloc(mm, p4d, addr);
    if (!pud)
        return NULL;
    if (sz >= PUD_SIZE)
        return (pte_t *)pud;
    pmd = pmd_alloc(mm, pud, addr);
    if (!pmd)
        return NULL;
    if (sz >= PMD_SIZE)
        return (pte_t *)pmd;
    return pte_alloc_map(mm, pmd, addr);
}

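/*
 * Look up the (possibly huge) pte for an address, stopping at whichever
 * level holds the huge mapping; returns NULL if nothing is mapped.
 */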
pte_t *huge_pte_offset(struct mm_struct *mm,
               unsigned long addr, unsigned long sz)
{
    pgd_t *pgd;
    p4d_t *p4d;
    pud_t *pud;
    pmd_t *pmd;

    pgd = pgd_offset(mm, addr);
    if (pgd_none(*pgd))
        return NULL;
    p4d = p4d_offset(pgd, addr);
    if (p4d_none(*p4d))
        return NULL;
    pud = pud_offset(p4d, addr);
    if (pud_none(*pud))
        return NULL;
    if (is_hugetlb_pud(*pud))
        return (pte_t *)pud;
    pmd = pmd_offset(pud, addr);
    if (pmd_none(*pmd))
        return NULL;
    if (is_hugetlb_pmd(*pmd))
        return (pte_t *)pmd;
    return pte_offset_map(pmd, addr);
}

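/*
 * Install a huge mapping. A huge page may span several entries at its
 * page table level (e.g. a 64K page covers eight 8K ptes); every
 * covered entry gets a copy of the TTE with the address bits advanced
 * by (i << shift). An HPAGE_SIZE mapping occupies a single pmd but is
 * demapped as two REAL_HPAGE_SIZE halves, hence the second
 * maybe_tlb_batch_add() call.
 */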
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
             pte_t *ptep, pte_t entry)
{
    unsigned int nptes, orig_shift, shift;
    unsigned long i, size;
    pte_t orig;

    size = huge_tte_to_size(entry);

    if (size >= PUD_SIZE)
        shift = PUD_SHIFT;
    else if (size >= PMD_SIZE)
        shift = PMD_SHIFT;
    else
        shift = PAGE_SHIFT;

    nptes = size >> shift;

    if (!pte_present(*ptep) && pte_present(entry))
        mm->context.hugetlb_pte_count += nptes;

    addr &= ~(size - 1);
    orig = *ptep;
    orig_shift = pte_none(orig) ? PAGE_SHIFT : huge_tte_to_shift(orig);

    for (i = 0; i < nptes; i++)
        ptep[i] = __pte(pte_val(entry) + (i << shift));

    maybe_tlb_batch_add(mm, addr, ptep, orig, 0, orig_shift);
    /* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */
    if (size == HPAGE_SIZE)
        maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, orig, 0,
                    orig_shift);
}

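/*
 * Tear down a huge mapping: clear every entry the huge page occupies at
 * its level, drop the hugetlb pte accounting, and queue the TLB
 * flushes, again treating HPAGE_SIZE as two REAL_HPAGE_SIZE halves.
 */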
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
                  pte_t *ptep)
{
    unsigned int i, nptes, orig_shift, shift;
    unsigned long size;
    pte_t entry;

    entry = *ptep;
    size = huge_tte_to_size(entry);

    if (size >= PUD_SIZE)
        shift = PUD_SHIFT;
    else if (size >= PMD_SIZE)
        shift = PMD_SHIFT;
    else
        shift = PAGE_SHIFT;

    nptes = size >> shift;
    orig_shift = pte_none(entry) ? PAGE_SHIFT : huge_tte_to_shift(entry);

    if (pte_present(entry))
        mm->context.hugetlb_pte_count -= nptes;

    addr &= ~(size - 1);
    for (i = 0; i < nptes; i++)
        ptep[i] = __pte(0UL);

    maybe_tlb_batch_add(mm, addr, ptep, entry, 0, orig_shift);
    /* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */
    if (size == HPAGE_SIZE)
        maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, entry, 0,
                    orig_shift);

    return entry;
}

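/*
 * An entry counts as huge unless it is empty or a plain _PAGE_VALID
 * pointer to a lower-level table; huge leaves additionally carry
 * _PAGE_PMD_HUGE/_PAGE_PUD_HUGE.
 */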
int pmd_huge(pmd_t pmd)
{
    return !pmd_none(pmd) &&
        (pmd_val(pmd) & (_PAGE_VALID|_PAGE_PMD_HUGE)) != _PAGE_VALID;
}

int pud_huge(pud_t pud)
{
    return !pud_none(pud) &&
        (pud_val(pud) & (_PAGE_VALID|_PAGE_PUD_HUGE)) != _PAGE_VALID;
}

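/*
 * The helpers below free the page tables that backed a hugetlb region.
 * Each level clears its entries, then frees the table one level down
 * once the whole span is covered, honoring the floor/ceiling bounds so
 * tables shared with neighbouring mappings are left intact.
 */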
static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
               unsigned long addr)
{
    pgtable_t token = pmd_pgtable(*pmd);

    pmd_clear(pmd);
    pte_free_tlb(tlb, token, addr);
    mm_dec_nr_ptes(tlb->mm);
}

static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
                   unsigned long addr, unsigned long end,
                   unsigned long floor, unsigned long ceiling)
{
    pmd_t *pmd;
    unsigned long next;
    unsigned long start;

    start = addr;
    pmd = pmd_offset(pud, addr);
    do {
        next = pmd_addr_end(addr, end);
        if (pmd_none(*pmd))
            continue;
        if (is_hugetlb_pmd(*pmd))
            pmd_clear(pmd);
        else
            hugetlb_free_pte_range(tlb, pmd, addr);
    } while (pmd++, addr = next, addr != end);

    start &= PUD_MASK;
    if (start < floor)
        return;
    if (ceiling) {
        ceiling &= PUD_MASK;
        if (!ceiling)
            return;
    }
    if (end - 1 > ceiling - 1)
        return;

    pmd = pmd_offset(pud, start);
    pud_clear(pud);
    pmd_free_tlb(tlb, pmd, start);
    mm_dec_nr_pmds(tlb->mm);
}

static void hugetlb_free_pud_range(struct mmu_gather *tlb, p4d_t *p4d,
                   unsigned long addr, unsigned long end,
                   unsigned long floor, unsigned long ceiling)
{
    pud_t *pud;
    unsigned long next;
    unsigned long start;

    start = addr;
    pud = pud_offset(p4d, addr);
    do {
        next = pud_addr_end(addr, end);
        if (pud_none_or_clear_bad(pud))
            continue;
        if (is_hugetlb_pud(*pud))
            pud_clear(pud);
        else
            hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
                           ceiling);
    } while (pud++, addr = next, addr != end);

    start &= PGDIR_MASK;
    if (start < floor)
        return;
    if (ceiling) {
        ceiling &= PGDIR_MASK;
        if (!ceiling)
            return;
    }
    if (end - 1 > ceiling - 1)
        return;

    pud = pud_offset(p4d, start);
    p4d_clear(p4d);
    pud_free_tlb(tlb, pud, start);
    mm_dec_nr_puds(tlb->mm);
}

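/*
 * Entry point for hugetlb page table teardown, patterned after the
 * generic free_pgd_range() but trimming the range at PMD granularity
 * before walking each p4d in range.
 */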
void hugetlb_free_pgd_range(struct mmu_gather *tlb,
                unsigned long addr, unsigned long end,
                unsigned long floor, unsigned long ceiling)
{
    pgd_t *pgd;
    p4d_t *p4d;
    unsigned long next;

    addr &= PMD_MASK;
    if (addr < floor) {
        addr += PMD_SIZE;
        if (!addr)
            return;
    }
    if (ceiling) {
        ceiling &= PMD_MASK;
        if (!ceiling)
            return;
    }
    if (end - 1 > ceiling - 1)
        end -= PMD_SIZE;
    if (addr > end - 1)
        return;

    pgd = pgd_offset(tlb->mm, addr);
    p4d = p4d_offset(pgd, addr);
    do {
        next = p4d_addr_end(addr, end);
        if (p4d_none_or_clear_bad(p4d))
            continue;
        hugetlb_free_pud_range(tlb, p4d, addr, next, floor, ceiling);
    } while (p4d++, addr = next, addr != end);
}