// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright 2007-2008 Paul Mackerras, IBM Corp.
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/gfp.h>
#include <linux/types.h>
#include <linux/pagewalk.h>
#include <linux/hugetlb.h>
#include <linux/syscalls.h>

#include <linux/pgtable.h>
#include <linux/uaccess.h>

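/*
 * For reference, the table manipulated throughout this file is (at the
 * time of writing) defined in arch/powerpc/include/asm/book3s/64/hash-4k.h,
 * along with the SBP_L*_SHIFT/COUNT constants used below, roughly as:
 *
 *	struct subpage_prot_table {
 *		unsigned long maxaddr;	// only addresses < this are protected
 *		unsigned int **protptrs[TASK_SIZE_USER64 >> 43];
 *		unsigned int *low_prot[4];
 *	};
 *
 * Each leaf page of protection words holds one u32 per 64k page, so a
 * single page covers 1GB of address space.  low_prot[] points directly
 * at the four leaf pages covering the first 4GB; higher addresses go
 * through an extra level of pointer pages in protptrs[].
 */
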
/*
 * Free all pages allocated for subpage protection maps and pointers.
 * Also makes sure that the subpage_prot_table structure is
 * reinitialized for the next user.
 */
void subpage_prot_free(struct mm_struct *mm)
{
    struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context);
    unsigned long i, j, addr;
    u32 **p;

    if (!spt)
        return;

    for (i = 0; i < 4; ++i) {
        if (spt->low_prot[i]) {
            free_page((unsigned long)spt->low_prot[i]);
            spt->low_prot[i] = NULL;
        }
    }
    addr = 0;
    for (i = 0; i < (TASK_SIZE_USER64 >> 43); ++i) {
        p = spt->protptrs[i];
        if (!p)
            continue;
        spt->protptrs[i] = NULL;
        for (j = 0; j < SBP_L2_COUNT && addr < spt->maxaddr;
             ++j, addr += PAGE_SIZE)
            if (p[j])
                free_page((unsigned long)p[j]);
        free_page((unsigned long)p);
    }
    spt->maxaddr = 0;
    kfree(spt);
}

static void hpte_flush_range(struct mm_struct *mm, unsigned long addr,
                             int npages)
{
    pgd_t *pgd;
    p4d_t *p4d;
    pud_t *pud;
    pmd_t *pmd;
    pte_t *pte;
    spinlock_t *ptl;

    pgd = pgd_offset(mm, addr);
    p4d = p4d_offset(pgd, addr);
    if (p4d_none(*p4d))
        return;
    pud = pud_offset(p4d, addr);
    if (pud_none(*pud))
        return;
    pmd = pmd_offset(pud, addr);
    if (pmd_none(*pmd))
        return;
    pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
    arch_enter_lazy_mmu_mode();
    for (; npages > 0; --npages) {
        /*
         * pte_update() with nothing to clear or set leaves the PTE
         * unchanged but still flushes any existing HPTE for this page
         * (batched while in lazy MMU mode), so the updated subpage
         * protection is picked up on the next hash fault.
         */
        pte_update(mm, addr, pte, 0, 0, 0);
        addr += PAGE_SIZE;
        ++pte;
    }
    arch_leave_lazy_mmu_mode();
    pte_unmap_unlock(pte - 1, ptl);
}

/*
 * Clear the subpage protection map for an address range, allowing
 * all accesses that are allowed by the pte permissions.
 */
static void subpage_prot_clear(unsigned long addr, unsigned long len)
{
    struct mm_struct *mm = current->mm;
    struct subpage_prot_table *spt;
    u32 **spm, *spp;
    unsigned long i;
    size_t nw;
    unsigned long next, limit;

    mmap_write_lock(mm);

    spt = mm_ctx_subpage_prot(&mm->context);
    if (!spt)
        goto err_out;

    limit = addr + len;
    if (limit > spt->maxaddr)
        limit = spt->maxaddr;
    for (; addr < limit; addr = next) {
        next = pmd_addr_end(addr, limit);
        if (addr < 0x100000000UL) {
            /* addresses below 4GB use the direct low_prot[] pointers */
            spm = spt->low_prot;
        } else {
            spm = spt->protptrs[addr >> SBP_L3_SHIFT];
            if (!spm)
                continue;
        }
        spp = spm[(addr >> SBP_L2_SHIFT) & (SBP_L2_COUNT - 1)];
        if (!spp)
            continue;
        spp += (addr >> PAGE_SHIFT) & (SBP_L1_COUNT - 1);

        /* number of 64k pages (protection words) from addr up to next */
        i = (addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
        nw = PTRS_PER_PTE - i;
        if (addr + (nw << PAGE_SHIFT) > next)
            nw = (next - addr) >> PAGE_SHIFT;

        memset(spp, 0, nw * sizeof(u32));

        /* now flush any existing HPTEs for the range */
        hpte_flush_range(mm, addr, nw);
    }

err_out:
    mmap_write_unlock(mm);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static int subpage_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
                                  unsigned long end, struct mm_walk *walk)
{
    struct vm_area_struct *vma = walk->vma;
    split_huge_pmd(vma, pmd, addr);
    return 0;
}

static const struct mm_walk_ops subpage_walk_ops = {
    .pmd_entry  = subpage_walk_pmd_entry,
};

static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
                                    unsigned long len)
{
    struct vm_area_struct *vma;

    /*
     * We don't try too hard; we just mark all the VMAs in the range
     * VM_NOHUGEPAGE and split any huge PMDs already backing them.
     */
    vma = find_vma(mm, addr);
    /*
     * If the whole range lies in an unmapped hole, just return.
     */
    if (vma && ((addr + len) <= vma->vm_start))
        return;

    while (vma) {
        if (vma->vm_start >= (addr + len))
            break;
        vma->vm_flags |= VM_NOHUGEPAGE;
        walk_page_vma(vma, &subpage_walk_ops, NULL);
        vma = vma->vm_next;
    }
}
#else
static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
                                    unsigned long len)
{
    return;
}
#endif

/*
 * Copy in a subpage protection map for an address range.
 * The map has 2 bits per 4k subpage, so 32 bits per 64k page.
 * Each 2-bit field is 0 to allow any access, 1 to prevent writes,
 * 2 or 3 to prevent all accesses.
 * Note that the normal page protections also apply; the subpage
 * protection mechanism is an additional constraint, so putting 0
 * in a 2-bit field won't allow writes to a page that is otherwise
 * write-protected.
 * See the userspace usage sketch at the end of this file.
 */
SYSCALL_DEFINE3(subpage_prot, unsigned long, addr,
        unsigned long, len, u32 __user *, map)
{
    struct mm_struct *mm = current->mm;
    struct subpage_prot_table *spt;
    u32 **spm, *spp;
    unsigned long i;
    size_t nw;
    unsigned long next, limit;
    int err;

    /* subpage protection is a hash-MMU feature; not available under radix */
    if (radix_enabled())
        return -ENOENT;

    /* Check parameters */
    if ((addr & ~PAGE_MASK) || (len & ~PAGE_MASK) ||
        addr >= mm->task_size || len >= mm->task_size ||
        addr + len > mm->task_size)
        return -EINVAL;

    if (is_hugepage_only_range(mm, addr, len))
        return -EINVAL;

    if (!map) {
        /* Clear out the protection map for the address range */
        subpage_prot_clear(addr, len);
        return 0;
    }

    if (!access_ok(map, (len >> PAGE_SHIFT) * sizeof(u32)))
        return -EFAULT;

    mmap_write_lock(mm);

    spt = mm_ctx_subpage_prot(&mm->context);
    if (!spt) {
        /*
         * Allocate subpage prot table if not already done.
         * Do this with mmap_lock held
         */
        spt = kzalloc(sizeof(struct subpage_prot_table), GFP_KERNEL);
        if (!spt) {
            err = -ENOMEM;
            goto out;
        }
        mm->context.hash_context->spt = spt;
    }

    subpage_mark_vma_nohuge(mm, addr, len);
    for (limit = addr + len; addr < limit; addr = next) {
        next = pmd_addr_end(addr, limit);
        err = -ENOMEM;
        if (addr < 0x100000000UL) {
            spm = spt->low_prot;
        } else {
            spm = spt->protptrs[addr >> SBP_L3_SHIFT];
            if (!spm) {
                spm = (u32 **)get_zeroed_page(GFP_KERNEL);
                if (!spm)
                    goto out;
                spt->protptrs[addr >> SBP_L3_SHIFT] = spm;
            }
        }
        spm += (addr >> SBP_L2_SHIFT) & (SBP_L2_COUNT - 1);
        spp = *spm;
        if (!spp) {
            spp = (u32 *)get_zeroed_page(GFP_KERNEL);
            if (!spp)
                goto out;
            *spm = spp;
        }
        spp += (addr >> PAGE_SHIFT) & (SBP_L1_COUNT - 1);

        /* demote this segment to 4k hardware pages so per-4k protections apply */
        local_irq_disable();
        demote_segment_4k(mm, addr);
        local_irq_enable();

        i = (addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
        nw = PTRS_PER_PTE - i;
        if (addr + (nw << PAGE_SHIFT) > next)
            nw = (next - addr) >> PAGE_SHIFT;

        /*
         * Drop mmap_lock while copying from userspace: the copy may
         * fault, and the fault handler needs to take the lock itself.
         */
        mmap_write_unlock(mm);
        if (__copy_from_user(spp, map, nw * sizeof(u32)))
            return -EFAULT;
        map += nw;
        mmap_write_lock(mm);

        /* now flush any existing HPTEs for the range */
        hpte_flush_range(mm, addr, nw);
    }
    if (limit > spt->maxaddr)
        spt->maxaddr = limit;
    err = 0;
 out:
    mmap_write_unlock(mm);
    return err;
}
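
/*
 * Illustrative userspace sketch (not part of the kernel source above):
 * one way a process on a hash-MMU system with 64k base pages might use
 * the syscall defined above.  It write-protects every 4k subpage of a
 * single anonymous 64k page by passing one protection word of 0x55555555
 * (the 2-bit value 1, "prevent writes", for all 16 subpages), then clears
 * the protection again by passing a NULL map.  Assumes the toolchain
 * headers provide __NR_subpage_prot; error handling is kept minimal.
 */
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/syscall.h>

int main(void)
{
    const size_t page = 0x10000;        /* 64k base page */
    uint32_t prot_word = 0x55555555;    /* "no writes" for each 4k subpage */
    void *p;

    p = mmap(NULL, page, PROT_READ | PROT_WRITE,
             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (p == MAP_FAILED)
        return 1;

    /* addr and len must be multiples of the 64k page size */
    if (syscall(__NR_subpage_prot, (unsigned long)p, page, &prot_word))
        perror("subpage_prot");

    /* a store anywhere in p would now fault with SIGSEGV */

    /* a NULL map clears the subpage protection for the range */
    if (syscall(__NR_subpage_prot, (unsigned long)p, page, NULL))
        perror("subpage_prot clear");

    munmap(p, page);
    return 0;
}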