#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/hugetlb.h>

static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
              struct mm_walk *walk)
{
    pte_t *pte;
    int err = 0;

    pte = pte_offset_map(pmd, addr);
    for (;;) {
        err = walk->pte_entry(pte, addr, addr + PAGE_SIZE, walk);
        if (err)
            break;
        addr += PAGE_SIZE;
        if (addr == end)
            break;
        pte++;
    }

    pte_unmap(pte);
    return err;
}

static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
              struct mm_walk *walk)
{
    pmd_t *pmd;
    unsigned long next;
    int err = 0;

    pmd = pmd_offset(pud, addr);
    do {
again:
        next = pmd_addr_end(addr, end);
        if (pmd_none(*pmd) || !walk->vma) {
            if (walk->pte_hole)
                err = walk->pte_hole(addr, next, walk);
            if (err)
                break;
            continue;
        }
        /*
         * This implies that each ->pmd_entry() handler
         * needs to know about pmd_trans_huge() pmds
         */
        if (walk->pmd_entry)
            err = walk->pmd_entry(pmd, addr, next, walk);
        if (err)
            break;

        /*
         * Check this here so we only break down trans_huge
         * pages when we _need_ to
         */
        if (!walk->pte_entry)
            continue;

        split_huge_pmd(walk->vma, pmd, addr);
        if (pmd_trans_unstable(pmd))
            goto again;
        err = walk_pte_range(pmd, addr, next, walk);
        if (err)
            break;
    } while (pmd++, addr = next, addr != end);

    return err;
}
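
As the comment inside walk_pmd_range() notes, a ->pmd_entry() handler must cope with transparent huge pmds itself; the walker only splits a huge pmd when a ->pte_entry() callback also needs the individual ptes. Below is a minimal, illustrative sketch of such a handler. The callback name, the counter kept in walk->private, and the use of pmd_trans_huge_lock() (the helper of this kernel generation that returns the held ptl or NULL) are assumptions for the example, not part of this file.

/* Illustrative THP-aware ->pmd_entry() callback (not part of this file). */
static int sample_pmd_entry(pmd_t *pmd, unsigned long addr,
              unsigned long end, struct mm_walk *walk)
{
    unsigned long *pages = walk->private;    /* hypothetical counter */
    spinlock_t *ptl;

    ptl = pmd_trans_huge_lock(pmd, walk->vma);
    if (ptl) {
        /* One huge pmd maps the whole [addr, end) range. */
        *pages += (end - addr) >> PAGE_SHIFT;
        spin_unlock(ptl);
        return 0;
    }
    /* Not huge: return 0 and let the walker descend to ->pte_entry(). */
    return 0;
}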

static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
              struct mm_walk *walk)
{
    pud_t *pud;
    unsigned long next;
    int err = 0;

    pud = pud_offset(pgd, addr);
    do {
        next = pud_addr_end(addr, end);
        if (pud_none_or_clear_bad(pud)) {
            if (walk->pte_hole)
                err = walk->pte_hole(addr, next, walk);
            if (err)
                break;
            continue;
        }
        if (walk->pmd_entry || walk->pte_entry)
            err = walk_pmd_range(pud, addr, next, walk);
        if (err)
            break;
    } while (pud++, addr = next, addr != end);

    return err;
}

static int walk_pgd_range(unsigned long addr, unsigned long end,
              struct mm_walk *walk)
{
    pgd_t *pgd;
    unsigned long next;
    int err = 0;

    pgd = pgd_offset(walk->mm, addr);
    do {
        next = pgd_addr_end(addr, end);
        if (pgd_none_or_clear_bad(pgd)) {
            if (walk->pte_hole)
                err = walk->pte_hole(addr, next, walk);
            if (err)
                break;
            continue;
        }
        if (walk->pmd_entry || walk->pte_entry)
            err = walk_pud_range(pgd, addr, next, walk);
        if (err)
            break;
    } while (pgd++, addr = next, addr != end);

    return err;
}

#ifdef CONFIG_HUGETLB_PAGE
static unsigned long hugetlb_entry_end(struct hstate *h, unsigned long addr,
                       unsigned long end)
{
    unsigned long boundary = (addr & huge_page_mask(h)) + huge_page_size(h);
    return boundary < end ? boundary : end;
}

static int walk_hugetlb_range(unsigned long addr, unsigned long end,
                  struct mm_walk *walk)
{
    struct vm_area_struct *vma = walk->vma;
    struct hstate *h = hstate_vma(vma);
    unsigned long next;
    unsigned long hmask = huge_page_mask(h);
    pte_t *pte;
    int err = 0;

    do {
        next = hugetlb_entry_end(h, addr, end);
        pte = huge_pte_offset(walk->mm, addr & hmask);
        if (pte && walk->hugetlb_entry)
            err = walk->hugetlb_entry(pte, hmask, addr, next, walk);
        if (err)
            break;
    } while (addr = next, addr != end);

    return err;
}

#else /* CONFIG_HUGETLB_PAGE */
static int walk_hugetlb_range(unsigned long addr, unsigned long end,
                  struct mm_walk *walk)
{
    return 0;
}

#endif /* CONFIG_HUGETLB_PAGE */
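
For hugetlb vmas the walker calls ->hugetlb_entry() once per huge page, passing the huge pte and the huge-page mask. A hedged sketch of such a callback is shown below; the callback name and the counter in walk->private are illustrative assumptions, not part of this file.

/* Illustrative ->hugetlb_entry() callback (not part of this file). */
static int sample_hugetlb_entry(pte_t *pte, unsigned long hmask,
              unsigned long addr, unsigned long next,
              struct mm_walk *walk)
{
    unsigned long *count = walk->private;    /* hypothetical counter */

    if (!huge_pte_none(huge_ptep_get(pte)))
        (*count)++;                          /* populated huge page */
    return 0;                                /* keep walking */
}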

/*
 * Decide whether we really walk over the current vma on [@start, @end)
 * or skip it via the returned value. Return 0 if we walk over the
 * current vma, and return 1 if we skip the vma. A negative value means
 * an error, in which case we abort the current walk.
 */
static int walk_page_test(unsigned long start, unsigned long end,
            struct mm_walk *walk)
{
    struct vm_area_struct *vma = walk->vma;

    if (walk->test_walk)
        return walk->test_walk(start, end, walk);

    /*
     * A vma with VM_PFNMAP set doesn't have any valid struct pages behind
     * its range, so we don't walk over it as we do for normal vmas.
     * However, some callers are interested in handling hole ranges and
     * don't want to simply ignore any address range. Such users define
     * their ->pte_hole() callbacks, so let them handle vma(VM_PFNMAP) too.
     */
    if (vma->vm_flags & VM_PFNMAP) {
        int err = 1;
        if (walk->pte_hole)
            err = walk->pte_hole(start, end, walk);
        return err ? err : 1;
    }
    return 0;
}
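
A caller-supplied ->test_walk() callback uses the same convention as walk_page_test() above: return 0 to walk the vma, 1 to skip it, or a negative error to abort the walk. A minimal sketch follows; the callback name and the skip-mlocked-vmas policy are illustrative assumptions.

/* Illustrative ->test_walk() callback (not part of this file). */
static int sample_test_walk(unsigned long start, unsigned long end,
              struct mm_walk *walk)
{
    struct vm_area_struct *vma = walk->vma;

    if (vma->vm_flags & VM_LOCKED)
        return 1;    /* skip mlocked vmas, continue with the next one */
    return 0;        /* walk this vma */
}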

static int __walk_page_range(unsigned long start, unsigned long end,
            struct mm_walk *walk)
{
    int err = 0;
    struct vm_area_struct *vma = walk->vma;

    if (vma && is_vm_hugetlb_page(vma)) {
        if (walk->hugetlb_entry)
            err = walk_hugetlb_range(start, end, walk);
    } else
        err = walk_pgd_range(start, end, walk);

    return err;
}

/**
 * walk_page_range - walk page table with caller specific callbacks
 *
 * Recursively walk the page table tree of the process represented by @walk->mm
 * within the virtual address range [@start, @end). During walking, we can do
 * some caller-specific work for each entry by setting up pmd_entry(),
 * pte_entry(), and/or hugetlb_entry(). If you don't set up some of these
 * callbacks, the associated entries/pages are simply ignored.
 * The return values of these callbacks are commonly defined as follows:
 *  - 0  : the current entry was handled successfully; if the end address
 *         has not been reached yet, continue the walk.
 *  - >0 : the current entry was handled successfully; return to the caller
 *         with a caller-specific value.
 *  - <0 : handling the current entry failed; return to the caller
 *         with the error code.
 *
 * Before starting to walk the page table, some callers want to check whether
 * they really want to walk over the current vma, typically by checking
 * its vm_flags. walk_page_test() and @walk->test_walk() are used for this
 * purpose.
 *
 * struct mm_walk keeps the current values of some common data like vma and
 * pmd, which are useful for access from the callbacks. If you want to pass
 * some caller-specific data to the callbacks, @walk->private should be helpful.
 *
 * Locking:
 *   Callers of walk_page_range() and walk_page_vma() should hold
 *   @walk->mm->mmap_sem, because these functions traverse the vma list
 *   and/or access vma data.
 */
int walk_page_range(unsigned long start, unsigned long end,
            struct mm_walk *walk)
{
    int err = 0;
    unsigned long next;
    struct vm_area_struct *vma;

    if (start >= end)
        return -EINVAL;

    if (!walk->mm)
        return -EINVAL;

    VM_BUG_ON_MM(!rwsem_is_locked(&walk->mm->mmap_sem), walk->mm);

    vma = find_vma(walk->mm, start);
    do {
        if (!vma) { /* after the last vma */
            walk->vma = NULL;
            next = end;
        } else if (start < vma->vm_start) { /* outside vma */
            walk->vma = NULL;
            next = min(end, vma->vm_start);
        } else { /* inside vma */
            walk->vma = vma;
            next = min(end, vma->vm_end);
            vma = vma->vm_next;

            err = walk_page_test(start, next, walk);
            if (err > 0) {
                /*
                 * Positive return values are purely for
                 * controlling the pagewalk, so they should
                 * never be passed to the callers.
                 */
                err = 0;
                continue;
            }
            if (err < 0)
                break;
        }
        if (walk->vma || walk->pte_hole)
            err = __walk_page_range(start, next, walk);
        if (err)
            break;
    } while (start = next, start < end);
    return err;
}
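
Putting the pieces together, a caller typically fills in a struct mm_walk with the callbacks it needs, takes mmap_sem for reading, and calls walk_page_range(). The sketch below counts the present ptes in a range; the function names and the use of walk->private as a counter are illustrative assumptions, not an API defined in this file.

/* Illustrative caller: count present ptes in [start, end). */
static int count_pte_entry(pte_t *pte, unsigned long addr,
              unsigned long end, struct mm_walk *walk)
{
    unsigned long *count = walk->private;    /* hypothetical counter */

    if (pte_present(*pte))
        (*count)++;
    return 0;
}

static unsigned long count_present_pages(struct mm_struct *mm,
              unsigned long start, unsigned long end)
{
    unsigned long count = 0;
    struct mm_walk count_walk = {
        .pte_entry = count_pte_entry,
        .mm        = mm,
        .private   = &count,
    };

    down_read(&mm->mmap_sem);
    walk_page_range(start, end, &count_walk);
    up_read(&mm->mmap_sem);
    return count;
}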

int walk_page_vma(struct vm_area_struct *vma, struct mm_walk *walk)
{
    int err;

    if (!walk->mm)
        return -EINVAL;

    VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem));
    VM_BUG_ON(!vma);
    walk->vma = vma;
    err = walk_page_test(vma->vm_start, vma->vm_end, walk);
    if (err > 0)
        return 0;
    if (err < 0)
        return err;
    return __walk_page_range(vma->vm_start, vma->vm_end, walk);
}
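
walk_page_vma() is the per-vma variant for callers that already hold mmap_sem and have a specific vma in hand. A short sketch of walking every vma of an mm this way, reusing the hypothetical count_walk from the previous example:

/* Illustrative loop over all vmas of @mm using walk_page_vma(). */
struct vm_area_struct *vma;

down_read(&mm->mmap_sem);
for (vma = mm->mmap; vma; vma = vma->vm_next)
    walk_page_vma(vma, &count_walk);
up_read(&mm->mmap_sem);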