#include <linux/gfp.h>
#include <linux/highmem.h>
#include <linux/kernel.h>
#include <linux/mmdebug.h>
#include <linux/mm_types.h>
#include <linux/mm_inline.h>
#include <linux/pagemap.h>
#include <linux/rcupdate.h>
#include <linux/smp.h>
#include <linux/swap.h>

#include <asm/pgalloc.h>
#include <asm/tlb.h>

#ifndef CONFIG_MMU_GATHER_NO_GATHER

static bool tlb_next_batch(struct mmu_gather *tlb)
{
    struct mmu_gather_batch *batch;

    batch = tlb->active;
    if (batch->next) {
        tlb->active = batch->next;
        return true;
    }

    if (tlb->batch_count == MAX_GATHER_BATCH_COUNT)
        return false;

    batch = (void *)__get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
    if (!batch)
        return false;

    tlb->batch_count++;
    batch->next = NULL;
    batch->nr   = 0;
    batch->max  = MAX_GATHER_BATCH;

    tlb->active->next = batch;
    tlb->active = batch;

    return true;
}

static void tlb_batch_pages_flush(struct mmu_gather *tlb)
{
    struct mmu_gather_batch *batch;

    for (batch = &tlb->local; batch && batch->nr; batch = batch->next) {
        struct page **pages = batch->pages;

        do {
            /*
             * Limit the number of pages freed per chunk when
             * PAGE_SIZE > 4K, so each chunk stays bounded and we
             * can reschedule between chunks.
             */
            unsigned int nr = min(512U, batch->nr);

            free_pages_and_swap_cache(pages, nr);
            pages += nr;
            batch->nr -= nr;

            cond_resched();
        } while (batch->nr);
    }
    tlb->active = &tlb->local;
}

static void tlb_batch_list_free(struct mmu_gather *tlb)
{
    struct mmu_gather_batch *batch, *next;

    for (batch = tlb->local.next; batch; batch = next) {
        next = batch->next;
        free_pages((unsigned long)batch, 0);
    }
    tlb->local.next = NULL;
}

bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size)
{
    struct mmu_gather_batch *batch;

    VM_BUG_ON(!tlb->end);

#ifdef CONFIG_MMU_GATHER_PAGE_SIZE
    VM_WARN_ON(tlb->page_size != page_size);
#endif

    batch = tlb->active;
    /*
     * Add the page and check if we are full. If so, try to open a
     * new batch; if that fails, return true so the caller flushes.
     */
    batch->pages[batch->nr++] = page;
    if (batch->nr == batch->max) {
        if (!tlb_next_batch(tlb))
            return true;
        batch = tlb->active;
    }
    VM_BUG_ON_PAGE(batch->nr > batch->max, page);

    return false;
}

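/*
 * Illustrative sketch (not part of the original file): how a caller is
 * expected to consume the return value of __tlb_remove_page_size().  A
 * true return means the batch could not be grown, so the caller must
 * flush before queueing more pages.  This roughly mirrors what the
 * generic tlb_remove_page_size() wrapper in include/asm-generic/tlb.h
 * does; the function name below is hypothetical.
 */
static inline void example_tlb_remove_page(struct mmu_gather *tlb,
                                           struct page *page)
{
    if (__tlb_remove_page_size(tlb, page, PAGE_SIZE))
        tlb_flush_mmu(tlb);     /* batch full: flush TLB, then free pages */
}
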
#endif /* MMU_GATHER_NO_GATHER */

#ifdef CONFIG_MMU_GATHER_TABLE_FREE

static void __tlb_remove_table_free(struct mmu_table_batch *batch)
{
    int i;

    for (i = 0; i < batch->nr; i++)
        __tlb_remove_table(batch->tables[i]);

    free_page((unsigned long)batch);
}

#ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE

/*
 * Semi RCU freeing of the page directories.
 *
 * This is needed by some architectures to implement software pagetable walkers.
 *
 * gup_fast() and other software pagetable walkers do a lockless page-table
 * walk and therefore need some synchronization with the freeing of the page
 * directories. The chosen means to accomplish that is by disabling IRQs over
 * the walk.
 *
 * Architectures that use IPIs to flush TLBs will then automagically DTRT,
 * since we unlink the page, flush TLBs, then free the page. Since the
 * disabling of IRQs delays the completion of the TLB flush, we can never
 * observe an already freed page.
 *
 * Architectures that do not have this (PPC) need to delay the freeing by some
 * other means, and this is that mechanism.
 *
 * What we do is batch the freed directory pages (tables) and RCU free them.
 * We use the sched RCU variant, as that guarantees that IRQ/preempt disabling
 * holds off grace periods.
 *
 * However, in order to batch these pages we need to allocate storage; this
 * allocation is deep inside the MM code and can thus easily fail under memory
 * pressure. To guarantee progress we fall back to single table freeing, see
 * the implementation of tlb_remove_table_one().
 */

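/*
 * Illustrative sketch (not part of the original file): the lockless-walker
 * side that the scheme above protects, loosely modeled on gup_fast().
 * Disabling IRQs either holds off the IPI-based TLB flush or, with
 * CONFIG_MMU_GATHER_RCU_TABLE_FREE, acts as an RCU-sched read-side
 * section, so the page directories cannot be freed under the walker.
 * The walk helper below is hypothetical.
 */
static int example_lockless_walk(struct mm_struct *mm, unsigned long addr)
{
    unsigned long flags;
    int ret;

    local_irq_save(flags);                      /* hold off table freeing */
    ret = example_walk_page_tables(mm, addr);   /* hypothetical walk helper */
    local_irq_restore(flags);

    return ret;
}
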
static void tlb_remove_table_smp_sync(void *arg)
{
    /* Simply deliver the interrupt */
}

static void tlb_remove_table_sync_one(void)
{
    /*
     * This isn't an RCU grace period and hence the page-tables cannot be
     * assumed to be actually RCU-freed.
     *
     * It is however sufficient for software page-table walkers that rely on
     * IRQ disabling.
     */
    smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
}

static void tlb_remove_table_rcu(struct rcu_head *head)
{
    __tlb_remove_table_free(container_of(head, struct mmu_table_batch, rcu));
}

static void tlb_remove_table_free(struct mmu_table_batch *batch)
{
    call_rcu(&batch->rcu, tlb_remove_table_rcu);
}

#else /* !CONFIG_MMU_GATHER_RCU_TABLE_FREE */

static void tlb_remove_table_sync_one(void) { }

static void tlb_remove_table_free(struct mmu_table_batch *batch)
{
    __tlb_remove_table_free(batch);
}

#endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */

/*
 * Flush the TLB here if tlb_remove_table() is expected to imply a TLB
 * invalidate on this architecture.
 */
static inline void tlb_table_invalidate(struct mmu_gather *tlb)
{
    if (tlb_needs_table_invalidate()) {
        /*
         * Invalidate page-table caches used by hardware walkers. Then
         * we still need to RCU-sched wait while freeing the pages
         * because software walkers can still be in-flight.
         */
        tlb_flush_mmu_tlbonly(tlb);
    }
}

static void tlb_remove_table_one(void *table)
{
    tlb_remove_table_sync_one();
    __tlb_remove_table(table);
}

static void tlb_table_flush(struct mmu_gather *tlb)
{
    struct mmu_table_batch **batch = &tlb->batch;

    if (*batch) {
        tlb_table_invalidate(tlb);
        tlb_remove_table_free(*batch);
        *batch = NULL;
    }
}

void tlb_remove_table(struct mmu_gather *tlb, void *table)
{
    struct mmu_table_batch **batch = &tlb->batch;

    if (*batch == NULL) {
        *batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
        if (*batch == NULL) {
            tlb_table_invalidate(tlb);
            tlb_remove_table_one(table);
            return;
        }
        (*batch)->nr = 0;
    }

    (*batch)->tables[(*batch)->nr++] = table;
    if ((*batch)->nr == MAX_TABLE_BATCH)
        tlb_table_flush(tlb);
}

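/*
 * Illustrative sketch (not part of the original file): how an architecture
 * that frees page tables through this path typically hands a just-unlinked
 * page-table page to tlb_remove_table() from its pte_free_tlb()-style
 * helpers.  The name and the plain pointer argument are simplifications;
 * real architectures encode the table in arch-specific ways.
 */
static inline void example_pte_free_tlb(struct mmu_gather *tlb, void *table)
{
    tlb_remove_table(tlb, table);   /* queued; freed after IPI/RCU sync */
}
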
static inline void tlb_table_init(struct mmu_gather *tlb)
{
    tlb->batch = NULL;
}

#else /* !CONFIG_MMU_GATHER_TABLE_FREE */

static inline void tlb_table_flush(struct mmu_gather *tlb) { }
static inline void tlb_table_init(struct mmu_gather *tlb) { }

#endif /* CONFIG_MMU_GATHER_TABLE_FREE */

static void tlb_flush_mmu_free(struct mmu_gather *tlb)
{
    tlb_table_flush(tlb);
#ifndef CONFIG_MMU_GATHER_NO_GATHER
    tlb_batch_pages_flush(tlb);
#endif
}

void tlb_flush_mmu(struct mmu_gather *tlb)
{
    tlb_flush_mmu_tlbonly(tlb);
    tlb_flush_mmu_free(tlb);
}

static void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
                 bool fullmm)
{
    tlb->mm = mm;
    tlb->fullmm = fullmm;

#ifndef CONFIG_MMU_GATHER_NO_GATHER
    tlb->need_flush_all = 0;
    tlb->local.next = NULL;
    tlb->local.nr   = 0;
    tlb->local.max  = ARRAY_SIZE(tlb->__pages);
    tlb->active     = &tlb->local;
    tlb->batch_count = 0;
#endif

    tlb_table_init(tlb);
#ifdef CONFIG_MMU_GATHER_PAGE_SIZE
    tlb->page_size = 0;
#endif

    __tlb_reset_range(tlb);
    inc_tlb_flush_pending(tlb->mm);
}

/**
 * tlb_gather_mmu - initialize an mmu_gather structure for page-table tear-down
 * @tlb: the mmu_gather structure to initialize
 * @mm: the mm_struct of the target address space
 *
 * Called to initialize an (on-stack) mmu_gather structure for page-table
 * tear-down from @mm.
 */
void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm)
{
    __tlb_gather_mmu(tlb, mm, false);
}

/**
 * tlb_gather_mmu_fullmm - initialize an mmu_gather structure for page-table tear-down
 * @tlb: the mmu_gather structure to initialize
 * @mm: the mm_struct of the target address space
 *
 * In this case, @mm is without users and we're going to destroy the
 * full address space (exit/execve).
 *
 * Called to initialize an (on-stack) mmu_gather structure for page-table
 * tear-down from @mm.
 */
void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm)
{
    __tlb_gather_mmu(tlb, mm, true);
}

/**
 * tlb_finish_mmu - finish an mmu_gather structure
 * @tlb: the mmu_gather structure to finish
 *
 * Called at the end of the shootdown operation to free up any resources that
 * were required.
 */
void tlb_finish_mmu(struct mmu_gather *tlb)
{
    /*
     * If parallel threads are doing PTE changes on the same range under a
     * non-exclusive lock (e.g., mmap_lock read-side) but defer the TLB
     * flush by batching, one thread may end up seeing inconsistent PTEs
     * and be left with stale TLB entries.  So flush the TLB forcefully
     * if we detect parallel PTE-batching threads.
     *
     * However, some syscalls, e.g. munmap(), may free page tables; this
     * requires a forced flush of everything in the given range.  Otherwise
     * stale TLB entries may remain on architectures, e.g. aarch64, that
     * can restrict the flush to a specific page-table level.
     */
    if (mm_tlb_flush_nested(tlb->mm)) {
        /*
         * On aarch64, fullmm yields better performance by avoiding
         * multiple CPUs spamming TLBI messages at the same time.
         *
         * On x86, non-fullmm doesn't yield a significant difference
         * against fullmm.
         */
        tlb->fullmm = 1;
        __tlb_reset_range(tlb);
        tlb->freed_tables = 1;
    }

    tlb_flush_mmu(tlb);

#ifndef CONFIG_MMU_GATHER_NO_GATHER
    tlb_batch_list_free(tlb);
#endif
    dec_tlb_flush_pending(tlb->mm);
}
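
/*
 * Illustrative sketch (not part of the original file): the typical caller
 * lifecycle around the API above.  The unmap step stands in for whatever
 * clears the page-table entries and queues pages/tables on the gather
 * (e.g. the zap/unmap paths in mm/memory.c); the helper name below is
 * hypothetical.
 */
static void example_teardown(struct mm_struct *mm, unsigned long start,
                             unsigned long end)
{
    struct mmu_gather tlb;

    tlb_gather_mmu(&tlb, mm);                   /* initialize on-stack gather */
    example_unmap_range(&tlb, mm, start, end);  /* hypothetical: clears PTEs, queues pages */
    tlb_finish_mmu(&tlb);                       /* flush TLB, free pages and tables */
}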