/*
 * High memory handling common code and variables.
 *
 * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de
 *          Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de
 *
 *
 * Redesigned the x86 32-bit VM architecture to deal with
 * 64-bit physical space. With current x86 CPUs this
 * means up to 64 Gigabytes physical RAM.
 *
 * Rewrote high memory support to move the page cache into
 * high memory. Implemented permanent (schedulable) kmaps
 * based on Linus' idea.
 *
 * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
 */

#include <linux/mm.h>
#include <linux/export.h>
#include <linux/swap.h>
#include <linux/bio.h>
#include <linux/pagemap.h>
#include <linux/mempool.h>
#include <linux/blkdev.h>
#include <linux/init.h>
#include <linux/hash.h>
#include <linux/highmem.h>
#include <linux/kgdb.h>
#include <asm/tlbflush.h>


#if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32)
DEFINE_PER_CPU(int, __kmap_atomic_idx);
#endif

/*
 * pkmap_count[] is not a pure "count":
 *  0 means the slot is not mapped, and has not been mapped
 *    since a TLB flush - it is usable.
 *  1 means there are no users, but it has been mapped
 *    since the last TLB flush - so we can't use it.
 *  n means there are (n-1) current users of it.
 */
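/*
 * For example, pkmap_count[i] == 3 means slot i currently has two active
 * users; kunmap_high() drops the count back towards 1, and a later
 * flush_all_zero_pkmaps() reclaims 1-valued slots by clearing them and
 * flushing the TLB.
 */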
#ifdef CONFIG_HIGHMEM

/*
 * Architectures with an aliasing data cache may define the following family
 * of helper functions in their asm/highmem.h to control the cache color of
 * virtual addresses where physical memory pages are mapped by kmap.
 */
#ifndef get_pkmap_color

/*
 * Determine color of virtual address where the page should be mapped.
 */
static inline unsigned int get_pkmap_color(struct page *page)
{
    return 0;
}
#define get_pkmap_color get_pkmap_color

/*
 * Get next index for mapping inside PKMAP region for page with given color.
 */
static inline unsigned int get_next_pkmap_nr(unsigned int color)
{
    static unsigned int last_pkmap_nr;

    last_pkmap_nr = (last_pkmap_nr + 1) & LAST_PKMAP_MASK;
    return last_pkmap_nr;
}

/*
 * Determine if page index inside PKMAP region (pkmap_nr) of given color
 * has wrapped around PKMAP region end. When this happens an attempt to
 * flush all unused PKMAP slots is made.
 */
static inline int no_more_pkmaps(unsigned int pkmap_nr, unsigned int color)
{
    return pkmap_nr == 0;
}
/*
 * Get the number of PKMAP entries of the given color. If no free slot is
 * found after checking that many entries, kmap will sleep waiting for
 * someone to call kunmap and free a PKMAP slot.
 */
static inline int get_pkmap_entries_count(unsigned int color)
{
    return LAST_PKMAP;
}

/*
 * Get head of a wait queue for PKMAP entries of the given color.
 * Wait queues for different mapping colors should be independent to avoid
 * unnecessary wakeups caused by freeing of slots of other colors.
 */
static inline wait_queue_head_t *get_pkmap_wait_queue_head(unsigned int color)
{
    static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait);

    return &pkmap_map_wait;
}
#endif
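
/*
 * Illustrative sketch (not part of this file): an architecture with an
 * aliasing, virtually indexed cache could supply its own helpers in its
 * asm/highmem.h, for example:
 *
 *    static inline unsigned int get_pkmap_color(struct page *page)
 *    {
 *        return page_to_pfn(page) & (FOO_PKMAP_COLORS - 1);
 *    }
 *    #define get_pkmap_color get_pkmap_color
 *
 * Here FOO_PKMAP_COLORS is a hypothetical, architecture-defined constant;
 * matching overrides of get_next_pkmap_nr(), no_more_pkmaps(),
 * get_pkmap_entries_count() and get_pkmap_wait_queue_head() would then
 * confine each page to PKMAP slots (and wait queues) of its own color.
 */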

unsigned long totalhigh_pages __read_mostly;
EXPORT_SYMBOL(totalhigh_pages);


EXPORT_PER_CPU_SYMBOL(__kmap_atomic_idx);

unsigned int nr_free_highpages(void)
{
    struct zone *zone;
    unsigned int pages = 0;

    for_each_populated_zone(zone) {
        if (is_highmem(zone))
            pages += zone_page_state(zone, NR_FREE_PAGES);
    }

    return pages;
}

static int pkmap_count[LAST_PKMAP];
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock);

pte_t *pkmap_page_table;

/*
 * Most architectures have no use for kmap_high_get(), so let's abstract
 * the IRQ disabling out of the locking in that case to avoid potentially
 * useless overhead.
 */
#ifdef ARCH_NEEDS_KMAP_HIGH_GET
#define lock_kmap()             spin_lock_irq(&kmap_lock)
#define unlock_kmap()           spin_unlock_irq(&kmap_lock)
#define lock_kmap_any(flags)    spin_lock_irqsave(&kmap_lock, flags)
#define unlock_kmap_any(flags)  spin_unlock_irqrestore(&kmap_lock, flags)
#else
#define lock_kmap()             spin_lock(&kmap_lock)
#define unlock_kmap()           spin_unlock(&kmap_lock)
#define lock_kmap_any(flags)    \
        do { spin_lock(&kmap_lock); (void)(flags); } while (0)
#define unlock_kmap_any(flags)  \
        do { spin_unlock(&kmap_lock); (void)(flags); } while (0)
#endif

struct page *kmap_to_page(void *vaddr)
{
    unsigned long addr = (unsigned long)vaddr;

    if (addr >= PKMAP_ADDR(0) && addr < PKMAP_ADDR(LAST_PKMAP)) {
        int i = PKMAP_NR(addr);
        return pte_page(pkmap_page_table[i]);
    }

    return virt_to_page(addr);
}
EXPORT_SYMBOL(kmap_to_page);

static void flush_all_zero_pkmaps(void)
{
    int i;
    int need_flush = 0;

    flush_cache_kmaps();

    for (i = 0; i < LAST_PKMAP; i++) {
        struct page *page;

        /*
         * zero means we don't have anything to do,
         * >1 means that it is still in use. Only
         * a count of 1 means that it is free but
         * needs to be unmapped
         */
        if (pkmap_count[i] != 1)
            continue;
        pkmap_count[i] = 0;

        /* sanity check */
        BUG_ON(pte_none(pkmap_page_table[i]));

        /*
         * Don't need an atomic fetch-and-clear op here;
         * no-one has the page mapped, and cannot get at
         * its virtual address (and hence PTE) without first
         * getting the kmap_lock (which is held here).
         * So no dangers, even with speculative execution.
         */
        page = pte_page(pkmap_page_table[i]);
        pte_clear(&init_mm, PKMAP_ADDR(i), &pkmap_page_table[i]);

        set_page_address(page, NULL);
        need_flush = 1;
    }
    if (need_flush)
        flush_tlb_kernel_range(PKMAP_ADDR(0), PKMAP_ADDR(LAST_PKMAP));
}

/**
 * kmap_flush_unused - flush all unused kmap mappings in order to remove stray mappings
 */
void kmap_flush_unused(void)
{
    lock_kmap();
    flush_all_zero_pkmaps();
    unlock_kmap();
}

static inline unsigned long map_new_virtual(struct page *page)
{
    unsigned long vaddr;
    int count;
    unsigned int last_pkmap_nr;
    unsigned int color = get_pkmap_color(page);

start:
    count = get_pkmap_entries_count(color);
    /* Find an empty entry */
    for (;;) {
        last_pkmap_nr = get_next_pkmap_nr(color);
        if (no_more_pkmaps(last_pkmap_nr, color)) {
            flush_all_zero_pkmaps();
            count = get_pkmap_entries_count(color);
        }
        if (!pkmap_count[last_pkmap_nr])
            break;  /* Found a usable entry */
        if (--count)
            continue;

        /*
         * Sleep for somebody else to unmap their entries
         */
        {
            DECLARE_WAITQUEUE(wait, current);
            wait_queue_head_t *pkmap_map_wait =
                get_pkmap_wait_queue_head(color);

            __set_current_state(TASK_UNINTERRUPTIBLE);
            add_wait_queue(pkmap_map_wait, &wait);
            unlock_kmap();
            schedule();
            remove_wait_queue(pkmap_map_wait, &wait);
            lock_kmap();

            /* Somebody else might have mapped it while we slept */
            if (page_address(page))
                return (unsigned long)page_address(page);

            /* Re-start */
            goto start;
        }
    }
    vaddr = PKMAP_ADDR(last_pkmap_nr);
    set_pte_at(&init_mm, vaddr,
           &(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot));

    pkmap_count[last_pkmap_nr] = 1;
    set_page_address(page, (void *)vaddr);

    return vaddr;
}

/**
 * kmap_high - map a highmem page into the kernel's virtual address space
 * @page: &struct page to map
 *
 * Returns the page's virtual memory address.
 *
 * We cannot call this from interrupts, as it may block.
 */
void *kmap_high(struct page *page)
{
    unsigned long vaddr;

    /*
     * For highmem pages, we can't trust "virtual" until
     * after we have the lock.
     */
    lock_kmap();
    vaddr = (unsigned long)page_address(page);
    if (!vaddr)
        vaddr = map_new_virtual(page);
    pkmap_count[PKMAP_NR(vaddr)]++;
    BUG_ON(pkmap_count[PKMAP_NR(vaddr)] < 2);
    unlock_kmap();
    return (void*) vaddr;
}

EXPORT_SYMBOL(kmap_high);

#ifdef ARCH_NEEDS_KMAP_HIGH_GET
/**
 * kmap_high_get - pin a highmem page into memory
 * @page: &struct page to pin
 *
 * Returns the page's current virtual memory address, or NULL if no mapping
 * exists.  If and only if a non-NULL address is returned then a
 * matching call to kunmap_high() is necessary.
 *
 * This can be called from any context.
 */
void *kmap_high_get(struct page *page)
{
    unsigned long vaddr, flags;

    lock_kmap_any(flags);
    vaddr = (unsigned long)page_address(page);
    if (vaddr) {
        BUG_ON(pkmap_count[PKMAP_NR(vaddr)] < 1);
        pkmap_count[PKMAP_NR(vaddr)]++;
    }
    unlock_kmap_any(flags);
    return (void*) vaddr;
}
#endif
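
/*
 * Illustrative usage sketch (not part of this file): code that only needs to
 * touch an already-mapped highmem page can pin it conditionally, e.g.:
 *
 *    void *vaddr = kmap_high_get(page);
 *
 *    if (vaddr) {
 *        ... operate on the existing mapping at vaddr ...
 *        kunmap_high(page);
 *    }
 *
 * The kunmap_high() call is required exactly when a non-NULL address was
 * returned, matching the contract documented above.
 */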

/**
 * kunmap_high - unmap a highmem page
 * @page: &struct page to unmap
 *
 * If ARCH_NEEDS_KMAP_HIGH_GET is not defined then this may be called
 * only from user context.
 */
void kunmap_high(struct page *page)
{
    unsigned long vaddr;
    unsigned long nr;
    unsigned long flags;
    int need_wakeup;
    unsigned int color = get_pkmap_color(page);
    wait_queue_head_t *pkmap_map_wait;

    lock_kmap_any(flags);
    vaddr = (unsigned long)page_address(page);
    BUG_ON(!vaddr);
    nr = PKMAP_NR(vaddr);

    /*
     * A count must never go down to zero
     * without a TLB flush!
     */
    need_wakeup = 0;
    switch (--pkmap_count[nr]) {
    case 0:
        BUG();
    case 1:
        /*
         * Avoid an unnecessary wake_up() function call.
         * The common case is pkmap_count[] == 1, but
         * no waiters.
         * The tasks queued in the wait-queue are guarded
         * by both the lock in the wait-queue-head and by
         * the kmap_lock.  As the kmap_lock is held here,
         * no need for the wait-queue-head's lock.  Simply
         * test if the queue is empty.
         */
        pkmap_map_wait = get_pkmap_wait_queue_head(color);
        need_wakeup = waitqueue_active(pkmap_map_wait);
    }
    unlock_kmap_any(flags);

    /* do wake-up, if needed, race-free outside of the spin lock */
    if (need_wakeup)
        wake_up(pkmap_map_wait);
}

EXPORT_SYMBOL(kunmap_high);
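
/*
 * Illustrative usage sketch (not part of this file): most callers use the
 * kmap()/kunmap() wrappers from <linux/highmem.h>, which resolve to
 * kmap_high()/kunmap_high() only for highmem pages and may sleep, e.g.:
 *
 *    void *vaddr = kmap(page);
 *
 *    memcpy(vaddr, buffer, PAGE_SIZE);
 *    kunmap(page);
 *
 * where "buffer" is just a placeholder for the caller's data.
 */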
#endif

#if defined(HASHED_PAGE_VIRTUAL)

#define PA_HASH_ORDER   7

/*
 * Describes one page->virtual association
 */
struct page_address_map {
    struct page *page;
    void *virtual;
    struct list_head list;
};

static struct page_address_map page_address_maps[LAST_PKMAP];

/*
 * Hash table bucket
 */
static struct page_address_slot {
    struct list_head lh;            /* List of page_address_maps */
    spinlock_t lock;            /* Protect this bucket's list */
} ____cacheline_aligned_in_smp page_address_htable[1<<PA_HASH_ORDER];

static struct page_address_slot *page_slot(const struct page *page)
{
    return &page_address_htable[hash_ptr(page, PA_HASH_ORDER)];
}

/**
 * page_address - get the mapped virtual address of a page
 * @page: &struct page to get the virtual address of
 *
 * Returns the page's virtual address.
 */
void *page_address(const struct page *page)
{
    unsigned long flags;
    void *ret;
    struct page_address_slot *pas;

    if (!PageHighMem(page))
        return lowmem_page_address(page);

    pas = page_slot(page);
    ret = NULL;
    spin_lock_irqsave(&pas->lock, flags);
    if (!list_empty(&pas->lh)) {
        struct page_address_map *pam;

        list_for_each_entry(pam, &pas->lh, list) {
            if (pam->page == page) {
                ret = pam->virtual;
                goto done;
            }
        }
    }
done:
    spin_unlock_irqrestore(&pas->lock, flags);
    return ret;
}

EXPORT_SYMBOL(page_address);
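
/*
 * Illustrative note (not part of this file): for a !PageHighMem page the
 * lowmem_page_address() fast path above just returns the page's address in
 * the kernel's direct mapping (essentially __va() of its physical address);
 * only highmem pages are looked up in page_address_htable.
 */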

/**
 * set_page_address - set a page's virtual address
 * @page: &struct page to set
 * @virtual: virtual address to use
 */
void set_page_address(struct page *page, void *virtual)
{
    unsigned long flags;
    struct page_address_slot *pas;
    struct page_address_map *pam;

    BUG_ON(!PageHighMem(page));

    pas = page_slot(page);
    if (virtual) {      /* Add */
        pam = &page_address_maps[PKMAP_NR((unsigned long)virtual)];
        pam->page = page;
        pam->virtual = virtual;

        spin_lock_irqsave(&pas->lock, flags);
        list_add_tail(&pam->list, &pas->lh);
        spin_unlock_irqrestore(&pas->lock, flags);
    } else {        /* Remove */
        spin_lock_irqsave(&pas->lock, flags);
        list_for_each_entry(pam, &pas->lh, list) {
            if (pam->page == page) {
                list_del(&pam->list);
                spin_unlock_irqrestore(&pas->lock, flags);
                goto done;
            }
        }
        spin_unlock_irqrestore(&pas->lock, flags);
    }
done:
    return;
}

void __init page_address_init(void)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(page_address_htable); i++) {
        INIT_LIST_HEAD(&page_address_htable[i].lh);
        spin_lock_init(&page_address_htable[i].lock);
    }
}

#endif  /* defined(HASHED_PAGE_VIRTUAL) */