Back to home page

LXR

 
 

    


#include <linux/swap_cgroup.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/swap.h>

#include <linux/swapops.h> /* depends on mm.h include */
0006 
0007 static DEFINE_MUTEX(swap_cgroup_mutex);
0008 struct swap_cgroup_ctrl {
0009     struct page **map;
0010     unsigned long length;
0011     spinlock_t  lock;
0012 };
0013 
0014 static struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];
0015 
/*
 * One record per swap offset: the mem_cgroup id tied to that swap entry.
 * An id of 0 means that no valid mem_cgroup is recorded.
 */
struct swap_cgroup {
    unsigned short id;
};

/* Number of struct swap_cgroup records that fit into a single page. */
#define SC_PER_PAGE (PAGE_SIZE / sizeof(struct swap_cgroup))
0020 
/*
 * SwapCgroup implements "lookup" and "exchange" operations.
 * In typical usage, swap_cgroup is accessed via memcg's charge/uncharge
 * against SwapCache. At swap_free(), it is accessed directly from swap.
 *
 * This means:
 *  - there is no race in "exchange" when we are accessed via SwapCache,
 *    because SwapCache (and its swp_entry) is under lock.
 *  - when called via swap_free(), the entry has no other user, so there is
 *    no race either.
 * Hence callers need no additional locking around "exchange"; the record
 * update itself is still performed under the per-type ctrl->lock.
 *
 * TODO: we can push these buffers out to HIGHMEM.
 */
0034 
0035 /*
0036  * allocate buffer for swap_cgroup.
0037  */
0038 static int swap_cgroup_prepare(int type)
0039 {
0040     struct page *page;
0041     struct swap_cgroup_ctrl *ctrl;
0042     unsigned long idx, max;
0043 
0044     ctrl = &swap_cgroup_ctrl[type];
0045 
0046     for (idx = 0; idx < ctrl->length; idx++) {
0047         page = alloc_page(GFP_KERNEL | __GFP_ZERO);
0048         if (!page)
0049             goto not_enough_page;
0050         ctrl->map[idx] = page;
0051     }
0052     return 0;
0053 not_enough_page:
0054     max = idx;
0055     for (idx = 0; idx < max; idx++)
0056         __free_page(ctrl->map[idx]);
0057 
0058     return -ENOMEM;
0059 }
0060 
0061 static struct swap_cgroup *lookup_swap_cgroup(swp_entry_t ent,
0062                     struct swap_cgroup_ctrl **ctrlp)
0063 {
0064     pgoff_t offset = swp_offset(ent);
0065     struct swap_cgroup_ctrl *ctrl;
0066     struct page *mappage;
0067     struct swap_cgroup *sc;
0068 
0069     ctrl = &swap_cgroup_ctrl[swp_type(ent)];
0070     if (ctrlp)
0071         *ctrlp = ctrl;
0072 
0073     mappage = ctrl->map[offset / SC_PER_PAGE];
0074     sc = page_address(mappage);
0075     return sc + offset % SC_PER_PAGE;
0076 }
0077 
0078 /**
0079  * swap_cgroup_cmpxchg - cmpxchg mem_cgroup's id for this swp_entry.
0080  * @ent: swap entry to be cmpxchged
0081  * @old: old id
0082  * @new: new id
0083  *
0084  * Returns old id at success, 0 at failure.
0085  * (There is no mem_cgroup using 0 as its id)
0086  */
0087 unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
0088                     unsigned short old, unsigned short new)
0089 {
0090     struct swap_cgroup_ctrl *ctrl;
0091     struct swap_cgroup *sc;
0092     unsigned long flags;
0093     unsigned short retval;
0094 
0095     sc = lookup_swap_cgroup(ent, &ctrl);
0096 
0097     spin_lock_irqsave(&ctrl->lock, flags);
0098     retval = sc->id;
0099     if (retval == old)
0100         sc->id = new;
0101     else
0102         retval = 0;
0103     spin_unlock_irqrestore(&ctrl->lock, flags);
0104     return retval;
0105 }
0106 
0107 /**
0108  * swap_cgroup_record - record mem_cgroup for this swp_entry.
0109  * @ent: swap entry to be recorded into
0110  * @id: mem_cgroup to be recorded
0111  *
0112  * Returns old value at success, 0 at failure.
0113  * (Of course, old value can be 0.)
0114  */
0115 unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
0116 {
0117     struct swap_cgroup_ctrl *ctrl;
0118     struct swap_cgroup *sc;
0119     unsigned short old;
0120     unsigned long flags;
0121 
0122     sc = lookup_swap_cgroup(ent, &ctrl);
0123 
0124     spin_lock_irqsave(&ctrl->lock, flags);
0125     old = sc->id;
0126     sc->id = id;
0127     spin_unlock_irqrestore(&ctrl->lock, flags);
0128 
0129     return old;
0130 }
0131 
0132 /**
0133  * lookup_swap_cgroup_id - lookup mem_cgroup id tied to swap entry
0134  * @ent: swap entry to be looked up.
0135  *
0136  * Returns ID of mem_cgroup at success. 0 at failure. (0 is invalid ID)
0137  */
0138 unsigned short lookup_swap_cgroup_id(swp_entry_t ent)
0139 {
0140     return lookup_swap_cgroup(ent, NULL)->id;
0141 }
0142 
0143 int swap_cgroup_swapon(int type, unsigned long max_pages)
0144 {
0145     void *array;
0146     unsigned long array_size;
0147     unsigned long length;
0148     struct swap_cgroup_ctrl *ctrl;
0149 
0150     if (!do_swap_account)
0151         return 0;
0152 
0153     length = DIV_ROUND_UP(max_pages, SC_PER_PAGE);
0154     array_size = length * sizeof(void *);
0155 
0156     array = vzalloc(array_size);
0157     if (!array)
0158         goto nomem;
0159 
0160     ctrl = &swap_cgroup_ctrl[type];
0161     mutex_lock(&swap_cgroup_mutex);
0162     ctrl->length = length;
0163     ctrl->map = array;
0164     spin_lock_init(&ctrl->lock);
0165     if (swap_cgroup_prepare(type)) {
0166         /* memory shortage */
0167         ctrl->map = NULL;
0168         ctrl->length = 0;
0169         mutex_unlock(&swap_cgroup_mutex);
0170         vfree(array);
0171         goto nomem;
0172     }
0173     mutex_unlock(&swap_cgroup_mutex);
0174 
0175     return 0;
0176 nomem:
0177     pr_info("couldn't allocate enough memory for swap_cgroup\n");
0178     pr_info("swap_cgroup can be disabled by swapaccount=0 boot option\n");
0179     return -ENOMEM;
0180 }
0181 
0182 void swap_cgroup_swapoff(int type)
0183 {
0184     struct page **map;
0185     unsigned long i, length;
0186     struct swap_cgroup_ctrl *ctrl;
0187 
0188     if (!do_swap_account)
0189         return;
0190 
0191     mutex_lock(&swap_cgroup_mutex);
0192     ctrl = &swap_cgroup_ctrl[type];
0193     map = ctrl->map;
0194     length = ctrl->length;
0195     ctrl->map = NULL;
0196     ctrl->length = 0;
0197     mutex_unlock(&swap_cgroup_mutex);
0198 
0199     if (map) {
0200         for (i = 0; i < length; i++) {
0201             struct page *page = map[i];
0202             if (page)
0203                 __free_page(page);
0204         }
0205         vfree(map);
0206     }
0207 }