0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
0004  * Authors: Dave Chinner and Glauber Costa
0005  *
0006  * Generic LRU infrastructure
0007  */
0008 #include <linux/kernel.h>
0009 #include <linux/module.h>
0010 #include <linux/mm.h>
0011 #include <linux/list_lru.h>
0012 #include <linux/slab.h>
0013 #include <linux/mutex.h>
0014 #include <linux/memcontrol.h>
0015 #include "slab.h"
0016 #include "internal.h"
0017 
0018 #ifdef CONFIG_MEMCG_KMEM
0019 static LIST_HEAD(memcg_list_lrus);
0020 static DEFINE_MUTEX(list_lrus_mutex);
0021 
0022 static inline bool list_lru_memcg_aware(struct list_lru *lru)
0023 {
0024     return lru->memcg_aware;
0025 }
0026 
0027 static void list_lru_register(struct list_lru *lru)
0028 {
0029     if (!list_lru_memcg_aware(lru))
0030         return;
0031 
0032     mutex_lock(&list_lrus_mutex);
0033     list_add(&lru->list, &memcg_list_lrus);
0034     mutex_unlock(&list_lrus_mutex);
0035 }
0036 
0037 static void list_lru_unregister(struct list_lru *lru)
0038 {
0039     if (!list_lru_memcg_aware(lru))
0040         return;
0041 
0042     mutex_lock(&list_lrus_mutex);
0043     list_del(&lru->list);
0044     mutex_unlock(&list_lrus_mutex);
0045 }
0046 
0047 static int lru_shrinker_id(struct list_lru *lru)
0048 {
0049     return lru->shrinker_id;
0050 }
0051 
0052 static inline struct list_lru_one *
0053 list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
0054 {
0055     if (list_lru_memcg_aware(lru) && idx >= 0) {
0056         struct list_lru_memcg *mlru = xa_load(&lru->xa, idx);
0057 
0058         return mlru ? &mlru->node[nid] : NULL;
0059     }
0060     return &lru->node[nid].lru;
0061 }
0062 
0063 static inline struct list_lru_one *
0064 list_lru_from_kmem(struct list_lru *lru, int nid, void *ptr,
0065            struct mem_cgroup **memcg_ptr)
0066 {
0067     struct list_lru_node *nlru = &lru->node[nid];
0068     struct list_lru_one *l = &nlru->lru;
0069     struct mem_cgroup *memcg = NULL;
0070 
0071     if (!list_lru_memcg_aware(lru))
0072         goto out;
0073 
0074     memcg = mem_cgroup_from_slab_obj(ptr);
0075     if (!memcg)
0076         goto out;
0077 
0078     l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
0079 out:
0080     if (memcg_ptr)
0081         *memcg_ptr = memcg;
0082     return l;
0083 }
0084 #else
0085 static void list_lru_register(struct list_lru *lru)
0086 {
0087 }
0088 
0089 static void list_lru_unregister(struct list_lru *lru)
0090 {
0091 }
0092 
0093 static int lru_shrinker_id(struct list_lru *lru)
0094 {
0095     return -1;
0096 }
0097 
0098 static inline bool list_lru_memcg_aware(struct list_lru *lru)
0099 {
0100     return false;
0101 }
0102 
0103 static inline struct list_lru_one *
0104 list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
0105 {
0106     return &lru->node[nid].lru;
0107 }
0108 
0109 static inline struct list_lru_one *
0110 list_lru_from_kmem(struct list_lru *lru, int nid, void *ptr,
0111            struct mem_cgroup **memcg_ptr)
0112 {
0113     if (memcg_ptr)
0114         *memcg_ptr = NULL;
0115     return &lru->node[nid].lru;
0116 }
0117 #endif /* CONFIG_MEMCG_KMEM */
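
/*
 * Summary of the helpers above: list_lru_from_memcg_idx() and
 * list_lru_from_kmem() resolve a node id plus (for memcg-aware lrus) a memcg
 * index or slab-allocated object to the list_lru_one that actually holds the
 * items:
 *
 *	lru not memcg aware:  lru->node[nid].lru
 *	lru memcg aware:      xa_load(&lru->xa, memcg_kmem_id(memcg))->node[nid]
 *	                      (NULL until memcg_list_lru_alloc() has populated
 *	                       the xarray slot for that cgroup)
 *
 * The !CONFIG_MEMCG_KMEM stubs always fall back to the per-node list.
 */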
0118 
0119 bool list_lru_add(struct list_lru *lru, struct list_head *item)
0120 {
0121     int nid = page_to_nid(virt_to_page(item));
0122     struct list_lru_node *nlru = &lru->node[nid];
0123     struct mem_cgroup *memcg;
0124     struct list_lru_one *l;
0125 
0126     spin_lock(&nlru->lock);
0127     if (list_empty(item)) {
0128         l = list_lru_from_kmem(lru, nid, item, &memcg);
0129         list_add_tail(item, &l->list);
0130         /* Set shrinker bit if the first element was added */
0131         if (!l->nr_items++)
0132             set_shrinker_bit(memcg, nid,
0133                      lru_shrinker_id(lru));
0134         nlru->nr_items++;
0135         spin_unlock(&nlru->lock);
0136         return true;
0137     }
0138     spin_unlock(&nlru->lock);
0139     return false;
0140 }
0141 EXPORT_SYMBOL_GPL(list_lru_add);
0142 
0143 bool list_lru_del(struct list_lru *lru, struct list_head *item)
0144 {
0145     int nid = page_to_nid(virt_to_page(item));
0146     struct list_lru_node *nlru = &lru->node[nid];
0147     struct list_lru_one *l;
0148 
0149     spin_lock(&nlru->lock);
0150     if (!list_empty(item)) {
0151         l = list_lru_from_kmem(lru, nid, item, NULL);
0152         list_del_init(item);
0153         l->nr_items--;
0154         nlru->nr_items--;
0155         spin_unlock(&nlru->lock);
0156         return true;
0157     }
0158     spin_unlock(&nlru->lock);
0159     return false;
0160 }
0161 EXPORT_SYMBOL_GPL(list_lru_del);
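
/*
 * Usage sketch for list_lru_add()/list_lru_del(). The names below (my_lru,
 * my_object, obj) are hypothetical; what the two functions require is that
 * the caller embeds a list_head in its object and keeps it initialised,
 * since the list_empty() checks above decide whether the item is currently
 * on an lru.
 *
 *	struct my_object {
 *		struct list_head lru;
 *	};
 *
 *	static struct list_lru my_lru;
 *
 *	when the object becomes unused:
 *		if (list_lru_add(&my_lru, &obj->lru))
 *			the object is now accounted on its node/memcg list
 *
 *	when the object is referenced again:
 *		if (list_lru_del(&my_lru, &obj->lru))
 *			the object has been taken back off the lru
 *
 * Both functions return false when the item is already in the requested
 * state, so the embedded list_head must be INIT_LIST_HEAD()-initialised
 * before the first call.
 */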
0162 
0163 void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
0164 {
0165     list_del_init(item);
0166     list->nr_items--;
0167 }
0168 EXPORT_SYMBOL_GPL(list_lru_isolate);
0169 
0170 void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
0171                struct list_head *head)
0172 {
0173     list_move(item, head);
0174     list->nr_items--;
0175 }
0176 EXPORT_SYMBOL_GPL(list_lru_isolate_move);
0177 
0178 unsigned long list_lru_count_one(struct list_lru *lru,
0179                  int nid, struct mem_cgroup *memcg)
0180 {
0181     struct list_lru_one *l;
0182     long count;
0183 
0184     rcu_read_lock();
0185     l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
0186     count = l ? READ_ONCE(l->nr_items) : 0;
0187     rcu_read_unlock();
0188 
0189     if (unlikely(count < 0))
0190         count = 0;
0191 
0192     return count;
0193 }
0194 EXPORT_SYMBOL_GPL(list_lru_count_one);
0195 
0196 unsigned long list_lru_count_node(struct list_lru *lru, int nid)
0197 {
0198     struct list_lru_node *nlru;
0199 
0200     nlru = &lru->node[nid];
0201     return nlru->nr_items;
0202 }
0203 EXPORT_SYMBOL_GPL(list_lru_count_node);
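
/*
 * Count usage sketch: a shrinker's ->count_objects() hook typically reports
 * one of the counters above. The my_* names are hypothetical; sc->nid and
 * sc->memcg come from struct shrink_control.
 *
 *	static unsigned long my_count(struct shrinker *shrink,
 *				      struct shrink_control *sc)
 *	{
 *		return list_lru_count_one(&my_lru, sc->nid, sc->memcg);
 *	}
 *
 * list_lru_count_one() is per-node and per-memcg, while
 * list_lru_count_node() returns nlru->nr_items, the node-wide total across
 * all memcgs.
 */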
0204 
0205 static unsigned long
0206 __list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
0207             list_lru_walk_cb isolate, void *cb_arg,
0208             unsigned long *nr_to_walk)
0209 {
0210     struct list_lru_node *nlru = &lru->node[nid];
0211     struct list_lru_one *l;
0212     struct list_head *item, *n;
0213     unsigned long isolated = 0;
0214 
0215 restart:
0216     l = list_lru_from_memcg_idx(lru, nid, memcg_idx);
0217     if (!l)
0218         goto out;
0219 
0220     list_for_each_safe(item, n, &l->list) {
0221         enum lru_status ret;
0222 
0223         /*
0224          * decrement nr_to_walk first so that we don't livelock if we
0225          * get stuck on large numbers of LRU_RETRY items
0226          */
0227         if (!*nr_to_walk)
0228             break;
0229         --*nr_to_walk;
0230 
0231         ret = isolate(item, l, &nlru->lock, cb_arg);
0232         switch (ret) {
0233         case LRU_REMOVED_RETRY:
0234             assert_spin_locked(&nlru->lock);
0235             fallthrough;
0236         case LRU_REMOVED:
0237             isolated++;
0238             nlru->nr_items--;
0239             /*
0240              * If the lru lock has been dropped, our list
0241              * traversal is now invalid and so we have to
0242              * restart from scratch.
0243              */
0244             if (ret == LRU_REMOVED_RETRY)
0245                 goto restart;
0246             break;
0247         case LRU_ROTATE:
0248             list_move_tail(item, &l->list);
0249             break;
0250         case LRU_SKIP:
0251             break;
0252         case LRU_RETRY:
0253             /*
0254              * The lru lock has been dropped, our list traversal is
0255              * now invalid and so we have to restart from scratch.
0256              */
0257             assert_spin_locked(&nlru->lock);
0258             goto restart;
0259         default:
0260             BUG();
0261         }
0262     }
0263 out:
0264     return isolated;
0265 }
0266 
0267 unsigned long
0268 list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
0269           list_lru_walk_cb isolate, void *cb_arg,
0270           unsigned long *nr_to_walk)
0271 {
0272     struct list_lru_node *nlru = &lru->node[nid];
0273     unsigned long ret;
0274 
0275     spin_lock(&nlru->lock);
0276     ret = __list_lru_walk_one(lru, nid, memcg_kmem_id(memcg), isolate,
0277                   cb_arg, nr_to_walk);
0278     spin_unlock(&nlru->lock);
0279     return ret;
0280 }
0281 EXPORT_SYMBOL_GPL(list_lru_walk_one);
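
/*
 * Walk callback sketch. The my_* names and the dispose list are hypothetical;
 * the callback signature and the enum lru_status return values are the ones
 * consumed by __list_lru_walk_one() above, and the callback runs with the
 * per-node lru lock held.
 *
 *	static enum lru_status my_isolate(struct list_head *item,
 *					  struct list_lru_one *l,
 *					  spinlock_t *lock, void *cb_arg)
 *	{
 *		struct my_object *obj = container_of(item, struct my_object, lru);
 *		struct list_head *dispose = cb_arg;
 *
 *		if (my_object_is_busy(obj))
 *			return LRU_ROTATE;
 *
 *		list_lru_isolate_move(l, item, dispose);
 *		return LRU_REMOVED;
 *	}
 *
 *	unsigned long nr = 128;
 *	LIST_HEAD(dispose);
 *
 *	list_lru_walk_one(&my_lru, nid, memcg, my_isolate, &dispose, &nr);
 *	(then dispose of everything collected on the local list)
 *
 * LRU_REMOVED and LRU_REMOVED_RETRY tell the walker the item left the lru,
 * LRU_ROTATE moves it to the list tail, LRU_SKIP leaves it untouched, and
 * LRU_RETRY/LRU_REMOVED_RETRY signal that the lru lock was dropped so the
 * walk must restart.
 */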
0282 
0283 unsigned long
0284 list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
0285               list_lru_walk_cb isolate, void *cb_arg,
0286               unsigned long *nr_to_walk)
0287 {
0288     struct list_lru_node *nlru = &lru->node[nid];
0289     unsigned long ret;
0290 
0291     spin_lock_irq(&nlru->lock);
0292     ret = __list_lru_walk_one(lru, nid, memcg_kmem_id(memcg), isolate,
0293                   cb_arg, nr_to_walk);
0294     spin_unlock_irq(&nlru->lock);
0295     return ret;
0296 }
0297 
0298 unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
0299                  list_lru_walk_cb isolate, void *cb_arg,
0300                  unsigned long *nr_to_walk)
0301 {
0302     long isolated = 0;
0303 
0304     isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg,
0305                       nr_to_walk);
0306 
0307 #ifdef CONFIG_MEMCG_KMEM
0308     if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
0309         struct list_lru_memcg *mlru;
0310         unsigned long index;
0311 
0312         xa_for_each(&lru->xa, index, mlru) {
0313             struct list_lru_node *nlru = &lru->node[nid];
0314 
0315             spin_lock(&nlru->lock);
0316             isolated += __list_lru_walk_one(lru, nid, index,
0317                             isolate, cb_arg,
0318                             nr_to_walk);
0319             spin_unlock(&nlru->lock);
0320 
0321             if (*nr_to_walk <= 0)
0322                 break;
0323         }
0324     }
0325 #endif
0326 
0327     return isolated;
0328 }
0329 EXPORT_SYMBOL_GPL(list_lru_walk_node);
0330 
0331 static void init_one_lru(struct list_lru_one *l)
0332 {
0333     INIT_LIST_HEAD(&l->list);
0334     l->nr_items = 0;
0335 }
0336 
0337 #ifdef CONFIG_MEMCG_KMEM
0338 static struct list_lru_memcg *memcg_init_list_lru_one(gfp_t gfp)
0339 {
0340     int nid;
0341     struct list_lru_memcg *mlru;
0342 
0343     mlru = kmalloc(struct_size(mlru, node, nr_node_ids), gfp);
0344     if (!mlru)
0345         return NULL;
0346 
0347     for_each_node(nid)
0348         init_one_lru(&mlru->node[nid]);
0349 
0350     return mlru;
0351 }
0352 
0353 static void memcg_list_lru_free(struct list_lru *lru, int src_idx)
0354 {
0355     struct list_lru_memcg *mlru = xa_erase_irq(&lru->xa, src_idx);
0356 
0357     /*
0358      * __list_lru_walk_one() may still be walking this node's list,
0359      * so the mlru must be freed with kvfree_rcu(). The walk runs
0360      * under lru->node[nid]->lock, which serves as an RCU read-side
0361      * critical section.
0362      */
0363     if (mlru)
0364         kvfree_rcu(mlru, rcu);
0365 }
0366 
0367 static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
0368 {
0369     if (memcg_aware)
0370         xa_init_flags(&lru->xa, XA_FLAGS_LOCK_IRQ);
0371     lru->memcg_aware = memcg_aware;
0372 }
0373 
0374 static void memcg_destroy_list_lru(struct list_lru *lru)
0375 {
0376     XA_STATE(xas, &lru->xa, 0);
0377     struct list_lru_memcg *mlru;
0378 
0379     if (!list_lru_memcg_aware(lru))
0380         return;
0381 
0382     xas_lock_irq(&xas);
0383     xas_for_each(&xas, mlru, ULONG_MAX) {
0384         kfree(mlru);
0385         xas_store(&xas, NULL);
0386     }
0387     xas_unlock_irq(&xas);
0388 }
0389 
0390 static void memcg_reparent_list_lru_node(struct list_lru *lru, int nid,
0391                      int src_idx, struct mem_cgroup *dst_memcg)
0392 {
0393     struct list_lru_node *nlru = &lru->node[nid];
0394     int dst_idx = dst_memcg->kmemcg_id;
0395     struct list_lru_one *src, *dst;
0396 
0397     /*
0398      * Since list_lru_{add,del} may be called under an IRQ-safe lock,
0399      * we have to use IRQ-safe primitives here to avoid deadlock.
0400      */
0401     spin_lock_irq(&nlru->lock);
0402 
0403     src = list_lru_from_memcg_idx(lru, nid, src_idx);
0404     if (!src)
0405         goto out;
0406     dst = list_lru_from_memcg_idx(lru, nid, dst_idx);
0407 
0408     list_splice_init(&src->list, &dst->list);
0409 
0410     if (src->nr_items) {
0411         dst->nr_items += src->nr_items;
0412         set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
0413         src->nr_items = 0;
0414     }
0415 out:
0416     spin_unlock_irq(&nlru->lock);
0417 }
0418 
0419 static void memcg_reparent_list_lru(struct list_lru *lru,
0420                     int src_idx, struct mem_cgroup *dst_memcg)
0421 {
0422     int i;
0423 
0424     for_each_node(i)
0425         memcg_reparent_list_lru_node(lru, i, src_idx, dst_memcg);
0426 
0427     memcg_list_lru_free(lru, src_idx);
0428 }
0429 
0430 void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent)
0431 {
0432     struct cgroup_subsys_state *css;
0433     struct list_lru *lru;
0434     int src_idx = memcg->kmemcg_id;
0435 
0436     /*
0437      * Change the kmemcg_id of this cgroup and all its descendants to the
0438      * parent's id, and then move all entries from this cgroup's list_lrus
0439      * to those of the parent.
0440      *
0441      * After we have finished, all list_lrus corresponding to this cgroup
0442      * are guaranteed to remain empty, so we can safely free this cgroup's
0443      * list lrus in memcg_list_lru_free().
0444      *
0445      * Changing ->kmemcg_id to the parent's id also prevents
0446      * memcg_list_lru_alloc() from allocating list lrus for this cgroup
0447      * after memcg_list_lru_free() has been called.
0448      */
0449     rcu_read_lock();
0450     css_for_each_descendant_pre(css, &memcg->css) {
0451         struct mem_cgroup *child;
0452 
0453         child = mem_cgroup_from_css(css);
0454         WRITE_ONCE(child->kmemcg_id, parent->kmemcg_id);
0455     }
0456     rcu_read_unlock();
0457 
0458     mutex_lock(&list_lrus_mutex);
0459     list_for_each_entry(lru, &memcg_list_lrus, list)
0460         memcg_reparent_list_lru(lru, src_idx, parent);
0461     mutex_unlock(&list_lrus_mutex);
0462 }
0463 
0464 static inline bool memcg_list_lru_allocated(struct mem_cgroup *memcg,
0465                         struct list_lru *lru)
0466 {
0467     int idx = memcg->kmemcg_id;
0468 
0469     return idx < 0 || xa_load(&lru->xa, idx);
0470 }
0471 
0472 int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru,
0473              gfp_t gfp)
0474 {
0475     int i;
0476     unsigned long flags;
0477     struct list_lru_memcg_table {
0478         struct list_lru_memcg *mlru;
0479         struct mem_cgroup *memcg;
0480     } *table;
0481     XA_STATE(xas, &lru->xa, 0);
0482 
0483     if (!list_lru_memcg_aware(lru) || memcg_list_lru_allocated(memcg, lru))
0484         return 0;
0485 
0486     gfp &= GFP_RECLAIM_MASK;
0487     table = kmalloc_array(memcg->css.cgroup->level, sizeof(*table), gfp);
0488     if (!table)
0489         return -ENOMEM;
0490 
0491     /*
0492      * Because the list_lru can be reparented to the parent cgroup's
0493      * list_lru, we must make sure that this cgroup and all its
0494      * ancestors have a list_lru_memcg allocated.
0495      */
0496     for (i = 0; memcg; memcg = parent_mem_cgroup(memcg), i++) {
0497         if (memcg_list_lru_allocated(memcg, lru))
0498             break;
0499 
0500         table[i].memcg = memcg;
0501         table[i].mlru = memcg_init_list_lru_one(gfp);
0502         if (!table[i].mlru) {
0503             while (i--)
0504                 kfree(table[i].mlru);
0505             kfree(table);
0506             return -ENOMEM;
0507         }
0508     }
0509 
0510     xas_lock_irqsave(&xas, flags);
0511     while (i--) {
0512         int index = READ_ONCE(table[i].memcg->kmemcg_id);
0513         struct list_lru_memcg *mlru = table[i].mlru;
0514 
0515         xas_set(&xas, index);
0516 retry:
0517         if (unlikely(index < 0 || xas_error(&xas) || xas_load(&xas))) {
0518             kfree(mlru);
0519         } else {
0520             xas_store(&xas, mlru);
0521             if (xas_error(&xas) == -ENOMEM) {
0522                 xas_unlock_irqrestore(&xas, flags);
0523                 if (xas_nomem(&xas, gfp))
0524                     xas_set_err(&xas, 0);
0525                 xas_lock_irqsave(&xas, flags);
0526                 /*
0527                  * The xas lock has been released, this memcg
0528                  * can be reparented before us. So reload
0529                  * memcg id. See the comment in
0530                  * memcg_reparent_list_lrus() for details.
0531                  */
0532                 index = READ_ONCE(table[i].memcg->kmemcg_id);
0533                 if (index < 0)
0534                     xas_set_err(&xas, 0);
0535                 else if (!xas_error(&xas) && index != xas.xa_index)
0536                     xas_set(&xas, index);
0537                 goto retry;
0538             }
0539         }
0540     }
0541     /* Here xas_nomem() is used to free the preallocated memory, not to allocate more. */
0542     if (xas.xa_alloc)
0543         xas_nomem(&xas, gfp);
0544     xas_unlock_irqrestore(&xas, flags);
0545     kfree(table);
0546 
0547     return xas_error(&xas);
0548 }
0549 #else
0550 static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
0551 {
0552 }
0553 
0554 static void memcg_destroy_list_lru(struct list_lru *lru)
0555 {
0556 }
0557 #endif /* CONFIG_MEMCG_KMEM */
0558 
0559 int __list_lru_init(struct list_lru *lru, bool memcg_aware,
0560             struct lock_class_key *key, struct shrinker *shrinker)
0561 {
0562     int i;
0563 
0564 #ifdef CONFIG_MEMCG_KMEM
0565     if (shrinker)
0566         lru->shrinker_id = shrinker->id;
0567     else
0568         lru->shrinker_id = -1;
0569 #endif
0570 
0571     lru->node = kcalloc(nr_node_ids, sizeof(*lru->node), GFP_KERNEL);
0572     if (!lru->node)
0573         return -ENOMEM;
0574 
0575     for_each_node(i) {
0576         spin_lock_init(&lru->node[i].lock);
0577         if (key)
0578             lockdep_set_class(&lru->node[i].lock, key);
0579         init_one_lru(&lru->node[i].lru);
0580     }
0581 
0582     memcg_init_list_lru(lru, memcg_aware);
0583     list_lru_register(lru);
0584 
0585     return 0;
0586 }
0587 EXPORT_SYMBOL_GPL(__list_lru_init);
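
/*
 * Lifecycle sketch. The names below are made up; a cache owner embeds a
 * struct list_lru, initialises it once (directly as shown, or through the
 * list_lru_init*() wrappers the list_lru header usually provides) and pairs
 * it with list_lru_destroy() at teardown.
 *
 *	static struct list_lru my_lru;
 *	static struct shrinker my_shrinker;	(assumed registered elsewhere)
 *
 *	int err = __list_lru_init(&my_lru, true, NULL, &my_shrinker);
 *	if (err)
 *		return err;
 *	...
 *	list_lru_destroy(&my_lru);
 *
 * memcg_aware = true only has an effect under CONFIG_MEMCG_KMEM; passing a
 * shrinker is what lets list_lru_add() set the per-memcg shrinker bit via
 * lru_shrinker_id().
 */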
0588 
0589 void list_lru_destroy(struct list_lru *lru)
0590 {
0591     /* Already destroyed or not yet initialized? */
0592     if (!lru->node)
0593         return;
0594 
0595     list_lru_unregister(lru);
0596 
0597     memcg_destroy_list_lru(lru);
0598     kfree(lru->node);
0599     lru->node = NULL;
0600 
0601 #ifdef CONFIG_MEMCG_KMEM
0602     lru->shrinker_id = -1;
0603 #endif
0604 }
0605 EXPORT_SYMBOL_GPL(list_lru_destroy);