// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>
#include "slab.h"
#include "internal.h"

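/*
 * A list_lru is a set of per-NUMA-node LRU lists, each protected by its
 * own spinlock. When the lru is memcg aware, per-memcg lists are stored
 * in lru->xa, an xarray indexed by kmemcg_id, so that a shrinker can
 * walk only the objects charged to a given cgroup.
 */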
#ifdef CONFIG_MEMCG_KMEM
static LIST_HEAD(memcg_list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return lru->memcg_aware;
}

static void list_lru_register(struct list_lru *lru)
{
	if (!list_lru_memcg_aware(lru))
		return;

	mutex_lock(&list_lrus_mutex);
	list_add(&lru->list, &memcg_list_lrus);
	mutex_unlock(&list_lrus_mutex);
}

static void list_lru_unregister(struct list_lru *lru)
{
	if (!list_lru_memcg_aware(lru))
		return;

	mutex_lock(&list_lrus_mutex);
	list_del(&lru->list);
	mutex_unlock(&list_lrus_mutex);
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return lru->shrinker_id;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
{
	if (list_lru_memcg_aware(lru) && idx >= 0) {
		struct list_lru_memcg *mlru = xa_load(&lru->xa, idx);

		return mlru ? &mlru->node[nid] : NULL;
	}
	return &lru->node[nid].lru;
}

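/*
 * Resolve the lru list an object should live on: the per-memcg list of
 * the cgroup the slab object is charged to, or the node-wide list when
 * the lru is not memcg aware or the object is not charged.
 */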
static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru *lru, int nid, void *ptr,
		   struct mem_cgroup **memcg_ptr)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l = &nlru->lru;
	struct mem_cgroup *memcg = NULL;

	if (!list_lru_memcg_aware(lru))
		goto out;

	memcg = mem_cgroup_from_slab_obj(ptr);
	if (!memcg)
		goto out;

	l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
out:
	if (memcg_ptr)
		*memcg_ptr = memcg;
	return l;
}
#else
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return -1;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
{
	return &lru->node[nid].lru;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru *lru, int nid, void *ptr,
		   struct mem_cgroup **memcg_ptr)
{
	if (memcg_ptr)
		*memcg_ptr = NULL;
	return &lru->node[nid].lru;
}
#endif /* CONFIG_MEMCG_KMEM */

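/*
 * Add @item to the lru list's tail if it is not on a list already.
 *
 * Callers that track membership lazily can call this unconditionally:
 * if the item is already linked the call is a no-op and returns false;
 * it returns true if the item was added. The NUMA node is derived from
 * the item's own address, so @item must be embedded in the object being
 * tracked.
 */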
bool list_lru_add(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct mem_cgroup *memcg;
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	if (list_empty(item)) {
		l = list_lru_from_kmem(lru, nid, item, &memcg);
		list_add_tail(item, &l->list);
		/* Set shrinker bit if the first element was added */
		if (!l->nr_items++)
			set_shrinker_bit(memcg, nid,
					 lru_shrinker_id(lru));
		nlru->nr_items++;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_add);

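/*
 * Delete @item from the lru list. The symmetric counterpart of
 * list_lru_add(): returns true if the item was linked and has been
 * removed, false otherwise. list_del_init() is used, so the item can
 * be handed to list_lru_add() again afterwards.
 */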
bool list_lru_del(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	if (!list_empty(item)) {
		l = list_lru_from_kmem(lru, nid, item, NULL);
		list_del_init(item);
		l->nr_items--;
		nlru->nr_items--;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_del);

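/*
 * The two helpers below are meant to be called from a walk callback,
 * with the node's lru lock already held, to unlink an item (or move it
 * to a caller-private list) without touching the per-node counter: the
 * walker itself adjusts nlru->nr_items when LRU_REMOVED* is returned.
 */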
void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
	list_del_init(item);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);

void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
			   struct list_head *head)
{
	list_move(item, head);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);

unsigned long list_lru_count_one(struct list_lru *lru,
				 int nid, struct mem_cgroup *memcg)
{
	struct list_lru_one *l;
	long count;

	rcu_read_lock();
	l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
	count = l ? READ_ONCE(l->nr_items) : 0;
	rcu_read_unlock();

	if (unlikely(count < 0))
		count = 0;

	return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
	struct list_lru_node *nlru;

	nlru = &lru->node[nid];
	return nlru->nr_items;
}
EXPORT_SYMBOL_GPL(list_lru_count_node);

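/*
 * Core walk loop, called with the node's lru lock held. The callback
 * may drop that lock (signalled by LRU_REMOVED_RETRY or LRU_RETRY), in
 * which case the list traversal is invalid and restarts from the head.
 */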
static unsigned long
__list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
		    list_lru_walk_cb isolate, void *cb_arg,
		    unsigned long *nr_to_walk)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;
	struct list_head *item, *n;
	unsigned long isolated = 0;

restart:
	l = list_lru_from_memcg_idx(lru, nid, memcg_idx);
	if (!l)
		goto out;

	list_for_each_safe(item, n, &l->list) {
		enum lru_status ret;

		/*
		 * Decrement nr_to_walk first so that we don't livelock if we
		 * get stuck on large numbers of LRU_RETRY items.
		 */
		if (!*nr_to_walk)
			break;
		--*nr_to_walk;

		ret = isolate(item, l, &nlru->lock, cb_arg);
		switch (ret) {
		case LRU_REMOVED_RETRY:
			assert_spin_locked(&nlru->lock);
			fallthrough;
		case LRU_REMOVED:
			isolated++;
			nlru->nr_items--;
			/*
			 * If the lru lock has been dropped, our list
			 * traversal is now invalid and so we have to
			 * restart from scratch.
			 */
			if (ret == LRU_REMOVED_RETRY)
				goto restart;
			break;
		case LRU_ROTATE:
			list_move_tail(item, &l->list);
			break;
		case LRU_SKIP:
			break;
		case LRU_RETRY:
			/*
			 * The lru lock has been dropped, our list traversal is
			 * now invalid and so we have to restart from scratch.
			 */
			assert_spin_locked(&nlru->lock);
			goto restart;
		default:
			BUG();
		}
	}
out:
	return isolated;
}

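/*
 * Walk one per-memcg (or node-wide) list, taking the node's lru lock
 * around the core loop. The _irq variant below is for callers whose
 * lru lock must be taken with interrupts disabled.
 */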
unsigned long
list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		  list_lru_walk_cb isolate, void *cb_arg,
		  unsigned long *nr_to_walk)
{
	struct list_lru_node *nlru = &lru->node[nid];
	unsigned long ret;

	spin_lock(&nlru->lock);
	ret = __list_lru_walk_one(lru, nid, memcg_kmem_id(memcg), isolate,
				  cb_arg, nr_to_walk);
	spin_unlock(&nlru->lock);
	return ret;
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);

unsigned long
list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		      list_lru_walk_cb isolate, void *cb_arg,
		      unsigned long *nr_to_walk)
{
	struct list_lru_node *nlru = &lru->node[nid];
	unsigned long ret;

	spin_lock_irq(&nlru->lock);
	ret = __list_lru_walk_one(lru, nid, memcg_kmem_id(memcg), isolate,
				  cb_arg, nr_to_walk);
	spin_unlock_irq(&nlru->lock);
	return ret;
}

unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
				 list_lru_walk_cb isolate, void *cb_arg,
				 unsigned long *nr_to_walk)
{
	long isolated = 0;

	isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg,
				      nr_to_walk);

#ifdef CONFIG_MEMCG_KMEM
	if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
		struct list_lru_memcg *mlru;
		unsigned long index;

		xa_for_each(&lru->xa, index, mlru) {
			struct list_lru_node *nlru = &lru->node[nid];

			spin_lock(&nlru->lock);
			isolated += __list_lru_walk_one(lru, nid, index,
							isolate, cb_arg,
							nr_to_walk);
			spin_unlock(&nlru->lock);

			if (*nr_to_walk <= 0)
				break;
		}
	}
#endif

	return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);

static void init_one_lru(struct list_lru_one *l)
{
	INIT_LIST_HEAD(&l->list);
	l->nr_items = 0;
}

#ifdef CONFIG_MEMCG_KMEM
static struct list_lru_memcg *memcg_init_list_lru_one(gfp_t gfp)
{
	int nid;
	struct list_lru_memcg *mlru;

	mlru = kmalloc(struct_size(mlru, node, nr_node_ids), gfp);
	if (!mlru)
		return NULL;

	for_each_node(nid)
		init_one_lru(&mlru->node[nid]);

	return mlru;
}

static void memcg_list_lru_free(struct list_lru *lru, int src_idx)
{
	struct list_lru_memcg *mlru = xa_erase_irq(&lru->xa, src_idx);

	/*
	 * The __list_lru_walk_one() can walk the list of this node.
	 * We need kvfree_rcu() here. And the walking of the list
	 * is under lru->node[nid]->lock, which can serve as a RCU
	 * read-side critical section.
	 */
	if (mlru)
		kvfree_rcu(mlru, rcu);
}

static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	if (memcg_aware)
		xa_init_flags(&lru->xa, XA_FLAGS_LOCK_IRQ);
	lru->memcg_aware = memcg_aware;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
	XA_STATE(xas, &lru->xa, 0);
	struct list_lru_memcg *mlru;

	if (!list_lru_memcg_aware(lru))
		return;

	xas_lock_irq(&xas);
	xas_for_each(&xas, mlru, ULONG_MAX) {
		kfree(mlru);
		xas_store(&xas, NULL);
	}
	xas_unlock_irq(&xas);
}

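/*
 * Splice every entry on the src memcg's list for @nid onto the
 * destination memcg's list, under the node's lru lock so that walkers
 * and list_lru_{add,del} never observe a half-moved list.
 */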
static void memcg_reparent_list_lru_node(struct list_lru *lru, int nid,
					 int src_idx, struct mem_cgroup *dst_memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	int dst_idx = dst_memcg->kmemcg_id;
	struct list_lru_one *src, *dst;

	/*
	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
	 * we have to use IRQ-safe primitives here to avoid deadlock.
	 */
	spin_lock_irq(&nlru->lock);

	src = list_lru_from_memcg_idx(lru, nid, src_idx);
	if (!src)
		goto out;
	dst = list_lru_from_memcg_idx(lru, nid, dst_idx);

	list_splice_init(&src->list, &dst->list);

	if (src->nr_items) {
		dst->nr_items += src->nr_items;
		set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
		src->nr_items = 0;
	}
out:
	spin_unlock_irq(&nlru->lock);
}

static void memcg_reparent_list_lru(struct list_lru *lru,
				    int src_idx, struct mem_cgroup *dst_memcg)
{
	int i;

	for_each_node(i)
		memcg_reparent_list_lru_node(lru, i, src_idx, dst_memcg);

	memcg_list_lru_free(lru, src_idx);
}

void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent)
{
	struct cgroup_subsys_state *css;
	struct list_lru *lru;
	int src_idx = memcg->kmemcg_id;

	/*
	 * Change kmemcg_id of this cgroup and all its descendants to the
	 * parent's id, and then move all entries from this cgroup's list_lrus
	 * to ones of the parent.
	 *
	 * After we have finished, all list_lrus corresponding to this cgroup
	 * are guaranteed to remain empty. So we can safely free this cgroup's
	 * list lrus in memcg_list_lru_free().
	 *
	 * Changing ->kmemcg_id to the parent can prevent memcg_list_lru_alloc()
	 * from allocating list lrus for this cgroup after memcg_list_lru_free()
	 * call.
	 */
	rcu_read_lock();
	css_for_each_descendant_pre(css, &memcg->css) {
		struct mem_cgroup *child;

		child = mem_cgroup_from_css(css);
		WRITE_ONCE(child->kmemcg_id, parent->kmemcg_id);
	}
	rcu_read_unlock();

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &memcg_list_lrus, list)
		memcg_reparent_list_lru(lru, src_idx, parent);
	mutex_unlock(&list_lrus_mutex);
}

static inline bool memcg_list_lru_allocated(struct mem_cgroup *memcg,
					    struct list_lru *lru)
{
	int idx = memcg->kmemcg_id;

	return idx < 0 || xa_load(&lru->xa, idx);
}

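/*
 * Make sure this memcg and all of its ancestors have a list_lru_memcg
 * allocated for @lru. The candidates are first collected walking up
 * the hierarchy, then published into the xarray under the xa lock,
 * starting from the topmost unallocated ancestor; kmemcg_id is re-read
 * whenever the lock has been dropped, in case the cgroup was reparented
 * in the meantime.
 */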
int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru,
			 gfp_t gfp)
{
	int i;
	unsigned long flags;
	struct list_lru_memcg_table {
		struct list_lru_memcg *mlru;
		struct mem_cgroup *memcg;
	} *table;
	XA_STATE(xas, &lru->xa, 0);

	if (!list_lru_memcg_aware(lru) || memcg_list_lru_allocated(memcg, lru))
		return 0;

	gfp &= GFP_RECLAIM_MASK;
	table = kmalloc_array(memcg->css.cgroup->level, sizeof(*table), gfp);
	if (!table)
		return -ENOMEM;

	/*
	 * Because the list_lru can be reparented to the parent cgroup's
	 * list_lru, we should make sure that this cgroup and all its
	 * ancestors have allocated list_lru_memcg.
	 */
	for (i = 0; memcg; memcg = parent_mem_cgroup(memcg), i++) {
		if (memcg_list_lru_allocated(memcg, lru))
			break;

		table[i].memcg = memcg;
		table[i].mlru = memcg_init_list_lru_one(gfp);
		if (!table[i].mlru) {
			while (i--)
				kfree(table[i].mlru);
			kfree(table);
			return -ENOMEM;
		}
	}

	xas_lock_irqsave(&xas, flags);
	while (i--) {
		int index = READ_ONCE(table[i].memcg->kmemcg_id);
		struct list_lru_memcg *mlru = table[i].mlru;

		xas_set(&xas, index);
retry:
		if (unlikely(index < 0 || xas_error(&xas) || xas_load(&xas))) {
			kfree(mlru);
		} else {
			xas_store(&xas, mlru);
			if (xas_error(&xas) == -ENOMEM) {
				xas_unlock_irqrestore(&xas, flags);
				if (xas_nomem(&xas, gfp))
					xas_set_err(&xas, 0);
				xas_lock_irqsave(&xas, flags);
				/*
				 * The xas lock has been released, this memcg
				 * can be reparented before us. So reload
				 * memcg id. More details see the comments
				 * in memcg_reparent_list_lrus().
				 */
				index = READ_ONCE(table[i].memcg->kmemcg_id);
				if (index < 0)
					xas_set_err(&xas, 0);
				else if (!xas_error(&xas) && index != xas.xa_index)
					xas_set(&xas, index);
				goto retry;
			}
		}
	}
	/* xas_nomem() is used to free memory instead of memory allocation. */
	if (xas.xa_alloc)
		xas_nomem(&xas, gfp);
	xas_unlock_irqrestore(&xas, flags);
	kfree(table);

	return xas_error(&xas);
}
#else
static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG_KMEM */

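/*
 * Initialise @lru: allocate the per-node array, set up each node's lock
 * and list, and register the lru for memcg reparenting when requested.
 * @key lets callers place the node locks in their own lockdep class.
 */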
int __list_lru_init(struct list_lru *lru, bool memcg_aware,
		    struct lock_class_key *key, struct shrinker *shrinker)
{
	int i;

#ifdef CONFIG_MEMCG_KMEM
	if (shrinker)
		lru->shrinker_id = shrinker->id;
	else
		lru->shrinker_id = -1;
#endif

	lru->node = kcalloc(nr_node_ids, sizeof(*lru->node), GFP_KERNEL);
	if (!lru->node)
		return -ENOMEM;

	for_each_node(i) {
		spin_lock_init(&lru->node[i].lock);
		if (key)
			lockdep_set_class(&lru->node[i].lock, key);
		init_one_lru(&lru->node[i].lru);
	}

	memcg_init_list_lru(lru, memcg_aware);
	list_lru_register(lru);

	return 0;
}
EXPORT_SYMBOL_GPL(__list_lru_init);

void list_lru_destroy(struct list_lru *lru)
{
	/* Already destroyed or not yet initialized? */
	if (!lru->node)
		return;

	list_lru_unregister(lru);

	memcg_destroy_list_lru(lru);
	kfree(lru->node);
	lru->node = NULL;

#ifdef CONFIG_MEMCG_KMEM
	lru->shrinker_id = -1;
#endif
}
EXPORT_SYMBOL_GPL(list_lru_destroy);