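/*
 * SLOB Allocator: Simple List Of Blocks
 *
 * A minimal first-fit allocator. Each page holds a singly linked list of
 * free blocks measured in SLOB_UNITs; partially free pages sit on one of
 * three global lists chosen by request size (see SLOB_BREAK1/SLOB_BREAK2).
 * kmalloc() prepends a small header recording the allocation size, while
 * requests too large to fit in a single page go straight to the page
 * allocator. On NUMA systems a node preference is honoured by skipping
 * pages from other nodes and by passing the node to the page allocator.
 * All bookkeeping is protected by the global slob_lock.
 */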
#include <linux/kernel.h>
#include <linux/slab.h>

#include <linux/mm.h>
#include <linux/swap.h> /* struct reclaim_state */
#include <linux/cache.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
#include <linux/kmemleak.h>

#include <trace/events/kmem.h>

#include <linux/atomic.h>

#include "slab.h"

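/*
 * Free blocks are described in SLOB_UNITs, i.e. multiples of sizeof(slob_t).
 * A slobidx_t only ever holds a size or an offset within one page, so a
 * 16-bit index suffices while PAGE_SIZE <= 32767 * 2 (with 2-byte units,
 * every unit offset then fits in an s16); larger pages use a 32-bit index.
 */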
#if PAGE_SIZE <= (32767 * 2)
typedef s16 slobidx_t;
#else
typedef s32 slobidx_t;
#endif

struct slob_block {
	slobidx_t units;
};
typedef struct slob_block slob_t;

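/*
 * All partially free slob pages go on one of three global lists, picked by
 * the size of the request: under SLOB_BREAK1 bytes, under SLOB_BREAK2
 * bytes, or larger, so small allocations do not end up fragmenting pages
 * used for larger ones.
 */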
#define SLOB_BREAK1 256
#define SLOB_BREAK2 1024
static LIST_HEAD(free_slob_small);
static LIST_HEAD(free_slob_medium);
static LIST_HEAD(free_slob_large);

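/*
 * slob_page_free: true for pages on one of the free lists. The PG_slob_free
 * page flag mirrors whether the slab is currently on a free list.
 */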
static inline int slob_page_free(struct slab *slab)
{
	return PageSlobFree(slab_page(slab));
}

static void set_slob_page_free(struct slab *slab, struct list_head *list)
{
	list_add(&slab->slab_list, list);
	__SetPageSlobFree(slab_page(slab));
}

static inline void clear_slob_page_free(struct slab *slab)
{
	list_del(&slab->slab_list);
	__ClearPageSlobFree(slab_page(slab));
}

#define SLOB_UNIT sizeof(slob_t)
#define SLOB_UNITS(size) DIV_ROUND_UP(size, SLOB_UNIT)

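/*
 * struct slob_rcu is appended to objects from caches created with
 * SLAB_TYPESAFE_BY_RCU, so the actual free can be deferred until after a
 * grace period (see kmem_cache_free() and kmem_rcu_free()).
 */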
struct slob_rcu {
	struct rcu_head head;
	int size;
};

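/*
 * slob_lock protects all slob allocator structures: the global free lists
 * and the per-page freelists and unit counts.
 */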
static DEFINE_SPINLOCK(slob_lock);

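/*
 * Encoding of a free block (see set_slob()/slob_next()): a block larger
 * than one unit stores its size in units in s[0] and the offset (in units,
 * from the start of the page) of the next free block in s[1]. A one-unit
 * block has no room for both, so it stores the negated next-offset in its
 * single slob_t. For example, with 2-byte units, a 10-unit free block whose
 * successor begins 64 bytes into the page is encoded as s[0].units = 10,
 * s[1].units = 32.
 */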
static void set_slob(slob_t *s, slobidx_t size, slob_t *next)
{
	slob_t *base = (slob_t *)((unsigned long)s & PAGE_MASK);
	slobidx_t offset = next - base;

	if (size > 1) {
		s[0].units = size;
		s[1].units = offset;
	} else
		s[0].units = -offset;
}

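/*
 * Return the size of a slob block, in units.
 */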
static slobidx_t slob_units(slob_t *s)
{
	if (s->units > 0)
		return s->units;
	return 1;
}

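/*
 * Return the next free slob block pointer after this one.
 */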
static slob_t *slob_next(slob_t *s)
{
	slob_t *base = (slob_t *)((unsigned long)s & PAGE_MASK);
	slobidx_t next;

	if (s[0].units < 0)
		next = -s[0].units;
	else
		next = s[1].units;
	return base+next;
}

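/*
 * Returns true if s is the last free block in its page.
 */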
static int slob_last(slob_t *s)
{
	return !((unsigned long)slob_next(s) & ~PAGE_MASK);
}

static void *slob_new_pages(gfp_t gfp, int order, int node)
{
	struct page *page;

#ifdef CONFIG_NUMA
	if (node != NUMA_NO_NODE)
		page = __alloc_pages_node(node, gfp, order);
	else
#endif
		page = alloc_pages(gfp, order);

	if (!page)
		return NULL;

	mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE_B,
			    PAGE_SIZE << order);
	return page_address(page);
}

static void slob_free_pages(void *b, int order)
{
	struct page *sp = virt_to_page(b);

	if (current->reclaim_state)
		current->reclaim_state->reclaimed_slab += 1 << order;

	mod_node_page_state(page_pgdat(sp), NR_SLAB_UNRECLAIMABLE_B,
			    -(PAGE_SIZE << order));
	__free_pages(sp, order);
}

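/*
 * slob_page_alloc() - allocate a block within the slob page @sp.
 * @sp: page to search.
 * @size: size of the allocation, in bytes.
 * @align: requested alignment.
 * @align_offset: offset within the block that must be aligned.
 * @page_removed_from_list: out parameter, set to true if the allocation
 *	used up the page and it was taken off its free list.
 *
 * Walks the page's free list first-fit, splitting a free block when it is
 * larger than needed. Returns a pointer to the block, or NULL if no block
 * in this page is big enough.
 */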
static void *slob_page_alloc(struct slab *sp, size_t size, int align,
			     int align_offset, bool *page_removed_from_list)
{
	slob_t *prev, *cur, *aligned = NULL;
	int delta = 0, units = SLOB_UNITS(size);

	*page_removed_from_list = false;
	for (prev = NULL, cur = sp->freelist; ; prev = cur, cur = slob_next(cur)) {
		slobidx_t avail = slob_units(cur);

		/*
		 * 'aligned' points at the first address inside this block at
		 * which an object with the requested alignment and
		 * align_offset could start; 'delta' is the number of units
		 * that would be skipped to get there.
		 */
		if (align) {
			aligned = (slob_t *)
				(ALIGN((unsigned long)cur + align_offset, align)
				 - align_offset);
			delta = aligned - cur;
		}
		if (avail >= units + delta) { /* room enough? */
			slob_t *next;

			if (delta) { /* need to fragment head to align? */
				next = slob_next(cur);
				set_slob(aligned, avail - delta, next);
				set_slob(cur, delta, aligned);
				prev = cur;
				cur = aligned;
				avail = slob_units(cur);
			}

			next = slob_next(cur);
			if (avail == units) { /* exact fit? unlink. */
				if (prev)
					set_slob(prev, slob_units(prev), next);
				else
					sp->freelist = next;
			} else { /* fragment */
				if (prev)
					set_slob(prev, slob_units(prev), cur + units);
				else
					sp->freelist = cur + units;
				set_slob(cur + units, avail - units, next);
			}

			sp->units -= units;
			if (!sp->units) {
				clear_slob_page_free(sp);
				*page_removed_from_list = true;
			}
			return cur;
		}
		if (slob_last(cur))
			return NULL;
	}
}

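/*
 * slob_alloc: entry point into the slob allocator. Walks the appropriate
 * free list for a page with enough room; if none is found, grabs a fresh
 * page from the page allocator and carves the object out of it.
 */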
static void *slob_alloc(size_t size, gfp_t gfp, int align, int node,
			int align_offset)
{
	struct folio *folio;
	struct slab *sp;
	struct list_head *slob_list;
	slob_t *b = NULL;
	unsigned long flags;
	bool _unused;

	if (size < SLOB_BREAK1)
		slob_list = &free_slob_small;
	else if (size < SLOB_BREAK2)
		slob_list = &free_slob_medium;
	else
		slob_list = &free_slob_large;

	spin_lock_irqsave(&slob_lock, flags);
	/* Iterate through each partially free page, try to find room */
	list_for_each_entry(sp, slob_list, slab_list) {
		bool page_removed_from_list = false;
#ifdef CONFIG_NUMA
		/*
		 * If there's a node specification, search for a partial
		 * page with a matching node id in the freelist.
		 */
		if (node != NUMA_NO_NODE && slab_nid(sp) != node)
			continue;
#endif
		/* Enough room on this page? */
		if (sp->units < SLOB_UNITS(size))
			continue;

		b = slob_page_alloc(sp, size, align, align_offset, &page_removed_from_list);
		if (!b)
			continue;

		/*
		 * If slob_page_alloc() did not remove sp from the free list
		 * (the page still has room), rotate the list so the search
		 * starts here next time: this improves fragment distribution
		 * and shortens the average search.
		 */
		if (!page_removed_from_list) {
			if (!list_is_first(&sp->slab_list, slob_list))
				list_rotate_to_front(&sp->slab_list, slob_list);
		}
		break;
	}
	spin_unlock_irqrestore(&slob_lock, flags);

	/* Not enough space: must allocate a new page */
	if (!b) {
		b = slob_new_pages(gfp & ~__GFP_ZERO, 0, node);
		if (!b)
			return NULL;
		folio = virt_to_folio(b);
		__folio_set_slab(folio);
		sp = folio_slab(folio);

		spin_lock_irqsave(&slob_lock, flags);
		sp->units = SLOB_UNITS(PAGE_SIZE);
		sp->freelist = b;
		INIT_LIST_HEAD(&sp->slab_list);
		set_slob(b, SLOB_UNITS(PAGE_SIZE), b + SLOB_UNITS(PAGE_SIZE));
		set_slob_page_free(sp, slob_list);
		b = slob_page_alloc(sp, size, align, align_offset, &_unused);
		BUG_ON(!b);
		spin_unlock_irqrestore(&slob_lock, flags);
	}
	if (unlikely(gfp & __GFP_ZERO))
		memset(b, 0, size);
	return b;
}

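/*
 * slob_free: entry point into the slob allocator. Merges the block with its
 * free neighbours; a page that becomes entirely free is returned to the
 * page allocator.
 */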
static void slob_free(void *block, int size)
{
	struct slab *sp;
	slob_t *prev, *next, *b = (slob_t *)block;
	slobidx_t units;
	unsigned long flags;
	struct list_head *slob_list;

	if (unlikely(ZERO_OR_NULL_PTR(block)))
		return;
	BUG_ON(!size);

	sp = virt_to_slab(block);
	units = SLOB_UNITS(size);

	spin_lock_irqsave(&slob_lock, flags);

	if (sp->units + units == SLOB_UNITS(PAGE_SIZE)) {
		/* Go directly to page allocator. Do not pass slob allocator */
		if (slob_page_free(sp))
			clear_slob_page_free(sp);
		spin_unlock_irqrestore(&slob_lock, flags);
		__folio_clear_slab(slab_folio(sp));
		slob_free_pages(b, 0);
		return;
	}

	if (!slob_page_free(sp)) {
		/* This slob page is about to become partially free. Easy! */
		sp->units = units;
		sp->freelist = b;
		set_slob(b, units,
			(void *)((unsigned long)(b +
					SLOB_UNITS(PAGE_SIZE)) & PAGE_MASK));
		if (size < SLOB_BREAK1)
			slob_list = &free_slob_small;
		else if (size < SLOB_BREAK2)
			slob_list = &free_slob_medium;
		else
			slob_list = &free_slob_large;
		set_slob_page_free(sp, slob_list);
		goto out;
	}

	/*
	 * Otherwise the page is already partially free, so find reinsertion
	 * point.
	 */
	sp->units += units;

	if (b < (slob_t *)sp->freelist) {
		if (b + units == sp->freelist) {
			units += slob_units(sp->freelist);
			sp->freelist = slob_next(sp->freelist);
		}
		set_slob(b, units, sp->freelist);
		sp->freelist = b;
	} else {
		prev = sp->freelist;
		next = slob_next(prev);
		while (b > next) {
			prev = next;
			next = slob_next(prev);
		}

		if (!slob_last(prev) && b + units == next) {
			units += slob_units(next);
			set_slob(b, units, slob_next(next));
		} else
			set_slob(b, units, next);

		if (prev + slob_units(prev) == b) {
			units = slob_units(b) + slob_units(prev);
			set_slob(prev, units, slob_next(b));
		} else
			set_slob(prev, slob_units(prev), b);
	}
out:
	spin_unlock_irqrestore(&slob_lock, flags);
}

#ifdef CONFIG_PRINTK
void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
{
	kpp->kp_ptr = object;
	kpp->kp_slab = slab;
}
#endif

/*
 * End of slob allocator proper. Begin kmem_cache_alloc and kmalloc frontend.
 */

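/*
 * __do_kmalloc_node: kmalloc backend. Small requests get a prefix of
 * 'minalign' bytes that records the allocation size (read back by kfree()
 * and __ksize()); requests that will not fit in a page with that prefix go
 * straight to the page allocator.
 *
 * For example, on a configuration where the minimum alignment works out to
 * 8 bytes, kmalloc(100) asks slob_alloc() for 108 bytes and returns a
 * pointer 8 bytes past the stored size word.
 */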
static __always_inline void *
__do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
{
	unsigned int *m;
	unsigned int minalign;
	void *ret;

	minalign = max_t(unsigned int, ARCH_KMALLOC_MINALIGN,
			 arch_slab_minalign());
	gfp &= gfp_allowed_mask;

	might_alloc(gfp);

	if (size < PAGE_SIZE - minalign) {
		int align = minalign;

		/*
		 * For power of two sizes, guarantee natural alignment for
		 * kmalloc()'d objects.
		 */
		if (is_power_of_2(size))
			align = max_t(unsigned int, minalign, size);

		if (!size)
			return ZERO_SIZE_PTR;

		m = slob_alloc(size + minalign, gfp, align, node, minalign);

		if (!m)
			return NULL;
		*m = size;
		ret = (void *)m + minalign;

		trace_kmalloc_node(caller, ret, NULL,
				   size, size + minalign, gfp, node);
	} else {
		unsigned int order = get_order(size);

		if (likely(order))
			gfp |= __GFP_COMP;
		ret = slob_new_pages(gfp, order, node);

		trace_kmalloc_node(caller, ret, NULL,
				   size, PAGE_SIZE << order, gfp, node);
	}

	kmemleak_alloc(ret, size, 1, gfp);
	return ret;
}

void *__kmalloc(size_t size, gfp_t gfp)
{
	return __do_kmalloc_node(size, gfp, NUMA_NO_NODE, _RET_IP_);
}
EXPORT_SYMBOL(__kmalloc);

void *__kmalloc_track_caller(size_t size, gfp_t gfp, unsigned long caller)
{
	return __do_kmalloc_node(size, gfp, NUMA_NO_NODE, caller);
}
EXPORT_SYMBOL(__kmalloc_track_caller);

#ifdef CONFIG_NUMA
void *__kmalloc_node_track_caller(size_t size, gfp_t gfp,
				  int node, unsigned long caller)
{
	return __do_kmalloc_node(size, gfp, node, caller);
}
EXPORT_SYMBOL(__kmalloc_node_track_caller);
#endif

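/*
 * kfree: for slab-backed objects, recover the size stored in the minalign
 * prefix and hand the block back to slob_free(); otherwise the allocation
 * came straight from the page allocator and is returned there.
 */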
void kfree(const void *block)
{
	struct folio *sp;

	trace_kfree(_RET_IP_, block);

	if (unlikely(ZERO_OR_NULL_PTR(block)))
		return;
	kmemleak_free(block);

	sp = virt_to_folio(block);
	if (folio_test_slab(sp)) {
		unsigned int align = max_t(unsigned int,
					   ARCH_KMALLOC_MINALIGN,
					   arch_slab_minalign());
		unsigned int *m = (unsigned int *)(block - align);

		slob_free(m, *m + align);
	} else {
		unsigned int order = folio_order(sp);

		mod_node_page_state(folio_pgdat(sp), NR_SLAB_UNRECLAIMABLE_B,
				    -(PAGE_SIZE << order));
		__free_pages(folio_page(sp, 0), order);
	}
}
EXPORT_SYMBOL(kfree);
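/* can't use ksize for kmem_cache_alloc memory, only kmalloc */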
size_t __ksize(const void *block)
{
	struct folio *folio;
	unsigned int align;
	unsigned int *m;

	BUG_ON(!block);
	if (unlikely(block == ZERO_SIZE_PTR))
		return 0;

	folio = virt_to_folio(block);
	if (unlikely(!folio_test_slab(folio)))
		return folio_size(folio);

	align = max_t(unsigned int, ARCH_KMALLOC_MINALIGN,
		      arch_slab_minalign());
	m = (unsigned int *)(block - align);
	return SLOB_UNITS(*m) * SLOB_UNIT;
}
EXPORT_SYMBOL(__ksize);

int __kmem_cache_create(struct kmem_cache *c, slab_flags_t flags)
{
	if (flags & SLAB_TYPESAFE_BY_RCU) {
		/* leave room for rcu footer at the end of object */
		c->size += sizeof(struct slob_rcu);
	}
	c->flags = flags;
	return 0;
}
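/*
 * slob_alloc_node: common allocation path for kmem_cache_alloc*(). Objects
 * smaller than a page come from slob_alloc(); larger ones come straight
 * from the page allocator. A constructor, if any, runs on the fresh object.
 */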
static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
{
	void *b;

	flags &= gfp_allowed_mask;

	might_alloc(flags);

	if (c->size < PAGE_SIZE) {
		b = slob_alloc(c->size, flags, c->align, node, 0);
		trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size,
					    SLOB_UNITS(c->size) * SLOB_UNIT,
					    flags, node);
	} else {
		b = slob_new_pages(flags, get_order(c->size), node);
		trace_kmem_cache_alloc_node(_RET_IP_, b, NULL, c->object_size,
					    PAGE_SIZE << get_order(c->size),
					    flags, node);
	}

	if (b && c->ctor) {
		WARN_ON_ONCE(flags & __GFP_ZERO);
		c->ctor(b);
	}

	kmemleak_alloc_recursive(b, c->size, 1, c->flags, flags);
	return b;
}

void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
	return slob_alloc_node(cachep, flags, NUMA_NO_NODE);
}
EXPORT_SYMBOL(kmem_cache_alloc);

void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru, gfp_t flags)
{
	return slob_alloc_node(cachep, flags, NUMA_NO_NODE);
}
EXPORT_SYMBOL(kmem_cache_alloc_lru);

#ifdef CONFIG_NUMA
void *__kmalloc_node(size_t size, gfp_t gfp, int node)
{
	return __do_kmalloc_node(size, gfp, node, _RET_IP_);
}
EXPORT_SYMBOL(__kmalloc_node);

void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t gfp, int node)
{
	return slob_alloc_node(cachep, gfp, node);
}
EXPORT_SYMBOL(kmem_cache_alloc_node);
#endif

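/*
 * Free paths for cache objects. Objects from SLAB_TYPESAFE_BY_RCU caches
 * carry a struct slob_rcu at their tail and are freed from an RCU callback
 * after a grace period; everything else is freed immediately.
 */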
static void __kmem_cache_free(void *b, int size)
{
	if (size < PAGE_SIZE)
		slob_free(b, size);
	else
		slob_free_pages(b, get_order(size));
}

static void kmem_rcu_free(struct rcu_head *head)
{
	struct slob_rcu *slob_rcu = (struct slob_rcu *)head;
	void *b = (void *)slob_rcu - (slob_rcu->size - sizeof(struct slob_rcu));

	__kmem_cache_free(b, slob_rcu->size);
}

void kmem_cache_free(struct kmem_cache *c, void *b)
{
	kmemleak_free_recursive(b, c->flags);
	trace_kmem_cache_free(_RET_IP_, b, c->name);
	if (unlikely(c->flags & SLAB_TYPESAFE_BY_RCU)) {
		struct slob_rcu *slob_rcu;
		slob_rcu = b + (c->size - sizeof(struct slob_rcu));
		slob_rcu->size = c->size;
		call_rcu(&slob_rcu->head, kmem_rcu_free);
	} else {
		__kmem_cache_free(b, c->size);
	}
}
EXPORT_SYMBOL(kmem_cache_free);

void kmem_cache_free_bulk(struct kmem_cache *s, size_t nr, void **p)
{
	size_t i;

	for (i = 0; i < nr; i++) {
		if (s)
			kmem_cache_free(s, p[i]);
		else
			kfree(p[i]);
	}
}
EXPORT_SYMBOL(kmem_cache_free_bulk);

int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
			  void **p)
{
	size_t i;

	for (i = 0; i < nr; i++) {
		void *x = p[i] = kmem_cache_alloc(s, flags);

		if (!x) {
			kmem_cache_free_bulk(s, i, p);
			return 0;
		}
	}
	return i;
}
EXPORT_SYMBOL(kmem_cache_alloc_bulk);

int __kmem_cache_shutdown(struct kmem_cache *c)
{
	/* No way to check for remaining objects */
	return 0;
}

void __kmem_cache_release(struct kmem_cache *c)
{
}

int __kmem_cache_shrink(struct kmem_cache *d)
{
	return 0;
}

static struct kmem_cache kmem_cache_boot = {
	.name = "kmem_cache",
	.size = sizeof(struct kmem_cache),
	.flags = SLAB_PANIC,
	.align = ARCH_KMALLOC_MINALIGN,
};

void __init kmem_cache_init(void)
{
	kmem_cache = &kmem_cache_boot;
	slab_state = UP;
}

void __init kmem_cache_init_late(void)
{
	slab_state = FULL;
}