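/*
 * SLUB: a slab allocator that limits cache line use instead of queueing
 * objects in per-cpu and per-node lists.  Fast paths operate on a per-cpu
 * "cpu slab" with lockless freelist updates; slow paths fall back to
 * per-node partial lists protected by a spinlock (see the locking notes
 * further below).
 */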
0013 #include <linux/mm.h>
0014 #include <linux/swap.h> /* struct reclaim_state */
0015 #include <linux/module.h>
0016 #include <linux/bit_spinlock.h>
0017 #include <linux/interrupt.h>
0018 #include <linux/swab.h>
0019 #include <linux/bitops.h>
0020 #include <linux/slab.h>
0021 #include "slab.h"
0022 #include <linux/proc_fs.h>
0023 #include <linux/seq_file.h>
0024 #include <linux/kasan.h>
0025 #include <linux/cpu.h>
0026 #include <linux/cpuset.h>
0027 #include <linux/mempolicy.h>
0028 #include <linux/ctype.h>
0029 #include <linux/stackdepot.h>
0030 #include <linux/debugobjects.h>
0031 #include <linux/kallsyms.h>
0032 #include <linux/kfence.h>
0033 #include <linux/memory.h>
0034 #include <linux/math64.h>
0035 #include <linux/fault-inject.h>
0036 #include <linux/stacktrace.h>
0037 #include <linux/prefetch.h>
0038 #include <linux/memcontrol.h>
0039 #include <linux/random.h>
0040 #include <kunit/test.h>
0041 #include <linux/sort.h>
0042
0043 #include <linux/debugfs.h>
0044 #include <trace/events/kmem.h>
0045
0046 #include "internal.h"
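/*
 * Locking overview (as used throughout this file):
 *
 *   slab_mutex       - serializes cache creation/destruction and walks of
 *                      the global cache list (see init_freelist_randomization()).
 *   n->list_lock     - spinlock protecting a node's partial and full lists
 *                      (add_partial(), remove_partial(), add_full(), ...).
 *   slab_lock()      - bit spinlock on the slab's page; fallback for
 *                      freelist/counters updates when cmpxchg_double is not
 *                      usable, and used by the debug consistency checks.
 *   cpu_slab->lock   - local_lock protecting the per-cpu slab and the
 *                      per-cpu partial list.
 *
 * A slab's freelist and counters are normally updated together with a
 * cmpxchg_double (see __cmpxchg_double_slab()).  A "frozen" slab is exempt
 * from node list management: it is owned by a CPU (cpu slab or per-cpu
 * partial list) and only deactivate_slab()/__unfreeze_partials() move it
 * back onto the node lists.
 */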
0165 #ifndef CONFIG_PREEMPT_RT
0166 #define slub_get_cpu_ptr(var) get_cpu_ptr(var)
0167 #define slub_put_cpu_ptr(var) put_cpu_ptr(var)
0168 #else
0169 #define slub_get_cpu_ptr(var) \
0170 ({ \
0171 migrate_disable(); \
0172 this_cpu_ptr(var); \
0173 })
0174 #define slub_put_cpu_ptr(var) \
0175 do { \
0176 (void)(var); \
0177 migrate_enable(); \
0178 } while (0)
0179 #endif
0180
0181 #ifdef CONFIG_SLUB_DEBUG
0182 #ifdef CONFIG_SLUB_DEBUG_ON
0183 DEFINE_STATIC_KEY_TRUE(slub_debug_enabled);
0184 #else
0185 DEFINE_STATIC_KEY_FALSE(slub_debug_enabled);
0186 #endif
0187 #endif
0188
0189 static inline bool kmem_cache_debug(struct kmem_cache *s)
0190 {
0191 return kmem_cache_debug_flags(s, SLAB_DEBUG_FLAGS);
0192 }
0193
0194 void *fixup_red_left(struct kmem_cache *s, void *p)
0195 {
0196 if (kmem_cache_debug_flags(s, SLAB_RED_ZONE))
0197 p += s->red_left_pad;
0198
0199 return p;
0200 }
0201
0202 static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
0203 {
0204 #ifdef CONFIG_SLUB_CPU_PARTIAL
0205 return !kmem_cache_debug(s);
0206 #else
0207 return false;
0208 #endif
0209 }
0210
0211
0212
0213
0214
0215
0216
0217
0218
0219
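/*
 * Define SLUB_DEBUG_CMPXCHG to get a pr_info() whenever a cmpxchg_double
 * on a slab fails and has to be redone; see the retry paths in
 * __cmpxchg_double_slab()/cmpxchg_double_slab() and note_cmpxchg_failure().
 */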
0220 #undef SLUB_DEBUG_CMPXCHG
0221
0222
0223
0224
0225
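/*
 * MIN_PARTIAL/MAX_PARTIAL bound how many (possibly empty) slabs a node is
 * allowed to keep cached on its partial list; the resulting s->min_partial
 * is checked before an empty slab is discarded, see deactivate_slab() and
 * __unfreeze_partials().
 */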
0226 #define MIN_PARTIAL 5
0227
0228
0229
0230
0231
0232
0233 #define MAX_PARTIAL 10
0234
0235 #define DEBUG_DEFAULT_FLAGS (SLAB_CONSISTENCY_CHECKS | SLAB_RED_ZONE | \
0236 SLAB_POISON | SLAB_STORE_USER)
0237
0238
0239
0240
0241
0242 #define SLAB_NO_CMPXCHG (SLAB_CONSISTENCY_CHECKS | SLAB_STORE_USER | \
0243 SLAB_TRACE)
0244
0245
0246
0247
0248
0249
0250
0251 #define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
0252
0253 #define OO_SHIFT 16
0254 #define OO_MASK ((1 << OO_SHIFT) - 1)
0255 #define MAX_OBJS_PER_PAGE 32767
0256
0257
0258
0259 #define __OBJECT_POISON ((slab_flags_t __force)0x80000000U)
0260
0261 #define __CMPXCHG_DOUBLE ((slab_flags_t __force)0x40000000U)
0262
0263
0264
0265
0266 #define TRACK_ADDRS_COUNT 16
0267 struct track {
0268 unsigned long addr;
0269 #ifdef CONFIG_STACKDEPOT
0270 depot_stack_handle_t handle;
0271 #endif
0272 int cpu;
0273 int pid;
0274 unsigned long when;
0275 };
0276
0277 enum track_item { TRACK_ALLOC, TRACK_FREE };
0278
0279 #ifdef CONFIG_SYSFS
0280 static int sysfs_slab_add(struct kmem_cache *);
0281 static int sysfs_slab_alias(struct kmem_cache *, const char *);
0282 #else
0283 static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
0284 static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
0285 { return 0; }
0286 #endif
0287
0288 #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_SLUB_DEBUG)
0289 static void debugfs_slab_add(struct kmem_cache *);
0290 #else
0291 static inline void debugfs_slab_add(struct kmem_cache *s) { }
0292 #endif
0293
0294 static inline void stat(const struct kmem_cache *s, enum stat_item si)
0295 {
0296 #ifdef CONFIG_SLUB_STATS
0297
0298
0299
0300
0301 raw_cpu_inc(s->cpu_slab->stat[si]);
0302 #endif
0303 }
0304
0305
0306
0307
0308
0309
0310
0311 static nodemask_t slab_nodes;
0312
0313
0314
0315
0316 static struct workqueue_struct *flushwq;
0317
0318
0319
0320
0321
0322
0323
0324
0325
0326
0327 static inline void *freelist_ptr(const struct kmem_cache *s, void *ptr,
0328 unsigned long ptr_addr)
0329 {
0330 #ifdef CONFIG_SLAB_FREELIST_HARDENED
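/*
 * With CONFIG_SLAB_FREELIST_HARDENED the free pointer is not stored as a
 * plain pointer but obfuscated:
 *
 *   stored = ptr ^ s->random ^ swab(ptr_addr)
 *
 * XOR makes freelist_ptr() its own inverse, so the same helper encodes on
 * store and decodes on load.  Swabbing the storage address avoids the
 * trivial cancellation that would otherwise occur when the free pointer is
 * stored at the object address itself (ptr == ptr_addr).
 */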
0341 return (void *)((unsigned long)ptr ^ s->random ^
0342 swab((unsigned long)kasan_reset_tag((void *)ptr_addr)));
0343 #else
0344 return ptr;
0345 #endif
0346 }
0347
0348
0349 static inline void *freelist_dereference(const struct kmem_cache *s,
0350 void *ptr_addr)
0351 {
0352 return freelist_ptr(s, (void *)*(unsigned long *)(ptr_addr),
0353 (unsigned long)ptr_addr);
0354 }
0355
0356 static inline void *get_freepointer(struct kmem_cache *s, void *object)
0357 {
0358 object = kasan_reset_tag(object);
0359 return freelist_dereference(s, object + s->offset);
0360 }
0361
0362 static void prefetch_freepointer(const struct kmem_cache *s, void *object)
0363 {
0364 prefetchw(object + s->offset);
0365 }
0366
0367 static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
0368 {
0369 unsigned long freepointer_addr;
0370 void *p;
0371
0372 if (!debug_pagealloc_enabled_static())
0373 return get_freepointer(s, object);
0374
0375 object = kasan_reset_tag(object);
0376 freepointer_addr = (unsigned long)object + s->offset;
0377 copy_from_kernel_nofault(&p, (void **)freepointer_addr, sizeof(p));
0378 return freelist_ptr(s, p, freepointer_addr);
0379 }
0380
0381 static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
0382 {
0383 unsigned long freeptr_addr = (unsigned long)object + s->offset;
0384
0385 #ifdef CONFIG_SLAB_FREELIST_HARDENED
0386 BUG_ON(object == fp);
0387 #endif
0388
0389 freeptr_addr = (unsigned long)kasan_reset_tag((void *)freeptr_addr);
0390 *(void **)freeptr_addr = freelist_ptr(s, fp, freeptr_addr);
0391 }
0392
0393
0394 #define for_each_object(__p, __s, __addr, __objects) \
0395 for (__p = fixup_red_left(__s, __addr); \
0396 __p < (__addr) + (__objects) * (__s)->size; \
0397 __p += (__s)->size)
0398
0399 static inline unsigned int order_objects(unsigned int order, unsigned int size)
0400 {
0401 return ((unsigned int)PAGE_SIZE << order) / size;
0402 }
0403
0404 static inline struct kmem_cache_order_objects oo_make(unsigned int order,
0405 unsigned int size)
0406 {
0407 struct kmem_cache_order_objects x = {
0408 (order << OO_SHIFT) + order_objects(order, size)
0409 };
0410
0411 return x;
0412 }
0413
0414 static inline unsigned int oo_order(struct kmem_cache_order_objects x)
0415 {
0416 return x.x >> OO_SHIFT;
0417 }
0418
0419 static inline unsigned int oo_objects(struct kmem_cache_order_objects x)
0420 {
0421 return x.x & OO_MASK;
0422 }
0423
0424 #ifdef CONFIG_SLUB_CPU_PARTIAL
0425 static void slub_set_cpu_partial(struct kmem_cache *s, unsigned int nr_objects)
0426 {
0427 unsigned int nr_slabs;
0428
0429 s->cpu_partial = nr_objects;
0430
0431
0432
0433
0434
0435
0436
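/*
 * cpu_partial is configured in objects; convert it to a number of slabs
 * for cpu_partial_slabs, assuming a partial slab is on average half full
 * (hence the factor of two).
 */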
0437 nr_slabs = DIV_ROUND_UP(nr_objects * 2, oo_objects(s->oo));
0438 s->cpu_partial_slabs = nr_slabs;
0439 }
0440 #else
0441 static inline void
0442 slub_set_cpu_partial(struct kmem_cache *s, unsigned int nr_objects)
0443 {
0444 }
0445 #endif
0446
0447
0448
0449
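/* Per-slab locking, implemented as a bit spinlock on the underlying page. */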
0450 static __always_inline void __slab_lock(struct slab *slab)
0451 {
0452 struct page *page = slab_page(slab);
0453
0454 VM_BUG_ON_PAGE(PageTail(page), page);
0455 bit_spin_lock(PG_locked, &page->flags);
0456 }
0457
0458 static __always_inline void __slab_unlock(struct slab *slab)
0459 {
0460 struct page *page = slab_page(slab);
0461
0462 VM_BUG_ON_PAGE(PageTail(page), page);
0463 __bit_spin_unlock(PG_locked, &page->flags);
0464 }
0465
0466 static __always_inline void slab_lock(struct slab *slab, unsigned long *flags)
0467 {
0468 if (IS_ENABLED(CONFIG_PREEMPT_RT))
0469 local_irq_save(*flags);
0470 __slab_lock(slab);
0471 }
0472
0473 static __always_inline void slab_unlock(struct slab *slab, unsigned long *flags)
0474 {
0475 __slab_unlock(slab);
0476 if (IS_ENABLED(CONFIG_PREEMPT_RT))
0477 local_irq_restore(*flags);
0478 }
0479
0480
0481
0482
0483
0484
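/*
 * __cmpxchg_double_slab() requires the caller to have interrupts disabled
 * (typically via the node's list_lock); cmpxchg_double_slab() below is the
 * variant that may be called with interrupts enabled and disables them
 * itself around the locked fallback.
 */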
0485 static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct slab *slab,
0486 void *freelist_old, unsigned long counters_old,
0487 void *freelist_new, unsigned long counters_new,
0488 const char *n)
0489 {
0490 if (!IS_ENABLED(CONFIG_PREEMPT_RT))
0491 lockdep_assert_irqs_disabled();
0492 #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
0493 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
0494 if (s->flags & __CMPXCHG_DOUBLE) {
0495 if (cmpxchg_double(&slab->freelist, &slab->counters,
0496 freelist_old, counters_old,
0497 freelist_new, counters_new))
0498 return true;
0499 } else
0500 #endif
0501 {
0502
0503 unsigned long flags = 0;
0504
0505 slab_lock(slab, &flags);
0506 if (slab->freelist == freelist_old &&
0507 slab->counters == counters_old) {
0508 slab->freelist = freelist_new;
0509 slab->counters = counters_new;
0510 slab_unlock(slab, &flags);
0511 return true;
0512 }
0513 slab_unlock(slab, &flags);
0514 }
0515
0516 cpu_relax();
0517 stat(s, CMPXCHG_DOUBLE_FAIL);
0518
0519 #ifdef SLUB_DEBUG_CMPXCHG
0520 pr_info("%s %s: cmpxchg double redo ", n, s->name);
0521 #endif
0522
0523 return false;
0524 }
0525
0526 static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct slab *slab,
0527 void *freelist_old, unsigned long counters_old,
0528 void *freelist_new, unsigned long counters_new,
0529 const char *n)
0530 {
0531 #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
0532 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
0533 if (s->flags & __CMPXCHG_DOUBLE) {
0534 if (cmpxchg_double(&slab->freelist, &slab->counters,
0535 freelist_old, counters_old,
0536 freelist_new, counters_new))
0537 return true;
0538 } else
0539 #endif
0540 {
0541 unsigned long flags;
0542
0543 local_irq_save(flags);
0544 __slab_lock(slab);
0545 if (slab->freelist == freelist_old &&
0546 slab->counters == counters_old) {
0547 slab->freelist = freelist_new;
0548 slab->counters = counters_new;
0549 __slab_unlock(slab);
0550 local_irq_restore(flags);
0551 return true;
0552 }
0553 __slab_unlock(slab);
0554 local_irq_restore(flags);
0555 }
0556
0557 cpu_relax();
0558 stat(s, CMPXCHG_DOUBLE_FAIL);
0559
0560 #ifdef SLUB_DEBUG_CMPXCHG
0561 pr_info("%s %s: cmpxchg double redo ", n, s->name);
0562 #endif
0563
0564 return false;
0565 }
0566
0567 #ifdef CONFIG_SLUB_DEBUG
0568 static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)];
0569 static DEFINE_RAW_SPINLOCK(object_map_lock);
0570
0571 static void __fill_map(unsigned long *obj_map, struct kmem_cache *s,
0572 struct slab *slab)
0573 {
0574 void *addr = slab_address(slab);
0575 void *p;
0576
0577 bitmap_zero(obj_map, slab->objects);
0578
0579 for (p = slab->freelist; p; p = get_freepointer(s, p))
0580 set_bit(__obj_to_index(s, addr, p), obj_map);
0581 }
0582
0583 #if IS_ENABLED(CONFIG_KUNIT)
0584 static bool slab_add_kunit_errors(void)
0585 {
0586 struct kunit_resource *resource;
0587
0588 if (likely(!current->kunit_test))
0589 return false;
0590
0591 resource = kunit_find_named_resource(current->kunit_test, "slab_errors");
0592 if (!resource)
0593 return false;
0594
0595 (*(int *)resource->data)++;
0596 kunit_put_resource(resource);
0597 return true;
0598 }
0599 #else
0600 static inline bool slab_add_kunit_errors(void) { return false; }
0601 #endif
0602
0603
0604
0605
0606
0607
0608
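/*
 * Build a bitmap of the objects that are currently in use (not on the
 * slab's freelist).  The single global object_map is protected by
 * object_map_lock; callers must have interrupts disabled and must release
 * the map with put_map().
 */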
0609 static unsigned long *get_map(struct kmem_cache *s, struct slab *slab)
0610 __acquires(&object_map_lock)
0611 {
0612 VM_BUG_ON(!irqs_disabled());
0613
0614 raw_spin_lock(&object_map_lock);
0615
0616 __fill_map(object_map, s, slab);
0617
0618 return object_map;
0619 }
0620
0621 static void put_map(unsigned long *map) __releases(&object_map_lock)
0622 {
0623 VM_BUG_ON(map != object_map);
0624 raw_spin_unlock(&object_map_lock);
0625 }
0626
0627 static inline unsigned int size_from_object(struct kmem_cache *s)
0628 {
0629 if (s->flags & SLAB_RED_ZONE)
0630 return s->size - s->red_left_pad;
0631
0632 return s->size;
0633 }
0634
0635 static inline void *restore_red_left(struct kmem_cache *s, void *p)
0636 {
0637 if (s->flags & SLAB_RED_ZONE)
0638 p -= s->red_left_pad;
0639
0640 return p;
0641 }
0642
0643
0644
0645
0646 #if defined(CONFIG_SLUB_DEBUG_ON)
0647 static slab_flags_t slub_debug = DEBUG_DEFAULT_FLAGS;
0648 #else
0649 static slab_flags_t slub_debug;
0650 #endif
0651
0652 static char *slub_debug_string;
0653 static int disable_higher_order_debug;
0654
0655
0656
0657
0658
0659
0660
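/*
 * Red zones, poison values and padding live outside the object proper, so
 * KASAN would flag accesses to that metadata as out-of-bounds.  Such
 * accesses are wrapped in metadata_access_enable()/metadata_access_disable().
 */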
0661 static inline void metadata_access_enable(void)
0662 {
0663 kasan_disable_current();
0664 }
0665
0666 static inline void metadata_access_disable(void)
0667 {
0668 kasan_enable_current();
0669 }
0670
0671
0672
0673
0674
0675
0676 static inline int check_valid_pointer(struct kmem_cache *s,
0677 struct slab *slab, void *object)
0678 {
0679 void *base;
0680
0681 if (!object)
0682 return 1;
0683
0684 base = slab_address(slab);
0685 object = kasan_reset_tag(object);
0686 object = restore_red_left(s, object);
0687 if (object < base || object >= base + slab->objects * s->size ||
0688 (object - base) % s->size) {
0689 return 0;
0690 }
0691
0692 return 1;
0693 }
0694
0695 static void print_section(char *level, char *text, u8 *addr,
0696 unsigned int length)
0697 {
0698 metadata_access_enable();
0699 print_hex_dump(level, text, DUMP_PREFIX_ADDRESS,
0700 16, 1, kasan_reset_tag((void *)addr), length, 1);
0701 metadata_access_disable();
0702 }
0703
0704
0705
0706
0707 static inline bool freeptr_outside_object(struct kmem_cache *s)
0708 {
0709 return s->offset >= s->inuse;
0710 }
0711
0712
0713
0714
0715
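/*
 * Offset of the first byte after the object data and, if it is stored
 * outside the object, the free pointer.  Tracking information and padding
 * start here.
 */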
0716 static inline unsigned int get_info_end(struct kmem_cache *s)
0717 {
0718 if (freeptr_outside_object(s))
0719 return s->inuse + sizeof(void *);
0720 else
0721 return s->inuse;
0722 }
0723
0724 static struct track *get_track(struct kmem_cache *s, void *object,
0725 enum track_item alloc)
0726 {
0727 struct track *p;
0728
0729 p = object + get_info_end(s);
0730
0731 return kasan_reset_tag(p + alloc);
0732 }
0733
0734 #ifdef CONFIG_STACKDEPOT
0735 static noinline depot_stack_handle_t set_track_prepare(void)
0736 {
0737 depot_stack_handle_t handle;
0738 unsigned long entries[TRACK_ADDRS_COUNT];
0739 unsigned int nr_entries;
0740
0741 nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 3);
0742 handle = stack_depot_save(entries, nr_entries, GFP_NOWAIT);
0743
0744 return handle;
0745 }
0746 #else
0747 static inline depot_stack_handle_t set_track_prepare(void)
0748 {
0749 return 0;
0750 }
0751 #endif
0752
0753 static void set_track_update(struct kmem_cache *s, void *object,
0754 enum track_item alloc, unsigned long addr,
0755 depot_stack_handle_t handle)
0756 {
0757 struct track *p = get_track(s, object, alloc);
0758
0759 #ifdef CONFIG_STACKDEPOT
0760 p->handle = handle;
0761 #endif
0762 p->addr = addr;
0763 p->cpu = smp_processor_id();
0764 p->pid = current->pid;
0765 p->when = jiffies;
0766 }
0767
0768 static __always_inline void set_track(struct kmem_cache *s, void *object,
0769 enum track_item alloc, unsigned long addr)
0770 {
0771 depot_stack_handle_t handle = set_track_prepare();
0772
0773 set_track_update(s, object, alloc, addr, handle);
0774 }
0775
0776 static void init_tracking(struct kmem_cache *s, void *object)
0777 {
0778 struct track *p;
0779
0780 if (!(s->flags & SLAB_STORE_USER))
0781 return;
0782
0783 p = get_track(s, object, TRACK_ALLOC);
0784 memset(p, 0, 2*sizeof(struct track));
0785 }
0786
0787 static void print_track(const char *s, struct track *t, unsigned long pr_time)
0788 {
0789 depot_stack_handle_t handle __maybe_unused;
0790
0791 if (!t->addr)
0792 return;
0793
0794 pr_err("%s in %pS age=%lu cpu=%u pid=%d\n",
0795 s, (void *)t->addr, pr_time - t->when, t->cpu, t->pid);
0796 #ifdef CONFIG_STACKDEPOT
0797 handle = READ_ONCE(t->handle);
0798 if (handle)
0799 stack_depot_print(handle);
0800 else
0801 pr_err("object allocation/free stack trace missing\n");
0802 #endif
0803 }
0804
0805 void print_tracking(struct kmem_cache *s, void *object)
0806 {
0807 unsigned long pr_time = jiffies;
0808 if (!(s->flags & SLAB_STORE_USER))
0809 return;
0810
0811 print_track("Allocated", get_track(s, object, TRACK_ALLOC), pr_time);
0812 print_track("Freed", get_track(s, object, TRACK_FREE), pr_time);
0813 }
0814
0815 static void print_slab_info(const struct slab *slab)
0816 {
0817 struct folio *folio = (struct folio *)slab_folio(slab);
0818
0819 pr_err("Slab 0x%p objects=%u used=%u fp=0x%p flags=%pGp\n",
0820 slab, slab->objects, slab->inuse, slab->freelist,
0821 folio_flags(folio, 0));
0822 }
0823
0824 static void slab_bug(struct kmem_cache *s, char *fmt, ...)
0825 {
0826 struct va_format vaf;
0827 va_list args;
0828
0829 va_start(args, fmt);
0830 vaf.fmt = fmt;
0831 vaf.va = &args;
0832 pr_err("=============================================================================\n");
0833 pr_err("BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf);
0834 pr_err("-----------------------------------------------------------------------------\n\n");
0835 va_end(args);
0836 }
0837
0838 __printf(2, 3)
0839 static void slab_fix(struct kmem_cache *s, char *fmt, ...)
0840 {
0841 struct va_format vaf;
0842 va_list args;
0843
0844 if (slab_add_kunit_errors())
0845 return;
0846
0847 va_start(args, fmt);
0848 vaf.fmt = fmt;
0849 vaf.va = &args;
0850 pr_err("FIX %s: %pV\n", s->name, &vaf);
0851 va_end(args);
0852 }
0853
0854 static void print_trailer(struct kmem_cache *s, struct slab *slab, u8 *p)
0855 {
0856 unsigned int off;
0857 u8 *addr = slab_address(slab);
0858
0859 print_tracking(s, p);
0860
0861 print_slab_info(slab);
0862
0863 pr_err("Object 0x%p @offset=%tu fp=0x%p\n\n",
0864 p, p - addr, get_freepointer(s, p));
0865
0866 if (s->flags & SLAB_RED_ZONE)
0867 print_section(KERN_ERR, "Redzone ", p - s->red_left_pad,
0868 s->red_left_pad);
0869 else if (p > addr + 16)
0870 print_section(KERN_ERR, "Bytes b4 ", p - 16, 16);
0871
0872 print_section(KERN_ERR, "Object ", p,
0873 min_t(unsigned int, s->object_size, PAGE_SIZE));
0874 if (s->flags & SLAB_RED_ZONE)
0875 print_section(KERN_ERR, "Redzone ", p + s->object_size,
0876 s->inuse - s->object_size);
0877
0878 off = get_info_end(s);
0879
0880 if (s->flags & SLAB_STORE_USER)
0881 off += 2 * sizeof(struct track);
0882
0883 off += kasan_metadata_size(s);
0884
0885 if (off != size_from_object(s))
0886
0887 print_section(KERN_ERR, "Padding ", p + off,
0888 size_from_object(s) - off);
0889
0890 dump_stack();
0891 }
0892
0893 static void object_err(struct kmem_cache *s, struct slab *slab,
0894 u8 *object, char *reason)
0895 {
0896 if (slab_add_kunit_errors())
0897 return;
0898
0899 slab_bug(s, "%s", reason);
0900 print_trailer(s, slab, object);
0901 add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
0902 }
0903
0904 static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
0905 void **freelist, void *nextfree)
0906 {
0907 if ((s->flags & SLAB_CONSISTENCY_CHECKS) &&
0908 !check_valid_pointer(s, slab, nextfree) && freelist) {
0909 object_err(s, slab, *freelist, "Freechain corrupt");
0910 *freelist = NULL;
0911 slab_fix(s, "Isolate corrupted freechain");
0912 return true;
0913 }
0914
0915 return false;
0916 }
0917
0918 static __printf(3, 4) void slab_err(struct kmem_cache *s, struct slab *slab,
0919 const char *fmt, ...)
0920 {
0921 va_list args;
0922 char buf[100];
0923
0924 if (slab_add_kunit_errors())
0925 return;
0926
0927 va_start(args, fmt);
0928 vsnprintf(buf, sizeof(buf), fmt, args);
0929 va_end(args);
0930 slab_bug(s, "%s", buf);
0931 print_slab_info(slab);
0932 dump_stack();
0933 add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
0934 }
0935
0936 static void init_object(struct kmem_cache *s, void *object, u8 val)
0937 {
0938 u8 *p = kasan_reset_tag(object);
0939
0940 if (s->flags & SLAB_RED_ZONE)
0941 memset(p - s->red_left_pad, val, s->red_left_pad);
0942
0943 if (s->flags & __OBJECT_POISON) {
0944 memset(p, POISON_FREE, s->object_size - 1);
0945 p[s->object_size - 1] = POISON_END;
0946 }
0947
0948 if (s->flags & SLAB_RED_ZONE)
0949 memset(p + s->object_size, val, s->inuse - s->object_size);
0950 }
0951
0952 static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
0953 void *from, void *to)
0954 {
0955 slab_fix(s, "Restoring %s 0x%p-0x%p=0x%x", message, from, to - 1, data);
0956 memset(from, data, to - from);
0957 }
0958
0959 static int check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
0960 u8 *object, char *what,
0961 u8 *start, unsigned int value, unsigned int bytes)
0962 {
0963 u8 *fault;
0964 u8 *end;
0965 u8 *addr = slab_address(slab);
0966
0967 metadata_access_enable();
0968 fault = memchr_inv(kasan_reset_tag(start), value, bytes);
0969 metadata_access_disable();
0970 if (!fault)
0971 return 1;
0972
0973 end = start + bytes;
0974 while (end > fault && end[-1] == value)
0975 end--;
0976
0977 if (slab_add_kunit_errors())
0978 goto skip_bug_print;
0979
0980 slab_bug(s, "%s overwritten", what);
0981 pr_err("0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
0982 fault, end - 1, fault - addr,
0983 fault[0], value);
0984 print_trailer(s, slab, object);
0985 add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
0986
0987 skip_bug_print:
0988 restore_bytes(s, what, value, fault, end);
0989 return 0;
0990 }
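/*
 * Per-object layout checked by the debug code below (object address = p):
 *
 *   p - s->red_left_pad  left red zone               (SLAB_RED_ZONE)
 *   p                    object data, s->object_size bytes
 *                        (POISON_FREE/POISON_END when poisoned)
 *   p + s->object_size   right red zone / padding up to s->inuse
 *   p + s->inuse         free pointer, if stored outside the object
 *                        (see get_info_end())
 *                        2 * struct track             (SLAB_STORE_USER)
 *                        kasan metadata
 *                        POISON_INUSE padding up to size_from_object(s)
 *
 * check_pad_bytes() verifies the trailing padding of an object,
 * slab_pad_check() the unused remainder at the end of the slab itself.
 */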
1030 static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p)
1031 {
1032 unsigned long off = get_info_end(s);
1033
1034 if (s->flags & SLAB_STORE_USER)
1035
1036 off += 2 * sizeof(struct track);
1037
1038 off += kasan_metadata_size(s);
1039
1040 if (size_from_object(s) == off)
1041 return 1;
1042
1043 return check_bytes_and_report(s, slab, p, "Object padding",
1044 p + off, POISON_INUSE, size_from_object(s) - off);
1045 }
1046
1047
1048 static void slab_pad_check(struct kmem_cache *s, struct slab *slab)
1049 {
1050 u8 *start;
1051 u8 *fault;
1052 u8 *end;
1053 u8 *pad;
1054 int length;
1055 int remainder;
1056
1057 if (!(s->flags & SLAB_POISON))
1058 return;
1059
1060 start = slab_address(slab);
1061 length = slab_size(slab);
1062 end = start + length;
1063 remainder = length % s->size;
1064 if (!remainder)
1065 return;
1066
1067 pad = end - remainder;
1068 metadata_access_enable();
1069 fault = memchr_inv(kasan_reset_tag(pad), POISON_INUSE, remainder);
1070 metadata_access_disable();
1071 if (!fault)
1072 return;
1073 while (end > fault && end[-1] == POISON_INUSE)
1074 end--;
1075
1076 slab_err(s, slab, "Padding overwritten. 0x%p-0x%p @offset=%tu",
1077 fault, end - 1, fault - start);
1078 print_section(KERN_ERR, "Padding ", pad, remainder);
1079
1080 restore_bytes(s, "slab padding", POISON_INUSE, fault, end);
1081 }
1082
1083 static int check_object(struct kmem_cache *s, struct slab *slab,
1084 void *object, u8 val)
1085 {
1086 u8 *p = object;
1087 u8 *endobject = object + s->object_size;
1088
1089 if (s->flags & SLAB_RED_ZONE) {
1090 if (!check_bytes_and_report(s, slab, object, "Left Redzone",
1091 object - s->red_left_pad, val, s->red_left_pad))
1092 return 0;
1093
1094 if (!check_bytes_and_report(s, slab, object, "Right Redzone",
1095 endobject, val, s->inuse - s->object_size))
1096 return 0;
1097 } else {
1098 if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
1099 check_bytes_and_report(s, slab, p, "Alignment padding",
1100 endobject, POISON_INUSE,
1101 s->inuse - s->object_size);
1102 }
1103 }
1104
1105 if (s->flags & SLAB_POISON) {
1106 if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
1107 (!check_bytes_and_report(s, slab, p, "Poison", p,
1108 POISON_FREE, s->object_size - 1) ||
1109 !check_bytes_and_report(s, slab, p, "End Poison",
1110 p + s->object_size - 1, POISON_END, 1)))
1111 return 0;
1112
1113
1114
1115 check_pad_bytes(s, slab, p);
1116 }
1117
1118 if (!freeptr_outside_object(s) && val == SLUB_RED_ACTIVE)
1119
1120
1121
1122
1123 return 1;
1124
1125
1126 if (!check_valid_pointer(s, slab, get_freepointer(s, p))) {
1127 object_err(s, slab, p, "Freepointer corrupt");
1128
1129
1130
1131
1132
1133 set_freepointer(s, p, NULL);
1134 return 0;
1135 }
1136 return 1;
1137 }
1138
1139 static int check_slab(struct kmem_cache *s, struct slab *slab)
1140 {
1141 int maxobj;
1142
1143 if (!folio_test_slab(slab_folio(slab))) {
1144 slab_err(s, slab, "Not a valid slab page");
1145 return 0;
1146 }
1147
1148 maxobj = order_objects(slab_order(slab), s->size);
1149 if (slab->objects > maxobj) {
1150 slab_err(s, slab, "objects %u > max %u",
1151 slab->objects, maxobj);
1152 return 0;
1153 }
1154 if (slab->inuse > slab->objects) {
1155 slab_err(s, slab, "inuse %u > max %u",
1156 slab->inuse, slab->objects);
1157 return 0;
1158 }
1159
1160 slab_pad_check(s, slab);
1161 return 1;
1162 }
1163
1164
1165
1166
1167
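/*
 * Determine whether 'search' is on the slab's freelist, validating the
 * chain and repairing corrupted pointers and counters along the way.
 * Called with search == NULL purely for the consistency checks.
 */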
1168 static int on_freelist(struct kmem_cache *s, struct slab *slab, void *search)
1169 {
1170 int nr = 0;
1171 void *fp;
1172 void *object = NULL;
1173 int max_objects;
1174
1175 fp = slab->freelist;
1176 while (fp && nr <= slab->objects) {
1177 if (fp == search)
1178 return 1;
1179 if (!check_valid_pointer(s, slab, fp)) {
1180 if (object) {
1181 object_err(s, slab, object,
1182 "Freechain corrupt");
1183 set_freepointer(s, object, NULL);
1184 } else {
1185 slab_err(s, slab, "Freepointer corrupt");
1186 slab->freelist = NULL;
1187 slab->inuse = slab->objects;
1188 slab_fix(s, "Freelist cleared");
1189 return 0;
1190 }
1191 break;
1192 }
1193 object = fp;
1194 fp = get_freepointer(s, object);
1195 nr++;
1196 }
1197
1198 max_objects = order_objects(slab_order(slab), s->size);
1199 if (max_objects > MAX_OBJS_PER_PAGE)
1200 max_objects = MAX_OBJS_PER_PAGE;
1201
1202 if (slab->objects != max_objects) {
1203 slab_err(s, slab, "Wrong number of objects. Found %d but should be %d",
1204 slab->objects, max_objects);
1205 slab->objects = max_objects;
1206 slab_fix(s, "Number of objects adjusted");
1207 }
1208 if (slab->inuse != slab->objects - nr) {
1209 slab_err(s, slab, "Wrong object count. Counter is %d but counted were %d",
1210 slab->inuse, slab->objects - nr);
1211 slab->inuse = slab->objects - nr;
1212 slab_fix(s, "Object count adjusted");
1213 }
1214 return search == NULL;
1215 }
1216
1217 static void trace(struct kmem_cache *s, struct slab *slab, void *object,
1218 int alloc)
1219 {
1220 if (s->flags & SLAB_TRACE) {
1221 pr_info("TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
1222 s->name,
1223 alloc ? "alloc" : "free",
1224 object, slab->inuse,
1225 slab->freelist);
1226
1227 if (!alloc)
1228 print_section(KERN_INFO, "Object ", (void *)object,
1229 s->object_size);
1230
1231 dump_stack();
1232 }
1233 }
1234
1235
1236
1237
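/* Tracking of fully allocated slabs (n->full), only with SLAB_STORE_USER. */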
1238 static void add_full(struct kmem_cache *s,
1239 struct kmem_cache_node *n, struct slab *slab)
1240 {
1241 if (!(s->flags & SLAB_STORE_USER))
1242 return;
1243
1244 lockdep_assert_held(&n->list_lock);
1245 list_add(&slab->slab_list, &n->full);
1246 }
1247
1248 static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct slab *slab)
1249 {
1250 if (!(s->flags & SLAB_STORE_USER))
1251 return;
1252
1253 lockdep_assert_held(&n->list_lock);
1254 list_del(&slab->slab_list);
1255 }
1256
1257
1258 static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1259 {
1260 struct kmem_cache_node *n = get_node(s, node);
1261
1262 return atomic_long_read(&n->nr_slabs);
1263 }
1264
1265 static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1266 {
1267 return atomic_long_read(&n->nr_slabs);
1268 }
1269
1270 static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
1271 {
1272 struct kmem_cache_node *n = get_node(s, node);
1273
1274
1275
1276
1277
1278
1279
1280 if (likely(n)) {
1281 atomic_long_inc(&n->nr_slabs);
1282 atomic_long_add(objects, &n->total_objects);
1283 }
1284 }
1285 static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
1286 {
1287 struct kmem_cache_node *n = get_node(s, node);
1288
1289 atomic_long_dec(&n->nr_slabs);
1290 atomic_long_sub(objects, &n->total_objects);
1291 }
1292
1293
1294 static void setup_object_debug(struct kmem_cache *s, void *object)
1295 {
1296 if (!kmem_cache_debug_flags(s, SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON))
1297 return;
1298
1299 init_object(s, object, SLUB_RED_INACTIVE);
1300 init_tracking(s, object);
1301 }
1302
1303 static
1304 void setup_slab_debug(struct kmem_cache *s, struct slab *slab, void *addr)
1305 {
1306 if (!kmem_cache_debug_flags(s, SLAB_POISON))
1307 return;
1308
1309 metadata_access_enable();
1310 memset(kasan_reset_tag(addr), POISON_INUSE, slab_size(slab));
1311 metadata_access_disable();
1312 }
1313
1314 static inline int alloc_consistency_checks(struct kmem_cache *s,
1315 struct slab *slab, void *object)
1316 {
1317 if (!check_slab(s, slab))
1318 return 0;
1319
1320 if (!check_valid_pointer(s, slab, object)) {
1321 object_err(s, slab, object, "Freelist Pointer check fails");
1322 return 0;
1323 }
1324
1325 if (!check_object(s, slab, object, SLUB_RED_INACTIVE))
1326 return 0;
1327
1328 return 1;
1329 }
1330
1331 static noinline int alloc_debug_processing(struct kmem_cache *s,
1332 struct slab *slab,
1333 void *object, unsigned long addr)
1334 {
1335 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1336 if (!alloc_consistency_checks(s, slab, object))
1337 goto bad;
1338 }
1339
1340
1341 if (s->flags & SLAB_STORE_USER)
1342 set_track(s, object, TRACK_ALLOC, addr);
1343 trace(s, slab, object, 1);
1344 init_object(s, object, SLUB_RED_ACTIVE);
1345 return 1;
1346
1347 bad:
1348 if (folio_test_slab(slab_folio(slab))) {
1349
1350
1351
1352
1353
1354 slab_fix(s, "Marking all objects used");
1355 slab->inuse = slab->objects;
1356 slab->freelist = NULL;
1357 }
1358 return 0;
1359 }
1360
1361 static inline int free_consistency_checks(struct kmem_cache *s,
1362 struct slab *slab, void *object, unsigned long addr)
1363 {
1364 if (!check_valid_pointer(s, slab, object)) {
1365 slab_err(s, slab, "Invalid object pointer 0x%p", object);
1366 return 0;
1367 }
1368
1369 if (on_freelist(s, slab, object)) {
1370 object_err(s, slab, object, "Object already free");
1371 return 0;
1372 }
1373
1374 if (!check_object(s, slab, object, SLUB_RED_ACTIVE))
1375 return 0;
1376
1377 if (unlikely(s != slab->slab_cache)) {
1378 if (!folio_test_slab(slab_folio(slab))) {
1379 slab_err(s, slab, "Attempt to free object(0x%p) outside of slab",
1380 object);
1381 } else if (!slab->slab_cache) {
1382 pr_err("SLUB <none>: no slab for object 0x%p.\n",
1383 object);
1384 dump_stack();
1385 } else
1386 object_err(s, slab, object,
1387 "page slab pointer corrupt.");
1388 return 0;
1389 }
1390 return 1;
1391 }
1392
1393
1394 static noinline int free_debug_processing(
1395 struct kmem_cache *s, struct slab *slab,
1396 void *head, void *tail, int bulk_cnt,
1397 unsigned long addr)
1398 {
1399 struct kmem_cache_node *n = get_node(s, slab_nid(slab));
1400 void *object = head;
1401 int cnt = 0;
1402 unsigned long flags, flags2;
1403 int ret = 0;
1404 depot_stack_handle_t handle = 0;
1405
1406 if (s->flags & SLAB_STORE_USER)
1407 handle = set_track_prepare();
1408
1409 spin_lock_irqsave(&n->list_lock, flags);
1410 slab_lock(slab, &flags2);
1411
1412 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1413 if (!check_slab(s, slab))
1414 goto out;
1415 }
1416
1417 next_object:
1418 cnt++;
1419
1420 if (s->flags & SLAB_CONSISTENCY_CHECKS) {
1421 if (!free_consistency_checks(s, slab, object, addr))
1422 goto out;
1423 }
1424
1425 if (s->flags & SLAB_STORE_USER)
1426 set_track_update(s, object, TRACK_FREE, addr, handle);
1427 trace(s, slab, object, 0);
1428
1429 init_object(s, object, SLUB_RED_INACTIVE);
1430
1431
1432 if (object != tail) {
1433 object = get_freepointer(s, object);
1434 goto next_object;
1435 }
1436 ret = 1;
1437
1438 out:
1439 if (cnt != bulk_cnt)
1440 slab_err(s, slab, "Bulk freelist count(%d) invalid(%d)\n",
1441 bulk_cnt, cnt);
1442
1443 slab_unlock(slab, &flags2);
1444 spin_unlock_irqrestore(&n->list_lock, flags);
1445 if (!ret)
1446 slab_fix(s, "Object at 0x%p not freed", object);
1447 return ret;
1448 }
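/*
 * Parse one block of a slub_debug string.  Blocks are separated by ';' and
 * have the form "<flag characters>[,<cache name list>]".  The parsed flags
 * are returned in *flags, the optional cache list in *slabs, and the return
 * value points at the next block (NULL when the string is exhausted).
 * 'init' indicates early command line parsing, where unknown options are
 * reported.
 */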
1460 static char *
1461 parse_slub_debug_flags(char *str, slab_flags_t *flags, char **slabs, bool init)
1462 {
1463 bool higher_order_disable = false;
1464
1465
1466 while (*str && *str == ';')
1467 str++;
1468
1469 if (*str == ',') {
1470
1471
1472
1473
1474 *flags = DEBUG_DEFAULT_FLAGS;
1475 goto check_slabs;
1476 }
1477 *flags = 0;
1478
1479
1480 for (; *str && *str != ',' && *str != ';'; str++) {
1481 switch (tolower(*str)) {
1482 case '-':
1483 *flags = 0;
1484 break;
1485 case 'f':
1486 *flags |= SLAB_CONSISTENCY_CHECKS;
1487 break;
1488 case 'z':
1489 *flags |= SLAB_RED_ZONE;
1490 break;
1491 case 'p':
1492 *flags |= SLAB_POISON;
1493 break;
1494 case 'u':
1495 *flags |= SLAB_STORE_USER;
1496 break;
1497 case 't':
1498 *flags |= SLAB_TRACE;
1499 break;
1500 case 'a':
1501 *flags |= SLAB_FAILSLAB;
1502 break;
1503 case 'o':
1504
1505
1506
1507
1508 higher_order_disable = true;
1509 break;
1510 default:
1511 if (init)
1512 pr_err("slub_debug option '%c' unknown. skipped\n", *str);
1513 }
1514 }
1515 check_slabs:
1516 if (*str == ',')
1517 *slabs = ++str;
1518 else
1519 *slabs = NULL;
1520
1521
1522 while (*str && *str != ';')
1523 str++;
1524
1525
1526 while (*str && *str == ';')
1527 str++;
1528
1529 if (init && higher_order_disable)
1530 disable_higher_order_debug = 1;
1531
1532 if (*str)
1533 return str;
1534 else
1535 return NULL;
1536 }
1537
1538 static int __init setup_slub_debug(char *str)
1539 {
1540 slab_flags_t flags;
1541 slab_flags_t global_flags;
1542 char *saved_str;
1543 char *slab_list;
1544 bool global_slub_debug_changed = false;
1545 bool slab_list_specified = false;
1546
1547 global_flags = DEBUG_DEFAULT_FLAGS;
1548 if (*str++ != '=' || !*str)
1549
1550
1551
1552 goto out;
1553
1554 saved_str = str;
1555 while (str) {
1556 str = parse_slub_debug_flags(str, &flags, &slab_list, true);
1557
1558 if (!slab_list) {
1559 global_flags = flags;
1560 global_slub_debug_changed = true;
1561 } else {
1562 slab_list_specified = true;
1563 if (flags & SLAB_STORE_USER)
1564 stack_depot_want_early_init();
1565 }
1566 }
1567
1568
1569
1570
1571
1572
1573
1574
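/*
 * If any block named specific caches, keep the whole string so that
 * kmem_cache_flags() can re-parse it when those caches are created; the
 * global flags only change if a block without a cache list was seen.
 */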
1575 if (slab_list_specified) {
1576 if (!global_slub_debug_changed)
1577 global_flags = slub_debug;
1578 slub_debug_string = saved_str;
1579 }
1580 out:
1581 slub_debug = global_flags;
1582 if (slub_debug & SLAB_STORE_USER)
1583 stack_depot_want_early_init();
1584 if (slub_debug != 0 || slub_debug_string)
1585 static_branch_enable(&slub_debug_enabled);
1586 else
1587 static_branch_disable(&slub_debug_enabled);
1588 if ((static_branch_unlikely(&init_on_alloc) ||
1589 static_branch_unlikely(&init_on_free)) &&
1590 (slub_debug & SLAB_POISON))
1591 pr_info("mem auto-init: SLAB_POISON will take precedence over init_on_alloc/init_on_free\n");
1592 return 1;
1593 }
1594
1595 __setup("slub_debug", setup_slub_debug);
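/*
 * Determine the debug flags for a cache that is being created: walk the
 * saved slub_debug string and, if a block's cache-name list matches 'name'
 * ('*' acts as a prefix glob), use that block's flags; otherwise fall back
 * to the global slub_debug flags.
 */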
1608 slab_flags_t kmem_cache_flags(unsigned int object_size,
1609 slab_flags_t flags, const char *name)
1610 {
1611 char *iter;
1612 size_t len;
1613 char *next_block;
1614 slab_flags_t block_flags;
1615 slab_flags_t slub_debug_local = slub_debug;
1616
1617 if (flags & SLAB_NO_USER_FLAGS)
1618 return flags;
1619
1620
1621
1622
1623
1624
1625 if (flags & SLAB_NOLEAKTRACE)
1626 slub_debug_local &= ~SLAB_STORE_USER;
1627
1628 len = strlen(name);
1629 next_block = slub_debug_string;
1630
1631 while (next_block) {
1632 next_block = parse_slub_debug_flags(next_block, &block_flags, &iter, false);
1633 if (!iter)
1634 continue;
1635
1636 while (*iter) {
1637 char *end, *glob;
1638 size_t cmplen;
1639
1640 end = strchrnul(iter, ',');
1641 if (next_block && next_block < end)
1642 end = next_block - 1;
1643
1644 glob = strnchr(iter, end - iter, '*');
1645 if (glob)
1646 cmplen = glob - iter;
1647 else
1648 cmplen = max_t(size_t, len, (end - iter));
1649
1650 if (!strncmp(name, iter, cmplen)) {
1651 flags |= block_flags;
1652 return flags;
1653 }
1654
1655 if (!*end || *end == ';')
1656 break;
1657 iter = end + 1;
1658 }
1659 }
1660
1661 return flags | slub_debug_local;
1662 }
1663 #else
1664 static inline void setup_object_debug(struct kmem_cache *s, void *object) {}
1665 static inline
1666 void setup_slab_debug(struct kmem_cache *s, struct slab *slab, void *addr) {}
1667
1668 static inline int alloc_debug_processing(struct kmem_cache *s,
1669 struct slab *slab, void *object, unsigned long addr) { return 0; }
1670
1671 static inline int free_debug_processing(
1672 struct kmem_cache *s, struct slab *slab,
1673 void *head, void *tail, int bulk_cnt,
1674 unsigned long addr) { return 0; }
1675
1676 static inline void slab_pad_check(struct kmem_cache *s, struct slab *slab) {}
1677 static inline int check_object(struct kmem_cache *s, struct slab *slab,
1678 void *object, u8 val) { return 1; }
1679 static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
1680 struct slab *slab) {}
1681 static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
1682 struct slab *slab) {}
1683 slab_flags_t kmem_cache_flags(unsigned int object_size,
1684 slab_flags_t flags, const char *name)
1685 {
1686 return flags;
1687 }
1688 #define slub_debug 0
1689
1690 #define disable_higher_order_debug 0
1691
1692 static inline unsigned long slabs_node(struct kmem_cache *s, int node)
1693 { return 0; }
1694 static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
1695 { return 0; }
1696 static inline void inc_slabs_node(struct kmem_cache *s, int node,
1697 int objects) {}
1698 static inline void dec_slabs_node(struct kmem_cache *s, int node,
1699 int objects) {}
1700
1701 static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
1702 void **freelist, void *nextfree)
1703 {
1704 return false;
1705 }
1706 #endif
1707
1708
1709
1710
1711
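/*
 * Hooks for the large kmalloc allocation and free paths so that KASAN and
 * kmemleak see those allocations as well.
 */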
1712 static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
1713 {
1714 ptr = kasan_kmalloc_large(ptr, size, flags);
1715
1716 kmemleak_alloc(ptr, size, 1, flags);
1717 return ptr;
1718 }
1719
1720 static __always_inline void kfree_hook(void *x)
1721 {
1722 kmemleak_free(x);
1723 kasan_kfree_large(x);
1724 }
1725
1726 static __always_inline bool slab_free_hook(struct kmem_cache *s,
1727 void *x, bool init)
1728 {
1729 kmemleak_free_recursive(x, s->flags);
1730
1731 debug_check_no_locks_freed(x, s->object_size);
1732
1733 if (!(s->flags & SLAB_DEBUG_OBJECTS))
1734 debug_check_no_obj_freed(x, s->object_size);
1735
1736
1737 if (!(s->flags & SLAB_TYPESAFE_BY_RCU))
1738 __kcsan_check_access(x, s->object_size,
1739 KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT);
1740
1741
1742
1743
1744
1745
1746
1747
1748
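/*
 * When init_on_free is enabled, zero the object here (unless KASAN's
 * integrated init already does so) plus the unused area between the end of
 * the object and the next object's left red zone; the red zones themselves
 * are left intact.
 */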
1749 if (init) {
1750 int rsize;
1751
1752 if (!kasan_has_integrated_init())
1753 memset(kasan_reset_tag(x), 0, s->object_size);
1754 rsize = (s->flags & SLAB_RED_ZONE) ? s->red_left_pad : 0;
1755 memset((char *)kasan_reset_tag(x) + s->inuse, 0,
1756 s->size - s->inuse - rsize);
1757 }
1758
1759 return kasan_slab_free(s, x, init);
1760 }
1761
1762 static inline bool slab_free_freelist_hook(struct kmem_cache *s,
1763 void **head, void **tail,
1764 int *cnt)
1765 {
1766
1767 void *object;
1768 void *next = *head;
1769 void *old_tail = *tail ? *tail : *head;
1770
1771 if (is_kfence_address(next)) {
1772 slab_free_hook(s, next, false);
1773 return true;
1774 }
1775
1776
1777 *head = NULL;
1778 *tail = NULL;
1779
1780 do {
1781 object = next;
1782 next = get_freepointer(s, object);
1783
1784
1785 if (!slab_free_hook(s, object, slab_want_init_on_free(s))) {
1786
1787 set_freepointer(s, object, *head);
1788 *head = object;
1789 if (!*tail)
1790 *tail = object;
1791 } else {
1792
1793
1794
1795
1796 --(*cnt);
1797 }
1798 } while (object != old_tail);
1799
1800 if (*head == *tail)
1801 *tail = NULL;
1802
1803 return *head != NULL;
1804 }
1805
1806 static void *setup_object(struct kmem_cache *s, void *object)
1807 {
1808 setup_object_debug(s, object);
1809 object = kasan_init_slab_obj(s, object);
1810 if (unlikely(s->ctor)) {
1811 kasan_unpoison_object_data(s, object);
1812 s->ctor(object);
1813 kasan_poison_object_data(s, object);
1814 }
1815 return object;
1816 }
1817
1818
1819
1820
1821 static inline struct slab *alloc_slab_page(gfp_t flags, int node,
1822 struct kmem_cache_order_objects oo)
1823 {
1824 struct folio *folio;
1825 struct slab *slab;
1826 unsigned int order = oo_order(oo);
1827
1828 if (node == NUMA_NO_NODE)
1829 folio = (struct folio *)alloc_pages(flags, order);
1830 else
1831 folio = (struct folio *)__alloc_pages_node(node, flags, order);
1832
1833 if (!folio)
1834 return NULL;
1835
1836 slab = folio_slab(folio);
1837 __folio_set_slab(folio);
1838 if (page_is_pfmemalloc(folio_page(folio, 0)))
1839 slab_set_pfmemalloc(slab);
1840
1841 return slab;
1842 }
1843
1844 #ifdef CONFIG_SLAB_FREELIST_RANDOM
1845
1846 static int init_cache_random_seq(struct kmem_cache *s)
1847 {
1848 unsigned int count = oo_objects(s->oo);
1849 int err;
1850
1851
1852 if (s->random_seq)
1853 return 0;
1854
1855 err = cache_random_seq_create(s, count, GFP_KERNEL);
1856 if (err) {
1857 pr_err("SLUB: Unable to initialize free list for %s\n",
1858 s->name);
1859 return err;
1860 }
1861
1862
1863 if (s->random_seq) {
1864 unsigned int i;
1865
1866 for (i = 0; i < count; i++)
1867 s->random_seq[i] *= s->size;
1868 }
1869 return 0;
1870 }
1871
1872
1873 static void __init init_freelist_randomization(void)
1874 {
1875 struct kmem_cache *s;
1876
1877 mutex_lock(&slab_mutex);
1878
1879 list_for_each_entry(s, &slab_caches, list)
1880 init_cache_random_seq(s);
1881
1882 mutex_unlock(&slab_mutex);
1883 }
1884
1885
1886 static void *next_freelist_entry(struct kmem_cache *s, struct slab *slab,
1887 unsigned long *pos, void *start,
1888 unsigned long page_limit,
1889 unsigned long freelist_count)
1890 {
1891 unsigned int idx;
1892
1893
1894
1895
1896
1897 do {
1898 idx = s->random_seq[*pos];
1899 *pos += 1;
1900 if (*pos >= freelist_count)
1901 *pos = 0;
1902 } while (unlikely(idx >= page_limit));
1903
1904 return (char *)start + idx;
1905 }
1906
1907
1908 static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
1909 {
1910 void *start;
1911 void *cur;
1912 void *next;
1913 unsigned long idx, pos, page_limit, freelist_count;
1914
1915 if (slab->objects < 2 || !s->random_seq)
1916 return false;
1917
1918 freelist_count = oo_objects(s->oo);
1919 pos = get_random_int() % freelist_count;
1920
1921 page_limit = slab->objects * s->size;
1922 start = fixup_red_left(s, slab_address(slab));
1923
1924
1925 cur = next_freelist_entry(s, slab, &pos, start, page_limit,
1926 freelist_count);
1927 cur = setup_object(s, cur);
1928 slab->freelist = cur;
1929
1930 for (idx = 1; idx < slab->objects; idx++) {
1931 next = next_freelist_entry(s, slab, &pos, start, page_limit,
1932 freelist_count);
1933 next = setup_object(s, next);
1934 set_freepointer(s, cur, next);
1935 cur = next;
1936 }
1937 set_freepointer(s, cur, NULL);
1938
1939 return true;
1940 }
1941 #else
1942 static inline int init_cache_random_seq(struct kmem_cache *s)
1943 {
1944 return 0;
1945 }
1946 static inline void init_freelist_randomization(void) { }
1947 static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
1948 {
1949 return false;
1950 }
1951 #endif
1952
1953 static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1954 {
1955 struct slab *slab;
1956 struct kmem_cache_order_objects oo = s->oo;
1957 gfp_t alloc_gfp;
1958 void *start, *p, *next;
1959 int idx;
1960 bool shuffle;
1961
1962 flags &= gfp_allowed_mask;
1963
1964 flags |= s->allocflags;
1965
1966
1967
1968
1969
1970 alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
1971 if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && oo_order(oo) > oo_order(s->min))
1972 alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~__GFP_RECLAIM;
1973
1974 slab = alloc_slab_page(alloc_gfp, node, oo);
1975 if (unlikely(!slab)) {
1976 oo = s->min;
1977 alloc_gfp = flags;
1978
1979
1980
1981
1982 slab = alloc_slab_page(alloc_gfp, node, oo);
1983 if (unlikely(!slab))
1984 goto out;
1985 stat(s, ORDER_FALLBACK);
1986 }
1987
1988 slab->objects = oo_objects(oo);
1989
1990 account_slab(slab, oo_order(oo), s, flags);
1991
1992 slab->slab_cache = s;
1993
1994 kasan_poison_slab(slab);
1995
1996 start = slab_address(slab);
1997
1998 setup_slab_debug(s, slab, start);
1999
2000 shuffle = shuffle_freelist(s, slab);
2001
2002 if (!shuffle) {
2003 start = fixup_red_left(s, start);
2004 start = setup_object(s, start);
2005 slab->freelist = start;
2006 for (idx = 0, p = start; idx < slab->objects - 1; idx++) {
2007 next = p + s->size;
2008 next = setup_object(s, next);
2009 set_freepointer(s, p, next);
2010 p = next;
2011 }
2012 set_freepointer(s, p, NULL);
2013 }
2014
2015 slab->inuse = slab->objects;
2016 slab->frozen = 1;
2017
2018 out:
2019 if (!slab)
2020 return NULL;
2021
2022 inc_slabs_node(s, slab_nid(slab), slab->objects);
2023
2024 return slab;
2025 }
2026
2027 static struct slab *new_slab(struct kmem_cache *s, gfp_t flags, int node)
2028 {
2029 if (unlikely(flags & GFP_SLAB_BUG_MASK))
2030 flags = kmalloc_fix_flags(flags);
2031
2032 WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO));
2033
2034 return allocate_slab(s,
2035 flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
2036 }
2037
2038 static void __free_slab(struct kmem_cache *s, struct slab *slab)
2039 {
2040 struct folio *folio = slab_folio(slab);
2041 int order = folio_order(folio);
2042 int pages = 1 << order;
2043
2044 if (kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) {
2045 void *p;
2046
2047 slab_pad_check(s, slab);
2048 for_each_object(p, s, slab_address(slab), slab->objects)
2049 check_object(s, slab, p, SLUB_RED_INACTIVE);
2050 }
2051
2052 __slab_clear_pfmemalloc(slab);
2053 __folio_clear_slab(folio);
2054 folio->mapping = NULL;
2055 if (current->reclaim_state)
2056 current->reclaim_state->reclaimed_slab += pages;
2057 unaccount_slab(slab, order, s);
2058 __free_pages(folio_page(folio, 0), order);
2059 }
2060
2061 static void rcu_free_slab(struct rcu_head *h)
2062 {
2063 struct slab *slab = container_of(h, struct slab, rcu_head);
2064
2065 __free_slab(slab->slab_cache, slab);
2066 }
2067
2068 static void free_slab(struct kmem_cache *s, struct slab *slab)
2069 {
2070 if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
2071 call_rcu(&slab->rcu_head, rcu_free_slab);
2072 } else
2073 __free_slab(s, slab);
2074 }
2075
2076 static void discard_slab(struct kmem_cache *s, struct slab *slab)
2077 {
2078 dec_slabs_node(s, slab_nid(slab), slab->objects);
2079 free_slab(s, slab);
2080 }
2081
2082
2083
2084
2085 static inline void
2086 __add_partial(struct kmem_cache_node *n, struct slab *slab, int tail)
2087 {
2088 n->nr_partial++;
2089 if (tail == DEACTIVATE_TO_TAIL)
2090 list_add_tail(&slab->slab_list, &n->partial);
2091 else
2092 list_add(&slab->slab_list, &n->partial);
2093 }
2094
2095 static inline void add_partial(struct kmem_cache_node *n,
2096 struct slab *slab, int tail)
2097 {
2098 lockdep_assert_held(&n->list_lock);
2099 __add_partial(n, slab, tail);
2100 }
2101
2102 static inline void remove_partial(struct kmem_cache_node *n,
2103 struct slab *slab)
2104 {
2105 lockdep_assert_held(&n->list_lock);
2106 list_del(&slab->slab_list);
2107 n->nr_partial--;
2108 }
2109
2110
2111
2112
2113
2114
2115
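/*
 * Take a slab off the node's partial list and freeze it with a
 * cmpxchg_double.  In allocation mode ('mode' != 0) the whole freelist is
 * grabbed for the caller; otherwise the freelist stays with the (now
 * frozen) slab.  Returns the old freelist, or NULL if the cmpxchg failed.
 * n->list_lock must be held.
 */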
2116 static inline void *acquire_slab(struct kmem_cache *s,
2117 struct kmem_cache_node *n, struct slab *slab,
2118 int mode)
2119 {
2120 void *freelist;
2121 unsigned long counters;
2122 struct slab new;
2123
2124 lockdep_assert_held(&n->list_lock);
2125
2126
2127
2128
2129
2130
2131 freelist = slab->freelist;
2132 counters = slab->counters;
2133 new.counters = counters;
2134 if (mode) {
2135 new.inuse = slab->objects;
2136 new.freelist = NULL;
2137 } else {
2138 new.freelist = freelist;
2139 }
2140
2141 VM_BUG_ON(new.frozen);
2142 new.frozen = 1;
2143
2144 if (!__cmpxchg_double_slab(s, slab,
2145 freelist, counters,
2146 new.freelist, new.counters,
2147 "acquire_slab"))
2148 return NULL;
2149
2150 remove_partial(n, slab);
2151 WARN_ON(!freelist);
2152 return freelist;
2153 }
2154
2155 #ifdef CONFIG_SLUB_CPU_PARTIAL
2156 static void put_cpu_partial(struct kmem_cache *s, struct slab *slab, int drain);
2157 #else
2158 static inline void put_cpu_partial(struct kmem_cache *s, struct slab *slab,
2159 int drain) { }
2160 #endif
2161 static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags);
2162
2163
2164
2165
2166 static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
2167 struct slab **ret_slab, gfp_t gfpflags)
2168 {
2169 struct slab *slab, *slab2;
2170 void *object = NULL;
2171 unsigned long flags;
2172 unsigned int partial_slabs = 0;
2173
2174
2175
2176
2177
2178
2179
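/*
 * Racy check: if no partial slabs are visible, skip taking list_lock
 * altogether.  Worst case we miss a slab that is concurrently being added.
 */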
2180 if (!n || !n->nr_partial)
2181 return NULL;
2182
2183 spin_lock_irqsave(&n->list_lock, flags);
2184 list_for_each_entry_safe(slab, slab2, &n->partial, slab_list) {
2185 void *t;
2186
2187 if (!pfmemalloc_match(slab, gfpflags))
2188 continue;
2189
2190 t = acquire_slab(s, n, slab, object == NULL);
2191 if (!t)
2192 break;
2193
2194 if (!object) {
2195 *ret_slab = slab;
2196 stat(s, ALLOC_FROM_PARTIAL);
2197 object = t;
2198 } else {
2199 put_cpu_partial(s, slab, 0);
2200 stat(s, CPU_PARTIAL_NODE);
2201 partial_slabs++;
2202 }
2203 #ifdef CONFIG_SLUB_CPU_PARTIAL
2204 if (!kmem_cache_has_cpu_partial(s)
2205 || partial_slabs > s->cpu_partial_slabs / 2)
2206 break;
2207 #else
2208 break;
2209 #endif
2210
2211 }
2212 spin_unlock_irqrestore(&n->list_lock, flags);
2213 return object;
2214 }
2215
2216
2217
2218
2219 static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
2220 struct slab **ret_slab)
2221 {
2222 #ifdef CONFIG_NUMA
2223 struct zonelist *zonelist;
2224 struct zoneref *z;
2225 struct zone *zone;
2226 enum zone_type highest_zoneidx = gfp_zone(flags);
2227 void *object;
2228 unsigned int cpuset_mems_cookie;
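/*
 * remote_node_defrag_ratio throttles how eagerly partial slabs are taken
 * from remote nodes instead of allocating a new slab locally: 0 disables
 * it entirely, larger values make the (cheap, get_cycles() based) random
 * test below succeed more often.
 */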
2248 if (!s->remote_node_defrag_ratio ||
2249 get_cycles() % 1024 > s->remote_node_defrag_ratio)
2250 return NULL;
2251
2252 do {
2253 cpuset_mems_cookie = read_mems_allowed_begin();
2254 zonelist = node_zonelist(mempolicy_slab_node(), flags);
2255 for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) {
2256 struct kmem_cache_node *n;
2257
2258 n = get_node(s, zone_to_nid(zone));
2259
2260 if (n && cpuset_zone_allowed(zone, flags) &&
2261 n->nr_partial > s->min_partial) {
2262 object = get_partial_node(s, n, ret_slab, flags);
2263 if (object) {
2264
2265
2266
2267
2268
2269
2270
2271 return object;
2272 }
2273 }
2274 }
2275 } while (read_mems_allowed_retry(cpuset_mems_cookie));
2276 #endif
2277 return NULL;
2278 }
2279
2280
2281
2282
2283 static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
2284 struct slab **ret_slab)
2285 {
2286 void *object;
2287 int searchnode = node;
2288
2289 if (node == NUMA_NO_NODE)
2290 searchnode = numa_mem_id();
2291
2292 object = get_partial_node(s, get_node(s, searchnode), ret_slab, flags);
2293 if (object || node != NUMA_NO_NODE)
2294 return object;
2295
2296 return get_any_partial(s, flags, ret_slab);
2297 }
2298
2299 #ifdef CONFIG_PREEMPTION
2300
2301
2302
2303
2304
2305 #define TID_STEP roundup_pow_of_two(CONFIG_NR_CPUS)
2306 #else
2307
2308
2309
2310
2311 #define TID_STEP 1
2312 #endif
2313
2314 static inline unsigned long next_tid(unsigned long tid)
2315 {
2316 return tid + TID_STEP;
2317 }
2318
2319 #ifdef SLUB_DEBUG_CMPXCHG
2320 static inline unsigned int tid_to_cpu(unsigned long tid)
2321 {
2322 return tid % TID_STEP;
2323 }
2324
2325 static inline unsigned long tid_to_event(unsigned long tid)
2326 {
2327 return tid / TID_STEP;
2328 }
2329 #endif
2330
2331 static inline unsigned int init_tid(int cpu)
2332 {
2333 return cpu;
2334 }
2335
2336 static inline void note_cmpxchg_failure(const char *n,
2337 const struct kmem_cache *s, unsigned long tid)
2338 {
2339 #ifdef SLUB_DEBUG_CMPXCHG
2340 unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);
2341
2342 pr_info("%s %s: cmpxchg redo ", n, s->name);
2343
2344 #ifdef CONFIG_PREEMPTION
2345 if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
2346 pr_warn("due to cpu change %d -> %d\n",
2347 tid_to_cpu(tid), tid_to_cpu(actual_tid));
2348 else
2349 #endif
2350 if (tid_to_event(tid) != tid_to_event(actual_tid))
2351 pr_warn("due to cpu running other code. Event %ld->%ld\n",
2352 tid_to_event(tid), tid_to_event(actual_tid));
2353 else
2354 pr_warn("for unknown reason: actual=%lx was=%lx target=%lx\n",
2355 actual_tid, tid, next_tid(tid));
2356 #endif
2357 stat(s, CMPXCHG_DOUBLE_CPU_FAIL);
2358 }
2359
2360 static void init_kmem_cache_cpus(struct kmem_cache *s)
2361 {
2362 int cpu;
2363 struct kmem_cache_cpu *c;
2364
2365 for_each_possible_cpu(cpu) {
2366 c = per_cpu_ptr(s->cpu_slab, cpu);
2367 local_lock_init(&c->lock);
2368 c->tid = init_tid(cpu);
2369 }
2370 }
2371
2372
2373
2374
2375
2376
2377
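/*
 * Finish removing the cpu slab: splice the remaining per-cpu freelist back
 * into the slab, unfreeze it and then either put it on the node's partial
 * (or, for debug caches, full) list or discard it when it ended up empty
 * and the node already caches at least min_partial slabs.
 */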
2378 static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
2379 void *freelist)
2380 {
2381 enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE, M_FULL_NOLIST };
2382 struct kmem_cache_node *n = get_node(s, slab_nid(slab));
2383 int free_delta = 0;
2384 enum slab_modes mode = M_NONE;
2385 void *nextfree, *freelist_iter, *freelist_tail;
2386 int tail = DEACTIVATE_TO_HEAD;
2387 unsigned long flags = 0;
2388 struct slab new;
2389 struct slab old;
2390
2391 if (slab->freelist) {
2392 stat(s, DEACTIVATE_REMOTE_FREES);
2393 tail = DEACTIVATE_TO_TAIL;
2394 }
2395
2396
2397
2398
2399
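/*
 * Stage one: count the objects on the cpu freelist as free_delta and
 * remember the last object of that list in freelist_tail for later
 * splicing.
 */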
2400 freelist_tail = NULL;
2401 freelist_iter = freelist;
2402 while (freelist_iter) {
2403 nextfree = get_freepointer(s, freelist_iter);
2404
2405
2406
2407
2408
2409
2410 if (freelist_corrupted(s, slab, &freelist_iter, nextfree))
2411 break;
2412
2413 freelist_tail = freelist_iter;
2414 free_delta++;
2415
2416 freelist_iter = nextfree;
2417 }
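/*
 * Stage two: splice the detached cpu freelist in front of the slab's own
 * freelist, clear the frozen bit and decide the slab's fate, all in one
 * cmpxchg_double.  list_lock is taken before the cmpxchg whenever the slab
 * will be put on a node list, and the whole step is retried if the cmpxchg
 * fails, so list membership stays consistent with freelist/frozen state.
 */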
2431 redo:
2432
2433 old.freelist = READ_ONCE(slab->freelist);
2434 old.counters = READ_ONCE(slab->counters);
2435 VM_BUG_ON(!old.frozen);
2436
2437
2438 new.counters = old.counters;
2439 if (freelist_tail) {
2440 new.inuse -= free_delta;
2441 set_freepointer(s, freelist_tail, old.freelist);
2442 new.freelist = freelist;
2443 } else
2444 new.freelist = old.freelist;
2445
2446 new.frozen = 0;
2447
2448 if (!new.inuse && n->nr_partial >= s->min_partial) {
2449 mode = M_FREE;
2450 } else if (new.freelist) {
2451 mode = M_PARTIAL;
2452
2453
2454
2455
2456 spin_lock_irqsave(&n->list_lock, flags);
2457 } else if (kmem_cache_debug_flags(s, SLAB_STORE_USER)) {
2458 mode = M_FULL;
2459
2460
2461
2462
2463
2464 spin_lock_irqsave(&n->list_lock, flags);
2465 } else {
2466 mode = M_FULL_NOLIST;
2467 }
2468
2469
2470 if (!cmpxchg_double_slab(s, slab,
2471 old.freelist, old.counters,
2472 new.freelist, new.counters,
2473 "unfreezing slab")) {
2474 if (mode == M_PARTIAL || mode == M_FULL)
2475 spin_unlock_irqrestore(&n->list_lock, flags);
2476 goto redo;
2477 }
2478
2479
2480 if (mode == M_PARTIAL) {
2481 add_partial(n, slab, tail);
2482 spin_unlock_irqrestore(&n->list_lock, flags);
2483 stat(s, tail);
2484 } else if (mode == M_FREE) {
2485 stat(s, DEACTIVATE_EMPTY);
2486 discard_slab(s, slab);
2487 stat(s, FREE_SLAB);
2488 } else if (mode == M_FULL) {
2489 add_full(s, n, slab);
2490 spin_unlock_irqrestore(&n->list_lock, flags);
2491 stat(s, DEACTIVATE_FULL);
2492 } else if (mode == M_FULL_NOLIST) {
2493 stat(s, DEACTIVATE_FULL);
2494 }
2495 }
2496
2497 #ifdef CONFIG_SLUB_CPU_PARTIAL
2498 static void __unfreeze_partials(struct kmem_cache *s, struct slab *partial_slab)
2499 {
2500 struct kmem_cache_node *n = NULL, *n2 = NULL;
2501 struct slab *slab, *slab_to_discard = NULL;
2502 unsigned long flags = 0;
2503
2504 while (partial_slab) {
2505 struct slab new;
2506 struct slab old;
2507
2508 slab = partial_slab;
2509 partial_slab = slab->next;
2510
2511 n2 = get_node(s, slab_nid(slab));
2512 if (n != n2) {
2513 if (n)
2514 spin_unlock_irqrestore(&n->list_lock, flags);
2515
2516 n = n2;
2517 spin_lock_irqsave(&n->list_lock, flags);
2518 }
2519
2520 do {
2521
2522 old.freelist = slab->freelist;
2523 old.counters = slab->counters;
2524 VM_BUG_ON(!old.frozen);
2525
2526 new.counters = old.counters;
2527 new.freelist = old.freelist;
2528
2529 new.frozen = 0;
2530
2531 } while (!__cmpxchg_double_slab(s, slab,
2532 old.freelist, old.counters,
2533 new.freelist, new.counters,
2534 "unfreezing slab"));
2535
2536 if (unlikely(!new.inuse && n->nr_partial >= s->min_partial)) {
2537 slab->next = slab_to_discard;
2538 slab_to_discard = slab;
2539 } else {
2540 add_partial(n, slab, DEACTIVATE_TO_TAIL);
2541 stat(s, FREE_ADD_PARTIAL);
2542 }
2543 }
2544
2545 if (n)
2546 spin_unlock_irqrestore(&n->list_lock, flags);
2547
2548 while (slab_to_discard) {
2549 slab = slab_to_discard;
2550 slab_to_discard = slab_to_discard->next;
2551
2552 stat(s, DEACTIVATE_EMPTY);
2553 discard_slab(s, slab);
2554 stat(s, FREE_SLAB);
2555 }
2556 }
2557
2558
2559
2560
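/*
 * Detach this CPU's per-cpu partial list and hand it to
 * __unfreeze_partials(), which puts each slab back on its node partial
 * list, discarding empty slabs when the node already holds enough of them.
 */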
2561 static void unfreeze_partials(struct kmem_cache *s)
2562 {
2563 struct slab *partial_slab;
2564 unsigned long flags;
2565
2566 local_lock_irqsave(&s->cpu_slab->lock, flags);
2567 partial_slab = this_cpu_read(s->cpu_slab->partial);
2568 this_cpu_write(s->cpu_slab->partial, NULL);
2569 local_unlock_irqrestore(&s->cpu_slab->lock, flags);
2570
2571 if (partial_slab)
2572 __unfreeze_partials(s, partial_slab);
2573 }
2574
2575 static void unfreeze_partials_cpu(struct kmem_cache *s,
2576 struct kmem_cache_cpu *c)
2577 {
2578 struct slab *partial_slab;
2579
2580 partial_slab = slub_percpu_partial(c);
2581 c->partial = NULL;
2582
2583 if (partial_slab)
2584 __unfreeze_partials(s, partial_slab);
2585 }
2586
2587
2588
2589
2590
2591
2592
2593
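/*
 * Put a just-frozen slab (see __slab_free()) onto this CPU's partial list.
 * If the list has already reached s->cpu_partial_slabs and draining is
 * allowed, the old list is handed to __unfreeze_partials() outside of the
 * locked section and a new list is started with just this slab.
 */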
2594 static void put_cpu_partial(struct kmem_cache *s, struct slab *slab, int drain)
2595 {
2596 struct slab *oldslab;
2597 struct slab *slab_to_unfreeze = NULL;
2598 unsigned long flags;
2599 int slabs = 0;
2600
2601 local_lock_irqsave(&s->cpu_slab->lock, flags);
2602
2603 oldslab = this_cpu_read(s->cpu_slab->partial);
2604
2605 if (oldslab) {
2606 if (drain && oldslab->slabs >= s->cpu_partial_slabs) {
2607
2608
2609
2610
2611
2612 slab_to_unfreeze = oldslab;
2613 oldslab = NULL;
2614 } else {
2615 slabs = oldslab->slabs;
2616 }
2617 }
2618
2619 slabs++;
2620
2621 slab->slabs = slabs;
2622 slab->next = oldslab;
2623
2624 this_cpu_write(s->cpu_slab->partial, slab);
2625
2626 local_unlock_irqrestore(&s->cpu_slab->lock, flags);
2627
2628 if (slab_to_unfreeze) {
2629 __unfreeze_partials(s, slab_to_unfreeze);
2630 stat(s, CPU_PARTIAL_DRAIN);
2631 }
2632 }
2633
2634 #else
2635
2636 static inline void unfreeze_partials(struct kmem_cache *s) { }
2637 static inline void unfreeze_partials_cpu(struct kmem_cache *s,
2638 struct kmem_cache_cpu *c) { }
2639
2640 #endif
2641
2642 static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
2643 {
2644 unsigned long flags;
2645 struct slab *slab;
2646 void *freelist;
2647
2648 local_lock_irqsave(&s->cpu_slab->lock, flags);
2649
2650 slab = c->slab;
2651 freelist = c->freelist;
2652
2653 c->slab = NULL;
2654 c->freelist = NULL;
2655 c->tid = next_tid(c->tid);
2656
2657 local_unlock_irqrestore(&s->cpu_slab->lock, flags);
2658
2659 if (slab) {
2660 deactivate_slab(s, slab, freelist);
2661 stat(s, CPUSLAB_FLUSH);
2662 }
2663 }
2664
2665 static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
2666 {
2667 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2668 void *freelist = c->freelist;
2669 struct slab *slab = c->slab;
2670
2671 c->slab = NULL;
2672 c->freelist = NULL;
2673 c->tid = next_tid(c->tid);
2674
2675 if (slab) {
2676 deactivate_slab(s, slab, freelist);
2677 stat(s, CPUSLAB_FLUSH);
2678 }
2679
2680 unfreeze_partials_cpu(s, c);
2681 }
2682
2683 struct slub_flush_work {
2684 struct work_struct work;
2685 struct kmem_cache *s;
2686 bool skip;
2687 };
2688
2689
2690
2691
2692
2693
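/*
 * Per-cpu work item queued by flush_all_cpus_locked(): flush the executing
 * CPU's cpu slab and per-cpu partial list for the given cache.
 */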
2694 static void flush_cpu_slab(struct work_struct *w)
2695 {
2696 struct kmem_cache *s;
2697 struct kmem_cache_cpu *c;
2698 struct slub_flush_work *sfw;
2699
2700 sfw = container_of(w, struct slub_flush_work, work);
2701
2702 s = sfw->s;
2703 c = this_cpu_ptr(s->cpu_slab);
2704
2705 if (c->slab)
2706 flush_slab(s, c);
2707
2708 unfreeze_partials(s);
2709 }
2710
2711 static bool has_cpu_slab(int cpu, struct kmem_cache *s)
2712 {
2713 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
2714
2715 return c->slab || slub_percpu_partial(c);
2716 }
2717
2718 static DEFINE_MUTEX(flush_lock);
2719 static DEFINE_PER_CPU(struct slub_flush_work, slub_flush);
2720
2721 static void flush_all_cpus_locked(struct kmem_cache *s)
2722 {
2723 struct slub_flush_work *sfw;
2724 unsigned int cpu;
2725
2726 lockdep_assert_cpus_held();
2727 mutex_lock(&flush_lock);
2728
2729 for_each_online_cpu(cpu) {
2730 sfw = &per_cpu(slub_flush, cpu);
2731 if (!has_cpu_slab(cpu, s)) {
2732 sfw->skip = true;
2733 continue;
2734 }
2735 INIT_WORK(&sfw->work, flush_cpu_slab);
2736 sfw->skip = false;
2737 sfw->s = s;
2738 queue_work_on(cpu, flushwq, &sfw->work);
2739 }
2740
2741 for_each_online_cpu(cpu) {
2742 sfw = &per_cpu(slub_flush, cpu);
2743 if (sfw->skip)
2744 continue;
2745 flush_work(&sfw->work);
2746 }
2747
2748 mutex_unlock(&flush_lock);
2749 }
2750
2751 static void flush_all(struct kmem_cache *s)
2752 {
2753 cpus_read_lock();
2754 flush_all_cpus_locked(s);
2755 cpus_read_unlock();
2756 }
2757
2758
2759
2760
2761
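/*
 * CPU hotplug callback (registered as CPUHP_SLUB_DEAD in kmem_cache_init()):
 * flush the cpu slab and partial list of a CPU that went offline, for every
 * cache.
 */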
2762 static int slub_cpu_dead(unsigned int cpu)
2763 {
2764 struct kmem_cache *s;
2765
2766 mutex_lock(&slab_mutex);
2767 list_for_each_entry(s, &slab_caches, list)
2768 __flush_cpu_slab(s, cpu);
2769 mutex_unlock(&slab_mutex);
2770 return 0;
2771 }
2772
2773
2774
2775
2776
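/*
 * Check whether the slab satisfies the NUMA node constraint of the request.
 */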
2777 static inline int node_match(struct slab *slab, int node)
2778 {
2779 #ifdef CONFIG_NUMA
2780 if (node != NUMA_NO_NODE && slab_nid(slab) != node)
2781 return 0;
2782 #endif
2783 return 1;
2784 }
2785
2786 #ifdef CONFIG_SLUB_DEBUG
2787 static int count_free(struct slab *slab)
2788 {
2789 return slab->objects - slab->inuse;
2790 }
2791
2792 static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
2793 {
2794 return atomic_long_read(&n->total_objects);
2795 }
2796 #endif
2797
2798 #if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS)
2799 static unsigned long count_partial(struct kmem_cache_node *n,
2800 int (*get_count)(struct slab *))
2801 {
2802 unsigned long flags;
2803 unsigned long x = 0;
2804 struct slab *slab;
2805
2806 spin_lock_irqsave(&n->list_lock, flags);
2807 list_for_each_entry(slab, &n->partial, slab_list)
2808 x += get_count(slab);
2809 spin_unlock_irqrestore(&n->list_lock, flags);
2810 return x;
2811 }
2812 #endif
2813
2814 static noinline void
2815 slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
2816 {
2817 #ifdef CONFIG_SLUB_DEBUG
2818 static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
2819 DEFAULT_RATELIMIT_BURST);
2820 int node;
2821 struct kmem_cache_node *n;
2822
2823 if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slub_oom_rs))
2824 return;
2825
2826 pr_warn("SLUB: Unable to allocate memory on node %d, gfp=%#x(%pGg)\n",
2827 nid, gfpflags, &gfpflags);
2828 pr_warn(" cache: %s, object size: %u, buffer size: %u, default order: %u, min order: %u\n",
2829 s->name, s->object_size, s->size, oo_order(s->oo),
2830 oo_order(s->min));
2831
2832 if (oo_order(s->min) > get_order(s->object_size))
2833 pr_warn(" %s debugging increased min order, use slub_debug=O to disable.\n",
2834 s->name);
2835
2836 for_each_kmem_cache_node(s, node, n) {
2837 unsigned long nr_slabs;
2838 unsigned long nr_objs;
2839 unsigned long nr_free;
2840
2841 nr_free = count_partial(n, count_free);
2842 nr_slabs = node_nr_slabs(n);
2843 nr_objs = node_nr_objs(n);
2844
2845 pr_warn(" node %d: slabs: %ld, objs: %ld, free: %ld\n",
2846 node, nr_slabs, nr_objs, nr_free);
2847 }
2848 #endif
2849 }
2850
2851 static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags)
2852 {
2853 if (unlikely(slab_test_pfmemalloc(slab)))
2854 return gfp_pfmemalloc_allowed(gfpflags);
2855
2856 return true;
2857 }
2858
2859
2860
2861
2862
2863
2864
2865
2866
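/*
 * Take over the slab's remaining freelist so it can become the new cpu
 * freelist. The slab stays frozen if a freelist is returned; if the return
 * value is NULL the slab has been unfrozen by the cmpxchg, since new.frozen
 * is only kept set when a freelist was present.
 */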
2867 static inline void *get_freelist(struct kmem_cache *s, struct slab *slab)
2868 {
2869 struct slab new;
2870 unsigned long counters;
2871 void *freelist;
2872
2873 lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock));
2874
2875 do {
2876 freelist = slab->freelist;
2877 counters = slab->counters;
2878
2879 new.counters = counters;
2880 VM_BUG_ON(!new.frozen);
2881
2882 new.inuse = slab->objects;
2883 new.frozen = freelist != NULL;
2884
2885 } while (!__cmpxchg_double_slab(s, slab,
2886 freelist, counters,
2887 NULL, new.counters,
2888 "get_freelist"));
2889
2890 return freelist;
2891 }
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
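/*
 * Slow path of the allocator: the lockless cpu freelist is empty or cannot
 * satisfy the request (wrong node, debugging, pfmemalloc mismatch).
 *
 * First try to refill from the cpu slab's own freelist, then from the
 * per-cpu partial list, then from a node partial list, and finally by
 * allocating a new slab from the page allocator.
 *
 * This version expects the caller to have already pinned the cpu, e.g. via
 * slub_get_cpu_ptr(); __slab_alloc() below is the wrapper for callers that
 * have not.
 */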
2912 static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
2913 unsigned long addr, struct kmem_cache_cpu *c)
2914 {
2915 void *freelist;
2916 struct slab *slab;
2917 unsigned long flags;
2918
2919 stat(s, ALLOC_SLOWPATH);
2920
2921 reread_slab:
2922
2923 slab = READ_ONCE(c->slab);
2924 if (!slab) {
2925
2926
2927
2928
2929 if (unlikely(node != NUMA_NO_NODE &&
2930 !node_isset(node, slab_nodes)))
2931 node = NUMA_NO_NODE;
2932 goto new_slab;
2933 }
2934 redo:
2935
2936 if (unlikely(!node_match(slab, node))) {
2937
2938
2939
2940
2941 if (!node_isset(node, slab_nodes)) {
2942 node = NUMA_NO_NODE;
2943 } else {
2944 stat(s, ALLOC_NODE_MISMATCH);
2945 goto deactivate_slab;
2946 }
2947 }
2948
2949
2950
2951
2952
2953
2954 if (unlikely(!pfmemalloc_match(slab, gfpflags)))
2955 goto deactivate_slab;
2956
2957
2958 local_lock_irqsave(&s->cpu_slab->lock, flags);
2959 if (unlikely(slab != c->slab)) {
2960 local_unlock_irqrestore(&s->cpu_slab->lock, flags);
2961 goto reread_slab;
2962 }
2963 freelist = c->freelist;
2964 if (freelist)
2965 goto load_freelist;
2966
2967 freelist = get_freelist(s, slab);
2968
2969 if (!freelist) {
2970 c->slab = NULL;
2971 c->tid = next_tid(c->tid);
2972 local_unlock_irqrestore(&s->cpu_slab->lock, flags);
2973 stat(s, DEACTIVATE_BYPASS);
2974 goto new_slab;
2975 }
2976
2977 stat(s, ALLOC_REFILL);
2978
2979 load_freelist:
2980
2981 lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock));
2982
2983
2984
2985
2986
2987
2988 VM_BUG_ON(!c->slab->frozen);
2989 c->freelist = get_freepointer(s, freelist);
2990 c->tid = next_tid(c->tid);
2991 local_unlock_irqrestore(&s->cpu_slab->lock, flags);
2992 return freelist;
2993
2994 deactivate_slab:
2995
2996 local_lock_irqsave(&s->cpu_slab->lock, flags);
2997 if (slab != c->slab) {
2998 local_unlock_irqrestore(&s->cpu_slab->lock, flags);
2999 goto reread_slab;
3000 }
3001 freelist = c->freelist;
3002 c->slab = NULL;
3003 c->freelist = NULL;
3004 c->tid = next_tid(c->tid);
3005 local_unlock_irqrestore(&s->cpu_slab->lock, flags);
3006 deactivate_slab(s, slab, freelist);
3007
3008 new_slab:
3009
3010 if (slub_percpu_partial(c)) {
3011 local_lock_irqsave(&s->cpu_slab->lock, flags);
3012 if (unlikely(c->slab)) {
3013 local_unlock_irqrestore(&s->cpu_slab->lock, flags);
3014 goto reread_slab;
3015 }
3016 if (unlikely(!slub_percpu_partial(c))) {
3017 local_unlock_irqrestore(&s->cpu_slab->lock, flags);
3018
3019 goto new_objects;
3020 }
3021
3022 slab = c->slab = slub_percpu_partial(c);
3023 slub_set_percpu_partial(c, slab);
3024 local_unlock_irqrestore(&s->cpu_slab->lock, flags);
3025 stat(s, CPU_PARTIAL_ALLOC);
3026 goto redo;
3027 }
3028
3029 new_objects:
3030
3031 freelist = get_partial(s, gfpflags, node, &slab);
3032 if (freelist)
3033 goto check_new_slab;
3034
3035 slub_put_cpu_ptr(s->cpu_slab);
3036 slab = new_slab(s, gfpflags, node);
3037 c = slub_get_cpu_ptr(s->cpu_slab);
3038
3039 if (unlikely(!slab)) {
3040 slab_out_of_memory(s, gfpflags, node);
3041 return NULL;
3042 }
3043
3044
3045
3046
3047
3048 freelist = slab->freelist;
3049 slab->freelist = NULL;
3050
3051 stat(s, ALLOC_SLAB);
3052
3053 check_new_slab:
3054
3055 if (kmem_cache_debug(s)) {
3056 if (!alloc_debug_processing(s, slab, freelist, addr)) {
3057
3058 goto new_slab;
3059 } else {
3060
3061
3062
3063
3064 goto return_single;
3065 }
3066 }
3067
3068 if (unlikely(!pfmemalloc_match(slab, gfpflags)))
3069
3070
3071
3072
3073 goto return_single;
3074
3075 retry_load_slab:
3076
3077 local_lock_irqsave(&s->cpu_slab->lock, flags);
3078 if (unlikely(c->slab)) {
3079 void *flush_freelist = c->freelist;
3080 struct slab *flush_slab = c->slab;
3081
3082 c->slab = NULL;
3083 c->freelist = NULL;
3084 c->tid = next_tid(c->tid);
3085
3086 local_unlock_irqrestore(&s->cpu_slab->lock, flags);
3087
3088 deactivate_slab(s, flush_slab, flush_freelist);
3089
3090 stat(s, CPUSLAB_FLUSH);
3091
3092 goto retry_load_slab;
3093 }
3094 c->slab = slab;
3095
3096 goto load_freelist;
3097
3098 return_single:
3099
3100 deactivate_slab(s, slab, get_freepointer(s, freelist));
3101 return freelist;
3102 }
3103
3104
3105
3106
3107
3108
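/*
 * Wrapper around ___slab_alloc() for callers that may still be preemptible:
 * when CONFIG_PREEMPT_COUNT is set it pins the cpu via slub_get_cpu_ptr()
 * and refetches the per-cpu pointer, so a cpu switch before this point is
 * harmless.
 */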
3109 static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
3110 unsigned long addr, struct kmem_cache_cpu *c)
3111 {
3112 void *p;
3113
3114 #ifdef CONFIG_PREEMPT_COUNT
3115
3116
3117
3118
3119
3120 c = slub_get_cpu_ptr(s->cpu_slab);
3121 #endif
3122
3123 p = ___slab_alloc(s, gfpflags, node, addr, c);
3124 #ifdef CONFIG_PREEMPT_COUNT
3125 slub_put_cpu_ptr(s->cpu_slab);
3126 #endif
3127 return p;
3128 }
3129
3130
3131
3132
3133
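/*
 * When the cache zeroes objects on free, the freelist pointer stored at
 * s->offset clobbers part of that zeroed area; clear it again so the object
 * handed out is fully initialized.
 */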
3134 static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
3135 void *obj)
3136 {
3137 if (unlikely(slab_want_init_on_free(s)) && obj)
3138 memset((void *)((char *)kasan_reset_tag(obj) + s->offset),
3139 0, sizeof(void *));
3140 }
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
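/*
 * Inlined allocation fastpath, shared by kmalloc and kmem_cache_alloc: grab
 * the next object from the lockless per-cpu freelist with a tid-protected
 * this_cpu_cmpxchg_double(), falling back to __slab_alloc() when the cpu
 * freelist is empty, the node does not match, or on PREEMPT_RT.
 */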
3152 static __always_inline void *slab_alloc_node(struct kmem_cache *s, struct list_lru *lru,
3153 gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
3154 {
3155 void *object;
3156 struct kmem_cache_cpu *c;
3157 struct slab *slab;
3158 unsigned long tid;
3159 struct obj_cgroup *objcg = NULL;
3160 bool init = false;
3161
3162 s = slab_pre_alloc_hook(s, lru, &objcg, 1, gfpflags);
3163 if (!s)
3164 return NULL;
3165
3166 object = kfence_alloc(s, orig_size, gfpflags);
3167 if (unlikely(object))
3168 goto out;
3169
3170 redo:
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
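/*
 * The cpu pointer and tid are read with preemption enabled, so we may be
 * moved to another cpu in between. That is fine: the cmpxchg below compares
 * both the freelist head and the tid of whatever cpu we end up on, so a
 * stale pair simply fails and we retry from 'redo'.
 */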
3183 c = raw_cpu_ptr(s->cpu_slab);
3184 tid = READ_ONCE(c->tid);
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194 barrier();
3195
3196
3197
3198
3199
3200
3201
3202
3203 object = c->freelist;
3204 slab = c->slab;
3205
3206
3207
3208
3209
3210
3211
3212 if (IS_ENABLED(CONFIG_PREEMPT_RT) ||
3213 unlikely(!object || !slab || !node_match(slab, node))) {
3214 object = __slab_alloc(s, gfpflags, node, addr, c);
3215 } else {
3216 void *next_object = get_freepointer_safe(s, object);
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
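/*
 * Atomically (with respect to interrupts and preemption on this cpu)
 * replace the cpu freelist head with the next object and advance the tid,
 * but only if neither changed since they were sampled above.
 */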
3232 if (unlikely(!this_cpu_cmpxchg_double(
3233 s->cpu_slab->freelist, s->cpu_slab->tid,
3234 object, tid,
3235 next_object, next_tid(tid)))) {
3236
3237 note_cmpxchg_failure("slab_alloc", s, tid);
3238 goto redo;
3239 }
3240 prefetch_freepointer(s, next_object);
3241 stat(s, ALLOC_FASTPATH);
3242 }
3243
3244 maybe_wipe_obj_freeptr(s, object);
3245 init = slab_want_init_on_alloc(gfpflags, s);
3246
3247 out:
3248 slab_post_alloc_hook(s, objcg, gfpflags, 1, &object, init);
3249
3250 return object;
3251 }
3252
3253 static __always_inline void *slab_alloc(struct kmem_cache *s, struct list_lru *lru,
3254 gfp_t gfpflags, unsigned long addr, size_t orig_size)
3255 {
3256 return slab_alloc_node(s, lru, gfpflags, NUMA_NO_NODE, addr, orig_size);
3257 }
3258
3259 static __always_inline
3260 void *__kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
3261 gfp_t gfpflags)
3262 {
3263 void *ret = slab_alloc(s, lru, gfpflags, _RET_IP_, s->object_size);
3264
3265 trace_kmem_cache_alloc(_RET_IP_, ret, s, s->object_size,
3266 s->size, gfpflags);
3267
3268 return ret;
3269 }
3270
3271 void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
3272 {
3273 return __kmem_cache_alloc_lru(s, NULL, gfpflags);
3274 }
3275 EXPORT_SYMBOL(kmem_cache_alloc);
3276
3277 void *kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
3278 gfp_t gfpflags)
3279 {
3280 return __kmem_cache_alloc_lru(s, lru, gfpflags);
3281 }
3282 EXPORT_SYMBOL(kmem_cache_alloc_lru);
3283
3284 #ifdef CONFIG_TRACING
3285 void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
3286 {
3287 void *ret = slab_alloc(s, NULL, gfpflags, _RET_IP_, size);
3288 trace_kmalloc(_RET_IP_, ret, s, size, s->size, gfpflags);
3289 ret = kasan_kmalloc(s, ret, size, gfpflags);
3290 return ret;
3291 }
3292 EXPORT_SYMBOL(kmem_cache_alloc_trace);
3293 #endif
3294
3295 #ifdef CONFIG_NUMA
3296 void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
3297 {
3298 void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size);
3299
3300 trace_kmem_cache_alloc_node(_RET_IP_, ret, s,
3301 s->object_size, s->size, gfpflags, node);
3302
3303 return ret;
3304 }
3305 EXPORT_SYMBOL(kmem_cache_alloc_node);
3306
3307 #ifdef CONFIG_TRACING
3308 void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
3309 gfp_t gfpflags,
3310 int node, size_t size)
3311 {
3312 void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, size);
3313
3314 trace_kmalloc_node(_RET_IP_, ret, s,
3315 size, s->size, gfpflags, node);
3316
3317 ret = kasan_kmalloc(s, ret, size, gfpflags);
3318 return ret;
3319 }
3320 EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
3321 #endif
3322 #endif
3323
3324
3325
3326
3327
3328
3329
3330
3331
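/*
 * Slow path of freeing: release the object directly to the slab it belongs
 * to. This still runs frequently, since objects commonly outlive the cpu
 * slab they were allocated from, so the node list_lock is only taken when
 * list manipulation may actually be needed.
 */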
3332 static void __slab_free(struct kmem_cache *s, struct slab *slab,
3333 void *head, void *tail, int cnt,
3334 unsigned long addr)
3335
3336 {
3337 void *prior;
3338 int was_frozen;
3339 struct slab new;
3340 unsigned long counters;
3341 struct kmem_cache_node *n = NULL;
3342 unsigned long flags;
3343
3344 stat(s, FREE_SLOWPATH);
3345
3346 if (kfence_free(head))
3347 return;
3348
3349 if (kmem_cache_debug(s) &&
3350 !free_debug_processing(s, slab, head, tail, cnt, addr))
3351 return;
3352
3353 do {
3354 if (unlikely(n)) {
3355 spin_unlock_irqrestore(&n->list_lock, flags);
3356 n = NULL;
3357 }
3358 prior = slab->freelist;
3359 counters = slab->counters;
3360 set_freepointer(s, tail, prior);
3361 new.counters = counters;
3362 was_frozen = new.frozen;
3363 new.inuse -= cnt;
3364 if ((!new.inuse || !prior) && !was_frozen) {
3365
3366 if (kmem_cache_has_cpu_partial(s) && !prior) {
3367
3368
3369
3370
3371
3372
3373
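/*
 * The slab was full and is therefore not on any list: freeze it
 * instead, so it can be put on this CPU's partial list further down
 * without taking the node list_lock.
 */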
3374 new.frozen = 1;
3375
3376 } else {
3377
3378 n = get_node(s, slab_nid(slab));
3379
3380
3381
3382
3383
3384
3385
3386
3387 spin_lock_irqsave(&n->list_lock, flags);
3388
3389 }
3390 }
3391
3392 } while (!cmpxchg_double_slab(s, slab,
3393 prior, counters,
3394 head, new.counters,
3395 "__slab_free"));
3396
3397 if (likely(!n)) {
3398
3399 if (likely(was_frozen)) {
3400
3401
3402
3403
3404 stat(s, FREE_FROZEN);
3405 } else if (new.frozen) {
3406
3407
3408
3409
3410 put_cpu_partial(s, slab, 1);
3411 stat(s, CPU_PARTIAL_FREE);
3412 }
3413
3414 return;
3415 }
3416
3417 if (unlikely(!new.inuse && n->nr_partial >= s->min_partial))
3418 goto slab_empty;
3419
3420
3421
3422
3423
3424 if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
3425 remove_full(s, n, slab);
3426 add_partial(n, slab, DEACTIVATE_TO_TAIL);
3427 stat(s, FREE_ADD_PARTIAL);
3428 }
3429 spin_unlock_irqrestore(&n->list_lock, flags);
3430 return;
3431
3432 slab_empty:
3433 if (prior) {
3434
3435
3436
3437 remove_partial(n, slab);
3438 stat(s, FREE_REMOVE_PARTIAL);
3439 } else {
3440
3441 remove_full(s, n, slab);
3442 }
3443
3444 spin_unlock_irqrestore(&n->list_lock, flags);
3445 stat(s, FREE_SLAB);
3446 discard_slab(s, slab);
3447 }
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
3463
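/*
 * Freeing fastpath, force-inlined so kfree() and kmem_cache_free() get it
 * without an extra call. It only applies when freeing to the current cpu
 * slab; otherwise __slab_free() handles the slow cases. head/tail/cnt
 * describe a detached freelist, so several objects belonging to the same
 * slab can be freed in one go (tail != NULL indicates a bulk free).
 */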
3464 static __always_inline void do_slab_free(struct kmem_cache *s,
3465 struct slab *slab, void *head, void *tail,
3466 int cnt, unsigned long addr)
3467 {
3468 void *tail_obj = tail ? : head;
3469 struct kmem_cache_cpu *c;
3470 unsigned long tid;
3471
3472 redo:
3473
3474
3475
3476
3477
3478
3479 c = raw_cpu_ptr(s->cpu_slab);
3480 tid = READ_ONCE(c->tid);
3481
3482
3483 barrier();
3484
3485 if (likely(slab == c->slab)) {
3486 #ifndef CONFIG_PREEMPT_RT
3487 void **freelist = READ_ONCE(c->freelist);
3488
3489 set_freepointer(s, tail_obj, freelist);
3490
3491 if (unlikely(!this_cpu_cmpxchg_double(
3492 s->cpu_slab->freelist, s->cpu_slab->tid,
3493 freelist, tid,
3494 head, next_tid(tid)))) {
3495
3496 note_cmpxchg_failure("slab_free", s, tid);
3497 goto redo;
3498 }
3499 #else
3500
3501
3502
3503
3504
3505
3506
3507 void **freelist;
3508
3509 local_lock(&s->cpu_slab->lock);
3510 c = this_cpu_ptr(s->cpu_slab);
3511 if (unlikely(slab != c->slab)) {
3512 local_unlock(&s->cpu_slab->lock);
3513 goto redo;
3514 }
3515 tid = c->tid;
3516 freelist = c->freelist;
3517
3518 set_freepointer(s, tail_obj, freelist);
3519 c->freelist = head;
3520 c->tid = next_tid(tid);
3521
3522 local_unlock(&s->cpu_slab->lock);
3523 #endif
3524 stat(s, FREE_FASTPATH);
3525 } else
3526 __slab_free(s, slab, head, tail_obj, cnt, addr);
3527
3528 }
3529
3530 static __always_inline void slab_free(struct kmem_cache *s, struct slab *slab,
3531 void *head, void *tail, void **p, int cnt,
3532 unsigned long addr)
3533 {
3534 memcg_slab_free_hook(s, slab, p, cnt);
3535
3536
3537
3538
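/*
 * slab_free_freelist_hook() may shorten the freelist (e.g. when KASAN
 * quarantines objects) and returns false when nothing is left to free.
 */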
3539 if (slab_free_freelist_hook(s, &head, &tail, &cnt))
3540 do_slab_free(s, slab, head, tail, cnt, addr);
3541 }
3542
3543 #ifdef CONFIG_KASAN_GENERIC
3544 void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr)
3545 {
3546 do_slab_free(cache, virt_to_slab(x), x, NULL, 1, addr);
3547 }
3548 #endif
3549
3550 void kmem_cache_free(struct kmem_cache *s, void *x)
3551 {
3552 s = cache_from_obj(s, x);
3553 if (!s)
3554 return;
3555 trace_kmem_cache_free(_RET_IP_, x, s->name);
3556 slab_free(s, virt_to_slab(x), x, NULL, &x, 1, _RET_IP_);
3557 }
3558 EXPORT_SYMBOL(kmem_cache_free);
3559
3560 struct detached_freelist {
3561 struct slab *slab;
3562 void *tail;
3563 void *freelist;
3564 int cnt;
3565 struct kmem_cache *s;
3566 };
3567
3568 static inline void free_large_kmalloc(struct folio *folio, void *object)
3569 {
3570 unsigned int order = folio_order(folio);
3571
3572 if (WARN_ON_ONCE(order == 0))
3573 pr_warn_once("object pointer: 0x%p\n", object);
3574
3575 kfree_hook(object);
3576 mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
3577 -(PAGE_SIZE << order));
3578 __free_pages(folio_page(folio, 0), order);
3579 }
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
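/*
 * Scan the tail of the object array (with a small look-ahead) and link the
 * objects that belong to the same slab into a detached freelist, compacting
 * the not-yet-handled objects towards the front of the array. Returns how
 * many objects remain for the next pass. When called with s == NULL the
 * cache is looked up from the object's slab, and non-slab (large kmalloc)
 * folios are freed directly here.
 */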
3593 static inline
3594 int build_detached_freelist(struct kmem_cache *s, size_t size,
3595 void **p, struct detached_freelist *df)
3596 {
3597 int lookahead = 3;
3598 void *object;
3599 struct folio *folio;
3600 size_t same;
3601
3602 object = p[--size];
3603 folio = virt_to_folio(object);
3604 if (!s) {
3605
3606 if (unlikely(!folio_test_slab(folio))) {
3607 free_large_kmalloc(folio, object);
3608 df->slab = NULL;
3609 return size;
3610 }
3611
3612 df->slab = folio_slab(folio);
3613 df->s = df->slab->slab_cache;
3614 } else {
3615 df->slab = folio_slab(folio);
3616 df->s = cache_from_obj(s, object);
3617 }
3618
3619
3620 df->tail = object;
3621 df->freelist = object;
3622 df->cnt = 1;
3623
3624 if (is_kfence_address(object))
3625 return size;
3626
3627 set_freepointer(df->s, object, NULL);
3628
3629 same = size;
3630 while (size) {
3631 object = p[--size];
3632
3633 if (df->slab == virt_to_slab(object)) {
3634
3635 set_freepointer(df->s, object, df->freelist);
3636 df->freelist = object;
3637 df->cnt++;
3638 same--;
3639 if (size != same)
3640 swap(p[size], p[same]);
3641 continue;
3642 }
3643
3644
3645 if (!--lookahead)
3646 break;
3647 }
3648
3649 return same;
3650 }
3651
3652
3653 void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
3654 {
3655 if (!size)
3656 return;
3657
3658 do {
3659 struct detached_freelist df;
3660
3661 size = build_detached_freelist(s, size, p, &df);
3662 if (!df.slab)
3663 continue;
3664
3665 slab_free(df.s, df.slab, df.freelist, df.tail, &p[size], df.cnt,
3666 _RET_IP_);
3667 } while (likely(size));
3668 }
3669 EXPORT_SYMBOL(kmem_cache_free_bulk);
3670
3671
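/*
 * Bulk allocation. Note that interrupts must be enabled when calling this:
 * the fastpath below uses local_lock_irq()/local_unlock_irq().
 */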
3672 int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
3673 void **p)
3674 {
3675 struct kmem_cache_cpu *c;
3676 int i;
3677 struct obj_cgroup *objcg = NULL;
3678
3679
3680 s = slab_pre_alloc_hook(s, NULL, &objcg, size, flags);
3681 if (unlikely(!s))
3682 return false;
3683
3684
3685
3686
3687
3688 c = slub_get_cpu_ptr(s->cpu_slab);
3689 local_lock_irq(&s->cpu_slab->lock);
3690
3691 for (i = 0; i < size; i++) {
3692 void *object = kfence_alloc(s, s->object_size, flags);
3693
3694 if (unlikely(object)) {
3695 p[i] = object;
3696 continue;
3697 }
3698
3699 object = c->freelist;
3700 if (unlikely(!object)) {
/*
 * The cpu freelist is empty. Earlier iterations may have taken objects
 * off c->freelist without bumping c->tid (it is only advanced once at
 * the end), so advance it here before interrupts are re-enabled and
 * ___slab_alloc() is called, keeping the irq-level fastpath from
 * pairing with a stale tid.
 */
3708 c->tid = next_tid(c->tid);
3709
3710 local_unlock_irq(&s->cpu_slab->lock);
3711
/*
 * Fall back to the slow path for this object. ___slab_alloc() may
 * allocate a new slab, so the cpu pointer is refetched below before
 * the local lock is retaken.
 */
3716 p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
3717 _RET_IP_, c);
3718 if (unlikely(!p[i]))
3719 goto error;
3720
3721 c = this_cpu_ptr(s->cpu_slab);
3722 maybe_wipe_obj_freeptr(s, p[i]);
3723
3724 local_lock_irq(&s->cpu_slab->lock);
3725
3726 continue;
3727 }
3728 c->freelist = get_freepointer(s, object);
3729 p[i] = object;
3730 maybe_wipe_obj_freeptr(s, p[i]);
3731 }
3732 c->tid = next_tid(c->tid);
3733 local_unlock_irq(&s->cpu_slab->lock);
3734 slub_put_cpu_ptr(s->cpu_slab);
3735
3736
3737
3738
3739
3740 slab_post_alloc_hook(s, objcg, flags, size, p,
3741 slab_want_init_on_alloc(flags, s));
3742 return i;
3743 error:
3744 slub_put_cpu_ptr(s->cpu_slab);
3745 slab_post_alloc_hook(s, objcg, flags, i, p, false);
3746 kmem_cache_free_bulk(s, i, p);
3747 return 0;
3748 }
3749 EXPORT_SYMBOL(kmem_cache_alloc_bulk);
3750
3751
3752
3753
3754
3755
3756
3757
3758
3759
3760
3761
3762
3763
3764
3765
3766
3767
3768
3769
3770
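/*
 * Slab page order and minimum object count tunables, adjustable with the
 * slub_min_order=, slub_max_order= and slub_min_objects= boot parameters
 * (see the __setup() handlers further down).
 */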
3771 static unsigned int slub_min_order;
3772 static unsigned int slub_max_order = PAGE_ALLOC_COSTLY_ORDER;
3773 static unsigned int slub_min_objects;
3774
3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
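/*
 * Find the lowest order, no lower than slub_min_order, whose slab size fits
 * at least min_objects objects of the given size while wasting no more than
 * 1/fract_leftover of the slab on the remainder.
 */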
3800 static inline unsigned int calc_slab_order(unsigned int size,
3801 unsigned int min_objects, unsigned int max_order,
3802 unsigned int fract_leftover)
3803 {
3804 unsigned int min_order = slub_min_order;
3805 unsigned int order;
3806
3807 if (order_objects(min_order, size) > MAX_OBJS_PER_PAGE)
3808 return get_order(size * MAX_OBJS_PER_PAGE) - 1;
3809
3810 for (order = max(min_order, (unsigned int)get_order(min_objects * size));
3811 order <= max_order; order++) {
3812
3813 unsigned int slab_size = (unsigned int)PAGE_SIZE << order;
3814 unsigned int rem;
3815
3816 rem = slab_size % size;
3817
3818 if (rem <= slab_size / fract_leftover)
3819 break;
3820 }
3821
3822 return order;
3823 }
3824
3825 static inline int calculate_order(unsigned int size)
3826 {
3827 unsigned int order;
3828 unsigned int min_objects;
3829 unsigned int max_objects;
3830 unsigned int nr_cpus;
3831
3832
3833
3834
3835
3836
3837
3838
3839
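/*
 * Attempt the best configuration first: the largest acceptable object
 * count with the least waste, then back off on both the waste limit and
 * the object count until something fits within slub_max_order.
 */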
3840 min_objects = slub_min_objects;
3841 if (!min_objects) {
3842
3843
3844
3845
3846
3847
3848
3849
3850
3851 nr_cpus = num_present_cpus();
3852 if (nr_cpus <= 1)
3853 nr_cpus = nr_cpu_ids;
3854 min_objects = 4 * (fls(nr_cpus) + 1);
3855 }
3856 max_objects = order_objects(slub_max_order, size);
3857 min_objects = min(min_objects, max_objects);
3858
3859 while (min_objects > 1) {
3860 unsigned int fraction;
3861
3862 fraction = 16;
3863 while (fraction >= 4) {
3864 order = calc_slab_order(size, min_objects,
3865 slub_max_order, fraction);
3866 if (order <= slub_max_order)
3867 return order;
3868 fraction /= 2;
3869 }
3870 min_objects--;
3871 }
3872
3873
3874
3875
3876
3877 order = calc_slab_order(size, 1, slub_max_order, 1);
3878 if (order <= slub_max_order)
3879 return order;
3880
3881
3882
3883
3884 order = calc_slab_order(size, 1, MAX_ORDER, 1);
3885 if (order < MAX_ORDER)
3886 return order;
3887 return -ENOSYS;
3888 }
3889
3890 static void
3891 init_kmem_cache_node(struct kmem_cache_node *n)
3892 {
3893 n->nr_partial = 0;
3894 spin_lock_init(&n->list_lock);
3895 INIT_LIST_HEAD(&n->partial);
3896 #ifdef CONFIG_SLUB_DEBUG
3897 atomic_long_set(&n->nr_slabs, 0);
3898 atomic_long_set(&n->total_objects, 0);
3899 INIT_LIST_HEAD(&n->full);
3900 #endif
3901 }
3902
3903 static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
3904 {
3905 BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
3906 KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu));
3907
3908
3909
3910
3911
3912 s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu),
3913 2 * sizeof(void *));
3914
3915 if (!s->cpu_slab)
3916 return 0;
3917
3918 init_kmem_cache_cpus(s);
3919
3920 return 1;
3921 }
3922
3923 static struct kmem_cache *kmem_cache_node;
3924
3925
3926
3927
3928
3929
3930
3931
3932
3933
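/*
 * Bootstrap allocation of a kmem_cache_node: no kmalloc_node is available
 * yet at this point, so carve the structure by hand out of a freshly
 * allocated slab of the kmem_cache_node cache (ideally located on that
 * node).
 */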
3934 static void early_kmem_cache_node_alloc(int node)
3935 {
3936 struct slab *slab;
3937 struct kmem_cache_node *n;
3938
3939 BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));
3940
3941 slab = new_slab(kmem_cache_node, GFP_NOWAIT, node);
3942
3943 BUG_ON(!slab);
3944 if (slab_nid(slab) != node) {
3945 pr_err("SLUB: Unable to allocate memory from node %d\n", node);
3946 pr_err("SLUB: Allocating a useless per node structure in order to be able to continue\n");
3947 }
3948
3949 n = slab->freelist;
3950 BUG_ON(!n);
3951 #ifdef CONFIG_SLUB_DEBUG
3952 init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
3953 init_tracking(kmem_cache_node, n);
3954 #endif
3955 n = kasan_slab_alloc(kmem_cache_node, n, GFP_KERNEL, false);
3956 slab->freelist = get_freepointer(kmem_cache_node, n);
3957 slab->inuse = 1;
3958 slab->frozen = 0;
3959 kmem_cache_node->node[node] = n;
3960 init_kmem_cache_node(n);
3961 inc_slabs_node(kmem_cache_node, node, slab->objects);
3962
3963
3964
3965
3966
3967 __add_partial(n, slab, DEACTIVATE_TO_HEAD);
3968 }
3969
3970 static void free_kmem_cache_nodes(struct kmem_cache *s)
3971 {
3972 int node;
3973 struct kmem_cache_node *n;
3974
3975 for_each_kmem_cache_node(s, node, n) {
3976 s->node[node] = NULL;
3977 kmem_cache_free(kmem_cache_node, n);
3978 }
3979 }
3980
3981 void __kmem_cache_release(struct kmem_cache *s)
3982 {
3983 cache_random_seq_destroy(s);
3984 free_percpu(s->cpu_slab);
3985 free_kmem_cache_nodes(s);
3986 }
3987
3988 static int init_kmem_cache_nodes(struct kmem_cache *s)
3989 {
3990 int node;
3991
3992 for_each_node_mask(node, slab_nodes) {
3993 struct kmem_cache_node *n;
3994
3995 if (slab_state == DOWN) {
3996 early_kmem_cache_node_alloc(node);
3997 continue;
3998 }
3999 n = kmem_cache_alloc_node(kmem_cache_node,
4000 GFP_KERNEL, node);
4001
4002 if (!n) {
4003 free_kmem_cache_nodes(s);
4004 return 0;
4005 }
4006
4007 init_kmem_cache_node(n);
4008 s->node[node] = n;
4009 }
4010 return 1;
4011 }
4012
4013 static void set_cpu_partial(struct kmem_cache *s)
4014 {
4015 #ifdef CONFIG_SLUB_CPU_PARTIAL
4016 unsigned int nr_objects;
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
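/*
 * cpu_partial determines roughly how many objects may sit on a CPU's
 * partial list before it is drained back to the node lists. The budget
 * shrinks as the object size grows, and is zero when per-cpu partial
 * lists are not usable (debug caches).
 */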
4031 if (!kmem_cache_has_cpu_partial(s))
4032 nr_objects = 0;
4033 else if (s->size >= PAGE_SIZE)
4034 nr_objects = 6;
4035 else if (s->size >= 1024)
4036 nr_objects = 24;
4037 else if (s->size >= 256)
4038 nr_objects = 52;
4039 else
4040 nr_objects = 120;
4041
4042 slub_set_cpu_partial(s, nr_objects);
4043 #endif
4044 }
4045
4046
4047
4048
4049
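/*
 * calculate_sizes() works out the layout of a single object (poisoning,
 * red zones, free pointer and tracking metadata) and from that the slab
 * order and object counts for the cache.
 */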
4050 static int calculate_sizes(struct kmem_cache *s)
4051 {
4052 slab_flags_t flags = s->flags;
4053 unsigned int size = s->object_size;
4054 unsigned int order;
4055
4056
4057
4058
4059
4060
4061 size = ALIGN(size, sizeof(void *));
4062
4063 #ifdef CONFIG_SLUB_DEBUG
4064
4065
4066
4067
4068
4069 if ((flags & SLAB_POISON) && !(flags & SLAB_TYPESAFE_BY_RCU) &&
4070 !s->ctor)
4071 s->flags |= __OBJECT_POISON;
4072 else
4073 s->flags &= ~__OBJECT_POISON;
4074
4075
4076
4077
4078
4079
4080
4081 if ((flags & SLAB_RED_ZONE) && size == s->object_size)
4082 size += sizeof(void *);
4083 #endif
4084
4085
4086
4087
4088
4089 s->inuse = size;
4090
4091 if ((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
4092 ((flags & SLAB_RED_ZONE) && s->object_size < sizeof(void *)) ||
4093 s->ctor) {
4094
4095
4096
4097
4098
4099
4100
4101
4102
4103
4104
4105
4106
4107
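/*
 * The free pointer cannot share space with the object itself when the
 * first word may still be read or written after free (RCU type safety,
 * poisoning, a constructor, or a red zone on objects smaller than a
 * pointer), so place it after the object instead.
 */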
4108 s->offset = size;
4109 size += sizeof(void *);
4110 } else {
4111
4112
4113
4114
4115
4116 s->offset = ALIGN_DOWN(s->object_size / 2, sizeof(void *));
4117 }
4118
4119 #ifdef CONFIG_SLUB_DEBUG
4120 if (flags & SLAB_STORE_USER)
4121
4122
4123
4124
4125 size += 2 * sizeof(struct track);
4126 #endif
4127
4128 kasan_cache_create(s, &size, &s->flags);
4129 #ifdef CONFIG_SLUB_DEBUG
4130 if (flags & SLAB_RED_ZONE) {
4131
4132
4133
4134
4135
4136
4137
4138 size += sizeof(void *);
4139
4140 s->red_left_pad = sizeof(void *);
4141 s->red_left_pad = ALIGN(s->red_left_pad, s->align);
4142 size += s->red_left_pad;
4143 }
4144 #endif
4145
4146
4147
4148
4149
4150
4151 size = ALIGN(size, s->align);
4152 s->size = size;
4153 s->reciprocal_size = reciprocal_value(size);
4154 order = calculate_order(size);
4155
4156 if ((int)order < 0)
4157 return 0;
4158
4159 s->allocflags = 0;
4160 if (order)
4161 s->allocflags |= __GFP_COMP;
4162
4163 if (s->flags & SLAB_CACHE_DMA)
4164 s->allocflags |= GFP_DMA;
4165
4166 if (s->flags & SLAB_CACHE_DMA32)
4167 s->allocflags |= GFP_DMA32;
4168
4169 if (s->flags & SLAB_RECLAIM_ACCOUNT)
4170 s->allocflags |= __GFP_RECLAIMABLE;
4171
4172
4173
4174
4175 s->oo = oo_make(order, size);
4176 s->min = oo_make(get_order(size), size);
4177
4178 return !!oo_objects(s->oo);
4179 }
4180
4181 static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
4182 {
4183 s->flags = kmem_cache_flags(s->size, flags, s->name);
4184 #ifdef CONFIG_SLAB_FREELIST_HARDENED
4185 s->random = get_random_long();
4186 #endif
4187
4188 if (!calculate_sizes(s))
4189 goto error;
4190 if (disable_higher_order_debug) {
4191
4192
4193
4194
4195 if (get_order(s->size) > get_order(s->object_size)) {
4196 s->flags &= ~DEBUG_METADATA_FLAGS;
4197 s->offset = 0;
4198 if (!calculate_sizes(s))
4199 goto error;
4200 }
4201 }
4202
4203 #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
4204 defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
4205 if (system_has_cmpxchg_double() && (s->flags & SLAB_NO_CMPXCHG) == 0)
4206
4207 s->flags |= __CMPXCHG_DOUBLE;
4208 #endif
4209
4210
4211
4212
4213
4214 s->min_partial = min_t(unsigned long, MAX_PARTIAL, ilog2(s->size) / 2);
4215 s->min_partial = max_t(unsigned long, MIN_PARTIAL, s->min_partial);
4216
4217 set_cpu_partial(s);
4218
4219 #ifdef CONFIG_NUMA
4220 s->remote_node_defrag_ratio = 1000;
4221 #endif
4222
4223
4224 if (slab_state >= UP) {
4225 if (init_cache_random_seq(s))
4226 goto error;
4227 }
4228
4229 if (!init_kmem_cache_nodes(s))
4230 goto error;
4231
4232 if (alloc_kmem_cache_cpus(s))
4233 return 0;
4234
4235 error:
4236 __kmem_cache_release(s);
4237 return -EINVAL;
4238 }
4239
4240 static void list_slab_objects(struct kmem_cache *s, struct slab *slab,
4241 const char *text)
4242 {
4243 #ifdef CONFIG_SLUB_DEBUG
4244 void *addr = slab_address(slab);
4245 unsigned long flags;
4246 unsigned long *map;
4247 void *p;
4248
4249 slab_err(s, slab, text, s->name);
4250 slab_lock(slab, &flags);
4251
4252 map = get_map(s, slab);
4253 for_each_object(p, s, addr, slab->objects) {
4254
4255 if (!test_bit(__obj_to_index(s, addr, p), map)) {
4256 pr_err("Object 0x%p @offset=%tu\n", p, p - addr);
4257 print_tracking(s, p);
4258 }
4259 }
4260 put_map(map);
4261 slab_unlock(slab, &flags);
4262 #endif
4263 }
4264
4265
4266
4267
4268
4269
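/*
 * Attempt to free all partial slabs on a node; called from
 * __kmem_cache_shutdown() below. Slabs that still have objects in use are
 * reported via list_slab_objects().
 */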
4270 static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
4271 {
4272 LIST_HEAD(discard);
4273 struct slab *slab, *h;
4274
4275 BUG_ON(irqs_disabled());
4276 spin_lock_irq(&n->list_lock);
4277 list_for_each_entry_safe(slab, h, &n->partial, slab_list) {
4278 if (!slab->inuse) {
4279 remove_partial(n, slab);
4280 list_add(&slab->slab_list, &discard);
4281 } else {
4282 list_slab_objects(s, slab,
4283 "Objects remaining in %s on __kmem_cache_shutdown()");
4284 }
4285 }
4286 spin_unlock_irq(&n->list_lock);
4287
4288 list_for_each_entry_safe(slab, h, &discard, slab_list)
4289 discard_slab(s, slab);
4290 }
4291
4292 bool __kmem_cache_empty(struct kmem_cache *s)
4293 {
4294 int node;
4295 struct kmem_cache_node *n;
4296
4297 for_each_kmem_cache_node(s, node, n)
4298 if (n->nr_partial || slabs_node(s, node))
4299 return false;
4300 return true;
4301 }
4302
4303
4304
4305
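/*
 * Release the cpu slabs and empty partial slabs of a cache being destroyed.
 * Returns nonzero if objects are still in use and the cache cannot go away.
 */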
4306 int __kmem_cache_shutdown(struct kmem_cache *s)
4307 {
4308 int node;
4309 struct kmem_cache_node *n;
4310
4311 flush_all_cpus_locked(s);
4312
4313 for_each_kmem_cache_node(s, node, n) {
4314 free_partial(s, n);
4315 if (n->nr_partial || slabs_node(s, node))
4316 return 1;
4317 }
4318 return 0;
4319 }
4320
4321 #ifdef CONFIG_PRINTK
4322 void __kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
4323 {
4324 void *base;
4325 int __maybe_unused i;
4326 unsigned int objnr;
4327 void *objp;
4328 void *objp0;
4329 struct kmem_cache *s = slab->slab_cache;
4330 struct track __maybe_unused *trackp;
4331
4332 kpp->kp_ptr = object;
4333 kpp->kp_slab = slab;
4334 kpp->kp_slab_cache = s;
4335 base = slab_address(slab);
4336 objp0 = kasan_reset_tag(object);
4337 #ifdef CONFIG_SLUB_DEBUG
4338 objp = restore_red_left(s, objp0);
4339 #else
4340 objp = objp0;
4341 #endif
4342 objnr = obj_to_index(s, slab, objp);
4343 kpp->kp_data_offset = (unsigned long)((char *)objp0 - (char *)objp);
4344 objp = base + s->size * objnr;
4345 kpp->kp_objp = objp;
4346 if (WARN_ON_ONCE(objp < base || objp >= base + slab->objects * s->size
4347 || (objp - base) % s->size) ||
4348 !(s->flags & SLAB_STORE_USER))
4349 return;
4350 #ifdef CONFIG_SLUB_DEBUG
4351 objp = fixup_red_left(s, objp);
4352 trackp = get_track(s, objp, TRACK_ALLOC);
4353 kpp->kp_ret = (void *)trackp->addr;
4354 #ifdef CONFIG_STACKDEPOT
4355 {
4356 depot_stack_handle_t handle;
4357 unsigned long *entries;
4358 unsigned int nr_entries;
4359
4360 handle = READ_ONCE(trackp->handle);
4361 if (handle) {
4362 nr_entries = stack_depot_fetch(handle, &entries);
4363 for (i = 0; i < KS_ADDRS_COUNT && i < nr_entries; i++)
4364 kpp->kp_stack[i] = (void *)entries[i];
4365 }
4366
4367 trackp = get_track(s, objp, TRACK_FREE);
4368 handle = READ_ONCE(trackp->handle);
4369 if (handle) {
4370 nr_entries = stack_depot_fetch(handle, &entries);
4371 for (i = 0; i < KS_ADDRS_COUNT && i < nr_entries; i++)
4372 kpp->kp_free_stack[i] = (void *)entries[i];
4373 }
4374 }
4375 #endif
4376 #endif
4377 }
4378 #endif
4379
4380
4381
4382
4383
4384 static int __init setup_slub_min_order(char *str)
4385 {
4386 get_option(&str, (int *)&slub_min_order);
4387
4388 return 1;
4389 }
4390
4391 __setup("slub_min_order=", setup_slub_min_order);
4392
4393 static int __init setup_slub_max_order(char *str)
4394 {
4395 get_option(&str, (int *)&slub_max_order);
4396 slub_max_order = min(slub_max_order, (unsigned int)MAX_ORDER - 1);
4397
4398 return 1;
4399 }
4400
4401 __setup("slub_max_order=", setup_slub_max_order);
4402
4403 static int __init setup_slub_min_objects(char *str)
4404 {
4405 get_option(&str, (int *)&slub_min_objects);
4406
4407 return 1;
4408 }
4409
4410 __setup("slub_min_objects=", setup_slub_min_objects);
4411
4412 void *__kmalloc(size_t size, gfp_t flags)
4413 {
4414 struct kmem_cache *s;
4415 void *ret;
4416
4417 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
4418 return kmalloc_large(size, flags);
4419
4420 s = kmalloc_slab(size, flags);
4421
4422 if (unlikely(ZERO_OR_NULL_PTR(s)))
4423 return s;
4424
4425 ret = slab_alloc(s, NULL, flags, _RET_IP_, size);
4426
4427 trace_kmalloc(_RET_IP_, ret, s, size, s->size, flags);
4428
4429 ret = kasan_kmalloc(s, ret, size, flags);
4430
4431 return ret;
4432 }
4433 EXPORT_SYMBOL(__kmalloc);
4434
4435 #ifdef CONFIG_NUMA
4436 static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
4437 {
4438 struct page *page;
4439 void *ptr = NULL;
4440 unsigned int order = get_order(size);
4441
4442 flags |= __GFP_COMP;
4443 page = alloc_pages_node(node, flags, order);
4444 if (page) {
4445 ptr = page_address(page);
4446 mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
4447 PAGE_SIZE << order);
4448 }
4449
4450 return kmalloc_large_node_hook(ptr, size, flags);
4451 }
4452
4453 void *__kmalloc_node(size_t size, gfp_t flags, int node)
4454 {
4455 struct kmem_cache *s;
4456 void *ret;
4457
4458 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
4459 ret = kmalloc_large_node(size, flags, node);
4460
4461 trace_kmalloc_node(_RET_IP_, ret, NULL,
4462 size, PAGE_SIZE << get_order(size),
4463 flags, node);
4464
4465 return ret;
4466 }
4467
4468 s = kmalloc_slab(size, flags);
4469
4470 if (unlikely(ZERO_OR_NULL_PTR(s)))
4471 return s;
4472
4473 ret = slab_alloc_node(s, NULL, flags, node, _RET_IP_, size);
4474
4475 trace_kmalloc_node(_RET_IP_, ret, s, size, s->size, flags, node);
4476
4477 ret = kasan_kmalloc(s, ret, size, flags);
4478
4479 return ret;
4480 }
4481 EXPORT_SYMBOL(__kmalloc_node);
4482 #endif
4483
4484 #ifdef CONFIG_HARDENED_USERCOPY
4485
4486
4487
4488
4489
4490
4491
4492
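/*
 * Hardened usercopy check: abort unless the n-byte copy at ptr stays inside
 * a single object's usercopy region (s->useroffset/s->usersize), taking the
 * left red zone and KFENCE objects into account.
 */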
4493 void __check_heap_object(const void *ptr, unsigned long n,
4494 const struct slab *slab, bool to_user)
4495 {
4496 struct kmem_cache *s;
4497 unsigned int offset;
4498 bool is_kfence = is_kfence_address(ptr);
4499
4500 ptr = kasan_reset_tag(ptr);
4501
4502
4503 s = slab->slab_cache;
4504
4505
4506 if (ptr < slab_address(slab))
4507 usercopy_abort("SLUB object not in SLUB page?!", NULL,
4508 to_user, 0, n);
4509
4510
4511 if (is_kfence)
4512 offset = ptr - kfence_object_start(ptr);
4513 else
4514 offset = (ptr - slab_address(slab)) % s->size;
4515
4516
4517 if (!is_kfence && kmem_cache_debug_flags(s, SLAB_RED_ZONE)) {
4518 if (offset < s->red_left_pad)
4519 usercopy_abort("SLUB object in left red zone",
4520 s->name, to_user, offset, n);
4521 offset -= s->red_left_pad;
4522 }
4523
4524
4525 if (offset >= s->useroffset &&
4526 offset - s->useroffset <= s->usersize &&
4527 n <= s->useroffset - offset + s->usersize)
4528 return;
4529
4530 usercopy_abort("SLUB object", s->name, to_user, offset, n);
4531 }
4532 #endif
4533
4534 size_t __ksize(const void *object)
4535 {
4536 struct folio *folio;
4537
4538 if (unlikely(object == ZERO_SIZE_PTR))
4539 return 0;
4540
4541 folio = virt_to_folio(object);
4542
4543 if (unlikely(!folio_test_slab(folio)))
4544 return folio_size(folio);
4545
4546 return slab_ksize(folio_slab(folio)->slab_cache);
4547 }
4548 EXPORT_SYMBOL(__ksize);
4549
4550 void kfree(const void *x)
4551 {
4552 struct folio *folio;
4553 struct slab *slab;
4554 void *object = (void *)x;
4555
4556 trace_kfree(_RET_IP_, x);
4557
4558 if (unlikely(ZERO_OR_NULL_PTR(x)))
4559 return;
4560
4561 folio = virt_to_folio(x);
4562 if (unlikely(!folio_test_slab(folio))) {
4563 free_large_kmalloc(folio, object);
4564 return;
4565 }
4566 slab = folio_slab(folio);
4567 slab_free(slab->slab_cache, slab, object, NULL, &object, 1, _RET_IP_);
4568 }
4569 EXPORT_SYMBOL(kfree);
4570
4571 #define SHRINK_PROMOTE_MAX 32
4572
4573
4574
4575
4576
4577
4578
4579
4580
4581
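/*
 * Shrink a cache: discard completely free partial slabs and sort the rest
 * so that the fullest slabs (fewest free objects) come first on the node
 * partial lists. New allocations then tend to fill those up, increasing the
 * chance that the sparsest slabs eventually become free as well.
 */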
4582 static int __kmem_cache_do_shrink(struct kmem_cache *s)
4583 {
4584 int node;
4585 int i;
4586 struct kmem_cache_node *n;
4587 struct slab *slab;
4588 struct slab *t;
4589 struct list_head discard;
4590 struct list_head promote[SHRINK_PROMOTE_MAX];
4591 unsigned long flags;
4592 int ret = 0;
4593
4594 for_each_kmem_cache_node(s, node, n) {
4595 INIT_LIST_HEAD(&discard);
4596 for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
4597 INIT_LIST_HEAD(promote + i);
4598
4599 spin_lock_irqsave(&n->list_lock, flags);
4600
4601
4602
4603
4604
4605
4606
4607 list_for_each_entry_safe(slab, t, &n->partial, slab_list) {
4608 int free = slab->objects - slab->inuse;
4609
4610
4611 barrier();
4612
4613
4614 BUG_ON(free <= 0);
4615
4616 if (free == slab->objects) {
4617 list_move(&slab->slab_list, &discard);
4618 n->nr_partial--;
4619 } else if (free <= SHRINK_PROMOTE_MAX)
4620 list_move(&slab->slab_list, promote + free - 1);
4621 }
4622
4623
4624
4625
4626
4627 for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
4628 list_splice(promote + i, &n->partial);
4629
4630 spin_unlock_irqrestore(&n->list_lock, flags);
4631
4632
4633 list_for_each_entry_safe(slab, t, &discard, slab_list)
4634 discard_slab(s, slab);
4635
4636 if (slabs_node(s, node))
4637 ret = 1;
4638 }
4639
4640 return ret;
4641 }
4642
4643 int __kmem_cache_shrink(struct kmem_cache *s)
4644 {
4645 flush_all(s);
4646 return __kmem_cache_do_shrink(s);
4647 }
4648
4649 static int slab_mem_going_offline_callback(void *arg)
4650 {
4651 struct kmem_cache *s;
4652
4653 mutex_lock(&slab_mutex);
4654 list_for_each_entry(s, &slab_caches, list) {
4655 flush_all_cpus_locked(s);
4656 __kmem_cache_do_shrink(s);
4657 }
4658 mutex_unlock(&slab_mutex);
4659
4660 return 0;
4661 }
4662
4663 static void slab_mem_offline_callback(void *arg)
4664 {
4665 struct memory_notify *marg = arg;
4666 int offline_node;
4667
4668 offline_node = marg->status_change_nid_normal;
4669
4670
4671
4672
4673
4674 if (offline_node < 0)
4675 return;
4676
4677 mutex_lock(&slab_mutex);
4678 node_clear(offline_node, slab_nodes);
4679
4680
4681
4682
4683
4684 mutex_unlock(&slab_mutex);
4685 }
4686
4687 static int slab_mem_going_online_callback(void *arg)
4688 {
4689 struct kmem_cache_node *n;
4690 struct kmem_cache *s;
4691 struct memory_notify *marg = arg;
4692 int nid = marg->status_change_nid_normal;
4693 int ret = 0;
4694
4695
4696
4697
4698
4699 if (nid < 0)
4700 return 0;
4701
4702
4703
4704
4705
4706
4707 mutex_lock(&slab_mutex);
4708 list_for_each_entry(s, &slab_caches, list) {
4709
4710
4711
4712
4713 if (get_node(s, nid))
4714 continue;
4715
4716
4717
4718
4719
4720 n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL);
4721 if (!n) {
4722 ret = -ENOMEM;
4723 goto out;
4724 }
4725 init_kmem_cache_node(n);
4726 s->node[nid] = n;
4727 }
4728
4729
4730
4731
4732 node_set(nid, slab_nodes);
4733 out:
4734 mutex_unlock(&slab_mutex);
4735 return ret;
4736 }
4737
4738 static int slab_memory_callback(struct notifier_block *self,
4739 unsigned long action, void *arg)
4740 {
4741 int ret = 0;
4742
4743 switch (action) {
4744 case MEM_GOING_ONLINE:
4745 ret = slab_mem_going_online_callback(arg);
4746 break;
4747 case MEM_GOING_OFFLINE:
4748 ret = slab_mem_going_offline_callback(arg);
4749 break;
4750 case MEM_OFFLINE:
4751 case MEM_CANCEL_ONLINE:
4752 slab_mem_offline_callback(arg);
4753 break;
4754 case MEM_ONLINE:
4755 case MEM_CANCEL_OFFLINE:
4756 break;
4757 }
4758 if (ret)
4759 ret = notifier_from_errno(ret);
4760 else
4761 ret = NOTIFY_OK;
4762 return ret;
4763 }
4764
4765 static struct notifier_block slab_memory_callback_nb = {
4766 .notifier_call = slab_memory_callback,
4767 .priority = SLAB_CALLBACK_PRI,
4768 };
4769
4770
4771
4772
4773
4774
4775
4776
4777
4778
4779
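/*
 * Used for the early kmem_cache and kmem_cache_node structures that were
 * set up in static __initdata storage: copy them into properly allocated
 * kmem_cache objects and fix up the slab_cache back-pointers of any slabs
 * already allocated from them.
 */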
4780 static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
4781 {
4782 int node;
4783 struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
4784 struct kmem_cache_node *n;
4785
4786 memcpy(s, static_cache, kmem_cache->object_size);
4787
4788
4789
4790
4791
4792
4793 __flush_cpu_slab(s, smp_processor_id());
4794 for_each_kmem_cache_node(s, node, n) {
4795 struct slab *p;
4796
4797 list_for_each_entry(p, &n->partial, slab_list)
4798 p->slab_cache = s;
4799
4800 #ifdef CONFIG_SLUB_DEBUG
4801 list_for_each_entry(p, &n->full, slab_list)
4802 p->slab_cache = s;
4803 #endif
4804 }
4805 list_add(&s->list, &slab_caches);
4806 return s;
4807 }
4808
4809 void __init kmem_cache_init(void)
4810 {
4811 static __initdata struct kmem_cache boot_kmem_cache,
4812 boot_kmem_cache_node;
4813 int node;
4814
4815 if (debug_guardpage_minorder())
4816 slub_max_order = 0;
4817
4818
4819 if (__slub_debug_enabled())
4820 no_hash_pointers_enable(NULL);
4821
4822 kmem_cache_node = &boot_kmem_cache_node;
4823 kmem_cache = &boot_kmem_cache;
4824
4825
4826
4827
4828
4829 for_each_node_state(node, N_NORMAL_MEMORY)
4830 node_set(node, slab_nodes);
4831
4832 create_boot_cache(kmem_cache_node, "kmem_cache_node",
4833 sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN, 0, 0);
4834
4835 register_hotmemory_notifier(&slab_memory_callback_nb);
4836
4837
4838 slab_state = PARTIAL;
4839
4840 create_boot_cache(kmem_cache, "kmem_cache",
4841 offsetof(struct kmem_cache, node) +
4842 nr_node_ids * sizeof(struct kmem_cache_node *),
4843 SLAB_HWCACHE_ALIGN, 0, 0);
4844
4845 kmem_cache = bootstrap(&boot_kmem_cache);
4846 kmem_cache_node = bootstrap(&boot_kmem_cache_node);
4847
4848
4849 setup_kmalloc_cache_index_table();
4850 create_kmalloc_caches(0);
4851
4852
4853 init_freelist_randomization();
4854
4855 cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", NULL,
4856 slub_cpu_dead);
4857
4858 pr_info("SLUB: HWalign=%d, Order=%u-%u, MinObjects=%u, CPUs=%u, Nodes=%u\n",
4859 cache_line_size(),
4860 slub_min_order, slub_max_order, slub_min_objects,
4861 nr_cpu_ids, nr_node_ids);
4862 }
4863
4864 void __init kmem_cache_init_late(void)
4865 {
4866 flushwq = alloc_workqueue("slub_flushwq", WQ_MEM_RECLAIM, 0);
4867 WARN_ON(!flushwq);
4868 }
4869
4870 struct kmem_cache *
4871 __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
4872 slab_flags_t flags, void (*ctor)(void *))
4873 {
4874 struct kmem_cache *s;
4875
4876 s = find_mergeable(size, align, flags, name, ctor);
4877 if (s) {
4878 if (sysfs_slab_alias(s, name))
4879 return NULL;
4880
4881 s->refcount++;
4882
4883
4884
4885
4886
4887 s->object_size = max(s->object_size, size);
4888 s->inuse = max(s->inuse, ALIGN(size, sizeof(void *)));
4889 }
4890
4891 return s;
4892 }
4893
4894 int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
4895 {
4896 int err;
4897
4898 err = kmem_cache_open(s, flags);
4899 if (err)
4900 return err;
4901
4902
4903 if (slab_state <= UP)
4904 return 0;
4905
4906 err = sysfs_slab_add(s);
4907 if (err) {
4908 __kmem_cache_release(s);
4909 return err;
4910 }
4911
4912 if (s->flags & SLAB_STORE_USER)
4913 debugfs_slab_add(s);
4914
4915 return 0;
4916 }
4917
4918 void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
4919 {
4920 struct kmem_cache *s;
4921 void *ret;
4922
4923 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
4924 return kmalloc_large(size, gfpflags);
4925
4926 s = kmalloc_slab(size, gfpflags);
4927
4928 if (unlikely(ZERO_OR_NULL_PTR(s)))
4929 return s;
4930
4931 ret = slab_alloc(s, NULL, gfpflags, caller, size);
4932
4933
4934 trace_kmalloc(caller, ret, s, size, s->size, gfpflags);
4935
4936 ret = kasan_kmalloc(s, ret, size, gfpflags);
4937
4938 return ret;
4939 }
4940 EXPORT_SYMBOL(__kmalloc_track_caller);
4941
4942 #ifdef CONFIG_NUMA
4943 void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
4944 int node, unsigned long caller)
4945 {
4946 struct kmem_cache *s;
4947 void *ret;
4948
4949 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
4950 ret = kmalloc_large_node(size, gfpflags, node);
4951
4952 trace_kmalloc_node(caller, ret, NULL,
4953 size, PAGE_SIZE << get_order(size),
4954 gfpflags, node);
4955
4956 return ret;
4957 }
4958
4959 s = kmalloc_slab(size, gfpflags);
4960
4961 if (unlikely(ZERO_OR_NULL_PTR(s)))
4962 return s;
4963
4964 ret = slab_alloc_node(s, NULL, gfpflags, node, caller, size);
4965
4966
4967 trace_kmalloc_node(caller, ret, s, size, s->size, gfpflags, node);
4968
4969 ret = kasan_kmalloc(s, ret, size, gfpflags);
4970
4971 return ret;
4972 }
4973 EXPORT_SYMBOL(__kmalloc_node_track_caller);
4974 #endif
4975
4976 #ifdef CONFIG_SYSFS
4977 static int count_inuse(struct slab *slab)
4978 {
4979 return slab->inuse;
4980 }
4981
4982 static int count_total(struct slab *slab)
4983 {
4984 return slab->objects;
4985 }
4986 #endif
4987
4988 #ifdef CONFIG_SLUB_DEBUG
4989 static void validate_slab(struct kmem_cache *s, struct slab *slab,
4990 unsigned long *obj_map)
4991 {
4992 void *p;
4993 void *addr = slab_address(slab);
4994 unsigned long flags;
4995
4996 slab_lock(slab, &flags);
4997
4998 if (!check_slab(s, slab) || !on_freelist(s, slab, NULL))
4999 goto unlock;
5000
5001
5002 __fill_map(obj_map, s, slab);
5003 for_each_object(p, s, addr, slab->objects) {
5004 u8 val = test_bit(__obj_to_index(s, addr, p), obj_map) ?
5005 SLUB_RED_INACTIVE : SLUB_RED_ACTIVE;
5006
5007 if (!check_object(s, slab, p, val))
5008 break;
5009 }
5010 unlock:
5011 slab_unlock(slab, &flags);
5012 }
5013
5014 static int validate_slab_node(struct kmem_cache *s,
5015 struct kmem_cache_node *n, unsigned long *obj_map)
5016 {
5017 unsigned long count = 0;
5018 struct slab *slab;
5019 unsigned long flags;
5020
5021 spin_lock_irqsave(&n->list_lock, flags);
5022
5023 list_for_each_entry(slab, &n->partial, slab_list) {
5024 validate_slab(s, slab, obj_map);
5025 count++;
5026 }
5027 if (count != n->nr_partial) {
5028 pr_err("SLUB %s: %ld partial slabs counted but counter=%ld\n",
5029 s->name, count, n->nr_partial);
5030 slab_add_kunit_errors();
5031 }
5032
5033 if (!(s->flags & SLAB_STORE_USER))
5034 goto out;
5035
5036 list_for_each_entry(slab, &n->full, slab_list) {
5037 validate_slab(s, slab, obj_map);
5038 count++;
5039 }
5040 if (count != atomic_long_read(&n->nr_slabs)) {
5041 pr_err("SLUB: %s %ld slabs counted but counter=%ld\n",
5042 s->name, count, atomic_long_read(&n->nr_slabs));
5043 slab_add_kunit_errors();
5044 }
5045
5046 out:
5047 spin_unlock_irqrestore(&n->list_lock, flags);
5048 return count;
5049 }
5050
5051 long validate_slab_cache(struct kmem_cache *s)
5052 {
5053 int node;
5054 unsigned long count = 0;
5055 struct kmem_cache_node *n;
5056 unsigned long *obj_map;
5057
5058 obj_map = bitmap_alloc(oo_objects(s->oo), GFP_KERNEL);
5059 if (!obj_map)
5060 return -ENOMEM;
5061
5062 flush_all(s);
5063 for_each_kmem_cache_node(s, node, n)
5064 count += validate_slab_node(s, n, obj_map);
5065
5066 bitmap_free(obj_map);
5067
5068 return count;
5069 }
5070 EXPORT_SYMBOL(validate_slab_cache);
5071
5072 #ifdef CONFIG_DEBUG_FS
5073
5074
5075
5076
5077
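/*
 * Location tracking for the slab debugfs interface (see debugfs_slab_add()
 * above): one struct location per unique call site (address plus stack
 * depot handle), accumulating object counts and age/pid/cpu/node stats.
 */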
5078 struct location {
5079 depot_stack_handle_t handle;
5080 unsigned long count;
5081 unsigned long addr;
5082 long long sum_time;
5083 long min_time;
5084 long max_time;
5085 long min_pid;
5086 long max_pid;
5087 DECLARE_BITMAP(cpus, NR_CPUS);
5088 nodemask_t nodes;
5089 };
5090
5091 struct loc_track {
5092 unsigned long max;
5093 unsigned long count;
5094 struct location *loc;
5095 loff_t idx;
5096 };
5097
5098 static struct dentry *slab_debugfs_root;
5099
5100 static void free_loc_track(struct loc_track *t)
5101 {
5102 if (t->max)
5103 free_pages((unsigned long)t->loc,
5104 get_order(sizeof(struct location) * t->max));
5105 }
5106
5107 static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
5108 {
5109 struct location *l;
5110 int order;
5111
5112 order = get_order(sizeof(struct location) * max);
5113
5114 l = (void *)__get_free_pages(flags, order);
5115 if (!l)
5116 return 0;
5117
5118 if (t->count) {
5119 memcpy(l, t->loc, sizeof(struct location) * t->count);
5120 free_loc_track(t);
5121 }
5122 t->max = max;
5123 t->loc = l;
5124 return 1;
5125 }
5126
5127 static int add_location(struct loc_track *t, struct kmem_cache *s,
5128 const struct track *track)
5129 {
5130 long start, end, pos;
5131 struct location *l;
5132 unsigned long caddr, chandle;
5133 unsigned long age = jiffies - track->when;
5134 depot_stack_handle_t handle = 0;
5135
5136 #ifdef CONFIG_STACKDEPOT
5137 handle = READ_ONCE(track->handle);
5138 #endif
5139 start = -1;
5140 end = t->count;
5141
5142 for ( ; ; ) {
5143 pos = start + (end - start + 1) / 2;
5144
5145
5146
5147
5148
5149 if (pos == end)
5150 break;
5151
5152 caddr = t->loc[pos].addr;
5153 chandle = t->loc[pos].handle;
5154 if ((track->addr == caddr) && (handle == chandle)) {
5155
5156 l = &t->loc[pos];
5157 l->count++;
5158 if (track->when) {
5159 l->sum_time += age;
5160 if (age < l->min_time)
5161 l->min_time = age;
5162 if (age > l->max_time)
5163 l->max_time = age;
5164
5165 if (track->pid < l->min_pid)
5166 l->min_pid = track->pid;
5167 if (track->pid > l->max_pid)
5168 l->max_pid = track->pid;
5169
5170 cpumask_set_cpu(track->cpu,
5171 to_cpumask(l->cpus));
5172 }
5173 node_set(page_to_nid(virt_to_page(track)), l->nodes);
5174 return 1;
5175 }
5176
5177 if (track->addr < caddr)
5178 end = pos;
5179 else if (track->addr == caddr && handle < chandle)
5180 end = pos;
5181 else
5182 start = pos;
5183 }
5184
5185
5186
5187
5188 if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC))
5189 return 0;
5190
5191 l = t->loc + pos;
5192 if (pos < t->count)
5193 memmove(l + 1, l,
5194 (t->count - pos) * sizeof(struct location));
5195 t->count++;
5196 l->count = 1;
5197 l->addr = track->addr;
5198 l->sum_time = age;
5199 l->min_time = age;
5200 l->max_time = age;
5201 l->min_pid = track->pid;
5202 l->max_pid = track->pid;
5203 l->handle = handle;
5204 cpumask_clear(to_cpumask(l->cpus));
5205 cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
5206 nodes_clear(l->nodes);
5207 node_set(page_to_nid(virt_to_page(track)), l->nodes);
5208 return 1;
5209 }
5210
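/*
 * Walk every object in a slab, skip the free objects recorded in obj_map,
 * and add the alloc or free track of each allocated object to the
 * location table.
 */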
5211 static void process_slab(struct loc_track *t, struct kmem_cache *s,
5212 struct slab *slab, enum track_item alloc,
5213 unsigned long *obj_map)
5214 {
5215 void *addr = slab_address(slab);
5216 void *p;
5217
5218 __fill_map(obj_map, s, slab);
5219
5220 for_each_object(p, s, addr, slab->objects)
5221 if (!test_bit(__obj_to_index(s, addr, p), obj_map))
5222 add_location(t, s, get_track(s, p, alloc));
5223 }
5224 #endif
5225 #endif
5226
5227 #ifdef CONFIG_SYSFS
5228 enum slab_stat_type {
5229 SL_ALL,
5230 SL_PARTIAL,
5231 SL_CPU,
5232 SL_OBJECTS,
5233 SL_TOTAL
5234 };
5235
5236 #define SO_ALL (1 << SL_ALL)
5237 #define SO_PARTIAL (1 << SL_PARTIAL)
5238 #define SO_CPU (1 << SL_CPU)
5239 #define SO_OBJECTS (1 << SL_OBJECTS)
5240 #define SO_TOTAL (1 << SL_TOTAL)
5241
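/*
 * Count slabs or objects for the sysfs files according to the SO_* flags:
 * SO_CPU looks at the per-cpu (partial) slabs, SO_ALL at all slabs of the
 * cache, SO_PARTIAL at the per-node partial lists. SO_OBJECTS switches
 * from counting slabs to counting objects in use, SO_TOTAL to counting
 * total object slots. The result is emitted as a grand total followed by
 * per-node "N<id>=<count>" pairs.
 */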
5242 static ssize_t show_slab_objects(struct kmem_cache *s,
5243 char *buf, unsigned long flags)
5244 {
5245 unsigned long total = 0;
5246 int node;
5247 int x;
5248 unsigned long *nodes;
5249 int len = 0;
5250
5251 nodes = kcalloc(nr_node_ids, sizeof(unsigned long), GFP_KERNEL);
5252 if (!nodes)
5253 return -ENOMEM;
5254
5255 if (flags & SO_CPU) {
5256 int cpu;
5257
5258 for_each_possible_cpu(cpu) {
5259 struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab,
5260 cpu);
5261 int node;
5262 struct slab *slab;
5263
5264 slab = READ_ONCE(c->slab);
5265 if (!slab)
5266 continue;
5267
5268 node = slab_nid(slab);
5269 if (flags & SO_TOTAL)
5270 x = slab->objects;
5271 else if (flags & SO_OBJECTS)
5272 x = slab->inuse;
5273 else
5274 x = 1;
5275
5276 total += x;
5277 nodes[node] += x;
5278
5279 #ifdef CONFIG_SLUB_CPU_PARTIAL
5280 slab = slub_percpu_partial_read_once(c);
5281 if (slab) {
5282 node = slab_nid(slab);
5283 if (flags & SO_TOTAL)
5284 WARN_ON_ONCE(1);
5285 else if (flags & SO_OBJECTS)
5286 WARN_ON_ONCE(1);
5287 else
5288 x = slab->slabs;
5289 total += x;
5290 nodes[node] += x;
5291 }
5292 #endif
5293 }
5294 }
5295
5296 /*
5297  * It is not possible to take "mem_hotplug_lock" here, as we already
5298  * hold the kernfs lock for this sysfs read, which would invert the
5299  * established lock order:
5300  *
5301  *   mem_hotplug_lock -> slab_mutex -> kernfs lock
5302  *
5303  * We do not actually need mem_hotplug_lock here, because slab's memory
5304  * hot-unplug code never destroys the kmem_cache_node data.
5305  */
5306
5307 #ifdef CONFIG_SLUB_DEBUG
5308 if (flags & SO_ALL) {
5309 struct kmem_cache_node *n;
5310
5311 for_each_kmem_cache_node(s, node, n) {
5312
5313 if (flags & SO_TOTAL)
5314 x = atomic_long_read(&n->total_objects);
5315 else if (flags & SO_OBJECTS)
5316 x = atomic_long_read(&n->total_objects) -
5317 count_partial(n, count_free);
5318 else
5319 x = atomic_long_read(&n->nr_slabs);
5320 total += x;
5321 nodes[node] += x;
5322 }
5323
5324 } else
5325 #endif
5326 if (flags & SO_PARTIAL) {
5327 struct kmem_cache_node *n;
5328
5329 for_each_kmem_cache_node(s, node, n) {
5330 if (flags & SO_TOTAL)
5331 x = count_partial(n, count_total);
5332 else if (flags & SO_OBJECTS)
5333 x = count_partial(n, count_inuse);
5334 else
5335 x = n->nr_partial;
5336 total += x;
5337 nodes[node] += x;
5338 }
5339 }
5340
5341 len += sysfs_emit_at(buf, len, "%lu", total);
5342 #ifdef CONFIG_NUMA
5343 for (node = 0; node < nr_node_ids; node++) {
5344 if (nodes[node])
5345 len += sysfs_emit_at(buf, len, " N%d=%lu",
5346 node, nodes[node]);
5347 }
5348 #endif
5349 len += sysfs_emit_at(buf, len, "\n");
5350 kfree(nodes);
5351
5352 return len;
5353 }
5354
5355 #define to_slab_attr(n) container_of(n, struct slab_attribute, attr)
5356 #define to_slab(n) container_of(n, struct kmem_cache, kobj)
5357
5358 struct slab_attribute {
5359 struct attribute attr;
5360 ssize_t (*show)(struct kmem_cache *s, char *buf);
5361 ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count);
5362 };
5363
5364 #define SLAB_ATTR_RO(_name) \
5365 static struct slab_attribute _name##_attr = __ATTR_RO_MODE(_name, 0400)
5366
5367 #define SLAB_ATTR(_name) \
5368 static struct slab_attribute _name##_attr = __ATTR_RW_MODE(_name, 0600)
5369
5370 static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
5371 {
5372 return sysfs_emit(buf, "%u\n", s->size);
5373 }
5374 SLAB_ATTR_RO(slab_size);
5375
5376 static ssize_t align_show(struct kmem_cache *s, char *buf)
5377 {
5378 return sysfs_emit(buf, "%u\n", s->align);
5379 }
5380 SLAB_ATTR_RO(align);
5381
5382 static ssize_t object_size_show(struct kmem_cache *s, char *buf)
5383 {
5384 return sysfs_emit(buf, "%u\n", s->object_size);
5385 }
5386 SLAB_ATTR_RO(object_size);
5387
5388 static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
5389 {
5390 return sysfs_emit(buf, "%u\n", oo_objects(s->oo));
5391 }
5392 SLAB_ATTR_RO(objs_per_slab);
5393
5394 static ssize_t order_show(struct kmem_cache *s, char *buf)
5395 {
5396 return sysfs_emit(buf, "%u\n", oo_order(s->oo));
5397 }
5398 SLAB_ATTR_RO(order);
5399
5400 static ssize_t min_partial_show(struct kmem_cache *s, char *buf)
5401 {
5402 return sysfs_emit(buf, "%lu\n", s->min_partial);
5403 }
5404
5405 static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
5406 size_t length)
5407 {
5408 unsigned long min;
5409 int err;
5410
5411 err = kstrtoul(buf, 10, &min);
5412 if (err)
5413 return err;
5414
5415 s->min_partial = min;
5416 return length;
5417 }
5418 SLAB_ATTR(min_partial);
5419
5420 static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
5421 {
5422 unsigned int nr_partial = 0;
5423 #ifdef CONFIG_SLUB_CPU_PARTIAL
5424 nr_partial = s->cpu_partial;
5425 #endif
5426
5427 return sysfs_emit(buf, "%u\n", nr_partial);
5428 }
5429
5430 static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
5431 size_t length)
5432 {
5433 unsigned int objects;
5434 int err;
5435
5436 err = kstrtouint(buf, 10, &objects);
5437 if (err)
5438 return err;
5439 if (objects && !kmem_cache_has_cpu_partial(s))
5440 return -EINVAL;
5441
5442 slub_set_cpu_partial(s, objects);
5443 flush_all(s);
5444 return length;
5445 }
5446 SLAB_ATTR(cpu_partial);
5447
5448 static ssize_t ctor_show(struct kmem_cache *s, char *buf)
5449 {
5450 if (!s->ctor)
5451 return 0;
5452 return sysfs_emit(buf, "%pS\n", s->ctor);
5453 }
5454 SLAB_ATTR_RO(ctor);
5455
5456 static ssize_t aliases_show(struct kmem_cache *s, char *buf)
5457 {
5458 return sysfs_emit(buf, "%d\n", s->refcount < 0 ? 0 : s->refcount - 1);
5459 }
5460 SLAB_ATTR_RO(aliases);
5461
5462 static ssize_t partial_show(struct kmem_cache *s, char *buf)
5463 {
5464 return show_slab_objects(s, buf, SO_PARTIAL);
5465 }
5466 SLAB_ATTR_RO(partial);
5467
5468 static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
5469 {
5470 return show_slab_objects(s, buf, SO_CPU);
5471 }
5472 SLAB_ATTR_RO(cpu_slabs);
5473
5474 static ssize_t objects_show(struct kmem_cache *s, char *buf)
5475 {
5476 return show_slab_objects(s, buf, SO_ALL|SO_OBJECTS);
5477 }
5478 SLAB_ATTR_RO(objects);
5479
5480 static ssize_t objects_partial_show(struct kmem_cache *s, char *buf)
5481 {
5482 return show_slab_objects(s, buf, SO_PARTIAL|SO_OBJECTS);
5483 }
5484 SLAB_ATTR_RO(objects_partial);
5485
5486 static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
5487 {
5488 int objects = 0;
5489 int slabs = 0;
5490 int cpu __maybe_unused;
5491 int len = 0;
5492
5493 #ifdef CONFIG_SLUB_CPU_PARTIAL
5494 for_each_online_cpu(cpu) {
5495 struct slab *slab;
5496
5497 slab = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
5498
5499 if (slab)
5500 slabs += slab->slabs;
5501 }
5502 #endif
5503
5504 /* Approximate half-full slabs; see slub_set_cpu_partial(). */
5505 objects = (slabs * oo_objects(s->oo)) / 2;
5506 len += sysfs_emit_at(buf, len, "%d(%d)", objects, slabs);
5507
5508 #if defined(CONFIG_SLUB_CPU_PARTIAL) && defined(CONFIG_SMP)
5509 for_each_online_cpu(cpu) {
5510 struct slab *slab;
5511
5512 slab = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
5513 if (slab) {
5514 slabs = READ_ONCE(slab->slabs);
5515 objects = (slabs * oo_objects(s->oo)) / 2;
5516 len += sysfs_emit_at(buf, len, " C%d=%d(%d)",
5517 cpu, objects, slabs);
5518 }
5519 }
5520 #endif
5521 len += sysfs_emit_at(buf, len, "\n");
5522
5523 return len;
5524 }
5525 SLAB_ATTR_RO(slabs_cpu_partial);
5526
5527 static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
5528 {
5529 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
5530 }
5531 SLAB_ATTR_RO(reclaim_account);
5532
5533 static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
5534 {
5535 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
5536 }
5537 SLAB_ATTR_RO(hwcache_align);
5538
5539 #ifdef CONFIG_ZONE_DMA
5540 static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
5541 {
5542 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA));
5543 }
5544 SLAB_ATTR_RO(cache_dma);
5545 #endif
5546
5547 static ssize_t usersize_show(struct kmem_cache *s, char *buf)
5548 {
5549 return sysfs_emit(buf, "%u\n", s->usersize);
5550 }
5551 SLAB_ATTR_RO(usersize);
5552
5553 static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
5554 {
5555 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_TYPESAFE_BY_RCU));
5556 }
5557 SLAB_ATTR_RO(destroy_by_rcu);
5558
5559 #ifdef CONFIG_SLUB_DEBUG
5560 static ssize_t slabs_show(struct kmem_cache *s, char *buf)
5561 {
5562 return show_slab_objects(s, buf, SO_ALL);
5563 }
5564 SLAB_ATTR_RO(slabs);
5565
5566 static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
5567 {
5568 return show_slab_objects(s, buf, SO_ALL|SO_TOTAL);
5569 }
5570 SLAB_ATTR_RO(total_objects);
5571
5572 static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
5573 {
5574 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_CONSISTENCY_CHECKS));
5575 }
5576 SLAB_ATTR_RO(sanity_checks);
5577
5578 static ssize_t trace_show(struct kmem_cache *s, char *buf)
5579 {
5580 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_TRACE));
5581 }
5582 SLAB_ATTR_RO(trace);
5583
5584 static ssize_t red_zone_show(struct kmem_cache *s, char *buf)
5585 {
5586 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE));
5587 }
5588
5589 SLAB_ATTR_RO(red_zone);
5590
5591 static ssize_t poison_show(struct kmem_cache *s, char *buf)
5592 {
5593 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_POISON));
5594 }
5595
5596 SLAB_ATTR_RO(poison);
5597
5598 static ssize_t store_user_show(struct kmem_cache *s, char *buf)
5599 {
5600 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_STORE_USER));
5601 }
5602
5603 SLAB_ATTR_RO(store_user);
5604
5605 static ssize_t validate_show(struct kmem_cache *s, char *buf)
5606 {
5607 return 0;
5608 }
5609
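/*
 * Writing "1" to the "validate" attribute runs a full consistency check of
 * the cache via validate_slab_cache(); any other input returns -EINVAL.
 */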
5610 static ssize_t validate_store(struct kmem_cache *s,
5611 const char *buf, size_t length)
5612 {
5613 int ret = -EINVAL;
5614
5615 if (buf[0] == '1') {
5616 ret = validate_slab_cache(s);
5617 if (ret >= 0)
5618 ret = length;
5619 }
5620 return ret;
5621 }
5622 SLAB_ATTR(validate);
5623
5624 #endif
5625
5626 #ifdef CONFIG_FAILSLAB
5627 static ssize_t failslab_show(struct kmem_cache *s, char *buf)
5628 {
5629 return sysfs_emit(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
5630 }
5631 SLAB_ATTR_RO(failslab);
5632 #endif
5633
5634 static ssize_t shrink_show(struct kmem_cache *s, char *buf)
5635 {
5636 return 0;
5637 }
5638
5639 static ssize_t shrink_store(struct kmem_cache *s,
5640 const char *buf, size_t length)
5641 {
5642 if (buf[0] == '1')
5643 kmem_cache_shrink(s);
5644 else
5645 return -EINVAL;
5646 return length;
5647 }
5648 SLAB_ATTR(shrink);
5649
5650 #ifdef CONFIG_NUMA
5651 static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf)
5652 {
5653 return sysfs_emit(buf, "%u\n", s->remote_node_defrag_ratio / 10);
5654 }
5655
5656 static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
5657 const char *buf, size_t length)
5658 {
5659 unsigned int ratio;
5660 int err;
5661
5662 err = kstrtouint(buf, 10, &ratio);
5663 if (err)
5664 return err;
5665 if (ratio > 100)
5666 return -ERANGE;
5667
5668 s->remote_node_defrag_ratio = ratio * 10;
5669
5670 return length;
5671 }
5672 SLAB_ATTR(remote_node_defrag_ratio);
5673 #endif
5674
5675 #ifdef CONFIG_SLUB_STATS
5676 static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
5677 {
5678 unsigned long sum = 0;
5679 int cpu;
5680 int len = 0;
5681 int *data = kmalloc_array(nr_cpu_ids, sizeof(int), GFP_KERNEL);
5682
5683 if (!data)
5684 return -ENOMEM;
5685
5686 for_each_online_cpu(cpu) {
5687 unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];
5688
5689 data[cpu] = x;
5690 sum += x;
5691 }
5692
5693 len += sysfs_emit_at(buf, len, "%lu", sum);
5694
5695 #ifdef CONFIG_SMP
5696 for_each_online_cpu(cpu) {
5697 if (data[cpu])
5698 len += sysfs_emit_at(buf, len, " C%d=%u",
5699 cpu, data[cpu]);
5700 }
5701 #endif
5702 kfree(data);
5703 len += sysfs_emit_at(buf, len, "\n");
5704
5705 return len;
5706 }
5707
5708 static void clear_stat(struct kmem_cache *s, enum stat_item si)
5709 {
5710 int cpu;
5711
5712 for_each_online_cpu(cpu)
5713 per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
5714 }
5715
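/*
 * Generate a sysfs show/store pair for one SLUB statistics counter:
 * reading emits the sum over all online cpus plus a " C<cpu>=<n>"
 * breakdown, and writing "0" clears the counter on every online cpu.
 */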
5716 #define STAT_ATTR(si, text) \
5717 static ssize_t text##_show(struct kmem_cache *s, char *buf) \
5718 { \
5719 return show_stat(s, buf, si); \
5720 } \
5721 static ssize_t text##_store(struct kmem_cache *s, \
5722 const char *buf, size_t length) \
5723 { \
5724 if (buf[0] != '0') \
5725 return -EINVAL; \
5726 clear_stat(s, si); \
5727 return length; \
5728 } \
5729 SLAB_ATTR(text);
5730
5731 STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
5732 STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
5733 STAT_ATTR(FREE_FASTPATH, free_fastpath);
5734 STAT_ATTR(FREE_SLOWPATH, free_slowpath);
5735 STAT_ATTR(FREE_FROZEN, free_frozen);
5736 STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
5737 STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
5738 STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
5739 STAT_ATTR(ALLOC_SLAB, alloc_slab);
5740 STAT_ATTR(ALLOC_REFILL, alloc_refill);
5741 STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch);
5742 STAT_ATTR(FREE_SLAB, free_slab);
5743 STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
5744 STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
5745 STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
5746 STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
5747 STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
5748 STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
5749 STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
5750 STAT_ATTR(ORDER_FALLBACK, order_fallback);
5751 STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
5752 STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
5753 STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
5754 STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
5755 STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node);
5756 STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain);
5757 #endif
5758
5759 static struct attribute *slab_attrs[] = {
5760 &slab_size_attr.attr,
5761 &object_size_attr.attr,
5762 &objs_per_slab_attr.attr,
5763 &order_attr.attr,
5764 &min_partial_attr.attr,
5765 &cpu_partial_attr.attr,
5766 &objects_attr.attr,
5767 &objects_partial_attr.attr,
5768 &partial_attr.attr,
5769 &cpu_slabs_attr.attr,
5770 &ctor_attr.attr,
5771 &aliases_attr.attr,
5772 &align_attr.attr,
5773 &hwcache_align_attr.attr,
5774 &reclaim_account_attr.attr,
5775 &destroy_by_rcu_attr.attr,
5776 &shrink_attr.attr,
5777 &slabs_cpu_partial_attr.attr,
5778 #ifdef CONFIG_SLUB_DEBUG
5779 &total_objects_attr.attr,
5780 &slabs_attr.attr,
5781 &sanity_checks_attr.attr,
5782 &trace_attr.attr,
5783 &red_zone_attr.attr,
5784 &poison_attr.attr,
5785 &store_user_attr.attr,
5786 &validate_attr.attr,
5787 #endif
5788 #ifdef CONFIG_ZONE_DMA
5789 &cache_dma_attr.attr,
5790 #endif
5791 #ifdef CONFIG_NUMA
5792 &remote_node_defrag_ratio_attr.attr,
5793 #endif
5794 #ifdef CONFIG_SLUB_STATS
5795 &alloc_fastpath_attr.attr,
5796 &alloc_slowpath_attr.attr,
5797 &free_fastpath_attr.attr,
5798 &free_slowpath_attr.attr,
5799 &free_frozen_attr.attr,
5800 &free_add_partial_attr.attr,
5801 &free_remove_partial_attr.attr,
5802 &alloc_from_partial_attr.attr,
5803 &alloc_slab_attr.attr,
5804 &alloc_refill_attr.attr,
5805 &alloc_node_mismatch_attr.attr,
5806 &free_slab_attr.attr,
5807 &cpuslab_flush_attr.attr,
5808 &deactivate_full_attr.attr,
5809 &deactivate_empty_attr.attr,
5810 &deactivate_to_head_attr.attr,
5811 &deactivate_to_tail_attr.attr,
5812 &deactivate_remote_frees_attr.attr,
5813 &deactivate_bypass_attr.attr,
5814 &order_fallback_attr.attr,
5815 &cmpxchg_double_fail_attr.attr,
5816 &cmpxchg_double_cpu_fail_attr.attr,
5817 &cpu_partial_alloc_attr.attr,
5818 &cpu_partial_free_attr.attr,
5819 &cpu_partial_node_attr.attr,
5820 &cpu_partial_drain_attr.attr,
5821 #endif
5822 #ifdef CONFIG_FAILSLAB
5823 &failslab_attr.attr,
5824 #endif
5825 &usersize_attr.attr,
5826
5827 NULL
5828 };
5829
5830 static const struct attribute_group slab_attr_group = {
5831 .attrs = slab_attrs,
5832 };
5833
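/*
 * The attributes above are exposed through this group as files under
 * /sys/kernel/slab/<cache>/ (the "slab" kset is created in
 * slab_sysfs_init() below with kernel_kobj as its parent).
 */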
5834 static ssize_t slab_attr_show(struct kobject *kobj,
5835 struct attribute *attr,
5836 char *buf)
5837 {
5838 struct slab_attribute *attribute;
5839 struct kmem_cache *s;
5840 int err;
5841
5842 attribute = to_slab_attr(attr);
5843 s = to_slab(kobj);
5844
5845 if (!attribute->show)
5846 return -EIO;
5847
5848 err = attribute->show(s, buf);
5849
5850 return err;
5851 }
5852
5853 static ssize_t slab_attr_store(struct kobject *kobj,
5854 struct attribute *attr,
5855 const char *buf, size_t len)
5856 {
5857 struct slab_attribute *attribute;
5858 struct kmem_cache *s;
5859 int err;
5860
5861 attribute = to_slab_attr(attr);
5862 s = to_slab(kobj);
5863
5864 if (!attribute->store)
5865 return -EIO;
5866
5867 err = attribute->store(s, buf, len);
5868 return err;
5869 }
5870
5871 static void kmem_cache_release(struct kobject *k)
5872 {
5873 slab_kmem_cache_release(to_slab(k));
5874 }
5875
5876 static const struct sysfs_ops slab_sysfs_ops = {
5877 .show = slab_attr_show,
5878 .store = slab_attr_store,
5879 };
5880
5881 static struct kobj_type slab_ktype = {
5882 .sysfs_ops = &slab_sysfs_ops,
5883 .release = kmem_cache_release,
5884 };
5885
5886 static struct kset *slab_kset;
5887
5888 static inline struct kset *cache_kset(struct kmem_cache *s)
5889 {
5890 return slab_kset;
5891 }
5892
5893 #define ID_STR_LENGTH 64
5894
5895 /*
5896  * Create a unique string id for a slab cache.
5897  * Format: :[flags-]size
5898  */
5899 static char *create_unique_id(struct kmem_cache *s)
5900 {
5901 char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL);
5902 char *p = name;
5903
5904 if (!name)
5905 return ERR_PTR(-ENOMEM);
5906
5907 *p++ = ':';
5908
5909 /*
5910  * First the flags that affect slabcache operations. We only get here
5911  * for aliasable slabs, so only a few flags matter. The flags encoded
5912  * here must cover everything that is matched during cache merging, so
5913  * that the resulting id is unique.
5914  */
5915 if (s->flags & SLAB_CACHE_DMA)
5916 *p++ = 'd';
5917 if (s->flags & SLAB_CACHE_DMA32)
5918 *p++ = 'D';
5919 if (s->flags & SLAB_RECLAIM_ACCOUNT)
5920 *p++ = 'a';
5921 if (s->flags & SLAB_CONSISTENCY_CHECKS)
5922 *p++ = 'F';
5923 if (s->flags & SLAB_ACCOUNT)
5924 *p++ = 'A';
5925 if (p != name + 1)
5926 *p++ = '-';
5927 p += sprintf(p, "%07u", s->size);
5928
5929 BUG_ON(p > name + ID_STR_LENGTH - 1);
5930 return name;
5931 }
5932
5933 static int sysfs_slab_add(struct kmem_cache *s)
5934 {
5935 int err;
5936 const char *name;
5937 struct kset *kset = cache_kset(s);
5938 int unmergeable = slab_unmergeable(s);
5939
5940 if (!kset) {
5941 kobject_init(&s->kobj, &slab_ktype);
5942 return 0;
5943 }
5944
5945 if (!unmergeable && disable_higher_order_debug &&
5946 (slub_debug & DEBUG_METADATA_FLAGS))
5947 unmergeable = 1;
5948
5949 if (unmergeable) {
5950 /*
5951  * The slabcache can never be merged, so we can use its name proper.
5952  * This is typically the case for debug situations. In that case we
5953  * can catch duplicate names easily.
5954  */
5955 sysfs_remove_link(&slab_kset->kobj, s->name);
5956 name = s->name;
5957 } else {
5958 /*
5959  * Create a unique name for the slab as a target
5960  * for the symlinks.
5961  */
5962 name = create_unique_id(s);
5963 if (IS_ERR(name))
5964 return PTR_ERR(name);
5965 }
5966
5967 s->kobj.kset = kset;
5968 err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name);
5969 if (err)
5970 goto out;
5971
5972 err = sysfs_create_group(&s->kobj, &slab_attr_group);
5973 if (err)
5974 goto out_del_kobj;
5975
5976 if (!unmergeable) {
5977 /* Set up the first alias. */
5978 sysfs_slab_alias(s, s->name);
5979 }
5980 out:
5981 if (!unmergeable)
5982 kfree(name);
5983 return err;
5984 out_del_kobj:
5985 kobject_del(&s->kobj);
5986 goto out;
5987 }
5988
5989 void sysfs_slab_unlink(struct kmem_cache *s)
5990 {
5991 if (slab_state >= FULL)
5992 kobject_del(&s->kobj);
5993 }
5994
5995 void sysfs_slab_release(struct kmem_cache *s)
5996 {
5997 if (slab_state >= FULL)
5998 kobject_put(&s->kobj);
5999 }
6000
6001 /*
6002  * Aliases need to be buffered during bootup until sysfs becomes
6003  * available, lest that information be lost.
6004  */
6005 struct saved_alias {
6006 struct kmem_cache *s;
6007 const char *name;
6008 struct saved_alias *next;
6009 };
6010
6011 static struct saved_alias *alias_list;
6012
6013 static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
6014 {
6015 struct saved_alias *al;
6016
6017 if (slab_state == FULL) {
6018 /*
6019  * If there is a leftover link, remove it.
6020  */
6021 sysfs_remove_link(&slab_kset->kobj, name);
6022 return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
6023 }
6024
6025 al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
6026 if (!al)
6027 return -ENOMEM;
6028
6029 al->s = s;
6030 al->name = name;
6031 al->next = alias_list;
6032 alias_list = al;
6033 return 0;
6034 }
6035
6036 static int __init slab_sysfs_init(void)
6037 {
6038 struct kmem_cache *s;
6039 int err;
6040
6041 mutex_lock(&slab_mutex);
6042
6043 slab_kset = kset_create_and_add("slab", NULL, kernel_kobj);
6044 if (!slab_kset) {
6045 mutex_unlock(&slab_mutex);
6046 pr_err("Cannot register slab subsystem.\n");
6047 return -ENOSYS;
6048 }
6049
6050 slab_state = FULL;
6051
6052 list_for_each_entry(s, &slab_caches, list) {
6053 err = sysfs_slab_add(s);
6054 if (err)
6055 pr_err("SLUB: Unable to add boot slab %s to sysfs\n",
6056 s->name);
6057 }
6058
6059 while (alias_list) {
6060 struct saved_alias *al = alias_list;
6061
6062 alias_list = alias_list->next;
6063 err = sysfs_slab_alias(al->s, al->name);
6064 if (err)
6065 pr_err("SLUB: Unable to add boot slab alias %s to sysfs\n",
6066 al->name);
6067 kfree(al);
6068 }
6069
6070 mutex_unlock(&slab_mutex);
6071 return 0;
6072 }
6073
6074 __initcall(slab_sysfs_init);
6075 #endif
6076
6077 #if defined(CONFIG_SLUB_DEBUG) && defined(CONFIG_DEBUG_FS)
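/*
 * debugfs seq_file backend for the per-cache alloc_traces/free_traces
 * files. Each record is printed as
 *   <count> <caller> [age=min/avg/max] [pid=...] [cpus=...] [nodes=...]
 * followed, when stack depot is available, by the saved stack trace.
 */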
6078 static int slab_debugfs_show(struct seq_file *seq, void *v)
6079 {
6080 struct loc_track *t = seq->private;
6081 struct location *l;
6082 unsigned long idx;
6083
6084 idx = (unsigned long) t->idx;
6085 if (idx < t->count) {
6086 l = &t->loc[idx];
6087
6088 seq_printf(seq, "%7ld ", l->count);
6089
6090 if (l->addr)
6091 seq_printf(seq, "%pS", (void *)l->addr);
6092 else
6093 seq_puts(seq, "<not-available>");
6094
6095 if (l->sum_time != l->min_time) {
6096 seq_printf(seq, " age=%ld/%llu/%ld",
6097 l->min_time, div_u64(l->sum_time, l->count),
6098 l->max_time);
6099 } else
6100 seq_printf(seq, " age=%ld", l->min_time);
6101
6102 if (l->min_pid != l->max_pid)
6103 seq_printf(seq, " pid=%ld-%ld", l->min_pid, l->max_pid);
6104 else
6105 seq_printf(seq, " pid=%ld",
6106 l->min_pid);
6107
6108 if (num_online_cpus() > 1 && !cpumask_empty(to_cpumask(l->cpus)))
6109 seq_printf(seq, " cpus=%*pbl",
6110 cpumask_pr_args(to_cpumask(l->cpus)));
6111
6112 if (nr_online_nodes > 1 && !nodes_empty(l->nodes))
6113 seq_printf(seq, " nodes=%*pbl",
6114 nodemask_pr_args(&l->nodes));
6115
6116 #ifdef CONFIG_STACKDEPOT
6117 {
6118 depot_stack_handle_t handle;
6119 unsigned long *entries;
6120 unsigned int nr_entries, j;
6121
6122 handle = READ_ONCE(l->handle);
6123 if (handle) {
6124 nr_entries = stack_depot_fetch(handle, &entries);
6125 seq_puts(seq, "\n");
6126 for (j = 0; j < nr_entries; j++)
6127 seq_printf(seq, " %pS\n", (void *)entries[j]);
6128 }
6129 }
6130 #endif
6131 seq_puts(seq, "\n");
6132 }
6133
6134 if (!idx && !t->count)
6135 seq_puts(seq, "No data\n");
6136
6137 return 0;
6138 }
6139
6140 static void slab_debugfs_stop(struct seq_file *seq, void *v)
6141 {
6142 }
6143
6144 static void *slab_debugfs_next(struct seq_file *seq, void *v, loff_t *ppos)
6145 {
6146 struct loc_track *t = seq->private;
6147
6148 t->idx = ++(*ppos);
6149 if (*ppos <= t->count)
6150 return ppos;
6151
6152 return NULL;
6153 }
6154
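/*
 * sort_r() comparison helper: order locations by descending count.
 * Ties never return 0, so equal counts keep an arbitrary order.
 */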
6155 static int cmp_loc_by_count(const void *a, const void *b, const void *data)
6156 {
6157 struct location *loc1 = (struct location *)a;
6158 struct location *loc2 = (struct location *)b;
6159
6160 if (loc1->count > loc2->count)
6161 return -1;
6162 else
6163 return 1;
6164 }
6165
6166 static void *slab_debugfs_start(struct seq_file *seq, loff_t *ppos)
6167 {
6168 struct loc_track *t = seq->private;
6169
6170 t->idx = *ppos;
6171 return ppos;
6172 }
6173
6174 static const struct seq_operations slab_debugfs_sops = {
6175 .start = slab_debugfs_start,
6176 .next = slab_debugfs_next,
6177 .stop = slab_debugfs_stop,
6178 .show = slab_debugfs_show,
6179 };
6180
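/*
 * Opening "alloc_traces" or "free_traces" walks the full and partial
 * slab lists of every node, collects the matching tracking records into
 * a loc_track table, and sorts it by descending count before the
 * seq_file iterator above prints it.
 */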
6181 static int slab_debug_trace_open(struct inode *inode, struct file *filep)
6182 {
6183
6184 struct kmem_cache_node *n;
6185 enum track_item alloc;
6186 int node;
6187 struct loc_track *t = __seq_open_private(filep, &slab_debugfs_sops,
6188 sizeof(struct loc_track));
6189 struct kmem_cache *s = file_inode(filep)->i_private;
6190 unsigned long *obj_map;
6191
6192 if (!t)
6193 return -ENOMEM;
6194
6195 obj_map = bitmap_alloc(oo_objects(s->oo), GFP_KERNEL);
6196 if (!obj_map) {
6197 seq_release_private(inode, filep);
6198 return -ENOMEM;
6199 }
6200
6201 if (strcmp(filep->f_path.dentry->d_name.name, "alloc_traces") == 0)
6202 alloc = TRACK_ALLOC;
6203 else
6204 alloc = TRACK_FREE;
6205
6206 if (!alloc_loc_track(t, PAGE_SIZE / sizeof(struct location), GFP_KERNEL)) {
6207 bitmap_free(obj_map);
6208 seq_release_private(inode, filep);
6209 return -ENOMEM;
6210 }
6211
6212 for_each_kmem_cache_node(s, node, n) {
6213 unsigned long flags;
6214 struct slab *slab;
6215
6216 if (!atomic_long_read(&n->nr_slabs))
6217 continue;
6218
6219 spin_lock_irqsave(&n->list_lock, flags);
6220 list_for_each_entry(slab, &n->partial, slab_list)
6221 process_slab(t, s, slab, alloc, obj_map);
6222 list_for_each_entry(slab, &n->full, slab_list)
6223 process_slab(t, s, slab, alloc, obj_map);
6224 spin_unlock_irqrestore(&n->list_lock, flags);
6225 }
6226
6227 /* Sort locations by descending count */
6228 sort_r(t->loc, t->count, sizeof(struct location),
6229 cmp_loc_by_count, NULL, NULL);
6230
6231 bitmap_free(obj_map);
6232 return 0;
6233 }
6234
6235 static int slab_debug_trace_release(struct inode *inode, struct file *file)
6236 {
6237 struct seq_file *seq = file->private_data;
6238 struct loc_track *t = seq->private;
6239
6240 free_loc_track(t);
6241 return seq_release_private(inode, file);
6242 }
6243
6244 static const struct file_operations slab_debugfs_fops = {
6245 .open = slab_debug_trace_open,
6246 .read = seq_read,
6247 .llseek = seq_lseek,
6248 .release = slab_debug_trace_release,
6249 };
6250
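/*
 * Create the per-cache debugfs directory with its alloc_traces and
 * free_traces files. With debugfs mounted at its usual location this
 * shows up as /sys/kernel/debug/slab/<cache>/. The files only carry
 * data for caches created with SLAB_STORE_USER (e.g. slub_debug=U);
 * slab_debugfs_init() below skips the others entirely.
 */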
6251 static void debugfs_slab_add(struct kmem_cache *s)
6252 {
6253 struct dentry *slab_cache_dir;
6254
6255 if (unlikely(!slab_debugfs_root))
6256 return;
6257
6258 slab_cache_dir = debugfs_create_dir(s->name, slab_debugfs_root);
6259
6260 debugfs_create_file("alloc_traces", 0400,
6261 slab_cache_dir, s, &slab_debugfs_fops);
6262
6263 debugfs_create_file("free_traces", 0400,
6264 slab_cache_dir, s, &slab_debugfs_fops);
6265 }
6266
6267 void debugfs_slab_release(struct kmem_cache *s)
6268 {
6269 debugfs_remove_recursive(debugfs_lookup(s->name, slab_debugfs_root));
6270 }
6271
6272 static int __init slab_debugfs_init(void)
6273 {
6274 struct kmem_cache *s;
6275
6276 slab_debugfs_root = debugfs_create_dir("slab", NULL);
6277
6278 list_for_each_entry(s, &slab_caches, list)
6279 if (s->flags & SLAB_STORE_USER)
6280 debugfs_slab_add(s);
6281
6282 return 0;
6284 }
6285 __initcall(slab_debugfs_init);
6286 #endif
6287
6288 /* The /proc/slabinfo ABI */
6289
6290 #ifdef CONFIG_SLUB_DEBUG
6291 void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
6292 {
6293 unsigned long nr_slabs = 0;
6294 unsigned long nr_objs = 0;
6295 unsigned long nr_free = 0;
6296 int node;
6297 struct kmem_cache_node *n;
6298
6299 for_each_kmem_cache_node(s, node, n) {
6300 nr_slabs += node_nr_slabs(n);
6301 nr_objs += node_nr_objs(n);
6302 nr_free += count_partial(n, count_free);
6303 }
6304
6305 sinfo->active_objs = nr_objs - nr_free;
6306 sinfo->num_objs = nr_objs;
6307 sinfo->active_slabs = nr_slabs;
6308 sinfo->num_slabs = nr_slabs;
6309 sinfo->objects_per_slab = oo_objects(s->oo);
6310 sinfo->cache_order = oo_order(s->oo);
6311 }
6312
6313 void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s)
6314 {
6315 }
6316
6317 ssize_t slabinfo_write(struct file *file, const char __user *buffer,
6318 size_t count, loff_t *ppos)
6319 {
6320 return -EIO;
6321 }
6322 #endif