/*
 * KFENCE guarded object allocator and fault handling.
 */

#define pr_fmt(fmt) "kfence: " fmt

#include <linux/atomic.h>
#include <linux/bug.h>
#include <linux/debugfs.h>
#include <linux/hash.h>
#include <linux/irq_work.h>
#include <linux/jhash.h>
#include <linux/kcsan-checks.h>
#include <linux/kfence.h>
#include <linux/kmemleak.h>
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/log2.h>
#include <linux/memblock.h>
#include <linux/moduleparam.h>
#include <linux/notifier.h>
#include <linux/panic_notifier.h>
#include <linux/random.h>
#include <linux/rcupdate.h>
#include <linux/sched/clock.h>
#include <linux/sched/sysctl.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/string.h>

#include <asm/kfence.h>

#include "kfence.h"

/* Disables KFENCE on the first warning assuming an irrecoverable error. */
#define KFENCE_WARN_ON(cond)						\
	({								\
		const bool __cond = WARN_ON(cond);			\
		if (unlikely(__cond)) {					\
			WRITE_ONCE(kfence_enabled, false);		\
			disabled_by_warn = true;			\
		}							\
		__cond;							\
	})

/* === Data ================================================================= */

static bool kfence_enabled __read_mostly;
static bool disabled_by_warn __read_mostly;

unsigned long kfence_sample_interval __read_mostly = CONFIG_KFENCE_SAMPLE_INTERVAL;
EXPORT_SYMBOL_GPL(kfence_sample_interval); /* Export for test modules. */

#ifdef MODULE_PARAM_PREFIX
#undef MODULE_PARAM_PREFIX
#endif
#define MODULE_PARAM_PREFIX "kfence."

static int kfence_enable_late(void);
static int param_set_sample_interval(const char *val, const struct kernel_param *kp)
{
	unsigned long num;
	int ret = kstrtoul(val, 0, &num);

	if (ret < 0)
		return ret;

	/* Writing 0 while enabled acts as a runtime "off" switch. */
	if (!num && READ_ONCE(kfence_enabled)) {
		pr_info("disabled\n");
		WRITE_ONCE(kfence_enabled, false);
	}

	*((unsigned long *)kp->arg) = num;

	if (num && !READ_ONCE(kfence_enabled) && system_state != SYSTEM_BOOTING)
		return disabled_by_warn ? -EINVAL : kfence_enable_late();
	return 0;
}

static int param_get_sample_interval(char *buffer, const struct kernel_param *kp)
{
	if (!READ_ONCE(kfence_enabled))
		return sprintf(buffer, "0\n");

	return param_get_ulong(buffer, kp);
}

static const struct kernel_param_ops sample_interval_param_ops = {
	.set = param_set_sample_interval,
	.get = param_get_sample_interval,
};
module_param_cb(sample_interval, &sample_interval_param_ops, &kfence_sample_interval, 0600);
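
/*
 * Usage note (illustrative, not part of the allocator): given the "kfence."
 * parameter prefix above, the sample interval can be set at boot via
 * "kfence.sample_interval=100" (milliseconds; 0 disables KFENCE), or adjusted
 * at runtime through the 0600 parameter file, e.g.:
 *
 *	echo 100 > /sys/module/kfence/parameters/sample_interval
 *
 * Re-enabling after KFENCE_WARN_ON() disabled KFENCE fails with -EINVAL.
 */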

/*
 * Skip new allocations while pool utilization exceeds this threshold (in
 * percent) and the allocation's stack trace is already covered.
 */
static unsigned long kfence_skip_covered_thresh __read_mostly = 75;
module_param_named(skip_covered_thresh, kfence_skip_covered_thresh, ulong, 0644);

/* If true, use a deferrable timer. */
static bool kfence_deferrable __read_mostly = IS_ENABLED(CONFIG_KFENCE_DEFERRABLE);
module_param_named(deferrable, kfence_deferrable, bool, 0444);

/* If true, check all canary bytes on panic. */
static bool kfence_check_on_panic __read_mostly;
module_param_named(check_on_panic, kfence_check_on_panic, bool, 0444);

/* The pool of pages used for guard pages and objects. */
char *__kfence_pool __read_mostly;
EXPORT_SYMBOL(__kfence_pool); /* Export for test modules. */

/*
 * Per-object metadata, with one-to-one mapping of object metadata to
 * backing pages (in __kfence_pool).
 */
static_assert(CONFIG_KFENCE_NUM_OBJECTS > 0);
struct kfence_metadata kfence_metadata[CONFIG_KFENCE_NUM_OBJECTS];

/* Freelist with available objects. */
static struct list_head kfence_freelist = LIST_HEAD_INIT(kfence_freelist);
static DEFINE_RAW_SPINLOCK(kfence_freelist_lock); /* Lock protecting freelist. */

/*
 * The static key to set up a KFENCE allocation; or, if static keys are not
 * used to gate allocations, to avoid a load and compare if KFENCE is disabled.
 */
DEFINE_STATIC_KEY_FALSE(kfence_allocation_key);

/* Gates the allocation, ensuring only one succeeds in a given sample interval. */
atomic_t kfence_allocation_gate = ATOMIC_INIT(1);

/*
 * A lookup table of allocation stack hashes currently "covered" by live KFENCE
 * objects, used as a Counting-Bloom-filter-like structure: each hash maps to
 * ALLOC_COVERED_HNUM counters, which are incremented on allocation and
 * decremented on free.
 */
#define ALLOC_COVERED_HNUM	2
#define ALLOC_COVERED_ORDER	(const_ilog2(CONFIG_KFENCE_NUM_OBJECTS) + 2)
#define ALLOC_COVERED_SIZE	(1 << ALLOC_COVERED_ORDER)
#define ALLOC_COVERED_HNEXT(h)	hash_32(h, ALLOC_COVERED_ORDER)
#define ALLOC_COVERED_MASK	(ALLOC_COVERED_SIZE - 1)
static atomic_t alloc_covered[ALLOC_COVERED_SIZE];

/* Stack depth used to determine uniqueness of an allocation. */
#define UNIQUE_ALLOC_STACK_DEPTH ((size_t)8)

/*
 * Randomness for stack hashes, making the same collisions across reboots and
 * different machines less likely.
 */
static u32 stack_hash_seed __ro_after_init;

enum kfence_counter_id {
	KFENCE_COUNTER_ALLOCATED,
	KFENCE_COUNTER_ALLOCS,
	KFENCE_COUNTER_FREES,
	KFENCE_COUNTER_ZOMBIES,
	KFENCE_COUNTER_BUGS,
	KFENCE_COUNTER_SKIP_INCOMPAT,
	KFENCE_COUNTER_SKIP_CAPACITY,
	KFENCE_COUNTER_SKIP_COVERED,
	KFENCE_COUNTER_COUNT,
};
static atomic_long_t counters[KFENCE_COUNTER_COUNT];
static const char *const counter_names[] = {
	[KFENCE_COUNTER_ALLOCATED]	= "currently allocated",
	[KFENCE_COUNTER_ALLOCS]		= "total allocations",
	[KFENCE_COUNTER_FREES]		= "total frees",
	[KFENCE_COUNTER_ZOMBIES]	= "zombie allocations",
	[KFENCE_COUNTER_BUGS]		= "total bugs",
	[KFENCE_COUNTER_SKIP_INCOMPAT]	= "skipped allocations (incompatible)",
	[KFENCE_COUNTER_SKIP_CAPACITY]	= "skipped allocations (capacity)",
	[KFENCE_COUNTER_SKIP_COVERED]	= "skipped allocations (covered)",
};
static_assert(ARRAY_SIZE(counter_names) == KFENCE_COUNTER_COUNT);

/* === Internals ============================================================ */

static inline bool should_skip_covered(void)
{
	unsigned long thresh = (CONFIG_KFENCE_NUM_OBJECTS * kfence_skip_covered_thresh) / 100;

	return atomic_long_read(&counters[KFENCE_COUNTER_ALLOCATED]) > thresh;
}

static u32 get_alloc_stack_hash(unsigned long *stack_entries, size_t num_entries)
{
	num_entries = min(num_entries, UNIQUE_ALLOC_STACK_DEPTH);
	num_entries = filter_irq_stacks(stack_entries, num_entries);
	return jhash(stack_entries, num_entries * sizeof(stack_entries[0]), stack_hash_seed);
}

/*
 * Adds (or subtracts) count @val for allocation stack trace hash
 * @alloc_stack_hash from the coverage counters.
 */
static void alloc_covered_add(u32 alloc_stack_hash, int val)
{
	int i;

	for (i = 0; i < ALLOC_COVERED_HNUM; i++) {
		atomic_add(val, &alloc_covered[alloc_stack_hash & ALLOC_COVERED_MASK]);
		alloc_stack_hash = ALLOC_COVERED_HNEXT(alloc_stack_hash);
	}
}

/*
 * Returns true if the allocation stack trace hash @alloc_stack_hash is
 * currently contained (all counters non-zero) in the coverage table.
 */
static bool alloc_covered_contains(u32 alloc_stack_hash)
{
	int i;

	for (i = 0; i < ALLOC_COVERED_HNUM; i++) {
		if (!atomic_read(&alloc_covered[alloc_stack_hash & ALLOC_COVERED_MASK]))
			return false;
		alloc_stack_hash = ALLOC_COVERED_HNEXT(alloc_stack_hash);
	}

	return true;
}
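
/*
 * Illustrative sketch (not part of the allocator): with ALLOC_COVERED_HNUM == 2,
 * each allocation stack hash maps to two counters in alloc_covered[], and a
 * stack is only considered "covered" while both counters are non-zero:
 *
 *	u32 h = get_alloc_stack_hash(stack_entries, num_entries);
 *	alloc_covered_add(h, 1);		// on allocation
 *	...
 *	alloc_covered_contains(h);		// true while an object from this
 *						// stack is still allocated
 *	alloc_covered_add(h, -1);		// on free
 *
 * False positives (distinct stacks sharing both counters) merely cause an
 * occasional allocation to be skipped when the pool is nearly full.
 */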

static bool kfence_protect(unsigned long addr)
{
	return !KFENCE_WARN_ON(!kfence_protect_page(ALIGN_DOWN(addr, PAGE_SIZE), true));
}

static bool kfence_unprotect(unsigned long addr)
{
	return !KFENCE_WARN_ON(!kfence_protect_page(ALIGN_DOWN(addr, PAGE_SIZE), false));
}

static inline unsigned long metadata_to_pageaddr(const struct kfence_metadata *meta)
{
	unsigned long offset = (meta - kfence_metadata + 1) * PAGE_SIZE * 2;
	unsigned long pageaddr = (unsigned long)&__kfence_pool[offset];

	/* The checks do not affect performance; only called from slow-paths. */

	/* Only call with a pointer into kfence_metadata. */
	if (KFENCE_WARN_ON(meta < kfence_metadata ||
			   meta >= kfence_metadata + CONFIG_KFENCE_NUM_OBJECTS))
		return 0;

	/*
	 * This metadata object only ever maps to 1 page; verify that the
	 * stored address is in the expected page.
	 */
	if (KFENCE_WARN_ON(ALIGN_DOWN(meta->addr, PAGE_SIZE) != pageaddr))
		return 0;

	return pageaddr;
}
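
/*
 * Layout sketch (derived from kfence_init_pool() and metadata_to_pageaddr(),
 * for illustration only): the pool begins with two protected pages, then
 * alternates one data page per object with a guard page:
 *
 *	__kfence_pool: [guard][guard][obj 0][guard][obj 1][guard] ...
 *
 * Object i's data page therefore lives at
 * __kfence_pool + (i + 1) * 2 * PAGE_SIZE, with protected guard pages on both
 * sides, which is exactly the offset computed above.
 */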

/*
 * Update the object's metadata state, including updating the alloc/free stacks
 * depending on the state transition.
 */
static noinline void
metadata_update_state(struct kfence_metadata *meta, enum kfence_object_state next,
		      unsigned long *stack_entries, size_t num_stack_entries)
{
	struct kfence_track *track =
		next == KFENCE_OBJECT_FREED ? &meta->free_track : &meta->alloc_track;

	lockdep_assert_held(&meta->lock);

	if (stack_entries) {
		memcpy(track->stack_entries, stack_entries,
		       num_stack_entries * sizeof(stack_entries[0]));
	} else {
		/*
		 * Skip over 1 (this) function; noinline ensures we do not
		 * accidentally skip over the caller by never inlining.
		 */
		num_stack_entries = stack_trace_save(track->stack_entries, KFENCE_STACK_DEPTH, 1);
	}
	track->num_stack_entries = num_stack_entries;
	track->pid = task_pid_nr(current);
	track->cpu = raw_smp_processor_id();
	track->ts_nsec = local_clock(); /* Same source as printk timestamps. */

	/*
	 * Pairs with READ_ONCE() in
	 *	kfence_shutdown_cache(),
	 *	kfence_handle_page_fault().
	 */
	WRITE_ONCE(meta->state, next);
}

/* Write canary byte to @addr. */
static inline bool set_canary_byte(u8 *addr)
{
	*addr = KFENCE_CANARY_PATTERN(addr);
	return true;
}

/* Check canary byte at @addr. */
static inline bool check_canary_byte(u8 *addr)
{
	struct kfence_metadata *meta;
	unsigned long flags;

	if (likely(*addr == KFENCE_CANARY_PATTERN(addr)))
		return true;

	atomic_long_inc(&counters[KFENCE_COUNTER_BUGS]);

	meta = addr_to_metadata((unsigned long)addr);
	raw_spin_lock_irqsave(&meta->lock, flags);
	kfence_report_error((unsigned long)addr, false, NULL, meta, KFENCE_ERROR_CORRUPTION);
	raw_spin_unlock_irqrestore(&meta->lock, flags);

	return false;
}

/* __always_inline this to ensure we won't do an indirect call to fn. */
static __always_inline void for_each_canary(const struct kfence_metadata *meta, bool (*fn)(u8 *))
{
	const unsigned long pageaddr = ALIGN_DOWN(meta->addr, PAGE_SIZE);
	unsigned long addr;

	/*
	 * We'll iterate over each canary byte per-side until fn() returns
	 * false. However, we'll still iterate over the canary bytes to the
	 * right of the object even if there was an error in the canary bytes
	 * to the left of the object: if check_canary_byte() generates an
	 * error, showing both sides might give more clues as to what the
	 * error is about when displaying which bytes were corrupted.
	 */

	/* Apply to left of object. */
	for (addr = pageaddr; addr < meta->addr; addr++) {
		if (!fn((u8 *)addr))
			break;
	}

	/* Apply to right of object. */
	for (addr = meta->addr + meta->size; addr < pageaddr + PAGE_SIZE; addr++) {
		if (!fn((u8 *)addr))
			break;
	}
}
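
/*
 * Illustrative sketch (not part of the allocator): bytes of the data page not
 * occupied by the object are filled with an address-derived canary pattern
 * (KFENCE_CANARY_PATTERN() in kfence.h), e.g.:
 *
 *	for_each_canary(meta, set_canary_byte);		// on allocation
 *	...
 *	for_each_canary(meta, check_canary_byte);	// on free / panic check
 *
 * A mismatching byte indicates an out-of-bounds write that stayed within the
 * data page, and is reported as KFENCE_ERROR_CORRUPTION.
 */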

static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t gfp,
				  unsigned long *stack_entries, size_t num_stack_entries,
				  u32 alloc_stack_hash)
{
	struct kfence_metadata *meta = NULL;
	unsigned long flags;
	struct slab *slab;
	void *addr;
	const bool random_right_allocate = prandom_u32_max(2);
	const bool random_fault = CONFIG_KFENCE_STRESS_TEST_FAULTS &&
				  !prandom_u32_max(CONFIG_KFENCE_STRESS_TEST_FAULTS);

	/* Try to obtain a free object. */
	raw_spin_lock_irqsave(&kfence_freelist_lock, flags);
	if (!list_empty(&kfence_freelist)) {
		meta = list_entry(kfence_freelist.next, struct kfence_metadata, list);
		list_del_init(&meta->list);
	}
	raw_spin_unlock_irqrestore(&kfence_freelist_lock, flags);
	if (!meta) {
		atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_CAPACITY]);
		return NULL;
	}

	if (unlikely(!raw_spin_trylock_irqsave(&meta->lock, flags))) {
		/*
		 * Use trylock to avoid a possible deadlock: while reporting on
		 * this object (which holds meta->lock), printk may allocate
		 * memory, re-enter the KFENCE allocation path, and attempt to
		 * grab the very same object. Bail out gracefully instead.
		 */
		raw_spin_lock_irqsave(&kfence_freelist_lock, flags);
		/* Put the object back on the freelist. */
		list_add_tail(&meta->list, &kfence_freelist);
		raw_spin_unlock_irqrestore(&kfence_freelist_lock, flags);

		return NULL;
	}

	meta->addr = metadata_to_pageaddr(meta);
	/* Unprotect if we're reusing this page. */
	if (meta->state == KFENCE_OBJECT_FREED)
		kfence_unprotect(meta->addr);

	/*
	 * Randomly place the object at either the left or the right edge of
	 * the data page; before the RNG is initialized, placement is
	 * deterministic (left-aligned).
	 */
	if (random_right_allocate) {
		/* Allocate on the "right" side, re-calculate address. */
		meta->addr += PAGE_SIZE - size;
		meta->addr = ALIGN_DOWN(meta->addr, cache->align);
	}

	addr = (void *)meta->addr;

	/* Update remaining metadata. */
	metadata_update_state(meta, KFENCE_OBJECT_ALLOCATED, stack_entries, num_stack_entries);
	/* Pairs with READ_ONCE() in kfence_shutdown_cache(). */
	WRITE_ONCE(meta->cache, cache);
	meta->size = size;
	meta->alloc_stack_hash = alloc_stack_hash;
	raw_spin_unlock_irqrestore(&meta->lock, flags);

	alloc_covered_add(alloc_stack_hash, 1);

	/* Set required slab fields. */
	slab = virt_to_slab((void *)meta->addr);
	slab->slab_cache = cache;
#if defined(CONFIG_SLUB)
	slab->objects = 1;
#elif defined(CONFIG_SLAB)
	slab->s_mem = addr;
#endif

	/* Memory initialization. */
	for_each_canary(meta, set_canary_byte);

	/*
	 * We check slab_want_init_on_alloc() ourselves, rather than letting
	 * SL*B do the initialization, as otherwise we might overwrite KFENCE's
	 * canary bytes.
	 */
	if (unlikely(slab_want_init_on_alloc(gfp, cache)))
		memzero_explicit(addr, size);
	if (cache->ctor)
		cache->ctor(addr);

	if (random_fault)
		kfence_protect(meta->addr); /* Random "faults" by protecting the object. */

	atomic_long_inc(&counters[KFENCE_COUNTER_ALLOCATED]);
	atomic_long_inc(&counters[KFENCE_COUNTER_ALLOCS]);

	return addr;
}
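
/*
 * Placement sketch (for illustration): random_right_allocate decides which
 * page edge the object is placed against, so both access directions are
 * covered by a guard page across many allocations:
 *
 *	[guard][object ...... canary bytes][guard]	// left-aligned
 *	[guard][canary bytes ...... object][guard]	// right-aligned
 *
 * The side not adjacent to a guard page is still covered by the canary bytes
 * checked in kfence_guarded_free().
 */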

static void kfence_guarded_free(void *addr, struct kfence_metadata *meta, bool zombie)
{
	struct kcsan_scoped_access assert_page_exclusive;
	unsigned long flags;
	bool init;

	raw_spin_lock_irqsave(&meta->lock, flags);

	if (meta->state != KFENCE_OBJECT_ALLOCATED || meta->addr != (unsigned long)addr) {
		/* Invalid or double-free, bail out. */
		atomic_long_inc(&counters[KFENCE_COUNTER_BUGS]);
		kfence_report_error((unsigned long)addr, false, NULL, meta,
				    KFENCE_ERROR_INVALID_FREE);
		raw_spin_unlock_irqrestore(&meta->lock, flags);
		return;
	}

	/* Detect racy use-after-free, or incorrect reallocation of this page by KFENCE. */
	kcsan_begin_scoped_access((void *)ALIGN_DOWN((unsigned long)addr, PAGE_SIZE), PAGE_SIZE,
				  KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT,
				  &assert_page_exclusive);

	if (CONFIG_KFENCE_STRESS_TEST_FAULTS)
		kfence_unprotect((unsigned long)addr); /* To check canary bytes. */

	/* Restore page protection if there was an OOB access. */
	if (meta->unprotected_page) {
		memzero_explicit((void *)ALIGN_DOWN(meta->unprotected_page, PAGE_SIZE), PAGE_SIZE);
		kfence_protect(meta->unprotected_page);
		meta->unprotected_page = 0;
	}

	/* Mark the object as freed. */
	metadata_update_state(meta, KFENCE_OBJECT_FREED, NULL, 0);
	init = slab_want_init_on_free(meta->cache);
	raw_spin_unlock_irqrestore(&meta->lock, flags);

	alloc_covered_add(meta->alloc_stack_hash, -1);

	/* Check canary bytes for memory corruption. */
	for_each_canary(meta, check_canary_byte);

	/*
	 * Clear memory if init-on-free is set. While we protect the page, the
	 * data is still there, and after a use-after-free is detected, we
	 * unprotect the page, so the data is still accessible.
	 */
	if (!zombie && unlikely(init))
		memzero_explicit(addr, meta->size);

	/* Protect to detect use-after-frees. */
	kfence_protect((unsigned long)addr);

	kcsan_end_scoped_access(&assert_page_exclusive);
	if (!zombie) {
		/* Add it to the tail of the freelist for reuse. */
		raw_spin_lock_irqsave(&kfence_freelist_lock, flags);
		KFENCE_WARN_ON(!list_empty(&meta->list));
		list_add_tail(&meta->list, &kfence_freelist);
		raw_spin_unlock_irqrestore(&kfence_freelist_lock, flags);

		atomic_long_dec(&counters[KFENCE_COUNTER_ALLOCATED]);
		atomic_long_inc(&counters[KFENCE_COUNTER_FREES]);
	} else {
		/* See kfence_shutdown_cache(). */
		atomic_long_inc(&counters[KFENCE_COUNTER_ZOMBIES]);
	}
}

static void rcu_guarded_free(struct rcu_head *h)
{
	struct kfence_metadata *meta = container_of(h, struct kfence_metadata, rcu_head);

	kfence_guarded_free((void *)meta->addr, meta, false);
}

/*
 * Initialization of the KFENCE pool after its allocation.
 * Returns 0 on success; otherwise returns the address up to which partial
 * initialization succeeded.
 */
static unsigned long kfence_init_pool(void)
{
	unsigned long addr = (unsigned long)__kfence_pool;
	struct page *pages;
	int i;

	if (!arch_kfence_init_pool())
		return addr;

	pages = virt_to_page(__kfence_pool);

	/*
	 * Set up object (data) pages: they must have PG_slab set, to avoid
	 * freeing them as real pages. Page 0 and all odd-indexed (guard)
	 * pages are skipped.
	 */
	for (i = 0; i < KFENCE_POOL_SIZE / PAGE_SIZE; i++) {
		struct slab *slab = page_slab(&pages[i]);

		if (!i || (i % 2))
			continue;

		/* Verify we do not have a compound head page. */
		if (WARN_ON(compound_head(&pages[i]) != &pages[i]))
			return addr;

		__folio_set_slab(slab_folio(slab));
#ifdef CONFIG_MEMCG
		slab->memcg_data = (unsigned long)&kfence_metadata[i / 2 - 1].objcg |
				   MEMCG_DATA_OBJCGS;
#endif
	}

	/*
	 * Protect the first 2 pages. The first page is mostly unnecessary, and
	 * merely serves as an extended guard page. However, adding one
	 * additional page in the beginning gives us an even number of pages,
	 * which simplifies the mapping of address to metadata index.
	 */
	for (i = 0; i < 2; i++) {
		if (unlikely(!kfence_protect(addr)))
			return addr;

		addr += PAGE_SIZE;
	}

	for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) {
		struct kfence_metadata *meta = &kfence_metadata[i];

		/* Initialize metadata. */
		INIT_LIST_HEAD(&meta->list);
		raw_spin_lock_init(&meta->lock);
		meta->state = KFENCE_OBJECT_UNUSED;
		meta->addr = addr; /* Initialize for validation in metadata_to_pageaddr(). */
		list_add_tail(&meta->list, &kfence_freelist);

		/* Protect the right redzone. */
		if (unlikely(!kfence_protect(addr + PAGE_SIZE)))
			return addr;

		addr += 2 * PAGE_SIZE;
	}

	return 0;
}

static bool __init kfence_init_pool_early(void)
{
	unsigned long addr;

	if (!__kfence_pool)
		return false;

	addr = kfence_init_pool();

	if (!addr) {
		/*
		 * The pool is live and will never be deallocated from this
		 * point on; tell kmemleak to ignore it, since object
		 * allocations handed out from the pool are tracked separately
		 * via the slab post-alloc hook.
		 */
		kmemleak_ignore_phys(__pa(__kfence_pool));
		return true;
	}

	/*
	 * Partial initialization: undo the slab page setup for the
	 * uninitialized tail of the pool (from @addr onwards) and return that
	 * part of the memory to memblock.
	 */
	for (char *p = (char *)addr; p < __kfence_pool + KFENCE_POOL_SIZE; p += PAGE_SIZE) {
		struct slab *slab = virt_to_slab(p);

		if (!slab)
			continue;
#ifdef CONFIG_MEMCG
		slab->memcg_data = 0;
#endif
		__folio_clear_slab(slab_folio(slab));
	}
	memblock_free_late(__pa(addr), KFENCE_POOL_SIZE - (addr - (unsigned long)__kfence_pool));
	__kfence_pool = NULL;
	return false;
}

static bool kfence_init_pool_late(void)
{
	unsigned long addr, free_size;

	addr = kfence_init_pool();

	if (!addr)
		return true;

	/* Partial initialization: free the uninitialized tail of the pool. */
	free_size = KFENCE_POOL_SIZE - (addr - (unsigned long)__kfence_pool);
#ifdef CONFIG_CONTIG_ALLOC
	free_contig_range(page_to_pfn(virt_to_page((void *)addr)), free_size / PAGE_SIZE);
#else
	free_pages_exact((void *)addr, free_size);
#endif
	__kfence_pool = NULL;
	return false;
}

/* === DebugFS Interface ==================================================== */

static int stats_show(struct seq_file *seq, void *v)
{
	int i;

	seq_printf(seq, "enabled: %i\n", READ_ONCE(kfence_enabled));
	for (i = 0; i < KFENCE_COUNTER_COUNT; i++)
		seq_printf(seq, "%s: %ld\n", counter_names[i], atomic_long_read(&counters[i]));

	return 0;
}
DEFINE_SHOW_ATTRIBUTE(stats);

/*
 * debugfs seq_file operations for /sys/kernel/debug/kfence/objects.
 * start_object() and next_object() return the object index + 1, because NULL
 * is used to stop iteration.
 */
static void *start_object(struct seq_file *seq, loff_t *pos)
{
	if (*pos < CONFIG_KFENCE_NUM_OBJECTS)
		return (void *)((long)*pos + 1);
	return NULL;
}

static void stop_object(struct seq_file *seq, void *v)
{
}

static void *next_object(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	if (*pos < CONFIG_KFENCE_NUM_OBJECTS)
		return (void *)((long)*pos + 1);
	return NULL;
}

static int show_object(struct seq_file *seq, void *v)
{
	struct kfence_metadata *meta = &kfence_metadata[(long)v - 1];
	unsigned long flags;

	raw_spin_lock_irqsave(&meta->lock, flags);
	kfence_print_object(seq, meta);
	raw_spin_unlock_irqrestore(&meta->lock, flags);
	seq_puts(seq, "---------------------------------\n");

	return 0;
}

static const struct seq_operations object_seqops = {
	.start = start_object,
	.next = next_object,
	.stop = stop_object,
	.show = show_object,
};

static int open_objects(struct inode *inode, struct file *file)
{
	return seq_open(file, &object_seqops);
}

static const struct file_operations objects_fops = {
	.open = open_objects,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};

static int __init kfence_debugfs_init(void)
{
	struct dentry *kfence_dir = debugfs_create_dir("kfence", NULL);

	debugfs_create_file("stats", 0444, kfence_dir, NULL, &stats_fops);
	debugfs_create_file("objects", 0400, kfence_dir, NULL, &objects_fops);
	return 0;
}

late_initcall(kfence_debugfs_init);
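
/*
 * Usage note (illustrative): with debugfs mounted, runtime state can be
 * inspected via the files created above, e.g.:
 *
 *	cat /sys/kernel/debug/kfence/stats
 *	cat /sys/kernel/debug/kfence/objects
 */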

/* === Panic Notifier ======================================================= */

static void kfence_check_all_canary(void)
{
	int i;

	for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) {
		struct kfence_metadata *meta = &kfence_metadata[i];

		if (meta->state == KFENCE_OBJECT_ALLOCATED)
			for_each_canary(meta, check_canary_byte);
	}
}

static int kfence_check_canary_callback(struct notifier_block *nb,
					unsigned long reason, void *arg)
{
	kfence_check_all_canary();
	return NOTIFY_OK;
}

static struct notifier_block kfence_check_canary_notifier = {
	.notifier_call = kfence_check_canary_callback,
};

/* === Allocation Gate Timer ================================================ */

static struct delayed_work kfence_timer;

#ifdef CONFIG_KFENCE_STATIC_KEYS
/* Wait queue to wake up allocation-gate timer task. */
static DECLARE_WAIT_QUEUE_HEAD(allocation_wait);

static void wake_up_kfence_timer(struct irq_work *work)
{
	wake_up(&allocation_wait);
}
static DEFINE_IRQ_WORK(wake_up_kfence_timer_work, wake_up_kfence_timer);
#endif

/*
 * Set up delayed work, which will enable and disable the static key. We need
 * to use a delayed work, since enabling/disabling the static key may sleep
 * and therefore cannot be done from timer or interrupt context.
 */
static void toggle_allocation_gate(struct work_struct *work)
{
	if (!READ_ONCE(kfence_enabled))
		return;

	atomic_set(&kfence_allocation_gate, 0);
#ifdef CONFIG_KFENCE_STATIC_KEYS
	/* Enable static key, and await allocation to happen. */
	static_branch_enable(&kfence_allocation_key);

	if (sysctl_hung_task_timeout_secs) {
		/*
		 * During low activity with no allocations we might wait a
		 * while; let's avoid the hung task warning.
		 */
		wait_event_idle_timeout(allocation_wait, atomic_read(&kfence_allocation_gate),
					sysctl_hung_task_timeout_secs * HZ / 2);
	} else {
		wait_event_idle(allocation_wait, atomic_read(&kfence_allocation_gate));
	}

	/* Disable static key and reset timer. */
	static_branch_disable(&kfence_allocation_key);
#endif
	queue_delayed_work(system_unbound_wq, &kfence_timer,
			   msecs_to_jiffies(kfence_sample_interval));
}
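
/*
 * Timing sketch (illustrative, not a specification): with
 * kfence_sample_interval == 100, roughly every 100ms the delayed work opens
 * the gate (kfence_allocation_gate = 0); the next eligible heap allocation
 * claims it (atomic_inc_return() == 1 in __kfence_alloc()) and is backed by a
 * guarded page. The gate then stays closed until the work runs again, so on
 * average at most one KFENCE allocation is set up per sample interval.
 */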

/* === Public interface ===================================================== */

void __init kfence_alloc_pool(void)
{
	if (!kfence_sample_interval)
		return;

	__kfence_pool = memblock_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);

	if (!__kfence_pool)
		pr_err("failed to allocate pool\n");
}

static void kfence_init_enable(void)
{
	if (!IS_ENABLED(CONFIG_KFENCE_STATIC_KEYS))
		static_branch_enable(&kfence_allocation_key);

	if (kfence_deferrable)
		INIT_DEFERRABLE_WORK(&kfence_timer, toggle_allocation_gate);
	else
		INIT_DELAYED_WORK(&kfence_timer, toggle_allocation_gate);

	if (kfence_check_on_panic)
		atomic_notifier_chain_register(&panic_notifier_list, &kfence_check_canary_notifier);

	WRITE_ONCE(kfence_enabled, true);
	queue_delayed_work(system_unbound_wq, &kfence_timer, 0);

	pr_info("initialized - using %lu bytes for %d objects at 0x%p-0x%p\n", KFENCE_POOL_SIZE,
		CONFIG_KFENCE_NUM_OBJECTS, (void *)__kfence_pool,
		(void *)(__kfence_pool + KFENCE_POOL_SIZE));
}

void __init kfence_init(void)
{
	stack_hash_seed = (u32)random_get_entropy();

	/* Setting kfence_sample_interval to 0 on boot disables KFENCE. */
	if (!kfence_sample_interval)
		return;

	if (!kfence_init_pool_early()) {
		pr_err("%s failed\n", __func__);
		return;
	}

	kfence_init_enable();
}

static int kfence_init_late(void)
{
	const unsigned long nr_pages = KFENCE_POOL_SIZE / PAGE_SIZE;
#ifdef CONFIG_CONTIG_ALLOC
	struct page *pages;

	pages = alloc_contig_pages(nr_pages, GFP_KERNEL, first_online_node, NULL);
	if (!pages)
		return -ENOMEM;
	__kfence_pool = page_to_virt(pages);
#else
	if (nr_pages > MAX_ORDER_NR_PAGES) {
		pr_warn("KFENCE_NUM_OBJECTS too large for buddy allocator\n");
		return -EINVAL;
	}
	__kfence_pool = alloc_pages_exact(KFENCE_POOL_SIZE, GFP_KERNEL);
	if (!__kfence_pool)
		return -ENOMEM;
#endif

	if (!kfence_init_pool_late()) {
		pr_err("%s failed\n", __func__);
		return -EBUSY;
	}

	kfence_init_enable();
	return 0;
}

static int kfence_enable_late(void)
{
	if (!__kfence_pool)
		return kfence_init_late();

	WRITE_ONCE(kfence_enabled, true);
	queue_delayed_work(system_unbound_wq, &kfence_timer, 0);
	pr_info("re-enabled\n");
	return 0;
}

void kfence_shutdown_cache(struct kmem_cache *s)
{
	unsigned long flags;
	struct kfence_metadata *meta;
	int i;

	for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) {
		bool in_use;

		meta = &kfence_metadata[i];

		/*
		 * Check locklessly first: if cache and state do not match, the
		 * object either never belonged to this cache or is no longer
		 * allocated; candidates that pass are re-checked under
		 * meta->lock below.
		 */
		if (READ_ONCE(meta->cache) != s ||
		    READ_ONCE(meta->state) != KFENCE_OBJECT_ALLOCATED)
			continue;

		raw_spin_lock_irqsave(&meta->lock, flags);
		in_use = meta->cache == s && meta->state == KFENCE_OBJECT_ALLOCATED;
		raw_spin_unlock_irqrestore(&meta->lock, flags);

		if (in_use) {
			/*
			 * This cache still has allocations, and we should not
			 * release them back into the freelist so they can
			 * still safely be used; they effectively become
			 * "zombie allocations" as the KFENCE objects are the
			 * only ones still in use while the owning cache is
			 * being destroyed.
			 *
			 * We mark them freed, so that any subsequent use shows
			 * more useful error messages that include the stack
			 * traces of the user of the object, the original
			 * allocation, and the caller to shutdown_cache().
			 */
			kfence_guarded_free((void *)meta->addr, meta, true);
		}
	}

	for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) {
		meta = &kfence_metadata[i];

		/* Clear the now-stale cache pointer of freed objects. */
		if (READ_ONCE(meta->cache) != s || READ_ONCE(meta->state) != KFENCE_OBJECT_FREED)
			continue;

		raw_spin_lock_irqsave(&meta->lock, flags);
		if (meta->cache == s && meta->state == KFENCE_OBJECT_FREED)
			meta->cache = NULL;
		raw_spin_unlock_irqrestore(&meta->lock, flags);
	}
}

void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags)
{
	unsigned long stack_entries[KFENCE_STACK_DEPTH];
	size_t num_stack_entries;
	u32 alloc_stack_hash;

	/*
	 * Perform size check before switching kfence_allocation_gate, so that
	 * we don't disable KFENCE without making an allocation.
	 */
	if (size > PAGE_SIZE) {
		atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_INCOMPAT]);
		return NULL;
	}

	/*
	 * Skip allocations from non-default zones, including DMA. We cannot
	 * guarantee that pages in the KFENCE pool will have the requested
	 * properties (e.g. reside in DMAable memory).
	 */
	if ((flags & GFP_ZONEMASK) ||
	    (s->flags & (SLAB_CACHE_DMA | SLAB_CACHE_DMA32))) {
		atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_INCOMPAT]);
		return NULL;
	}

	/* Only the first allocation to pass the gate in this interval wins. */
	if (atomic_inc_return(&kfence_allocation_gate) > 1)
		return NULL;
#ifdef CONFIG_KFENCE_STATIC_KEYS
	/*
	 * waitqueue_active() is fully ordered after the update of
	 * kfence_allocation_gate per atomic_inc_return().
	 */
	if (waitqueue_active(&allocation_wait)) {
		/*
		 * Calling wake_up() here may deadlock when allocations happen
		 * from within timer code. Use an irq_work to defer it.
		 */
		irq_work_queue(&wake_up_kfence_timer_work);
	}
#endif

	if (!READ_ONCE(kfence_enabled))
		return NULL;

	num_stack_entries = stack_trace_save(stack_entries, KFENCE_STACK_DEPTH, 0);

	/*
	 * Do expensive check for coverage of allocation in slow-path after
	 * allocation_gate has already become non-zero, even though it might
	 * mean not making any allocation within a given sample interval.
	 *
	 * This ensures reasonable allocation coverage when the pool is almost
	 * full, including avoiding long-lived allocations of the same source
	 * filling up the pool (e.g. pagecache allocations).
	 */
	alloc_stack_hash = get_alloc_stack_hash(stack_entries, num_stack_entries);
	if (should_skip_covered() && alloc_covered_contains(alloc_stack_hash)) {
		atomic_long_inc(&counters[KFENCE_COUNTER_SKIP_COVERED]);
		return NULL;
	}

	return kfence_guarded_alloc(s, size, flags, stack_entries, num_stack_entries,
				    alloc_stack_hash);
}
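
/*
 * Fast-path sketch (the actual inline wrapper lives in <linux/kfence.h> and
 * may differ in detail): the slab allocators call a small kfence_alloc()
 * helper that bails out via the static key and/or the allocation gate before
 * ever reaching __kfence_alloc(), keeping the common allocation path cheap:
 *
 *	if (!static_branch_unlikely(&kfence_allocation_key))
 *		return NULL;
 *	if (likely(atomic_read(&kfence_allocation_gate)))
 *		return NULL;
 *	return __kfence_alloc(s, size, flags);
 */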

size_t kfence_ksize(const void *addr)
{
	const struct kfence_metadata *meta = addr_to_metadata((unsigned long)addr);

	/*
	 * Read locklessly -- if there is a race with __kfence_alloc(), this is
	 * either a use-after-free or invalid access.
	 */
	return meta ? meta->size : 0;
}

void *kfence_object_start(const void *addr)
{
	const struct kfence_metadata *meta = addr_to_metadata((unsigned long)addr);

	/*
	 * Read locklessly -- if there is a race with __kfence_alloc(), this is
	 * either a use-after-free or invalid access.
	 */
	return meta ? (void *)meta->addr : NULL;
}

void __kfence_free(void *addr)
{
	struct kfence_metadata *meta = addr_to_metadata((unsigned long)addr);

#ifdef CONFIG_MEMCG
	KFENCE_WARN_ON(meta->objcg);
#endif
	/*
	 * If the objects of the cache are SLAB_TYPESAFE_BY_RCU, defer freeing
	 * the object, as the object page may be recycled for other-typed
	 * objects once it has been freed. meta->cache may be NULL if the
	 * cache was destroyed.
	 */
	if (unlikely(meta->cache && (meta->cache->flags & SLAB_TYPESAFE_BY_RCU)))
		call_rcu(&meta->rcu_head, rcu_guarded_free);
	else
		kfence_guarded_free(addr, meta, false);
}

bool kfence_handle_page_fault(unsigned long addr, bool is_write, struct pt_regs *regs)
{
	const int page_index = (addr - (unsigned long)__kfence_pool) / PAGE_SIZE;
	struct kfence_metadata *to_report = NULL;
	enum kfence_error_type error_type;
	unsigned long flags;

	if (!is_kfence_address((void *)addr))
		return false;

	if (!READ_ONCE(kfence_enabled)) /* If disabled at runtime ... */
		return kfence_unprotect(addr); /* ... unprotect and proceed. */

	atomic_long_inc(&counters[KFENCE_COUNTER_BUGS]);

	if (page_index % 2) {
		/* This is a guard page, report an out-of-bounds access. */
		struct kfence_metadata *meta;
		int distance = 0;

		meta = addr_to_metadata(addr - PAGE_SIZE);
		if (meta && READ_ONCE(meta->state) == KFENCE_OBJECT_ALLOCATED) {
			to_report = meta;
			/* Data race ok; distance calculation approximate. */
			distance = addr - data_race(meta->addr + meta->size);
		}

		meta = addr_to_metadata(addr + PAGE_SIZE);
		if (meta && READ_ONCE(meta->state) == KFENCE_OBJECT_ALLOCATED) {
			/* Data race ok; distance calculation approximate. */
			if (!to_report || distance > data_race(meta->addr) - addr)
				to_report = meta;
		}

		if (!to_report)
			goto out;

		raw_spin_lock_irqsave(&to_report->lock, flags);
		to_report->unprotected_page = addr;
		error_type = KFENCE_ERROR_OOB;

		/*
		 * If the object was freed before we took the lock, we can
		 * still report this as an OOB -- the report will simply show
		 * the stack trace of the free as well.
		 */
	} else {
		to_report = addr_to_metadata(addr);
		if (!to_report)
			goto out;

		raw_spin_lock_irqsave(&to_report->lock, flags);
		error_type = KFENCE_ERROR_UAF;
		/*
		 * We may race with __kfence_alloc(), and it is possible that a
		 * freed object may be reallocated. We simply report this as a
		 * use-after-free, with the stack trace showing the place where
		 * the object was re-allocated.
		 */
	}

out:
	if (to_report) {
		kfence_report_error(addr, is_write, regs, to_report, error_type);
		raw_spin_unlock_irqrestore(&to_report->lock, flags);
	} else {
		/* This may be a UAF or OOB access, but we can't be sure. */
		kfence_report_error(addr, is_write, regs, NULL, KFENCE_ERROR_INVALID);
	}

	return kfence_unprotect(addr); /* Unprotect and let access proceed. */
}
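
/*
 * Integration note (illustrative; the call site is architecture-specific):
 * an architecture's page fault handler is expected to call
 * kfence_handle_page_fault(addr, is_write, regs) for faults within the KFENCE
 * pool. A true return value means the faulting page was unprotected so that
 * execution can continue past the (already printed) report.
 */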