0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * Generic stack depot for storing stack traces.
0004  *
0005  * Some debugging tools need to save stack traces of certain events which can
0006  * be later presented to the user. For example, KASAN needs to save alloc and
0007  * free stacks for each object, but storing two stack traces per object
0008  * requires too much memory (e.g. SLUB_DEBUG needs 256 bytes per object for
0009  * that).
0010  *
0011  * Instead, stack depot maintains a hashtable of unique stacktraces. Since alloc
0012  * and free stacks repeat a lot, we save about 100x space.
0013  * Stacks are never removed from the depot; they are stored one after another
0014  * within contiguous memory allocations.
0015  *
0016  * Author: Alexander Potapenko <glider@google.com>
0017  * Copyright (C) 2016 Google, Inc.
0018  *
0019  * Based on code by Dmitry Chernenkov.
0020  */
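/*
 * A minimal usage sketch (illustrative only, not part of this file): a caller
 * typically captures a trace with stack_trace_save(), deduplicates it into the
 * depot with stack_depot_save(), keeps only the 32-bit handle, and expands the
 * handle back into entries on demand:
 *
 *	unsigned long entries[16];
 *	unsigned long *saved;
 *	unsigned int nr_entries;
 *	depot_stack_handle_t handle;
 *
 *	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
 *	handle = stack_depot_save(entries, nr_entries, GFP_NOWAIT);
 *	if (handle)
 *		nr_entries = stack_depot_fetch(handle, &saved);
 */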
0021 
0022 #include <linux/gfp.h>
0023 #include <linux/jhash.h>
0024 #include <linux/kernel.h>
0025 #include <linux/mm.h>
0026 #include <linux/mutex.h>
0027 #include <linux/percpu.h>
0028 #include <linux/printk.h>
0029 #include <linux/slab.h>
0030 #include <linux/stacktrace.h>
0031 #include <linux/stackdepot.h>
0032 #include <linux/string.h>
0033 #include <linux/types.h>
0034 #include <linux/memblock.h>
0035 #include <linux/kasan-enabled.h>
0036 
0037 #define DEPOT_STACK_BITS (sizeof(depot_stack_handle_t) * 8)
0038 
0039 #define STACK_ALLOC_NULL_PROTECTION_BITS 1
0040 #define STACK_ALLOC_ORDER 2 /* 'Slab' size order for stack depot, 4 pages */
0041 #define STACK_ALLOC_SIZE (1LL << (PAGE_SHIFT + STACK_ALLOC_ORDER))
0042 #define STACK_ALLOC_ALIGN 4
0043 #define STACK_ALLOC_OFFSET_BITS (STACK_ALLOC_ORDER + PAGE_SHIFT - \
0044                     STACK_ALLOC_ALIGN)
0045 #define STACK_ALLOC_INDEX_BITS (DEPOT_STACK_BITS - \
0046         STACK_ALLOC_NULL_PROTECTION_BITS - STACK_ALLOC_OFFSET_BITS)
0047 #define STACK_ALLOC_SLABS_CAP 8192
0048 #define STACK_ALLOC_MAX_SLABS \
0049     (((1LL << (STACK_ALLOC_INDEX_BITS)) < STACK_ALLOC_SLABS_CAP) ? \
0050      (1LL << (STACK_ALLOC_INDEX_BITS)) : STACK_ALLOC_SLABS_CAP)
0051 
0052 /* The compact structure to store the reference to stacks. */
0053 union handle_parts {
0054     depot_stack_handle_t handle;
0055     struct {
0056         u32 slabindex : STACK_ALLOC_INDEX_BITS;
0057         u32 offset : STACK_ALLOC_OFFSET_BITS;
0058         u32 valid : STACK_ALLOC_NULL_PROTECTION_BITS;
0059     };
0060 };
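/*
 * Illustrative arithmetic (assuming 4 KiB pages, i.e. PAGE_SHIFT == 12, and a
 * 32-bit depot_stack_handle_t): each slab is 1 << (12 + 2) = 16 KiB, so
 * STACK_ALLOC_OFFSET_BITS = 12 + 2 - 4 = 10 and STACK_ALLOC_INDEX_BITS =
 * 32 - 1 - 10 = 21, which the cap then limits to STACK_ALLOC_SLABS_CAP = 8192
 * slabs (8192 * 16 KiB = 128 MiB of stack storage at most). Decoding a handle
 * is just a matter of reading the bitfields, e.g.:
 *
 *	union handle_parts parts = { .handle = handle };
 *	void *record = stack_slabs[parts.slabindex] +
 *		       (parts.offset << STACK_ALLOC_ALIGN);
 */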
0061 
0062 struct stack_record {
0063     struct stack_record *next;  /* Link in the hashtable */
0064     u32 hash;           /* Hash in the hashtable */
0065     u32 size;           /* Number of frames in the stack */
0066     union handle_parts handle;
0067     unsigned long entries[];    /* Variable-sized array of entries. */
0068 };
0069 
0070 static bool __stack_depot_want_early_init __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT);
0071 static bool __stack_depot_early_init_passed __initdata;
0072 
0073 static void *stack_slabs[STACK_ALLOC_MAX_SLABS];
0074 
0075 static int depot_index;
0076 static int next_slab_inited;
0077 static size_t depot_offset;
0078 static DEFINE_RAW_SPINLOCK(depot_lock);
0079 
0080 static bool init_stack_slab(void **prealloc)
0081 {
0082     if (!*prealloc)
0083         return false;
0084     /*
0085      * This smp_load_acquire() pairs with smp_store_release() to
0086      * |next_slab_inited| below and in depot_alloc_stack().
0087      */
0088     if (smp_load_acquire(&next_slab_inited))
0089         return true;
0090     if (stack_slabs[depot_index] == NULL) {
0091         stack_slabs[depot_index] = *prealloc;
0092         *prealloc = NULL;
0093     } else {
0094         /* If this is the last depot slab, do not touch the next one. */
0095         if (depot_index + 1 < STACK_ALLOC_MAX_SLABS) {
0096             stack_slabs[depot_index + 1] = *prealloc;
0097             *prealloc = NULL;
0098         }
0099         /*
0100          * This smp_store_release pairs with smp_load_acquire() from
0101          * |next_slab_inited| above and in stack_depot_save().
0102          */
0103         smp_store_release(&next_slab_inited, 1);
0104     }
0105     return true;
0106 }
0107 
0108 /* Allocation of a new stack in raw storage */
0109 static struct stack_record *
0110 depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc)
0111 {
0112     struct stack_record *stack;
0113     size_t required_size = struct_size(stack, entries, size);
0114 
0115     required_size = ALIGN(required_size, 1 << STACK_ALLOC_ALIGN);
0116 
0117     if (unlikely(depot_offset + required_size > STACK_ALLOC_SIZE)) {
0118         if (unlikely(depot_index + 1 >= STACK_ALLOC_MAX_SLABS)) {
0119             WARN_ONCE(1, "Stack depot reached limit capacity");
0120             return NULL;
0121         }
0122         depot_index++;
0123         depot_offset = 0;
0124         /*
0125          * smp_store_release() here pairs with smp_load_acquire() from
0126          * |next_slab_inited| in stack_depot_save() and
0127          * init_stack_slab().
0128          */
0129         if (depot_index + 1 < STACK_ALLOC_MAX_SLABS)
0130             smp_store_release(&next_slab_inited, 0);
0131     }
0132     init_stack_slab(prealloc);
0133     if (stack_slabs[depot_index] == NULL)
0134         return NULL;
0135 
0136     stack = stack_slabs[depot_index] + depot_offset;
0137 
0138     stack->hash = hash;
0139     stack->size = size;
0140     stack->handle.slabindex = depot_index;
0141     stack->handle.offset = depot_offset >> STACK_ALLOC_ALIGN;
0142     stack->handle.valid = 1;
0143     memcpy(stack->entries, entries, flex_array_size(stack, entries, size));
0144     depot_offset += required_size;
0145 
0146     return stack;
0147 }
0148 
0149 /* one hash table bucket entry per 16kB of memory */
0150 #define STACK_HASH_SCALE    14
0151 /* limited between 4k and 1M buckets */
0152 #define STACK_HASH_ORDER_MIN    12
0153 #define STACK_HASH_ORDER_MAX    20
0154 #define STACK_HASH_SEED 0x9747b28c
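/*
 * Rough sizing example (not normative): with STACK_HASH_SCALE == 14, a machine
 * with about 16 GiB of usable memory gets roughly 16 GiB / 16 KiB = 2^20
 * buckets, which is exactly the STACK_HASH_ORDER_MAX clamp; smaller machines
 * scale down until the STACK_HASH_ORDER_MIN floor of 2^12 buckets.
 */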
0155 
0156 static unsigned int stack_hash_order;
0157 static unsigned int stack_hash_mask;
0158 
0159 static bool stack_depot_disable;
0160 static struct stack_record **stack_table;
0161 
0162 static int __init is_stack_depot_disabled(char *str)
0163 {
0164     int ret;
0165 
0166     ret = kstrtobool(str, &stack_depot_disable);
0167     if (!ret && stack_depot_disable) {
0168         pr_info("Stack Depot is disabled\n");
0169         stack_table = NULL;
0170     }
0171     return 0;
0172 }
0173 early_param("stack_depot_disable", is_stack_depot_disabled);
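/*
 * For example, booting with "stack_depot_disable=on" (or "=1") on the kernel
 * command line turns the depot off before any stacks can be stored;
 * kstrtobool() accepts the usual on/off/y/n/1/0 spellings.
 */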
0174 
0175 void __init stack_depot_want_early_init(void)
0176 {
0177     /* Too late to request early init now */
0178     WARN_ON(__stack_depot_early_init_passed);
0179 
0180     __stack_depot_want_early_init = true;
0181 }
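/*
 * A hedged sketch of how a debugging feature could opt in to early init from
 * its own early_param() handler ("my_debug_feature" is hypothetical and shown
 * only for illustration):
 *
 *	static bool my_debug_feature_enabled __initdata;
 *
 *	static int __init early_my_debug_feature(char *arg)
 *	{
 *		my_debug_feature_enabled = true;
 *		stack_depot_want_early_init();
 *		return 0;
 *	}
 *	early_param("my_debug_feature", early_my_debug_feature);
 */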
0182 
0183 int __init stack_depot_early_init(void)
0184 {
0185     unsigned long entries = 0;
0186 
0187     /* This is supposed to be called only once, from mm_init() */
0188     if (WARN_ON(__stack_depot_early_init_passed))
0189         return 0;
0190 
0191     __stack_depot_early_init_passed = true;
0192 
0193     if (kasan_enabled() && !stack_hash_order)
0194         stack_hash_order = STACK_HASH_ORDER_MAX;
0195 
0196     if (!__stack_depot_want_early_init || stack_depot_disable)
0197         return 0;
0198 
0199     if (stack_hash_order)
0200         entries = 1UL <<  stack_hash_order;
0201     stack_table = alloc_large_system_hash("stackdepot",
0202                         sizeof(struct stack_record *),
0203                         entries,
0204                         STACK_HASH_SCALE,
0205                         HASH_EARLY | HASH_ZERO,
0206                         NULL,
0207                         &stack_hash_mask,
0208                         1UL << STACK_HASH_ORDER_MIN,
0209                         1UL << STACK_HASH_ORDER_MAX);
0210 
0211     if (!stack_table) {
0212         pr_err("Stack Depot hash table allocation failed, disabling\n");
0213         stack_depot_disable = true;
0214         return -ENOMEM;
0215     }
0216 
0217     return 0;
0218 }
0219 
0220 int stack_depot_init(void)
0221 {
0222     static DEFINE_MUTEX(stack_depot_init_mutex);
0223     int ret = 0;
0224 
0225     mutex_lock(&stack_depot_init_mutex);
0226     if (!stack_depot_disable && !stack_table) {
0227         unsigned long entries;
0228         int scale = STACK_HASH_SCALE;
0229 
0230         if (stack_hash_order) {
0231             entries = 1UL << stack_hash_order;
0232         } else {
0233             entries = nr_free_buffer_pages();
0234             entries = roundup_pow_of_two(entries);
0235 
0236             if (scale > PAGE_SHIFT)
0237                 entries >>= (scale - PAGE_SHIFT);
0238             else
0239                 entries <<= (PAGE_SHIFT - scale);
0240         }
0241 
0242         if (entries < 1UL << STACK_HASH_ORDER_MIN)
0243             entries = 1UL << STACK_HASH_ORDER_MIN;
0244         if (entries > 1UL << STACK_HASH_ORDER_MAX)
0245             entries = 1UL << STACK_HASH_ORDER_MAX;
0246 
0247         pr_info("Stack Depot allocating hash table of %lu entries with kvcalloc\n",
0248                 entries);
0249         stack_table = kvcalloc(entries, sizeof(struct stack_record *), GFP_KERNEL);
0250         if (!stack_table) {
0251             pr_err("Stack Depot hash table allocation failed, disabling\n");
0252             stack_depot_disable = true;
0253             ret = -ENOMEM;
0254         }
0255         stack_hash_mask = entries - 1;
0256     }
0257     mutex_unlock(&stack_depot_init_mutex);
0258     return ret;
0259 }
0260 EXPORT_SYMBOL_GPL(stack_depot_init);
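/*
 * Typical lazy-init pattern (a sketch, not a prescription): a user that cannot
 * rely on CONFIG_STACKDEPOT_ALWAYS_INIT calls stack_depot_init() once from a
 * context where GFP_KERNEL allocations are fine, and only then starts saving
 * stacks from its hot paths:
 *
 *	if (stack_depot_init())
 *		return -ENOMEM;
 *	...
 *	handle = stack_depot_save(entries, nr_entries, GFP_ATOMIC);
 */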
0261 
0262 /* Calculate hash for a stack */
0263 static inline u32 hash_stack(unsigned long *entries, unsigned int size)
0264 {
0265     return jhash2((u32 *)entries,
0266               array_size(size,  sizeof(*entries)) / sizeof(u32),
0267               STACK_HASH_SEED);
0268 }
0269 
0270 /* Use our own, non-instrumented version of memcmp().
0271  *
0272  * We actually don't care about the order, just the equality.
0273  */
0274 static inline
0275 int stackdepot_memcmp(const unsigned long *u1, const unsigned long *u2,
0276             unsigned int n)
0277 {
0278     for ( ; n-- ; u1++, u2++) {
0279         if (*u1 != *u2)
0280             return 1;
0281     }
0282     return 0;
0283 }
0284 
0285 /* Find a stack that is equal to the one stored in entries in the hash */
0286 static inline struct stack_record *find_stack(struct stack_record *bucket,
0287                          unsigned long *entries, int size,
0288                          u32 hash)
0289 {
0290     struct stack_record *found;
0291 
0292     for (found = bucket; found; found = found->next) {
0293         if (found->hash == hash &&
0294             found->size == size &&
0295             !stackdepot_memcmp(entries, found->entries, size))
0296             return found;
0297     }
0298     return NULL;
0299 }
0300 
0301 /**
0302  * stack_depot_snprint - print stack entries from a depot into a buffer
0303  *
0304  * @handle: Stack depot handle which was returned from
0305  *      stack_depot_save().
0306  * @buf:    Pointer to the print buffer
0307  *
0308  * @size:   Size of the print buffer
0309  *
0310  * @spaces: Number of leading spaces to print
0311  *
0312  * Return:  Number of bytes printed.
0313  */
0314 int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size,
0315                int spaces)
0316 {
0317     unsigned long *entries;
0318     unsigned int nr_entries;
0319 
0320     nr_entries = stack_depot_fetch(handle, &entries);
0321     return nr_entries ? stack_trace_snprint(buf, size, entries, nr_entries,
0322                         spaces) : 0;
0323 }
0324 EXPORT_SYMBOL_GPL(stack_depot_snprint);
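/*
 * For instance (illustrative only), a caller that wants the trace as text,
 * indented by two spaces, might do:
 *
 *	char buf[512];
 *
 *	if (stack_depot_snprint(handle, buf, sizeof(buf), 2))
 *		pr_info("%s", buf);
 */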
0325 
0326 /**
0327  * stack_depot_print - print stack entries from a depot
0328  *
0329  * @stack:      Stack depot handle which was returned from
0330  *          stack_depot_save().
0331  *
0332  */
0333 void stack_depot_print(depot_stack_handle_t stack)
0334 {
0335     unsigned long *entries;
0336     unsigned int nr_entries;
0337 
0338     nr_entries = stack_depot_fetch(stack, &entries);
0339     if (nr_entries > 0)
0340         stack_trace_print(entries, nr_entries, 0);
0341 }
0342 EXPORT_SYMBOL_GPL(stack_depot_print);
0343 
0344 /**
0345  * stack_depot_fetch - Fetch stack entries from a depot
0346  *
0347  * @handle:     Stack depot handle which was returned from
0348  *          stack_depot_save().
0349  * @entries:        Pointer to store the entries address
0350  *
0351  * Return: The number of trace entries for this depot.
0352  */
0353 unsigned int stack_depot_fetch(depot_stack_handle_t handle,
0354                    unsigned long **entries)
0355 {
0356     union handle_parts parts = { .handle = handle };
0357     void *slab;
0358     size_t offset = parts.offset << STACK_ALLOC_ALIGN;
0359     struct stack_record *stack;
0360 
0361     *entries = NULL;
0362     if (!handle)
0363         return 0;
0364 
0365     if (parts.slabindex > depot_index) {
0366         WARN(1, "slab index %d out of bounds (%d) for stack id %08x\n",
0367             parts.slabindex, depot_index, handle);
0368         return 0;
0369     }
0370     slab = stack_slabs[parts.slabindex];
0371     if (!slab)
0372         return 0;
0373     stack = slab + offset;
0374 
0375     *entries = stack->entries;
0376     return stack->size;
0377 }
0378 EXPORT_SYMBOL_GPL(stack_depot_fetch);
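/*
 * Sketch of walking the fetched entries by hand (the %pS printk format
 * symbolizes each return address):
 *
 *	unsigned long *entries;
 *	unsigned int nr, i;
 *
 *	nr = stack_depot_fetch(handle, &entries);
 *	for (i = 0; i < nr; i++)
 *		pr_info("  %pS\n", (void *)entries[i]);
 */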
0379 
0380 /**
0381  * __stack_depot_save - Save a stack trace from an array
0382  *
0383  * @entries:        Pointer to storage array
0384  * @nr_entries:     Size of the storage array
0385  * @alloc_flags:    Allocation gfp flags
0386  * @can_alloc:      Allocate stack slabs (increased chance of failure if false)
0387  *
0388  * Saves a stack trace from @entries array of size @nr_entries. If @can_alloc is
0389  * %true, is allowed to replenish the stack slab pool in case no space is left
0390  * (allocates using GFP flags of @alloc_flags). If @can_alloc is %false, avoids
0391  * any allocations and will fail if no space is left to store the stack trace.
0392  *
0393  * If the stack trace in @entries is from an interrupt, only the portion up to
0394  * interrupt entry is saved.
0395  *
0396  * Context: Any context, but setting @can_alloc to %false is required if
0397  *          alloc_pages() cannot be used from the current context. Currently
0398  *          this is the case from contexts where neither %GFP_ATOMIC nor
0399  *          %GFP_NOWAIT can be used (NMI, raw_spin_lock).
0400  *
0401  * Return: The handle of the stack struct stored in depot, 0 on failure.
0402  */
0403 depot_stack_handle_t __stack_depot_save(unsigned long *entries,
0404                     unsigned int nr_entries,
0405                     gfp_t alloc_flags, bool can_alloc)
0406 {
0407     struct stack_record *found = NULL, **bucket;
0408     depot_stack_handle_t retval = 0;
0409     struct page *page = NULL;
0410     void *prealloc = NULL;
0411     unsigned long flags;
0412     u32 hash;
0413 
0414     /*
0415      * If this stack trace is from an interrupt, including anything before
0416      * interrupt entry usually leads to unbounded stackdepot growth.
0417      *
0418      * Because use of filter_irq_stacks() is a requirement to ensure
0419      * stackdepot can efficiently deduplicate interrupt stacks, always use
0420      * filter_irq_stacks() here to simplify all callers' use of stackdepot.
0421      */
0422     nr_entries = filter_irq_stacks(entries, nr_entries);
0423 
0424     if (unlikely(nr_entries == 0) || stack_depot_disable)
0425         goto fast_exit;
0426 
0427     hash = hash_stack(entries, nr_entries);
0428     bucket = &stack_table[hash & stack_hash_mask];
0429 
0430     /*
0431      * Fast path: look the stack trace up without locking.
0432      * The smp_load_acquire() here pairs with smp_store_release() to
0433      * |bucket| below.
0434      */
0435     found = find_stack(smp_load_acquire(bucket), entries,
0436                nr_entries, hash);
0437     if (found)
0438         goto exit;
0439 
0440     /*
0441      * Check if the current or the next stack slab need to be initialized.
0442      * If so, allocate the memory - we won't be able to do that under the
0443      * lock.
0444      *
0445      * The smp_load_acquire() here pairs with smp_store_release() to
0446      * |next_slab_inited| in depot_alloc_stack() and init_stack_slab().
0447      */
0448     if (unlikely(can_alloc && !smp_load_acquire(&next_slab_inited))) {
0449         /*
0450          * Zero out zone modifiers, as we don't have specific zone
0451          * requirements. Keep the flags related to allocation in atomic
0452          * contexts and I/O.
0453          */
0454         alloc_flags &= ~GFP_ZONEMASK;
0455         alloc_flags &= (GFP_ATOMIC | GFP_KERNEL);
0456         alloc_flags |= __GFP_NOWARN;
0457         page = alloc_pages(alloc_flags, STACK_ALLOC_ORDER);
0458         if (page)
0459             prealloc = page_address(page);
0460     }
0461 
0462     raw_spin_lock_irqsave(&depot_lock, flags);
0463 
0464     found = find_stack(*bucket, entries, nr_entries, hash);
0465     if (!found) {
0466         struct stack_record *new = depot_alloc_stack(entries, nr_entries, hash, &prealloc);
0467 
0468         if (new) {
0469             new->next = *bucket;
0470             /*
0471              * This smp_store_release() pairs with
0472              * smp_load_acquire() from |bucket| above.
0473              */
0474             smp_store_release(bucket, new);
0475             found = new;
0476         }
0477     } else if (prealloc) {
0478         /*
0479          * We didn't need to store this stack trace, but let's keep
0480          * the preallocated memory for the future.
0481          */
0482         WARN_ON(!init_stack_slab(&prealloc));
0483     }
0484 
0485     raw_spin_unlock_irqrestore(&depot_lock, flags);
0486 exit:
0487     if (prealloc) {
0488         /* Nobody used this memory, ok to free it. */
0489         free_pages((unsigned long)prealloc, STACK_ALLOC_ORDER);
0490     }
0491     if (found)
0492         retval = found->handle.handle;
0493 fast_exit:
0494     return retval;
0495 }
0496 EXPORT_SYMBOL_GPL(__stack_depot_save);
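/*
 * A hedged example of the @can_alloc == false case: from a context where
 * alloc_pages() must not be called (e.g. under a raw spinlock or in NMI),
 * pass false and accept that the save may return 0 if no slab space is left:
 *
 *	handle = __stack_depot_save(entries, nr_entries, 0, false);
 *	if (!handle)
 *		... fall back, e.g. drop the trace ...
 */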
0497 
0498 /**
0499  * stack_depot_save - Save a stack trace from an array
0500  *
0501  * @entries:        Pointer to storage array
0502  * @nr_entries:     Size of the storage array
0503  * @alloc_flags:    Allocation gfp flags
0504  *
0505  * Context: Contexts where allocations via alloc_pages() are allowed.
0506  *          See __stack_depot_save() for more details.
0507  *
0508  * Return: The handle of the stack struct stored in depot, 0 on failure.
0509  */
0510 depot_stack_handle_t stack_depot_save(unsigned long *entries,
0511                       unsigned int nr_entries,
0512                       gfp_t alloc_flags)
0513 {
0514     return __stack_depot_save(entries, nr_entries, alloc_flags, true);
0515 }
0516 EXPORT_SYMBOL_GPL(stack_depot_save);