0001 /*
0002  * Slab allocator functions that are independent of the allocator strategy
0003  *
0004  * (C) 2012 Christoph Lameter <cl@linux.com>
0005  */
0006 #include <linux/slab.h>
0007 
0008 #include <linux/mm.h>
0009 #include <linux/poison.h>
0010 #include <linux/interrupt.h>
0011 #include <linux/memory.h>
0012 #include <linux/compiler.h>
0013 #include <linux/module.h>
0014 #include <linux/cpu.h>
0015 #include <linux/uaccess.h>
0016 #include <linux/seq_file.h>
0017 #include <linux/proc_fs.h>
0018 #include <asm/cacheflush.h>
0019 #include <asm/tlbflush.h>
0020 #include <asm/page.h>
0021 #include <linux/memcontrol.h>
0022 
0023 #define CREATE_TRACE_POINTS
0024 #include <trace/events/kmem.h>
0025 
0026 #include "slab.h"
0027 
0028 enum slab_state slab_state;
0029 LIST_HEAD(slab_caches);
0030 DEFINE_MUTEX(slab_mutex);
0031 struct kmem_cache *kmem_cache;
0032 
0033 /*
0034  * Set of flags that will prevent slab merging
0035  */
0036 #define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
0037         SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \
0038         SLAB_FAILSLAB | SLAB_KASAN)
0039 
0040 #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
0041              SLAB_NOTRACK | SLAB_ACCOUNT)
0042 
0043 /*
0044  * Merge control. If this is set then no merging of slab caches will occur.
0045  * (Could be removed. This was introduced to pacify the merge skeptics.)
0046  */
0047 static int slab_nomerge;
0048 
0049 static int __init setup_slab_nomerge(char *str)
0050 {
0051     slab_nomerge = 1;
0052     return 1;
0053 }
0054 
0055 #ifdef CONFIG_SLUB
0056 __setup_param("slub_nomerge", slub_nomerge, setup_slab_nomerge, 0);
0057 #endif
0058 
0059 __setup("slab_nomerge", setup_slab_nomerge);
0060 
0061 /*
0062  * Determine the size of a slab object
0063  */
0064 unsigned int kmem_cache_size(struct kmem_cache *s)
0065 {
0066     return s->object_size;
0067 }
0068 EXPORT_SYMBOL(kmem_cache_size);
0069 
0070 #ifdef CONFIG_DEBUG_VM
0071 static int kmem_cache_sanity_check(const char *name, size_t size)
0072 {
0073     struct kmem_cache *s = NULL;
0074 
0075     if (!name || in_interrupt() || size < sizeof(void *) ||
0076         size > KMALLOC_MAX_SIZE) {
0077         pr_err("kmem_cache_create(%s) integrity check failed\n", name);
0078         return -EINVAL;
0079     }
0080 
0081     list_for_each_entry(s, &slab_caches, list) {
0082         char tmp;
0083         int res;
0084 
0085         /*
0086          * This happens when the module gets unloaded and doesn't
0087          * destroy its slab cache and no-one else reuses the vmalloc
0088          * area of the module.  Print a warning.
0089          */
0090         res = probe_kernel_address(s->name, tmp);
0091         if (res) {
0092             pr_err("Slab cache with size %d has lost its name\n",
0093                    s->object_size);
0094             continue;
0095         }
0096     }
0097 
0098     WARN_ON(strchr(name, ' ')); /* It confuses parsers */
0099     return 0;
0100 }
0101 #else
0102 static inline int kmem_cache_sanity_check(const char *name, size_t size)
0103 {
0104     return 0;
0105 }
0106 #endif
0107 
0108 void __kmem_cache_free_bulk(struct kmem_cache *s, size_t nr, void **p)
0109 {
0110     size_t i;
0111 
0112     for (i = 0; i < nr; i++) {
0113         if (s)
0114             kmem_cache_free(s, p[i]);
0115         else
0116             kfree(p[i]);
0117     }
0118 }
0119 
0120 int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
0121                                 void **p)
0122 {
0123     size_t i;
0124 
0125     for (i = 0; i < nr; i++) {
0126         void *x = p[i] = kmem_cache_alloc(s, flags);
0127         if (!x) {
0128             __kmem_cache_free_bulk(s, i, p);
0129             return 0;
0130         }
0131     }
0132     return i;
0133 }
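
/*
 * Sketch of a hypothetical caller of the bulk API (the __kmem_cache_*_bulk()
 * helpers above are generic fallbacks used behind kmem_cache_alloc_bulk() and
 * kmem_cache_free_bulk() when the allocator has no faster path); "cachep" and
 * the array size are made up for the example:
 *
 *     void *objs[16];
 *
 *     if (kmem_cache_alloc_bulk(cachep, GFP_KERNEL, ARRAY_SIZE(objs), objs)) {
 *         ... use the objects ...
 *         kmem_cache_free_bulk(cachep, ARRAY_SIZE(objs), objs);
 *     }
 *
 * The allocation is all-or-nothing: on failure 0 is returned and any objects
 * obtained so far have already been handed back via __kmem_cache_free_bulk().
 */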
0134 
0135 #if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
0136 void slab_init_memcg_params(struct kmem_cache *s)
0137 {
0138     s->memcg_params.is_root_cache = true;
0139     INIT_LIST_HEAD(&s->memcg_params.list);
0140     RCU_INIT_POINTER(s->memcg_params.memcg_caches, NULL);
0141 }
0142 
0143 static int init_memcg_params(struct kmem_cache *s,
0144         struct mem_cgroup *memcg, struct kmem_cache *root_cache)
0145 {
0146     struct memcg_cache_array *arr;
0147 
0148     if (memcg) {
0149         s->memcg_params.is_root_cache = false;
0150         s->memcg_params.memcg = memcg;
0151         s->memcg_params.root_cache = root_cache;
0152         return 0;
0153     }
0154 
0155     slab_init_memcg_params(s);
0156 
0157     if (!memcg_nr_cache_ids)
0158         return 0;
0159 
0160     arr = kzalloc(sizeof(struct memcg_cache_array) +
0161               memcg_nr_cache_ids * sizeof(void *),
0162               GFP_KERNEL);
0163     if (!arr)
0164         return -ENOMEM;
0165 
0166     RCU_INIT_POINTER(s->memcg_params.memcg_caches, arr);
0167     return 0;
0168 }
0169 
0170 static void destroy_memcg_params(struct kmem_cache *s)
0171 {
0172     if (is_root_cache(s))
0173         kfree(rcu_access_pointer(s->memcg_params.memcg_caches));
0174 }
0175 
0176 static int update_memcg_params(struct kmem_cache *s, int new_array_size)
0177 {
0178     struct memcg_cache_array *old, *new;
0179 
0180     if (!is_root_cache(s))
0181         return 0;
0182 
0183     new = kzalloc(sizeof(struct memcg_cache_array) +
0184               new_array_size * sizeof(void *), GFP_KERNEL);
0185     if (!new)
0186         return -ENOMEM;
0187 
0188     old = rcu_dereference_protected(s->memcg_params.memcg_caches,
0189                     lockdep_is_held(&slab_mutex));
0190     if (old)
0191         memcpy(new->entries, old->entries,
0192                memcg_nr_cache_ids * sizeof(void *));
0193 
0194     rcu_assign_pointer(s->memcg_params.memcg_caches, new);
0195     if (old)
0196         kfree_rcu(old, rcu);
0197     return 0;
0198 }
0199 
0200 int memcg_update_all_caches(int num_memcgs)
0201 {
0202     struct kmem_cache *s;
0203     int ret = 0;
0204 
0205     mutex_lock(&slab_mutex);
0206     list_for_each_entry(s, &slab_caches, list) {
0207         ret = update_memcg_params(s, num_memcgs);
0208         /*
0209          * Instead of freeing the memory, we'll just leave the caches
0210          * up to this point in an updated state.
0211          */
0212         if (ret)
0213             break;
0214     }
0215     mutex_unlock(&slab_mutex);
0216     return ret;
0217 }
0218 #else
0219 static inline int init_memcg_params(struct kmem_cache *s,
0220         struct mem_cgroup *memcg, struct kmem_cache *root_cache)
0221 {
0222     return 0;
0223 }
0224 
0225 static inline void destroy_memcg_params(struct kmem_cache *s)
0226 {
0227 }
0228 #endif /* CONFIG_MEMCG && !CONFIG_SLOB */
0229 
0230 /*
0231  * Find a mergeable slab cache
0232  */
0233 int slab_unmergeable(struct kmem_cache *s)
0234 {
0235     if (slab_nomerge || (s->flags & SLAB_NEVER_MERGE))
0236         return 1;
0237 
0238     if (!is_root_cache(s))
0239         return 1;
0240 
0241     if (s->ctor)
0242         return 1;
0243 
0244     /*
0245      * We may have set a slab to be unmergeable during bootstrap.
0246      */
0247     if (s->refcount < 0)
0248         return 1;
0249 
0250     return 0;
0251 }
0252 
0253 struct kmem_cache *find_mergeable(size_t size, size_t align,
0254         unsigned long flags, const char *name, void (*ctor)(void *))
0255 {
0256     struct kmem_cache *s;
0257 
0258     if (slab_nomerge || (flags & SLAB_NEVER_MERGE))
0259         return NULL;
0260 
0261     if (ctor)
0262         return NULL;
0263 
0264     size = ALIGN(size, sizeof(void *));
0265     align = calculate_alignment(flags, align, size);
0266     size = ALIGN(size, align);
0267     flags = kmem_cache_flags(size, flags, name, NULL);
0268 
0269     list_for_each_entry_reverse(s, &slab_caches, list) {
0270         if (slab_unmergeable(s))
0271             continue;
0272 
0273         if (size > s->size)
0274             continue;
0275 
0276         if ((flags & SLAB_MERGE_SAME) != (s->flags & SLAB_MERGE_SAME))
0277             continue;
0278         /*
0279          * Check if alignment is compatible.
0280          * Courtesy of Adrian Drzewiecki
0281          */
0282         if ((s->size & ~(align - 1)) != s->size)
0283             continue;
0284 
0285         if (s->size - size >= sizeof(void *))
0286             continue;
0287 
0288         if (IS_ENABLED(CONFIG_SLAB) && align &&
0289             (align > s->align || s->align % align))
0290             continue;
0291 
0292         return s;
0293     }
0294     return NULL;
0295 }
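
/*
 * Worked example of the merge check above (a sketch assuming SLUB on 64-bit
 * with no debug flags, where the kmalloc-64 cache has s->size == 64): a
 * request for a 60 byte cache with align == 0, no ctor and no special flags
 * is rounded up to size == 64 with align == 8.  kmalloc-64 then passes every
 * test: 64 > 64 is false, the SLAB_MERGE_SAME bits match, 64 is a multiple of
 * the requested alignment, and 64 - 64 < sizeof(void *), so the new cache is
 * simply aliased to kmalloc-64 instead of being built from scratch.
 */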
0296 
0297 /*
0298  * Figure out what the alignment of the objects will be given a set of
0299  * flags, a user specified alignment and the size of the objects.
0300  */
0301 unsigned long calculate_alignment(unsigned long flags,
0302         unsigned long align, unsigned long size)
0303 {
0304     /*
0305      * If the user wants hardware cache aligned objects then follow that
0306      * suggestion if the object is sufficiently large.
0307      *
0308      * The hardware cache alignment cannot override the specified
0309      * alignment though. If that is greater, then use it.
0310      */
0311     if (flags & SLAB_HWCACHE_ALIGN) {
0312         unsigned long ralign = cache_line_size();
0313         while (size <= ralign / 2)
0314             ralign /= 2;
0315         align = max(align, ralign);
0316     }
0317 
0318     if (align < ARCH_SLAB_MINALIGN)
0319         align = ARCH_SLAB_MINALIGN;
0320 
0321     return ALIGN(align, sizeof(void *));
0322 }
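
/*
 * Worked example of the calculation above, assuming cache_line_size() == 64
 * and ARCH_SLAB_MINALIGN <= 32: for a 20 byte object created with
 * SLAB_HWCACHE_ALIGN and align == 0, ralign starts at 64 and is halved while
 * the object still fits in half of it (20 <= 32 holds, 20 <= 16 does not),
 * giving ralign == 32.  The function returns 32, so two such objects share a
 * cache line rather than each being padded out to 64 bytes.
 */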
0323 
0324 static struct kmem_cache *create_cache(const char *name,
0325         size_t object_size, size_t size, size_t align,
0326         unsigned long flags, void (*ctor)(void *),
0327         struct mem_cgroup *memcg, struct kmem_cache *root_cache)
0328 {
0329     struct kmem_cache *s;
0330     int err;
0331 
0332     err = -ENOMEM;
0333     s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
0334     if (!s)
0335         goto out;
0336 
0337     s->name = name;
0338     s->object_size = object_size;
0339     s->size = size;
0340     s->align = align;
0341     s->ctor = ctor;
0342 
0343     err = init_memcg_params(s, memcg, root_cache);
0344     if (err)
0345         goto out_free_cache;
0346 
0347     err = __kmem_cache_create(s, flags);
0348     if (err)
0349         goto out_free_cache;
0350 
0351     s->refcount = 1;
0352     list_add(&s->list, &slab_caches);
0353 out:
0354     if (err)
0355         return ERR_PTR(err);
0356     return s;
0357 
0358 out_free_cache:
0359     destroy_memcg_params(s);
0360     kmem_cache_free(kmem_cache, s);
0361     goto out;
0362 }
0363 
0364 /*
0365  * kmem_cache_create - Create a cache.
0366  * @name: A string which is used in /proc/slabinfo to identify this cache.
0367  * @size: The size of objects to be created in this cache.
0368  * @align: The required alignment for the objects.
0369  * @flags: SLAB flags
0370  * @ctor: A constructor for the objects.
0371  *
0372  * Returns a ptr to the cache on success, NULL on failure.
0373  * Cannot be called within an interrupt, but can be interrupted.
0374  * The @ctor is run when new pages are allocated by the cache.
0375  *
0376  * The flags are
0377  *
0378  * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
0379  * to catch references to uninitialised memory.
0380  *
0381  * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
0382  * for buffer overruns.
0383  *
0384  * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
0385  * cacheline.  This can be beneficial if you're counting cycles as closely
0386  * as davem.
0387  */
0388 struct kmem_cache *
0389 kmem_cache_create(const char *name, size_t size, size_t align,
0390           unsigned long flags, void (*ctor)(void *))
0391 {
0392     struct kmem_cache *s = NULL;
0393     const char *cache_name;
0394     int err;
0395 
0396     get_online_cpus();
0397     get_online_mems();
0398     memcg_get_cache_ids();
0399 
0400     mutex_lock(&slab_mutex);
0401 
0402     err = kmem_cache_sanity_check(name, size);
0403     if (err) {
0404         goto out_unlock;
0405     }
0406 
0407     /* Refuse requests with allocator specific flags */
0408     if (flags & ~SLAB_FLAGS_PERMITTED) {
0409         err = -EINVAL;
0410         goto out_unlock;
0411     }
0412 
0413     /*
0414      * Some allocators will constrain the set of valid flags to a subset
0415      * of all flags. We expect them to define CACHE_CREATE_MASK in this
0416      * case, and we'll just provide them with a sanitized version of the
0417      * passed flags.
0418      */
0419     flags &= CACHE_CREATE_MASK;
0420 
0421     s = __kmem_cache_alias(name, size, align, flags, ctor);
0422     if (s)
0423         goto out_unlock;
0424 
0425     cache_name = kstrdup_const(name, GFP_KERNEL);
0426     if (!cache_name) {
0427         err = -ENOMEM;
0428         goto out_unlock;
0429     }
0430 
0431     s = create_cache(cache_name, size, size,
0432              calculate_alignment(flags, align, size),
0433              flags, ctor, NULL, NULL);
0434     if (IS_ERR(s)) {
0435         err = PTR_ERR(s);
0436         kfree_const(cache_name);
0437     }
0438 
0439 out_unlock:
0440     mutex_unlock(&slab_mutex);
0441 
0442     memcg_put_cache_ids();
0443     put_online_mems();
0444     put_online_cpus();
0445 
0446     if (err) {
0447         if (flags & SLAB_PANIC)
0448             panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n",
0449                 name, err);
0450         else {
0451             pr_warn("kmem_cache_create(%s) failed with error %d\n",
0452                 name, err);
0453             dump_stack();
0454         }
0455         return NULL;
0456     }
0457     return s;
0458 }
0459 EXPORT_SYMBOL(kmem_cache_create);
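
/*
 * Illustrative use of the API above ("foo" and its cache are made-up names):
 * a subsystem typically creates its cache once at init time, allocates and
 * frees objects through it, and tears it down only after every object has
 * been returned, e.g. from the module exit path.
 *
 *     static struct kmem_cache *foo_cachep;
 *
 *     foo_cachep = kmem_cache_create("foo", sizeof(struct foo), 0,
 *                                    SLAB_HWCACHE_ALIGN, NULL);
 *     if (!foo_cachep)
 *         return -ENOMEM;
 *
 *     struct foo *f = kmem_cache_alloc(foo_cachep, GFP_KERNEL);
 *     ...
 *     kmem_cache_free(foo_cachep, f);
 *
 *     kmem_cache_destroy(foo_cachep);
 */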
0460 
0461 static int shutdown_cache(struct kmem_cache *s,
0462         struct list_head *release, bool *need_rcu_barrier)
0463 {
0464     if (__kmem_cache_shutdown(s) != 0)
0465         return -EBUSY;
0466 
0467     if (s->flags & SLAB_DESTROY_BY_RCU)
0468         *need_rcu_barrier = true;
0469 
0470     list_move(&s->list, release);
0471     return 0;
0472 }
0473 
0474 static void release_caches(struct list_head *release, bool need_rcu_barrier)
0475 {
0476     struct kmem_cache *s, *s2;
0477 
0478     if (need_rcu_barrier)
0479         rcu_barrier();
0480 
0481     list_for_each_entry_safe(s, s2, release, list) {
0482 #ifdef SLAB_SUPPORTS_SYSFS
0483         sysfs_slab_remove(s);
0484 #else
0485         slab_kmem_cache_release(s);
0486 #endif
0487     }
0488 }
0489 
0490 #if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
0491 /*
0492  * memcg_create_kmem_cache - Create a cache for a memory cgroup.
0493  * @memcg: The memory cgroup the new cache is for.
0494  * @root_cache: The parent of the new cache.
0495  *
0496  * This function attempts to create a kmem cache that will serve allocation
0497  * requests going from @memcg to @root_cache. The new cache inherits properties
0498  * from its parent.
0499  */
0500 void memcg_create_kmem_cache(struct mem_cgroup *memcg,
0501                  struct kmem_cache *root_cache)
0502 {
0503     static char memcg_name_buf[NAME_MAX + 1]; /* protected by slab_mutex */
0504     struct cgroup_subsys_state *css = &memcg->css;
0505     struct memcg_cache_array *arr;
0506     struct kmem_cache *s = NULL;
0507     char *cache_name;
0508     int idx;
0509 
0510     get_online_cpus();
0511     get_online_mems();
0512 
0513     mutex_lock(&slab_mutex);
0514 
0515     /*
0516      * The memory cgroup could have been offlined while the cache
0517      * creation work was pending.
0518      */
0519     if (memcg->kmem_state != KMEM_ONLINE)
0520         goto out_unlock;
0521 
0522     idx = memcg_cache_id(memcg);
0523     arr = rcu_dereference_protected(root_cache->memcg_params.memcg_caches,
0524                     lockdep_is_held(&slab_mutex));
0525 
0526     /*
0527      * Since per-memcg caches are created asynchronously on first
0528      * allocation (see memcg_kmem_get_cache()), several threads can try to
0529      * create the same cache, but only one of them may succeed.
0530      */
0531     if (arr->entries[idx])
0532         goto out_unlock;
0533 
0534     cgroup_name(css->cgroup, memcg_name_buf, sizeof(memcg_name_buf));
0535     cache_name = kasprintf(GFP_KERNEL, "%s(%llu:%s)", root_cache->name,
0536                    css->serial_nr, memcg_name_buf);
0537     if (!cache_name)
0538         goto out_unlock;
0539 
0540     s = create_cache(cache_name, root_cache->object_size,
0541              root_cache->size, root_cache->align,
0542              root_cache->flags & CACHE_CREATE_MASK,
0543              root_cache->ctor, memcg, root_cache);
0544     /*
0545      * If we could not create a memcg cache, do not complain, because
0546      * that's not critical at all as we can always proceed with the root
0547      * cache.
0548      */
0549     if (IS_ERR(s)) {
0550         kfree(cache_name);
0551         goto out_unlock;
0552     }
0553 
0554     list_add(&s->memcg_params.list, &root_cache->memcg_params.list);
0555 
0556     /*
0557      * Since readers won't lock (see cache_from_memcg_idx()), we need a
0558      * barrier here to ensure nobody will see the kmem_cache partially
0559      * initialized.
0560      */
0561     smp_wmb();
0562     arr->entries[idx] = s;
0563 
0564 out_unlock:
0565     mutex_unlock(&slab_mutex);
0566 
0567     put_online_mems();
0568     put_online_cpus();
0569 }
0570 
0571 void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg)
0572 {
0573     int idx;
0574     struct memcg_cache_array *arr;
0575     struct kmem_cache *s, *c;
0576 
0577     idx = memcg_cache_id(memcg);
0578 
0579     get_online_cpus();
0580     get_online_mems();
0581 
0582 #ifdef CONFIG_SLUB
0583     /*
0584      * In case of SLUB, we need to disable empty slab caching to
0585      * avoid pinning the offline memory cgroup by freeable kmem
0586      * pages charged to it. SLAB doesn't need this, as it
0587      * periodically purges unused slabs.
0588      */
0589     mutex_lock(&slab_mutex);
0590     list_for_each_entry(s, &slab_caches, list) {
0591         c = is_root_cache(s) ? cache_from_memcg_idx(s, idx) : NULL;
0592         if (c) {
0593             c->cpu_partial = 0;
0594             c->min_partial = 0;
0595         }
0596     }
0597     mutex_unlock(&slab_mutex);
0598     /*
0599      * kmem_cache->cpu_partial is checked locklessly (see
0600      * put_cpu_partial()). Make sure the change is visible.
0601      */
0602     synchronize_sched();
0603 #endif
0604 
0605     mutex_lock(&slab_mutex);
0606     list_for_each_entry(s, &slab_caches, list) {
0607         if (!is_root_cache(s))
0608             continue;
0609 
0610         arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
0611                         lockdep_is_held(&slab_mutex));
0612         c = arr->entries[idx];
0613         if (!c)
0614             continue;
0615 
0616         __kmem_cache_shrink(c);
0617         arr->entries[idx] = NULL;
0618     }
0619     mutex_unlock(&slab_mutex);
0620 
0621     put_online_mems();
0622     put_online_cpus();
0623 }
0624 
0625 static int __shutdown_memcg_cache(struct kmem_cache *s,
0626         struct list_head *release, bool *need_rcu_barrier)
0627 {
0628     BUG_ON(is_root_cache(s));
0629 
0630     if (shutdown_cache(s, release, need_rcu_barrier))
0631         return -EBUSY;
0632 
0633     list_del(&s->memcg_params.list);
0634     return 0;
0635 }
0636 
0637 void memcg_destroy_kmem_caches(struct mem_cgroup *memcg)
0638 {
0639     LIST_HEAD(release);
0640     bool need_rcu_barrier = false;
0641     struct kmem_cache *s, *s2;
0642 
0643     get_online_cpus();
0644     get_online_mems();
0645 
0646     mutex_lock(&slab_mutex);
0647     list_for_each_entry_safe(s, s2, &slab_caches, list) {
0648         if (is_root_cache(s) || s->memcg_params.memcg != memcg)
0649             continue;
0650         /*
0651          * The cgroup is about to be freed and therefore has no charges
0652          * left. Hence, all its caches must be empty by now.
0653          */
0654         BUG_ON(__shutdown_memcg_cache(s, &release, &need_rcu_barrier));
0655     }
0656     mutex_unlock(&slab_mutex);
0657 
0658     put_online_mems();
0659     put_online_cpus();
0660 
0661     release_caches(&release, need_rcu_barrier);
0662 }
0663 
0664 static int shutdown_memcg_caches(struct kmem_cache *s,
0665         struct list_head *release, bool *need_rcu_barrier)
0666 {
0667     struct memcg_cache_array *arr;
0668     struct kmem_cache *c, *c2;
0669     LIST_HEAD(busy);
0670     int i;
0671 
0672     BUG_ON(!is_root_cache(s));
0673 
0674     /*
0675      * First, shutdown active caches, i.e. caches that belong to online
0676      * memory cgroups.
0677      */
0678     arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
0679                     lockdep_is_held(&slab_mutex));
0680     for_each_memcg_cache_index(i) {
0681         c = arr->entries[i];
0682         if (!c)
0683             continue;
0684         if (__shutdown_memcg_cache(c, release, need_rcu_barrier))
0685             /*
0686              * The cache still has objects. Move it to a temporary
0687              * list so as not to try to destroy it for a second
0688              * time while iterating over inactive caches below.
0689              */
0690             list_move(&c->memcg_params.list, &busy);
0691         else
0692             /*
0693              * The cache is empty and will be destroyed soon. Clear
0694              * the pointer to it in the memcg_caches array so that
0695              * it will never be accessed even if the root cache
0696              * stays alive.
0697              */
0698             arr->entries[i] = NULL;
0699     }
0700 
0701     /*
0702      * Second, shutdown all caches left from memory cgroups that are now
0703      * offline.
0704      */
0705     list_for_each_entry_safe(c, c2, &s->memcg_params.list,
0706                  memcg_params.list)
0707         __shutdown_memcg_cache(c, release, need_rcu_barrier);
0708 
0709     list_splice(&busy, &s->memcg_params.list);
0710 
0711     /*
0712      * A cache being destroyed must be empty. In particular, this means
0713      * that all per memcg caches attached to it must be empty too.
0714      */
0715     if (!list_empty(&s->memcg_params.list))
0716         return -EBUSY;
0717     return 0;
0718 }
0719 #else
0720 static inline int shutdown_memcg_caches(struct kmem_cache *s,
0721         struct list_head *release, bool *need_rcu_barrier)
0722 {
0723     return 0;
0724 }
0725 #endif /* CONFIG_MEMCG && !CONFIG_SLOB */
0726 
0727 void slab_kmem_cache_release(struct kmem_cache *s)
0728 {
0729     __kmem_cache_release(s);
0730     destroy_memcg_params(s);
0731     kfree_const(s->name);
0732     kmem_cache_free(kmem_cache, s);
0733 }
0734 
0735 void kmem_cache_destroy(struct kmem_cache *s)
0736 {
0737     LIST_HEAD(release);
0738     bool need_rcu_barrier = false;
0739     int err;
0740 
0741     if (unlikely(!s))
0742         return;
0743 
0744     get_online_cpus();
0745     get_online_mems();
0746 
0747     kasan_cache_destroy(s);
0748     mutex_lock(&slab_mutex);
0749 
0750     s->refcount--;
0751     if (s->refcount)
0752         goto out_unlock;
0753 
0754     err = shutdown_memcg_caches(s, &release, &need_rcu_barrier);
0755     if (!err)
0756         err = shutdown_cache(s, &release, &need_rcu_barrier);
0757 
0758     if (err) {
0759         pr_err("kmem_cache_destroy %s: Slab cache still has objects\n",
0760                s->name);
0761         dump_stack();
0762     }
0763 out_unlock:
0764     mutex_unlock(&slab_mutex);
0765 
0766     put_online_mems();
0767     put_online_cpus();
0768 
0769     release_caches(&release, need_rcu_barrier);
0770 }
0771 EXPORT_SYMBOL(kmem_cache_destroy);
0772 
0773 /**
0774  * kmem_cache_shrink - Shrink a cache.
0775  * @cachep: The cache to shrink.
0776  *
0777  * Releases as many slabs as possible for a cache.
0778  * To help debugging, a zero exit status indicates all slabs were released.
0779  */
0780 int kmem_cache_shrink(struct kmem_cache *cachep)
0781 {
0782     int ret;
0783 
0784     get_online_cpus();
0785     get_online_mems();
0786     kasan_cache_shrink(cachep);
0787     ret = __kmem_cache_shrink(cachep);
0788     put_online_mems();
0789     put_online_cpus();
0790     return ret;
0791 }
0792 EXPORT_SYMBOL(kmem_cache_shrink);
0793 
0794 bool slab_is_available(void)
0795 {
0796     return slab_state >= UP;
0797 }
0798 
0799 #ifndef CONFIG_SLOB
0800 /* Create a cache during boot when no slab services are available yet */
0801 void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size,
0802         unsigned long flags)
0803 {
0804     int err;
0805 
0806     s->name = name;
0807     s->size = s->object_size = size;
0808     s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);
0809 
0810     slab_init_memcg_params(s);
0811 
0812     err = __kmem_cache_create(s, flags);
0813 
0814     if (err)
0815         panic("Creation of kmalloc slab %s size=%zu failed. Reason %d\n",
0816                     name, size, err);
0817 
0818     s->refcount = -1;   /* Exempt from merging for now */
0819 }
0820 
0821 struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size,
0822                 unsigned long flags)
0823 {
0824     struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
0825 
0826     if (!s)
0827         panic("Out of memory when creating slab %s\n", name);
0828 
0829     create_boot_cache(s, name, size, flags);
0830     list_add(&s->list, &slab_caches);
0831     s->refcount = 1;
0832     return s;
0833 }
0834 
0835 struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1];
0836 EXPORT_SYMBOL(kmalloc_caches);
0837 
0838 #ifdef CONFIG_ZONE_DMA
0839 struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1];
0840 EXPORT_SYMBOL(kmalloc_dma_caches);
0841 #endif
0842 
0843  * Conversion table for small slab sizes / 8 to the index in the
0844  * Conversion table for small slabs sizes / 8 to the index in the
0845  * kmalloc array. This is necessary for slabs < 192 since we have non power
0846  * of two cache sizes there. The size of larger slabs can be determined using
0847  * fls.
0848  */
0849 static s8 size_index[24] = {
0850     3,  /* 8 */
0851     4,  /* 16 */
0852     5,  /* 24 */
0853     5,  /* 32 */
0854     6,  /* 40 */
0855     6,  /* 48 */
0856     6,  /* 56 */
0857     6,  /* 64 */
0858     1,  /* 72 */
0859     1,  /* 80 */
0860     1,  /* 88 */
0861     1,  /* 96 */
0862     7,  /* 104 */
0863     7,  /* 112 */
0864     7,  /* 120 */
0865     7,  /* 128 */
0866     2,  /* 136 */
0867     2,  /* 144 */
0868     2,  /* 152 */
0869     2,  /* 160 */
0870     2,  /* 168 */
0871     2,  /* 176 */
0872     2,  /* 184 */
0873     2   /* 192 */
0874 };
0875 
0876 static inline int size_index_elem(size_t bytes)
0877 {
0878     return (bytes - 1) / 8;
0879 }
0880 
0881 /*
0882  * Find the kmem_cache structure that serves a given size of
0883  * allocation
0884  */
0885 struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
0886 {
0887     int index;
0888 
0889     if (unlikely(size > KMALLOC_MAX_SIZE)) {
0890         WARN_ON_ONCE(!(flags & __GFP_NOWARN));
0891         return NULL;
0892     }
0893 
0894     if (size <= 192) {
0895         if (!size)
0896             return ZERO_SIZE_PTR;
0897 
0898         index = size_index[size_index_elem(size)];
0899     } else
0900         index = fls(size - 1);
0901 
0902 #ifdef CONFIG_ZONE_DMA
0903     if (unlikely((flags & GFP_DMA)))
0904         return kmalloc_dma_caches[index];
0905 
0906 #endif
0907     return kmalloc_caches[index];
0908 }
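
/*
 * Worked examples of the lookup above, assuming the default caches and an
 * unpatched size_index[] (KMALLOC_MIN_SIZE == 8), with no GFP_DMA: a 24 byte
 * request takes the table path, size_index_elem(24) == 2 and
 * size_index[2] == 5, i.e. kmalloc-32; a 100 byte request gives
 * size_index_elem(100) == 12 and size_index[12] == 7, i.e. kmalloc-128; a
 * 1000 byte request is above 192, so fls(999) == 10 selects kmalloc-1024.
 */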
0909 
0910 /*
0911  * kmalloc_info[] is to make slub_debug=,kmalloc-xx option work at boot time.
0912  * kmalloc_index() supports up to 2^26=64MB, so the final entry of the table is
0913  * kmalloc-67108864.
0914  */
0915 static struct {
0916     const char *name;
0917     unsigned long size;
0918 } const kmalloc_info[] __initconst = {
0919     {NULL,                      0},     {"kmalloc-96",             96},
0920     {"kmalloc-192",           192},     {"kmalloc-8",               8},
0921     {"kmalloc-16",             16},     {"kmalloc-32",             32},
0922     {"kmalloc-64",             64},     {"kmalloc-128",           128},
0923     {"kmalloc-256",           256},     {"kmalloc-512",           512},
0924     {"kmalloc-1024",         1024},     {"kmalloc-2048",         2048},
0925     {"kmalloc-4096",         4096},     {"kmalloc-8192",         8192},
0926     {"kmalloc-16384",       16384},     {"kmalloc-32768",       32768},
0927     {"kmalloc-65536",       65536},     {"kmalloc-131072",     131072},
0928     {"kmalloc-262144",     262144},     {"kmalloc-524288",     524288},
0929     {"kmalloc-1048576",   1048576},     {"kmalloc-2097152",   2097152},
0930     {"kmalloc-4194304",   4194304},     {"kmalloc-8388608",   8388608},
0931     {"kmalloc-16777216", 16777216},     {"kmalloc-33554432", 33554432},
0932     {"kmalloc-67108864", 67108864}
0933 };
0934 
0935 /*
0936  * Patch up the size_index table if we have strange large alignment
0937  * requirements for the kmalloc array. This is only the case for
0938  * MIPS it seems. The standard arches will not generate any code here.
0939  *
0940  * Largest permitted alignment is 256 bytes due to the way we
0941  * handle the index determination for the smaller caches.
0942  *
0943  * Make sure that nothing crazy happens if someone starts tinkering
0944  * around with ARCH_KMALLOC_MINALIGN
0945  */
0946 void __init setup_kmalloc_cache_index_table(void)
0947 {
0948     int i;
0949 
0950     BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
0951         (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));
0952 
0953     for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
0954         int elem = size_index_elem(i);
0955 
0956         if (elem >= ARRAY_SIZE(size_index))
0957             break;
0958         size_index[elem] = KMALLOC_SHIFT_LOW;
0959     }
0960 
0961     if (KMALLOC_MIN_SIZE >= 64) {
0962         /*
0963          * The 96 byte size cache is not used if the alignment
0964          * is 64 bytes.
0965          */
0966         for (i = 64 + 8; i <= 96; i += 8)
0967             size_index[size_index_elem(i)] = 7;
0968 
0969     }
0970 
0971     if (KMALLOC_MIN_SIZE >= 128) {
0972         /*
0973          * The 192 byte sized cache is not used if the alignment
0974          * is 128 bytes. Redirect kmalloc to use the 256 byte cache
0975          * instead.
0976          */
0977         for (i = 128 + 8; i <= 192; i += 8)
0978             size_index[size_index_elem(i)] = 8;
0979     }
0980 }
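
/*
 * Example of the patching above, assuming an architecture whose DMA alignment
 * forces KMALLOC_MIN_SIZE == 64 (so KMALLOC_SHIFT_LOW == 6): the entries for
 * sizes 8..56 are redirected to index 6 (kmalloc-64), and the 72..96 entries
 * are redirected to index 7 (kmalloc-128), since the kmalloc-96 cache is not
 * created with a 64 byte minimum alignment.
 */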
0981 
0982 static void __init new_kmalloc_cache(int idx, unsigned long flags)
0983 {
0984     kmalloc_caches[idx] = create_kmalloc_cache(kmalloc_info[idx].name,
0985                     kmalloc_info[idx].size, flags);
0986 }
0987 
0988 /*
0989  * Create the kmalloc array. Some of the regular kmalloc arrays
0990  * may already have been created because they were needed to
0991  * enable allocations for slab creation.
0992  */
0993 void __init create_kmalloc_caches(unsigned long flags)
0994 {
0995     int i;
0996 
0997     for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
0998         if (!kmalloc_caches[i])
0999             new_kmalloc_cache(i, flags);
1000 
1001         /*
1002          * Caches that are not a power-of-two size. These have to
1003          * be created immediately after the earlier power-of-two
1004          * caches.
1005          */
1006         if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[1] && i == 6)
1007             new_kmalloc_cache(1, flags);
1008         if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[2] && i == 7)
1009             new_kmalloc_cache(2, flags);
1010     }
1011 
1012     /* Kmalloc array is now usable */
1013     slab_state = UP;
1014 
1015 #ifdef CONFIG_ZONE_DMA
1016     for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
1017         struct kmem_cache *s = kmalloc_caches[i];
1018 
1019         if (s) {
1020             int size = kmalloc_size(i);
1021             char *n = kasprintf(GFP_NOWAIT,
1022                  "dma-kmalloc-%d", size);
1023 
1024             BUG_ON(!n);
1025             kmalloc_dma_caches[i] = create_kmalloc_cache(n,
1026                 size, SLAB_CACHE_DMA | flags);
1027         }
1028     }
1029 #endif
1030 }
1031 #endif /* !CONFIG_SLOB */
1032 
1033 /*
1034  * To avoid unnecessary overhead, we pass through large allocation requests
1035  * directly to the page allocator. We use __GFP_COMP, because we will need to
1036  * know the allocation order to free the pages properly in kfree.
1037  */
1038 void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
1039 {
1040     void *ret;
1041     struct page *page;
1042 
1043     flags |= __GFP_COMP;
1044     page = alloc_pages(flags, order);
1045     ret = page ? page_address(page) : NULL;
1046     kmemleak_alloc(ret, size, 1, flags);
1047     kasan_kmalloc_large(ret, size, flags);
1048     return ret;
1049 }
1050 EXPORT_SYMBOL(kmalloc_order);
1051 
1052 #ifdef CONFIG_TRACING
1053 void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
1054 {
1055     void *ret = kmalloc_order(size, flags, order);
1056     trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
1057     return ret;
1058 }
1059 EXPORT_SYMBOL(kmalloc_order_trace);
1060 #endif
1061 
1062 #ifdef CONFIG_SLAB_FREELIST_RANDOM
1063 /* Randomize a generic freelist */
1064 static void freelist_randomize(struct rnd_state *state, unsigned int *list,
1065             size_t count)
1066 {
1067     size_t i;
1068     unsigned int rand;
1069 
1070     for (i = 0; i < count; i++)
1071         list[i] = i;
1072 
1073     /* Fisher-Yates shuffle */
1074     for (i = count - 1; i > 0; i--) {
1075         rand = prandom_u32_state(state);
1076         rand %= (i + 1);
1077         swap(list[i], list[rand]);
1078     }
1079 }
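
/*
 * Illustration of the shuffle above for count == 4 (one possible outcome,
 * since the result depends on the seeded rnd_state): the list starts as
 * [0 1 2 3]; at i == 3 a random slot in [0..3] is swapped with slot 3, at
 * i == 2 a slot in [0..2] with slot 2, and at i == 1 a slot in [0..1] with
 * slot 1, yielding a uniformly distributed permutation of the object indices.
 */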
1080 
1081 /* Create a random sequence per cache */
1082 int cache_random_seq_create(struct kmem_cache *cachep, unsigned int count,
1083                     gfp_t gfp)
1084 {
1085     struct rnd_state state;
1086 
1087     if (count < 2 || cachep->random_seq)
1088         return 0;
1089 
1090     cachep->random_seq = kcalloc(count, sizeof(unsigned int), gfp);
1091     if (!cachep->random_seq)
1092         return -ENOMEM;
1093 
1094     /* Get best entropy at this stage of boot */
1095     prandom_seed_state(&state, get_random_long());
1096 
1097     freelist_randomize(&state, cachep->random_seq, count);
1098     return 0;
1099 }
1100 
1101 /* Destroy the per-cache random freelist sequence */
1102 void cache_random_seq_destroy(struct kmem_cache *cachep)
1103 {
1104     kfree(cachep->random_seq);
1105     cachep->random_seq = NULL;
1106 }
1107 #endif /* CONFIG_SLAB_FREELIST_RANDOM */
1108 
1109 #ifdef CONFIG_SLABINFO
1110 
1111 #ifdef CONFIG_SLAB
1112 #define SLABINFO_RIGHTS (S_IWUSR | S_IRUSR)
1113 #else
1114 #define SLABINFO_RIGHTS S_IRUSR
1115 #endif
1116 
1117 static void print_slabinfo_header(struct seq_file *m)
1118 {
1119     /*
1120      * Output format version, so at least we can change it
1121      * without _too_ many complaints.
1122      */
1123 #ifdef CONFIG_DEBUG_SLAB
1124     seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
1125 #else
1126     seq_puts(m, "slabinfo - version: 2.1\n");
1127 #endif
1128     seq_puts(m, "# name            <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab>");
1129     seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
1130     seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
1131 #ifdef CONFIG_DEBUG_SLAB
1132     seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> <error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
1133     seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
1134 #endif
1135     seq_putc(m, '\n');
1136 }
1137 
1138 void *slab_start(struct seq_file *m, loff_t *pos)
1139 {
1140     mutex_lock(&slab_mutex);
1141     return seq_list_start(&slab_caches, *pos);
1142 }
1143 
1144 void *slab_next(struct seq_file *m, void *p, loff_t *pos)
1145 {
1146     return seq_list_next(p, &slab_caches, pos);
1147 }
1148 
1149 void slab_stop(struct seq_file *m, void *p)
1150 {
1151     mutex_unlock(&slab_mutex);
1152 }
1153 
1154 static void
1155 memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info)
1156 {
1157     struct kmem_cache *c;
1158     struct slabinfo sinfo;
1159 
1160     if (!is_root_cache(s))
1161         return;
1162 
1163     for_each_memcg_cache(c, s) {
1164         memset(&sinfo, 0, sizeof(sinfo));
1165         get_slabinfo(c, &sinfo);
1166 
1167         info->active_slabs += sinfo.active_slabs;
1168         info->num_slabs += sinfo.num_slabs;
1169         info->shared_avail += sinfo.shared_avail;
1170         info->active_objs += sinfo.active_objs;
1171         info->num_objs += sinfo.num_objs;
1172     }
1173 }
1174 
1175 static void cache_show(struct kmem_cache *s, struct seq_file *m)
1176 {
1177     struct slabinfo sinfo;
1178 
1179     memset(&sinfo, 0, sizeof(sinfo));
1180     get_slabinfo(s, &sinfo);
1181 
1182     memcg_accumulate_slabinfo(s, &sinfo);
1183 
1184     seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
1185            cache_name(s), sinfo.active_objs, sinfo.num_objs, s->size,
1186            sinfo.objects_per_slab, (1 << sinfo.cache_order));
1187 
1188     seq_printf(m, " : tunables %4u %4u %4u",
1189            sinfo.limit, sinfo.batchcount, sinfo.shared);
1190     seq_printf(m, " : slabdata %6lu %6lu %6lu",
1191            sinfo.active_slabs, sinfo.num_slabs, sinfo.shared_avail);
1192     slabinfo_show_stats(m, s);
1193     seq_putc(m, '\n');
1194 }
1195 
1196 static int slab_show(struct seq_file *m, void *p)
1197 {
1198     struct kmem_cache *s = list_entry(p, struct kmem_cache, list);
1199 
1200     if (p == slab_caches.next)
1201         print_slabinfo_header(m);
1202     if (is_root_cache(s))
1203         cache_show(s, m);
1204     return 0;
1205 }
1206 
1207 #if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
1208 int memcg_slab_show(struct seq_file *m, void *p)
1209 {
1210     struct kmem_cache *s = list_entry(p, struct kmem_cache, list);
1211     struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
1212 
1213     if (p == slab_caches.next)
1214         print_slabinfo_header(m);
1215     if (!is_root_cache(s) && s->memcg_params.memcg == memcg)
1216         cache_show(s, m);
1217     return 0;
1218 }
1219 #endif
1220 
1221 /*
1222  * slabinfo_op - iterator that generates /proc/slabinfo
1223  *
1224  * Output layout:
1225  * cache-name
1226  * num-active-objs
1227  * total-objs
1228  * object size
1229  * num-active-slabs
1230  * total-slabs
1231  * num-pages-per-slab
1232  * + further values on SMP and with statistics enabled
1233  */
1234 static const struct seq_operations slabinfo_op = {
1235     .start = slab_start,
1236     .next = slab_next,
1237     .stop = slab_stop,
1238     .show = slab_show,
1239 };
1240 
1241 static int slabinfo_open(struct inode *inode, struct file *file)
1242 {
1243     return seq_open(file, &slabinfo_op);
1244 }
1245 
1246 static const struct file_operations proc_slabinfo_operations = {
1247     .open       = slabinfo_open,
1248     .read       = seq_read,
1249     .write          = slabinfo_write,
1250     .llseek     = seq_lseek,
1251     .release    = seq_release,
1252 };
1253 
1254 static int __init slab_proc_init(void)
1255 {
1256     proc_create("slabinfo", SLABINFO_RIGHTS, NULL,
1257                         &proc_slabinfo_operations);
1258     return 0;
1259 }
1260 module_init(slab_proc_init);
1261 #endif /* CONFIG_SLABINFO */
1262 
1263 static __always_inline void *__do_krealloc(const void *p, size_t new_size,
1264                        gfp_t flags)
1265 {
1266     void *ret;
1267     size_t ks = 0;
1268 
1269     if (p)
1270         ks = ksize(p);
1271 
1272     if (ks >= new_size) {
1273         kasan_krealloc((void *)p, new_size, flags);
1274         return (void *)p;
1275     }
1276 
1277     ret = kmalloc_track_caller(new_size, flags);
1278     if (ret && p)
1279         memcpy(ret, p, ks);
1280 
1281     return ret;
1282 }
1283 
1284 /**
1285  * __krealloc - like krealloc() but don't free @p.
1286  * @p: object to reallocate memory for.
1287  * @new_size: how many bytes of memory are required.
1288  * @flags: the type of memory to allocate.
1289  *
1290  * This function is like krealloc() except it never frees the originally
1291  * allocated buffer. Use this if you don't want to free the buffer immediately
1292  * like, for example, with RCU.
1293  */
1294 void *__krealloc(const void *p, size_t new_size, gfp_t flags)
1295 {
1296     if (unlikely(!new_size))
1297         return ZERO_SIZE_PTR;
1298 
1299     return __do_krealloc(p, new_size, flags);
1300 
1301 }
1302 EXPORT_SYMBOL(__krealloc);
1303 
1304 /**
1305  * krealloc - reallocate memory. The contents will remain unchanged.
1306  * @p: object to reallocate memory for.
1307  * @new_size: how many bytes of memory are required.
1308  * @flags: the type of memory to allocate.
1309  *
1310  * The contents of the object pointed to are preserved up to the
1311  * lesser of the new and old sizes.  If @p is %NULL, krealloc()
1312  * behaves exactly like kmalloc().  If @new_size is 0 and @p is not a
1313  * %NULL pointer, the object pointed to is freed.
1314  */
1315 void *krealloc(const void *p, size_t new_size, gfp_t flags)
1316 {
1317     void *ret;
1318 
1319     if (unlikely(!new_size)) {
1320         kfree(p);
1321         return ZERO_SIZE_PTR;
1322     }
1323 
1324     ret = __do_krealloc(p, new_size, flags);
1325     if (ret && p != ret)
1326         kfree(p);
1327 
1328     return ret;
1329 }
1330 EXPORT_SYMBOL(krealloc);
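
/*
 * Illustrative krealloc() pattern (hypothetical buffer names): assign to a
 * temporary first, because on failure the original allocation is left
 * untouched and would otherwise be leaked.
 *
 *     new_buf = krealloc(buf, new_len, GFP_KERNEL);
 *     if (!new_buf)
 *         return -ENOMEM;        (buf is still valid and still owned here)
 *     buf = new_buf;
 */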
1331 
1332 /**
1333  * kzfree - like kfree but zero memory
1334  * @p: object to free memory of
1335  *
1336  * The memory of the object @p points to is zeroed before it is freed.
1337  * If @p is %NULL, kzfree() does nothing.
1338  *
1339  * Note: this function zeroes the whole allocated buffer which can be a good
1340  * deal bigger than the requested buffer size passed to kmalloc(). So be
1341  * careful when using this function in performance sensitive code.
1342  */
1343 void kzfree(const void *p)
1344 {
1345     size_t ks;
1346     void *mem = (void *)p;
1347 
1348     if (unlikely(ZERO_OR_NULL_PTR(mem)))
1349         return;
1350     ks = ksize(mem);
1351     memset(mem, 0, ks);
1352     kfree(mem);
1353 }
1354 EXPORT_SYMBOL(kzfree);
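
/*
 * Illustrative kzfree() use (hypothetical key material): a drop-in for
 * kfree() when the buffer held sensitive data, with the caveat above that
 * the whole ksize() region is cleared, not just the requested length.
 *
 *     key = kmalloc(key_len, GFP_KERNEL);
 *     ...
 *     kzfree(key);
 */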
1355 
1356 /* Tracepoints definitions. */
1357 EXPORT_TRACEPOINT_SYMBOL(kmalloc);
1358 EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
1359 EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
1360 EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
1361 EXPORT_TRACEPOINT_SYMBOL(kfree);
1362 EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);