// SPDX-License-Identifier: GPL-2.0-only
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/list_bl.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/workqueue.h>
#include <linux/mbcache.h>

/*
 * Mbcache is a simple key-value store. Keys need not be unique, but
 * key-value pairs are expected to be unique (we use this fact in
 * mb_cache_entry_delete_or_get()).
 *
 * Ext2 and ext4 use this cache for deduplication of extended attribute blocks.
 * Ext4 also uses it for deduplication of xattr values stored in inodes.
 * They use a hash of the data as the key and provide a value that may
 * represent a block or inode number. That is why keys need not be unique (the
 * hash of different data may be the same). However, the user-provided value
 * always uniquely identifies a cache entry.
 *
 * We provide functions for creation and removal of entries, search by key,
 * and a special "delete entry with given key-value pair" operation. A
 * fixed-size hash table is used for fast key lookups.
 */
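
/*
 * Illustrative sketch, not part of the original file: a typical user (e.g. a
 * filesystem) creates one cache at init/mount time and destroys it on
 * teardown. The names example_cache, example_init() and example_exit(), and
 * the choice of 2^10 hash buckets, are hypothetical.
 */
static struct mb_cache *example_cache;

static int __maybe_unused example_init(void)
{
	/* 2^10 buckets; the cache then limits itself to 2^10 << 4 entries */
	example_cache = mb_cache_create(10);
	return example_cache ? 0 : -ENOMEM;
}

static void __maybe_unused example_exit(void)
{
	if (example_cache)
		mb_cache_destroy(example_cache);
	example_cache = NULL;
}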

struct mb_cache {
	/* Hash table of entries */
	struct hlist_bl_head	*c_hash;
	/* log2 of hash table size */
	int			c_bucket_bits;
	/* Maximum entries in cache to avoid degrading hash too much */
	unsigned long		c_max_entries;
	/* Protects c_list, c_entry_count */
	spinlock_t		c_list_lock;
	struct list_head	c_list;
	/* Number of entries in cache */
	unsigned long		c_entry_count;
	struct shrinker		c_shrink;
	/* Work for shrinking when the cache has too many entries */
	struct work_struct	c_shrink_work;
};

static struct kmem_cache *mb_entry_cache;

static unsigned long mb_cache_shrink(struct mb_cache *cache,
				     unsigned long nr_to_scan);

static inline struct hlist_bl_head *mb_cache_entry_head(struct mb_cache *cache,
							u32 key)
{
	return &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
}

/*
 * Number of entries to reclaim synchronously when there are too many entries
 * in cache
 */
#define SYNC_SHRINK_BATCH 64

/*
 * mb_cache_entry_create - create entry in cache
 * @cache: cache where the entry should be created
 * @mask: gfp mask with which the entry should be allocated
 * @key: key of the entry
 * @value: value of the entry
 * @reusable: is the entry reusable by others?
 *
 * Creates an entry in @cache with key @key and value @value. The function
 * returns -EBUSY if an entry with the same key and value already exists in
 * the cache. Otherwise 0 is returned.
 */
int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
			  u64 value, bool reusable)
{
	struct mb_cache_entry *entry, *dup;
	struct hlist_bl_node *dup_node;
	struct hlist_bl_head *head;

	/* Schedule background reclaim if there are too many entries */
	if (cache->c_entry_count >= cache->c_max_entries)
		schedule_work(&cache->c_shrink_work);
	/* Do some sync reclaim if background reclaim cannot keep up */
	if (cache->c_entry_count >= 2*cache->c_max_entries)
		mb_cache_shrink(cache, SYNC_SHRINK_BATCH);

	entry = kmem_cache_alloc(mb_entry_cache, mask);
	if (!entry)
		return -ENOMEM;

	INIT_LIST_HEAD(&entry->e_list);
	/* Initial hash reference */
	atomic_set(&entry->e_refcnt, 1);
	entry->e_key = key;
	entry->e_value = value;
	entry->e_reusable = reusable;
	entry->e_referenced = 0;
	head = mb_cache_entry_head(cache, key);
	hlist_bl_lock(head);
	hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
		if (dup->e_key == key && dup->e_value == value) {
			hlist_bl_unlock(head);
			kmem_cache_free(mb_entry_cache, entry);
			return -EBUSY;
		}
	}
	hlist_bl_add_head(&entry->e_hash_list, head);
	/*
	 * Add the entry to the LRU list before it can be found by
	 * mb_cache_entry_delete_or_get() to avoid races
	 */
	spin_lock(&cache->c_list_lock);
	list_add_tail(&entry->e_list, &cache->c_list);
	cache->c_entry_count++;
	spin_unlock(&cache->c_list_lock);
	hlist_bl_unlock(head);

	return 0;
}
EXPORT_SYMBOL(mb_cache_entry_create);
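
/*
 * Illustrative sketch, not part of the original file: a deduplicating caller
 * might record a (content hash, block number) pair like this, treating
 * -EBUSY as "already cached". example_remember_block() is a hypothetical
 * name.
 */
static int __maybe_unused example_remember_block(struct mb_cache *cache,
						 u32 hash, u64 block)
{
	int err = mb_cache_entry_create(cache, GFP_NOFS, hash, block,
					true /* reusable by other callers */);

	if (err == -EBUSY)	/* the same key-value pair already exists */
		err = 0;
	return err;
}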

void __mb_cache_entry_free(struct mb_cache *cache, struct mb_cache_entry *entry)
{
	struct hlist_bl_head *head;

	head = mb_cache_entry_head(cache, entry->e_key);
	hlist_bl_lock(head);
	hlist_bl_del(&entry->e_hash_list);
	hlist_bl_unlock(head);
	kmem_cache_free(mb_entry_cache, entry);
}
EXPORT_SYMBOL(__mb_cache_entry_free);

/*
 * mb_cache_entry_wait_unused - wait to be the last user of the entry
 * @entry: entry to work on
 *
 * Wait to be the last user of the entry.
 */
void mb_cache_entry_wait_unused(struct mb_cache_entry *entry)
{
	wait_var_event(&entry->e_refcnt, atomic_read(&entry->e_refcnt) <= 2);
}
EXPORT_SYMBOL(mb_cache_entry_wait_unused);

static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
					   struct mb_cache_entry *entry,
					   u32 key)
{
	struct mb_cache_entry *old_entry = entry;
	struct hlist_bl_node *node;
	struct hlist_bl_head *head;

	head = mb_cache_entry_head(cache, key);
	hlist_bl_lock(head);
	if (entry && !hlist_bl_unhashed(&entry->e_hash_list))
		node = entry->e_hash_list.next;
	else
		node = hlist_bl_first(head);
	while (node) {
		entry = hlist_bl_entry(node, struct mb_cache_entry,
				       e_hash_list);
		if (entry->e_key == key && entry->e_reusable &&
		    atomic_inc_not_zero(&entry->e_refcnt))
			goto out;
		node = node->next;
	}
	entry = NULL;
out:
	hlist_bl_unlock(head);
	if (old_entry)
		mb_cache_entry_put(cache, old_entry);

	return entry;
}

/*
 * mb_cache_entry_find_first - find the first reusable entry with the given key
 * @cache: cache where we should search
 * @key: key to look for
 *
 * Search in @cache for a reusable entry with key @key. Grabs a reference to
 * the first reusable entry found and returns it.
 */
struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache,
						 u32 key)
{
	return __entry_find(cache, NULL, key);
}
EXPORT_SYMBOL(mb_cache_entry_find_first);

/*
 * mb_cache_entry_find_next - find next reusable entry with the same key
 * @cache: cache where we should search
 * @entry: entry to start search from
 *
 * Finds the next reusable entry in the hash chain which has the same key as
 * @entry. If @entry is unhashed (which can happen when deletion of the entry
 * races with the search), finds the first reusable entry in the hash chain.
 * The function drops the reference to @entry and returns with a reference to
 * the found entry.
 */
struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache *cache,
						struct mb_cache_entry *entry)
{
	return __entry_find(cache, entry, entry->e_key);
}
EXPORT_SYMBOL(mb_cache_entry_find_next);
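
/*
 * Illustrative sketch, not part of the original file: walking all reusable
 * entries that share a key, the way a dedup layer scans candidate blocks
 * whose content hashes collide. example_find_match() and the caller-supplied
 * example_matches() predicate are hypothetical.
 */
static u64 __maybe_unused example_find_match(struct mb_cache *cache, u32 key,
					     bool (*example_matches)(u64 value))
{
	struct mb_cache_entry *entry;
	u64 value = 0;	/* 0 is assumed to mean "no match" in this sketch */

	entry = mb_cache_entry_find_first(cache, key);
	while (entry) {
		if (example_matches(entry->e_value)) {
			value = entry->e_value;
			mb_cache_entry_put(cache, entry);
			break;
		}
		/* Drops the reference to @entry and returns the next one. */
		entry = mb_cache_entry_find_next(cache, entry);
	}
	return value;
}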

/*
 * mb_cache_entry_get - get a cache entry by value (and key)
 * @cache: cache we work with
 * @key: key
 * @value: value
 */
struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
					  u64 value)
{
	struct hlist_bl_node *node;
	struct hlist_bl_head *head;
	struct mb_cache_entry *entry;

	head = mb_cache_entry_head(cache, key);
	hlist_bl_lock(head);
	hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
		if (entry->e_key == key && entry->e_value == value &&
		    atomic_inc_not_zero(&entry->e_refcnt))
			goto out;
	}
	entry = NULL;
out:
	hlist_bl_unlock(head);
	return entry;
}
EXPORT_SYMBOL(mb_cache_entry_get);
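
/*
 * Illustrative sketch, not part of the original file: checking whether a
 * specific (key, value) pair is still cached and, if so, marking it as
 * recently used so reclaim is less likely to drop it.
 * example_still_cached() is a hypothetical name.
 */
static bool __maybe_unused example_still_cached(struct mb_cache *cache,
						u32 key, u64 value)
{
	struct mb_cache_entry *entry = mb_cache_entry_get(cache, key, value);

	if (!entry)
		return false;
	mb_cache_entry_touch(cache, entry);
	mb_cache_entry_put(cache, entry);
	return true;
}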

/*
 * mb_cache_entry_delete_or_get - remove a cache entry if it has no users
 * @cache: cache we work with
 * @key: key
 * @value: value
 *
 * Remove the entry from cache @cache with key @key and value @value. The
 * removal happens only if the entry is unused. The function returns NULL in
 * case the entry was successfully removed or there is no such entry in the
 * cache. Otherwise the function grabs a reference to the entry that we failed
 * to delete because it still has users, and returns it.
 */
struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache,
						    u32 key, u64 value)
{
	struct mb_cache_entry *entry;

	entry = mb_cache_entry_get(cache, key, value);
	if (!entry)
		return NULL;

	/*
	 * Drop the ref we got from mb_cache_entry_get() and the initial hash
	 * ref if we are the last user
	 */
	if (atomic_cmpxchg(&entry->e_refcnt, 2, 0) != 2)
		return entry;

	spin_lock(&cache->c_list_lock);
	if (!list_empty(&entry->e_list))
		list_del_init(&entry->e_list);
	cache->c_entry_count--;
	spin_unlock(&cache->c_list_lock);
	__mb_cache_entry_free(cache, entry);
	return NULL;
}
EXPORT_SYMBOL(mb_cache_entry_delete_or_get);
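
/*
 * Illustrative sketch, not part of the original file: dropping a pair when
 * the object it describes goes away. This loosely mirrors the retry pattern
 * ext4's xattr code uses: if the entry still has users, wait for them and
 * try the delete again. example_forget_block() is a hypothetical name.
 */
static void __maybe_unused example_forget_block(struct mb_cache *cache,
						u32 hash, u64 block)
{
	struct mb_cache_entry *entry;

	for (;;) {
		entry = mb_cache_entry_delete_or_get(cache, hash, block);
		if (!entry)
			return;	/* removed, or not cached at all */
		/* The entry is still in use: wait, drop our ref, retry. */
		mb_cache_entry_wait_unused(entry);
		mb_cache_entry_put(cache, entry);
	}
}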

/*
 * mb_cache_entry_touch - cache entry got used
 * @cache: cache the entry belongs to
 * @entry: entry that got used
 *
 * Marks the entry as used to give it a higher chance of surviving in the
 * cache.
 */
void mb_cache_entry_touch(struct mb_cache *cache,
			  struct mb_cache_entry *entry)
{
	entry->e_referenced = 1;
}
EXPORT_SYMBOL(mb_cache_entry_touch);

static unsigned long mb_cache_count(struct shrinker *shrink,
				    struct shrink_control *sc)
{
	struct mb_cache *cache = container_of(shrink, struct mb_cache,
					      c_shrink);

	return cache->c_entry_count;
}

/* Shrink number of entries in cache */
static unsigned long mb_cache_shrink(struct mb_cache *cache,
				     unsigned long nr_to_scan)
{
	struct mb_cache_entry *entry;
	unsigned long shrunk = 0;

	spin_lock(&cache->c_list_lock);
	while (nr_to_scan-- && !list_empty(&cache->c_list)) {
		entry = list_first_entry(&cache->c_list,
					 struct mb_cache_entry, e_list);
		/* Drop initial hash reference if there is no user */
		if (entry->e_referenced ||
		    atomic_cmpxchg(&entry->e_refcnt, 1, 0) != 1) {
			entry->e_referenced = 0;
			list_move_tail(&entry->e_list, &cache->c_list);
			continue;
		}
		list_del_init(&entry->e_list);
		cache->c_entry_count--;
		spin_unlock(&cache->c_list_lock);
		__mb_cache_entry_free(cache, entry);
		shrunk++;
		cond_resched();
		spin_lock(&cache->c_list_lock);
	}
	spin_unlock(&cache->c_list_lock);

	return shrunk;
}

static unsigned long mb_cache_scan(struct shrinker *shrink,
				   struct shrink_control *sc)
{
	struct mb_cache *cache = container_of(shrink, struct mb_cache,
					      c_shrink);
	return mb_cache_shrink(cache, sc->nr_to_scan);
}

/* We shrink 1/X of the cache when we have too many entries in it */
#define SHRINK_DIVISOR 16

static void mb_cache_shrink_worker(struct work_struct *work)
{
	struct mb_cache *cache = container_of(work, struct mb_cache,
					      c_shrink_work);
	mb_cache_shrink(cache, cache->c_max_entries / SHRINK_DIVISOR);
}

/*
 * mb_cache_create - create cache
 * @bucket_bits: log2 of the hash table size
 *
 * Create cache for keys with 2^bucket_bits hash entries.
 */
struct mb_cache *mb_cache_create(int bucket_bits)
{
	struct mb_cache *cache;
	unsigned long bucket_count = 1UL << bucket_bits;
	unsigned long i;

	cache = kzalloc(sizeof(struct mb_cache), GFP_KERNEL);
	if (!cache)
		goto err_out;
	cache->c_bucket_bits = bucket_bits;
	cache->c_max_entries = bucket_count << 4;
	INIT_LIST_HEAD(&cache->c_list);
	spin_lock_init(&cache->c_list_lock);
	cache->c_hash = kmalloc_array(bucket_count,
				      sizeof(struct hlist_bl_head),
				      GFP_KERNEL);
	if (!cache->c_hash) {
		kfree(cache);
		goto err_out;
	}
	for (i = 0; i < bucket_count; i++)
		INIT_HLIST_BL_HEAD(&cache->c_hash[i]);

	cache->c_shrink.count_objects = mb_cache_count;
	cache->c_shrink.scan_objects = mb_cache_scan;
	cache->c_shrink.seeks = DEFAULT_SEEKS;
	if (register_shrinker(&cache->c_shrink, "mbcache-shrinker")) {
		kfree(cache->c_hash);
		kfree(cache);
		goto err_out;
	}

	INIT_WORK(&cache->c_shrink_work, mb_cache_shrink_worker);

	return cache;

err_out:
	return NULL;
}
EXPORT_SYMBOL(mb_cache_create);

/*
 * mb_cache_destroy - destroy cache
 * @cache: the cache to destroy
 *
 * Free all entries in the cache and the cache itself. The caller must make
 * sure nobody (except the shrinker) can reach @cache when calling this.
 */
void mb_cache_destroy(struct mb_cache *cache)
{
	struct mb_cache_entry *entry, *next;

	unregister_shrinker(&cache->c_shrink);

	/*
	 * We don't bother with any locking. Cache must not be used at this
	 * point.
	 */
	list_for_each_entry_safe(entry, next, &cache->c_list, e_list) {
		list_del(&entry->e_list);
		WARN_ON(atomic_read(&entry->e_refcnt) != 1);
		mb_cache_entry_put(cache, entry);
	}
	kfree(cache->c_hash);
	kfree(cache);
}
EXPORT_SYMBOL(mb_cache_destroy);

static int __init mbcache_init(void)
{
	mb_entry_cache = kmem_cache_create("mbcache",
				sizeof(struct mb_cache_entry), 0,
				SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
	if (!mb_entry_cache)
		return -ENOMEM;
	return 0;
}

static void __exit mbcache_exit(void)
{
	kmem_cache_destroy(mb_entry_cache);
}

module_init(mbcache_init)
module_exit(mbcache_exit)

MODULE_AUTHOR("Jan Kara <jack@suse.cz>");
MODULE_DESCRIPTION("Meta block cache (for extended attributes)");
MODULE_LICENSE("GPL");