#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/list_bl.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/workqueue.h>
#include <linux/mbcache.h>

/*
 * Mbcache is a simple key-value store. Keys need not be unique; however,
 * key-value pairs are expected to be unique (we use this fact in
 * mb_cache_entry_delete_block()).
 *
 * Ext2 and ext4 use this cache for deduplication of extended attribute blocks.
 * They use a hash of the block contents as the key and the block number as
 * the value. That's why keys need not be unique (different xattr blocks may
 * end up having the same hash). However, a block number always uniquely
 * identifies a cache entry.
 *
 * We provide functions for creation and removal of entries, search by key,
 * and a special "delete entry with given key-value pair" operation. A fixed
 * size hash table is used for fast key lookups.
 */
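
/*
 * Illustrative lifecycle sketch (not part of this file): a minimal way a
 * user of mbcache might set up and tear down a cache instance. The bucket
 * count of 10 bits is an arbitrary example value; the calls themselves are
 * the ones defined below.
 *
 *      struct mb_cache *cache;
 *
 *      cache = mb_cache_create(10);    // hash table with 2^10 = 1024 buckets
 *      if (!cache)
 *              return -ENOMEM;         // allocation or shrinker registration failed
 *
 *      // ... create, find and delete entries via the functions below ...
 *
 *      mb_cache_destroy(cache);        // caller guarantees no other users remain
 */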

struct mb_cache {
    /* Hash table of entries */
    struct hlist_bl_head    *c_hash;
    /* log2 of hash table size */
    int         c_bucket_bits;
    /* Maximum entries in cache to avoid degrading hash too much */
    unsigned long       c_max_entries;
    /* Protects c_list, c_entry_count */
    spinlock_t      c_list_lock;
    struct list_head    c_list;
    /* Number of entries in cache */
    unsigned long       c_entry_count;
    struct shrinker     c_shrink;
    /* Work for shrinking when the cache has too many entries */
    struct work_struct  c_shrink_work;
};

static struct kmem_cache *mb_entry_cache;

static unsigned long mb_cache_shrink(struct mb_cache *cache,
                     unsigned long nr_to_scan);

static inline struct hlist_bl_head *mb_cache_entry_head(struct mb_cache *cache,
                            u32 key)
{
    return &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
}

/*
 * Number of entries to reclaim synchronously when there are too many entries
 * in cache
 */
#define SYNC_SHRINK_BATCH 64

/*
 * mb_cache_entry_create - create entry in cache
 * @cache: cache where the entry should be created
 * @mask: gfp mask with which the entry should be allocated
 * @key: key of the entry
 * @block: block that contains data
 * @reusable: is the block reusable by other inodes?
 *
 * Creates an entry in @cache with key @key and records that data is stored
 * in block @block. The function returns -EBUSY if an entry with the same key
 * and block already exists in the cache. Otherwise 0 is returned.
 */
int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
              sector_t block, bool reusable)
{
    struct mb_cache_entry *entry, *dup;
    struct hlist_bl_node *dup_node;
    struct hlist_bl_head *head;

    /* Schedule background reclaim if there are too many entries */
    if (cache->c_entry_count >= cache->c_max_entries)
        schedule_work(&cache->c_shrink_work);
    /* Do some sync reclaim if background reclaim cannot keep up */
    if (cache->c_entry_count >= 2*cache->c_max_entries)
        mb_cache_shrink(cache, SYNC_SHRINK_BATCH);

    entry = kmem_cache_alloc(mb_entry_cache, mask);
    if (!entry)
        return -ENOMEM;

    INIT_LIST_HEAD(&entry->e_list);
    /* One ref for the hash list; the LRU list grabs its own ref below */
    atomic_set(&entry->e_refcnt, 1);
    entry->e_key = key;
    entry->e_block = block;
    entry->e_reusable = reusable;
    head = mb_cache_entry_head(cache, key);
    hlist_bl_lock(head);
    hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
        if (dup->e_key == key && dup->e_block == block) {
            hlist_bl_unlock(head);
            kmem_cache_free(mb_entry_cache, entry);
            return -EBUSY;
        }
    }
    hlist_bl_add_head(&entry->e_hash_list, head);
    hlist_bl_unlock(head);

    spin_lock(&cache->c_list_lock);
    list_add_tail(&entry->e_list, &cache->c_list);
    /* Grab ref for LRU list */
    atomic_inc(&entry->e_refcnt);
    cache->c_entry_count++;
    spin_unlock(&cache->c_list_lock);

    return 0;
}
EXPORT_SYMBOL(mb_cache_entry_create);

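/*
 * Illustrative caller sketch (not part of this file): caching a freshly
 * written xattr block and tolerating the duplicate case, roughly the way a
 * filesystem might use mb_cache_entry_create(). The hash_of() helper and
 * the bh buffer head are assumed caller context, not part of this API.
 *
 *      u32 key = hash_of(bh->b_data);          // hash of the block contents
 *      int err;
 *
 *      err = mb_cache_entry_create(cache, GFP_NOFS, key, bh->b_blocknr, true);
 *      if (err == -EBUSY)
 *              err = 0;        // this (key, block) pair is already cached
 */
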
void __mb_cache_entry_free(struct mb_cache_entry *entry)
{
    kmem_cache_free(mb_entry_cache, entry);
}
EXPORT_SYMBOL(__mb_cache_entry_free);

static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
                       struct mb_cache_entry *entry,
                       u32 key)
{
    struct mb_cache_entry *old_entry = entry;
    struct hlist_bl_node *node;
    struct hlist_bl_head *head;

    head = mb_cache_entry_head(cache, key);
    hlist_bl_lock(head);
    if (entry && !hlist_bl_unhashed(&entry->e_hash_list))
        node = entry->e_hash_list.next;
    else
        node = hlist_bl_first(head);
    while (node) {
        entry = hlist_bl_entry(node, struct mb_cache_entry,
                       e_hash_list);
        if (entry->e_key == key && entry->e_reusable) {
            atomic_inc(&entry->e_refcnt);
            goto out;
        }
        node = node->next;
    }
    entry = NULL;
out:
    hlist_bl_unlock(head);
    if (old_entry)
        mb_cache_entry_put(cache, old_entry);

    return entry;
}

/*
 * mb_cache_entry_find_first - find the first reusable entry with the given key
 * @cache: cache where we should search
 * @key: key to look for
 *
 * Search in @cache for a reusable entry with key @key. Grabs reference to the
 * first reusable entry found and returns the entry.
 */
struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache,
                         u32 key)
{
    return __entry_find(cache, NULL, key);
}
EXPORT_SYMBOL(mb_cache_entry_find_first);

/*
 * mb_cache_entry_find_next - find next reusable entry with the same key
 * @cache: cache where we should search
 * @entry: entry to start search from
 *
 * Finds next reusable entry in the hash chain which has the same key as @entry.
 * If @entry is unhashed (which can happen when deletion of entry races with the
 * search), finds the first reusable entry in the hash chain. The function drops
 * reference to @entry and returns with a reference to the found entry.
 */
struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache *cache,
                        struct mb_cache_entry *entry)
{
    return __entry_find(cache, entry, entry->e_key);
}
EXPORT_SYMBOL(mb_cache_entry_find_next);

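/*
 * Illustrative search-loop sketch (not part of this file): walking all
 * reusable entries for a key until one passes a caller-side check. Each
 * call returns an entry with an elevated refcount, and
 * mb_cache_entry_find_next() drops the reference on the entry it was
 * handed, so only the entry the loop stops at needs an explicit put.
 * block_is_suitable() is a hypothetical caller predicate.
 *
 *      struct mb_cache_entry *ce;
 *
 *      ce = mb_cache_entry_find_first(cache, key);
 *      while (ce) {
 *              if (block_is_suitable(ce->e_block)) {
 *                      mb_cache_entry_touch(cache, ce);  // keep it cached
 *                      break;
 *              }
 *              ce = mb_cache_entry_find_next(cache, ce);
 *      }
 *      if (ce)
 *              mb_cache_entry_put(cache, ce);  // drop the search reference
 */
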
/*
 * mb_cache_entry_get - get a cache entry by block number (and key)
 * @cache: cache we work with
 * @key: key of block number @block
 * @block: block number
 */
struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
                      sector_t block)
{
    struct hlist_bl_node *node;
    struct hlist_bl_head *head;
    struct mb_cache_entry *entry;

    head = mb_cache_entry_head(cache, key);
    hlist_bl_lock(head);
    hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
        if (entry->e_key == key && entry->e_block == block) {
            atomic_inc(&entry->e_refcnt);
            goto out;
        }
    }
    entry = NULL;
out:
    hlist_bl_unlock(head);
    return entry;
}
EXPORT_SYMBOL(mb_cache_entry_get);

/*
 * mb_cache_entry_delete_block - remove information about block from cache
 * @cache: cache we work with
 * @key: key of block @block
 * @block: block number
 *
 * Remove the entry from cache @cache with key @key and data stored in
 * block @block.
 */
void mb_cache_entry_delete_block(struct mb_cache *cache, u32 key,
                 sector_t block)
{
    struct hlist_bl_node *node;
    struct hlist_bl_head *head;
    struct mb_cache_entry *entry;

    head = mb_cache_entry_head(cache, key);
    hlist_bl_lock(head);
    hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
        if (entry->e_key == key && entry->e_block == block) {
            /* We keep hash list reference to keep entry alive */
            hlist_bl_del_init(&entry->e_hash_list);
            hlist_bl_unlock(head);
            spin_lock(&cache->c_list_lock);
            if (!list_empty(&entry->e_list)) {
                list_del_init(&entry->e_list);
                cache->c_entry_count--;
                atomic_dec(&entry->e_refcnt);
            }
            spin_unlock(&cache->c_list_lock);
            mb_cache_entry_put(cache, entry);
            return;
        }
    }
    hlist_bl_unlock(head);
}
EXPORT_SYMBOL(mb_cache_entry_delete_block);

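/*
 * Illustrative sketch (not part of this file): when a cached block is freed
 * or its contents change, the stale mapping has to be dropped so later
 * lookups cannot hand out the old block. old_hash and bh are assumed caller
 * state; the call itself is the one defined above.
 *
 *      // forget the (hash, block) pair describing the previous contents
 *      mb_cache_entry_delete_block(cache, old_hash, bh->b_blocknr);
 */
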
/*
 * mb_cache_entry_touch - cache entry got used
 * @cache: cache the entry belongs to
 * @entry: entry that got used
 *
 * Marks the entry as used to give it a higher chance of surviving in the
 * cache.
 */
void mb_cache_entry_touch(struct mb_cache *cache,
              struct mb_cache_entry *entry)
{
    entry->e_referenced = 1;
}
EXPORT_SYMBOL(mb_cache_entry_touch);

static unsigned long mb_cache_count(struct shrinker *shrink,
                    struct shrink_control *sc)
{
    struct mb_cache *cache = container_of(shrink, struct mb_cache,
                          c_shrink);

    return cache->c_entry_count;
}

/* Shrink number of entries in cache */
static unsigned long mb_cache_shrink(struct mb_cache *cache,
                     unsigned long nr_to_scan)
{
    struct mb_cache_entry *entry;
    struct hlist_bl_head *head;
    unsigned long shrunk = 0;

    spin_lock(&cache->c_list_lock);
    while (nr_to_scan-- && !list_empty(&cache->c_list)) {
        entry = list_first_entry(&cache->c_list,
                     struct mb_cache_entry, e_list);
        if (entry->e_referenced) {
            entry->e_referenced = 0;
            list_move_tail(&entry->e_list, &cache->c_list);
            continue;
        }
        list_del_init(&entry->e_list);
        cache->c_entry_count--;
        /*
         * We keep LRU list reference so that entry doesn't go away
         * from under us.
         */
        spin_unlock(&cache->c_list_lock);
        head = mb_cache_entry_head(cache, entry->e_key);
        hlist_bl_lock(head);
        if (!hlist_bl_unhashed(&entry->e_hash_list)) {
            hlist_bl_del_init(&entry->e_hash_list);
            atomic_dec(&entry->e_refcnt);
        }
        hlist_bl_unlock(head);
        if (mb_cache_entry_put(cache, entry))
            shrunk++;
        cond_resched();
        spin_lock(&cache->c_list_lock);
    }
    spin_unlock(&cache->c_list_lock);

    return shrunk;
}

static unsigned long mb_cache_scan(struct shrinker *shrink,
                   struct shrink_control *sc)
{
    struct mb_cache *cache = container_of(shrink, struct mb_cache,
                          c_shrink);
    return mb_cache_shrink(cache, sc->nr_to_scan);
}

/* We shrink 1/X of the cache when we have too many entries in it */
#define SHRINK_DIVISOR 16

static void mb_cache_shrink_worker(struct work_struct *work)
{
    struct mb_cache *cache = container_of(work, struct mb_cache,
                          c_shrink_work);
    mb_cache_shrink(cache, cache->c_max_entries / SHRINK_DIVISOR);
}

/*
 * mb_cache_create - create cache
 * @bucket_bits: log2 of the hash table size
 *
 * Create a cache for keys with a hash table of 2^bucket_bits buckets.
 */
struct mb_cache *mb_cache_create(int bucket_bits)
{
    struct mb_cache *cache;
    unsigned long bucket_count = 1UL << bucket_bits;
    unsigned long i;

    cache = kzalloc(sizeof(struct mb_cache), GFP_KERNEL);
    if (!cache)
        goto err_out;
    cache->c_bucket_bits = bucket_bits;
    cache->c_max_entries = bucket_count << 4;
    INIT_LIST_HEAD(&cache->c_list);
    spin_lock_init(&cache->c_list_lock);
    cache->c_hash = kmalloc(bucket_count * sizeof(struct hlist_bl_head),
                GFP_KERNEL);
    if (!cache->c_hash) {
        kfree(cache);
        goto err_out;
    }
    for (i = 0; i < bucket_count; i++)
        INIT_HLIST_BL_HEAD(&cache->c_hash[i]);

    cache->c_shrink.count_objects = mb_cache_count;
    cache->c_shrink.scan_objects = mb_cache_scan;
    cache->c_shrink.seeks = DEFAULT_SEEKS;
    if (register_shrinker(&cache->c_shrink)) {
        kfree(cache->c_hash);
        kfree(cache);
        goto err_out;
    }

    INIT_WORK(&cache->c_shrink_work, mb_cache_shrink_worker);

    return cache;

err_out:
    return NULL;
}
EXPORT_SYMBOL(mb_cache_create);

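/*
 * Sizing note (illustrative, not part of this file): c_max_entries is fixed
 * at bucket_count << 4, i.e. 16 entries per hash bucket on average. For
 * example:
 *
 *      mb_cache_create(10);    // 1 << 10 = 1024 buckets, c_max_entries = 16384
 *      mb_cache_create(6);     // 1 << 6  =   64 buckets, c_max_entries = 1024
 *
 * Once c_entry_count reaches c_max_entries, mb_cache_entry_create() kicks
 * off the background shrink worker, and at twice that limit it also reclaims
 * SYNC_SHRINK_BATCH entries synchronously (see above).
 */
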
/*
 * mb_cache_destroy - destroy cache
 * @cache: the cache to destroy
 *
 * Free all entries in cache and cache itself. Caller must make sure nobody
 * (except shrinker) can reach @cache when calling this.
 */
void mb_cache_destroy(struct mb_cache *cache)
{
    struct mb_cache_entry *entry, *next;

    unregister_shrinker(&cache->c_shrink);

    /*
     * We don't bother with any locking. Cache must not be used at this
     * point.
     */
    list_for_each_entry_safe(entry, next, &cache->c_list, e_list) {
        if (!hlist_bl_unhashed(&entry->e_hash_list)) {
            hlist_bl_del_init(&entry->e_hash_list);
            atomic_dec(&entry->e_refcnt);
        } else
            WARN_ON(1);
        list_del(&entry->e_list);
        WARN_ON(atomic_read(&entry->e_refcnt) != 1);
        mb_cache_entry_put(cache, entry);
    }
    kfree(cache->c_hash);
    kfree(cache);
}
EXPORT_SYMBOL(mb_cache_destroy);

static int __init mbcache_init(void)
{
    mb_entry_cache = kmem_cache_create("mbcache",
                sizeof(struct mb_cache_entry), 0,
                SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
    if (!mb_entry_cache)
        return -ENOMEM;
    return 0;
}

static void __exit mbcache_exit(void)
{
    kmem_cache_destroy(mb_entry_cache);
}

module_init(mbcache_init)
module_exit(mbcache_exit)

MODULE_AUTHOR("Jan Kara <jack@suse.cz>");
MODULE_DESCRIPTION("Meta block cache (for extended attributes)");
MODULE_LICENSE("GPL");