0001 /*
0002  * Resizable, Scalable, Concurrent Hash Table
0003  *
0004  * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
0005  * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
0006  * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
0007  *
0008  * Code partially derived from nft_hash
0009  * Rewritten with rehash code from br_multicast plus single list
0010  * pointer as suggested by Josh Triplett
0011  *
0012  * This program is free software; you can redistribute it and/or modify
0013  * it under the terms of the GNU General Public License version 2 as
0014  * published by the Free Software Foundation.
0015  */
0016 
0017 #include <linux/atomic.h>
0018 #include <linux/kernel.h>
0019 #include <linux/init.h>
0020 #include <linux/log2.h>
0021 #include <linux/sched.h>
0022 #include <linux/slab.h>
0023 #include <linux/vmalloc.h>
0024 #include <linux/mm.h>
0025 #include <linux/jhash.h>
0026 #include <linux/random.h>
0027 #include <linux/rhashtable.h>
0028 #include <linux/err.h>
0029 #include <linux/export.h>
0030 
0031 #define HASH_DEFAULT_SIZE   64UL
0032 #define HASH_MIN_SIZE       4U
0033 #define BUCKET_LOCKS_PER_CPU    32UL
0034 
0035 static u32 head_hashfn(struct rhashtable *ht,
0036                const struct bucket_table *tbl,
0037                const struct rhash_head *he)
0038 {
0039     return rht_head_hashfn(ht, tbl, he, ht->p);
0040 }
0041 
0042 #ifdef CONFIG_PROVE_LOCKING
0043 #define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT))
0044 
0045 int lockdep_rht_mutex_is_held(struct rhashtable *ht)
0046 {
0047     return (debug_locks) ? lockdep_is_held(&ht->mutex) : 1;
0048 }
0049 EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);
0050 
0051 int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
0052 {
0053     spinlock_t *lock = rht_bucket_lock(tbl, hash);
0054 
0055     return (debug_locks) ? lockdep_is_held(lock) : 1;
0056 }
0057 EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
0058 #else
0059 #define ASSERT_RHT_MUTEX(HT)
0060 #endif
0061 
0062 
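/* Illustrative sketch (not part of the original file): how the bucket lock
 * array below ends up sized.  Assuming 8 possible CPUs, the default
 * locks_mul of 32 (BUCKET_LOCKS_PER_CPU) and a table of 1024 buckets:
 *
 *	size = roundup_pow_of_two(8 * 32)  = 256;
 *	size = min(256, 1024 >> 1)         = 256;
 *	tbl->locks_mask = 256 - 1          = 255;
 *
 * so rht_bucket_lock() effectively returns &tbl->locks[hash & 255], i.e.
 * many buckets share each spinlock.
 */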
0063 static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
0064                   gfp_t gfp)
0065 {
0066     unsigned int i, size;
0067 #if defined(CONFIG_PROVE_LOCKING)
0068     unsigned int nr_pcpus = 2;
0069 #else
0070     unsigned int nr_pcpus = num_possible_cpus();
0071 #endif
0072 
0073     nr_pcpus = min_t(unsigned int, nr_pcpus, 64UL);
0074     size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul);
0075 
0076     /* Never allocate more than 0.5 locks per bucket */
0077     size = min_t(unsigned int, size, tbl->size >> 1);
0078 
0079     if (sizeof(spinlock_t) != 0) {
0080         tbl->locks = NULL;
0081 #ifdef CONFIG_NUMA
0082         if (size * sizeof(spinlock_t) > PAGE_SIZE &&
0083             gfp == GFP_KERNEL)
0084             tbl->locks = vmalloc(size * sizeof(spinlock_t));
0085 #endif
0086         if (gfp != GFP_KERNEL)
0087             gfp |= __GFP_NOWARN | __GFP_NORETRY;
0088 
0089         if (!tbl->locks)
0090             tbl->locks = kmalloc_array(size, sizeof(spinlock_t),
0091                            gfp);
0092         if (!tbl->locks)
0093             return -ENOMEM;
0094         for (i = 0; i < size; i++)
0095             spin_lock_init(&tbl->locks[i]);
0096     }
0097     tbl->locks_mask = size - 1;
0098 
0099     return 0;
0100 }
0101 
0102 static void bucket_table_free(const struct bucket_table *tbl)
0103 {
0104     if (tbl)
0105         kvfree(tbl->locks);
0106 
0107     kvfree(tbl);
0108 }
0109 
0110 static void bucket_table_free_rcu(struct rcu_head *head)
0111 {
0112     bucket_table_free(container_of(head, struct bucket_table, rcu));
0113 }
0114 
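/* Illustrative note (not part of the original file): bucket_table_alloc()
 * initialises every empty bucket with INIT_RHT_NULLS_HEAD() rather than a
 * plain NULL.  The resulting "nulls" marker is derived from the table's
 * nulls_base and the bucket hash, and its set low bit is what lets
 * rht_is_a_nulls() (used in the rehash, walk and free paths below)
 * distinguish the end of a chain from a real rhash_head pointer.
 */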
0115 static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
0116                            size_t nbuckets,
0117                            gfp_t gfp)
0118 {
0119     struct bucket_table *tbl = NULL;
0120     size_t size;
0121     int i;
0122 
0123     size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
0124     if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER) ||
0125         gfp != GFP_KERNEL)
0126         tbl = kzalloc(size, gfp | __GFP_NOWARN | __GFP_NORETRY);
0127     if (tbl == NULL && gfp == GFP_KERNEL)
0128         tbl = vzalloc(size);
0129     if (tbl == NULL)
0130         return NULL;
0131 
0132     tbl->size = nbuckets;
0133 
0134     if (alloc_bucket_locks(ht, tbl, gfp) < 0) {
0135         bucket_table_free(tbl);
0136         return NULL;
0137     }
0138 
0139     INIT_LIST_HEAD(&tbl->walkers);
0140 
0141     get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
0142 
0143     for (i = 0; i < nbuckets; i++)
0144         INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i);
0145 
0146     return tbl;
0147 }
0148 
0149 static struct bucket_table *rhashtable_last_table(struct rhashtable *ht,
0150                           struct bucket_table *tbl)
0151 {
0152     struct bucket_table *new_tbl;
0153 
0154     do {
0155         new_tbl = tbl;
0156         tbl = rht_dereference_rcu(tbl->future_tbl, ht);
0157     } while (tbl);
0158 
0159     return new_tbl;
0160 }
0161 
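/* Illustrative note (not part of the original file): rhashtable_rehash_one()
 * moves one entry per call, taken from the tail of the old chain.  The entry
 * is first linked into the head of its new bucket (under the nested
 * new-bucket lock) and only then unlinked from the old chain, so a
 * concurrent RCU reader always finds it in at least one of the two tables.
 * rhashtable_rehash_chain() repeats this until the old bucket is empty and
 * then bumps old_tbl->rehash, which is what rhashtable_try_insert() checks
 * to pick the right table for an insertion.
 */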
0162 static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash)
0163 {
0164     struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
0165     struct bucket_table *new_tbl = rhashtable_last_table(ht,
0166         rht_dereference_rcu(old_tbl->future_tbl, ht));
0167     struct rhash_head __rcu **pprev = &old_tbl->buckets[old_hash];
0168     int err = -ENOENT;
0169     struct rhash_head *head, *next, *entry;
0170     spinlock_t *new_bucket_lock;
0171     unsigned int new_hash;
0172 
0173     rht_for_each(entry, old_tbl, old_hash) {
0174         err = 0;
0175         next = rht_dereference_bucket(entry->next, old_tbl, old_hash);
0176 
0177         if (rht_is_a_nulls(next))
0178             break;
0179 
0180         pprev = &entry->next;
0181     }
0182 
0183     if (err)
0184         goto out;
0185 
0186     new_hash = head_hashfn(ht, new_tbl, entry);
0187 
0188     new_bucket_lock = rht_bucket_lock(new_tbl, new_hash);
0189 
0190     spin_lock_nested(new_bucket_lock, SINGLE_DEPTH_NESTING);
0191     head = rht_dereference_bucket(new_tbl->buckets[new_hash],
0192                       new_tbl, new_hash);
0193 
0194     RCU_INIT_POINTER(entry->next, head);
0195 
0196     rcu_assign_pointer(new_tbl->buckets[new_hash], entry);
0197     spin_unlock(new_bucket_lock);
0198 
0199     rcu_assign_pointer(*pprev, next);
0200 
0201 out:
0202     return err;
0203 }
0204 
0205 static void rhashtable_rehash_chain(struct rhashtable *ht,
0206                     unsigned int old_hash)
0207 {
0208     struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
0209     spinlock_t *old_bucket_lock;
0210 
0211     old_bucket_lock = rht_bucket_lock(old_tbl, old_hash);
0212 
0213     spin_lock_bh(old_bucket_lock);
0214     while (!rhashtable_rehash_one(ht, old_hash))
0215         ;
0216     old_tbl->rehash++;
0217     spin_unlock_bh(old_bucket_lock);
0218 }
0219 
0220 static int rhashtable_rehash_attach(struct rhashtable *ht,
0221                     struct bucket_table *old_tbl,
0222                     struct bucket_table *new_tbl)
0223 {
0224     /* Protect future_tbl using the first bucket lock. */
0225     spin_lock_bh(old_tbl->locks);
0226 
0227     /* Did somebody beat us to it? */
0228     if (rcu_access_pointer(old_tbl->future_tbl)) {
0229         spin_unlock_bh(old_tbl->locks);
0230         return -EEXIST;
0231     }
0232 
0233     /* Make insertions go into the new, empty table right away. Deletions
0234      * and lookups will be attempted in both tables until we synchronize.
0235      */
0236     rcu_assign_pointer(old_tbl->future_tbl, new_tbl);
0237 
0238     spin_unlock_bh(old_tbl->locks);
0239 
0240     return 0;
0241 }
0242 
0243 static int rhashtable_rehash_table(struct rhashtable *ht)
0244 {
0245     struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
0246     struct bucket_table *new_tbl;
0247     struct rhashtable_walker *walker;
0248     unsigned int old_hash;
0249 
0250     new_tbl = rht_dereference(old_tbl->future_tbl, ht);
0251     if (!new_tbl)
0252         return 0;
0253 
0254     for (old_hash = 0; old_hash < old_tbl->size; old_hash++)
0255         rhashtable_rehash_chain(ht, old_hash);
0256 
0257     /* Publish the new table pointer. */
0258     rcu_assign_pointer(ht->tbl, new_tbl);
0259 
0260     spin_lock(&ht->lock);
0261     list_for_each_entry(walker, &old_tbl->walkers, list)
0262         walker->tbl = NULL;
0263     spin_unlock(&ht->lock);
0264 
0265     /* Wait for readers. All new readers will see the new
0266      * table, and thus no references to the old table will
0267      * remain.
0268      */
0269     call_rcu(&old_tbl->rcu, bucket_table_free_rcu);
0270 
0271     return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0;
0272 }
0273 
0274 /**
0275  * rhashtable_expand - Expand hash table while allowing concurrent lookups
0276  * @ht:     the hash table to expand
0277  *
0278  * A secondary bucket array is allocated and the hash entries are migrated.
0279  *
0280  * This function may only be called in a context where it is safe to call
0281  * synchronize_rcu(), e.g. not within an rcu_read_lock() section.
0282  *
0283  * The caller must ensure that no concurrent resizing occurs by holding
0284  * ht->mutex.
0285  *
0286  * It is valid to have concurrent insertions and deletions protected by per
0287  * bucket locks or concurrent RCU protected lookups and traversals.
0288  */
0289 static int rhashtable_expand(struct rhashtable *ht)
0290 {
0291     struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
0292     int err;
0293 
0294     ASSERT_RHT_MUTEX(ht);
0295 
0296     old_tbl = rhashtable_last_table(ht, old_tbl);
0297 
0298     new_tbl = bucket_table_alloc(ht, old_tbl->size * 2, GFP_KERNEL);
0299     if (new_tbl == NULL)
0300         return -ENOMEM;
0301 
0302     err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
0303     if (err)
0304         bucket_table_free(new_tbl);
0305 
0306     return err;
0307 }
0308 
0309 /**
0310  * rhashtable_shrink - Shrink hash table while allowing concurrent lookups
0311  * @ht:     the hash table to shrink
0312  *
0313  * This function shrinks the hash table to the smallest size that
0314  * would not cause it to expand again right away automatically.
0315  *
0316  * The caller must ensure that no concurrent resizing occurs by holding
0317  * ht->mutex.
0318  *
0319  * The caller must ensure that no concurrent table mutations take place.
0320  * It is however valid to have concurrent lookups if they are RCU protected.
0321  *
0322  * It is valid to have concurrent insertions and deletions protected by per
0323  * bucket locks or concurrent RCU protected lookups and traversals.
0324  */
0325 static int rhashtable_shrink(struct rhashtable *ht)
0326 {
0327     struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
0328     unsigned int nelems = atomic_read(&ht->nelems);
0329     unsigned int size = 0;
0330     int err;
0331 
0332     ASSERT_RHT_MUTEX(ht);
0333 
0334     if (nelems)
0335         size = roundup_pow_of_two(nelems * 3 / 2);
0336     if (size < ht->p.min_size)
0337         size = ht->p.min_size;
0338 
0339     if (old_tbl->size <= size)
0340         return 0;
0341 
0342     if (rht_dereference(old_tbl->future_tbl, ht))
0343         return -EEXIST;
0344 
0345     new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
0346     if (new_tbl == NULL)
0347         return -ENOMEM;
0348 
0349     err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
0350     if (err)
0351         bucket_table_free(new_tbl);
0352 
0353     return err;
0354 }
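
/* Worked example for the sizing above (illustrative, not part of the
 * original file): with nelems = 25 the target size is
 * roundup_pow_of_two(25 * 3 / 2) = roundup_pow_of_two(37) = 64, so a table
 * that had grown to 256 buckets would be shrunk to 64 (assuming
 * p.min_size <= 64), while a table already at 64 or fewer buckets is left
 * alone.
 */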
0355 
0356 static void rht_deferred_worker(struct work_struct *work)
0357 {
0358     struct rhashtable *ht;
0359     struct bucket_table *tbl;
0360     int err = 0;
0361 
0362     ht = container_of(work, struct rhashtable, run_work);
0363     mutex_lock(&ht->mutex);
0364 
0365     tbl = rht_dereference(ht->tbl, ht);
0366     tbl = rhashtable_last_table(ht, tbl);
0367 
0368     if (rht_grow_above_75(ht, tbl))
0369         rhashtable_expand(ht);
0370     else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl))
0371         rhashtable_shrink(ht);
0372 
0373     err = rhashtable_rehash_table(ht);
0374 
0375     mutex_unlock(&ht->mutex);
0376 
0377     if (err)
0378         schedule_work(&ht->run_work);
0379 }
0380 
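/* Illustrative note (not part of the original file): this is the insert-path
 * counterpart of the deferred worker above.  It is reached from
 * rhashtable_try_insert() under RCU, so the new table must be allocated
 * with GFP_ATOMIC; if that allocation fails, the resize is instead retried
 * from rht_deferred_worker() in process context via schedule_work().
 */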
0381 static int rhashtable_insert_rehash(struct rhashtable *ht,
0382                     struct bucket_table *tbl)
0383 {
0384     struct bucket_table *old_tbl;
0385     struct bucket_table *new_tbl;
0386     unsigned int size;
0387     int err;
0388 
0389     old_tbl = rht_dereference_rcu(ht->tbl, ht);
0390 
0391     size = tbl->size;
0392 
0393     err = -EBUSY;
0394 
0395     if (rht_grow_above_75(ht, tbl))
0396         size *= 2;
0397     /* Do not schedule more than one rehash */
0398     else if (old_tbl != tbl)
0399         goto fail;
0400 
0401     err = -ENOMEM;
0402 
0403     new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC);
0404     if (new_tbl == NULL)
0405         goto fail;
0406 
0407     err = rhashtable_rehash_attach(ht, tbl, new_tbl);
0408     if (err) {
0409         bucket_table_free(new_tbl);
0410         if (err == -EEXIST)
0411             err = 0;
0412     } else
0413         schedule_work(&ht->run_work);
0414 
0415     return err;
0416 
0417 fail:
0418     /* Do not fail the insert if someone else did a rehash. */
0419     if (likely(rcu_dereference_raw(tbl->future_tbl)))
0420         return 0;
0421 
0422     /* Schedule async rehash to retry allocation in process context. */
0423     if (err == -ENOMEM)
0424         schedule_work(&ht->run_work);
0425 
0426     return err;
0427 }
0428 
0429 static void *rhashtable_lookup_one(struct rhashtable *ht,
0430                    struct bucket_table *tbl, unsigned int hash,
0431                    const void *key, struct rhash_head *obj)
0432 {
0433     struct rhashtable_compare_arg arg = {
0434         .ht = ht,
0435         .key = key,
0436     };
0437     struct rhash_head __rcu **pprev;
0438     struct rhash_head *head;
0439     int elasticity;
0440 
0441     elasticity = ht->elasticity;
0442     pprev = &tbl->buckets[hash];
0443     rht_for_each(head, tbl, hash) {
0444         struct rhlist_head *list;
0445         struct rhlist_head *plist;
0446 
0447         elasticity--;
0448         if (!key ||
0449             (ht->p.obj_cmpfn ?
0450              ht->p.obj_cmpfn(&arg, rht_obj(ht, head)) :
0451              rhashtable_compare(&arg, rht_obj(ht, head))))
0452             continue;
0453 
0454         if (!ht->rhlist)
0455             return rht_obj(ht, head);
0456 
0457         list = container_of(obj, struct rhlist_head, rhead);
0458         plist = container_of(head, struct rhlist_head, rhead);
0459 
0460         RCU_INIT_POINTER(list->next, plist);
0461         head = rht_dereference_bucket(head->next, tbl, hash);
0462         RCU_INIT_POINTER(list->rhead.next, head);
0463         rcu_assign_pointer(*pprev, obj);
0464 
0465         return NULL;
0466     }
0467 
0468     if (elasticity <= 0)
0469         return ERR_PTR(-EAGAIN);
0470 
0471     return ERR_PTR(-ENOENT);
0472 }
0473 
0474 static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
0475                           struct bucket_table *tbl,
0476                           unsigned int hash,
0477                           struct rhash_head *obj,
0478                           void *data)
0479 {
0480     struct bucket_table *new_tbl;
0481     struct rhash_head *head;
0482 
0483     if (!IS_ERR_OR_NULL(data))
0484         return ERR_PTR(-EEXIST);
0485 
0486     if (PTR_ERR(data) != -EAGAIN && PTR_ERR(data) != -ENOENT)
0487         return ERR_CAST(data);
0488 
0489     new_tbl = rcu_dereference(tbl->future_tbl);
0490     if (new_tbl)
0491         return new_tbl;
0492 
0493     if (PTR_ERR(data) != -ENOENT)
0494         return ERR_CAST(data);
0495 
0496     if (unlikely(rht_grow_above_max(ht, tbl)))
0497         return ERR_PTR(-E2BIG);
0498 
0499     if (unlikely(rht_grow_above_100(ht, tbl)))
0500         return ERR_PTR(-EAGAIN);
0501 
0502     head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
0503 
0504     RCU_INIT_POINTER(obj->next, head);
0505     if (ht->rhlist) {
0506         struct rhlist_head *list;
0507 
0508         list = container_of(obj, struct rhlist_head, rhead);
0509         RCU_INIT_POINTER(list->next, NULL);
0510     }
0511 
0512     rcu_assign_pointer(tbl->buckets[hash], obj);
0513 
0514     atomic_inc(&ht->nelems);
0515     if (rht_grow_above_75(ht, tbl))
0516         schedule_work(&ht->run_work);
0517 
0518     return NULL;
0519 }
0520 
0521 static void *rhashtable_try_insert(struct rhashtable *ht, const void *key,
0522                    struct rhash_head *obj)
0523 {
0524     struct bucket_table *new_tbl;
0525     struct bucket_table *tbl;
0526     unsigned int hash;
0527     spinlock_t *lock;
0528     void *data;
0529 
0530     tbl = rcu_dereference(ht->tbl);
0531 
0532     /* All insertions must grab the oldest table containing
0533      * the hashed bucket that is yet to be rehashed.
0534      */
0535     for (;;) {
0536         hash = rht_head_hashfn(ht, tbl, obj, ht->p);
0537         lock = rht_bucket_lock(tbl, hash);
0538         spin_lock_bh(lock);
0539 
0540         if (tbl->rehash <= hash)
0541             break;
0542 
0543         spin_unlock_bh(lock);
0544         tbl = rcu_dereference(tbl->future_tbl);
0545     }
0546 
0547     data = rhashtable_lookup_one(ht, tbl, hash, key, obj);
0548     new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data);
0549     if (PTR_ERR(new_tbl) != -EEXIST)
0550         data = ERR_CAST(new_tbl);
0551 
0552     while (!IS_ERR_OR_NULL(new_tbl)) {
0553         tbl = new_tbl;
0554         hash = rht_head_hashfn(ht, tbl, obj, ht->p);
0555         spin_lock_nested(rht_bucket_lock(tbl, hash),
0556                  SINGLE_DEPTH_NESTING);
0557 
0558         data = rhashtable_lookup_one(ht, tbl, hash, key, obj);
0559         new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data);
0560         if (PTR_ERR(new_tbl) != -EEXIST)
0561             data = ERR_CAST(new_tbl);
0562 
0563         spin_unlock(rht_bucket_lock(tbl, hash));
0564     }
0565 
0566     spin_unlock_bh(lock);
0567 
0568     if (PTR_ERR(data) == -EAGAIN)
0569         data = ERR_PTR(rhashtable_insert_rehash(ht, tbl) ?:
0570                    -EAGAIN);
0571 
0572     return data;
0573 }
0574 
0575 void *rhashtable_insert_slow(struct rhashtable *ht, const void *key,
0576                  struct rhash_head *obj)
0577 {
0578     void *data;
0579 
0580     do {
0581         rcu_read_lock();
0582         data = rhashtable_try_insert(ht, key, obj);
0583         rcu_read_unlock();
0584     } while (PTR_ERR(data) == -EAGAIN);
0585 
0586     return data;
0587 }
0588 EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
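
/* Usage note (illustrative, not part of the original file): callers do not
 * normally invoke rhashtable_insert_slow() directly.  The inline fast-path
 * helpers in <linux/rhashtable.h> (e.g. rhashtable_insert_fast() or
 * rhashtable_lookup_insert_fast()) attempt the insertion themselves and
 * fall back to this slow path when the table is being resized, must be
 * grown, or the bucket chain walk exceeds the elasticity limit.  A caller
 * therefore only sees the usual error codes, for instance (sketch; 'obj',
 * 'my_ht' and 'my_params' are hypothetical):
 *
 *	err = rhashtable_insert_fast(&my_ht, &obj->node, my_params);
 *	if (err == -ENOMEM || err == -E2BIG)
 *		;	// table could not be grown: drop or retry later
 */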
0589 
0590 /**
0591  * rhashtable_walk_enter - Initialise an iterator
0592  * @ht:     Table to walk over
0593  * @iter:   Hash table Iterator
0594  *
0595  * This function prepares a hash table walk.
0596  *
0597  * Note that if you restart a walk after rhashtable_walk_stop you
0598  * may see the same object twice.  Also, you may miss objects if
0599  * there are removals in between rhashtable_walk_stop and the next
0600  * call to rhashtable_walk_start.
0601  *
0602  * For a completely stable walk you should construct your own data
0603  * structure outside the hash table.
0604  *
0605  * This function may sleep so you must not call it from interrupt
0606  * context or with spin locks held.
0607  *
0608  * You must call rhashtable_walk_exit after this function returns.
0609  */
0610 void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter)
0611 {
0612     iter->ht = ht;
0613     iter->p = NULL;
0614     iter->slot = 0;
0615     iter->skip = 0;
0616 
0617     spin_lock(&ht->lock);
0618     iter->walker.tbl =
0619         rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock));
0620     list_add(&iter->walker.list, &iter->walker.tbl->walkers);
0621     spin_unlock(&ht->lock);
0622 }
0623 EXPORT_SYMBOL_GPL(rhashtable_walk_enter);
0624 
0625 /**
0626  * rhashtable_walk_exit - Free an iterator
0627  * @iter:   Hash table Iterator
0628  *
0629  * This function frees resources allocated by rhashtable_walk_enter.
0630  */
0631 void rhashtable_walk_exit(struct rhashtable_iter *iter)
0632 {
0633     spin_lock(&iter->ht->lock);
0634     if (iter->walker.tbl)
0635         list_del(&iter->walker.list);
0636     spin_unlock(&iter->ht->lock);
0637 }
0638 EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
0639 
0640 /**
0641  * rhashtable_walk_start - Start a hash table walk
0642  * @iter:   Hash table iterator
0643  *
0644  * Start a hash table walk.  Note that we take the RCU lock in all
0645  * cases including when we return an error.  So you must always call
0646  * rhashtable_walk_stop to clean up.
0647  *
0648  * Returns zero if successful.
0649  *
0650  * Returns -EAGAIN if a resize event occurred.  Note that the iterator
0651  * will rewind to the beginning and you may use it immediately
0652  * by calling rhashtable_walk_next.
0653  */
0654 int rhashtable_walk_start(struct rhashtable_iter *iter)
0655     __acquires(RCU)
0656 {
0657     struct rhashtable *ht = iter->ht;
0658 
0659     rcu_read_lock();
0660 
0661     spin_lock(&ht->lock);
0662     if (iter->walker.tbl)
0663         list_del(&iter->walker.list);
0664     spin_unlock(&ht->lock);
0665 
0666     if (!iter->walker.tbl) {
0667         iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht);
0668         return -EAGAIN;
0669     }
0670 
0671     return 0;
0672 }
0673 EXPORT_SYMBOL_GPL(rhashtable_walk_start);
0674 
0675 /**
0676  * rhashtable_walk_next - Return the next object and advance the iterator
0677  * @iter:   Hash table iterator
0678  *
0679  * Note that you must call rhashtable_walk_stop when you are finished
0680  * with the walk.
0681  *
0682  * Returns the next object or NULL when the end of the table is reached.
0683  *
0684  * Returns -EAGAIN if a resize event occurred.  Note that the iterator
0685  * will rewind to the beginning and you may continue to use it.
0686  */
0687 void *rhashtable_walk_next(struct rhashtable_iter *iter)
0688 {
0689     struct bucket_table *tbl = iter->walker.tbl;
0690     struct rhlist_head *list = iter->list;
0691     struct rhashtable *ht = iter->ht;
0692     struct rhash_head *p = iter->p;
0693     bool rhlist = ht->rhlist;
0694 
0695     if (p) {
0696         if (!rhlist || !(list = rcu_dereference(list->next))) {
0697             p = rcu_dereference(p->next);
0698             list = container_of(p, struct rhlist_head, rhead);
0699         }
0700         goto next;
0701     }
0702 
0703     for (; iter->slot < tbl->size; iter->slot++) {
0704         int skip = iter->skip;
0705 
0706         rht_for_each_rcu(p, tbl, iter->slot) {
0707             if (rhlist) {
0708                 list = container_of(p, struct rhlist_head,
0709                             rhead);
0710                 do {
0711                     if (!skip)
0712                         goto next;
0713                     skip--;
0714                     list = rcu_dereference(list->next);
0715                 } while (list);
0716 
0717                 continue;
0718             }
0719             if (!skip)
0720                 break;
0721             skip--;
0722         }
0723 
0724 next:
0725         if (!rht_is_a_nulls(p)) {
0726             iter->skip++;
0727             iter->p = p;
0728             iter->list = list;
0729             return rht_obj(ht, rhlist ? &list->rhead : p);
0730         }
0731 
0732         iter->skip = 0;
0733     }
0734 
0735     iter->p = NULL;
0736 
0737     /* Ensure we see any new tables. */
0738     smp_rmb();
0739 
0740     iter->walker.tbl = rht_dereference_rcu(tbl->future_tbl, ht);
0741     if (iter->walker.tbl) {
0742         iter->slot = 0;
0743         iter->skip = 0;
0744         return ERR_PTR(-EAGAIN);
0745     }
0746 
0747     return NULL;
0748 }
0749 EXPORT_SYMBOL_GPL(rhashtable_walk_next);
0750 
0751 /**
0752  * rhashtable_walk_stop - Finish a hash table walk
0753  * @iter:   Hash table iterator
0754  *
0755  * Finish a hash table walk.
0756  */
0757 void rhashtable_walk_stop(struct rhashtable_iter *iter)
0758     __releases(RCU)
0759 {
0760     struct rhashtable *ht;
0761     struct bucket_table *tbl = iter->walker.tbl;
0762 
0763     if (!tbl)
0764         goto out;
0765 
0766     ht = iter->ht;
0767 
0768     spin_lock(&ht->lock);
0769     if (tbl->rehash < tbl->size)
0770         list_add(&iter->walker.list, &tbl->walkers);
0771     else
0772         iter->walker.tbl = NULL;
0773     spin_unlock(&ht->lock);
0774 
0775     iter->p = NULL;
0776 
0777 out:
0778     rcu_read_unlock();
0779 }
0780 EXPORT_SYMBOL_GPL(rhashtable_walk_stop);
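
/* Usage example for the walker API above (illustrative, not part of the
 * original file; 'my_ht', 'struct test_obj' and handle() are hypothetical):
 *
 *	struct rhashtable_iter iter;
 *	struct test_obj *obj;
 *
 *	rhashtable_walk_enter(&my_ht, &iter);
 *	do {
 *		rhashtable_walk_start(&iter);
 *
 *		while ((obj = rhashtable_walk_next(&iter)) && !IS_ERR(obj))
 *			handle(obj);
 *
 *		rhashtable_walk_stop(&iter);
 *	} while (PTR_ERR(obj) == -EAGAIN);
 *	rhashtable_walk_exit(&iter);
 *
 * On -EAGAIN the iterator has been rewound by a concurrent resize, so the
 * outer loop simply starts over; as the comments above note, objects may
 * then be seen twice or missed.
 */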
0781 
0782 static size_t rounded_hashtable_size(const struct rhashtable_params *params)
0783 {
0784     return max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
0785            (unsigned long)params->min_size);
0786 }
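
/* For example (illustrative, not part of the original file): with
 * nelem_hint = 100 and min_size = 16 this yields
 * max(roundup_pow_of_two(133), 16) = 256 buckets, while a small hint such
 * as nelem_hint = 4 is clamped up to min_size = 16.
 */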
0787 
0788 static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
0789 {
0790     return jhash2(key, length, seed);
0791 }
0792 
0793 /**
0794  * rhashtable_init - initialize a new hash table
0795  * @ht:     hash table to be initialized
0796  * @params: configuration parameters
0797  *
0798  * Initializes a new hash table based on the provided configuration
0799  * parameters. A table can be configured either with a variable or
0800  * fixed length key:
0801  *
0802  * Configuration Example 1: Fixed length keys
0803  * struct test_obj {
0804  *  int         key;
0805  *  void *          my_member;
0806  *  struct rhash_head   node;
0807  * };
0808  *
0809  * struct rhashtable_params params = {
0810  *  .head_offset = offsetof(struct test_obj, node),
0811  *  .key_offset = offsetof(struct test_obj, key),
0812  *  .key_len = sizeof(int),
0813  *  .hashfn = jhash,
0814  *  .nulls_base = (1U << RHT_BASE_SHIFT),
0815  * };
0816  *
0817  * Configuration Example 2: Variable length keys
0818  * struct test_obj {
0819  *  [...]
0820  *  struct rhash_head   node;
0821  * };
0822  *
0823  * u32 my_hash_fn(const void *data, u32 len, u32 seed)
0824  * {
0825  *  struct test_obj *obj = data;
0826  *
0827  *  return [... hash ...];
0828  * }
0829  *
0830  * struct rhashtable_params params = {
0831  *  .head_offset = offsetof(struct test_obj, node),
0832  *  .hashfn = jhash,
0833  *  .obj_hashfn = my_hash_fn,
0834  * };
0835  */
0836 int rhashtable_init(struct rhashtable *ht,
0837             const struct rhashtable_params *params)
0838 {
0839     struct bucket_table *tbl;
0840     size_t size;
0841 
0842     size = HASH_DEFAULT_SIZE;
0843 
0844     if ((!params->key_len && !params->obj_hashfn) ||
0845         (params->obj_hashfn && !params->obj_cmpfn))
0846         return -EINVAL;
0847 
0848     if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT))
0849         return -EINVAL;
0850 
0851     memset(ht, 0, sizeof(*ht));
0852     mutex_init(&ht->mutex);
0853     spin_lock_init(&ht->lock);
0854     memcpy(&ht->p, params, sizeof(*params));
0855 
0856     if (params->min_size)
0857         ht->p.min_size = roundup_pow_of_two(params->min_size);
0858 
0859     if (params->max_size)
0860         ht->p.max_size = rounddown_pow_of_two(params->max_size);
0861 
0862     if (params->insecure_max_entries)
0863         ht->p.insecure_max_entries =
0864             rounddown_pow_of_two(params->insecure_max_entries);
0865     else
0866         ht->p.insecure_max_entries = ht->p.max_size * 2;
0867 
0868     ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE);
0869 
0870     if (params->nelem_hint)
0871         size = rounded_hashtable_size(&ht->p);
0872 
0873     /* The maximum (not average) chain length grows with the
0874      * size of the hash table, at a rate of (log N)/(log log N).
0875      * The value of 16 is selected so that even if the hash
0876      * table grew to 2^32 you would not expect the maximum
0877      * chain length to exceed it unless we are under attack
0878      * (or extremely unlucky).
0879      *
0880      * As this limit is only to detect attacks, we don't need
0881      * to set it to a lower value as you'd need the chain
0882      * length to vastly exceed 16 to have any real effect
0883      * on the system.
0884      */
0885     if (!params->insecure_elasticity)
0886         ht->elasticity = 16;
0887 
0888     if (params->locks_mul)
0889         ht->p.locks_mul = roundup_pow_of_two(params->locks_mul);
0890     else
0891         ht->p.locks_mul = BUCKET_LOCKS_PER_CPU;
0892 
0893     ht->key_len = ht->p.key_len;
0894     if (!params->hashfn) {
0895         ht->p.hashfn = jhash;
0896 
0897         if (!(ht->key_len & (sizeof(u32) - 1))) {
0898             ht->key_len /= sizeof(u32);
0899             ht->p.hashfn = rhashtable_jhash2;
0900         }
0901     }
0902 
0903     tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
0904     if (tbl == NULL)
0905         return -ENOMEM;
0906 
0907     atomic_set(&ht->nelems, 0);
0908 
0909     RCU_INIT_POINTER(ht->tbl, tbl);
0910 
0911     INIT_WORK(&ht->run_work, rht_deferred_worker);
0912 
0913     return 0;
0914 }
0915 EXPORT_SYMBOL_GPL(rhashtable_init);
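
/* End-to-end usage sketch for the fixed-length-key configuration shown in
 * the comment above (illustrative, not part of the original file; 'my_ht',
 * 'struct test_obj' and my_free_fn() are hypothetical, and error handling
 * is elided):
 *
 *	static const struct rhashtable_params test_params = {
 *		.head_offset = offsetof(struct test_obj, node),
 *		.key_offset  = offsetof(struct test_obj, key),
 *		.key_len     = sizeof(int),
 *	};
 *	struct rhashtable my_ht;
 *	struct test_obj *obj, *found;
 *	int key = 42;
 *
 *	rhashtable_init(&my_ht, &test_params);
 *
 *	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
 *	obj->key = key;
 *	rhashtable_insert_fast(&my_ht, &obj->node, test_params);
 *
 *	rcu_read_lock();
 *	found = rhashtable_lookup_fast(&my_ht, &key, test_params);
 *	rcu_read_unlock();
 *
 *	rhashtable_free_and_destroy(&my_ht, my_free_fn, NULL);
 *
 * The _fast() helpers live in <linux/rhashtable.h> and take the parameter
 * structure by value so that the compiler can inline the hashing.
 */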
0916 
0917 /**
0918  * rhltable_init - initialize a new hash list table
0919  * @hlt:    hash list table to be initialized
0920  * @params: configuration parameters
0921  *
0922  * Initializes a new hash list table.
0923  *
0924  * See documentation for rhashtable_init.
0925  */
0926 int rhltable_init(struct rhltable *hlt, const struct rhashtable_params *params)
0927 {
0928     int err;
0929 
0930     /* No rhlist NULLs marking for now. */
0931     if (params->nulls_base)
0932         return -EINVAL;
0933 
0934     err = rhashtable_init(&hlt->ht, params);
0935     hlt->ht.rhlist = true;
0936     return err;
0937 }
0938 EXPORT_SYMBOL_GPL(rhltable_init);
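
/* Usage sketch for hash list tables (illustrative, not part of the original
 * file; 'my_hlt', 'struct test_obj' and 'test_params' are hypothetical, and
 * it assumes the rhltable_insert() helper provided alongside rhltable_init()
 * in <linux/rhashtable.h>):
 *
 *	struct test_obj {
 *		int			key;
 *		struct rhlist_head	list_node;
 *	};
 *	struct rhltable my_hlt;
 *
 *	rhltable_init(&my_hlt, &test_params);
 *	rhltable_insert(&my_hlt, &obj->list_node, test_params);
 *
 * Unlike a plain rhashtable, several objects with the same key may be
 * inserted; they are chained through rhlist_head.next behind a single
 * bucket entry, which is the ht->rhlist handling visible in
 * rhashtable_lookup_one() and rhashtable_free_one().
 */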
0939 
0940 static void rhashtable_free_one(struct rhashtable *ht, struct rhash_head *obj,
0941                 void (*free_fn)(void *ptr, void *arg),
0942                 void *arg)
0943 {
0944     struct rhlist_head *list;
0945 
0946     if (!ht->rhlist) {
0947         free_fn(rht_obj(ht, obj), arg);
0948         return;
0949     }
0950 
0951     list = container_of(obj, struct rhlist_head, rhead);
0952     do {
0953         obj = &list->rhead;
0954         list = rht_dereference(list->next, ht);
0955         free_fn(rht_obj(ht, obj), arg);
0956     } while (list);
0957 }
0958 
0959 /**
0960  * rhashtable_free_and_destroy - free elements and destroy hash table
0961  * @ht:     the hash table to destroy
0962  * @free_fn:    callback to release resources of element
0963  * @arg:    pointer passed to free_fn
0964  *
0965  * Stops any pending async resize. If defined, invokes free_fn for each
0966  * element to release its resources. Please note that RCU protected
0967  * readers may still be accessing the elements, so resources must be
0968  * released in a compatible manner. Then frees the bucket array.
0969  *
0970  * This function will eventually sleep to wait for an async resize
0971  * to complete. The caller is responsible for ensuring that no further
0972  * write operations occur in parallel.
0973  */
0974 void rhashtable_free_and_destroy(struct rhashtable *ht,
0975                  void (*free_fn)(void *ptr, void *arg),
0976                  void *arg)
0977 {
0978     const struct bucket_table *tbl;
0979     unsigned int i;
0980 
0981     cancel_work_sync(&ht->run_work);
0982 
0983     mutex_lock(&ht->mutex);
0984     tbl = rht_dereference(ht->tbl, ht);
0985     if (free_fn) {
0986         for (i = 0; i < tbl->size; i++) {
0987             struct rhash_head *pos, *next;
0988 
0989             for (pos = rht_dereference(tbl->buckets[i], ht),
0990                  next = !rht_is_a_nulls(pos) ?
0991                     rht_dereference(pos->next, ht) : NULL;
0992                  !rht_is_a_nulls(pos);
0993                  pos = next,
0994                  next = !rht_is_a_nulls(pos) ?
0995                     rht_dereference(pos->next, ht) : NULL)
0996                 rhashtable_free_one(ht, pos, free_fn, arg);
0997         }
0998     }
0999 
1000     bucket_table_free(tbl);
1001     mutex_unlock(&ht->mutex);
1002 }
1003 EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy);
1004 
1005 void rhashtable_destroy(struct rhashtable *ht)
1006 {
1007     return rhashtable_free_and_destroy(ht, NULL, NULL);
1008 }
1009 EXPORT_SYMBOL_GPL(rhashtable_destroy);