0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * Resizable, Scalable, Concurrent Hash Table
0004  *
0005  * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
0006  * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
0007  * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
0008  *
0009  * Code partially derived from nft_hash
0010  * Rewritten with rehash code from br_multicast plus single list
0011  * pointer as suggested by Josh Triplett
0012  */
0013 
0014 #include <linux/atomic.h>
0015 #include <linux/kernel.h>
0016 #include <linux/init.h>
0017 #include <linux/log2.h>
0018 #include <linux/sched.h>
0019 #include <linux/rculist.h>
0020 #include <linux/slab.h>
0021 #include <linux/vmalloc.h>
0022 #include <linux/mm.h>
0023 #include <linux/jhash.h>
0024 #include <linux/random.h>
0025 #include <linux/rhashtable.h>
0026 #include <linux/err.h>
0027 #include <linux/export.h>
0028 
0029 #define HASH_DEFAULT_SIZE   64UL
0030 #define HASH_MIN_SIZE       4U
0031 
0032 union nested_table {
0033     union nested_table __rcu *table;
0034     struct rhash_lock_head __rcu *bucket;
0035 };
0036 
0037 static u32 head_hashfn(struct rhashtable *ht,
0038                const struct bucket_table *tbl,
0039                const struct rhash_head *he)
0040 {
0041     return rht_head_hashfn(ht, tbl, he, ht->p);
0042 }
0043 
0044 #ifdef CONFIG_PROVE_LOCKING
0045 #define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT))
0046 
0047 int lockdep_rht_mutex_is_held(struct rhashtable *ht)
0048 {
0049     return (debug_locks) ? lockdep_is_held(&ht->mutex) : 1;
0050 }
0051 EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);
0052 
0053 int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
0054 {
0055     if (!debug_locks)
0056         return 1;
0057     if (unlikely(tbl->nest))
0058         return 1;
0059     return bit_spin_is_locked(0, (unsigned long *)&tbl->buckets[hash]);
0060 }
0061 EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
0062 #else
0063 #define ASSERT_RHT_MUTEX(HT)
0064 #endif
0065 
0066 static inline union nested_table *nested_table_top(
0067     const struct bucket_table *tbl)
0068 {
0069     /* The top-level bucket entry does not need RCU protection
0070      * because it's set at the same time as tbl->nest.
0071      */
0072     return (void *)rcu_dereference_protected(tbl->buckets[0], 1);
0073 }
0074 
0075 static void nested_table_free(union nested_table *ntbl, unsigned int size)
0076 {
0077     const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
0078     const unsigned int len = 1 << shift;
0079     unsigned int i;
0080 
0081     ntbl = rcu_dereference_protected(ntbl->table, 1);
0082     if (!ntbl)
0083         return;
0084 
0085     if (size > len) {
0086         size >>= shift;
0087         for (i = 0; i < len; i++)
0088             nested_table_free(ntbl + i, size);
0089     }
0090 
0091     kfree(ntbl);
0092 }
0093 
0094 static void nested_bucket_table_free(const struct bucket_table *tbl)
0095 {
0096     unsigned int size = tbl->size >> tbl->nest;
0097     unsigned int len = 1 << tbl->nest;
0098     union nested_table *ntbl;
0099     unsigned int i;
0100 
0101     ntbl = nested_table_top(tbl);
0102 
0103     for (i = 0; i < len; i++)
0104         nested_table_free(ntbl + i, size);
0105 
0106     kfree(ntbl);
0107 }
0108 
0109 static void bucket_table_free(const struct bucket_table *tbl)
0110 {
0111     if (tbl->nest)
0112         nested_bucket_table_free(tbl);
0113 
0114     kvfree(tbl);
0115 }
0116 
0117 static void bucket_table_free_rcu(struct rcu_head *head)
0118 {
0119     bucket_table_free(container_of(head, struct bucket_table, rcu));
0120 }
0121 
0122 static union nested_table *nested_table_alloc(struct rhashtable *ht,
0123                           union nested_table __rcu **prev,
0124                           bool leaf)
0125 {
0126     union nested_table *ntbl;
0127     int i;
0128 
0129     ntbl = rcu_dereference(*prev);
0130     if (ntbl)
0131         return ntbl;
0132 
0133     ntbl = kzalloc(PAGE_SIZE, GFP_ATOMIC);
0134 
0135     if (ntbl && leaf) {
0136         for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0]); i++)
0137             INIT_RHT_NULLS_HEAD(ntbl[i].bucket);
0138     }
0139 
0140     if (cmpxchg((union nested_table **)prev, NULL, ntbl) == NULL)
0141         return ntbl;
0142     /* Raced with another thread. */
0143     kfree(ntbl);
0144     return rcu_dereference(*prev);
0145 }
0146 
0147 static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht,
0148                               size_t nbuckets,
0149                               gfp_t gfp)
0150 {
0151     const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
0152     struct bucket_table *tbl;
0153     size_t size;
0154 
0155     if (nbuckets < (1 << (shift + 1)))
0156         return NULL;
0157 
0158     size = sizeof(*tbl) + sizeof(tbl->buckets[0]);
0159 
0160     tbl = kzalloc(size, gfp);
0161     if (!tbl)
0162         return NULL;
0163 
0164     if (!nested_table_alloc(ht, (union nested_table __rcu **)tbl->buckets,
0165                 false)) {
0166         kfree(tbl);
0167         return NULL;
0168     }
0169 
0170     tbl->nest = (ilog2(nbuckets) - 1) % shift + 1;
0171 
0172     return tbl;
0173 }
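
/* A worked example of the geometry above, assuming a typical 64-bit
 * configuration with 4 KiB pages: shift = PAGE_SHIFT - ilog2(sizeof(void *))
 * = 12 - 3 = 9, so one page holds 512 pointers.  For nbuckets = 65536,
 * tbl->nest = (16 - 1) % 9 + 1 = 7: the single top-level page uses
 * 2^7 = 128 slots, each pointing at a leaf page covering 65536 >> 7 = 512
 * buckets, giving a two-level tree of 128 * 512 = 65536 buckets.
 */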
0174 
0175 static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
0176                            size_t nbuckets,
0177                            gfp_t gfp)
0178 {
0179     struct bucket_table *tbl = NULL;
0180     size_t size;
0181     int i;
0182     static struct lock_class_key __key;
0183 
0184     tbl = kvzalloc(struct_size(tbl, buckets, nbuckets), gfp);
0185 
0186     size = nbuckets;
0187 
0188     if (tbl == NULL && (gfp & ~__GFP_NOFAIL) != GFP_KERNEL) {
0189         tbl = nested_bucket_table_alloc(ht, nbuckets, gfp);
0190         nbuckets = 0;
0191     }
0192 
0193     if (tbl == NULL)
0194         return NULL;
0195 
0196     lockdep_init_map(&tbl->dep_map, "rhashtable_bucket", &__key, 0);
0197 
0198     tbl->size = size;
0199 
0200     rcu_head_init(&tbl->rcu);
0201     INIT_LIST_HEAD(&tbl->walkers);
0202 
0203     tbl->hash_rnd = get_random_u32();
0204 
0205     for (i = 0; i < nbuckets; i++)
0206         INIT_RHT_NULLS_HEAD(tbl->buckets[i]);
0207 
0208     return tbl;
0209 }
0210 
0211 static struct bucket_table *rhashtable_last_table(struct rhashtable *ht,
0212                           struct bucket_table *tbl)
0213 {
0214     struct bucket_table *new_tbl;
0215 
0216     do {
0217         new_tbl = tbl;
0218         tbl = rht_dereference_rcu(tbl->future_tbl, ht);
0219     } while (tbl);
0220 
0221     return new_tbl;
0222 }
0223 
0224 static int rhashtable_rehash_one(struct rhashtable *ht,
0225                  struct rhash_lock_head __rcu **bkt,
0226                  unsigned int old_hash)
0227 {
0228     struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
0229     struct bucket_table *new_tbl = rhashtable_last_table(ht, old_tbl);
0230     int err = -EAGAIN;
0231     struct rhash_head *head, *next, *entry;
0232     struct rhash_head __rcu **pprev = NULL;
0233     unsigned int new_hash;
0234 
0235     if (new_tbl->nest)
0236         goto out;
0237 
0238     err = -ENOENT;
0239 
0240     rht_for_each_from(entry, rht_ptr(bkt, old_tbl, old_hash),
0241               old_tbl, old_hash) {
0242         err = 0;
0243         next = rht_dereference_bucket(entry->next, old_tbl, old_hash);
0244 
0245         if (rht_is_a_nulls(next))
0246             break;
0247 
0248         pprev = &entry->next;
0249     }
0250 
0251     if (err)
0252         goto out;
0253 
0254     new_hash = head_hashfn(ht, new_tbl, entry);
0255 
0256     rht_lock_nested(new_tbl, &new_tbl->buckets[new_hash], SINGLE_DEPTH_NESTING);
0257 
0258     head = rht_ptr(new_tbl->buckets + new_hash, new_tbl, new_hash);
0259 
0260     RCU_INIT_POINTER(entry->next, head);
0261 
0262     rht_assign_unlock(new_tbl, &new_tbl->buckets[new_hash], entry);
0263 
0264     if (pprev)
0265         rcu_assign_pointer(*pprev, next);
0266     else
0267         /* Need to preserve the bit lock. */
0268         rht_assign_locked(bkt, next);
0269 
0270 out:
0271     return err;
0272 }
0273 
0274 static int rhashtable_rehash_chain(struct rhashtable *ht,
0275                     unsigned int old_hash)
0276 {
0277     struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
0278     struct rhash_lock_head __rcu **bkt = rht_bucket_var(old_tbl, old_hash);
0279     int err;
0280 
0281     if (!bkt)
0282         return 0;
0283     rht_lock(old_tbl, bkt);
0284 
0285     while (!(err = rhashtable_rehash_one(ht, bkt, old_hash)))
0286         ;
0287 
0288     if (err == -ENOENT)
0289         err = 0;
0290     rht_unlock(old_tbl, bkt);
0291 
0292     return err;
0293 }
0294 
0295 static int rhashtable_rehash_attach(struct rhashtable *ht,
0296                     struct bucket_table *old_tbl,
0297                     struct bucket_table *new_tbl)
0298 {
0299     /* Make insertions go into the new, empty table right away. Deletions
0300      * and lookups will be attempted in both tables until we synchronize.
0301      * As cmpxchg() provides strong barriers, we do not need
0302      * rcu_assign_pointer().
0303      */
0304 
0305     if (cmpxchg((struct bucket_table **)&old_tbl->future_tbl, NULL,
0306             new_tbl) != NULL)
0307         return -EEXIST;
0308 
0309     return 0;
0310 }
0311 
0312 static int rhashtable_rehash_table(struct rhashtable *ht)
0313 {
0314     struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
0315     struct bucket_table *new_tbl;
0316     struct rhashtable_walker *walker;
0317     unsigned int old_hash;
0318     int err;
0319 
0320     new_tbl = rht_dereference(old_tbl->future_tbl, ht);
0321     if (!new_tbl)
0322         return 0;
0323 
0324     for (old_hash = 0; old_hash < old_tbl->size; old_hash++) {
0325         err = rhashtable_rehash_chain(ht, old_hash);
0326         if (err)
0327             return err;
0328         cond_resched();
0329     }
0330 
0331     /* Publish the new table pointer. */
0332     rcu_assign_pointer(ht->tbl, new_tbl);
0333 
0334     spin_lock(&ht->lock);
0335     list_for_each_entry(walker, &old_tbl->walkers, list)
0336         walker->tbl = NULL;
0337 
0338     /* Wait for readers. All new readers will see the new
0339      * table, and thus no references to the old table will
0340      * remain.
0341      * We do this inside the locked region so that
0342      * rhashtable_walk_stop() can use rcu_head_after_call_rcu()
0343      * to check if it should not re-link the table.
0344      */
0345     call_rcu(&old_tbl->rcu, bucket_table_free_rcu);
0346     spin_unlock(&ht->lock);
0347 
0348     return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0;
0349 }
0350 
0351 static int rhashtable_rehash_alloc(struct rhashtable *ht,
0352                    struct bucket_table *old_tbl,
0353                    unsigned int size)
0354 {
0355     struct bucket_table *new_tbl;
0356     int err;
0357 
0358     ASSERT_RHT_MUTEX(ht);
0359 
0360     new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
0361     if (new_tbl == NULL)
0362         return -ENOMEM;
0363 
0364     err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
0365     if (err)
0366         bucket_table_free(new_tbl);
0367 
0368     return err;
0369 }
0370 
0371 /**
0372  * rhashtable_shrink - Shrink hash table while allowing concurrent lookups
0373  * @ht:     the hash table to shrink
0374  *
0375  * This function shrinks the hash table to fit, i.e., to the smallest
0376  * size that would not cause it to expand right away automatically.
0377  *
0378  * The caller must ensure that no concurrent resizing occurs by holding
0379  * ht->mutex.
0380  *
0381  * The caller must ensure that no concurrent table mutations take place.
0382  * It is however valid to have concurrent lookups if they are RCU protected.
0383  *
0384  * It is valid to have concurrent insertions and deletions protected by per
0385  * bucket locks or concurrent RCU protected lookups and traversals.
0386  */
0387 static int rhashtable_shrink(struct rhashtable *ht)
0388 {
0389     struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
0390     unsigned int nelems = atomic_read(&ht->nelems);
0391     unsigned int size = 0;
0392 
0393     if (nelems)
0394         size = roundup_pow_of_two(nelems * 3 / 2);
0395     if (size < ht->p.min_size)
0396         size = ht->p.min_size;
0397 
0398     if (old_tbl->size <= size)
0399         return 0;
0400 
0401     if (rht_dereference(old_tbl->future_tbl, ht))
0402         return -EEXIST;
0403 
0404     return rhashtable_rehash_alloc(ht, old_tbl, size);
0405 }
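
/* A worked example of the sizing above: with nelems = 100 the target is
 * roundup_pow_of_two(100 * 3 / 2) = 256 buckets.  A 512-bucket table would
 * therefore shrink to 256, while 128 buckets would be too small because
 * rht_grow_above_75() (100 > 96) would immediately trigger an expansion
 * again.
 */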
0406 
0407 static void rht_deferred_worker(struct work_struct *work)
0408 {
0409     struct rhashtable *ht;
0410     struct bucket_table *tbl;
0411     int err = 0;
0412 
0413     ht = container_of(work, struct rhashtable, run_work);
0414     mutex_lock(&ht->mutex);
0415 
0416     tbl = rht_dereference(ht->tbl, ht);
0417     tbl = rhashtable_last_table(ht, tbl);
0418 
0419     if (rht_grow_above_75(ht, tbl))
0420         err = rhashtable_rehash_alloc(ht, tbl, tbl->size * 2);
0421     else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl))
0422         err = rhashtable_shrink(ht);
0423     else if (tbl->nest)
0424         err = rhashtable_rehash_alloc(ht, tbl, tbl->size);
0425 
0426     if (!err || err == -EEXIST) {
0427         int nerr;
0428 
0429         nerr = rhashtable_rehash_table(ht);
0430         err = err ?: nerr;
0431     }
0432 
0433     mutex_unlock(&ht->mutex);
0434 
0435     if (err)
0436         schedule_work(&ht->run_work);
0437 }
0438 
0439 static int rhashtable_insert_rehash(struct rhashtable *ht,
0440                     struct bucket_table *tbl)
0441 {
0442     struct bucket_table *old_tbl;
0443     struct bucket_table *new_tbl;
0444     unsigned int size;
0445     int err;
0446 
0447     old_tbl = rht_dereference_rcu(ht->tbl, ht);
0448 
0449     size = tbl->size;
0450 
0451     err = -EBUSY;
0452 
0453     if (rht_grow_above_75(ht, tbl))
0454         size *= 2;
0455     /* Do not schedule more than one rehash */
0456     else if (old_tbl != tbl)
0457         goto fail;
0458 
0459     err = -ENOMEM;
0460 
0461     new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC | __GFP_NOWARN);
0462     if (new_tbl == NULL)
0463         goto fail;
0464 
0465     err = rhashtable_rehash_attach(ht, tbl, new_tbl);
0466     if (err) {
0467         bucket_table_free(new_tbl);
0468         if (err == -EEXIST)
0469             err = 0;
0470     } else
0471         schedule_work(&ht->run_work);
0472 
0473     return err;
0474 
0475 fail:
0476     /* Do not fail the insert if someone else did a rehash. */
0477     if (likely(rcu_access_pointer(tbl->future_tbl)))
0478         return 0;
0479 
0480     /* Schedule async rehash to retry allocation in process context. */
0481     if (err == -ENOMEM)
0482         schedule_work(&ht->run_work);
0483 
0484     return err;
0485 }
0486 
0487 static void *rhashtable_lookup_one(struct rhashtable *ht,
0488                    struct rhash_lock_head __rcu **bkt,
0489                    struct bucket_table *tbl, unsigned int hash,
0490                    const void *key, struct rhash_head *obj)
0491 {
0492     struct rhashtable_compare_arg arg = {
0493         .ht = ht,
0494         .key = key,
0495     };
0496     struct rhash_head __rcu **pprev = NULL;
0497     struct rhash_head *head;
0498     int elasticity;
0499 
0500     elasticity = RHT_ELASTICITY;
0501     rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) {
0502         struct rhlist_head *list;
0503         struct rhlist_head *plist;
0504 
0505         elasticity--;
0506         if (!key ||
0507             (ht->p.obj_cmpfn ?
0508              ht->p.obj_cmpfn(&arg, rht_obj(ht, head)) :
0509              rhashtable_compare(&arg, rht_obj(ht, head)))) {
0510             pprev = &head->next;
0511             continue;
0512         }
0513 
0514         if (!ht->rhlist)
0515             return rht_obj(ht, head);
0516 
0517         list = container_of(obj, struct rhlist_head, rhead);
0518         plist = container_of(head, struct rhlist_head, rhead);
0519 
0520         RCU_INIT_POINTER(list->next, plist);
0521         head = rht_dereference_bucket(head->next, tbl, hash);
0522         RCU_INIT_POINTER(list->rhead.next, head);
0523         if (pprev)
0524             rcu_assign_pointer(*pprev, obj);
0525         else
0526             /* Need to preserve the bit lock */
0527             rht_assign_locked(bkt, obj);
0528 
0529         return NULL;
0530     }
0531 
0532     if (elasticity <= 0)
0533         return ERR_PTR(-EAGAIN);
0534 
0535     return ERR_PTR(-ENOENT);
0536 }
0537 
0538 static struct bucket_table *rhashtable_insert_one(
0539     struct rhashtable *ht, struct rhash_lock_head __rcu **bkt,
0540     struct bucket_table *tbl, unsigned int hash, struct rhash_head *obj,
0541     void *data)
0542 {
0543     struct bucket_table *new_tbl;
0544     struct rhash_head *head;
0545 
0546     if (!IS_ERR_OR_NULL(data))
0547         return ERR_PTR(-EEXIST);
0548 
0549     if (PTR_ERR(data) != -EAGAIN && PTR_ERR(data) != -ENOENT)
0550         return ERR_CAST(data);
0551 
0552     new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
0553     if (new_tbl)
0554         return new_tbl;
0555 
0556     if (PTR_ERR(data) != -ENOENT)
0557         return ERR_CAST(data);
0558 
0559     if (unlikely(rht_grow_above_max(ht, tbl)))
0560         return ERR_PTR(-E2BIG);
0561 
0562     if (unlikely(rht_grow_above_100(ht, tbl)))
0563         return ERR_PTR(-EAGAIN);
0564 
0565     head = rht_ptr(bkt, tbl, hash);
0566 
0567     RCU_INIT_POINTER(obj->next, head);
0568     if (ht->rhlist) {
0569         struct rhlist_head *list;
0570 
0571         list = container_of(obj, struct rhlist_head, rhead);
0572         RCU_INIT_POINTER(list->next, NULL);
0573     }
0574 
0575     /* bkt is always the head of the list, so it holds
0576      * the lock, which we need to preserve
0577      */
0578     rht_assign_locked(bkt, obj);
0579 
0580     atomic_inc(&ht->nelems);
0581     if (rht_grow_above_75(ht, tbl))
0582         schedule_work(&ht->run_work);
0583 
0584     return NULL;
0585 }
0586 
0587 static void *rhashtable_try_insert(struct rhashtable *ht, const void *key,
0588                    struct rhash_head *obj)
0589 {
0590     struct bucket_table *new_tbl;
0591     struct bucket_table *tbl;
0592     struct rhash_lock_head __rcu **bkt;
0593     unsigned int hash;
0594     void *data;
0595 
0596     new_tbl = rcu_dereference(ht->tbl);
0597 
0598     do {
0599         tbl = new_tbl;
0600         hash = rht_head_hashfn(ht, tbl, obj, ht->p);
0601         if (rcu_access_pointer(tbl->future_tbl))
0602             /* Failure is OK */
0603             bkt = rht_bucket_var(tbl, hash);
0604         else
0605             bkt = rht_bucket_insert(ht, tbl, hash);
0606         if (bkt == NULL) {
0607             new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
0608             data = ERR_PTR(-EAGAIN);
0609         } else {
0610             rht_lock(tbl, bkt);
0611             data = rhashtable_lookup_one(ht, bkt, tbl,
0612                              hash, key, obj);
0613             new_tbl = rhashtable_insert_one(ht, bkt, tbl,
0614                             hash, obj, data);
0615             if (PTR_ERR(new_tbl) != -EEXIST)
0616                 data = ERR_CAST(new_tbl);
0617 
0618             rht_unlock(tbl, bkt);
0619         }
0620     } while (!IS_ERR_OR_NULL(new_tbl));
0621 
0622     if (PTR_ERR(data) == -EAGAIN)
0623         data = ERR_PTR(rhashtable_insert_rehash(ht, tbl) ?:
0624                    -EAGAIN);
0625 
0626     return data;
0627 }
0628 
0629 void *rhashtable_insert_slow(struct rhashtable *ht, const void *key,
0630                  struct rhash_head *obj)
0631 {
0632     void *data;
0633 
0634     do {
0635         rcu_read_lock();
0636         data = rhashtable_try_insert(ht, key, obj);
0637         rcu_read_unlock();
0638     } while (PTR_ERR(data) == -EAGAIN);
0639 
0640     return data;
0641 }
0642 EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
0643 
0644 /**
0645  * rhashtable_walk_enter - Initialise an iterator
0646  * @ht:     Table to walk over
0647  * @iter:   Hash table Iterator
0648  *
0649  * This function prepares a hash table walk.
0650  *
0651  * Note that if you restart a walk after rhashtable_walk_stop you
0652  * may see the same object twice.  Also, you may miss objects if
0653  * there are removals in between rhashtable_walk_stop and the next
0654  * call to rhashtable_walk_start.
0655  *
0656  * For a completely stable walk you should construct your own data
0657  * structure outside the hash table.
0658  *
0659  * This function may be called from any process context, including
0660  * non-preemptable context, but cannot be called from softirq or
0661  * hardirq context.
0662  *
0663  * You must call rhashtable_walk_exit after this function returns.
0664  */
0665 void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter)
0666 {
0667     iter->ht = ht;
0668     iter->p = NULL;
0669     iter->slot = 0;
0670     iter->skip = 0;
0671     iter->end_of_table = 0;
0672 
0673     spin_lock(&ht->lock);
0674     iter->walker.tbl =
0675         rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock));
0676     list_add(&iter->walker.list, &iter->walker.tbl->walkers);
0677     spin_unlock(&ht->lock);
0678 }
0679 EXPORT_SYMBOL_GPL(rhashtable_walk_enter);
0680 
0681 /**
0682  * rhashtable_walk_exit - Free an iterator
0683  * @iter:   Hash table Iterator
0684  *
0685  * This function frees resources allocated by rhashtable_walk_enter.
0686  */
0687 void rhashtable_walk_exit(struct rhashtable_iter *iter)
0688 {
0689     spin_lock(&iter->ht->lock);
0690     if (iter->walker.tbl)
0691         list_del(&iter->walker.list);
0692     spin_unlock(&iter->ht->lock);
0693 }
0694 EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
0695 
0696 /**
0697  * rhashtable_walk_start_check - Start a hash table walk
0698  * @iter:   Hash table iterator
0699  *
0700  * Start a hash table walk at the current iterator position.  Note that we take
0701  * the RCU lock in all cases including when we return an error.  So you must
0702  * always call rhashtable_walk_stop to clean up.
0703  *
0704  * Returns zero if successful.
0705  *
0706  * Returns -EAGAIN if a resize event occurred.  Note that the iterator
0707  * will rewind back to the beginning and you may use it immediately
0708  * by calling rhashtable_walk_next.
0709  *
0710  * rhashtable_walk_start is defined as an inline variant that returns
0711  * void. This is preferred in cases where the caller would ignore
0712  * resize events and always continue.
0713  */
0714 int rhashtable_walk_start_check(struct rhashtable_iter *iter)
0715     __acquires(RCU)
0716 {
0717     struct rhashtable *ht = iter->ht;
0718     bool rhlist = ht->rhlist;
0719 
0720     rcu_read_lock();
0721 
0722     spin_lock(&ht->lock);
0723     if (iter->walker.tbl)
0724         list_del(&iter->walker.list);
0725     spin_unlock(&ht->lock);
0726 
0727     if (iter->end_of_table)
0728         return 0;
0729     if (!iter->walker.tbl) {
0730         iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht);
0731         iter->slot = 0;
0732         iter->skip = 0;
0733         return -EAGAIN;
0734     }
0735 
0736     if (iter->p && !rhlist) {
0737         /*
0738          * We need to validate that 'p' is still in the table, and
0739          * if so, update 'skip'
0740          */
0741         struct rhash_head *p;
0742         int skip = 0;
0743         rht_for_each_rcu(p, iter->walker.tbl, iter->slot) {
0744             skip++;
0745             if (p == iter->p) {
0746                 iter->skip = skip;
0747                 goto found;
0748             }
0749         }
0750         iter->p = NULL;
0751     } else if (iter->p && rhlist) {
0752         /* Need to validate that 'list' is still in the table, and
0753          * if so, update 'skip' and 'p'.
0754          */
0755         struct rhash_head *p;
0756         struct rhlist_head *list;
0757         int skip = 0;
0758         rht_for_each_rcu(p, iter->walker.tbl, iter->slot) {
0759             for (list = container_of(p, struct rhlist_head, rhead);
0760                  list;
0761                  list = rcu_dereference(list->next)) {
0762                 skip++;
0763                 if (list == iter->list) {
0764                     iter->p = p;
0765                     iter->skip = skip;
0766                     goto found;
0767                 }
0768             }
0769         }
0770         iter->p = NULL;
0771     }
0772 found:
0773     return 0;
0774 }
0775 EXPORT_SYMBOL_GPL(rhashtable_walk_start_check);
0776 
0777 /**
0778  * __rhashtable_walk_find_next - Find the next element in a table (or the first
0779  * one in case of a new walk).
0780  *
0781  * @iter:   Hash table iterator
0782  *
0783  * Returns the found object or NULL when the end of the table is reached.
0784  *
0785  * Returns -EAGAIN if a resize event occurred.
0786  */
0787 static void *__rhashtable_walk_find_next(struct rhashtable_iter *iter)
0788 {
0789     struct bucket_table *tbl = iter->walker.tbl;
0790     struct rhlist_head *list = iter->list;
0791     struct rhashtable *ht = iter->ht;
0792     struct rhash_head *p = iter->p;
0793     bool rhlist = ht->rhlist;
0794 
0795     if (!tbl)
0796         return NULL;
0797 
0798     for (; iter->slot < tbl->size; iter->slot++) {
0799         int skip = iter->skip;
0800 
0801         rht_for_each_rcu(p, tbl, iter->slot) {
0802             if (rhlist) {
0803                 list = container_of(p, struct rhlist_head,
0804                             rhead);
0805                 do {
0806                     if (!skip)
0807                         goto next;
0808                     skip--;
0809                     list = rcu_dereference(list->next);
0810                 } while (list);
0811 
0812                 continue;
0813             }
0814             if (!skip)
0815                 break;
0816             skip--;
0817         }
0818 
0819 next:
0820         if (!rht_is_a_nulls(p)) {
0821             iter->skip++;
0822             iter->p = p;
0823             iter->list = list;
0824             return rht_obj(ht, rhlist ? &list->rhead : p);
0825         }
0826 
0827         iter->skip = 0;
0828     }
0829 
0830     iter->p = NULL;
0831 
0832     /* Ensure we see any new tables. */
0833     smp_rmb();
0834 
0835     iter->walker.tbl = rht_dereference_rcu(tbl->future_tbl, ht);
0836     if (iter->walker.tbl) {
0837         iter->slot = 0;
0838         iter->skip = 0;
0839         return ERR_PTR(-EAGAIN);
0840     } else {
0841         iter->end_of_table = true;
0842     }
0843 
0844     return NULL;
0845 }
0846 
0847 /**
0848  * rhashtable_walk_next - Return the next object and advance the iterator
0849  * @iter:   Hash table iterator
0850  *
0851  * Note that you must call rhashtable_walk_stop when you are finished
0852  * with the walk.
0853  *
0854  * Returns the next object or NULL when the end of the table is reached.
0855  *
0856  * Returns -EAGAIN if a resize event occurred.  Note that the iterator
0857  * will rewind back to the beginning and you may continue to use it.
0858  */
0859 void *rhashtable_walk_next(struct rhashtable_iter *iter)
0860 {
0861     struct rhlist_head *list = iter->list;
0862     struct rhashtable *ht = iter->ht;
0863     struct rhash_head *p = iter->p;
0864     bool rhlist = ht->rhlist;
0865 
0866     if (p) {
0867         if (!rhlist || !(list = rcu_dereference(list->next))) {
0868             p = rcu_dereference(p->next);
0869             list = container_of(p, struct rhlist_head, rhead);
0870         }
0871         if (!rht_is_a_nulls(p)) {
0872             iter->skip++;
0873             iter->p = p;
0874             iter->list = list;
0875             return rht_obj(ht, rhlist ? &list->rhead : p);
0876         }
0877 
0878         /* At the end of this slot, switch to the next one and then find
0879          * next entry from that point.
0880          */
0881         iter->skip = 0;
0882         iter->slot++;
0883     }
0884 
0885     return __rhashtable_walk_find_next(iter);
0886 }
0887 EXPORT_SYMBOL_GPL(rhashtable_walk_next);
0888 
0889 /**
0890  * rhashtable_walk_peek - Return the next object but don't advance the iterator
0891  * @iter:   Hash table iterator
0892  *
0893  * Returns the next object or NULL when the end of the table is reached.
0894  *
0895  * Returns -EAGAIN if a resize event occurred.  Note that the iterator
0896  * will rewind back to the beginning and you may continue to use it.
0897  */
0898 void *rhashtable_walk_peek(struct rhashtable_iter *iter)
0899 {
0900     struct rhlist_head *list = iter->list;
0901     struct rhashtable *ht = iter->ht;
0902     struct rhash_head *p = iter->p;
0903 
0904     if (p)
0905         return rht_obj(ht, ht->rhlist ? &list->rhead : p);
0906 
0907     /* No object found in current iter, find next one in the table. */
0908 
0909     if (iter->skip) {
0910         /* A nonzero skip value points to the next entry in the table
0911          * beyond the last one that was found. Decrement skip so
0912          * we find the current value. __rhashtable_walk_find_next
0913          * will restore the original value of skip assuming that
0914          * the table hasn't changed.
0915          */
0916         iter->skip--;
0917     }
0918 
0919     return __rhashtable_walk_find_next(iter);
0920 }
0921 EXPORT_SYMBOL_GPL(rhashtable_walk_peek);
0922 
0923 /**
0924  * rhashtable_walk_stop - Finish a hash table walk
0925  * @iter:   Hash table iterator
0926  *
0927  * Finish a hash table walk.  Does not reset the iterator to the start of the
0928  * hash table.
0929  */
0930 void rhashtable_walk_stop(struct rhashtable_iter *iter)
0931     __releases(RCU)
0932 {
0933     struct rhashtable *ht;
0934     struct bucket_table *tbl = iter->walker.tbl;
0935 
0936     if (!tbl)
0937         goto out;
0938 
0939     ht = iter->ht;
0940 
0941     spin_lock(&ht->lock);
0942     if (rcu_head_after_call_rcu(&tbl->rcu, bucket_table_free_rcu))
0943         /* This bucket table is being freed, don't re-link it. */
0944         iter->walker.tbl = NULL;
0945     else
0946         list_add(&iter->walker.list, &tbl->walkers);
0947     spin_unlock(&ht->lock);
0948 
0949 out:
0950     rcu_read_unlock();
0951 }
0952 EXPORT_SYMBOL_GPL(rhashtable_walk_stop);
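
/* A usage sketch for the walk API above (a minimal example, not a
 * definitive pattern): iterate over every object in a table.  Here
 * "my_objects" is an arbitrary table and visit() a hypothetical callback;
 * on -EAGAIN the table was resized and the walk rewound, so objects may
 * be seen twice.
 *
 *	struct rhashtable_iter iter;
 *	void *obj;
 *
 *	rhashtable_walk_enter(&my_objects, &iter);
 *	rhashtable_walk_start(&iter);
 *
 *	while ((obj = rhashtable_walk_next(&iter)) != NULL) {
 *		if (IS_ERR(obj)) {
 *			if (PTR_ERR(obj) == -EAGAIN)
 *				continue;
 *			break;
 *		}
 *		visit(obj);
 *	}
 *
 *	rhashtable_walk_stop(&iter);
 *	rhashtable_walk_exit(&iter);
 */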
0953 
0954 static size_t rounded_hashtable_size(const struct rhashtable_params *params)
0955 {
0956     size_t retsize;
0957 
0958     if (params->nelem_hint)
0959         retsize = max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
0960                   (unsigned long)params->min_size);
0961     else
0962         retsize = max(HASH_DEFAULT_SIZE,
0963                   (unsigned long)params->min_size);
0964 
0965     return retsize;
0966 }
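
/* For example, nelem_hint = 100 gives roundup_pow_of_two(100 * 4 / 3) =
 * roundup_pow_of_two(133) = 256 buckets (assuming min_size is not larger);
 * with no hint the result is HASH_DEFAULT_SIZE (64) or min_size, whichever
 * is greater.
 */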
0967 
0968 static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
0969 {
0970     return jhash2(key, length, seed);
0971 }
0972 
0973 /**
0974  * rhashtable_init - initialize a new hash table
0975  * @ht:     hash table to be initialized
0976  * @params: configuration parameters
0977  *
0978  * Initializes a new hash table based on the provided configuration
0979  * parameters. A table can be configured either with a variable or
0980  * fixed length key:
0981  *
0982  * Configuration Example 1: Fixed length keys
0983  * struct test_obj {
0984  *  int         key;
0985  *  void *          my_member;
0986  *  struct rhash_head   node;
0987  * };
0988  *
0989  * struct rhashtable_params params = {
0990  *  .head_offset = offsetof(struct test_obj, node),
0991  *  .key_offset = offsetof(struct test_obj, key),
0992  *  .key_len = sizeof(int),
0993  *  .hashfn = jhash,
0994  * };
0995  *
0996  * Configuration Example 2: Variable length keys
0997  * struct test_obj {
0998  *  [...]
0999  *  struct rhash_head   node;
1000  * };
1001  *
1002  * u32 my_hash_fn(const void *data, u32 len, u32 seed)
1003  * {
1004  *  struct test_obj *obj = data;
1005  *
1006  *  return [... hash ...];
1007  * }
1008  *
1009  * struct rhashtable_params params = {
1010  *  .head_offset = offsetof(struct test_obj, node),
1011  *  .hashfn = jhash,
1012  *  .obj_hashfn = my_hash_fn,
1013  * };
1014  */
1015 int rhashtable_init(struct rhashtable *ht,
1016             const struct rhashtable_params *params)
1017 {
1018     struct bucket_table *tbl;
1019     size_t size;
1020 
1021     if ((!params->key_len && !params->obj_hashfn) ||
1022         (params->obj_hashfn && !params->obj_cmpfn))
1023         return -EINVAL;
1024 
1025     memset(ht, 0, sizeof(*ht));
1026     mutex_init(&ht->mutex);
1027     spin_lock_init(&ht->lock);
1028     memcpy(&ht->p, params, sizeof(*params));
1029 
1030     if (params->min_size)
1031         ht->p.min_size = roundup_pow_of_two(params->min_size);
1032 
1033     /* Cap total entries at 2^31 to avoid nelems overflow. */
1034     ht->max_elems = 1u << 31;
1035 
1036     if (params->max_size) {
1037         ht->p.max_size = rounddown_pow_of_two(params->max_size);
1038         if (ht->p.max_size < ht->max_elems / 2)
1039             ht->max_elems = ht->p.max_size * 2;
1040     }
1041 
1042     ht->p.min_size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE);
1043 
1044     size = rounded_hashtable_size(&ht->p);
1045 
1046     ht->key_len = ht->p.key_len;
1047     if (!params->hashfn) {
1048         ht->p.hashfn = jhash;
1049 
1050         if (!(ht->key_len & (sizeof(u32) - 1))) {
1051             ht->key_len /= sizeof(u32);
1052             ht->p.hashfn = rhashtable_jhash2;
1053         }
1054     }
1055 
1056     /*
1057      * This is API initialization and thus we need to guarantee the
1058      * initial rhashtable allocation. Upon failure, retry with the
1059      * smallest possible size with __GFP_NOFAIL semantics.
1060      */
1061     tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
1062     if (unlikely(tbl == NULL)) {
1063         size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE);
1064         tbl = bucket_table_alloc(ht, size, GFP_KERNEL | __GFP_NOFAIL);
1065     }
1066 
1067     atomic_set(&ht->nelems, 0);
1068 
1069     RCU_INIT_POINTER(ht->tbl, tbl);
1070 
1071     INIT_WORK(&ht->run_work, rht_deferred_worker);
1072 
1073     return 0;
1074 }
1075 EXPORT_SYMBOL_GPL(rhashtable_init);
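
/* A minimal sketch tying rhashtable_init() to the fast-path helpers,
 * reusing "struct test_obj" and "params" from Configuration Example 1
 * above.  rhashtable_insert_fast(), rhashtable_lookup_fast() and
 * rhashtable_remove_fast() are static inlines from <linux/rhashtable.h>,
 * not part of this file; "my_objects" and example() are made-up names and
 * error handling is abbreviated.
 *
 *	static struct rhashtable my_objects;
 *
 *	int example(void)
 *	{
 *		struct test_obj *obj;
 *		int key = 1;
 *		int err;
 *
 *		err = rhashtable_init(&my_objects, &params);
 *		if (err)
 *			return err;
 *
 *		obj = kzalloc(sizeof(*obj), GFP_KERNEL);
 *		if (!obj)
 *			return -ENOMEM;
 *		obj->key = key;
 *		err = rhashtable_insert_fast(&my_objects, &obj->node, params);
 *
 *		obj = rhashtable_lookup_fast(&my_objects, &key, params);
 *		if (obj)
 *			err = rhashtable_remove_fast(&my_objects, &obj->node,
 *						     params);
 *		return err;
 *	}
 */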
1076 
1077 /**
1078  * rhltable_init - initialize a new hash list table
1079  * @hlt:    hash list table to be initialized
1080  * @params: configuration parameters
1081  *
1082  * Initializes a new hash list table.
1083  *
1084  * See documentation for rhashtable_init.
1085  */
1086 int rhltable_init(struct rhltable *hlt, const struct rhashtable_params *params)
1087 {
1088     int err;
1089 
1090     err = rhashtable_init(&hlt->ht, params);
1091     hlt->ht.rhlist = true;
1092     return err;
1093 }
1094 EXPORT_SYMBOL_GPL(rhltable_init);
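
/* A hedged sketch of the list-table variant: with an rhltable several
 * objects may share one key, chained through struct rhlist_head.  The
 * helpers rhltable_insert(), rhltable_lookup() and rhl_for_each_entry_rcu()
 * are static inlines/macros from <linux/rhashtable.h>; "struct flow",
 * flow_table and process() are invented names, and declarations and error
 * handling are elided.
 *
 *	struct flow {
 *		int			key;
 *		struct rhlist_head	list_node;
 *	};
 *
 *	static const struct rhashtable_params flow_params = {
 *		.head_offset = offsetof(struct flow, list_node),
 *		.key_offset = offsetof(struct flow, key),
 *		.key_len = sizeof(int),
 *		.automatic_shrinking = true,
 *	};
 *
 *	err = rhltable_init(&flow_table, &flow_params);
 *	err = rhltable_insert(&flow_table, &f->list_node, flow_params);
 *
 *	rcu_read_lock();
 *	list = rhltable_lookup(&flow_table, &key, flow_params);
 *	rhl_for_each_entry_rcu(f, pos, list, list_node)
 *		process(f);
 *	rcu_read_unlock();
 */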
1095 
1096 static void rhashtable_free_one(struct rhashtable *ht, struct rhash_head *obj,
1097                 void (*free_fn)(void *ptr, void *arg),
1098                 void *arg)
1099 {
1100     struct rhlist_head *list;
1101 
1102     if (!ht->rhlist) {
1103         free_fn(rht_obj(ht, obj), arg);
1104         return;
1105     }
1106 
1107     list = container_of(obj, struct rhlist_head, rhead);
1108     do {
1109         obj = &list->rhead;
1110         list = rht_dereference(list->next, ht);
1111         free_fn(rht_obj(ht, obj), arg);
1112     } while (list);
1113 }
1114 
1115 /**
1116  * rhashtable_free_and_destroy - free elements and destroy hash table
1117  * @ht:     the hash table to destroy
1118  * @free_fn:    callback to release resources of element
1119  * @arg:    pointer passed to free_fn
1120  *
1121  * Stops any pending async resize. If defined, invokes free_fn for each
1122  * element to release its resources. Please note that RCU protected
1123  * readers may still be accessing the elements. Releasing of resources
1124  * must occur in a compatible manner. Then frees the bucket array.
1125  *
1126  * This function will eventually sleep to wait for an async resize
1127  * to complete. The caller is responsible for ensuring that no further
1128  * write operations occur in parallel.
1129  */
1130 void rhashtable_free_and_destroy(struct rhashtable *ht,
1131                  void (*free_fn)(void *ptr, void *arg),
1132                  void *arg)
1133 {
1134     struct bucket_table *tbl, *next_tbl;
1135     unsigned int i;
1136 
1137     cancel_work_sync(&ht->run_work);
1138 
1139     mutex_lock(&ht->mutex);
1140     tbl = rht_dereference(ht->tbl, ht);
1141 restart:
1142     if (free_fn) {
1143         for (i = 0; i < tbl->size; i++) {
1144             struct rhash_head *pos, *next;
1145 
1146             cond_resched();
1147             for (pos = rht_ptr_exclusive(rht_bucket(tbl, i)),
1148                  next = !rht_is_a_nulls(pos) ?
1149                     rht_dereference(pos->next, ht) : NULL;
1150                  !rht_is_a_nulls(pos);
1151                  pos = next,
1152                  next = !rht_is_a_nulls(pos) ?
1153                     rht_dereference(pos->next, ht) : NULL)
1154                 rhashtable_free_one(ht, pos, free_fn, arg);
1155         }
1156     }
1157 
1158     next_tbl = rht_dereference(tbl->future_tbl, ht);
1159     bucket_table_free(tbl);
1160     if (next_tbl) {
1161         tbl = next_tbl;
1162         goto restart;
1163     }
1164     mutex_unlock(&ht->mutex);
1165 }
1166 EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy);
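
/* A minimal teardown sketch for the function above, assuming the table and
 * "struct test_obj" entries from the earlier examples were allocated with
 * kzalloc() (the arg pointer is unused here):
 *
 *	static void free_test_obj(void *ptr, void *arg)
 *	{
 *		kfree(ptr);
 *	}
 *
 *	rhashtable_free_and_destroy(&my_objects, free_test_obj, NULL);
 */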
1167 
1168 void rhashtable_destroy(struct rhashtable *ht)
1169 {
1170     return rhashtable_free_and_destroy(ht, NULL, NULL);
1171 }
1172 EXPORT_SYMBOL_GPL(rhashtable_destroy);
1173 
1174 struct rhash_lock_head __rcu **__rht_bucket_nested(
1175     const struct bucket_table *tbl, unsigned int hash)
1176 {
1177     const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
1178     unsigned int index = hash & ((1 << tbl->nest) - 1);
1179     unsigned int size = tbl->size >> tbl->nest;
1180     unsigned int subhash = hash;
1181     union nested_table *ntbl;
1182 
1183     ntbl = nested_table_top(tbl);
1184     ntbl = rht_dereference_bucket_rcu(ntbl[index].table, tbl, hash);
1185     subhash >>= tbl->nest;
1186 
1187     while (ntbl && size > (1 << shift)) {
1188         index = subhash & ((1 << shift) - 1);
1189         ntbl = rht_dereference_bucket_rcu(ntbl[index].table,
1190                           tbl, hash);
1191         size >>= shift;
1192         subhash >>= shift;
1193     }
1194 
1195     if (!ntbl)
1196         return NULL;
1197 
1198     return &ntbl[subhash].bucket;
1199 
1200 }
1201 EXPORT_SYMBOL_GPL(__rht_bucket_nested);
1202 
1203 struct rhash_lock_head __rcu **rht_bucket_nested(
1204     const struct bucket_table *tbl, unsigned int hash)
1205 {
1206     static struct rhash_lock_head __rcu *rhnull;
1207 
1208     if (!rhnull)
1209         INIT_RHT_NULLS_HEAD(rhnull);
1210     return __rht_bucket_nested(tbl, hash) ?: &rhnull;
1211 }
1212 EXPORT_SYMBOL_GPL(rht_bucket_nested);
1213 
1214 struct rhash_lock_head __rcu **rht_bucket_nested_insert(
1215     struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash)
1216 {
1217     const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
1218     unsigned int index = hash & ((1 << tbl->nest) - 1);
1219     unsigned int size = tbl->size >> tbl->nest;
1220     union nested_table *ntbl;
1221 
1222     ntbl = nested_table_top(tbl);
1223     hash >>= tbl->nest;
1224     ntbl = nested_table_alloc(ht, &ntbl[index].table,
1225                   size <= (1 << shift));
1226 
1227     while (ntbl && size > (1 << shift)) {
1228         index = hash & ((1 << shift) - 1);
1229         size >>= shift;
1230         hash >>= shift;
1231         ntbl = nested_table_alloc(ht, &ntbl[index].table,
1232                       size <= (1 << shift));
1233     }
1234 
1235     if (!ntbl)
1236         return NULL;
1237 
1238     return &ntbl[hash].bucket;
1239 
1240 }
1241 EXPORT_SYMBOL_GPL(rht_bucket_nested_insert);