// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2001 Momchil Velikov
 * Portions Copyright (C) 2001 Christoph Hellwig
 * Copyright (C) 2005 SGI, Christoph Lameter
 * Copyright (C) 2006 Nick Piggin
 * Copyright (C) 2012 Konstantin Khlebnikov
 * Copyright (C) 2016 Intel, Matthew Wilcox
 * Copyright (C) 2016 Intel, Ross Zwisler
 */

#include <linux/bitmap.h>
#include <linux/bitops.h>
#include <linux/bug.h>
#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/idr.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kmemleak.h>
#include <linux/percpu.h>
#include <linux/preempt.h>      /* in_interrupt() */
#include <linux/radix-tree.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/xarray.h>

/*
 * Radix tree node cache.
 */
struct kmem_cache *radix_tree_node_cachep;

/*
 * The radix tree is variable-height, so an insert operation not only has
 * to build the branch to its corresponding item, it also has to build the
 * branch to existing items if the size has to be increased (by
 * radix_tree_extend).
 *
 * The worst case is a zero height tree with just a single item at index 0,
 * and then inserting an item at index ULONG_MAX. This requires 2 new branches
 * of RADIX_TREE_MAX_PATH size to be created, with only the root node shared.
 * Hence:
 */
#define RADIX_TREE_PRELOAD_SIZE (RADIX_TREE_MAX_PATH * 2 - 1)
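
/*
 * Worked example (the numbers depend on the kernel configuration): with
 * RADIX_TREE_MAP_SHIFT == 6 and 64-bit longs, RADIX_TREE_MAX_PATH is
 * DIV_ROUND_UP(64, 6) == 11, so RADIX_TREE_PRELOAD_SIZE comes to
 * 11 * 2 - 1 == 21 preloaded nodes per CPU.
 */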

/*
 * The IDR does not have to be as high as the radix tree since it uses
 * signed integers, not unsigned longs.
 */
#define IDR_INDEX_BITS      (8 /* CHAR_BIT */ * sizeof(int) - 1)
#define IDR_MAX_PATH        (DIV_ROUND_UP(IDR_INDEX_BITS, \
                        RADIX_TREE_MAP_SHIFT))
#define IDR_PRELOAD_SIZE    (IDR_MAX_PATH * 2 - 1)
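
/*
 * Worked example under the same assumptions (RADIX_TREE_MAP_SHIFT == 6,
 * 32-bit int): IDR_INDEX_BITS is 31, IDR_MAX_PATH is
 * DIV_ROUND_UP(31, 6) == 6, and IDR_PRELOAD_SIZE is 6 * 2 - 1 == 11.
 */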

/*
 * Per-cpu pool of preloaded nodes
 */
DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = {
    .lock = INIT_LOCAL_LOCK(lock),
};
EXPORT_PER_CPU_SYMBOL_GPL(radix_tree_preloads);

static inline struct radix_tree_node *entry_to_node(void *ptr)
{
    return (void *)((unsigned long)ptr & ~RADIX_TREE_INTERNAL_NODE);
}

static inline void *node_to_entry(void *ptr)
{
    return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE);
}

#define RADIX_TREE_RETRY    XA_RETRY_ENTRY
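
/*
 * Sketch of the pointer tagging the helpers above rely on (the exact bit
 * pattern is defined by RADIX_TREE_INTERNAL_NODE in the headers, normally
 * the value 2): a node at address 0x...f000 is stored in a slot as
 * 0x...f002, so a walker can distinguish internal nodes from data
 * pointers by testing the low bits of the entry.
 */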

static inline unsigned long
get_slot_offset(const struct radix_tree_node *parent, void __rcu **slot)
{
    return parent ? slot - parent->slots : 0;
}

static unsigned int radix_tree_descend(const struct radix_tree_node *parent,
            struct radix_tree_node **nodep, unsigned long index)
{
    unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK;
    void __rcu **entry = rcu_dereference_raw(parent->slots[offset]);

    *nodep = (void *)entry;
    return offset;
}

static inline gfp_t root_gfp_mask(const struct radix_tree_root *root)
{
    return root->xa_flags & (__GFP_BITS_MASK & ~GFP_ZONEMASK);
}

static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
        int offset)
{
    __set_bit(offset, node->tags[tag]);
}

static inline void tag_clear(struct radix_tree_node *node, unsigned int tag,
        int offset)
{
    __clear_bit(offset, node->tags[tag]);
}

static inline int tag_get(const struct radix_tree_node *node, unsigned int tag,
        int offset)
{
    return test_bit(offset, node->tags[tag]);
}

static inline void root_tag_set(struct radix_tree_root *root, unsigned tag)
{
    root->xa_flags |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT));
}

static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag)
{
    root->xa_flags &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT));
}

static inline void root_tag_clear_all(struct radix_tree_root *root)
{
    root->xa_flags &= (__force gfp_t)((1 << ROOT_TAG_SHIFT) - 1);
}

static inline int root_tag_get(const struct radix_tree_root *root, unsigned tag)
{
    return (__force int)root->xa_flags & (1 << (tag + ROOT_TAG_SHIFT));
}

static inline unsigned root_tags_get(const struct radix_tree_root *root)
{
    return (__force unsigned)root->xa_flags >> ROOT_TAG_SHIFT;
}

static inline bool is_idr(const struct radix_tree_root *root)
{
    return !!(root->xa_flags & ROOT_IS_IDR);
}

/*
 * Returns 1 if any slot in the node has this tag set.
 * Otherwise returns 0.
 */
static inline int any_tag_set(const struct radix_tree_node *node,
                            unsigned int tag)
{
    unsigned idx;
    for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
        if (node->tags[tag][idx])
            return 1;
    }
    return 0;
}

static inline void all_tag_set(struct radix_tree_node *node, unsigned int tag)
{
    bitmap_fill(node->tags[tag], RADIX_TREE_MAP_SIZE);
}

/**
 * radix_tree_find_next_bit - find the next set bit in a memory region
 *
 * @node: where to begin the search
 * @tag: the tag index
 * @offset: the bitnumber to start searching at
 *
 * Unrollable variant of find_next_bit() for constant size arrays.
 * Tail bits starting from size to roundup(size, BITS_PER_LONG) must be zero.
 * Returns next bit offset, or size if nothing found.
 */
static __always_inline unsigned long
radix_tree_find_next_bit(struct radix_tree_node *node, unsigned int tag,
             unsigned long offset)
{
    const unsigned long *addr = node->tags[tag];

    if (offset < RADIX_TREE_MAP_SIZE) {
        unsigned long tmp;

        addr += offset / BITS_PER_LONG;
        tmp = *addr >> (offset % BITS_PER_LONG);
        if (tmp)
            return __ffs(tmp) + offset;
        offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
        while (offset < RADIX_TREE_MAP_SIZE) {
            tmp = *++addr;
            if (tmp)
                return __ffs(tmp) + offset;
            offset += BITS_PER_LONG;
        }
    }
    return RADIX_TREE_MAP_SIZE;
}

static unsigned int iter_offset(const struct radix_tree_iter *iter)
{
    return iter->index & RADIX_TREE_MAP_MASK;
}

/*
 * The maximum index which can be stored in a radix tree
 */
static inline unsigned long shift_maxindex(unsigned int shift)
{
    return (RADIX_TREE_MAP_SIZE << shift) - 1;
}
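
/*
 * For example, with RADIX_TREE_MAP_SIZE == 64: shift 0 covers indices
 * 0..63, shift 6 covers 0..4095, and shift 12 covers 0..262143; each
 * additional level multiplies the span by RADIX_TREE_MAP_SIZE.
 */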

static inline unsigned long node_maxindex(const struct radix_tree_node *node)
{
    return shift_maxindex(node->shift);
}

static unsigned long next_index(unsigned long index,
                const struct radix_tree_node *node,
                unsigned long offset)
{
    return (index & ~node_maxindex(node)) + (offset << node->shift);
}

/*
 * This assumes that the caller has performed appropriate preallocation, and
 * that the caller has pinned this thread of control to the current CPU.
 */
static struct radix_tree_node *
radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent,
            struct radix_tree_root *root,
            unsigned int shift, unsigned int offset,
            unsigned int count, unsigned int nr_values)
{
    struct radix_tree_node *ret = NULL;

    /*
     * Preload code isn't irq safe and it doesn't make sense to use
     * preloading during an interrupt anyway as all the allocations have
     * to be atomic. So just do normal allocation when in interrupt.
     */
    if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt()) {
        struct radix_tree_preload *rtp;

        /*
         * Even if the caller has preloaded, try to allocate from the
         * cache first for the new node to get accounted to the memory
         * cgroup.
         */
        ret = kmem_cache_alloc(radix_tree_node_cachep,
                       gfp_mask | __GFP_NOWARN);
        if (ret)
            goto out;

        /*
         * Provided the caller has preloaded here, we will always
         * succeed in getting a node here (and never reach
         * kmem_cache_alloc)
         */
        rtp = this_cpu_ptr(&radix_tree_preloads);
        if (rtp->nr) {
            ret = rtp->nodes;
            rtp->nodes = ret->parent;
            rtp->nr--;
        }
        /*
         * Update the allocation stack trace as this is more useful
         * for debugging.
         */
        kmemleak_update_trace(ret);
        goto out;
    }
    ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
out:
    BUG_ON(radix_tree_is_internal_node(ret));
    if (ret) {
        ret->shift = shift;
        ret->offset = offset;
        ret->count = count;
        ret->nr_values = nr_values;
        ret->parent = parent;
        ret->array = root;
    }
    return ret;
}

void radix_tree_node_rcu_free(struct rcu_head *head)
{
    struct radix_tree_node *node =
            container_of(head, struct radix_tree_node, rcu_head);

    /*
     * Must only free zeroed nodes into the slab.  We can be left with
     * non-NULL entries by radix_tree_free_nodes, so clear the entries
     * and tags here.
     */
    memset(node->slots, 0, sizeof(node->slots));
    memset(node->tags, 0, sizeof(node->tags));
    INIT_LIST_HEAD(&node->private_list);

    kmem_cache_free(radix_tree_node_cachep, node);
}

static inline void
radix_tree_node_free(struct radix_tree_node *node)
{
    call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
}

/*
 * Load up this CPU's radix_tree_node buffer with sufficient objects to
 * ensure that the addition of a single element in the tree cannot fail.  On
 * success, return zero, with preemption disabled.  On error, return -ENOMEM
 * with preemption not disabled.
 *
 * To make use of this facility, the radix tree must be initialised without
 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
 */
static __must_check int __radix_tree_preload(gfp_t gfp_mask, unsigned nr)
{
    struct radix_tree_preload *rtp;
    struct radix_tree_node *node;
    int ret = -ENOMEM;

    /*
     * Nodes preloaded by one cgroup can be used by another cgroup, so
     * they should never be accounted to any particular memory cgroup.
     */
    gfp_mask &= ~__GFP_ACCOUNT;

    local_lock(&radix_tree_preloads.lock);
    rtp = this_cpu_ptr(&radix_tree_preloads);
    while (rtp->nr < nr) {
        local_unlock(&radix_tree_preloads.lock);
        node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
        if (node == NULL)
            goto out;
        local_lock(&radix_tree_preloads.lock);
        rtp = this_cpu_ptr(&radix_tree_preloads);
        if (rtp->nr < nr) {
            node->parent = rtp->nodes;
            rtp->nodes = node;
            rtp->nr++;
        } else {
            kmem_cache_free(radix_tree_node_cachep, node);
        }
    }
    ret = 0;
out:
    return ret;
}

/*
 * Load up this CPU's radix_tree_node buffer with sufficient objects to
 * ensure that the addition of a single element in the tree cannot fail.  On
 * success, return zero, with preemption disabled.  On error, return -ENOMEM
 * with preemption not disabled.
 *
 * To make use of this facility, the radix tree must be initialised without
 * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
 */
int radix_tree_preload(gfp_t gfp_mask)
{
    /* Warn on non-sensical use... */
    WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask));
    return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
}
EXPORT_SYMBOL(radix_tree_preload);
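
/*
 * Minimal usage sketch (the tree and lock here are the caller's, named
 * only for illustration): preload outside the lock, insert inside it,
 * then re-enable preemption with radix_tree_preload_end().
 *
 *    if (radix_tree_preload(GFP_KERNEL))
 *        return -ENOMEM;
 *    spin_lock(&my_lock);
 *    err = radix_tree_insert(&my_tree, index, item);
 *    spin_unlock(&my_lock);
 *    radix_tree_preload_end();
 */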

/*
 * The same as the function above, except we don't guarantee that preloading
 * happens.  We do it if we decide it helps.  On success, return zero with
 * preemption disabled.  On error, return -ENOMEM with preemption not
 * disabled.
 */
int radix_tree_maybe_preload(gfp_t gfp_mask)
{
    if (gfpflags_allow_blocking(gfp_mask))
        return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
    /* Preloading doesn't help anything with this gfp mask, skip it */
    local_lock(&radix_tree_preloads.lock);
    return 0;
}
EXPORT_SYMBOL(radix_tree_maybe_preload);

static unsigned radix_tree_load_root(const struct radix_tree_root *root,
        struct radix_tree_node **nodep, unsigned long *maxindex)
{
    struct radix_tree_node *node = rcu_dereference_raw(root->xa_head);

    *nodep = node;

    if (likely(radix_tree_is_internal_node(node))) {
        node = entry_to_node(node);
        *maxindex = node_maxindex(node);
        return node->shift + RADIX_TREE_MAP_SHIFT;
    }

    *maxindex = 0;
    return 0;
}

/*
 *  Extend a radix tree so it can store key @index.
 */
static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp,
                unsigned long index, unsigned int shift)
{
    void *entry;
    unsigned int maxshift;
    int tag;

    /* Figure out what the shift should be.  */
    maxshift = shift;
    while (index > shift_maxindex(maxshift))
        maxshift += RADIX_TREE_MAP_SHIFT;

    entry = rcu_dereference_raw(root->xa_head);
    if (!entry && (!is_idr(root) || root_tag_get(root, IDR_FREE)))
        goto out;

    do {
        struct radix_tree_node *node = radix_tree_node_alloc(gfp, NULL,
                            root, shift, 0, 1, 0);
        if (!node)
            return -ENOMEM;

        if (is_idr(root)) {
            all_tag_set(node, IDR_FREE);
            if (!root_tag_get(root, IDR_FREE)) {
                tag_clear(node, IDR_FREE, 0);
                root_tag_set(root, IDR_FREE);
            }
        } else {
            /* Propagate the aggregated tag info to the new child */
            for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
                if (root_tag_get(root, tag))
                    tag_set(node, tag, 0);
            }
        }

        BUG_ON(shift > BITS_PER_LONG);
        if (radix_tree_is_internal_node(entry)) {
            entry_to_node(entry)->parent = node;
        } else if (xa_is_value(entry)) {
            /* Moving a value entry root->xa_head to a node */
            node->nr_values = 1;
        }
        /*
         * entry was already in the radix tree, so we do not need
         * rcu_assign_pointer here
         */
        node->slots[0] = (void __rcu *)entry;
        entry = node_to_entry(node);
        rcu_assign_pointer(root->xa_head, entry);
        shift += RADIX_TREE_MAP_SHIFT;
    } while (shift <= maxshift);
out:
    return maxshift + RADIX_TREE_MAP_SHIFT;
}

/**
 *  radix_tree_shrink    -    shrink radix tree to minimum height
 *  @root:      radix tree root
 */
static inline bool radix_tree_shrink(struct radix_tree_root *root)
{
    bool shrunk = false;

    for (;;) {
        struct radix_tree_node *node = rcu_dereference_raw(root->xa_head);
        struct radix_tree_node *child;

        if (!radix_tree_is_internal_node(node))
            break;
        node = entry_to_node(node);

        /*
         * If the candidate node has more than one child, or its child
         * is not at the leftmost slot, we cannot shrink.
         */
        if (node->count != 1)
            break;
        child = rcu_dereference_raw(node->slots[0]);
        if (!child)
            break;

        /*
         * For an IDR, we must not shrink entry 0 into the root in
         * case somebody calls idr_replace() with a pointer that
         * appears to be an internal entry
         */
        if (!node->shift && is_idr(root))
            break;

        if (radix_tree_is_internal_node(child))
            entry_to_node(child)->parent = NULL;

        /*
         * We don't need rcu_assign_pointer(), since we are simply
         * moving the node from one part of the tree to another: if it
         * was safe to dereference the old pointer to it
         * (node->slots[0]), it will be safe to dereference the new
         * one (root->xa_head) as far as dependent read barriers go.
         */
        root->xa_head = (void __rcu *)child;
        if (is_idr(root) && !tag_get(node, IDR_FREE, 0))
            root_tag_clear(root, IDR_FREE);

        /*
         * We have a dilemma here. The node's slot[0] must not be
         * NULLed in case there are concurrent lookups expecting to
         * find the item. However, if this was a bottom-level node,
         * its slot pointer may still be visible to callers
         * dereferencing it. If the item corresponding to slot[0] is
         * subsequently deleted, these callers would expect their slot
         * to become empty sooner or later.
         *
         * For example, lockless pagecache will look up a slot, deref
         * the page pointer, and if the page has a zero refcount it
         * means it was concurrently deleted from pagecache, so try
         * the deref again. Fortunately there is already a requirement
         * for logic to retry the entire slot lookup -- the indirect
         * pointer problem (replacing a direct root node with an
         * indirect pointer also results in a stale slot). So tag the
         * slot as indirect to force callers to retry.
         */
        node->count = 0;
        if (!radix_tree_is_internal_node(child)) {
            node->slots[0] = (void __rcu *)RADIX_TREE_RETRY;
        }

        WARN_ON_ONCE(!list_empty(&node->private_list));
        radix_tree_node_free(node);
        shrunk = true;
    }

    return shrunk;
}

static bool delete_node(struct radix_tree_root *root,
            struct radix_tree_node *node)
{
    bool deleted = false;

    do {
        struct radix_tree_node *parent;

        if (node->count) {
            if (node_to_entry(node) ==
                    rcu_dereference_raw(root->xa_head))
                deleted |= radix_tree_shrink(root);
            return deleted;
        }

        parent = node->parent;
        if (parent) {
            parent->slots[node->offset] = NULL;
            parent->count--;
        } else {
            /*
             * Shouldn't the tags already have all been cleared
             * by the caller?
             */
            if (!is_idr(root))
                root_tag_clear_all(root);
            root->xa_head = NULL;
        }

        WARN_ON_ONCE(!list_empty(&node->private_list));
        radix_tree_node_free(node);
        deleted = true;

        node = parent;
    } while (node);

    return deleted;
}

/**
 *  __radix_tree_create -   create a slot in a radix tree
 *  @root:      radix tree root
 *  @index:     index key
 *  @nodep:     returns node
 *  @slotp:     returns slot
 *
 *  Create, if necessary, and return the node and slot for an item
 *  at position @index in the radix tree @root.
 *
 *  Until there is more than one item in the tree, no nodes are
 *  allocated and @root->xa_head is used as a direct slot instead of
 *  pointing to a node, in which case *@nodep will be NULL.
 *
 *  Returns -ENOMEM, or 0 for success.
 */
static int __radix_tree_create(struct radix_tree_root *root,
        unsigned long index, struct radix_tree_node **nodep,
        void __rcu ***slotp)
{
    struct radix_tree_node *node = NULL, *child;
    void __rcu **slot = (void __rcu **)&root->xa_head;
    unsigned long maxindex;
    unsigned int shift, offset = 0;
    unsigned long max = index;
    gfp_t gfp = root_gfp_mask(root);

    shift = radix_tree_load_root(root, &child, &maxindex);

    /* Make sure the tree is high enough.  */
    if (max > maxindex) {
        int error = radix_tree_extend(root, gfp, max, shift);
        if (error < 0)
            return error;
        shift = error;
        child = rcu_dereference_raw(root->xa_head);
    }

    while (shift > 0) {
        shift -= RADIX_TREE_MAP_SHIFT;
        if (child == NULL) {
            /* Have to add a child node.  */
            child = radix_tree_node_alloc(gfp, node, root, shift,
                            offset, 0, 0);
            if (!child)
                return -ENOMEM;
            rcu_assign_pointer(*slot, node_to_entry(child));
            if (node)
                node->count++;
        } else if (!radix_tree_is_internal_node(child))
            break;

        /* Go a level down */
        node = entry_to_node(child);
        offset = radix_tree_descend(node, &child, index);
        slot = &node->slots[offset];
    }

    if (nodep)
        *nodep = node;
    if (slotp)
        *slotp = slot;
    return 0;
}

/*
 * Free any nodes below this node.  The tree is presumed to not need
 * shrinking, and any user data in the tree is presumed to not need a
 * destructor called on it.  If we need to add a destructor, we can
 * add that functionality later.  Note that we may not clear tags or
 * slots from the tree as an RCU walker may still have a pointer into
 * this subtree.  We could replace the entries with RADIX_TREE_RETRY,
 * but we'll still have to clear those in rcu_free.
 */
static void radix_tree_free_nodes(struct radix_tree_node *node)
{
    unsigned offset = 0;
    struct radix_tree_node *child = entry_to_node(node);

    for (;;) {
        void *entry = rcu_dereference_raw(child->slots[offset]);
        if (xa_is_node(entry) && child->shift) {
            child = entry_to_node(entry);
            offset = 0;
            continue;
        }
        offset++;
        while (offset == RADIX_TREE_MAP_SIZE) {
            struct radix_tree_node *old = child;
            offset = child->offset + 1;
            child = child->parent;
            WARN_ON_ONCE(!list_empty(&old->private_list));
            radix_tree_node_free(old);
            if (old == entry_to_node(node))
                return;
        }
    }
}

static inline int insert_entries(struct radix_tree_node *node,
        void __rcu **slot, void *item)
{
    if (*slot)
        return -EEXIST;
    rcu_assign_pointer(*slot, item);
    if (node) {
        node->count++;
        if (xa_is_value(item))
            node->nr_values++;
    }
    return 1;
}

/**
 *  radix_tree_insert    -    insert into a radix tree
 *  @root:      radix tree root
 *  @index:     index key
 *  @item:      item to insert
 *
 *  Insert an item into the radix tree at position @index.
 */
int radix_tree_insert(struct radix_tree_root *root, unsigned long index,
            void *item)
{
    struct radix_tree_node *node;
    void __rcu **slot;
    int error;

    BUG_ON(radix_tree_is_internal_node(item));

    error = __radix_tree_create(root, index, &node, &slot);
    if (error)
        return error;

    error = insert_entries(node, slot, item);
    if (error < 0)
        return error;

    if (node) {
        unsigned offset = get_slot_offset(node, slot);
        BUG_ON(tag_get(node, 0, offset));
        BUG_ON(tag_get(node, 1, offset));
        BUG_ON(tag_get(node, 2, offset));
    } else {
        BUG_ON(root_tags_get(root));
    }

    return 0;
}
EXPORT_SYMBOL(radix_tree_insert);

/**
 *  __radix_tree_lookup -   lookup an item in a radix tree
 *  @root:      radix tree root
 *  @index:     index key
 *  @nodep:     returns node
 *  @slotp:     returns slot
 *
 *  Lookup and return the item at position @index in the radix
 *  tree @root.
 *
 *  Until there is more than one item in the tree, no nodes are
 *  allocated and @root->xa_head is used as a direct slot instead of
 *  pointing to a node, in which case *@nodep will be NULL.
 */
void *__radix_tree_lookup(const struct radix_tree_root *root,
              unsigned long index, struct radix_tree_node **nodep,
              void __rcu ***slotp)
{
    struct radix_tree_node *node, *parent;
    unsigned long maxindex;
    void __rcu **slot;

 restart:
    parent = NULL;
    slot = (void __rcu **)&root->xa_head;
    radix_tree_load_root(root, &node, &maxindex);
    if (index > maxindex)
        return NULL;

    while (radix_tree_is_internal_node(node)) {
        unsigned offset;

        parent = entry_to_node(node);
        offset = radix_tree_descend(parent, &node, index);
        slot = parent->slots + offset;
        if (node == RADIX_TREE_RETRY)
            goto restart;
        if (parent->shift == 0)
            break;
    }

    if (nodep)
        *nodep = parent;
    if (slotp)
        *slotp = slot;
    return node;
}

/**
 *  radix_tree_lookup_slot    -    lookup a slot in a radix tree
 *  @root:      radix tree root
 *  @index:     index key
 *
 *  Returns:  the slot corresponding to the position @index in the
 *  radix tree @root. This is useful for update-if-exists operations.
 *
 *  This function can be called under rcu_read_lock iff the slot is not
 *  modified by radix_tree_replace_slot, otherwise it must be called
 *  exclusive from other writers. Any dereference of the slot must be done
 *  using radix_tree_deref_slot.
 */
void __rcu **radix_tree_lookup_slot(const struct radix_tree_root *root,
                unsigned long index)
{
    void __rcu **slot;

    if (!__radix_tree_lookup(root, index, NULL, &slot))
        return NULL;
    return slot;
}
EXPORT_SYMBOL(radix_tree_lookup_slot);

/**
 *  radix_tree_lookup    -    perform lookup operation on a radix tree
 *  @root:      radix tree root
 *  @index:     index key
 *
 *  Lookup the item at the position @index in the radix tree @root.
 *
 *  This function can be called under rcu_read_lock, however the caller
 *  must manage lifetimes of leaf nodes (eg. RCU may also be used to free
 *  them safely). No RCU barriers are required to access or modify the
 *  returned item, however.
 */
void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index)
{
    return __radix_tree_lookup(root, index, NULL, NULL);
}
EXPORT_SYMBOL(radix_tree_lookup);
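
/*
 * Minimal lookup sketch (my_tree and the item lifetime rules are the
 * example's assumptions): a reader only needs rcu_read_lock() as long as
 * the stored objects are themselves freed via RCU.
 *
 *    rcu_read_lock();
 *    item = radix_tree_lookup(&my_tree, index);
 *    if (item)
 *        ... use item ...
 *    rcu_read_unlock();
 */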

static void replace_slot(void __rcu **slot, void *item,
        struct radix_tree_node *node, int count, int values)
{
    if (node && (count || values)) {
        node->count += count;
        node->nr_values += values;
    }

    rcu_assign_pointer(*slot, item);
}

static bool node_tag_get(const struct radix_tree_root *root,
                const struct radix_tree_node *node,
                unsigned int tag, unsigned int offset)
{
    if (node)
        return tag_get(node, tag, offset);
    return root_tag_get(root, tag);
}

/*
 * IDR users want to be able to store NULL in the tree, so if the slot isn't
 * free, don't adjust the count, even if it's transitioning between NULL and
 * non-NULL.  For the IDA, we mark slots as being IDR_FREE while they still
 * have empty bits, but it only stores NULL in slots when they're being
 * deleted.
 */
static int calculate_count(struct radix_tree_root *root,
                struct radix_tree_node *node, void __rcu **slot,
                void *item, void *old)
{
    if (is_idr(root)) {
        unsigned offset = get_slot_offset(node, slot);
        bool free = node_tag_get(root, node, IDR_FREE, offset);
        if (!free)
            return 0;
        if (!old)
            return 1;
    }
    return !!item - !!old;
}

/**
 * __radix_tree_replace     - replace item in a slot
 * @root:       radix tree root
 * @node:       pointer to tree node
 * @slot:       pointer to slot in @node
 * @item:       new item to store in the slot.
 *
 * For use with __radix_tree_lookup().  Caller must hold tree write locked
 * across slot lookup and replacement.
 */
void __radix_tree_replace(struct radix_tree_root *root,
              struct radix_tree_node *node,
              void __rcu **slot, void *item)
{
    void *old = rcu_dereference_raw(*slot);
    int values = !!xa_is_value(item) - !!xa_is_value(old);
    int count = calculate_count(root, node, slot, item, old);

    /*
     * This function supports replacing value entries and
     * deleting entries, but that needs accounting against the
     * node unless the slot is root->xa_head.
     */
    WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->xa_head) &&
            (count || values));
    replace_slot(slot, item, node, count, values);

    if (!node)
        return;

    delete_node(root, node);
}

/**
 * radix_tree_replace_slot  - replace item in a slot
 * @root:   radix tree root
 * @slot:   pointer to slot
 * @item:   new item to store in the slot.
 *
 * For use with radix_tree_lookup_slot() and
 * radix_tree_gang_lookup_tag_slot().  Caller must hold tree write locked
 * across slot lookup and replacement.
 *
 * NOTE: This cannot be used to switch between non-entries (empty slots),
 * regular entries, and value entries, as that requires accounting
 * inside the radix tree node. When switching from one type of entry or
 * deleting, use __radix_tree_lookup() and __radix_tree_replace() or
 * radix_tree_iter_replace().
 */
void radix_tree_replace_slot(struct radix_tree_root *root,
                 void __rcu **slot, void *item)
{
    __radix_tree_replace(root, NULL, slot, item);
}
EXPORT_SYMBOL(radix_tree_replace_slot);
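
/*
 * Typical update-if-exists sketch (tree and lock are the caller's own,
 * shown only for illustration): look the slot up, then replace the entry
 * under the tree's write-side lock.
 *
 *    spin_lock(&my_lock);
 *    slot = radix_tree_lookup_slot(&my_tree, index);
 *    if (slot)
 *        radix_tree_replace_slot(&my_tree, slot, new_item);
 *    spin_unlock(&my_lock);
 */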

/**
 * radix_tree_iter_replace - replace item in a slot
 * @root:   radix tree root
 * @iter:   iterator state
 * @slot:   pointer to slot
 * @item:   new item to store in the slot.
 *
 * For use with radix_tree_for_each_slot().
 * Caller must hold tree write locked.
 */
void radix_tree_iter_replace(struct radix_tree_root *root,
                const struct radix_tree_iter *iter,
                void __rcu **slot, void *item)
{
    __radix_tree_replace(root, iter->node, slot, item);
}

static void node_tag_set(struct radix_tree_root *root,
                struct radix_tree_node *node,
                unsigned int tag, unsigned int offset)
{
    while (node) {
        if (tag_get(node, tag, offset))
            return;
        tag_set(node, tag, offset);
        offset = node->offset;
        node = node->parent;
    }

    if (!root_tag_get(root, tag))
        root_tag_set(root, tag);
}

/**
 *  radix_tree_tag_set - set a tag on a radix tree node
 *  @root:      radix tree root
 *  @index:     index key
 *  @tag:       tag index
 *
 *  Set the search tag (which must be < RADIX_TREE_MAX_TAGS)
 *  corresponding to @index in the radix tree, from the root all
 *  the way down to the leaf node.
 *
 *  Returns the address of the tagged item.  Setting a tag on a not-present
 *  item is a bug.
 */
void *radix_tree_tag_set(struct radix_tree_root *root,
            unsigned long index, unsigned int tag)
{
    struct radix_tree_node *node, *parent;
    unsigned long maxindex;

    radix_tree_load_root(root, &node, &maxindex);
    BUG_ON(index > maxindex);

    while (radix_tree_is_internal_node(node)) {
        unsigned offset;

        parent = entry_to_node(node);
        offset = radix_tree_descend(parent, &node, index);
        BUG_ON(!node);

        if (!tag_get(parent, tag, offset))
            tag_set(parent, tag, offset);
    }

    /* set the root's tag bit */
    if (!root_tag_get(root, tag))
        root_tag_set(root, tag);

    return node;
}
EXPORT_SYMBOL(radix_tree_tag_set);

static void node_tag_clear(struct radix_tree_root *root,
                struct radix_tree_node *node,
                unsigned int tag, unsigned int offset)
{
    while (node) {
        if (!tag_get(node, tag, offset))
            return;
        tag_clear(node, tag, offset);
        if (any_tag_set(node, tag))
            return;

        offset = node->offset;
        node = node->parent;
    }

    /* clear the root's tag bit */
    if (root_tag_get(root, tag))
        root_tag_clear(root, tag);
}

/**
 *  radix_tree_tag_clear - clear a tag on a radix tree node
 *  @root:      radix tree root
 *  @index:     index key
 *  @tag:       tag index
 *
 *  Clear the search tag (which must be < RADIX_TREE_MAX_TAGS)
 *  corresponding to @index in the radix tree.  If this causes
 *  the leaf node to have no tags set then clear the tag in the
 *  next-to-leaf node, etc.
 *
 *  Returns the address of the tagged item on success, else NULL.  ie:
 *  has the same return value and semantics as radix_tree_lookup().
 */
void *radix_tree_tag_clear(struct radix_tree_root *root,
            unsigned long index, unsigned int tag)
{
    struct radix_tree_node *node, *parent;
    unsigned long maxindex;
    int offset = 0;

    radix_tree_load_root(root, &node, &maxindex);
    if (index > maxindex)
        return NULL;

    parent = NULL;

    while (radix_tree_is_internal_node(node)) {
        parent = entry_to_node(node);
        offset = radix_tree_descend(parent, &node, index);
    }

    if (node)
        node_tag_clear(root, parent, tag, offset);

    return node;
}
EXPORT_SYMBOL(radix_tree_tag_clear);

/**
 * radix_tree_iter_tag_clear - clear a tag on the current iterator entry
 * @root: radix tree root
 * @iter: iterator state
 * @tag: tag to clear
 */
void radix_tree_iter_tag_clear(struct radix_tree_root *root,
            const struct radix_tree_iter *iter, unsigned int tag)
{
    node_tag_clear(root, iter->node, tag, iter_offset(iter));
}

/**
 * radix_tree_tag_get - get a tag on a radix tree node
 * @root:       radix tree root
 * @index:      index key
 * @tag:        tag index (< RADIX_TREE_MAX_TAGS)
 *
 * Return values:
 *
 *  0: tag not present or not set
 *  1: tag set
 *
 * Note that the return value of this function may not be relied on, even if
 * the RCU lock is held, unless tag modification and node deletion are excluded
 * from concurrency.
 */
int radix_tree_tag_get(const struct radix_tree_root *root,
            unsigned long index, unsigned int tag)
{
    struct radix_tree_node *node, *parent;
    unsigned long maxindex;

    if (!root_tag_get(root, tag))
        return 0;

    radix_tree_load_root(root, &node, &maxindex);
    if (index > maxindex)
        return 0;

    while (radix_tree_is_internal_node(node)) {
        unsigned offset;

        parent = entry_to_node(node);
        offset = radix_tree_descend(parent, &node, index);

        if (!tag_get(parent, tag, offset))
            return 0;
        if (node == RADIX_TREE_RETRY)
            break;
    }

    return 1;
}
EXPORT_SYMBOL(radix_tree_tag_get);
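
/*
 * Tag usage sketch (tag 0 is arbitrary here; real users define named tags,
 * e.g. the page cache's dirty and writeback tags):
 *
 *    radix_tree_tag_set(&my_tree, index, 0);
 *    if (radix_tree_tag_get(&my_tree, index, 0))
 *        ... the entry at index is tagged ...
 *    radix_tree_tag_clear(&my_tree, index, 0);
 */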

/* Construct iter->tags bit-mask from node->tags[tag] array */
static void set_iter_tags(struct radix_tree_iter *iter,
                struct radix_tree_node *node, unsigned offset,
                unsigned tag)
{
    unsigned tag_long = offset / BITS_PER_LONG;
    unsigned tag_bit  = offset % BITS_PER_LONG;

    if (!node) {
        iter->tags = 1;
        return;
    }

    iter->tags = node->tags[tag][tag_long] >> tag_bit;

    /* This never happens if RADIX_TREE_TAG_LONGS == 1 */
    if (tag_long < RADIX_TREE_TAG_LONGS - 1) {
        /* Pick tags from next element */
        if (tag_bit)
            iter->tags |= node->tags[tag][tag_long + 1] <<
                        (BITS_PER_LONG - tag_bit);
        /* Clip chunk size, here only BITS_PER_LONG tags */
        iter->next_index = __radix_tree_iter_add(iter, BITS_PER_LONG);
    }
}

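/**
 * radix_tree_iter_resume - resume iterating when the chunk may be invalid
 * @slot: pointer to current slot
 * @iter: iterator state
 * Return: New slot pointer
 *
 * If the iterator needs to release then reacquire a lock, the chunk may
 * have been invalidated by an insertion or deletion.  Call this function
 * before releasing the lock to continue the iteration from the next index.
 */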
void __rcu **radix_tree_iter_resume(void __rcu **slot,
                    struct radix_tree_iter *iter)
{
    slot++;
    iter->index = __radix_tree_iter_add(iter, 1);
    iter->next_index = iter->index;
    iter->tags = 0;
    return NULL;
}
EXPORT_SYMBOL(radix_tree_iter_resume);

/**
 * radix_tree_next_chunk - find next chunk of slots for iteration
 *
 * @root:   radix tree root
 * @iter:   iterator state
 * @flags:  RADIX_TREE_ITER_* flags and tag index
 * Returns: pointer to chunk first slot, or NULL if iteration is over
 */
void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root,
                 struct radix_tree_iter *iter, unsigned flags)
{
    unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK;
    struct radix_tree_node *node, *child;
    unsigned long index, offset, maxindex;

    if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag))
        return NULL;

    /*
     * Catch next_index overflow after ~0UL. iter->index never overflows
     * during iterating; it can be zero only at the beginning.
     * And we cannot overflow iter->next_index in a single step,
     * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG.
     *
     * This condition is also used by radix_tree_next_slot() to stop
     * contiguous iterating, and to forbid switching to the next chunk.
     */
    index = iter->next_index;
    if (!index && iter->index)
        return NULL;

 restart:
    radix_tree_load_root(root, &child, &maxindex);
    if (index > maxindex)
        return NULL;
    if (!child)
        return NULL;

    if (!radix_tree_is_internal_node(child)) {
        /* Single-slot tree */
        iter->index = index;
        iter->next_index = maxindex + 1;
        iter->tags = 1;
        iter->node = NULL;
        return (void __rcu **)&root->xa_head;
    }

    do {
        node = entry_to_node(child);
        offset = radix_tree_descend(node, &child, index);

        if ((flags & RADIX_TREE_ITER_TAGGED) ?
                !tag_get(node, tag, offset) : !child) {
            /* Hole detected */
            if (flags & RADIX_TREE_ITER_CONTIG)
                return NULL;

            if (flags & RADIX_TREE_ITER_TAGGED)
                offset = radix_tree_find_next_bit(node, tag,
                        offset + 1);
            else
                while (++offset < RADIX_TREE_MAP_SIZE) {
                    void *slot = rcu_dereference_raw(
                            node->slots[offset]);
                    if (slot)
                        break;
                }
            index &= ~node_maxindex(node);
            index += offset << node->shift;
            /* Overflow after ~0UL */
            if (!index)
                return NULL;
            if (offset == RADIX_TREE_MAP_SIZE)
                goto restart;
            child = rcu_dereference_raw(node->slots[offset]);
        }

        if (!child)
            goto restart;
        if (child == RADIX_TREE_RETRY)
            break;
    } while (node->shift && radix_tree_is_internal_node(child));

    /* Update the iterator state */
    iter->index = (index & ~node_maxindex(node)) | offset;
    iter->next_index = (index | node_maxindex(node)) + 1;
    iter->node = node;

    if (flags & RADIX_TREE_ITER_TAGGED)
        set_iter_tags(iter, node, offset, tag);

    return node->slots + offset;
}
EXPORT_SYMBOL(radix_tree_next_chunk);
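
/*
 * Iteration sketch built on this chunk machinery via the standard helper
 * macros (my_tree and the visit step are placeholders):
 *
 *    radix_tree_for_each_slot(slot, &my_tree, &iter, 0) {
 *        void *entry = rcu_dereference_raw(*slot);
 *        ... visit iter.index / entry ...
 *    }
 */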

/**
 *  radix_tree_gang_lookup - perform multiple lookup on a radix tree
 *  @root:      radix tree root
 *  @results:   where the results of the lookup are placed
 *  @first_index:   start the lookup from this key
 *  @max_items: place up to this many items at *results
 *
 *  Performs an index-ascending scan of the tree for present items.  Places
 *  them at *@results and returns the number of items which were placed at
 *  *@results.
 *
 *  The implementation is naive.
 *
 *  Like radix_tree_lookup, radix_tree_gang_lookup may be called under
 *  rcu_read_lock. In this case, rather than the returned results being
 *  an atomic snapshot of the tree at a single point in time, the
 *  semantics of an RCU protected gang lookup are as though multiple
 *  radix_tree_lookups have been issued in individual locks, and results
 *  stored in 'results'.
 */
unsigned int
radix_tree_gang_lookup(const struct radix_tree_root *root, void **results,
            unsigned long first_index, unsigned int max_items)
{
    struct radix_tree_iter iter;
    void __rcu **slot;
    unsigned int ret = 0;

    if (unlikely(!max_items))
        return 0;

    radix_tree_for_each_slot(slot, root, &iter, first_index) {
        results[ret] = rcu_dereference_raw(*slot);
        if (!results[ret])
            continue;
        if (radix_tree_is_internal_node(results[ret])) {
            slot = radix_tree_iter_retry(&iter);
            continue;
        }
        if (++ret == max_items)
            break;
    }

    return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup);
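
/*
 * Gang lookup sketch (the array size and tree are the example's
 * assumptions): fetch up to 16 present items starting at index 0.
 *
 *    void *results[16];
 *    unsigned int n;
 *
 *    rcu_read_lock();
 *    n = radix_tree_gang_lookup(&my_tree, results, 0, 16);
 *    rcu_read_unlock();
 */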

/**
 *  radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
 *                               based on a tag
 *  @root:      radix tree root
 *  @results:   where the results of the lookup are placed
 *  @first_index:   start the lookup from this key
 *  @max_items: place up to this many items at *results
 *  @tag:       the tag index (< RADIX_TREE_MAX_TAGS)
 *
 *  Performs an index-ascending scan of the tree for present items which
 *  have the tag indexed by @tag set.  Places the items at *@results and
 *  returns the number of items which were placed at *@results.
 */
unsigned int
radix_tree_gang_lookup_tag(const struct radix_tree_root *root, void **results,
        unsigned long first_index, unsigned int max_items,
        unsigned int tag)
{
    struct radix_tree_iter iter;
    void __rcu **slot;
    unsigned int ret = 0;

    if (unlikely(!max_items))
        return 0;

    radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
        results[ret] = rcu_dereference_raw(*slot);
        if (!results[ret])
            continue;
        if (radix_tree_is_internal_node(results[ret])) {
            slot = radix_tree_iter_retry(&iter);
            continue;
        }
        if (++ret == max_items)
            break;
    }

    return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup_tag);

/**
 *  radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a
 *                    radix tree based on a tag
 *  @root:      radix tree root
 *  @results:   where the results of the lookup are placed
 *  @first_index:   start the lookup from this key
 *  @max_items: place up to this many items at *results
 *  @tag:       the tag index (< RADIX_TREE_MAX_TAGS)
 *
 *  Performs an index-ascending scan of the tree for present items which
 *  have the tag indexed by @tag set.  Places the slots at *@results and
 *  returns the number of slots which were placed at *@results.
 */
unsigned int
radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *root,
        void __rcu ***results, unsigned long first_index,
        unsigned int max_items, unsigned int tag)
{
    struct radix_tree_iter iter;
    void __rcu **slot;
    unsigned int ret = 0;

    if (unlikely(!max_items))
        return 0;

    radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
        results[ret] = slot;
        if (++ret == max_items)
            break;
    }

    return ret;
}
EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);

static bool __radix_tree_delete(struct radix_tree_root *root,
                struct radix_tree_node *node, void __rcu **slot)
{
    void *old = rcu_dereference_raw(*slot);
    int values = xa_is_value(old) ? -1 : 0;
    unsigned offset = get_slot_offset(node, slot);
    int tag;

    if (is_idr(root))
        node_tag_set(root, node, IDR_FREE, offset);
    else
        for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
            node_tag_clear(root, node, tag, offset);

    replace_slot(slot, NULL, node, -1, values);
    return node && delete_node(root, node);
}

/**
 * radix_tree_iter_delete - delete the entry at this iterator position
 * @root: radix tree root
 * @iter: iterator state
 * @slot: pointer to slot
 *
 * Delete the entry at the position currently pointed to by the iterator.
 * This may result in the current node being freed; if it is, the iterator
 * is advanced so that it will not reference the freed memory.  This
 * function may be called without any locking if there are no other threads
 * which can access this tree.
 */
void radix_tree_iter_delete(struct radix_tree_root *root,
                struct radix_tree_iter *iter, void __rcu **slot)
{
    if (__radix_tree_delete(root, iter->node, slot))
        iter->index = iter->next_index;
}
EXPORT_SYMBOL(radix_tree_iter_delete);

/**
 * radix_tree_delete_item - delete an item from a radix tree
 * @root: radix tree root
 * @index: index key
 * @item: expected item
 *
 * Remove @item at @index from the radix tree rooted at @root.
 *
 * Return: the deleted entry, or %NULL if it was not present
 * or the entry at the given @index was not @item.
 */
void *radix_tree_delete_item(struct radix_tree_root *root,
                 unsigned long index, void *item)
{
    struct radix_tree_node *node = NULL;
    void __rcu **slot = NULL;
    void *entry;

    entry = __radix_tree_lookup(root, index, &node, &slot);
    if (!slot)
        return NULL;
    if (!entry && (!is_idr(root) || node_tag_get(root, node, IDR_FREE,
                        get_slot_offset(node, slot))))
        return NULL;

    if (item && entry != item)
        return NULL;

    __radix_tree_delete(root, node, slot);

    return entry;
}
EXPORT_SYMBOL(radix_tree_delete_item);

/**
 * radix_tree_delete - delete an entry from a radix tree
 * @root: radix tree root
 * @index: index key
 *
 * Remove the entry at @index from the radix tree rooted at @root.
 *
 * Return: The deleted entry, or %NULL if it was not present.
 */
void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
{
    return radix_tree_delete_item(root, index, NULL);
}
EXPORT_SYMBOL(radix_tree_delete);

/**
 *  radix_tree_tagged - test whether any items in the tree are tagged
 *  @root:      radix tree root
 *  @tag:       tag to test
 */
int radix_tree_tagged(const struct radix_tree_root *root, unsigned int tag)
{
    return root_tag_get(root, tag);
}
EXPORT_SYMBOL(radix_tree_tagged);

/**
 * idr_preload - preload for idr_alloc()
 * @gfp_mask: allocation mask to use for preloading
 *
 * Preallocate memory to use for the next call to idr_alloc().  This function
 * returns with preemption disabled.  It will be enabled by idr_preload_end().
 */
void idr_preload(gfp_t gfp_mask)
{
    if (__radix_tree_preload(gfp_mask, IDR_PRELOAD_SIZE))
        local_lock(&radix_tree_preloads.lock);
}
EXPORT_SYMBOL(idr_preload);
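
/*
 * The canonical pairing with idr_alloc() (allocation itself lives in
 * lib/idr.c; this is only a usage sketch with a caller-provided lock):
 *
 *    idr_preload(GFP_KERNEL);
 *    spin_lock(&my_lock);
 *    id = idr_alloc(&my_idr, ptr, 0, 0, GFP_NOWAIT);
 *    spin_unlock(&my_lock);
 *    idr_preload_end();
 */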
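/*
 * Find the next free (IDR_FREE-tagged) slot at or after iter->next_index
 * and no higher than @max, extending the tree if necessary.  Returns the
 * slot with @iter updated to describe it, or an ERR_PTR (-ENOMEM or
 * -ENOSPC) on failure.
 */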
void __rcu **idr_get_free(struct radix_tree_root *root,
                  struct radix_tree_iter *iter, gfp_t gfp,
                  unsigned long max)
{
    struct radix_tree_node *node = NULL, *child;
    void __rcu **slot = (void __rcu **)&root->xa_head;
    unsigned long maxindex, start = iter->next_index;
    unsigned int shift, offset = 0;

 grow:
    shift = radix_tree_load_root(root, &child, &maxindex);
    if (!radix_tree_tagged(root, IDR_FREE))
        start = max(start, maxindex + 1);
    if (start > max)
        return ERR_PTR(-ENOSPC);

    if (start > maxindex) {
        int error = radix_tree_extend(root, gfp, start, shift);
        if (error < 0)
            return ERR_PTR(error);
        shift = error;
        child = rcu_dereference_raw(root->xa_head);
    }
    if (start == 0 && shift == 0)
        shift = RADIX_TREE_MAP_SHIFT;

    while (shift) {
        shift -= RADIX_TREE_MAP_SHIFT;
        if (child == NULL) {
            /* Have to add a child node.  */
            child = radix_tree_node_alloc(gfp, node, root, shift,
                            offset, 0, 0);
            if (!child)
                return ERR_PTR(-ENOMEM);
            all_tag_set(child, IDR_FREE);
            rcu_assign_pointer(*slot, node_to_entry(child));
            if (node)
                node->count++;
        } else if (!radix_tree_is_internal_node(child))
            break;

        node = entry_to_node(child);
        offset = radix_tree_descend(node, &child, start);
        if (!tag_get(node, IDR_FREE, offset)) {
            offset = radix_tree_find_next_bit(node, IDR_FREE,
                            offset + 1);
            start = next_index(start, node, offset);
            if (start > max || start == 0)
                return ERR_PTR(-ENOSPC);
            while (offset == RADIX_TREE_MAP_SIZE) {
                offset = node->offset + 1;
                node = node->parent;
                if (!node)
                    goto grow;
                shift = node->shift;
            }
            child = rcu_dereference_raw(node->slots[offset]);
        }
        slot = &node->slots[offset];
    }

    iter->index = start;
    if (node)
        iter->next_index = 1 + min(max, (start | node_maxindex(node)));
    else
        iter->next_index = 1;
    iter->node = node;
    set_iter_tags(iter, node, offset, IDR_FREE);

    return slot;
}

/**
 * idr_destroy - release all internal memory from an IDR
 * @idr: idr handle
 *
 * After this function is called, the IDR is empty, and may be reused or
 * the data structure containing it may be freed.
 *
 * A typical clean-up sequence for objects stored in an idr tree will use
 * idr_for_each() to free all objects, if necessary, then idr_destroy() to
 * free the memory used to keep track of those objects.
 */
void idr_destroy(struct idr *idr)
{
    struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.xa_head);
    if (radix_tree_is_internal_node(node))
        radix_tree_free_nodes(node);
    idr->idr_rt.xa_head = NULL;
    root_tag_set(&idr->idr_rt, IDR_FREE);
}
EXPORT_SYMBOL(idr_destroy);

static void
radix_tree_node_ctor(void *arg)
{
    struct radix_tree_node *node = arg;

    memset(node, 0, sizeof(*node));
    INIT_LIST_HEAD(&node->private_list);
}

static int radix_tree_cpu_dead(unsigned int cpu)
{
    struct radix_tree_preload *rtp;
    struct radix_tree_node *node;

    /* Free per-cpu pool of preloaded nodes */
    rtp = &per_cpu(radix_tree_preloads, cpu);
    while (rtp->nr) {
        node = rtp->nodes;
        rtp->nodes = node->parent;
        kmem_cache_free(radix_tree_node_cachep, node);
        rtp->nr--;
    }
    return 0;
}

void __init radix_tree_init(void)
{
    int ret;

    BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32);
    BUILD_BUG_ON(ROOT_IS_IDR & ~GFP_ZONEMASK);
    BUILD_BUG_ON(XA_CHUNK_SIZE > 255);
    radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
            sizeof(struct radix_tree_node), 0,
            SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
            radix_tree_node_ctor);
    ret = cpuhp_setup_state_nocalls(CPUHP_RADIX_DEAD, "lib/radix:dead",
                    NULL, radix_tree_cpu_dead);
    WARN_ON(ret < 0);
}