Back to home page

LXR

 
 

    


0001 /* Generic associative array implementation.
0002  *
0003  * See Documentation/assoc_array.txt for information.
0004  *
0005  * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
0006  * Written by David Howells (dhowells@redhat.com)
0007  *
0008  * This program is free software; you can redistribute it and/or
0009  * modify it under the terms of the GNU General Public Licence
0010  * as published by the Free Software Foundation; either version
0011  * 2 of the Licence, or (at your option) any later version.
0012  */
0013 //#define DEBUG
0014 #include <linux/rcupdate.h>
0015 #include <linux/slab.h>
0016 #include <linux/err.h>
0017 #include <linux/assoc_array_priv.h>
0018 
0019 /*
0020  * Iterate over an associative array.  The caller must hold the RCU read lock
0021  * or better.
0022  */
0023 static int assoc_array_subtree_iterate(const struct assoc_array_ptr *root,
0024                        const struct assoc_array_ptr *stop,
0025                        int (*iterator)(const void *leaf,
0026                                void *iterator_data),
0027                        void *iterator_data)
0028 {
0029     const struct assoc_array_shortcut *shortcut;
0030     const struct assoc_array_node *node;
0031     const struct assoc_array_ptr *cursor, *ptr, *parent;
0032     unsigned long has_meta;
0033     int slot, ret;
0034 
0035     cursor = root;
0036 
0037 begin_node:
0038     if (assoc_array_ptr_is_shortcut(cursor)) {
0039         /* Descend through a shortcut */
0040         shortcut = assoc_array_ptr_to_shortcut(cursor);
0041         smp_read_barrier_depends();
0042         cursor = ACCESS_ONCE(shortcut->next_node);
0043     }
0044 
0045     node = assoc_array_ptr_to_node(cursor);
0046     smp_read_barrier_depends();
0047     slot = 0;
0048 
0049     /* We perform two passes of each node.
0050      *
0051      * The first pass does all the leaves in this node.  This means we
0052      * don't miss any leaves if the node is split up by insertion whilst
0053      * we're iterating over the branches rooted here (we may, however, see
0054      * some leaves twice).
0055      */
0056     has_meta = 0;
0057     for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
0058         ptr = ACCESS_ONCE(node->slots[slot]);
0059         has_meta |= (unsigned long)ptr;
0060         if (ptr && assoc_array_ptr_is_leaf(ptr)) {
0061             /* We need a barrier between the read of the pointer
0062              * and dereferencing the pointer - but only if we are
0063              * actually going to dereference it.
0064              */
0065             smp_read_barrier_depends();
0066 
0067             /* Invoke the callback */
0068             ret = iterator(assoc_array_ptr_to_leaf(ptr),
0069                        iterator_data);
0070             if (ret)
0071                 return ret;
0072         }
0073     }
0074 
0075     /* The second pass attends to all the metadata pointers.  If we follow
0076      * one of these we may find that we don't come back here, but rather go
0077      * back to a replacement node with the leaves in a different layout.
0078      *
0079      * We are guaranteed to make progress, however, as the slot number for
0080      * a particular portion of the key space cannot change - and we
0081      * continue at the back pointer + 1.
0082      */
0083     if (!(has_meta & ASSOC_ARRAY_PTR_META_TYPE))
0084         goto finished_node;
0085     slot = 0;
0086 
0087 continue_node:
0088     node = assoc_array_ptr_to_node(cursor);
0089     smp_read_barrier_depends();
0090 
0091     for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
0092         ptr = ACCESS_ONCE(node->slots[slot]);
0093         if (assoc_array_ptr_is_meta(ptr)) {
0094             cursor = ptr;
0095             goto begin_node;
0096         }
0097     }
0098 
0099 finished_node:
0100     /* Move up to the parent (may need to skip back over a shortcut) */
0101     parent = ACCESS_ONCE(node->back_pointer);
0102     slot = node->parent_slot;
0103     if (parent == stop)
0104         return 0;
0105 
0106     if (assoc_array_ptr_is_shortcut(parent)) {
0107         shortcut = assoc_array_ptr_to_shortcut(parent);
0108         smp_read_barrier_depends();
0109         cursor = parent;
0110         parent = ACCESS_ONCE(shortcut->back_pointer);
0111         slot = shortcut->parent_slot;
0112         if (parent == stop)
0113             return 0;
0114     }
0115 
0116     /* Ascend to next slot in parent node */
0117     cursor = parent;
0118     slot++;
0119     goto continue_node;
0120 }
0121 
0122 /**
0123  * assoc_array_iterate - Pass all objects in the array to a callback
0124  * @array: The array to iterate over.
0125  * @iterator: The callback function.
0126  * @iterator_data: Private data for the callback function.
0127  *
0128  * Iterate over all the objects in an associative array.  Each one will be
0129  * presented to the iterator function.
0130  *
0131  * If the array is being modified concurrently with the iteration then it is
0132  * possible that some objects in the array will be passed to the iterator
0133  * callback more than once - though every object should be passed at least
0134  * once.  If this is undesirable then the caller must lock against modification
0135  * for the duration of this function.
0136  *
0137  * The function will return 0 if no objects were in the array or else it will
0138  * return the result of the last iterator function called.  Iteration stops
0139  * immediately if any call to the iteration function results in a non-zero
0140  * return.
0141  *
0142  * The caller should hold the RCU read lock or better if concurrent
0143  * modification is possible.
0144  */
0145 int assoc_array_iterate(const struct assoc_array *array,
0146             int (*iterator)(const void *object,
0147                     void *iterator_data),
0148             void *iterator_data)
0149 {
0150     struct assoc_array_ptr *root = ACCESS_ONCE(array->root);
0151 
0152     if (!root)
0153         return 0;
0154     return assoc_array_subtree_iterate(root, NULL, iterator, iterator_data);
0155 }
0156 
0157 enum assoc_array_walk_status {
0158     assoc_array_walk_tree_empty,
0159     assoc_array_walk_found_terminal_node,
0160     assoc_array_walk_found_wrong_shortcut,
0161 };
0162 
0163 struct assoc_array_walk_result {
0164     struct {
0165         struct assoc_array_node *node;  /* Node in which leaf might be found */
0166         int     level;
0167         int     slot;
0168     } terminal_node;
0169     struct {
0170         struct assoc_array_shortcut *shortcut;
0171         int     level;
0172         int     sc_level;
0173         unsigned long   sc_segments;
0174         unsigned long   dissimilarity;
0175     } wrong_shortcut;
0176 };
0177 
0178 /*
0179  * Navigate through the internal tree looking for the closest node to the key.
0180  */
0181 static enum assoc_array_walk_status
0182 assoc_array_walk(const struct assoc_array *array,
0183          const struct assoc_array_ops *ops,
0184          const void *index_key,
0185          struct assoc_array_walk_result *result)
0186 {
0187     struct assoc_array_shortcut *shortcut;
0188     struct assoc_array_node *node;
0189     struct assoc_array_ptr *cursor, *ptr;
0190     unsigned long sc_segments, dissimilarity;
0191     unsigned long segments;
0192     int level, sc_level, next_sc_level;
0193     int slot;
0194 
0195     pr_devel("-->%s()\n", __func__);
0196 
0197     cursor = ACCESS_ONCE(array->root);
0198     if (!cursor)
0199         return assoc_array_walk_tree_empty;
0200 
0201     level = 0;
0202 
0203     /* Use segments from the key for the new leaf to navigate through the
0204      * internal tree, skipping through nodes and shortcuts that are on
0205      * route to the destination.  Eventually we'll come to a slot that is
0206      * either empty or contains a leaf at which point we've found a node in
0207      * which the leaf we're looking for might be found or into which it
0208      * should be inserted.
0209      */
0210 jumped:
0211     segments = ops->get_key_chunk(index_key, level);
0212     pr_devel("segments[%d]: %lx\n", level, segments);
0213 
0214     if (assoc_array_ptr_is_shortcut(cursor))
0215         goto follow_shortcut;
0216 
0217 consider_node:
0218     node = assoc_array_ptr_to_node(cursor);
0219     smp_read_barrier_depends();
0220 
0221     slot = segments >> (level & ASSOC_ARRAY_KEY_CHUNK_MASK);
0222     slot &= ASSOC_ARRAY_FAN_MASK;
0223     ptr = ACCESS_ONCE(node->slots[slot]);
0224 
0225     pr_devel("consider slot %x [ix=%d type=%lu]\n",
0226          slot, level, (unsigned long)ptr & 3);
0227 
0228     if (!assoc_array_ptr_is_meta(ptr)) {
0229         /* The node doesn't have a node/shortcut pointer in the slot
0230          * corresponding to the index key that we have to follow.
0231          */
0232         result->terminal_node.node = node;
0233         result->terminal_node.level = level;
0234         result->terminal_node.slot = slot;
0235         pr_devel("<--%s() = terminal_node\n", __func__);
0236         return assoc_array_walk_found_terminal_node;
0237     }
0238 
0239     if (assoc_array_ptr_is_node(ptr)) {
0240         /* There is a pointer to a node in the slot corresponding to
0241          * this index key segment, so we need to follow it.
0242          */
0243         cursor = ptr;
0244         level += ASSOC_ARRAY_LEVEL_STEP;
0245         if ((level & ASSOC_ARRAY_KEY_CHUNK_MASK) != 0)
0246             goto consider_node;
0247         goto jumped;
0248     }
0249 
0250     /* There is a shortcut in the slot corresponding to the index key
0251      * segment.  We follow the shortcut if its partial index key matches
0252      * this leaf's.  Otherwise we need to split the shortcut.
0253      */
0254     cursor = ptr;
0255 follow_shortcut:
0256     shortcut = assoc_array_ptr_to_shortcut(cursor);
0257     smp_read_barrier_depends();
0258     pr_devel("shortcut to %d\n", shortcut->skip_to_level);
0259     sc_level = level + ASSOC_ARRAY_LEVEL_STEP;
0260     BUG_ON(sc_level > shortcut->skip_to_level);
0261 
0262     do {
0263         /* Check the leaf against the shortcut's index key a word at a
0264          * time, trimming the final word (the shortcut stores the index
0265          * key completely from the root to the shortcut's target).
0266          */
0267         if ((sc_level & ASSOC_ARRAY_KEY_CHUNK_MASK) == 0)
0268             segments = ops->get_key_chunk(index_key, sc_level);
0269 
0270         sc_segments = shortcut->index_key[sc_level >> ASSOC_ARRAY_KEY_CHUNK_SHIFT];
0271         dissimilarity = segments ^ sc_segments;
0272 
0273         if (round_up(sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE) > shortcut->skip_to_level) {
0274             /* Trim segments that are beyond the shortcut */
0275             int shift = shortcut->skip_to_level & ASSOC_ARRAY_KEY_CHUNK_MASK;
0276             dissimilarity &= ~(ULONG_MAX << shift);
0277             next_sc_level = shortcut->skip_to_level;
0278         } else {
0279             next_sc_level = sc_level + ASSOC_ARRAY_KEY_CHUNK_SIZE;
0280             next_sc_level = round_down(next_sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
0281         }
0282 
0283         if (dissimilarity != 0) {
0284             /* This shortcut points elsewhere */
0285             result->wrong_shortcut.shortcut = shortcut;
0286             result->wrong_shortcut.level = level;
0287             result->wrong_shortcut.sc_level = sc_level;
0288             result->wrong_shortcut.sc_segments = sc_segments;
0289             result->wrong_shortcut.dissimilarity = dissimilarity;
0290             return assoc_array_walk_found_wrong_shortcut;
0291         }
0292 
0293         sc_level = next_sc_level;
0294     } while (sc_level < shortcut->skip_to_level);
0295 
0296     /* The shortcut matches the leaf's index to this point. */
0297     cursor = ACCESS_ONCE(shortcut->next_node);
0298     if (((level ^ sc_level) & ~ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) {
0299         level = sc_level;
0300         goto jumped;
0301     } else {
0302         level = sc_level;
0303         goto consider_node;
0304     }
0305 }
0306 
0307 /**
0308  * assoc_array_find - Find an object by index key
0309  * @array: The associative array to search.
0310  * @ops: The operations to use.
0311  * @index_key: The key to the object.
0312  *
0313  * Find an object in an associative array by walking through the internal tree
0314  * to the node that should contain the object and then searching the leaves
0315  * there.  NULL is returned if the requested object was not found in the array.
0316  *
0317  * The caller must hold the RCU read lock or better.
0318  */
0319 void *assoc_array_find(const struct assoc_array *array,
0320                const struct assoc_array_ops *ops,
0321                const void *index_key)
0322 {
0323     struct assoc_array_walk_result result;
0324     const struct assoc_array_node *node;
0325     const struct assoc_array_ptr *ptr;
0326     const void *leaf;
0327     int slot;
0328 
0329     if (assoc_array_walk(array, ops, index_key, &result) !=
0330         assoc_array_walk_found_terminal_node)
0331         return NULL;
0332 
0333     node = result.terminal_node.node;
0334     smp_read_barrier_depends();
0335 
0336     /* If the target key is available to us, it's has to be pointed to by
0337      * the terminal node.
0338      */
0339     for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
0340         ptr = ACCESS_ONCE(node->slots[slot]);
0341         if (ptr && assoc_array_ptr_is_leaf(ptr)) {
0342             /* We need a barrier between the read of the pointer
0343              * and dereferencing the pointer - but only if we are
0344              * actually going to dereference it.
0345              */
0346             leaf = assoc_array_ptr_to_leaf(ptr);
0347             smp_read_barrier_depends();
0348             if (ops->compare_object(leaf, index_key))
0349                 return (void *)leaf;
0350         }
0351     }
0352 
0353     return NULL;
0354 }
0355 
0356 /*
0357  * Destructively iterate over an associative array.  The caller must prevent
0358  * other simultaneous accesses.
0359  */
0360 static void assoc_array_destroy_subtree(struct assoc_array_ptr *root,
0361                     const struct assoc_array_ops *ops)
0362 {
0363     struct assoc_array_shortcut *shortcut;
0364     struct assoc_array_node *node;
0365     struct assoc_array_ptr *cursor, *parent = NULL;
0366     int slot = -1;
0367 
0368     pr_devel("-->%s()\n", __func__);
0369 
0370     cursor = root;
0371     if (!cursor) {
0372         pr_devel("empty\n");
0373         return;
0374     }
0375 
0376 move_to_meta:
0377     if (assoc_array_ptr_is_shortcut(cursor)) {
0378         /* Descend through a shortcut */
0379         pr_devel("[%d] shortcut\n", slot);
0380         BUG_ON(!assoc_array_ptr_is_shortcut(cursor));
0381         shortcut = assoc_array_ptr_to_shortcut(cursor);
0382         BUG_ON(shortcut->back_pointer != parent);
0383         BUG_ON(slot != -1 && shortcut->parent_slot != slot);
0384         parent = cursor;
0385         cursor = shortcut->next_node;
0386         slot = -1;
0387         BUG_ON(!assoc_array_ptr_is_node(cursor));
0388     }
0389 
0390     pr_devel("[%d] node\n", slot);
0391     node = assoc_array_ptr_to_node(cursor);
0392     BUG_ON(node->back_pointer != parent);
0393     BUG_ON(slot != -1 && node->parent_slot != slot);
0394     slot = 0;
0395 
0396 continue_node:
0397     pr_devel("Node %p [back=%p]\n", node, node->back_pointer);
0398     for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
0399         struct assoc_array_ptr *ptr = node->slots[slot];
0400         if (!ptr)
0401             continue;
0402         if (assoc_array_ptr_is_meta(ptr)) {
0403             parent = cursor;
0404             cursor = ptr;
0405             goto move_to_meta;
0406         }
0407 
0408         if (ops) {
0409             pr_devel("[%d] free leaf\n", slot);
0410             ops->free_object(assoc_array_ptr_to_leaf(ptr));
0411         }
0412     }
0413 
0414     parent = node->back_pointer;
0415     slot = node->parent_slot;
0416     pr_devel("free node\n");
0417     kfree(node);
0418     if (!parent)
0419         return; /* Done */
0420 
0421     /* Move back up to the parent (may need to free a shortcut on
0422      * the way up) */
0423     if (assoc_array_ptr_is_shortcut(parent)) {
0424         shortcut = assoc_array_ptr_to_shortcut(parent);
0425         BUG_ON(shortcut->next_node != cursor);
0426         cursor = parent;
0427         parent = shortcut->back_pointer;
0428         slot = shortcut->parent_slot;
0429         pr_devel("free shortcut\n");
0430         kfree(shortcut);
0431         if (!parent)
0432             return;
0433 
0434         BUG_ON(!assoc_array_ptr_is_node(parent));
0435     }
0436 
0437     /* Ascend to next slot in parent node */
0438     pr_devel("ascend to %p[%d]\n", parent, slot);
0439     cursor = parent;
0440     node = assoc_array_ptr_to_node(cursor);
0441     slot++;
0442     goto continue_node;
0443 }
0444 
0445 /**
0446  * assoc_array_destroy - Destroy an associative array
0447  * @array: The array to destroy.
0448  * @ops: The operations to use.
0449  *
0450  * Discard all metadata and free all objects in an associative array.  The
0451  * array will be empty and ready to use again upon completion.  This function
0452  * cannot fail.
0453  *
0454  * The caller must prevent all other accesses whilst this takes place as no
0455  * attempt is made to adjust pointers gracefully to permit RCU readlock-holding
0456  * accesses to continue.  On the other hand, no memory allocation is required.
0457  */
0458 void assoc_array_destroy(struct assoc_array *array,
0459              const struct assoc_array_ops *ops)
0460 {
0461     assoc_array_destroy_subtree(array->root, ops);
0462     array->root = NULL;
0463 }
0464 
0465 /*
0466  * Handle insertion into an empty tree.
0467  */
0468 static bool assoc_array_insert_in_empty_tree(struct assoc_array_edit *edit)
0469 {
0470     struct assoc_array_node *new_n0;
0471 
0472     pr_devel("-->%s()\n", __func__);
0473 
0474     new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
0475     if (!new_n0)
0476         return false;
0477 
0478     edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
0479     edit->leaf_p = &new_n0->slots[0];
0480     edit->adjust_count_on = new_n0;
0481     edit->set[0].ptr = &edit->array->root;
0482     edit->set[0].to = assoc_array_node_to_ptr(new_n0);
0483 
0484     pr_devel("<--%s() = ok [no root]\n", __func__);
0485     return true;
0486 }
0487 
0488 /*
0489  * Handle insertion into a terminal node.
0490  */
0491 static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit,
0492                           const struct assoc_array_ops *ops,
0493                           const void *index_key,
0494                           struct assoc_array_walk_result *result)
0495 {
0496     struct assoc_array_shortcut *shortcut, *new_s0;
0497     struct assoc_array_node *node, *new_n0, *new_n1, *side;
0498     struct assoc_array_ptr *ptr;
0499     unsigned long dissimilarity, base_seg, blank;
0500     size_t keylen;
0501     bool have_meta;
0502     int level, diff;
0503     int slot, next_slot, free_slot, i, j;
0504 
0505     node    = result->terminal_node.node;
0506     level   = result->terminal_node.level;
0507     edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = result->terminal_node.slot;
0508 
0509     pr_devel("-->%s()\n", __func__);
0510 
0511     /* We arrived at a node which doesn't have an onward node or shortcut
0512      * pointer that we have to follow.  This means that (a) the leaf we
0513      * want must go here (either by insertion or replacement) or (b) we
0514      * need to split this node and insert in one of the fragments.
0515      */
0516     free_slot = -1;
0517 
0518     /* Firstly, we have to check the leaves in this node to see if there's
0519      * a matching one we should replace in place.
0520      */
0521     for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
0522         ptr = node->slots[i];
0523         if (!ptr) {
0524             free_slot = i;
0525             continue;
0526         }
0527         if (assoc_array_ptr_is_leaf(ptr) &&
0528             ops->compare_object(assoc_array_ptr_to_leaf(ptr),
0529                     index_key)) {
0530             pr_devel("replace in slot %d\n", i);
0531             edit->leaf_p = &node->slots[i];
0532             edit->dead_leaf = node->slots[i];
0533             pr_devel("<--%s() = ok [replace]\n", __func__);
0534             return true;
0535         }
0536     }
0537 
0538     /* If there is a free slot in this node then we can just insert the
0539      * leaf here.
0540      */
0541     if (free_slot >= 0) {
0542         pr_devel("insert in free slot %d\n", free_slot);
0543         edit->leaf_p = &node->slots[free_slot];
0544         edit->adjust_count_on = node;
0545         pr_devel("<--%s() = ok [insert]\n", __func__);
0546         return true;
0547     }
0548 
0549     /* The node has no spare slots - so we're either going to have to split
0550      * it or insert another node before it.
0551      *
0552      * Whatever, we're going to need at least two new nodes - so allocate
0553      * those now.  We may also need a new shortcut, but we deal with that
0554      * when we need it.
0555      */
0556     new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
0557     if (!new_n0)
0558         return false;
0559     edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
0560     new_n1 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
0561     if (!new_n1)
0562         return false;
0563     edit->new_meta[1] = assoc_array_node_to_ptr(new_n1);
0564 
0565     /* We need to find out how similar the leaves are. */
0566     pr_devel("no spare slots\n");
0567     have_meta = false;
0568     for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
0569         ptr = node->slots[i];
0570         if (assoc_array_ptr_is_meta(ptr)) {
0571             edit->segment_cache[i] = 0xff;
0572             have_meta = true;
0573             continue;
0574         }
0575         base_seg = ops->get_object_key_chunk(
0576             assoc_array_ptr_to_leaf(ptr), level);
0577         base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
0578         edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK;
0579     }
0580 
0581     if (have_meta) {
0582         pr_devel("have meta\n");
0583         goto split_node;
0584     }
0585 
0586     /* The node contains only leaves */
0587     dissimilarity = 0;
0588     base_seg = edit->segment_cache[0];
0589     for (i = 1; i < ASSOC_ARRAY_FAN_OUT; i++)
0590         dissimilarity |= edit->segment_cache[i] ^ base_seg;
0591 
0592     pr_devel("only leaves; dissimilarity=%lx\n", dissimilarity);
0593 
0594     if ((dissimilarity & ASSOC_ARRAY_FAN_MASK) == 0) {
0595         /* The old leaves all cluster in the same slot.  We will need
0596          * to insert a shortcut if the new node wants to cluster with them.
0597          */
0598         if ((edit->segment_cache[ASSOC_ARRAY_FAN_OUT] ^ base_seg) == 0)
0599             goto all_leaves_cluster_together;
0600 
0601         /* Otherwise we can just insert a new node ahead of the old
0602          * one.
0603          */
0604         goto present_leaves_cluster_but_not_new_leaf;
0605     }
0606 
0607 split_node:
0608     pr_devel("split node\n");
0609 
0610     /* We need to split the current node; we know that the node doesn't
0611      * simply contain a full set of leaves that cluster together (it
0612      * contains meta pointers and/or non-clustering leaves).
0613      *
0614      * We need to expel at least two leaves out of a set consisting of the
0615      * leaves in the node and the new leaf.
0616      *
0617      * We need a new node (n0) to replace the current one and a new node to
0618      * take the expelled nodes (n1).
0619      */
0620     edit->set[0].to = assoc_array_node_to_ptr(new_n0);
0621     new_n0->back_pointer = node->back_pointer;
0622     new_n0->parent_slot = node->parent_slot;
0623     new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
0624     new_n1->parent_slot = -1; /* Need to calculate this */
0625 
0626 do_split_node:
0627     pr_devel("do_split_node\n");
0628 
0629     new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
0630     new_n1->nr_leaves_on_branch = 0;
0631 
0632     /* Begin by finding two matching leaves.  There have to be at least two
0633      * that match - even if there are meta pointers - because any leaf that
0634      * would match a slot with a meta pointer in it must be somewhere
0635      * behind that meta pointer and cannot be here.  Further, given N
0636      * remaining leaf slots, we now have N+1 leaves to go in them.
0637      */
0638     for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
0639         slot = edit->segment_cache[i];
0640         if (slot != 0xff)
0641             for (j = i + 1; j < ASSOC_ARRAY_FAN_OUT + 1; j++)
0642                 if (edit->segment_cache[j] == slot)
0643                     goto found_slot_for_multiple_occupancy;
0644     }
0645 found_slot_for_multiple_occupancy:
0646     pr_devel("same slot: %x %x [%02x]\n", i, j, slot);
0647     BUG_ON(i >= ASSOC_ARRAY_FAN_OUT);
0648     BUG_ON(j >= ASSOC_ARRAY_FAN_OUT + 1);
0649     BUG_ON(slot >= ASSOC_ARRAY_FAN_OUT);
0650 
0651     new_n1->parent_slot = slot;
0652 
0653     /* Metadata pointers cannot change slot */
0654     for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++)
0655         if (assoc_array_ptr_is_meta(node->slots[i]))
0656             new_n0->slots[i] = node->slots[i];
0657         else
0658             new_n0->slots[i] = NULL;
0659     BUG_ON(new_n0->slots[slot] != NULL);
0660     new_n0->slots[slot] = assoc_array_node_to_ptr(new_n1);
0661 
0662     /* Filter the leaf pointers between the new nodes */
0663     free_slot = -1;
0664     next_slot = 0;
0665     for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
0666         if (assoc_array_ptr_is_meta(node->slots[i]))
0667             continue;
0668         if (edit->segment_cache[i] == slot) {
0669             new_n1->slots[next_slot++] = node->slots[i];
0670             new_n1->nr_leaves_on_branch++;
0671         } else {
0672             do {
0673                 free_slot++;
0674             } while (new_n0->slots[free_slot] != NULL);
0675             new_n0->slots[free_slot] = node->slots[i];
0676         }
0677     }
0678 
0679     pr_devel("filtered: f=%x n=%x\n", free_slot, next_slot);
0680 
0681     if (edit->segment_cache[ASSOC_ARRAY_FAN_OUT] != slot) {
0682         do {
0683             free_slot++;
0684         } while (new_n0->slots[free_slot] != NULL);
0685         edit->leaf_p = &new_n0->slots[free_slot];
0686         edit->adjust_count_on = new_n0;
0687     } else {
0688         edit->leaf_p = &new_n1->slots[next_slot++];
0689         edit->adjust_count_on = new_n1;
0690     }
0691 
0692     BUG_ON(next_slot <= 1);
0693 
0694     edit->set_backpointers_to = assoc_array_node_to_ptr(new_n0);
0695     for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
0696         if (edit->segment_cache[i] == 0xff) {
0697             ptr = node->slots[i];
0698             BUG_ON(assoc_array_ptr_is_leaf(ptr));
0699             if (assoc_array_ptr_is_node(ptr)) {
0700                 side = assoc_array_ptr_to_node(ptr);
0701                 edit->set_backpointers[i] = &side->back_pointer;
0702             } else {
0703                 shortcut = assoc_array_ptr_to_shortcut(ptr);
0704                 edit->set_backpointers[i] = &shortcut->back_pointer;
0705             }
0706         }
0707     }
0708 
0709     ptr = node->back_pointer;
0710     if (!ptr)
0711         edit->set[0].ptr = &edit->array->root;
0712     else if (assoc_array_ptr_is_node(ptr))
0713         edit->set[0].ptr = &assoc_array_ptr_to_node(ptr)->slots[node->parent_slot];
0714     else
0715         edit->set[0].ptr = &assoc_array_ptr_to_shortcut(ptr)->next_node;
0716     edit->excised_meta[0] = assoc_array_node_to_ptr(node);
0717     pr_devel("<--%s() = ok [split node]\n", __func__);
0718     return true;
0719 
0720 present_leaves_cluster_but_not_new_leaf:
0721     /* All the old leaves cluster in the same slot, but the new leaf wants
0722      * to go into a different slot, so we create a new node to hold the new
0723      * leaf and a pointer to a new node holding all the old leaves.
0724      */
0725     pr_devel("present leaves cluster but not new leaf\n");
0726 
0727     new_n0->back_pointer = node->back_pointer;
0728     new_n0->parent_slot = node->parent_slot;
0729     new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
0730     new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
0731     new_n1->parent_slot = edit->segment_cache[0];
0732     new_n1->nr_leaves_on_branch = node->nr_leaves_on_branch;
0733     edit->adjust_count_on = new_n0;
0734 
0735     for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++)
0736         new_n1->slots[i] = node->slots[i];
0737 
0738     new_n0->slots[edit->segment_cache[0]] = assoc_array_node_to_ptr(new_n0);
0739     edit->leaf_p = &new_n0->slots[edit->segment_cache[ASSOC_ARRAY_FAN_OUT]];
0740 
0741     edit->set[0].ptr = &assoc_array_ptr_to_node(node->back_pointer)->slots[node->parent_slot];
0742     edit->set[0].to = assoc_array_node_to_ptr(new_n0);
0743     edit->excised_meta[0] = assoc_array_node_to_ptr(node);
0744     pr_devel("<--%s() = ok [insert node before]\n", __func__);
0745     return true;
0746 
0747 all_leaves_cluster_together:
0748     /* All the leaves, new and old, want to cluster together in this node
0749      * in the same slot, so we have to replace this node with a shortcut to
0750      * skip over the identical parts of the key and then place a pair of
0751      * nodes, one inside the other, at the end of the shortcut and
0752      * distribute the keys between them.
0753      *
0754      * Firstly we need to work out where the leaves start diverging as a
0755      * bit position into their keys so that we know how big the shortcut
0756      * needs to be.
0757      *
0758      * We only need to make a single pass of N of the N+1 leaves because if
0759      * any keys differ between themselves at bit X then at least one of
0760      * them must also differ with the base key at bit X or before.
0761      */
0762     pr_devel("all leaves cluster together\n");
0763     diff = INT_MAX;
0764     for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
0765         int x = ops->diff_objects(assoc_array_ptr_to_leaf(node->slots[i]),
0766                       index_key);
0767         if (x < diff) {
0768             BUG_ON(x < 0);
0769             diff = x;
0770         }
0771     }
0772     BUG_ON(diff == INT_MAX);
0773     BUG_ON(diff < level + ASSOC_ARRAY_LEVEL_STEP);
0774 
0775     keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE);
0776     keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
0777 
0778     new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) +
0779              keylen * sizeof(unsigned long), GFP_KERNEL);
0780     if (!new_s0)
0781         return false;
0782     edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s0);
0783 
0784     edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0);
0785     new_s0->back_pointer = node->back_pointer;
0786     new_s0->parent_slot = node->parent_slot;
0787     new_s0->next_node = assoc_array_node_to_ptr(new_n0);
0788     new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0);
0789     new_n0->parent_slot = 0;
0790     new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
0791     new_n1->parent_slot = -1; /* Need to calculate this */
0792 
0793     new_s0->skip_to_level = level = diff & ~ASSOC_ARRAY_LEVEL_STEP_MASK;
0794     pr_devel("skip_to_level = %d [diff %d]\n", level, diff);
0795     BUG_ON(level <= 0);
0796 
0797     for (i = 0; i < keylen; i++)
0798         new_s0->index_key[i] =
0799             ops->get_key_chunk(index_key, i * ASSOC_ARRAY_KEY_CHUNK_SIZE);
0800 
0801     blank = ULONG_MAX << (level & ASSOC_ARRAY_KEY_CHUNK_MASK);
0802     pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, level, blank);
0803     new_s0->index_key[keylen - 1] &= ~blank;
0804 
0805     /* This now reduces to a node splitting exercise for which we'll need
0806      * to regenerate the disparity table.
0807      */
0808     for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
0809         ptr = node->slots[i];
0810         base_seg = ops->get_object_key_chunk(assoc_array_ptr_to_leaf(ptr),
0811                              level);
0812         base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
0813         edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK;
0814     }
0815 
0816     base_seg = ops->get_key_chunk(index_key, level);
0817     base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
0818     edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = base_seg & ASSOC_ARRAY_FAN_MASK;
0819     goto do_split_node;
0820 }
0821 
0822 /*
0823  * Handle insertion into the middle of a shortcut.
0824  */
0825 static bool assoc_array_insert_mid_shortcut(struct assoc_array_edit *edit,
0826                         const struct assoc_array_ops *ops,
0827                         struct assoc_array_walk_result *result)
0828 {
0829     struct assoc_array_shortcut *shortcut, *new_s0, *new_s1;
0830     struct assoc_array_node *node, *new_n0, *side;
0831     unsigned long sc_segments, dissimilarity, blank;
0832     size_t keylen;
0833     int level, sc_level, diff;
0834     int sc_slot;
0835 
0836     shortcut    = result->wrong_shortcut.shortcut;
0837     level       = result->wrong_shortcut.level;
0838     sc_level    = result->wrong_shortcut.sc_level;
0839     sc_segments = result->wrong_shortcut.sc_segments;
0840     dissimilarity   = result->wrong_shortcut.dissimilarity;
0841 
0842     pr_devel("-->%s(ix=%d dis=%lx scix=%d)\n",
0843          __func__, level, dissimilarity, sc_level);
0844 
0845     /* We need to split a shortcut and insert a node between the two
0846      * pieces.  Zero-length pieces will be dispensed with entirely.
0847      *
0848      * First of all, we need to find out in which level the first
0849      * difference was.
0850      */
0851     diff = __ffs(dissimilarity);
0852     diff &= ~ASSOC_ARRAY_LEVEL_STEP_MASK;
0853     diff += sc_level & ~ASSOC_ARRAY_KEY_CHUNK_MASK;
0854     pr_devel("diff=%d\n", diff);
0855 
0856     if (!shortcut->back_pointer) {
0857         edit->set[0].ptr = &edit->array->root;
0858     } else if (assoc_array_ptr_is_node(shortcut->back_pointer)) {
0859         node = assoc_array_ptr_to_node(shortcut->back_pointer);
0860         edit->set[0].ptr = &node->slots[shortcut->parent_slot];
0861     } else {
0862         BUG();
0863     }
0864 
0865     edit->excised_meta[0] = assoc_array_shortcut_to_ptr(shortcut);
0866 
0867     /* Create a new node now since we're going to need it anyway */
0868     new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
0869     if (!new_n0)
0870         return false;
0871     edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
0872     edit->adjust_count_on = new_n0;
0873 
0874     /* Insert a new shortcut before the new node if this segment isn't of
0875      * zero length - otherwise we just connect the new node directly to the
0876      * parent.
0877      */
0878     level += ASSOC_ARRAY_LEVEL_STEP;
0879     if (diff > level) {
0880         pr_devel("pre-shortcut %d...%d\n", level, diff);
0881         keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE);
0882         keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
0883 
0884         new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) +
0885                  keylen * sizeof(unsigned long), GFP_KERNEL);
0886         if (!new_s0)
0887             return false;
0888         edit->new_meta[1] = assoc_array_shortcut_to_ptr(new_s0);
0889         edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0);
0890         new_s0->back_pointer = shortcut->back_pointer;
0891         new_s0->parent_slot = shortcut->parent_slot;
0892         new_s0->next_node = assoc_array_node_to_ptr(new_n0);
0893         new_s0->skip_to_level = diff;
0894 
0895         new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0);
0896         new_n0->parent_slot = 0;
0897 
0898         memcpy(new_s0->index_key, shortcut->index_key,
0899                keylen * sizeof(unsigned long));
0900 
0901         blank = ULONG_MAX << (diff & ASSOC_ARRAY_KEY_CHUNK_MASK);
0902         pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, diff, blank);
0903         new_s0->index_key[keylen - 1] &= ~blank;
0904     } else {
0905         pr_devel("no pre-shortcut\n");
0906         edit->set[0].to = assoc_array_node_to_ptr(new_n0);
0907         new_n0->back_pointer = shortcut->back_pointer;
0908         new_n0->parent_slot = shortcut->parent_slot;
0909     }
0910 
0911     side = assoc_array_ptr_to_node(shortcut->next_node);
0912     new_n0->nr_leaves_on_branch = side->nr_leaves_on_branch;
0913 
0914     /* We need to know which slot in the new node is going to take a
0915      * metadata pointer.
0916      */
0917     sc_slot = sc_segments >> (diff & ASSOC_ARRAY_KEY_CHUNK_MASK);
0918     sc_slot &= ASSOC_ARRAY_FAN_MASK;
0919 
0920     pr_devel("new slot %lx >> %d -> %d\n",
0921          sc_segments, diff & ASSOC_ARRAY_KEY_CHUNK_MASK, sc_slot);
0922 
0923     /* Determine whether we need to follow the new node with a replacement
0924      * for the current shortcut.  We could in theory reuse the current
0925      * shortcut if its parent slot number doesn't change - but that's a
0926      * 1-in-16 chance so not worth expending the code upon.
0927      */
0928     level = diff + ASSOC_ARRAY_LEVEL_STEP;
0929     if (level < shortcut->skip_to_level) {
0930         pr_devel("post-shortcut %d...%d\n", level, shortcut->skip_to_level);
0931         keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
0932         keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
0933 
0934         new_s1 = kzalloc(sizeof(struct assoc_array_shortcut) +
0935                  keylen * sizeof(unsigned long), GFP_KERNEL);
0936         if (!new_s1)
0937             return false;
0938         edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s1);
0939 
0940         new_s1->back_pointer = assoc_array_node_to_ptr(new_n0);
0941         new_s1->parent_slot = sc_slot;
0942         new_s1->next_node = shortcut->next_node;
0943         new_s1->skip_to_level = shortcut->skip_to_level;
0944 
0945         new_n0->slots[sc_slot] = assoc_array_shortcut_to_ptr(new_s1);
0946 
0947         memcpy(new_s1->index_key, shortcut->index_key,
0948                keylen * sizeof(unsigned long));
0949 
0950         edit->set[1].ptr = &side->back_pointer;
0951         edit->set[1].to = assoc_array_shortcut_to_ptr(new_s1);
0952     } else {
0953         pr_devel("no post-shortcut\n");
0954 
0955         /* We don't have to replace the pointed-to node as long as we
0956          * use memory barriers to make sure the parent slot number is
0957          * changed before the back pointer (the parent slot number is
0958          * irrelevant to the old parent shortcut).
0959          */
0960         new_n0->slots[sc_slot] = shortcut->next_node;
0961         edit->set_parent_slot[0].p = &side->parent_slot;
0962         edit->set_parent_slot[0].to = sc_slot;
0963         edit->set[1].ptr = &side->back_pointer;
0964         edit->set[1].to = assoc_array_node_to_ptr(new_n0);
0965     }
0966 
0967     /* Install the new leaf in a spare slot in the new node. */
0968     if (sc_slot == 0)
0969         edit->leaf_p = &new_n0->slots[1];
0970     else
0971         edit->leaf_p = &new_n0->slots[0];
0972 
0973     pr_devel("<--%s() = ok [split shortcut]\n", __func__);
0974     return edit;
0975 }
0976 
0977 /**
0978  * assoc_array_insert - Script insertion of an object into an associative array
0979  * @array: The array to insert into.
0980  * @ops: The operations to use.
0981  * @index_key: The key to insert at.
0982  * @object: The object to insert.
0983  *
0984  * Precalculate and preallocate a script for the insertion or replacement of an
0985  * object in an associative array.  This results in an edit script that can
0986  * either be applied or cancelled.
0987  *
0988  * The function returns a pointer to an edit script or -ENOMEM.
0989  *
0990  * The caller should lock against other modifications and must continue to hold
0991  * the lock until assoc_array_apply_edit() has been called.
0992  *
0993  * Accesses to the tree may take place concurrently with this function,
0994  * provided they hold the RCU read lock.
0995  */
0996 struct assoc_array_edit *assoc_array_insert(struct assoc_array *array,
0997                         const struct assoc_array_ops *ops,
0998                         const void *index_key,
0999                         void *object)
1000 {
1001     struct assoc_array_walk_result result;
1002     struct assoc_array_edit *edit;
1003 
1004     pr_devel("-->%s()\n", __func__);
1005 
1006     /* The leaf pointer we're given must not have the bottom bit set as we
1007      * use those for type-marking the pointer.  NULL pointers are also not
1008      * allowed as they indicate an empty slot but we have to allow them
1009      * here as they can be updated later.
1010      */
1011     BUG_ON(assoc_array_ptr_is_meta(object));
1012 
1013     edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
1014     if (!edit)
1015         return ERR_PTR(-ENOMEM);
1016     edit->array = array;
1017     edit->ops = ops;
1018     edit->leaf = assoc_array_leaf_to_ptr(object);
1019     edit->adjust_count_by = 1;
1020 
1021     switch (assoc_array_walk(array, ops, index_key, &result)) {
1022     case assoc_array_walk_tree_empty:
1023         /* Allocate a root node if there isn't one yet */
1024         if (!assoc_array_insert_in_empty_tree(edit))
1025             goto enomem;
1026         return edit;
1027 
1028     case assoc_array_walk_found_terminal_node:
1029         /* We found a node that doesn't have a node/shortcut pointer in
1030          * the slot corresponding to the index key that we have to
1031          * follow.
1032          */
1033         if (!assoc_array_insert_into_terminal_node(edit, ops, index_key,
1034                                &result))
1035             goto enomem;
1036         return edit;
1037 
1038     case assoc_array_walk_found_wrong_shortcut:
1039         /* We found a shortcut that didn't match our key in a slot we
1040          * needed to follow.
1041          */
1042         if (!assoc_array_insert_mid_shortcut(edit, ops, &result))
1043             goto enomem;
1044         return edit;
1045     }
1046 
1047 enomem:
1048     /* Clean up after an out of memory error */
1049     pr_devel("enomem\n");
1050     assoc_array_cancel_edit(edit);
1051     return ERR_PTR(-ENOMEM);
1052 }
1053 
1054 /**
1055  * assoc_array_insert_set_object - Set the new object pointer in an edit script
1056  * @edit: The edit script to modify.
1057  * @object: The object pointer to set.
1058  *
1059  * Change the object to be inserted in an edit script.  The object pointed to
1060  * by the old object is not freed.  This must be done prior to applying the
1061  * script.
1062  */
1063 void assoc_array_insert_set_object(struct assoc_array_edit *edit, void *object)
1064 {
1065     BUG_ON(!object);
1066     edit->leaf = assoc_array_leaf_to_ptr(object);
1067 }
1068 
1069 struct assoc_array_delete_collapse_context {
1070     struct assoc_array_node *node;
1071     const void      *skip_leaf;
1072     int         slot;
1073 };
1074 
1075 /*
1076  * Subtree collapse to node iterator.
1077  */
1078 static int assoc_array_delete_collapse_iterator(const void *leaf,
1079                         void *iterator_data)
1080 {
1081     struct assoc_array_delete_collapse_context *collapse = iterator_data;
1082 
1083     if (leaf == collapse->skip_leaf)
1084         return 0;
1085 
1086     BUG_ON(collapse->slot >= ASSOC_ARRAY_FAN_OUT);
1087 
1088     collapse->node->slots[collapse->slot++] = assoc_array_leaf_to_ptr(leaf);
1089     return 0;
1090 }
1091 
1092 /**
1093  * assoc_array_delete - Script deletion of an object from an associative array
1094  * @array: The array to search.
1095  * @ops: The operations to use.
1096  * @index_key: The key to the object.
1097  *
1098  * Precalculate and preallocate a script for the deletion of an object from an
1099  * associative array.  This results in an edit script that can either be
1100  * applied or cancelled.
1101  *
1102  * The function returns a pointer to an edit script if the object was found,
1103  * NULL if the object was not found or -ENOMEM.
1104  *
1105  * The caller should lock against other modifications and must continue to hold
1106  * the lock until assoc_array_apply_edit() has been called.
1107  *
1108  * Accesses to the tree may take place concurrently with this function,
1109  * provided they hold the RCU read lock.
1110  */
1111 struct assoc_array_edit *assoc_array_delete(struct assoc_array *array,
1112                         const struct assoc_array_ops *ops,
1113                         const void *index_key)
1114 {
1115     struct assoc_array_delete_collapse_context collapse;
1116     struct assoc_array_walk_result result;
1117     struct assoc_array_node *node, *new_n0;
1118     struct assoc_array_edit *edit;
1119     struct assoc_array_ptr *ptr;
1120     bool has_meta;
1121     int slot, i;
1122 
1123     pr_devel("-->%s()\n", __func__);
1124 
1125     edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
1126     if (!edit)
1127         return ERR_PTR(-ENOMEM);
1128     edit->array = array;
1129     edit->ops = ops;
1130     edit->adjust_count_by = -1;
1131 
1132     switch (assoc_array_walk(array, ops, index_key, &result)) {
1133     case assoc_array_walk_found_terminal_node:
1134         /* We found a node that should contain the leaf we've been
1135          * asked to remove - *if* it's in the tree.
1136          */
1137         pr_devel("terminal_node\n");
1138         node = result.terminal_node.node;
1139 
1140         for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
1141             ptr = node->slots[slot];
1142             if (ptr &&
1143                 assoc_array_ptr_is_leaf(ptr) &&
1144                 ops->compare_object(assoc_array_ptr_to_leaf(ptr),
1145                         index_key))
1146                 goto found_leaf;
1147         }
1148     case assoc_array_walk_tree_empty:
1149     case assoc_array_walk_found_wrong_shortcut:
1150     default:
1151         assoc_array_cancel_edit(edit);
1152         pr_devel("not found\n");
1153         return NULL;
1154     }
1155 
1156 found_leaf:
1157     BUG_ON(array->nr_leaves_on_tree <= 0);
1158 
1159     /* In the simplest form of deletion we just clear the slot and release
1160      * the leaf after a suitable interval.
1161      */
1162     edit->dead_leaf = node->slots[slot];
1163     edit->set[0].ptr = &node->slots[slot];
1164     edit->set[0].to = NULL;
1165     edit->adjust_count_on = node;
1166 
1167     /* If that concludes erasure of the last leaf, then delete the entire
1168      * internal array.
1169      */
1170     if (array->nr_leaves_on_tree == 1) {
1171         edit->set[1].ptr = &array->root;
1172         edit->set[1].to = NULL;
1173         edit->adjust_count_on = NULL;
1174         edit->excised_subtree = array->root;
1175         pr_devel("all gone\n");
1176         return edit;
1177     }
1178 
1179     /* However, we'd also like to clear up some metadata blocks if we
1180      * possibly can.
1181      *
1182      * We go for a simple algorithm of: if this node has FAN_OUT or fewer
1183      * leaves in it, then attempt to collapse it - and attempt to
1184      * recursively collapse up the tree.
1185      *
1186      * We could also try and collapse in partially filled subtrees to take
1187      * up space in this node.
1188      */
1189     if (node->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) {
1190         struct assoc_array_node *parent, *grandparent;
1191         struct assoc_array_ptr *ptr;
1192 
1193         /* First of all, we need to know if this node has metadata so
1194          * that we don't try collapsing if all the leaves are already
1195          * here.
1196          */
1197         has_meta = false;
1198         for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
1199             ptr = node->slots[i];
1200             if (assoc_array_ptr_is_meta(ptr)) {
1201                 has_meta = true;
1202                 break;
1203             }
1204         }
1205 
1206         pr_devel("leaves: %ld [m=%d]\n",
1207              node->nr_leaves_on_branch - 1, has_meta);
1208 
1209         /* Look further up the tree to see if we can collapse this node
1210          * into a more proximal node too.
1211          */
1212         parent = node;
1213     collapse_up:
1214         pr_devel("collapse subtree: %ld\n", parent->nr_leaves_on_branch);
1215 
1216         ptr = parent->back_pointer;
1217         if (!ptr)
1218             goto do_collapse;
1219         if (assoc_array_ptr_is_shortcut(ptr)) {
1220             struct assoc_array_shortcut *s = assoc_array_ptr_to_shortcut(ptr);
1221             ptr = s->back_pointer;
1222             if (!ptr)
1223                 goto do_collapse;
1224         }
1225 
1226         grandparent = assoc_array_ptr_to_node(ptr);
1227         if (grandparent->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) {
1228             parent = grandparent;
1229             goto collapse_up;
1230         }
1231 
1232     do_collapse:
1233         /* There's no point collapsing if the original node has no meta
1234          * pointers to discard and if we didn't merge into one of that
1235          * node's ancestry.
1236          */
1237         if (has_meta || parent != node) {
1238             node = parent;
1239 
1240             /* Create a new node to collapse into */
1241             new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
1242             if (!new_n0)
1243                 goto enomem;
1244             edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
1245 
1246             new_n0->back_pointer = node->back_pointer;
1247             new_n0->parent_slot = node->parent_slot;
1248             new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
1249             edit->adjust_count_on = new_n0;
1250 
1251             collapse.node = new_n0;
1252             collapse.skip_leaf = assoc_array_ptr_to_leaf(edit->dead_leaf);
1253             collapse.slot = 0;
1254             assoc_array_subtree_iterate(assoc_array_node_to_ptr(node),
1255                             node->back_pointer,
1256                             assoc_array_delete_collapse_iterator,
1257                             &collapse);
1258             pr_devel("collapsed %d,%lu\n", collapse.slot, new_n0->nr_leaves_on_branch);
1259             BUG_ON(collapse.slot != new_n0->nr_leaves_on_branch - 1);
1260 
1261             if (!node->back_pointer) {
1262                 edit->set[1].ptr = &array->root;
1263             } else if (assoc_array_ptr_is_leaf(node->back_pointer)) {
1264                 BUG();
1265             } else if (assoc_array_ptr_is_node(node->back_pointer)) {
1266                 struct assoc_array_node *p =
1267                     assoc_array_ptr_to_node(node->back_pointer);
1268                 edit->set[1].ptr = &p->slots[node->parent_slot];
1269             } else if (assoc_array_ptr_is_shortcut(node->back_pointer)) {
1270                 struct assoc_array_shortcut *s =
1271                     assoc_array_ptr_to_shortcut(node->back_pointer);
1272                 edit->set[1].ptr = &s->next_node;
1273             }
1274             edit->set[1].to = assoc_array_node_to_ptr(new_n0);
1275             edit->excised_subtree = assoc_array_node_to_ptr(node);
1276         }
1277     }
1278 
1279     return edit;
1280 
1281 enomem:
1282     /* Clean up after an out of memory error */
1283     pr_devel("enomem\n");
1284     assoc_array_cancel_edit(edit);
1285     return ERR_PTR(-ENOMEM);
1286 }
1287 
1288 /**
1289  * assoc_array_clear - Script deletion of all objects from an associative array
1290  * @array: The array to clear.
1291  * @ops: The operations to use.
1292  *
1293  * Precalculate and preallocate a script for the deletion of all the objects
1294  * from an associative array.  This results in an edit script that can either
1295  * be applied or cancelled.
1296  *
1297  * The function returns a pointer to an edit script if there are objects to be
1298  * deleted, NULL if there are no objects in the array or -ENOMEM.
1299  *
1300  * The caller should lock against other modifications and must continue to hold
1301  * the lock until assoc_array_apply_edit() has been called.
1302  *
1303  * Accesses to the tree may take place concurrently with this function,
1304  * provided they hold the RCU read lock.
1305  */
1306 struct assoc_array_edit *assoc_array_clear(struct assoc_array *array,
1307                        const struct assoc_array_ops *ops)
1308 {
1309     struct assoc_array_edit *edit;
1310 
1311     pr_devel("-->%s()\n", __func__);
1312 
1313     if (!array->root)
1314         return NULL;
1315 
1316     edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
1317     if (!edit)
1318         return ERR_PTR(-ENOMEM);
1319     edit->array = array;
1320     edit->ops = ops;
1321     edit->set[1].ptr = &array->root;
1322     edit->set[1].to = NULL;
1323     edit->excised_subtree = array->root;
1324     edit->ops_for_excised_subtree = ops;
1325     pr_devel("all gone\n");
1326     return edit;
1327 }
1328 
1329 /*
1330  * Handle the deferred destruction after an applied edit.
1331  */
1332 static void assoc_array_rcu_cleanup(struct rcu_head *head)
1333 {
1334     struct assoc_array_edit *edit =
1335         container_of(head, struct assoc_array_edit, rcu);
1336     int i;
1337 
1338     pr_devel("-->%s()\n", __func__);
1339 
1340     if (edit->dead_leaf)
1341         edit->ops->free_object(assoc_array_ptr_to_leaf(edit->dead_leaf));
1342     for (i = 0; i < ARRAY_SIZE(edit->excised_meta); i++)
1343         if (edit->excised_meta[i])
1344             kfree(assoc_array_ptr_to_node(edit->excised_meta[i]));
1345 
1346     if (edit->excised_subtree) {
1347         BUG_ON(assoc_array_ptr_is_leaf(edit->excised_subtree));
1348         if (assoc_array_ptr_is_node(edit->excised_subtree)) {
1349             struct assoc_array_node *n =
1350                 assoc_array_ptr_to_node(edit->excised_subtree);
1351             n->back_pointer = NULL;
1352         } else {
1353             struct assoc_array_shortcut *s =
1354                 assoc_array_ptr_to_shortcut(edit->excised_subtree);
1355             s->back_pointer = NULL;
1356         }
1357         assoc_array_destroy_subtree(edit->excised_subtree,
1358                         edit->ops_for_excised_subtree);
1359     }
1360 
1361     kfree(edit);
1362 }
1363 
1364 /**
1365  * assoc_array_apply_edit - Apply an edit script to an associative array
1366  * @edit: The script to apply.
1367  *
1368  * Apply an edit script to an associative array to effect an insertion,
1369  * deletion or clearance.  As the edit script includes preallocated memory,
1370  * this is guaranteed not to fail.
1371  *
1372  * The edit script, dead objects and dead metadata will be scheduled for
1373  * destruction after an RCU grace period to permit those doing read-only
1374  * accesses on the array to continue to do so under the RCU read lock whilst
1375  * the edit is taking place.
1376  */
1377 void assoc_array_apply_edit(struct assoc_array_edit *edit)
1378 {
1379     struct assoc_array_shortcut *shortcut;
1380     struct assoc_array_node *node;
1381     struct assoc_array_ptr *ptr;
1382     int i;
1383 
1384     pr_devel("-->%s()\n", __func__);
1385 
1386     smp_wmb();
1387     if (edit->leaf_p)
1388         *edit->leaf_p = edit->leaf;
1389 
1390     smp_wmb();
1391     for (i = 0; i < ARRAY_SIZE(edit->set_parent_slot); i++)
1392         if (edit->set_parent_slot[i].p)
1393             *edit->set_parent_slot[i].p = edit->set_parent_slot[i].to;
1394 
1395     smp_wmb();
1396     for (i = 0; i < ARRAY_SIZE(edit->set_backpointers); i++)
1397         if (edit->set_backpointers[i])
1398             *edit->set_backpointers[i] = edit->set_backpointers_to;
1399 
1400     smp_wmb();
1401     for (i = 0; i < ARRAY_SIZE(edit->set); i++)
1402         if (edit->set[i].ptr)
1403             *edit->set[i].ptr = edit->set[i].to;
1404 
1405     if (edit->array->root == NULL) {
1406         edit->array->nr_leaves_on_tree = 0;
1407     } else if (edit->adjust_count_on) {
1408         node = edit->adjust_count_on;
1409         for (;;) {
1410             node->nr_leaves_on_branch += edit->adjust_count_by;
1411 
1412             ptr = node->back_pointer;
1413             if (!ptr)
1414                 break;
1415             if (assoc_array_ptr_is_shortcut(ptr)) {
1416                 shortcut = assoc_array_ptr_to_shortcut(ptr);
1417                 ptr = shortcut->back_pointer;
1418                 if (!ptr)
1419                     break;
1420             }
1421             BUG_ON(!assoc_array_ptr_is_node(ptr));
1422             node = assoc_array_ptr_to_node(ptr);
1423         }
1424 
1425         edit->array->nr_leaves_on_tree += edit->adjust_count_by;
1426     }
1427 
1428     call_rcu(&edit->rcu, assoc_array_rcu_cleanup);
1429 }
1430 
1431 /**
1432  * assoc_array_cancel_edit - Discard an edit script.
1433  * @edit: The script to discard.
1434  *
1435  * Free an edit script and all the preallocated data it holds without making
1436  * any changes to the associative array it was intended for.
1437  *
1438  * NOTE!  In the case of an insertion script, this does _not_ release the leaf
1439  * that was to be inserted.  That is left to the caller.
1440  */
1441 void assoc_array_cancel_edit(struct assoc_array_edit *edit)
1442 {
1443     struct assoc_array_ptr *ptr;
1444     int i;
1445 
1446     pr_devel("-->%s()\n", __func__);
1447 
1448     /* Clean up after an out of memory error */
1449     for (i = 0; i < ARRAY_SIZE(edit->new_meta); i++) {
1450         ptr = edit->new_meta[i];
1451         if (ptr) {
1452             if (assoc_array_ptr_is_node(ptr))
1453                 kfree(assoc_array_ptr_to_node(ptr));
1454             else
1455                 kfree(assoc_array_ptr_to_shortcut(ptr));
1456         }
1457     }
1458     kfree(edit);
1459 }
1460 
1461 /**
1462  * assoc_array_gc - Garbage collect an associative array.
1463  * @array: The array to clean.
1464  * @ops: The operations to use.
1465  * @iterator: A callback function to pass judgement on each object.
1466  * @iterator_data: Private data for the callback function.
1467  *
1468  * Collect garbage from an associative array and pack down the internal tree to
1469  * save memory.
1470  *
1471  * The iterator function is asked to pass judgement upon each object in the
1472  * array.  If it returns false, the object is discard and if it returns true,
1473  * the object is kept.  If it returns true, it must increment the object's
1474  * usage count (or whatever it needs to do to retain it) before returning.
1475  *
1476  * This function returns 0 if successful or -ENOMEM if out of memory.  In the
1477  * latter case, the array is not changed.
1478  *
1479  * The caller should lock against other modifications and must continue to hold
1480  * the lock until assoc_array_apply_edit() has been called.
1481  *
1482  * Accesses to the tree may take place concurrently with this function,
1483  * provided they hold the RCU read lock.
1484  */
1485 int assoc_array_gc(struct assoc_array *array,
1486            const struct assoc_array_ops *ops,
1487            bool (*iterator)(void *object, void *iterator_data),
1488            void *iterator_data)
1489 {
1490     struct assoc_array_shortcut *shortcut, *new_s;
1491     struct assoc_array_node *node, *new_n;
1492     struct assoc_array_edit *edit;
1493     struct assoc_array_ptr *cursor, *ptr;
1494     struct assoc_array_ptr *new_root, *new_parent, **new_ptr_pp;
1495     unsigned long nr_leaves_on_tree;
1496     int keylen, slot, nr_free, next_slot, i;
1497 
1498     pr_devel("-->%s()\n", __func__);
1499 
1500     if (!array->root)
1501         return 0;
1502 
1503     edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
1504     if (!edit)
1505         return -ENOMEM;
1506     edit->array = array;
1507     edit->ops = ops;
1508     edit->ops_for_excised_subtree = ops;
1509     edit->set[0].ptr = &array->root;
1510     edit->excised_subtree = array->root;
1511 
1512     new_root = new_parent = NULL;
1513     new_ptr_pp = &new_root;
1514     cursor = array->root;
1515 
1516 descend:
1517     /* If this point is a shortcut, then we need to duplicate it and
1518      * advance the target cursor.
1519      */
1520     if (assoc_array_ptr_is_shortcut(cursor)) {
1521         shortcut = assoc_array_ptr_to_shortcut(cursor);
1522         keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
1523         keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
1524         new_s = kmalloc(sizeof(struct assoc_array_shortcut) +
1525                 keylen * sizeof(unsigned long), GFP_KERNEL);
1526         if (!new_s)
1527             goto enomem;
1528         pr_devel("dup shortcut %p -> %p\n", shortcut, new_s);
1529         memcpy(new_s, shortcut, (sizeof(struct assoc_array_shortcut) +
1530                      keylen * sizeof(unsigned long)));
1531         new_s->back_pointer = new_parent;
1532         new_s->parent_slot = shortcut->parent_slot;
1533         *new_ptr_pp = new_parent = assoc_array_shortcut_to_ptr(new_s);
1534         new_ptr_pp = &new_s->next_node;
1535         cursor = shortcut->next_node;
1536     }
1537 
1538     /* Duplicate the node at this position */
1539     node = assoc_array_ptr_to_node(cursor);
1540     new_n = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
1541     if (!new_n)
1542         goto enomem;
1543     pr_devel("dup node %p -> %p\n", node, new_n);
1544     new_n->back_pointer = new_parent;
1545     new_n->parent_slot = node->parent_slot;
1546     *new_ptr_pp = new_parent = assoc_array_node_to_ptr(new_n);
1547     new_ptr_pp = NULL;
1548     slot = 0;
1549 
1550 continue_node:
1551     /* Filter across any leaves and gc any subtrees */
1552     for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
1553         ptr = node->slots[slot];
1554         if (!ptr)
1555             continue;
1556 
1557         if (assoc_array_ptr_is_leaf(ptr)) {
1558             if (iterator(assoc_array_ptr_to_leaf(ptr),
1559                      iterator_data))
1560                 /* The iterator will have done any reference
1561                  * counting on the object for us.
1562                  */
1563                 new_n->slots[slot] = ptr;
1564             continue;
1565         }
1566 
1567         new_ptr_pp = &new_n->slots[slot];
1568         cursor = ptr;
1569         goto descend;
1570     }
1571 
1572     pr_devel("-- compress node %p --\n", new_n);
1573 
1574     /* Count up the number of empty slots in this node and work out the
1575      * subtree leaf count.
1576      */
1577     new_n->nr_leaves_on_branch = 0;
1578     nr_free = 0;
1579     for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
1580         ptr = new_n->slots[slot];
1581         if (!ptr)
1582             nr_free++;
1583         else if (assoc_array_ptr_is_leaf(ptr))
1584             new_n->nr_leaves_on_branch++;
1585     }
1586     pr_devel("free=%d, leaves=%lu\n", nr_free, new_n->nr_leaves_on_branch);
1587 
1588     /* See what we can fold in */
1589     next_slot = 0;
1590     for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
1591         struct assoc_array_shortcut *s;
1592         struct assoc_array_node *child;
1593 
1594         ptr = new_n->slots[slot];
1595         if (!ptr || assoc_array_ptr_is_leaf(ptr))
1596             continue;
1597 
1598         s = NULL;
1599         if (assoc_array_ptr_is_shortcut(ptr)) {
1600             s = assoc_array_ptr_to_shortcut(ptr);
1601             ptr = s->next_node;
1602         }
1603 
1604         child = assoc_array_ptr_to_node(ptr);
1605         new_n->nr_leaves_on_branch += child->nr_leaves_on_branch;
1606 
1607         if (child->nr_leaves_on_branch <= nr_free + 1) {
1608             /* Fold the child node into this one */
1609             pr_devel("[%d] fold node %lu/%d [nx %d]\n",
1610                  slot, child->nr_leaves_on_branch, nr_free + 1,
1611                  next_slot);
1612 
1613             /* We would already have reaped an intervening shortcut
1614              * on the way back up the tree.
1615              */
1616             BUG_ON(s);
1617 
1618             new_n->slots[slot] = NULL;
1619             nr_free++;
1620             if (slot < next_slot)
1621                 next_slot = slot;
1622             for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
1623                 struct assoc_array_ptr *p = child->slots[i];
1624                 if (!p)
1625                     continue;
1626                 BUG_ON(assoc_array_ptr_is_meta(p));
1627                 while (new_n->slots[next_slot])
1628                     next_slot++;
1629                 BUG_ON(next_slot >= ASSOC_ARRAY_FAN_OUT);
1630                 new_n->slots[next_slot++] = p;
1631                 nr_free--;
1632             }
1633             kfree(child);
1634         } else {
1635             pr_devel("[%d] retain node %lu/%d [nx %d]\n",
1636                  slot, child->nr_leaves_on_branch, nr_free + 1,
1637                  next_slot);
1638         }
1639     }
1640 
1641     pr_devel("after: %lu\n", new_n->nr_leaves_on_branch);
1642 
1643     nr_leaves_on_tree = new_n->nr_leaves_on_branch;
1644 
1645     /* Excise this node if it is singly occupied by a shortcut */
1646     if (nr_free == ASSOC_ARRAY_FAN_OUT - 1) {
1647         for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++)
1648             if ((ptr = new_n->slots[slot]))
1649                 break;
1650 
1651         if (assoc_array_ptr_is_meta(ptr) &&
1652             assoc_array_ptr_is_shortcut(ptr)) {
1653             pr_devel("excise node %p with 1 shortcut\n", new_n);
1654             new_s = assoc_array_ptr_to_shortcut(ptr);
1655             new_parent = new_n->back_pointer;
1656             slot = new_n->parent_slot;
1657             kfree(new_n);
1658             if (!new_parent) {
1659                 new_s->back_pointer = NULL;
1660                 new_s->parent_slot = 0;
1661                 new_root = ptr;
1662                 goto gc_complete;
1663             }
1664 
1665             if (assoc_array_ptr_is_shortcut(new_parent)) {
1666                 /* We can discard any preceding shortcut also */
1667                 struct assoc_array_shortcut *s =
1668                     assoc_array_ptr_to_shortcut(new_parent);
1669 
1670                 pr_devel("excise preceding shortcut\n");
1671 
1672                 new_parent = new_s->back_pointer = s->back_pointer;
1673                 slot = new_s->parent_slot = s->parent_slot;
1674                 kfree(s);
1675                 if (!new_parent) {
1676                     new_s->back_pointer = NULL;
1677                     new_s->parent_slot = 0;
1678                     new_root = ptr;
1679                     goto gc_complete;
1680                 }
1681             }
1682 
1683             new_s->back_pointer = new_parent;
1684             new_s->parent_slot = slot;
1685             new_n = assoc_array_ptr_to_node(new_parent);
1686             new_n->slots[slot] = ptr;
1687             goto ascend_old_tree;
1688         }
1689     }
1690 
1691     /* Excise any shortcuts we might encounter that point to nodes that
1692      * only contain leaves.
1693      */
1694     ptr = new_n->back_pointer;
1695     if (!ptr)
1696         goto gc_complete;
1697 
1698     if (assoc_array_ptr_is_shortcut(ptr)) {
1699         new_s = assoc_array_ptr_to_shortcut(ptr);
1700         new_parent = new_s->back_pointer;
1701         slot = new_s->parent_slot;
1702 
1703         if (new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) {
1704             struct assoc_array_node *n;
1705 
1706             pr_devel("excise shortcut\n");
1707             new_n->back_pointer = new_parent;
1708             new_n->parent_slot = slot;
1709             kfree(new_s);
1710             if (!new_parent) {
1711                 new_root = assoc_array_node_to_ptr(new_n);
1712                 goto gc_complete;
1713             }
1714 
1715             n = assoc_array_ptr_to_node(new_parent);
1716             n->slots[slot] = assoc_array_node_to_ptr(new_n);
1717         }
1718     } else {
1719         new_parent = ptr;
1720     }
1721     new_n = assoc_array_ptr_to_node(new_parent);
1722 
1723 ascend_old_tree:
1724     ptr = node->back_pointer;
1725     if (assoc_array_ptr_is_shortcut(ptr)) {
1726         shortcut = assoc_array_ptr_to_shortcut(ptr);
1727         slot = shortcut->parent_slot;
1728         cursor = shortcut->back_pointer;
1729         if (!cursor)
1730             goto gc_complete;
1731     } else {
1732         slot = node->parent_slot;
1733         cursor = ptr;
1734     }
1735     BUG_ON(!cursor);
1736     node = assoc_array_ptr_to_node(cursor);
1737     slot++;
1738     goto continue_node;
1739 
1740 gc_complete:
1741     edit->set[0].to = new_root;
1742     assoc_array_apply_edit(edit);
1743     array->nr_leaves_on_tree = nr_leaves_on_tree;
1744     return 0;
1745 
1746 enomem:
1747     pr_devel("enomem\n");
1748     assoc_array_destroy_subtree(new_root, edit->ops);
1749     kfree(edit);
1750     return -ENOMEM;
1751 }