tools/lib/list_sort.c

0001 // SPDX-License-Identifier: GPL-2.0
0002 #include <linux/kernel.h>
0003 #include <linux/compiler.h>
0004 #include <linux/export.h>
0005 #include <linux/string.h>
0006 #include <linux/list_sort.h>
0007 #include <linux/list.h>
0008
0009 /*
0010  * Returns a list organized in an intermediate format suited
0011  * to chaining of merge() calls: null-terminated, no reserved or
0012  * sentinel head node, "prev" links not maintained.
0013  */
0014 __attribute__((nonnull(2,3,4)))
0015 static struct list_head *merge(void *priv, list_cmp_func_t cmp,
0016                 struct list_head *a, struct list_head *b)
0017 {
0018     struct list_head *head, **tail = &head;
0019
0020     for (;;) {
0021         /* if equal, take 'a' -- important for sort stability */
0022         if (cmp(priv, a, b) <= 0) {
0023             *tail = a;
0024             tail = &a->next;
0025             a = a->next;
0026             if (!a) {
0027                 *tail = b;
0028                 break;
0029             }
0030         } else {
0031             *tail = b;
0032             tail = &b->next;
0033             b = b->next;
0034             if (!b) {
0035                 *tail = a;
0036                 break;
0037             }
0038         }
0039     }
0040     return head;
0041 }
0042
0043 /*
0044  * Combine final list merge with restoration of standard doubly-linked
0045  * list structure.  This approach duplicates code from merge(), but
0046  * runs faster than the tidier alternatives of either a separate final
0047  * prev-link restoration pass, or maintaining the prev links
0048  * throughout.
0049  */
0050 __attribute__((nonnull(2,3,4,5)))
0051 static void merge_final(void *priv, list_cmp_func_t cmp, struct list_head *head,
0052             struct list_head *a, struct list_head *b)
0053 {
0054     struct list_head *tail = head;
0055     u8 count = 0;
0056
0057     for (;;) {
0058         /* if equal, take 'a' -- important for sort stability */
0059         if (cmp(priv, a, b) <= 0) {
0060             tail->next = a;
0061             a->prev = tail;
0062             tail = a;
0063             a = a->next;
0064             if (!a)
0065                 break;
0066         } else {
0067             tail->next = b;
0068             b->prev = tail;
0069             tail = b;
0070             b = b->next;
0071             if (!b) {
0072                 b = a;
0073                 break;
0074             }
0075         }
0076     }
0077
0078     /* Finish linking remainder of list b on to tail */
0079     tail->next = b;
0080     do {
0081         /*
0082          * If the merge is highly unbalanced (e.g. the input is
0083          * already sorted), this loop may run many iterations.
0084          * Continue callbacks to the client even though no
0085          * element comparison is needed, so the client's cmp()
0086          * routine can invoke cond_resched() periodically.
0087          */
0088         if (unlikely(!++count))
0089             cmp(priv, b, b);
0090         b->prev = tail;
0091         tail = b;
0092         b = b->next;
0093     } while (b);
0094
0095     /* And the final links to make a circular doubly-linked list */
0096     tail->next = head;
0097     head->prev = tail;
0098 }
0099
0100 /**
0101  * list_sort - sort a list
0102  * @priv: private data, opaque to list_sort(), passed to @cmp
0103  * @head: the list to sort
0104  * @cmp: the elements comparison function
0105  *
0106  * The comparison function @cmp must return > 0 if @a should sort after
0107  * @b ("@a > @b" if you want an ascending sort), and <= 0 if @a should
0108  * sort before @b *or* their original order should be preserved.  It is
0109  * always called with the element that came first in the input in @a,
0110  * and list_sort is a stable sort, so it is not necessary to distinguish
0111  * the @a < @b and @a == @b cases.
0112  *
0113  * This is compatible with two styles of @cmp function:
0114  * - The traditional style which returns <0 / =0 / >0, or
0115  * - Returning a boolean 0/1.
0116  * The latter offers a chance to save a few cycles in the comparison
0117  * (which is used by e.g. plug_ctx_cmp() in block/blk-mq.c).
0118  *
0119  * A good way to write a multi-word comparison is::
0120  *
0121  *  if (a->high != b->high)
0122  *      return a->high > b->high;
0123  *  if (a->middle != b->middle)
0124  *      return a->middle > b->middle;
0125  *  return a->low > b->low;
0126  *
0127  *
0128  * This mergesort is as eager as possible while always performing at least
0129  * 2:1 balanced merges.  Given two pending sublists of size 2^k, they are
0130  * merged to a size-2^(k+1) list as soon as we have 2^k following elements.
0131  *
0132  * Thus, it will avoid cache thrashing as long as 3*2^k elements can
0133  * fit into the cache.  Not quite as good as a fully-eager bottom-up
0134  * mergesort, but it does use 0.2*n fewer comparisons, so is faster in
0135  * the common case that everything fits into L1.
0136  *
0137  *
0138  * The merging is controlled by "count", the number of elements in the
0139  * pending lists.  This is beautifully simple code, but rather subtle.
0140  *
0141  * Each time we increment "count", we set one bit (bit k) and clear
0142  * bits k-1 .. 0.  Each time this happens (except the very first time
0143  * for each bit, when count increments to 2^k), we merge two lists of
0144  * size 2^k into one list of size 2^(k+1).
0145  *
0146  * This merge happens exactly when the count reaches an odd multiple of
0147  * 2^k, which is when we have 2^k elements pending in smaller lists,
0148  * so it's safe to merge away two lists of size 2^k.
0149  *
0150  * After this happens twice, we have created two lists of size 2^(k+1),
0151  * which will be merged into a list of size 2^(k+2) before we create
0152  * a third list of size 2^(k+1), so there are never more than two pending.
0153  *
0154  * The number of pending lists of size 2^k is determined by the
0155  * state of bit k of "count" plus two extra pieces of information:
0156  *
0157  * - The state of bit k-1 (when k == 0, consider bit -1 always set), and
0158  * - Whether the higher-order bits are zero or non-zero (i.e.
0159  *   is count >= 2^(k+1)).
0160  *
0161  * There are six states we distinguish.  "x" represents some arbitrary
0162  * bits, and "y" represents some arbitrary non-zero bits:
0163  * 0:  00x: 0 pending of size 2^k;           x pending of sizes < 2^k
0164  * 1:  01x: 0 pending of size 2^k; 2^(k-1) + x pending of sizes < 2^k
0165  * 2: x10x: 0 pending of size 2^k; 2^k     + x pending of sizes < 2^k
0166  * 3: x11x: 1 pending of size 2^k; 2^(k-1) + x pending of sizes < 2^k
0167  * 4: y00x: 1 pending of size 2^k; 2^k     + x pending of sizes < 2^k
0168  * 5: y01x: 2 pending of size 2^k; 2^(k-1) + x pending of sizes < 2^k
0169  * (merge and loop back to state 2)
0170  *
0171  * We gain lists of size 2^k in the 2->3 and 4->5 transitions (because
0172  * bit k-1 is set while the more significant bits are non-zero) and
0173  * merge them away in the 5->2 transition.  Note in particular that just
0174  * before the 5->2 transition, all lower-order bits are 11 (state 3),
0175  * so there is one list of each smaller size.
0176  *
0177  * When we reach the end of the input, we merge all the pending
0178  * lists, from smallest to largest.  If you work through cases 2 to
0179  * 5 above, you can see that the number of elements we merge with a list
0180  * of size 2^k varies from 2^(k-1) (cases 3 and 5 when x == 0) to
0181  * 2^(k+1) - 1 (second merge of case 5 when x == 2^(k-1) - 1).
0182  */
0183 __attribute__((nonnull(2,3)))
0184 void list_sort(void *priv, struct list_head *head, list_cmp_func_t cmp)
0185 {
0186     struct list_head *list = head->next, *pending = NULL;
0187     size_t count = 0;   /* Count of pending */
0188
0189     if (list == head->prev) /* Zero or one elements */
0190         return;
0191
0192     /* Convert to a null-terminated singly-linked list. */
0193     head->prev->next = NULL;
0194
0195     /*
0196      * Data structure invariants:
0197      * - All lists are singly linked and null-terminated; prev
0198      *   pointers are not maintained.
0199      * - pending is a prev-linked "list of lists" of sorted
0200      *   sublists awaiting further merging.
0201      * - Each of the sorted sublists is power-of-two in size.
0202      * - Sublists are sorted by size and age, smallest & newest at front.
0203      * - There are zero to two sublists of each size.
0204      * - A pair of pending sublists are merged as soon as the number
0205      *   of following pending elements equals their size (i.e.
0206      *   each time count reaches an odd multiple of that size).
0207      *   That ensures each later final merge will be at worst 2:1.
0208      * - Each round consists of:
0209      *   - Merging the two sublists selected by the highest bit
0210      *     which flips when count is incremented, and
0211      *   - Adding an element from the input as a size-1 sublist.
0212      */
0213     do {
0214         size_t bits;
0215         struct list_head **tail = &pending;
0216
0217         /* Find the least-significant clear bit in count */
0218         for (bits = count; bits & 1; bits >>= 1)
0219             tail = &(*tail)->prev;
0220         /* Do the indicated merge */
0221         if (likely(bits)) {
0222             struct list_head *a = *tail, *b = a->prev;
0223
0224             a = merge(priv, cmp, b, a);
0225             /* Install the merged result in place of the inputs */
0226             a->prev = b->prev;
0227             *tail = a;
0228         }
0229
0230         /* Move one element from input list to pending */
0231         list->prev = pending;
0232         pending = list;
0233         list = list->next;
0234         pending->next = NULL;
0235         count++;
0236     } while (list);
0237
0238     /* End of input; merge together all the pending lists. */
0239     list = pending;
0240     pending = pending->prev;
0241     for (;;) {
0242         struct list_head *next = pending->prev;
0243
0244         if (!next)
0245             break;
0246         list = merge(priv, cmp, pending, list);
0247         pending = next;
0248     }
0249     /* The final merge, rebuilding prev links */
0250     merge_final(priv, cmp, head, pending, list);
0251 }
0252 EXPORT_SYMBOL(list_sort);