0001 /* SPDX-License-Identifier: GPL-2.0-only */
0002 /*
0003  * Fast and scalable bitmaps.
0004  *
0005  * Copyright (C) 2016 Facebook
0006  * Copyright (C) 2013-2014 Jens Axboe
0007  */
0008 
0009 #ifndef __LINUX_SCALE_BITMAP_H
0010 #define __LINUX_SCALE_BITMAP_H
0011 
0012 #include <linux/atomic.h>
0013 #include <linux/bitops.h>
0014 #include <linux/cache.h>
0015 #include <linux/list.h>
0016 #include <linux/log2.h>
0017 #include <linux/minmax.h>
0018 #include <linux/percpu.h>
0019 #include <linux/slab.h>
0020 #include <linux/smp.h>
0021 #include <linux/types.h>
0022 #include <linux/wait.h>
0023 
0024 struct seq_file;
0025 
0026 /**
0027  * struct sbitmap_word - Word in a &struct sbitmap.
0028  */
0029 struct sbitmap_word {
0030     /**
0031      * @word: word holding free bits
0032      */
0033     unsigned long word;
0034 
0035     /**
0036      * @cleared: word holding cleared bits
0037      */
0038     unsigned long cleared ____cacheline_aligned_in_smp;
0039 } ____cacheline_aligned_in_smp;
0040 
0041 /**
0042  * struct sbitmap - Scalable bitmap.
0043  *
0044  * A &struct sbitmap is spread over multiple cachelines to avoid ping-pong. This
0045  * trades off higher memory usage for better scalability.
0046  */
0047 struct sbitmap {
0048     /**
0049      * @depth: Number of bits used in the whole bitmap.
0050      */
0051     unsigned int depth;
0052 
0053     /**
0054      * @shift: log2(number of bits used per word)
0055      */
0056     unsigned int shift;
0057 
0058     /**
0059      * @map_nr: Number of words (cachelines) being used for the bitmap.
0060      */
0061     unsigned int map_nr;
0062 
0063     /**
0064      * @round_robin: Allocate bits in strict round-robin order.
0065      */
0066     bool round_robin;
0067 
0068     /**
0069      * @map: Allocated bitmap.
0070      */
0071     struct sbitmap_word *map;
0072 
0073     /**
0074      * @alloc_hint: Cache of last successfully allocated or freed bit.
0075      *
0076      * This is per-cpu, which allows multiple users to stick to different
0077      * cachelines until the map is exhausted.
0078      */
0079     unsigned int __percpu *alloc_hint;
0080 };
0081 
0082 #define SBQ_WAIT_QUEUES 8
0083 #define SBQ_WAKE_BATCH 8
0084 
0085 /**
0086  * struct sbq_wait_state - Wait queue in a &struct sbitmap_queue.
0087  */
0088 struct sbq_wait_state {
0089     /**
0090      * @wait_cnt: Number of frees remaining before we wake up.
0091      */
0092     atomic_t wait_cnt;
0093 
0094     /**
0095      * @wait: Wait queue.
0096      */
0097     wait_queue_head_t wait;
0098 } ____cacheline_aligned_in_smp;
0099 
0100 /**
0101  * struct sbitmap_queue - Scalable bitmap with the added ability to wait on free
0102  * bits.
0103  *
0104  * A &struct sbitmap_queue uses multiple wait queues and rolling wakeups to
0105  * avoid contention on the wait queue spinlock. This ensures that we don't hit a
0106  * scalability wall when we run out of free bits and have to start putting tasks
0107  * to sleep.
0108  */
0109 struct sbitmap_queue {
0110     /**
0111      * @sb: Scalable bitmap.
0112      */
0113     struct sbitmap sb;
0114 
0115     /**
0116      * @wake_batch: Number of bits which must be freed before we wake up any
0117      * waiters.
0118      */
0119     unsigned int wake_batch;
0120 
0121     /**
0122      * @wake_index: Next wait queue in @ws to wake up.
0123      */
0124     atomic_t wake_index;
0125 
0126     /**
0127      * @ws: Wait queues.
0128      */
0129     struct sbq_wait_state *ws;
0130 
0131     /**
0132      * @ws_active: count of currently active ws waitqueues
0133      */
0134     atomic_t ws_active;
0135 
0136     /**
0137      * @min_shallow_depth: The minimum shallow depth which may be passed to
0138      * sbitmap_queue_get_shallow()
0139      */
0140     unsigned int min_shallow_depth;
0141 };
0142 
0143 /**
0144  * sbitmap_init_node() - Initialize a &struct sbitmap on a specific memory node.
0145  * @sb: Bitmap to initialize.
0146  * @depth: Number of bits to allocate.
0147  * @shift: Use 2^@shift bits per word in the bitmap; if a negative number is
0148  *         given, a good default is chosen.
0149  * @flags: Allocation flags.
0150  * @node: Memory node to allocate on.
0151  * @round_robin: If true, be stricter about allocation order; always allocate
0152  *               starting from the last allocated bit. This is less efficient
0153  *               than the default behavior (false).
0154  * @alloc_hint: If true, apply percpu hint for where to start searching for
0155  *              a free bit.
0156  *
0157  * Return: Zero on success or negative errno on failure.
0158  */
0159 int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
0160               gfp_t flags, int node, bool round_robin, bool alloc_hint);
0161 
0162 /* sbitmap internal helper */
0163 static inline unsigned int __map_depth(const struct sbitmap *sb, int index)
0164 {
0165     if (index == sb->map_nr - 1)
0166         return sb->depth - (index << sb->shift);
0167     return 1U << sb->shift;
0168 }
0169 
0170 /**
0171  * sbitmap_free() - Free memory used by a &struct sbitmap.
0172  * @sb: Bitmap to free.
0173  */
0174 static inline void sbitmap_free(struct sbitmap *sb)
0175 {
0176     free_percpu(sb->alloc_hint);
0177     kvfree(sb->map);
0178     sb->map = NULL;
0179 }
0180 
0181 /**
0182  * sbitmap_resize() - Resize a &struct sbitmap.
0183  * @sb: Bitmap to resize.
0184  * @depth: New number of bits to resize to.
0185  *
0186  * Doesn't reallocate anything. It's up to the caller to ensure that the new
0187  * depth doesn't exceed the depth that the sb was initialized with.
0188  */
0189 void sbitmap_resize(struct sbitmap *sb, unsigned int depth);
0190 
0191 /**
0192  * sbitmap_get() - Try to allocate a free bit from a &struct sbitmap.
0193  * @sb: Bitmap to allocate from.
0194  *
0195  * This operation provides acquire barrier semantics if it succeeds.
0196  *
0197  * Return: Non-negative allocated bit number if successful, -1 otherwise.
0198  */
0199 int sbitmap_get(struct sbitmap *sb);
0200 
0201 /**
0202  * sbitmap_get_shallow() - Try to allocate a free bit from a &struct sbitmap,
0203  * limiting the depth used from each word.
0204  * @sb: Bitmap to allocate from.
0205  * @shallow_depth: The maximum number of bits to allocate from a single word.
0206  *
0207  * This rather specific operation allows for having multiple users with
0208  * different allocation limits. E.g., there can be a high-priority class that
0209  * uses sbitmap_get() and a low-priority class that uses sbitmap_get_shallow()
0210  * with a @shallow_depth of (1 << (@sb->shift - 1)). Then, the low-priority
0211  * class can only allocate half of the total bits in the bitmap, preventing it
0212  * from starving out the high-priority class.
0213  *
0214  * Return: Non-negative allocated bit number if successful, -1 otherwise.
0215  */
0216 int sbitmap_get_shallow(struct sbitmap *sb, unsigned long shallow_depth);
0217 
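/*
 * Illustrative sketch (editorial addition, not part of the upstream header):
 * the priority split described above. A low-priority class limits itself to
 * half of each word while a high-priority class may take any free bit. The
 * function names are hypothetical.
 */
static inline int example_low_prio_get(struct sbitmap *sb)
{
	/* may only use half of the bits in each word */
	return sbitmap_get_shallow(sb, 1UL << (sb->shift - 1));
}

static inline int example_high_prio_get(struct sbitmap *sb)
{
	/* unrestricted allocation */
	return sbitmap_get(sb);
}
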
0218 /**
0219  * sbitmap_any_bit_set() - Check for a set bit in a &struct sbitmap.
0220  * @sb: Bitmap to check.
0221  *
0222  * Return: true if any bit in the bitmap is set, false otherwise.
0223  */
0224 bool sbitmap_any_bit_set(const struct sbitmap *sb);
0225 
0226 #define SB_NR_TO_INDEX(sb, bitnr) ((bitnr) >> (sb)->shift)
0227 #define SB_NR_TO_BIT(sb, bitnr) ((bitnr) & ((1U << (sb)->shift) - 1U))
0228 
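/*
 * Worked example (editorial addition): with the default shift of 6 on a
 * 64-bit machine, bit number 200 lives in word 200 >> 6 = 3 at bit
 * 200 & 63 = 8, i.e. SB_NR_TO_INDEX() selects the word (cacheline) and
 * SB_NR_TO_BIT() the position within it.
 */
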
0229 typedef bool (*sb_for_each_fn)(struct sbitmap *, unsigned int, void *);
0230 
0231 /**
0232  * __sbitmap_for_each_set() - Iterate over each set bit in a &struct sbitmap.
0233  * @start: Where to start the iteration.
0234  * @sb: Bitmap to iterate over.
0235  * @fn: Callback. Should return true to continue or false to break early.
0236  * @data: Pointer to pass to callback.
0237  *
0238  * This is inline even though it's non-trivial so that the function calls to the
0239  * callback will hopefully get optimized away.
0240  */
0241 static inline void __sbitmap_for_each_set(struct sbitmap *sb,
0242                       unsigned int start,
0243                       sb_for_each_fn fn, void *data)
0244 {
0245     unsigned int index;
0246     unsigned int nr;
0247     unsigned int scanned = 0;
0248 
0249     if (start >= sb->depth)
0250         start = 0;
0251     index = SB_NR_TO_INDEX(sb, start);
0252     nr = SB_NR_TO_BIT(sb, start);
0253 
0254     while (scanned < sb->depth) {
0255         unsigned long word;
0256         unsigned int depth = min_t(unsigned int,
0257                        __map_depth(sb, index) - nr,
0258                        sb->depth - scanned);
0259 
0260         scanned += depth;
0261         word = sb->map[index].word & ~sb->map[index].cleared;
0262         if (!word)
0263             goto next;
0264 
0265         /*
0266          * On the first iteration of the outer loop, we need to add the
0267          * bit offset back to the size of the word for find_next_bit().
0268          * On all other iterations, nr is zero, so this is a noop.
0269          */
0270         depth += nr;
0271         while (1) {
0272             nr = find_next_bit(&word, depth, nr);
0273             if (nr >= depth)
0274                 break;
0275             if (!fn(sb, (index << sb->shift) + nr, data))
0276                 return;
0277 
0278             nr++;
0279         }
0280 next:
0281         nr = 0;
0282         if (++index >= sb->map_nr)
0283             index = 0;
0284     }
0285 }
0286 
0287 /**
0288  * sbitmap_for_each_set() - Iterate over each set bit in a &struct sbitmap.
0289  * @sb: Bitmap to iterate over.
0290  * @fn: Callback. Should return true to continue or false to break early.
0291  * @data: Pointer to pass to callback.
0292  */
0293 static inline void sbitmap_for_each_set(struct sbitmap *sb, sb_for_each_fn fn,
0294                     void *data)
0295 {
0296     __sbitmap_for_each_set(sb, 0, fn, data);
0297 }
0298 
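/*
 * Illustrative sketch (editorial addition): counting set bits with the
 * iterator above. The callback must return true to keep iterating; the
 * names are hypothetical.
 */
static inline bool example_count_fn(struct sbitmap *sb, unsigned int bitnr,
				    void *data)
{
	unsigned int *count = data;

	(*count)++;
	return true;	/* continue scanning */
}

static inline unsigned int example_count_set_bits(struct sbitmap *sb)
{
	unsigned int count = 0;

	sbitmap_for_each_set(sb, example_count_fn, &count);
	return count;
}
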
0299 static inline unsigned long *__sbitmap_word(struct sbitmap *sb,
0300                         unsigned int bitnr)
0301 {
0302     return &sb->map[SB_NR_TO_INDEX(sb, bitnr)].word;
0303 }
0304 
0305 /* Helpers equivalent to the operations in asm/bitops.h and linux/bitmap.h */
0306 
0307 static inline void sbitmap_set_bit(struct sbitmap *sb, unsigned int bitnr)
0308 {
0309     set_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr));
0310 }
0311 
0312 static inline void sbitmap_clear_bit(struct sbitmap *sb, unsigned int bitnr)
0313 {
0314     clear_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr));
0315 }
0316 
0317 /*
0318  * This one is special, since it doesn't actually clear the bit, rather it
0319  * sets the corresponding bit in the ->cleared mask instead. Paired with
0320  * the caller doing sbitmap_deferred_clear() if a given index is full, which
0321  * will clear the previously freed entries in the corresponding ->word.
0322  */
0323 static inline void sbitmap_deferred_clear_bit(struct sbitmap *sb, unsigned int bitnr)
0324 {
0325     unsigned long *addr = &sb->map[SB_NR_TO_INDEX(sb, bitnr)].cleared;
0326 
0327     set_bit(SB_NR_TO_BIT(sb, bitnr), addr);
0328 }
0329 
0330 /*
0331  * Counterpart of sbitmap_get(): frees the bit via the deferred ->cleared
0332  * mask and also updates the percpu allocation hint.
0333  */
0334 static inline void sbitmap_put(struct sbitmap *sb, unsigned int bitnr)
0335 {
0336     sbitmap_deferred_clear_bit(sb, bitnr);
0337 
0338     if (likely(sb->alloc_hint && !sb->round_robin && bitnr < sb->depth))
0339         *raw_cpu_ptr(sb->alloc_hint) = bitnr;
0340 }
0341 
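/*
 * Illustrative sketch (editorial addition): one allocate/release round trip
 * tying together sbitmap_init_node(), sbitmap_get(), sbitmap_put() and
 * sbitmap_free(). The function name and the chosen depth are hypothetical.
 */
static inline int example_sbitmap_round_trip(int node)
{
	struct sbitmap sb;
	int nr, ret;

	/* 128 bits, default shift (-1), no round-robin, percpu alloc hint */
	ret = sbitmap_init_node(&sb, 128, -1, GFP_KERNEL, node, false, true);
	if (ret)
		return ret;

	nr = sbitmap_get(&sb);		/* free bit number, or -1 */
	if (nr >= 0)
		sbitmap_put(&sb, nr);	/* deferred clear + hint update */

	sbitmap_free(&sb);
	return 0;
}
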
0342 static inline int sbitmap_test_bit(struct sbitmap *sb, unsigned int bitnr)
0343 {
0344     return test_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr));
0345 }
0346 
0347 static inline int sbitmap_calculate_shift(unsigned int depth)
0348 {
0349     int shift = ilog2(BITS_PER_LONG);
0350 
0351     /*
0352      * If the bitmap is small, shrink the number of bits per word so
0353      * we spread over a few cachelines, at least. If less than 4
0354      * bits, just forget about it, it's not going to work optimally
0355      * anyway.
0356      */
0357     if (depth >= 4) {
0358         while ((4U << shift) > depth)
0359             shift--;
0360     }
0361 
0362     return shift;
0363 }
0364 
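/*
 * Worked example (editorial addition): on a 64-bit machine shift starts at
 * 6 (64 bits per word). For depth = 16 the loop lowers it to 2, giving
 * 4 bits per word spread over 4 words (cachelines); for depth >= 256 the
 * full word size is kept.
 */
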
0365 /**
0366  * sbitmap_show() - Dump &struct sbitmap information to a &struct seq_file.
0367  * @sb: Bitmap to show.
0368  * @m: struct seq_file to write to.
0369  *
0370  * This is intended for debugging. The format may change at any time.
0371  */
0372 void sbitmap_show(struct sbitmap *sb, struct seq_file *m);
0373 
0374 
0375 /**
0376  * sbitmap_weight() - Return the number of set and not cleared bits in a
0377  * &struct sbitmap.
0378  * @sb: Bitmap to check.
0379  *
0380  * Return: The number of bits that are set and not cleared.
0381  */
0382 unsigned int sbitmap_weight(const struct sbitmap *sb);
0383 
0384 /**
0385  * sbitmap_bitmap_show() - Write a hex dump of a &struct sbitmap to a &struct
0386  * seq_file.
0387  * @sb: Bitmap to show.
0388  * @m: struct seq_file to write to.
0389  *
0390  * This is intended for debugging. The output isn't guaranteed to be internally
0391  * consistent.
0392  */
0393 void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m);
0394 
0395 /**
0396  * sbitmap_queue_init_node() - Initialize a &struct sbitmap_queue on a specific
0397  * memory node.
0398  * @sbq: Bitmap queue to initialize.
0399  * @depth: See sbitmap_init_node().
0400  * @shift: See sbitmap_init_node().
0401  * @round_robin: See sbitmap_get().
0402  * @flags: Allocation flags.
0403  * @node: Memory node to allocate on.
0404  *
0405  * Return: Zero on success or negative errno on failure.
0406  */
0407 int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
0408                 int shift, bool round_robin, gfp_t flags, int node);
0409 
0410 /**
0411  * sbitmap_queue_free() - Free memory used by a &struct sbitmap_queue.
0412  *
0413  * @sbq: Bitmap queue to free.
0414  */
0415 static inline void sbitmap_queue_free(struct sbitmap_queue *sbq)
0416 {
0417     kfree(sbq->ws);
0418     sbitmap_free(&sbq->sb);
0419 }
0420 
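/*
 * Illustrative sketch (editorial addition): setting up and tearing down a
 * &struct sbitmap_queue. The depth and shift choices are hypothetical.
 */
static inline int example_sbq_setup(struct sbitmap_queue *sbq, int node)
{
	int ret;

	/* 256 bits, default shift, round-robin fairness not required */
	ret = sbitmap_queue_init_node(sbq, 256, -1, false, GFP_KERNEL, node);
	if (ret)
		return ret;

	/* ... use the queue ... */

	sbitmap_queue_free(sbq);
	return 0;
}
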
0421 /**
0422  * sbitmap_queue_recalculate_wake_batch() - Recalculate wake batch
0423  * @sbq: Bitmap queue whose wake batch should be recalculated.
0424  * @users: Number of users sharing the queue.
0425  *
0426  * Like sbitmap_queue_update_wake_batch(), this will calculate wake batch
0427  * by depth. This interface is for HCTX shared tags or queue shared tags.
0428  */
0429 void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq,
0430                         unsigned int users);
0431 
0432 /**
0433  * sbitmap_queue_resize() - Resize a &struct sbitmap_queue.
0434  * @sbq: Bitmap queue to resize.
0435  * @depth: New number of bits to resize to.
0436  *
0437  * Like sbitmap_resize(), this doesn't reallocate anything. It has to do
0438  * some extra work on the &struct sbitmap_queue, so it's not safe to just
0439  * resize the underlying &struct sbitmap.
0440  */
0441 void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth);
0442 
0443 /**
0444  * __sbitmap_queue_get() - Try to allocate a free bit from a &struct
0445  * sbitmap_queue with preemption already disabled.
0446  * @sbq: Bitmap queue to allocate from.
0447  *
0448  * Return: Non-negative allocated bit number if successful, -1 otherwise.
0449  */
0450 int __sbitmap_queue_get(struct sbitmap_queue *sbq);
0451 
0452 /**
0453  * __sbitmap_queue_get_batch() - Try to allocate a batch of free bits
0454  * @sbq: Bitmap queue to allocate from.
0455  * @nr_tags: number of tags requested
0456  * @offset: offset to add to returned bits
0457  *
0458  * Return: Mask of allocated tags, 0 if none are found. Each tag allocated is
0459  * a bit in the mask returned, and the caller must add @offset to the value to
0460  * get the absolute tag value.
0461  */
0462 unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags,
0463                     unsigned int *offset);
0464 
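/*
 * Illustrative sketch (editorial addition): turning the returned mask into
 * absolute tag values. The array handling and names are hypothetical.
 */
static inline int example_get_tag_batch(struct sbitmap_queue *sbq,
					int tags[], int nr_wanted)
{
	unsigned int offset, bit;
	unsigned long mask;
	int nr = 0;

	mask = __sbitmap_queue_get_batch(sbq, nr_wanted, &offset);
	for_each_set_bit(bit, &mask, BITS_PER_LONG)
		tags[nr++] = offset + bit;	/* absolute tag value */

	return nr;	/* how many tags were actually allocated */
}
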
0465 /**
0466  * sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct
0467  * sbitmap_queue, limiting the depth used from each word, with preemption
0468  * already disabled.
0469  * @sbq: Bitmap queue to allocate from.
0470  * @shallow_depth: The maximum number of bits to allocate from a single word.
0471  * See sbitmap_get_shallow().
0472  *
0473  * If you call this, make sure to call sbitmap_queue_min_shallow_depth() after
0474  * initializing @sbq.
0475  *
0476  * Return: Non-negative allocated bit number if successful, -1 otherwise.
0477  */
0478 int sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
0479                   unsigned int shallow_depth);
0480 
0481 /**
0482  * sbitmap_queue_get() - Try to allocate a free bit from a &struct
0483  * sbitmap_queue.
0484  * @sbq: Bitmap queue to allocate from.
0485  * @cpu: Output parameter; will contain the CPU we ran on (e.g., to be passed to
0486  *       sbitmap_queue_clear()).
0487  *
0488  * Return: Non-negative allocated bit number if successful, -1 otherwise.
0489  */
0490 static inline int sbitmap_queue_get(struct sbitmap_queue *sbq,
0491                     unsigned int *cpu)
0492 {
0493     int nr;
0494 
0495     *cpu = get_cpu();
0496     nr = __sbitmap_queue_get(sbq);
0497     put_cpu();
0498     return nr;
0499 }
0500 
0501 /**
0502  * sbitmap_queue_min_shallow_depth() - Inform a &struct sbitmap_queue of the
0503  * minimum shallow depth that will be used.
0504  * @sbq: Bitmap queue in question.
0505  * @min_shallow_depth: The minimum shallow depth that will be passed to
0506  * sbitmap_queue_get_shallow() or __sbitmap_queue_get_shallow().
0507  *
0508  * sbitmap_queue_clear() batches wakeups as an optimization. The batch size
0509  * depends on the depth of the bitmap. Since the shallow allocation functions
0510  * effectively operate with a different depth, the shallow depth must be taken
0511  * into account when calculating the batch size. This function must be called
0512  * with the minimum shallow depth that will be used. Failure to do so can result
0513  * in missed wakeups.
0514  */
0515 void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq,
0516                      unsigned int min_shallow_depth);
0517 
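/*
 * Illustrative sketch (editorial addition): pairing the shallow-depth
 * allocator with sbitmap_queue_min_shallow_depth(), as required above.
 * The chosen depth of 4 is hypothetical, and in real code the two calls
 * would live in the init path and the allocation path respectively.
 */
static inline int example_shallow_setup_and_get(struct sbitmap_queue *sbq)
{
	/* done once, right after sbitmap_queue_init_node() */
	sbitmap_queue_min_shallow_depth(sbq, 4);

	/* later, in the allocation path */
	return sbitmap_queue_get_shallow(sbq, 4);
}
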
0518 /**
0519  * sbitmap_queue_clear() - Free an allocated bit and wake up waiters on a
0520  * &struct sbitmap_queue.
0521  * @sbq: Bitmap to free from.
0522  * @nr: Bit number to free.
0523  * @cpu: CPU the bit was allocated on.
0524  */
0525 void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
0526              unsigned int cpu);
0527 
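/*
 * Illustrative sketch (editorial addition): a complete tag allocate/free
 * cycle on a &struct sbitmap_queue. The names are hypothetical.
 */
static inline void example_sbq_alloc_free(struct sbitmap_queue *sbq)
{
	unsigned int cpu;
	int nr;

	nr = sbitmap_queue_get(sbq, &cpu);	/* records the CPU we ran on */
	if (nr < 0)
		return;				/* no free bits right now */

	/* ... use the tag @nr ... */

	sbitmap_queue_clear(sbq, nr, cpu);	/* frees bit, may wake waiters */
}
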
0528 /**
0529  * sbitmap_queue_clear_batch() - Free a batch of allocated bits from a
0530  * &struct sbitmap_queue.
0531  * @sbq: Bitmap to free from.
0532  * @offset: offset for each tag in array
0533  * @tags: array of tags
0534  * @nr_tags: number of tags in array
0535  */
0536 void sbitmap_queue_clear_batch(struct sbitmap_queue *sbq, int offset,
0537                 int *tags, int nr_tags);
0538 
0539 static inline int sbq_index_inc(int index)
0540 {
0541     return (index + 1) & (SBQ_WAIT_QUEUES - 1);
0542 }
0543 
0544 static inline void sbq_index_atomic_inc(atomic_t *index)
0545 {
0546     int old = atomic_read(index);
0547     int new = sbq_index_inc(old);
0548     atomic_cmpxchg(index, old, new);
0549 }
0550 
0551 /**
0552  * sbq_wait_ptr() - Get the next wait queue to use for a &struct
0553  * sbitmap_queue.
0554  * @sbq: Bitmap queue to wait on.
0555  * @wait_index: A counter per "user" of @sbq.
0556  */
0557 static inline struct sbq_wait_state *sbq_wait_ptr(struct sbitmap_queue *sbq,
0558                           atomic_t *wait_index)
0559 {
0560     struct sbq_wait_state *ws;
0561 
0562     ws = &sbq->ws[atomic_read(wait_index)];
0563     sbq_index_atomic_inc(wait_index);
0564     return ws;
0565 }
0566 
0567 /**
0568  * sbitmap_queue_wake_all() - Wake up everything waiting on a &struct
0569  * sbitmap_queue.
0570  * @sbq: Bitmap queue to wake up.
0571  */
0572 void sbitmap_queue_wake_all(struct sbitmap_queue *sbq);
0573 
0574 /**
0575  * sbitmap_queue_wake_up() - Wake up some of the waiters in one waitqueue
0576  * on a &struct sbitmap_queue.
0577  * @sbq: Bitmap queue to wake up.
0578  */
0579 void sbitmap_queue_wake_up(struct sbitmap_queue *sbq);
0580 
0581 /**
0582  * sbitmap_queue_show() - Dump &struct sbitmap_queue information to a &struct
0583  * seq_file.
0584  * @sbq: Bitmap queue to show.
0585  * @m: struct seq_file to write to.
0586  *
0587  * This is intended for debugging. The format may change at any time.
0588  */
0589 void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m);
0590 
0591 struct sbq_wait {
0592     struct sbitmap_queue *sbq;  /* if set, sbq_wait is accounted */
0593     struct wait_queue_entry wait;
0594 };
0595 
0596 #define DEFINE_SBQ_WAIT(name)                           \
0597     struct sbq_wait name = {                        \
0598         .sbq = NULL,                            \
0599         .wait = {                           \
0600             .private    = current,              \
0601             .func       = autoremove_wake_function,     \
0602             .entry      = LIST_HEAD_INIT((name).wait.entry),    \
0603         }                               \
0604     }
0605 
0606 /*
0607  * Wrapper around prepare_to_wait_exclusive(), which maintains some extra
0608  * internal state.
0609  */
0610 void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq,
0611                 struct sbq_wait_state *ws,
0612                 struct sbq_wait *sbq_wait, int state);
0613 
0614 /*
0615  * Must be paired with sbitmap_prepare_to_wait().
0616  */
0617 void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws,
0618                 struct sbq_wait *sbq_wait);
0619 
0620 /*
0621  * Wrapper around add_wait_queue(), which maintains some extra internal state
0622  */
0623 void sbitmap_add_wait_queue(struct sbitmap_queue *sbq,
0624                 struct sbq_wait_state *ws,
0625                 struct sbq_wait *sbq_wait);
0626 
0627 /*
0628  * Must be paired with sbitmap_add_wait_queue()
0629  */
0630 void sbitmap_del_wait_queue(struct sbq_wait *sbq_wait);
0631 
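/*
 * Illustrative sketch (editorial addition): sleeping until a bit frees up,
 * in the style of the blk-mq tag allocator. Assumes <linux/sched.h> is
 * available for schedule() and TASK_UNINTERRUPTIBLE; @wait_index would
 * normally be per-user state, and all names here are hypothetical.
 */
static inline int example_sbq_get_and_wait(struct sbitmap_queue *sbq,
					   atomic_t *wait_index,
					   unsigned int *cpu)
{
	DEFINE_SBQ_WAIT(wait);
	struct sbq_wait_state *ws;
	int nr;

	nr = sbitmap_queue_get(sbq, cpu);
	if (nr >= 0)
		return nr;

	ws = sbq_wait_ptr(sbq, wait_index);
	do {
		sbitmap_prepare_to_wait(sbq, ws, &wait, TASK_UNINTERRUPTIBLE);
		nr = sbitmap_queue_get(sbq, cpu);
		if (nr >= 0)
			break;
		schedule();	/* woken by sbitmap_queue_clear() */
		ws = sbq_wait_ptr(sbq, wait_index);
	} while (1);
	sbitmap_finish_wait(sbq, ws, &wait);

	return nr;
}
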
0632 #endif /* __LINUX_SCALE_BITMAP_H */