// SPDX-License-Identifier: GPL-2.0

#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/ktime.h>
#include <linux/list.h>
#include <linux/math64.h>
#include <linux/sizes.h>
#include <linux/workqueue.h>
#include "ctree.h"
#include "block-group.h"
#include "discard.h"
#include "free-space-cache.h"

/*
 * This file implements async discard for btrfs.
 *
 * Async discard trims free space outside of transaction commit.  Block
 * groups are kept on a set of LRU-style lists and a delayed work item
 * walks them, discarding one block group at a time.  Each block group is
 * processed in two passes: extents first, then bitmaps, which gives
 * ranges in bitmaps the best opportunity to coalesce.  The lists after
 * the first apply progressively smaller minimum length filters (see
 * discard_minlen below) so larger free ranges are discarded first.
 *
 * The work item's delay is the maximum of the delay implied by the iops
 * limit, the optional bandwidth (kbps) limit, and the per-block-group
 * discard_eligible_time.
 *
 * Only data block groups are tracked here; mixed block groups are not
 * supported.  The first list is special and manages completely free
 * block groups: these are trimmed with a short delay and only handed to
 * the unused_bgs path once fully discarded.
 *
 * The in-memory free space cache serves as the backing state for
 * discard, so none of this state is persistent; a pass over the free
 * space cache on mount reestablishes a consistent state.
 */

/* Initial delays, giving reallocation a chance to reuse the space first. */
#define BTRFS_DISCARD_DELAY		(120ULL * NSEC_PER_SEC)
#define BTRFS_DISCARD_UNUSED_DELAY	(10ULL * NSEC_PER_SEC)

/*
 * Target amount of time to discard everything currently discardable.
 * The per-iteration delay derived from it is clamped by the MIN/MAX
 * bounds below.
 */
#define BTRFS_DISCARD_TARGET_MSEC	(6 * 60 * 60UL * MSEC_PER_SEC)
#define BTRFS_DISCARD_MIN_DELAY_MSEC	(1UL)
#define BTRFS_DISCARD_MAX_DELAY_MSEC	(1000UL)
#define BTRFS_DISCARD_MAX_IOPS		(10U)

/*
 * Minimum discard length filter for each list.  Index 0 (unused block
 * groups) has no filter; the following lists use monotonically
 * decreasing filters.
 */
static int discard_minlen[BTRFS_NR_DISCARD_LISTS] = {
	0,
	BTRFS_ASYNC_DISCARD_MAX_FILTER,
	BTRFS_ASYNC_DISCARD_MIN_FILTER
};

static struct list_head *get_discard_list(struct btrfs_discard_ctl *discard_ctl,
					  struct btrfs_block_group *block_group)
{
	return &discard_ctl->discard_list[block_group->discard_index];
}
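
/*
 * Insert a block group into its discard list.  The caller must hold
 * discard_ctl->lock.  A block group coming off the unused list restarts
 * at BTRFS_DISCARD_INDEX_START with a fresh delay and a reset cursor;
 * moving to the list tail preserves the LRU ordering.
 */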
static void __add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
				  struct btrfs_block_group *block_group)
{
	if (!btrfs_run_discard_work(discard_ctl))
		return;

	if (list_empty(&block_group->discard_list) ||
	    block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) {
		if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED)
			block_group->discard_index = BTRFS_DISCARD_INDEX_START;
		block_group->discard_eligible_time = (ktime_get_ns() +
						      BTRFS_DISCARD_DELAY);
		block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
	}

	list_move_tail(&block_group->discard_list,
		       get_discard_list(discard_ctl, block_group));
}
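
/* Locked wrapper; only data-only block groups are tracked for discard. */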
0095
0096 static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
0097 struct btrfs_block_group *block_group)
0098 {
0099 if (!btrfs_is_block_group_data_only(block_group))
0100 return;
0101
0102 spin_lock(&discard_ctl->lock);
0103 __add_to_discard_list(discard_ctl, block_group);
0104 spin_unlock(&discard_ctl->lock);
0105 }
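
/*
 * Queue a fully freed block group on the unused list with the shorter
 * BTRFS_DISCARD_UNUSED_DELAY, resetting the cursor so the entire block
 * group is trimmed before it can be handed to the unused_bgs path.
 */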
static void add_to_discard_unused_list(struct btrfs_discard_ctl *discard_ctl,
				       struct btrfs_block_group *block_group)
{
	spin_lock(&discard_ctl->lock);

	if (!btrfs_run_discard_work(discard_ctl)) {
		spin_unlock(&discard_ctl->lock);
		return;
	}

	list_del_init(&block_group->discard_list);

	block_group->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
	block_group->discard_eligible_time = (ktime_get_ns() +
					      BTRFS_DISCARD_UNUSED_DELAY);
	block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
	list_add_tail(&block_group->discard_list,
		      &discard_ctl->discard_list[BTRFS_DISCARD_INDEX_UNUSED]);

	spin_unlock(&discard_ctl->lock);
}
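
/*
 * Remove a block group from the discard lists.  Returns true if the
 * block group was the one currently being discarded, so the caller can
 * decide whether the running work item needs to be cancelled.
 */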
static bool remove_from_discard_list(struct btrfs_discard_ctl *discard_ctl,
				     struct btrfs_block_group *block_group)
{
	bool running = false;

	spin_lock(&discard_ctl->lock);

	if (block_group == discard_ctl->block_group) {
		running = true;
		discard_ctl->block_group = NULL;
	}

	block_group->discard_eligible_time = 0;
	list_del_init(&block_group->discard_list);

	spin_unlock(&discard_ctl->lock);

	return running;
}

/*
 * Find the block group that is up next for discarding.
 *
 * Iterate over the discard lists, peeking at the first (oldest) entry of
 * each.  Return the first block group that is already eligible at @now,
 * otherwise the one whose discard_eligible_time comes soonest.
 */
static struct btrfs_block_group *find_next_block_group(
					struct btrfs_discard_ctl *discard_ctl,
					u64 now)
{
	struct btrfs_block_group *ret_block_group = NULL, *block_group;
	int i;

	for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
		struct list_head *discard_list = &discard_ctl->discard_list[i];

		if (!list_empty(discard_list)) {
			block_group = list_first_entry(discard_list,
						       struct btrfs_block_group,
						       discard_list);

			if (!ret_block_group)
				ret_block_group = block_group;

			if (ret_block_group->discard_eligible_time < now)
				break;

			if (ret_block_group->discard_eligible_time >
			    block_group->discard_eligible_time)
				ret_block_group = block_group;
		}
	}

	return ret_block_group;
}

/*
 * Wrap find_next_block_group() and claim the returned block group for
 * use.
 *
 * The discard_state control flow is managed here: a block group on the
 * unused list that has gained allocations is moved back onto the regular
 * lists (or dropped entirely if it is no longer data-only), and a
 * RESET_CURSOR state rewinds the cursor and starts the extents pass.
 * @discard_state and @discard_index are snapshotted under the lock
 * because they may change while discarding, but the discard must execute
 * in the context decided here.
 */
static struct btrfs_block_group *peek_discard_list(
					struct btrfs_discard_ctl *discard_ctl,
					enum btrfs_discard_state *discard_state,
					int *discard_index, u64 now)
{
	struct btrfs_block_group *block_group;

	spin_lock(&discard_ctl->lock);
again:
	block_group = find_next_block_group(discard_ctl, now);

	if (block_group && now >= block_group->discard_eligible_time) {
		if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED &&
		    block_group->used != 0) {
			if (btrfs_is_block_group_data_only(block_group))
				__add_to_discard_list(discard_ctl, block_group);
			else
				list_del_init(&block_group->discard_list);
			goto again;
		}
		if (block_group->discard_state == BTRFS_DISCARD_RESET_CURSOR) {
			block_group->discard_cursor = block_group->start;
			block_group->discard_state = BTRFS_DISCARD_EXTENTS;
		}
		discard_ctl->block_group = block_group;
	}
	if (block_group) {
		*discard_state = block_group->discard_state;
		*discard_index = block_group->discard_index;
	}
	spin_unlock(&discard_ctl->lock);

	return block_group;
}

/*
 * Update a block group's discard filter after free space is returned.
 *
 * Async discard maintains multiple lists with progressively smaller size
 * filters.  If a recently freed region (@bytes, after coalescing)
 * matches a larger filter than the block group's current list, move the
 * block group up to the appropriate list so the larger extent is
 * discarded sooner.
 */
void btrfs_discard_check_filter(struct btrfs_block_group *block_group,
				u64 bytes)
{
	struct btrfs_discard_ctl *discard_ctl;

	if (!block_group ||
	    !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
		return;

	discard_ctl = &block_group->fs_info->discard_ctl;

	if (block_group->discard_index > BTRFS_DISCARD_INDEX_START &&
	    bytes >= discard_minlen[block_group->discard_index - 1]) {
		int i;

		remove_from_discard_list(discard_ctl, block_group);

		for (i = BTRFS_DISCARD_INDEX_START; i < BTRFS_NR_DISCARD_LISTS;
		     i++) {
			if (bytes >= discard_minlen[i]) {
				block_group->discard_index = i;
				add_to_discard_list(discard_ctl, block_group);
				break;
			}
		}
	}
}

/*
 * Move a block group along the discard lists.
 *
 * Increment the block group's discard_index.  If it falls off the end of
 * the lists, leave it off; otherwise requeue it on the appropriate list.
 */
static void btrfs_update_discard_index(struct btrfs_discard_ctl *discard_ctl,
				       struct btrfs_block_group *block_group)
{
	block_group->discard_index++;
	if (block_group->discard_index == BTRFS_NR_DISCARD_LISTS) {
		block_group->discard_index = 1;
		return;
	}

	add_to_discard_list(discard_ctl, block_group);
}

/*
 * Remove a block group from the discard lists.  If it was the one being
 * worked on, wait for the running work to finish and reschedule the
 * delayed work to pick up the next block group.
 */
void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl,
			       struct btrfs_block_group *block_group)
{
	if (remove_from_discard_list(discard_ctl, block_group)) {
		cancel_delayed_work_sync(&discard_ctl->work);
		btrfs_discard_schedule_work(discard_ctl, true);
	}
}

/*
 * Queue a block group for async discard, maintaining the LRU order of
 * the discard lists, and kick the work item if it is not already
 * pending.
 */
void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl,
			      struct btrfs_block_group *block_group)
{
	if (!block_group || !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
		return;

	if (block_group->used == 0)
		add_to_discard_unused_list(discard_ctl, block_group);
	else
		add_to_discard_list(discard_ctl, block_group);

	if (!delayed_work_pending(&discard_ctl->work))
		btrfs_discard_schedule_work(discard_ctl, false);
}

static void __btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
					  u64 now, bool override)
{
	struct btrfs_block_group *block_group;

	if (!btrfs_run_discard_work(discard_ctl))
		return;
	if (!override && delayed_work_pending(&discard_ctl->work))
		return;

	block_group = find_next_block_group(discard_ctl, now);
	if (block_group) {
		u64 delay = discard_ctl->delay_ms * NSEC_PER_MSEC;
		u32 kbps_limit = READ_ONCE(discard_ctl->kbps_limit);

		/*
		 * A single delayed workqueue item is responsible for
		 * discarding, so the bytes rate limit can be enforced by
		 * delaying proportionally to how much was discarded last
		 * time around.
		 */
		if (kbps_limit && discard_ctl->prev_discard) {
			u64 bps_limit = ((u64)kbps_limit) * SZ_1K;
			u64 bps_delay = div64_u64(discard_ctl->prev_discard *
						  NSEC_PER_SEC, bps_limit);

			delay = max(delay, bps_delay);
		}

		/*
		 * This timeout is to hopefully prevent immediate discarding
		 * in a recently allocated block group.
		 */
		if (now < block_group->discard_eligible_time) {
			u64 bg_timeout = block_group->discard_eligible_time - now;

			delay = max(delay, bg_timeout);
		}

		if (override && discard_ctl->prev_discard) {
			u64 elapsed = now - discard_ctl->prev_discard_time;

			if (delay > elapsed)
				delay -= elapsed;
			else
				delay = 0;
		}

		mod_delayed_work(discard_ctl->discard_workers,
				 &discard_ctl->work, nsecs_to_jiffies(delay));
	}
}

/*
 * Schedule the delayed discard work item.
 *
 * @override forces the pending timer to be updated: the baseline delay
 * interval is reevaluated on transaction commit, and any time already
 * elapsed since the previous discard is credited against the new delay.
 */
void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
				 bool override)
{
	const u64 now = ktime_get_ns();

	spin_lock(&discard_ctl->lock);
	__btrfs_discard_schedule_work(discard_ctl, now, override);
	spin_unlock(&discard_ctl->lock);
}

/*
 * Determine the next step for a block group after a completed pass.
 *
 * If the block group is now unused, it is either handed to the
 * unused_bgs path (when fully trimmed) or requeued on the unused discard
 * list; otherwise it is promoted to the next discard list.
 */
static void btrfs_finish_discard_pass(struct btrfs_discard_ctl *discard_ctl,
				      struct btrfs_block_group *block_group)
{
	remove_from_discard_list(discard_ctl, block_group);

	if (block_group->used == 0) {
		if (btrfs_is_free_space_trimmed(block_group))
			btrfs_mark_bg_unused(block_group);
		else
			add_to_discard_unused_list(discard_ctl, block_group);
	} else {
		btrfs_update_discard_index(discard_ctl, block_group);
	}
}

/*
 * Discard work queue callback.
 *
 * Find the next block group to start discarding and then discard a
 * single region.  Discarding happens in a two-pass fashion: first
 * extents, then bitmaps.  Completely discarded, unused block groups are
 * sent down the unused_bgs path.
 */
static void btrfs_discard_workfn(struct work_struct *work)
{
	struct btrfs_discard_ctl *discard_ctl;
	struct btrfs_block_group *block_group;
	enum btrfs_discard_state discard_state;
	int discard_index = 0;
	u64 trimmed = 0;
	u64 minlen = 0;
	u64 now = ktime_get_ns();

	discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work);

	block_group = peek_discard_list(discard_ctl, &discard_state,
					&discard_index, now);
	if (!block_group || !btrfs_run_discard_work(discard_ctl))
		return;
	if (now < block_group->discard_eligible_time) {
		btrfs_discard_schedule_work(discard_ctl, false);
		return;
	}

	/* Perform discarding */
	minlen = discard_minlen[discard_index];

	if (discard_state == BTRFS_DISCARD_BITMAPS) {
		u64 maxlen = 0;

		/*
		 * Use the previous level's minimum discard length as the
		 * maximum, so this pass only picks up the pieces skipped
		 * over while discarding extents at that level.
		 */
		if (discard_index != BTRFS_DISCARD_INDEX_UNUSED)
			maxlen = discard_minlen[discard_index - 1];

		btrfs_trim_block_group_bitmaps(block_group, &trimmed,
					       block_group->discard_cursor,
					       btrfs_block_group_end(block_group),
					       minlen, maxlen, true);
		discard_ctl->discard_bitmap_bytes += trimmed;
	} else {
		btrfs_trim_block_group_extents(block_group, &trimmed,
					       block_group->discard_cursor,
					       btrfs_block_group_end(block_group),
					       minlen, true);
		discard_ctl->discard_extent_bytes += trimmed;
	}

	/* Determine next steps for this block group */
	if (block_group->discard_cursor >= btrfs_block_group_end(block_group)) {
		if (discard_state == BTRFS_DISCARD_BITMAPS) {
			btrfs_finish_discard_pass(discard_ctl, block_group);
		} else {
			block_group->discard_cursor = block_group->start;
			spin_lock(&discard_ctl->lock);
			if (block_group->discard_state !=
			    BTRFS_DISCARD_RESET_CURSOR)
				block_group->discard_state =
							BTRFS_DISCARD_BITMAPS;
			spin_unlock(&discard_ctl->lock);
		}
	}

	now = ktime_get_ns();
	spin_lock(&discard_ctl->lock);
	discard_ctl->prev_discard = trimmed;
	discard_ctl->prev_discard_time = now;
	discard_ctl->block_group = NULL;
	__btrfs_discard_schedule_work(discard_ctl, now, false);
	spin_unlock(&discard_ctl->lock);
}

/*
 * Determine if async discard should be running: the filesystem must be
 * writeable and BTRFS_FS_DISCARD_RUNNING must be set.
 */
bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl)
{
	struct btrfs_fs_info *fs_info = container_of(discard_ctl,
						     struct btrfs_fs_info,
						     discard_ctl);

	return (!(fs_info->sb->s_flags & SB_RDONLY) &&
		test_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags));
}

/*
 * Recalculate the base delay.
 *
 * The delay is derived from the total number of discardable extents so
 * that everything discardable is trimmed within roughly
 * BTRFS_DISCARD_TARGET_MSEC, or from the iops limit when one is set.
 * Either way it is clamped between BTRFS_DISCARD_MIN_DELAY_MSEC and
 * BTRFS_DISCARD_MAX_DELAY_MSEC.
 */
void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl)
{
	s32 discardable_extents;
	s64 discardable_bytes;
	u32 iops_limit;
	unsigned long delay;

	discardable_extents = atomic_read(&discard_ctl->discardable_extents);
	if (!discardable_extents)
		return;

	spin_lock(&discard_ctl->lock);

	/*
	 * The counters can transiently drift below zero.  This is the only
	 * consumer of these numbers and it runs from the synchronized
	 * transaction commit path, so correct the drift back to zero here.
	 */
	if (discardable_extents < 0)
		atomic_add(-discardable_extents,
			   &discard_ctl->discardable_extents);

	discardable_bytes = atomic64_read(&discard_ctl->discardable_bytes);
	if (discardable_bytes < 0)
		atomic64_add(-discardable_bytes,
			     &discard_ctl->discardable_bytes);

	if (discardable_extents <= 0) {
		spin_unlock(&discard_ctl->lock);
		return;
	}

	iops_limit = READ_ONCE(discard_ctl->iops_limit);
	if (iops_limit)
		delay = MSEC_PER_SEC / iops_limit;
	else
		delay = BTRFS_DISCARD_TARGET_MSEC / discardable_extents;

	delay = clamp(delay, BTRFS_DISCARD_MIN_DELAY_MSEC,
		      BTRFS_DISCARD_MAX_DELAY_MSEC);
	discard_ctl->delay_ms = delay;

	spin_unlock(&discard_ctl->lock);
}

/*
 * Propagate discard counters up to the discard_ctl.
 *
 * The free space ctl keeps a current and a previous value per counter;
 * the delta between them is added to the global stat, after which the
 * current value becomes the previous one.  The caller must hold
 * ctl->tree_lock.
 */
void btrfs_discard_update_discardable(struct btrfs_block_group *block_group)
{
	struct btrfs_free_space_ctl *ctl;
	struct btrfs_discard_ctl *discard_ctl;
	s32 extents_delta;
	s64 bytes_delta;

	if (!block_group ||
	    !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC) ||
	    !btrfs_is_block_group_data_only(block_group))
		return;

	ctl = block_group->free_space_ctl;
	discard_ctl = &block_group->fs_info->discard_ctl;

	lockdep_assert_held(&ctl->tree_lock);
	extents_delta = ctl->discardable_extents[BTRFS_STAT_CURR] -
			ctl->discardable_extents[BTRFS_STAT_PREV];
	if (extents_delta) {
		atomic_add(extents_delta, &discard_ctl->discardable_extents);
		ctl->discardable_extents[BTRFS_STAT_PREV] =
			ctl->discardable_extents[BTRFS_STAT_CURR];
	}

	bytes_delta = ctl->discardable_bytes[BTRFS_STAT_CURR] -
		      ctl->discardable_bytes[BTRFS_STAT_PREV];
	if (bytes_delta) {
		atomic64_add(bytes_delta, &discard_ctl->discardable_bytes);
		ctl->discardable_bytes[BTRFS_STAT_PREV] =
			ctl->discardable_bytes[BTRFS_STAT_CURR];
	}
}

/*
 * Punt the unused_bgs list to the discard lists.
 *
 * With async discard, a block group must be fully trimmed before it may
 * be reclaimed, so block groups queued as unused while discard was not
 * running (e.g. before enabling async discard at mount) are routed
 * through the discard lists here.
 */
void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info)
{
	struct btrfs_block_group *block_group, *next;

	spin_lock(&fs_info->unused_bgs_lock);

	list_for_each_entry_safe(block_group, next, &fs_info->unused_bgs,
				 bg_list) {
		list_del_init(&block_group->bg_list);
		btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
		/*
		 * Drop the unused_bgs list's reference only after the block
		 * group has been queued, so it cannot be freed while we are
		 * still using it.
		 */
		btrfs_put_block_group(block_group);
	}
	spin_unlock(&fs_info->unused_bgs_lock);
}

/*
 * Purge the discard lists when async discard is being disabled.
 *
 * We may have intercepted block groups that are completely free and
 * ready for the unused_bgs path.  As discarding will now happen in
 * transaction commit or not at all, send them along to unused_bgs.
 */
static void btrfs_discard_purge_list(struct btrfs_discard_ctl *discard_ctl)
{
	struct btrfs_block_group *block_group, *next;
	int i;

	spin_lock(&discard_ctl->lock);
	for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
		list_for_each_entry_safe(block_group, next,
					 &discard_ctl->discard_list[i],
					 discard_list) {
			list_del_init(&block_group->discard_list);
			spin_unlock(&discard_ctl->lock);
			if (block_group->used == 0)
				btrfs_mark_bg_unused(block_group);
			spin_lock(&discard_ctl->lock);
		}
	}
	spin_unlock(&discard_ctl->lock);
}
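
/*
 * Start async discard if DISCARD_ASYNC is enabled, first punting any
 * unused block groups through the discard lists; otherwise tear the
 * machinery down.
 */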
void btrfs_discard_resume(struct btrfs_fs_info *fs_info)
{
	if (!btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
		btrfs_discard_cleanup(fs_info);
		return;
	}

	btrfs_discard_punt_unused_bgs_list(fs_info);

	set_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
}
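
/* Stop issuing new async discards; queued block groups stay queued. */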
void btrfs_discard_stop(struct btrfs_fs_info *fs_info)
{
	clear_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
}
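
/*
 * Initialize discard control state at mount time: empty lists, zeroed
 * stats and the default rate limits (BTRFS_DISCARD_MAX_IOPS, no
 * bandwidth cap).
 */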
void btrfs_discard_init(struct btrfs_fs_info *fs_info)
{
	struct btrfs_discard_ctl *discard_ctl = &fs_info->discard_ctl;
	int i;

	spin_lock_init(&discard_ctl->lock);
	INIT_DELAYED_WORK(&discard_ctl->work, btrfs_discard_workfn);

	for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++)
		INIT_LIST_HEAD(&discard_ctl->discard_list[i]);

	discard_ctl->prev_discard = 0;
	discard_ctl->prev_discard_time = 0;
	atomic_set(&discard_ctl->discardable_extents, 0);
	atomic64_set(&discard_ctl->discardable_bytes, 0);
	discard_ctl->max_discard_size = BTRFS_ASYNC_DISCARD_DEFAULT_MAX_SIZE;
	discard_ctl->delay_ms = BTRFS_DISCARD_MAX_DELAY_MSEC;
	discard_ctl->iops_limit = BTRFS_DISCARD_MAX_IOPS;
	discard_ctl->kbps_limit = 0;
	discard_ctl->discard_extent_bytes = 0;
	discard_ctl->discard_bitmap_bytes = 0;
	atomic64_set(&discard_ctl->discard_bytes_saved, 0);
}
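
/* Stop the scheduler, flush any running work and purge the lists. */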
void btrfs_discard_cleanup(struct btrfs_fs_info *fs_info)
{
	btrfs_discard_stop(fs_info);
	cancel_delayed_work_sync(&fs_info->discard_ctl.work);
	btrfs_discard_purge_list(&fs_info->discard_ctl);
}