// SPDX-License-Identifier: GPL-2.0

#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/ktime.h>
#include <linux/list.h>
#include <linux/math64.h>
#include <linux/sizes.h>
#include <linux/workqueue.h>
#include "ctree.h"
#include "block-group.h"
#include "discard.h"
#include "free-space-cache.h"

/*
 * This file implements async discard for btrfs.
 *
 * Async discard trims free space outside of transaction commit.  Block
 * groups are kept on a set of LRU-style lists and a delayed work item
 * walks them, discarding one block group at a time.  Each block group is
 * processed in two passes: extents first, then bitmaps, which gives
 * ranges in bitmaps the best opportunity to coalesce.  The lists after
 * the first apply progressively smaller minimum length filters (see
 * discard_minlen below) so larger free ranges are discarded first.
 *
 * The work item's delay is the maximum of the delay implied by the iops
 * limit, the optional bandwidth (kbps) limit, and the per-block-group
 * discard_eligible_time.
 *
 * Only data block groups are tracked here; mixed block groups are not
 * supported.  The first list is special and manages completely free
 * block groups: these are trimmed with a short delay and only handed to
 * the unused_bgs path once fully discarded.
 *
 * The in-memory free space cache serves as the backing state for
 * discard, so none of this state is persistent; a pass over the free
 * space cache on mount reestablishes a consistent state.
 */

/* Initial delays, giving reallocation a chance to reuse the space first. */
#define BTRFS_DISCARD_DELAY		(120ULL * NSEC_PER_SEC)
#define BTRFS_DISCARD_UNUSED_DELAY	(10ULL * NSEC_PER_SEC)

/*
 * Target amount of time to discard everything currently discardable.
 * The per-iteration delay derived from it is clamped by the MIN/MAX
 * bounds below.
 */
#define BTRFS_DISCARD_TARGET_MSEC	(6 * 60 * 60UL * MSEC_PER_SEC)
#define BTRFS_DISCARD_MIN_DELAY_MSEC	(1UL)
#define BTRFS_DISCARD_MAX_DELAY_MSEC	(1000UL)
#define BTRFS_DISCARD_MAX_IOPS		(10U)

/*
 * Minimum discard length filter for each list.  Index 0 (unused block
 * groups) has no filter; the following lists use monotonically
 * decreasing filters.
 */
static int discard_minlen[BTRFS_NR_DISCARD_LISTS] = {
	0,
	BTRFS_ASYNC_DISCARD_MAX_FILTER,
	BTRFS_ASYNC_DISCARD_MIN_FILTER
};

static struct list_head *get_discard_list(struct btrfs_discard_ctl *discard_ctl,
					  struct btrfs_block_group *block_group)
{
	return &discard_ctl->discard_list[block_group->discard_index];
}
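
/*
 * Insert a block group into its discard list.  The caller must hold
 * discard_ctl->lock.  A block group coming off the unused list restarts
 * at BTRFS_DISCARD_INDEX_START with a fresh delay and a reset cursor;
 * moving to the list tail preserves the LRU ordering.
 */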
static void __add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
				  struct btrfs_block_group *block_group)
{
	if (!btrfs_run_discard_work(discard_ctl))
		return;

	if (list_empty(&block_group->discard_list) ||
	    block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) {
		if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED)
			block_group->discard_index = BTRFS_DISCARD_INDEX_START;
		block_group->discard_eligible_time = (ktime_get_ns() +
						      BTRFS_DISCARD_DELAY);
		block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
	}

	list_move_tail(&block_group->discard_list,
		       get_discard_list(discard_ctl, block_group));
}
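
/* Locked wrapper; only data-only block groups are tracked for discard. */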
0095
0096 static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
0097 struct btrfs_block_group *block_group)
0098 {
0099 if (!btrfs_is_block_group_data_only(block_group))
0100 return;
0101
0102 spin_lock(&discard_ctl->lock);
0103 __add_to_discard_list(discard_ctl, block_group);
0104 spin_unlock(&discard_ctl->lock);
0105 }
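
/*
 * Queue a fully freed block group on the unused list with the shorter
 * BTRFS_DISCARD_UNUSED_DELAY, resetting the cursor so the entire block
 * group is trimmed before it can be handed to the unused_bgs path.
 */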
static void add_to_discard_unused_list(struct btrfs_discard_ctl *discard_ctl,
				       struct btrfs_block_group *block_group)
{
	spin_lock(&discard_ctl->lock);

	if (!btrfs_run_discard_work(discard_ctl)) {
		spin_unlock(&discard_ctl->lock);
		return;
	}

	list_del_init(&block_group->discard_list);

	block_group->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
	block_group->discard_eligible_time = (ktime_get_ns() +
					      BTRFS_DISCARD_UNUSED_DELAY);
	block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
	list_add_tail(&block_group->discard_list,
		      &discard_ctl->discard_list[BTRFS_DISCARD_INDEX_UNUSED]);

	spin_unlock(&discard_ctl->lock);
}
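
/*
 * Remove a block group from the discard lists.  Returns true if the
 * block group was the one currently being discarded, so the caller can
 * decide whether the running work item needs to be cancelled.
 */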
static bool remove_from_discard_list(struct btrfs_discard_ctl *discard_ctl,
				     struct btrfs_block_group *block_group)
{
	bool running = false;

	spin_lock(&discard_ctl->lock);

	if (block_group == discard_ctl->block_group) {
		running = true;
		discard_ctl->block_group = NULL;
	}

	block_group->discard_eligible_time = 0;
	list_del_init(&block_group->discard_list);

	spin_unlock(&discard_ctl->lock);

	return running;
}

/*
 * Find the block group that is up next for discarding.
 *
 * Iterate over the discard lists, peeking at the first (oldest) entry of
 * each.  Return the first block group that is already eligible at @now,
 * otherwise the one whose discard_eligible_time comes soonest.
 */
static struct btrfs_block_group *find_next_block_group(
					struct btrfs_discard_ctl *discard_ctl,
					u64 now)
{
	struct btrfs_block_group *ret_block_group = NULL, *block_group;
	int i;

	for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
		struct list_head *discard_list = &discard_ctl->discard_list[i];

		if (!list_empty(discard_list)) {
			block_group = list_first_entry(discard_list,
						       struct btrfs_block_group,
						       discard_list);

			if (!ret_block_group)
				ret_block_group = block_group;

			if (ret_block_group->discard_eligible_time < now)
				break;

			if (ret_block_group->discard_eligible_time >
			    block_group->discard_eligible_time)
				ret_block_group = block_group;
		}
	}

	return ret_block_group;
}

/*
 * Wrap find_next_block_group() and claim the returned block group for
 * use.
 *
 * The discard_state control flow is managed here: a block group on the
 * unused list that has gained allocations is moved back onto the regular
 * lists (or dropped entirely if it is no longer data-only), and a
 * RESET_CURSOR state rewinds the cursor and starts the extents pass.
 * @discard_state and @discard_index are snapshotted under the lock
 * because they may change while discarding, but the discard must execute
 * in the context decided here.
 */
static struct btrfs_block_group *peek_discard_list(
					struct btrfs_discard_ctl *discard_ctl,
					enum btrfs_discard_state *discard_state,
					int *discard_index, u64 now)
{
	struct btrfs_block_group *block_group;

	spin_lock(&discard_ctl->lock);
again:
	block_group = find_next_block_group(discard_ctl, now);

	if (block_group && now >= block_group->discard_eligible_time) {
		if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED &&
		    block_group->used != 0) {
			if (btrfs_is_block_group_data_only(block_group))
				__add_to_discard_list(discard_ctl, block_group);
			else
				list_del_init(&block_group->discard_list);
			goto again;
		}
		if (block_group->discard_state == BTRFS_DISCARD_RESET_CURSOR) {
			block_group->discard_cursor = block_group->start;
			block_group->discard_state = BTRFS_DISCARD_EXTENTS;
		}
		discard_ctl->block_group = block_group;
	}
	if (block_group) {
		*discard_state = block_group->discard_state;
		*discard_index = block_group->discard_index;
	}
	spin_unlock(&discard_ctl->lock);

	return block_group;
}

/*
 * Update a block group's discard filter after free space is returned.
 *
 * Async discard maintains multiple lists with progressively smaller size
 * filters.  If a recently freed region (@bytes, after coalescing)
 * matches a larger filter than the block group's current list, move the
 * block group up to the appropriate list so the larger extent is
 * discarded sooner.
 */
void btrfs_discard_check_filter(struct btrfs_block_group *block_group,
				u64 bytes)
{
	struct btrfs_discard_ctl *discard_ctl;

	if (!block_group ||
	    !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
		return;

	discard_ctl = &block_group->fs_info->discard_ctl;

	if (block_group->discard_index > BTRFS_DISCARD_INDEX_START &&
	    bytes >= discard_minlen[block_group->discard_index - 1]) {
		int i;

		remove_from_discard_list(discard_ctl, block_group);

		for (i = BTRFS_DISCARD_INDEX_START; i < BTRFS_NR_DISCARD_LISTS;
		     i++) {
			if (bytes >= discard_minlen[i]) {
				block_group->discard_index = i;
				add_to_discard_list(discard_ctl, block_group);
				break;
			}
		}
	}
}

/*
 * Move a block group along the discard lists.
 *
 * Increment the block group's discard_index.  If it falls off the end of
 * the lists, leave it off; otherwise requeue it on the appropriate list.
 */
static void btrfs_update_discard_index(struct btrfs_discard_ctl *discard_ctl,
				       struct btrfs_block_group *block_group)
{
	block_group->discard_index++;
	if (block_group->discard_index == BTRFS_NR_DISCARD_LISTS) {
		block_group->discard_index = 1;
		return;
	}

	add_to_discard_list(discard_ctl, block_group);
}

/*
 * Remove a block group from the discard lists.  If it was the one being
 * worked on, wait for the running work to finish and reschedule the
 * delayed work to pick up the next block group.
 */
void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl,
			       struct btrfs_block_group *block_group)
{
	if (remove_from_discard_list(discard_ctl, block_group)) {
		cancel_delayed_work_sync(&discard_ctl->work);
		btrfs_discard_schedule_work(discard_ctl, true);
	}
}

/*
 * Queue a block group for async discard, maintaining the LRU order of
 * the discard lists, and kick the work item if it is not already
 * pending.
 */
void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl,
			      struct btrfs_block_group *block_group)
{
	if (!block_group || !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
		return;

	if (block_group->used == 0)
		add_to_discard_unused_list(discard_ctl, block_group);
	else
		add_to_discard_list(discard_ctl, block_group);

	if (!delayed_work_pending(&discard_ctl->work))
		btrfs_discard_schedule_work(discard_ctl, false);
}

static void __btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
					  u64 now, bool override)
{
	struct btrfs_block_group *block_group;

	if (!btrfs_run_discard_work(discard_ctl))
		return;
	if (!override && delayed_work_pending(&discard_ctl->work))
		return;

	block_group = find_next_block_group(discard_ctl, now);
	if (block_group) {
		u64 delay = discard_ctl->delay_ms * NSEC_PER_MSEC;
		u32 kbps_limit = READ_ONCE(discard_ctl->kbps_limit);

		/*
		 * A single delayed workqueue item is responsible for
		 * discarding, so the bytes rate limit can be enforced by
		 * delaying proportionally to how much was discarded last
		 * time around.
		 */
		if (kbps_limit && discard_ctl->prev_discard) {
			u64 bps_limit = ((u64)kbps_limit) * SZ_1K;
			u64 bps_delay = div64_u64(discard_ctl->prev_discard *
						  NSEC_PER_SEC, bps_limit);

			delay = max(delay, bps_delay);
		}

		/*
		 * This timeout is to hopefully prevent immediate discarding
		 * in a recently allocated block group.
		 */
		if (now < block_group->discard_eligible_time) {
			u64 bg_timeout = block_group->discard_eligible_time - now;

			delay = max(delay, bg_timeout);
		}

		if (override && discard_ctl->prev_discard) {
			u64 elapsed = now - discard_ctl->prev_discard_time;

			if (delay > elapsed)
				delay -= elapsed;
			else
				delay = 0;
		}

		mod_delayed_work(discard_ctl->discard_workers,
				 &discard_ctl->work, nsecs_to_jiffies(delay));
	}
}

/*
 * Schedule the delayed discard work item.
 *
 * @override forces the pending timer to be updated: the baseline delay
 * interval is reevaluated on transaction commit, and any time already
 * elapsed since the previous discard is credited against the new delay.
 */
void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
				 bool override)
{
	const u64 now = ktime_get_ns();

	spin_lock(&discard_ctl->lock);
	__btrfs_discard_schedule_work(discard_ctl, now, override);
	spin_unlock(&discard_ctl->lock);
}

/*
 * Determine the next step for a block group after a completed pass.
 *
 * If the block group is now unused, it is either handed to the
 * unused_bgs path (when fully trimmed) or requeued on the unused discard
 * list; otherwise it is promoted to the next discard list.
 */
static void btrfs_finish_discard_pass(struct btrfs_discard_ctl *discard_ctl,
				      struct btrfs_block_group *block_group)
{
	remove_from_discard_list(discard_ctl, block_group);

	if (block_group->used == 0) {
		if (btrfs_is_free_space_trimmed(block_group))
			btrfs_mark_bg_unused(block_group);
		else
			add_to_discard_unused_list(discard_ctl, block_group);
	} else {
		btrfs_update_discard_index(discard_ctl, block_group);
	}
}

/*
 * Discard work queue callback.
 *
 * Find the next block group to start discarding and then discard a
 * single region.  Discarding happens in a two-pass fashion: first
 * extents, then bitmaps.  Completely discarded, unused block groups are
 * sent down the unused_bgs path.
 */
static void btrfs_discard_workfn(struct work_struct *work)
{
	struct btrfs_discard_ctl *discard_ctl;
	struct btrfs_block_group *block_group;
	enum btrfs_discard_state discard_state;
	int discard_index = 0;
	u64 trimmed = 0;
	u64 minlen = 0;
	u64 now = ktime_get_ns();

	discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work);

	block_group = peek_discard_list(discard_ctl, &discard_state,
					&discard_index, now);
	if (!block_group || !btrfs_run_discard_work(discard_ctl))
		return;
	if (now < block_group->discard_eligible_time) {
		btrfs_discard_schedule_work(discard_ctl, false);
		return;
	}

	/* Perform discarding */
	minlen = discard_minlen[discard_index];

	if (discard_state == BTRFS_DISCARD_BITMAPS) {
		u64 maxlen = 0;

		/*
		 * Use the previous level's minimum discard length as the
		 * maximum, so this pass only picks up the pieces skipped
		 * over while discarding extents at that level.
		 */
		if (discard_index != BTRFS_DISCARD_INDEX_UNUSED)
			maxlen = discard_minlen[discard_index - 1];

		btrfs_trim_block_group_bitmaps(block_group, &trimmed,
					       block_group->discard_cursor,
					       btrfs_block_group_end(block_group),
					       minlen, maxlen, true);
		discard_ctl->discard_bitmap_bytes += trimmed;
	} else {
		btrfs_trim_block_group_extents(block_group, &trimmed,
					       block_group->discard_cursor,
					       btrfs_block_group_end(block_group),
					       minlen, true);
		discard_ctl->discard_extent_bytes += trimmed;
	}

	/* Determine next steps for this block group */
	if (block_group->discard_cursor >= btrfs_block_group_end(block_group)) {
		if (discard_state == BTRFS_DISCARD_BITMAPS) {
			btrfs_finish_discard_pass(discard_ctl, block_group);
		} else {
			block_group->discard_cursor = block_group->start;
			spin_lock(&discard_ctl->lock);
			if (block_group->discard_state !=
			    BTRFS_DISCARD_RESET_CURSOR)
				block_group->discard_state =
							BTRFS_DISCARD_BITMAPS;
			spin_unlock(&discard_ctl->lock);
		}
	}

	now = ktime_get_ns();
	spin_lock(&discard_ctl->lock);
	discard_ctl->prev_discard = trimmed;
	discard_ctl->prev_discard_time = now;
	discard_ctl->block_group = NULL;
	__btrfs_discard_schedule_work(discard_ctl, now, false);
	spin_unlock(&discard_ctl->lock);
}

/*
 * Determine if async discard should be running: the filesystem must be
 * writeable and BTRFS_FS_DISCARD_RUNNING must be set.
 */
bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl)
{
	struct btrfs_fs_info *fs_info = container_of(discard_ctl,
						     struct btrfs_fs_info,
						     discard_ctl);

	return (!(fs_info->sb->s_flags & SB_RDONLY) &&
		test_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags));
}

/*
 * Recalculate the base delay.
 *
 * The delay is derived from the total number of discardable extents so
 * that everything discardable is trimmed within roughly
 * BTRFS_DISCARD_TARGET_MSEC, or from the iops limit when one is set.
 * Either way it is clamped between BTRFS_DISCARD_MIN_DELAY_MSEC and
 * BTRFS_DISCARD_MAX_DELAY_MSEC.
 */
void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl)
{
	s32 discardable_extents;
	s64 discardable_bytes;
	u32 iops_limit;
	unsigned long delay;

	discardable_extents = atomic_read(&discard_ctl->discardable_extents);
	if (!discardable_extents)
		return;

	spin_lock(&discard_ctl->lock);

	/*
	 * The counters can transiently drift below zero.  This is the only
	 * consumer of these numbers and it runs from the synchronized
	 * transaction commit path, so correct the drift back to zero here.
	 */
	if (discardable_extents < 0)
		atomic_add(-discardable_extents,
			   &discard_ctl->discardable_extents);

	discardable_bytes = atomic64_read(&discard_ctl->discardable_bytes);
	if (discardable_bytes < 0)
		atomic64_add(-discardable_bytes,
			     &discard_ctl->discardable_bytes);

	if (discardable_extents <= 0) {
		spin_unlock(&discard_ctl->lock);
		return;
	}

	iops_limit = READ_ONCE(discard_ctl->iops_limit);
	if (iops_limit)
		delay = MSEC_PER_SEC / iops_limit;
	else
		delay = BTRFS_DISCARD_TARGET_MSEC / discardable_extents;

	delay = clamp(delay, BTRFS_DISCARD_MIN_DELAY_MSEC,
		      BTRFS_DISCARD_MAX_DELAY_MSEC);
	discard_ctl->delay_ms = delay;

	spin_unlock(&discard_ctl->lock);
}

/*
 * Propagate discard counters up to the discard_ctl.
 *
 * The free space ctl keeps a current and a previous value per counter;
 * the delta between them is added to the global stat, after which the
 * current value becomes the previous one.  The caller must hold
 * ctl->tree_lock.
 */
void btrfs_discard_update_discardable(struct btrfs_block_group *block_group)
{
	struct btrfs_free_space_ctl *ctl;
	struct btrfs_discard_ctl *discard_ctl;
	s32 extents_delta;
	s64 bytes_delta;

	if (!block_group ||
	    !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC) ||
	    !btrfs_is_block_group_data_only(block_group))
		return;

	ctl = block_group->free_space_ctl;
	discard_ctl = &block_group->fs_info->discard_ctl;

	lockdep_assert_held(&ctl->tree_lock);
	extents_delta = ctl->discardable_extents[BTRFS_STAT_CURR] -
			ctl->discardable_extents[BTRFS_STAT_PREV];
	if (extents_delta) {
		atomic_add(extents_delta, &discard_ctl->discardable_extents);
		ctl->discardable_extents[BTRFS_STAT_PREV] =
			ctl->discardable_extents[BTRFS_STAT_CURR];
	}

	bytes_delta = ctl->discardable_bytes[BTRFS_STAT_CURR] -
		      ctl->discardable_bytes[BTRFS_STAT_PREV];
	if (bytes_delta) {
		atomic64_add(bytes_delta, &discard_ctl->discardable_bytes);
		ctl->discardable_bytes[BTRFS_STAT_PREV] =
			ctl->discardable_bytes[BTRFS_STAT_CURR];
	}
}

/*
 * Punt the unused_bgs list to the discard lists.
 *
 * With async discard, a block group must be fully trimmed before it may
 * be reclaimed, so block groups queued as unused while discard was not
 * running (e.g. before enabling async discard at mount) are routed
 * through the discard lists here.
 */
void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info)
{
	struct btrfs_block_group *block_group, *next;

	spin_lock(&fs_info->unused_bgs_lock);

	list_for_each_entry_safe(block_group, next, &fs_info->unused_bgs,
				 bg_list) {
		list_del_init(&block_group->bg_list);
		btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
		/*
		 * Drop the unused_bgs list's reference only after the block
		 * group has been queued, so it cannot be freed while we are
		 * still using it.
		 */
		btrfs_put_block_group(block_group);
	}
	spin_unlock(&fs_info->unused_bgs_lock);
}

/*
 * Purge the discard lists when async discard is being disabled.
 *
 * We may have intercepted block groups that are completely free and
 * ready for the unused_bgs path.  As discarding will now happen in
 * transaction commit or not at all, send them along to unused_bgs.
 */
static void btrfs_discard_purge_list(struct btrfs_discard_ctl *discard_ctl)
{
	struct btrfs_block_group *block_group, *next;
	int i;

	spin_lock(&discard_ctl->lock);
	for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
		list_for_each_entry_safe(block_group, next,
					 &discard_ctl->discard_list[i],
					 discard_list) {
			list_del_init(&block_group->discard_list);
			spin_unlock(&discard_ctl->lock);
			if (block_group->used == 0)
				btrfs_mark_bg_unused(block_group);
			spin_lock(&discard_ctl->lock);
		}
	}
	spin_unlock(&discard_ctl->lock);
}
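
/*
 * Start async discard if DISCARD_ASYNC is enabled, first punting any
 * unused block groups through the discard lists; otherwise tear the
 * machinery down.
 */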
void btrfs_discard_resume(struct btrfs_fs_info *fs_info)
{
	if (!btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
		btrfs_discard_cleanup(fs_info);
		return;
	}

	btrfs_discard_punt_unused_bgs_list(fs_info);

	set_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
}
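
/* Stop issuing new async discards; queued block groups stay queued. */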
void btrfs_discard_stop(struct btrfs_fs_info *fs_info)
{
	clear_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
}
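
/*
 * Initialize discard control state at mount time: empty lists, zeroed
 * stats and the default rate limits (BTRFS_DISCARD_MAX_IOPS, no
 * bandwidth cap).
 */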
void btrfs_discard_init(struct btrfs_fs_info *fs_info)
{
	struct btrfs_discard_ctl *discard_ctl = &fs_info->discard_ctl;
	int i;

	spin_lock_init(&discard_ctl->lock);
	INIT_DELAYED_WORK(&discard_ctl->work, btrfs_discard_workfn);

	for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++)
		INIT_LIST_HEAD(&discard_ctl->discard_list[i]);

	discard_ctl->prev_discard = 0;
	discard_ctl->prev_discard_time = 0;
	atomic_set(&discard_ctl->discardable_extents, 0);
	atomic64_set(&discard_ctl->discardable_bytes, 0);
	discard_ctl->max_discard_size = BTRFS_ASYNC_DISCARD_DEFAULT_MAX_SIZE;
	discard_ctl->delay_ms = BTRFS_DISCARD_MAX_DELAY_MSEC;
	discard_ctl->iops_limit = BTRFS_DISCARD_MAX_IOPS;
	discard_ctl->kbps_limit = 0;
	discard_ctl->discard_extent_bytes = 0;
	discard_ctl->discard_bitmap_bytes = 0;
	atomic64_set(&discard_ctl->discard_bytes_saved, 0);
}
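
/* Stop the scheduler, flush any running work and purge the lists. */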
void btrfs_discard_cleanup(struct btrfs_fs_info *fs_info)
{
	btrfs_discard_stop(fs_info);
	cancel_delayed_work_sync(&fs_info->discard_ctl.work);
	btrfs_discard_purge_list(&fs_info->discard_ctl);
}