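/*
 * Primary bucket allocation code
 *
 * Allocation in bcache is done in terms of buckets:
 *
 * Each bucket has an 8 bit gen; a pointer into a bucket is stale once its gen
 * no longer matches the bucket's gen (see bch_inc_gen()).
 *
 * Each bucket also has a 16 bit prio; prios of cached data are decayed over
 * time (bch_rescale_priorities()), and buckets are reclaimed smallest
 * bucket_prio() first, which weights how recently a bucket was used by how
 * much live data it still holds.
 *
 * Reclaiming happens in two stages: invalidate_buckets() marks buckets
 * invalid in memory (bumping their gen) and pushes them onto the free_inc
 * fifo; the allocator thread then writes the new gens/prios to disk when the
 * cache is in sync mode and moves the buckets onto the per-reserve free
 * lists, from which bch_bucket_alloc() hands them out.
 *
 * invalidate_buckets_lru()/fifo()/random() implement the selectable cache
 * replacement policies, and bch_allocator_thread() ties everything together.
 */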
#include "bcache.h"
#include "btree.h"

#include <linux/blkdev.h>
#include <linux/kthread.h>
#include <linux/random.h>
#include <trace/events/bcache.h>

#define MAX_OPEN_BUCKETS 128
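
/* Bucket heap / gen */
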
uint8_t bch_inc_gen(struct cache *ca, struct bucket *b)
{
	uint8_t ret = ++b->gen;

	ca->set->need_gc = max(ca->set->need_gc, bucket_gc_gen(b));
	WARN_ON_ONCE(ca->set->need_gc > BUCKET_GC_GEN_MAX);

	return ret;
}

void bch_rescale_priorities(struct cache_set *c, int sectors)
{
	struct cache *ca;
	struct bucket *b;
	unsigned long next = c->nbuckets * c->cache->sb.bucket_size / 1024;
	int r;

	atomic_sub(sectors, &c->rescale);

	do {
		r = atomic_read(&c->rescale);

		if (r >= 0)
			return;
	} while (atomic_cmpxchg(&c->rescale, r, r + next) != r);

	mutex_lock(&c->bucket_lock);

	c->min_prio = USHRT_MAX;

	ca = c->cache;
	for_each_bucket(b, ca)
		if (b->prio &&
		    b->prio != BTREE_PRIO &&
		    !atomic_read(&b->pin)) {
			b->prio--;
			c->min_prio = min(c->min_prio, b->prio);
		}

	mutex_unlock(&c->bucket_lock);
}
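
/*
 * Background allocation thread: scans for buckets to be invalidated,
 * invalidates them, rewrites prios/gens (marking them as invalidated on disk),
 * then optionally issues discard commands to the newly free buckets, then puts
 * them on the various freelists.
 */
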
static inline bool can_inc_bucket_gen(struct bucket *b)
{
	return bucket_gc_gen(b) < BUCKET_GC_GEN_MAX;
}

bool bch_can_invalidate_bucket(struct cache *ca, struct bucket *b)
{
	BUG_ON(!ca->set->gc_mark_valid);

	return (!GC_MARK(b) ||
		GC_MARK(b) == GC_MARK_RECLAIMABLE) &&
		!atomic_read(&b->pin) &&
		can_inc_bucket_gen(b);
}

void __bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
{
	lockdep_assert_held(&ca->set->bucket_lock);
	BUG_ON(GC_MARK(b) && GC_MARK(b) != GC_MARK_RECLAIMABLE);

	if (GC_SECTORS_USED(b))
		trace_bcache_invalidate(ca, b - ca->buckets);

	bch_inc_gen(ca, b);
	b->prio = INITIAL_PRIO;
	atomic_inc(&b->pin);
}

static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
{
	__bch_invalidate_one_bucket(ca, b);

	fifo_push(&ca->free_inc, b - ca->buckets);
}
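
/*
 * Determines the order in which buckets are reused, smallest bucket_prio()
 * first: a bucket's prio is taken relative to the current minimum prio, offset
 * so that even the coldest bucket keeps a nonzero weight (roughly 1/8th of the
 * range up to INITIAL_PRIO), and is then multiplied by the number of live
 * sectors in the bucket, so cold, mostly empty buckets are invalidated before
 * hot or full ones.
 */
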
#define bucket_prio(b)							\
({									\
	unsigned int min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8;\
									\
	(b->prio - ca->set->min_prio + min_prio) * GC_SECTORS_USED(b);	\
})

#define bucket_max_cmp(l, r)	(bucket_prio(l) < bucket_prio(r))
#define bucket_min_cmp(l, r)	(bucket_prio(l) > bucket_prio(r))

static void invalidate_buckets_lru(struct cache *ca)
{
	struct bucket *b;
	ssize_t i;

	ca->heap.used = 0;

	for_each_bucket(b, ca) {
		if (!bch_can_invalidate_bucket(ca, b))
			continue;

		if (!heap_full(&ca->heap))
			heap_add(&ca->heap, b, bucket_max_cmp);
		else if (bucket_max_cmp(b, heap_peek(&ca->heap))) {
			ca->heap.data[0] = b;
			heap_sift(&ca->heap, 0, bucket_max_cmp);
		}
	}

	for (i = ca->heap.used / 2 - 1; i >= 0; --i)
		heap_sift(&ca->heap, i, bucket_min_cmp);

	while (!fifo_full(&ca->free_inc)) {
		if (!heap_pop(&ca->heap, b, bucket_min_cmp)) {
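			/*
			 * We don't want to be calling invalidate_buckets()
			 * multiple times when it can't do anything
			 */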
			ca->invalidate_needs_gc = 1;
			wake_up_gc(ca->set);
			return;
		}

		bch_invalidate_one_bucket(ca, b);
	}
}

static void invalidate_buckets_fifo(struct cache *ca)
{
	struct bucket *b;
	size_t checked = 0;

	while (!fifo_full(&ca->free_inc)) {
		if (ca->fifo_last_bucket < ca->sb.first_bucket ||
		    ca->fifo_last_bucket >= ca->sb.nbuckets)
			ca->fifo_last_bucket = ca->sb.first_bucket;

		b = ca->buckets + ca->fifo_last_bucket++;

		if (bch_can_invalidate_bucket(ca, b))
			bch_invalidate_one_bucket(ca, b);

		if (++checked >= ca->sb.nbuckets) {
			ca->invalidate_needs_gc = 1;
			wake_up_gc(ca->set);
			return;
		}
	}
}

static void invalidate_buckets_random(struct cache *ca)
{
	struct bucket *b;
	size_t checked = 0;

	while (!fifo_full(&ca->free_inc)) {
		size_t n;

		get_random_bytes(&n, sizeof(n));

		n %= (size_t) (ca->sb.nbuckets - ca->sb.first_bucket);
		n += ca->sb.first_bucket;

		b = ca->buckets + n;

		if (bch_can_invalidate_bucket(ca, b))
			bch_invalidate_one_bucket(ca, b);

		if (++checked >= ca->sb.nbuckets / 2) {
			ca->invalidate_needs_gc = 1;
			wake_up_gc(ca->set);
			return;
		}
	}
}

static void invalidate_buckets(struct cache *ca)
{
	BUG_ON(ca->invalidate_needs_gc);

	switch (CACHE_REPLACEMENT(&ca->sb)) {
	case CACHE_REPLACEMENT_LRU:
		invalidate_buckets_lru(ca);
		break;
	case CACHE_REPLACEMENT_FIFO:
		invalidate_buckets_fifo(ca);
		break;
	case CACHE_REPLACEMENT_RANDOM:
		invalidate_buckets_random(ca);
		break;
	}
}

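/*
 * Wait, with bucket_lock held, until @cond becomes true; the lock is dropped
 * while sleeping. Bails out to the allocator thread's "out" label if the
 * thread is being stopped or the cache set has been disabled.
 */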
#define allocator_wait(ca, cond)					\
do {									\
	while (1) {							\
		set_current_state(TASK_INTERRUPTIBLE);			\
		if (cond)						\
			break;						\
									\
		mutex_unlock(&(ca)->set->bucket_lock);			\
		if (kthread_should_stop() ||				\
		    test_bit(CACHE_SET_IO_DISABLE, &ca->set->flags)) {	\
			set_current_state(TASK_RUNNING);		\
			goto out;					\
		}							\
									\
		schedule();						\
		mutex_lock(&(ca)->set->bucket_lock);			\
	}								\
	__set_current_state(TASK_RUNNING);				\
} while (0)

static int bch_allocator_push(struct cache *ca, long bucket)
{
	unsigned int i;
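
	/* Prios/gens are actually the most important reserve */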
	if (fifo_push(&ca->free[RESERVE_PRIO], bucket))
		return true;

	for (i = 0; i < RESERVE_NR; i++)
		if (fifo_push(&ca->free[i], bucket))
			return true;

	return false;
}

static int bch_allocator_thread(void *arg)
{
	struct cache *ca = arg;

	mutex_lock(&ca->set->bucket_lock);

	while (1) {
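		/*
		 * First, pull any buckets waiting on free_inc, optionally
		 * issue a discard for each one, and then push them onto the
		 * per-reserve free lists:
		 */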
		while (1) {
			long bucket;

			if (!fifo_pop(&ca->free_inc, bucket))
				break;

			if (ca->discard) {
				mutex_unlock(&ca->set->bucket_lock);
				blkdev_issue_discard(ca->bdev,
					bucket_to_sector(ca->set, bucket),
					ca->sb.bucket_size, GFP_KERNEL);
				mutex_lock(&ca->set->bucket_lock);
			}

			allocator_wait(ca, bch_allocator_push(ca, bucket));
			wake_up(&ca->set->btree_cache_wait);
			wake_up(&ca->set->bucket_wait);
		}

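		/*
		 * We've run out of free buckets, we need to find some buckets
		 * we can invalidate. First, invalidate them in memory and add
		 * them to the free_inc list:
		 */
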
retry_invalidate:
		allocator_wait(ca, ca->set->gc_mark_valid &&
			       !ca->invalidate_needs_gc);
		invalidate_buckets(ca);
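
		/*
		 * Now, we write their new gens to disk so we can start writing
		 * new stuff to them:
		 */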
		allocator_wait(ca, !atomic_read(&ca->set->prio_blocked));
		if (CACHE_SYNC(&ca->sb)) {
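			/*
			 * This could deadlock if an allocation with a btree
			 * node locked ever blocked: holding a btree node lock
			 * would block garbage collection, and here we are
			 * waiting on garbage collection before we invalidate
			 * and free anything.
			 *
			 * In practice the btree code checks its own reserve
			 * (RESERVE_BTREE) before taking node locks, so btree
			 * node allocations are not expected to end up waiting
			 * here with nodes locked.
			 */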
			if (!fifo_full(&ca->free_inc))
				goto retry_invalidate;

			if (bch_prio_write(ca, false) < 0) {
				ca->invalidate_needs_gc = 1;
				wake_up_gc(ca->set);
			}
		}
	}
out:
	wait_for_kthread_stop();
	return 0;
}
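
/* Allocation */
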
long bch_bucket_alloc(struct cache *ca, unsigned int reserve, bool wait)
{
	DEFINE_WAIT(w);
	struct bucket *b;
	long r;
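
	/* No allocation if CACHE_SET_IO_DISABLE bit is set */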
	if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &ca->set->flags)))
		return -1;
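
	/* Fastpath: try the free lists directly before sleeping */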
	if (fifo_pop(&ca->free[RESERVE_NONE], r) ||
	    fifo_pop(&ca->free[reserve], r))
		goto out;

	if (!wait) {
		trace_bcache_alloc_fail(ca, reserve);
		return -1;
	}

	do {
		prepare_to_wait(&ca->set->bucket_wait, &w,
				TASK_UNINTERRUPTIBLE);

		mutex_unlock(&ca->set->bucket_lock);
		schedule();
		mutex_lock(&ca->set->bucket_lock);
	} while (!fifo_pop(&ca->free[RESERVE_NONE], r) &&
		 !fifo_pop(&ca->free[reserve], r));

	finish_wait(&ca->set->bucket_wait, &w);
out:
	if (ca->alloc_thread)
		wake_up_process(ca->alloc_thread);

	trace_bcache_alloc(ca, reserve);

	if (expensive_debug_checks(ca->set)) {
		size_t iter;
		long i;
		unsigned int j;

		for (iter = 0; iter < prio_buckets(ca) * 2; iter++)
			BUG_ON(ca->prio_buckets[iter] == (uint64_t) r);

		for (j = 0; j < RESERVE_NR; j++)
			fifo_for_each(i, &ca->free[j], iter)
				BUG_ON(i == r);
		fifo_for_each(i, &ca->free_inc, iter)
			BUG_ON(i == r);
	}

	b = ca->buckets + r;

	BUG_ON(atomic_read(&b->pin) != 1);

	SET_GC_SECTORS_USED(b, ca->sb.bucket_size);

	if (reserve <= RESERVE_PRIO) {
		SET_GC_MARK(b, GC_MARK_METADATA);
		SET_GC_MOVE(b, 0);
		b->prio = BTREE_PRIO;
	} else {
		SET_GC_MARK(b, GC_MARK_RECLAIMABLE);
		SET_GC_MOVE(b, 0);
		b->prio = INITIAL_PRIO;
	}

	if (ca->set->avail_nbuckets > 0) {
		ca->set->avail_nbuckets--;
		bch_update_bucket_in_use(ca->set, &ca->set->gc_stats);
	}

	return r;
}

void __bch_bucket_free(struct cache *ca, struct bucket *b)
{
	SET_GC_MARK(b, 0);
	SET_GC_SECTORS_USED(b, 0);

	if (ca->set->avail_nbuckets < ca->set->nbuckets) {
		ca->set->avail_nbuckets++;
		bch_update_bucket_in_use(ca->set, &ca->set->gc_stats);
	}
}

void bch_bucket_free(struct cache_set *c, struct bkey *k)
{
	unsigned int i;

	for (i = 0; i < KEY_PTRS(k); i++)
		__bch_bucket_free(c->cache, PTR_BUCKET(c, k, i));
}

int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
			   struct bkey *k, bool wait)
{
	struct cache *ca;
	long b;

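	/* No allocation if CACHE_SET_IO_DISABLE bit is set */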
	if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags)))
		return -1;

	lockdep_assert_held(&c->bucket_lock);

	bkey_init(k);

	ca = c->cache;
	b = bch_bucket_alloc(ca, reserve, wait);
	if (b == -1)
		goto err;

	k->ptr[0] = MAKE_PTR(ca->buckets[b].gen,
			     bucket_to_sector(c, b),
			     ca->sb.nr_this_dev);

	SET_KEY_PTRS(k, 1);

	return 0;
err:
	bch_bucket_free(c, k);
	bkey_put(c, k);
	return -1;
}

int bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
			 struct bkey *k, bool wait)
{
	int ret;

	mutex_lock(&c->bucket_lock);
	ret = __bch_bucket_alloc_set(c, reserve, k, wait);
	mutex_unlock(&c->bucket_lock);
	return ret;
}
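
/* Sector allocator */
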
struct open_bucket {
	struct list_head	list;
	unsigned int		last_write_point;
	unsigned int		sectors_free;
	BKEY_PADDED(key);
};

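/*
 * We keep multiple buckets open for writes, and try to segregate different
 * write streams for better cache utilization: first we try to segregate flash
 * only volume write streams from cached devices, secondly we look for a bucket
 * where the last write to it was sequential with the current write, and
 * failing that we look for a bucket that was last used by the same task.
 *
 * The idea is that if you have multiple tasks pulling data into the cache at
 * the same time, you get better cache utilization by segregating their data
 * and preserving locality: data written together tends to age out together,
 * so whole buckets become reclaimable at once instead of every bucket holding
 * a mix of hot and cold sectors.
 *
 * Flash only volume data is kept separate for a similar reason: its dirty
 * sectors are not reclaimable, so mixing them into buckets that also hold
 * cached device data would keep those buckets pinned even after the cached
 * data has been written back.
 *
 * Since the tasks may be doing fairly random IO we can't rely on detecting
 * sequential IO alone to segregate their data, so the write_point (task) is
 * the fallback heuristic.
 */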
static struct open_bucket *pick_data_bucket(struct cache_set *c,
					    const struct bkey *search,
					    unsigned int write_point,
					    struct bkey *alloc)
{
	struct open_bucket *ret, *ret_task = NULL;

	list_for_each_entry_reverse(ret, &c->data_buckets, list)
		if (UUID_FLASH_ONLY(&c->uuids[KEY_INODE(&ret->key)]) !=
		    UUID_FLASH_ONLY(&c->uuids[KEY_INODE(search)]))
			continue;
		else if (!bkey_cmp(&ret->key, search))
			goto found;
		else if (ret->last_write_point == write_point)
			ret_task = ret;

	ret = ret_task ?: list_first_entry(&c->data_buckets,
					   struct open_bucket, list);
found:
	if (!ret->sectors_free && KEY_PTRS(alloc)) {
		ret->sectors_free = c->cache->sb.bucket_size;
		bkey_copy(&ret->key, alloc);
		bkey_init(alloc);
	}

	if (!ret->sectors_free)
		ret = NULL;

	return ret;
}

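/*
 * Allocates some space in the cache to write to, and updates k to point to
 * the newly allocated space: KEY_OFFSET(k) is set to the end of the allocated
 * space and KEY_SIZE(k) to the number of sectors actually allocated, which
 * may be less than @sectors.
 *
 * With wait == true the underlying bucket allocation sleeps until a bucket is
 * available instead of failing immediately (it can still fail if the cache
 * set is shutting down).
 */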
bool bch_alloc_sectors(struct cache_set *c,
		       struct bkey *k,
		       unsigned int sectors,
		       unsigned int write_point,
		       unsigned int write_prio,
		       bool wait)
{
	struct open_bucket *b;
	BKEY_PADDED(key) alloc;
	unsigned int i;

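	/*
	 * We might have to allocate a new bucket, which we can't do with a
	 * spinlock held. So if we have to allocate, we drop the lock, allocate
	 * and then retry. KEY_PTRS() indicates whether alloc points to
	 * allocated bucket(s).
	 */
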
	bkey_init(&alloc.key);
	spin_lock(&c->data_bucket_lock);

	while (!(b = pick_data_bucket(c, k, write_point, &alloc.key))) {
		unsigned int watermark = write_prio
			? RESERVE_MOVINGGC
			: RESERVE_NONE;

		spin_unlock(&c->data_bucket_lock);

		if (bch_bucket_alloc_set(c, watermark, &alloc.key, wait))
			return false;

		spin_lock(&c->data_bucket_lock);
	}

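	/*
	 * If we had to allocate, we might race and not need to allocate the
	 * second time we call pick_data_bucket(). If we allocated a bucket but
	 * didn't use it, drop the refcount bch_bucket_alloc_set() took:
	 */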
	if (KEY_PTRS(&alloc.key))
		bkey_put(c, &alloc.key);

	for (i = 0; i < KEY_PTRS(&b->key); i++)
		EBUG_ON(ptr_stale(c, &b->key, i));

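	/* Set up the pointer to the space we're allocating: */
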
	for (i = 0; i < KEY_PTRS(&b->key); i++)
		k->ptr[i] = b->key.ptr[i];

	sectors = min(sectors, b->sectors_free);

	SET_KEY_OFFSET(k, KEY_OFFSET(k) + sectors);
	SET_KEY_SIZE(k, sectors);
	SET_KEY_PTRS(k, KEY_PTRS(&b->key));

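	/*
	 * Move b to the end of the lru, and keep track of what this bucket was
	 * last used for:
	 */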
	list_move_tail(&b->list, &c->data_buckets);
	bkey_copy_key(&b->key, k);
	b->last_write_point = write_point;

	b->sectors_free -= sectors;

	for (i = 0; i < KEY_PTRS(&b->key); i++) {
		SET_PTR_OFFSET(&b->key, i, PTR_OFFSET(&b->key, i) + sectors);

		atomic_long_add(sectors,
				&c->cache->sectors_written);
	}

	if (b->sectors_free < c->cache->sb.block_size)
		b->sectors_free = 0;

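	/*
	 * k holds references (bucket pins) on the buckets it points to until
	 * it's inserted into the btree; if this open_bucket still has space
	 * left, take extra pins for k so the open_bucket keeps its own:
	 */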
	if (b->sectors_free)
		for (i = 0; i < KEY_PTRS(&b->key); i++)
			atomic_inc(&PTR_BUCKET(c, &b->key, i)->pin);

	spin_unlock(&c->data_bucket_lock);
	return true;
}

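/* Init */
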
void bch_open_buckets_free(struct cache_set *c)
{
	struct open_bucket *b;

	while (!list_empty(&c->data_buckets)) {
		b = list_first_entry(&c->data_buckets,
				     struct open_bucket, list);
		list_del(&b->list);
		kfree(b);
	}
}

int bch_open_buckets_alloc(struct cache_set *c)
{
	int i;

	spin_lock_init(&c->data_bucket_lock);

	for (i = 0; i < MAX_OPEN_BUCKETS; i++) {
		struct open_bucket *b = kzalloc(sizeof(*b), GFP_KERNEL);

		if (!b)
			return -ENOMEM;

		list_add(&b->list, &c->data_buckets);
	}

	return 0;
}

int bch_cache_allocator_start(struct cache *ca)
{
	struct task_struct *k = kthread_run(bch_allocator_thread,
					    ca, "bcache_allocator");
	if (IS_ERR(k))
		return PTR_ERR(k);

	ca->alloc_thread = k;
	return 0;
}