/*
 * Main bcache entry point - handle a read or a write request and decide what
 * to do with it; the make_request functions are called by the block layer.
 */

#include "bcache.h"
#include "btree.h"
#include "debug.h"
#include "request.h"
#include "writeback.h"

#include <linux/module.h>
#include <linux/hash.h>
#include <linux/random.h>
#include <linux/backing-dev.h>

#include <trace/events/bcache.h>

#define CUTOFF_CACHE_ADD	95
#define CUTOFF_CACHE_READA	90

struct kmem_cache *bch_search_cache;

static void bch_data_insert_start(struct closure *cl);

static unsigned int cache_mode(struct cached_dev *dc)
{
	return BDEV_CACHE_MODE(&dc->sb);
}

static bool verify(struct cached_dev *dc)
{
	return dc->verify;
}

static void bio_csum(struct bio *bio, struct bkey *k)
{
	struct bio_vec bv;
	struct bvec_iter iter;
	uint64_t csum = 0;

	bio_for_each_segment(bv, bio, iter) {
		void *d = bvec_kmap_local(&bv);

		csum = crc64_be(csum, d, bv.bv_len);
		kunmap_local(d);
	}

	k->ptr[KEY_PTRS(k)] = csum & (~0ULL >> 1);
}

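/* Insert data into cache */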
static void bch_data_insert_keys(struct closure *cl)
{
	struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
	atomic_t *journal_ref = NULL;
	struct bkey *replace_key = op->replace ? &op->replace_key : NULL;
	int ret;

	if (!op->replace)
		journal_ref = bch_journal(op->c, &op->insert_keys,
					  op->flush_journal ? cl : NULL);

	ret = bch_btree_insert(op->c, &op->insert_keys,
			       journal_ref, replace_key);
	if (ret == -ESRCH) {
		op->replace_collision = true;
	} else if (ret) {
		op->status = BLK_STS_RESOURCE;
		op->insert_data_done = true;
	}

	if (journal_ref)
		atomic_dec_bug(journal_ref);

	if (!op->insert_data_done) {
		continue_at(cl, bch_data_insert_start, op->wq);
		return;
	}

	bch_keylist_free(&op->insert_keys);
	closure_return(cl);
}

static int bch_keylist_realloc(struct keylist *l, unsigned int u64s,
			       struct cache_set *c)
{
	size_t oldsize = bch_keylist_nkeys(l);
	size_t newsize = oldsize + u64s;

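	/*
	 * The keys we are about to add have to fit into a single journal
	 * write: if the enlarged keylist would exceed a cache block (minus
	 * the jset header), refuse to grow it so the caller flushes what it
	 * has accumulated so far.
	 */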
	if (newsize * sizeof(uint64_t) > block_bytes(c->cache) - sizeof(struct jset))
		return -ENOMEM;

	return __bch_keylist_realloc(l, u64s);
}

static void bch_data_invalidate(struct closure *cl)
{
	struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
	struct bio *bio = op->bio;

	pr_debug("invalidating %i sectors from %llu\n",
		 bio_sectors(bio), (uint64_t) bio->bi_iter.bi_sector);

	while (bio_sectors(bio)) {
		unsigned int sectors = min(bio_sectors(bio),
					   1U << (KEY_SIZE_BITS - 1));

		if (bch_keylist_realloc(&op->insert_keys, 2, op->c))
			goto out;

		bio->bi_iter.bi_sector += sectors;
		bio->bi_iter.bi_size -= sectors << 9;

		bch_keylist_add(&op->insert_keys,
				&KEY(op->inode,
				     bio->bi_iter.bi_sector,
				     sectors));
	}

	op->insert_data_done = true;
	/* drop the reference taken in bch_data_insert() */
	bio_put(bio);
out:
	continue_at(cl, bch_data_insert_keys, op->wq);
}

static void bch_data_insert_error(struct closure *cl)
{
	struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);

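	/*
	 * Our data write just errored, which means we've got a bunch of keys
	 * to insert that point to data that wasn't successfully written.
	 *
	 * We don't have to insert those keys but we still have to invalidate
	 * that region of the cache - so, if we just strip off all the
	 * pointers from the keys we'll accomplish just that.
	 */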
	struct bkey *src = op->insert_keys.keys, *dst = op->insert_keys.keys;

	while (src != op->insert_keys.top) {
		struct bkey *n = bkey_next(src);

		SET_KEY_PTRS(src, 0);
		memmove(dst, src, bkey_bytes(src));

		dst = bkey_next(dst);
		src = n;
	}

	op->insert_keys.top = dst;

	bch_data_insert_keys(cl);
}

static void bch_data_insert_endio(struct bio *bio)
{
	struct closure *cl = bio->bi_private;
	struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);

	if (bio->bi_status) {
		/* TODO: We could try to recover from this. */
		if (op->writeback)
			op->status = bio->bi_status;
		else if (!op->replace)
			set_closure_fn(cl, bch_data_insert_error, op->wq);
		else
			set_closure_fn(cl, NULL, NULL);
	}

	bch_bbio_endio(op->c, bio, bio->bi_status, "writing data to cache");
}

static void bch_data_insert_start(struct closure *cl)
{
	struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
	struct bio *bio = op->bio, *n;

	if (op->bypass)
		return bch_data_invalidate(cl);

	if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0)
		wake_up_gc(op->c);

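	/*
	 * Journal writes are marked REQ_PREFLUSH; if the original write was a
	 * flush, it'll wait on the journal write.
	 */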
	bio->bi_opf &= ~(REQ_PREFLUSH|REQ_FUA);

	do {
		unsigned int i;
		struct bkey *k;
		struct bio_set *split = &op->c->bio_split;

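		/* 1 for the device pointer and 1 for the chksum */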
		if (bch_keylist_realloc(&op->insert_keys,
					3 + (op->csum ? 1 : 0),
					op->c)) {
			continue_at(cl, bch_data_insert_keys, op->wq);
			return;
		}

		k = op->insert_keys.top;
		bkey_init(k);
		SET_KEY_INODE(k, op->inode);
		SET_KEY_OFFSET(k, bio->bi_iter.bi_sector);

		if (!bch_alloc_sectors(op->c, k, bio_sectors(bio),
				       op->write_point, op->write_prio,
				       op->writeback))
			goto err;

		n = bio_next_split(bio, KEY_SIZE(k), GFP_NOIO, split);

		n->bi_end_io = bch_data_insert_endio;
		n->bi_private = cl;

		if (op->writeback) {
			SET_KEY_DIRTY(k, true);

			for (i = 0; i < KEY_PTRS(k); i++)
				SET_GC_MARK(PTR_BUCKET(op->c, k, i),
					    GC_MARK_DIRTY);
		}

		SET_KEY_CSUM(k, op->csum);
		if (KEY_CSUM(k))
			bio_csum(n, k);

		trace_bcache_cache_insert(k);
		bch_keylist_push(&op->insert_keys);

		bio_set_op_attrs(n, REQ_OP_WRITE, 0);
		bch_submit_bbio(n, op->c, k, 0);
	} while (n != bio);

	op->insert_data_done = true;
	continue_at(cl, bch_data_insert_keys, op->wq);
	return;
err:
	/* bch_alloc_sectors() blocks if op->writeback is set */
	BUG_ON(op->writeback);

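	/*
	 * But if it's not a writeback write we'd rather just bail out if
	 * there aren't any buckets ready to write to - it might take awhile
	 * and we might be starving btree writes for gc or something.
	 */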
	if (!op->replace) {
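		/*
		 * Writethrough write: We can't complete the write until we've
		 * updated the index. But we don't want to delay the write
		 * while we wait for buckets to be freed up, so just
		 * invalidate the rest of the write.
		 */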
		op->bypass = true;
		return bch_data_invalidate(cl);
	} else {
		/*
		 * From a cache miss, we can just insert the keys for the data
		 * we have written or bail out if we didn't do anything.
		 */
		op->insert_data_done = true;
		bio_put(bio);

		if (!bch_keylist_empty(&op->insert_keys))
			continue_at(cl, bch_data_insert_keys, op->wq);
		else
			closure_return(cl);
	}
}
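/**
 * bch_data_insert - stick some data in the cache
 * @cl: closure pointer
 *
 * This is the starting point for any data to end up in a cache device; it
 * could be from a normal write, or a writeback write, or a write to a flash
 * only volume - it's also used by the moving garbage collector to compact
 * data in mostly full buckets.
 *
 * It first writes the data to the cache, creating a list of keys to be
 * inserted (if the data had to be fragmented there will be multiple keys);
 * after the data is written it calls bch_journal, and after the keys have
 * been added to the next journal write they're inserted into the btree.
 *
 * It inserts the data in op->bio; bi_sector is used for the key offset, and
 * op->inode is used for the key inode.
 *
 * If op->bypass is true, instead of inserting the data it invalidates the
 * region of the cache represented by op->bio and op->inode.
 */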
void bch_data_insert(struct closure *cl)
{
	struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);

	trace_bcache_write(op->c, op->inode, op->bio,
			   op->writeback, op->bypass);

	bch_keylist_init(&op->insert_keys);
	bio_get(op->bio);
	bch_data_insert_start(cl);
}

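/*
 * Congested?  Return 0 (not congested) or the limit (in sectors) beyond
 * which we should bypass the cache due to congestion.
 */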
unsigned int bch_get_congested(const struct cache_set *c)
{
	int i;

	if (!c->congested_read_threshold_us &&
	    !c->congested_write_threshold_us)
		return 0;

	i = (local_clock_us() - c->congested_last_us) / 1024;
	if (i < 0)
		return 0;

	i += atomic_read(&c->congested);
	if (i >= 0)
		return 0;

	i += CONGESTED_MAX;

	if (i > 0)
		i = fract_exp_two(i, 6);

	i -= hweight32(get_random_u32());

	return i > 0 ? i : 1;
}

static void add_sequential(struct task_struct *t)
{
	ewma_add(t->sequential_io_avg,
		 t->sequential_io, 8, 0);

	t->sequential_io = 0;
}

static struct hlist_head *iohash(struct cached_dev *dc, uint64_t k)
{
	return &dc->io_hash[hash_64(k, RECENT_IO_BITS)];
}

static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
{
	struct cache_set *c = dc->disk.c;
	unsigned int mode = cache_mode(dc);
	unsigned int sectors, congested;
	struct task_struct *task = current;
	struct io *i;

	if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
	    c->gc_stats.in_use > CUTOFF_CACHE_ADD ||
	    (bio_op(bio) == REQ_OP_DISCARD))
		goto skip;

	if (mode == CACHE_MODE_NONE ||
	    (mode == CACHE_MODE_WRITEAROUND &&
	     op_is_write(bio_op(bio))))
		goto skip;

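	/*
	 * If the bio is for read-ahead or background IO, whether to bypass
	 * it depends on the following:
	 * - If the IO is for metadata, always cache it and don't bypass.
	 * - If the IO is not metadata, check dc->cache_readahead_policy:
	 *      BCH_CACHE_READA_ALL: cache it and don't bypass
	 *      BCH_CACHE_READA_META_ONLY: don't cache it and bypass
	 * That is, read-ahead requests for metadata always get cached
	 * (e.g. for gfs2 or xfs).
	 */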
	if ((bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND))) {
		if (!(bio->bi_opf & (REQ_META|REQ_PRIO)) &&
		    (dc->cache_readahead_policy != BCH_CACHE_READA_ALL))
			goto skip;
	}

	if (bio->bi_iter.bi_sector & (c->cache->sb.block_size - 1) ||
	    bio_sectors(bio) & (c->cache->sb.block_size - 1)) {
		pr_debug("skipping unaligned io\n");
		goto skip;
	}

	if (bypass_torture_test(dc)) {
		if ((get_random_int() & 3) == 3)
			goto skip;
		else
			goto rescale;
	}

	congested = bch_get_congested(c);
	if (!congested && !dc->sequential_cutoff)
		goto rescale;

	spin_lock(&dc->io_lock);

	hlist_for_each_entry(i, iohash(dc, bio->bi_iter.bi_sector), hash)
		if (i->last == bio->bi_iter.bi_sector &&
		    time_before(jiffies, i->jiffies))
			goto found;

	i = list_first_entry(&dc->io_lru, struct io, lru);

	add_sequential(task);
	i->sequential = 0;
found:
	if (i->sequential + bio->bi_iter.bi_size > i->sequential)
		i->sequential += bio->bi_iter.bi_size;

	i->last = bio_end_sector(bio);
	i->jiffies = jiffies + msecs_to_jiffies(5000);
	task->sequential_io = i->sequential;

	hlist_del(&i->hash);
	hlist_add_head(&i->hash, iohash(dc, i->last));
	list_move_tail(&i->lru, &dc->io_lru);

	spin_unlock(&dc->io_lock);

	sectors = max(task->sequential_io,
		      task->sequential_io_avg) >> 9;

	if (dc->sequential_cutoff &&
	    sectors >= dc->sequential_cutoff >> 9) {
		trace_bcache_bypass_sequential(bio);
		goto skip;
	}

	if (congested && sectors >= congested) {
		trace_bcache_bypass_congested(bio);
		goto skip;
	}

rescale:
	bch_rescale_priorities(c, bio_sectors(bio));
	return false;
skip:
	bch_mark_sectors_bypassed(c, dc, bio_sectors(bio));
	return true;
}

/* Cache lookup */

struct search {
	/* Stack frame for bio_complete */
	struct closure		cl;

	struct bbio		bio;
	struct bio		*orig_bio;
	struct bio		*cache_miss;
	struct bcache_device	*d;

	unsigned int		insert_bio_sectors;
	unsigned int		recoverable:1;
	unsigned int		write:1;
	unsigned int		read_dirty_data:1;
	unsigned int		cache_missed:1;

	struct block_device	*orig_bdev;
	unsigned long		start_time;

	struct btree_op		op;
	struct data_insert_op	iop;
};

static void bch_cache_read_endio(struct bio *bio)
{
	struct bbio *b = container_of(bio, struct bbio, bio);
	struct closure *cl = bio->bi_private;
	struct search *s = container_of(cl, struct search, cl);

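	/*
	 * If the bucket was reused while our bio was in flight, we might have
	 * read the wrong data. Set s->iop.status so we'll reread the data
	 * from the backing device, but count a cache read race rather than
	 * an error against the cache device.
	 */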
	if (bio->bi_status)
		s->iop.status = bio->bi_status;
	else if (!KEY_DIRTY(&b->key) &&
		 ptr_stale(s->iop.c, &b->key, 0)) {
		atomic_long_inc(&s->iop.c->cache_read_races);
		s->iop.status = BLK_STS_IOERR;
	}

	bch_bbio_endio(s->iop.c, bio, bio->bi_status, "reading from cache");
}

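/*
 * Read from a single key, handling the initial cache miss if the key starts
 * in the middle of the bio
 */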
static int cache_lookup_fn(struct btree_op *op, struct btree *b, struct bkey *k)
{
	struct search *s = container_of(op, struct search, op);
	struct bio *n, *bio = &s->bio.bio;
	struct bkey *bio_key;
	unsigned int ptr;

	if (bkey_cmp(k, &KEY(s->iop.inode, bio->bi_iter.bi_sector, 0)) <= 0)
		return MAP_CONTINUE;

	if (KEY_INODE(k) != s->iop.inode ||
	    KEY_START(k) > bio->bi_iter.bi_sector) {
		unsigned int bio_sectors = bio_sectors(bio);
		unsigned int sectors = KEY_INODE(k) == s->iop.inode
			? min_t(uint64_t, INT_MAX,
				KEY_START(k) - bio->bi_iter.bi_sector)
			: INT_MAX;
		int ret = s->d->cache_miss(b, s, bio, sectors);

		if (ret != MAP_CONTINUE)
			return ret;

		/* if this was a complete miss we shouldn't get here */
		BUG_ON(bio_sectors <= sectors);
	}

	if (!KEY_SIZE(k))
		return MAP_CONTINUE;

	/* XXX: figure out best pointer - for multiple cache devices */
	ptr = 0;

	PTR_BUCKET(b->c, k, ptr)->prio = INITIAL_PRIO;

	if (KEY_DIRTY(k))
		s->read_dirty_data = true;

	n = bio_next_split(bio, min_t(uint64_t, INT_MAX,
				      KEY_OFFSET(k) - bio->bi_iter.bi_sector),
			   GFP_NOIO, &s->d->bio_split);

	bio_key = &container_of(n, struct bbio, bio)->key;
	bch_bkey_copy_single_ptr(bio_key, k, ptr);

	bch_cut_front(&KEY(s->iop.inode, n->bi_iter.bi_sector, 0), bio_key);
	bch_cut_back(&KEY(s->iop.inode, bio_end_sector(n), 0), bio_key);

	n->bi_end_io = bch_cache_read_endio;
	n->bi_private = &s->cl;

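	/*
	 * The bucket we're reading from might be reused while our bio
	 * is in flight, and we could then end up reading the wrong data.
	 *
	 * We guard against this by checking (in bch_cache_read_endio()) if
	 * the pointer is stale again; if so, we treat it as an error and
	 * reread from the backing device (but we don't pass that error up
	 * anywhere).
	 */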
	__bch_submit_bbio(n, b->c);
	return n == bio ? MAP_DONE : MAP_CONTINUE;
}

static void cache_lookup(struct closure *cl)
{
	struct search *s = container_of(cl, struct search, iop.cl);
	struct bio *bio = &s->bio.bio;
	struct cached_dev *dc;
	int ret;

	bch_btree_op_init(&s->op, -1);

	ret = bch_btree_map_keys(&s->op, s->iop.c,
				 &KEY(s->iop.inode, bio->bi_iter.bi_sector, 0),
				 cache_lookup_fn, MAP_END_KEY);
	if (ret == -EAGAIN) {
		continue_at(cl, cache_lookup, bcache_wq);
		return;
	}

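	/*
	 * If an error occurred during the btree search, ret will be negative.
	 * In that case we must not try to recover data from the backing
	 * device while the cache device holds dirty data, because we don't
	 * know whether all the bkeys covered by this request were clean.
	 * At this point s->iop.status still has its initial value from
	 * before s->bio.bio was submitted.
	 */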
	if (ret < 0) {
		BUG_ON(ret == -EINTR);
		if (s->d && s->d->c &&
		    !UUID_FLASH_ONLY(&s->d->c->uuids[s->d->id])) {
			dc = container_of(s->d, struct cached_dev, disk);
			if (dc && atomic_read(&dc->has_dirty))
				s->recoverable = false;
		}
		if (!s->iop.status)
			s->iop.status = BLK_STS_IOERR;
	}

	closure_return(cl);
}

/* Common code for the make_request functions */

static void request_endio(struct bio *bio)
{
	struct closure *cl = bio->bi_private;

	if (bio->bi_status) {
		struct search *s = container_of(cl, struct search, cl);

		s->iop.status = bio->bi_status;
		/* Only cache read errors are recoverable */
		s->recoverable = false;
	}

	bio_put(bio);
	closure_put(cl);
}

static void backing_request_endio(struct bio *bio)
{
	struct closure *cl = bio->bi_private;

	if (bio->bi_status) {
		struct search *s = container_of(cl, struct search, cl);
		struct cached_dev *dc = container_of(s->d,
						     struct cached_dev, disk);

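		/*
		 * If a bio has REQ_PREFLUSH for writeback mode, it is
		 * specially assembled in cached_dev_write() for a non-zero
		 * write request which has REQ_PREFLUSH. We don't set
		 * s->iop.status for that failure; the status will be decided
		 * by the result of the bch_data_insert() operation.
		 */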
		if (unlikely(s->iop.writeback &&
			     bio->bi_opf & REQ_PREFLUSH)) {
			pr_err("Can't flush %pg: returned bi_status %i\n",
			       dc->bdev, bio->bi_status);
		} else {
			/* set to orig_bio->bi_status in bio_complete() */
			s->iop.status = bio->bi_status;
		}
		s->recoverable = false;
		/* should count I/O error for backing device here */
		bch_count_backing_io_errors(dc, bio);
	}

	bio_put(bio);
	closure_put(cl);
}

static void bio_complete(struct search *s)
{
	if (s->orig_bio) {
		/* Count on the bcache device */
		bio_end_io_acct_remapped(s->orig_bio, s->start_time,
					 s->orig_bdev);
		trace_bcache_request_end(s->d, s->orig_bio);
		s->orig_bio->bi_status = s->iop.status;
		bio_endio(s->orig_bio);
		s->orig_bio = NULL;
	}
}

static void do_bio_hook(struct search *s,
			struct bio *orig_bio,
			bio_end_io_t *end_io_fn)
{
	struct bio *bio = &s->bio.bio;

	bio_init_clone(orig_bio->bi_bdev, bio, orig_bio, GFP_NOIO);
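	/*
	 * bi_end_io can be set separately somewhere else, e.g. the
	 * variants in,
	 * - cache_bio->bi_end_io from cached_dev_cache_miss()
	 * - n->bi_end_io from cache_lookup_fn()
	 */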
	bio->bi_end_io = end_io_fn;
	bio->bi_private = &s->cl;

	bio_cnt_set(bio, 3);
}

static void search_free(struct closure *cl)
{
	struct search *s = container_of(cl, struct search, cl);

	atomic_dec(&s->iop.c->search_inflight);

	if (s->iop.bio)
		bio_put(s->iop.bio);

	bio_complete(s);
	closure_debug_destroy(cl);
	mempool_free(s, &s->iop.c->search);
}

static inline struct search *search_alloc(struct bio *bio,
		struct bcache_device *d, struct block_device *orig_bdev,
		unsigned long start_time)
{
	struct search *s;

	s = mempool_alloc(&d->c->search, GFP_NOIO);

	closure_init(&s->cl, NULL);
	do_bio_hook(s, bio, request_endio);
	atomic_inc(&d->c->search_inflight);

	s->orig_bio = bio;
	s->cache_miss = NULL;
	s->cache_missed = 0;
	s->d = d;
	s->recoverable = 1;
	s->write = op_is_write(bio_op(bio));
	s->read_dirty_data = 0;

	s->orig_bdev = orig_bdev;
	s->start_time = start_time;
	s->iop.c = d->c;
	s->iop.bio = NULL;
	s->iop.inode = d->id;
	s->iop.write_point = hash_long((unsigned long) current, 16);
	s->iop.write_prio = 0;
	s->iop.status = 0;
	s->iop.flags = 0;
	s->iop.flush_journal = op_is_flush(bio->bi_opf);
	s->iop.wq = bcache_wq;

	return s;
}

/* Cached devices */

static void cached_dev_bio_complete(struct closure *cl)
{
	struct search *s = container_of(cl, struct search, cl);
	struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);

	cached_dev_put(dc);
	search_free(cl);
}

/* Process reads */

static void cached_dev_read_error_done(struct closure *cl)
{
	struct search *s = container_of(cl, struct search, cl);

	if (s->iop.replace_collision)
		bch_mark_cache_miss_collision(s->iop.c, s->d);

	if (s->iop.bio)
		bio_free_pages(s->iop.bio);

	cached_dev_bio_complete(cl);
}

static void cached_dev_read_error(struct closure *cl)
{
	struct search *s = container_of(cl, struct search, cl);
	struct bio *bio = &s->bio.bio;

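	/*
	 * If the read request hit dirty data (s->read_dirty_data is true),
	 * then recovering a failed read from the cached device may return
	 * stale data. So read failure recovery is only permitted when the
	 * read request hit clean data in the cache device, or when a cache
	 * read race happened.
	 */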
	if (s->recoverable && !s->read_dirty_data) {
		/* Retry from the backing device: */
		trace_bcache_read_retry(s->orig_bio);

		s->iop.status = 0;
		do_bio_hook(s, s->orig_bio, backing_request_endio);

		/* XXX: invalidate cache */

		closure_bio_submit(s->iop.c, bio, cl);
	}

	continue_at(cl, cached_dev_read_error_done, NULL);
}

static void cached_dev_cache_miss_done(struct closure *cl)
{
	struct search *s = container_of(cl, struct search, cl);
	struct bcache_device *d = s->d;

	if (s->iop.replace_collision)
		bch_mark_cache_miss_collision(s->iop.c, s->d);

	if (s->iop.bio)
		bio_free_pages(s->iop.bio);

	cached_dev_bio_complete(cl);
	closure_put(&d->cl);
}

static void cached_dev_read_done(struct closure *cl)
{
	struct search *s = container_of(cl, struct search, cl);
	struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);

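	/*
	 * We had a cache miss; cache_bio now contains data ready to be
	 * inserted into the cache.
	 *
	 * First, we copy the data we just read from cache_bio's bounce
	 * buffers to the buffers the original bio pointed to:
	 */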
	if (s->iop.bio) {
		bio_reset(s->iop.bio, s->cache_miss->bi_bdev, REQ_OP_READ);
		s->iop.bio->bi_iter.bi_sector =
			s->cache_miss->bi_iter.bi_sector;
		s->iop.bio->bi_iter.bi_size = s->insert_bio_sectors << 9;
		bio_clone_blkg_association(s->iop.bio, s->cache_miss);
		bch_bio_map(s->iop.bio, NULL);

		bio_copy_data(s->cache_miss, s->iop.bio);

		bio_put(s->cache_miss);
		s->cache_miss = NULL;
	}

	if (verify(dc) && s->recoverable && !s->read_dirty_data)
		bch_data_verify(dc, s->orig_bio);

	closure_get(&dc->disk.cl);
	bio_complete(s);

	if (s->iop.bio &&
	    !test_bit(CACHE_SET_STOPPING, &s->iop.c->flags)) {
		BUG_ON(!s->iop.replace);
		closure_call(&s->iop.cl, bch_data_insert, NULL, cl);
	}

	continue_at(cl, cached_dev_cache_miss_done, NULL);
}

static void cached_dev_read_done_bh(struct closure *cl)
{
	struct search *s = container_of(cl, struct search, cl);
	struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);

	bch_mark_cache_accounting(s->iop.c, s->d,
				  !s->cache_missed, s->iop.bypass);
	trace_bcache_read(s->orig_bio, !s->cache_missed, s->iop.bypass);

	if (s->iop.status)
		continue_at_nobarrier(cl, cached_dev_read_error, bcache_wq);
	else if (s->iop.bio || verify(dc))
		continue_at_nobarrier(cl, cached_dev_read_done, bcache_wq);
	else
		continue_at_nobarrier(cl, cached_dev_bio_complete, NULL);
}

static int cached_dev_cache_miss(struct btree *b, struct search *s,
				 struct bio *bio, unsigned int sectors)
{
	int ret = MAP_CONTINUE;
	struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
	struct bio *miss, *cache_bio;
	unsigned int size_limit;

	s->cache_missed = 1;

	if (s->cache_miss || s->iop.bypass) {
		miss = bio_next_split(bio, sectors, GFP_NOIO, &s->d->bio_split);
		ret = miss == bio ? MAP_DONE : MAP_CONTINUE;
		goto out_submit;
	}

	/* Limitation for valid replace key size and cache_bio bvecs number */
	size_limit = min_t(unsigned int, BIO_MAX_VECS * PAGE_SECTORS,
			   (1 << KEY_SIZE_BITS) - 1);
	s->insert_bio_sectors = min3(size_limit, sectors, bio_sectors(bio));

	s->iop.replace_key = KEY(s->iop.inode,
				 bio->bi_iter.bi_sector + s->insert_bio_sectors,
				 s->insert_bio_sectors);

	ret = bch_btree_insert_check_key(b, &s->op, &s->iop.replace_key);
	if (ret)
		return ret;

	s->iop.replace = true;

	miss = bio_next_split(bio, s->insert_bio_sectors, GFP_NOIO,
			      &s->d->bio_split);

	/* btree_search_recurse()'s btree iterator is no good anymore */
	ret = miss == bio ? MAP_DONE : -EINTR;

	cache_bio = bio_alloc_bioset(miss->bi_bdev,
			DIV_ROUND_UP(s->insert_bio_sectors, PAGE_SECTORS),
			0, GFP_NOWAIT, &dc->disk.bio_split);
	if (!cache_bio)
		goto out_submit;

	cache_bio->bi_iter.bi_sector = miss->bi_iter.bi_sector;
	cache_bio->bi_iter.bi_size = s->insert_bio_sectors << 9;

	cache_bio->bi_end_io = backing_request_endio;
	cache_bio->bi_private = &s->cl;

	bch_bio_map(cache_bio, NULL);
	if (bch_bio_alloc_pages(cache_bio, __GFP_NOWARN|GFP_NOIO))
		goto out_put;

	s->cache_miss = miss;
	s->iop.bio = cache_bio;
	bio_get(cache_bio);
	/* I/O request sent to backing device */
	closure_bio_submit(s->iop.c, cache_bio, &s->cl);

	return ret;
out_put:
	bio_put(cache_bio);
out_submit:
	miss->bi_end_io = backing_request_endio;
	miss->bi_private = &s->cl;

	closure_bio_submit(s->iop.c, miss, &s->cl);
	return ret;
}

static void cached_dev_read(struct cached_dev *dc, struct search *s)
{
	struct closure *cl = &s->cl;

	closure_call(&s->iop.cl, cache_lookup, NULL, cl);
	continue_at(cl, cached_dev_read_done_bh, NULL);
}

/* Process writes */

static void cached_dev_write_complete(struct closure *cl)
{
	struct search *s = container_of(cl, struct search, cl);
	struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);

	up_read_non_owner(&dc->writeback_lock);
	cached_dev_bio_complete(cl);
}

static void cached_dev_write(struct cached_dev *dc, struct search *s)
{
	struct closure *cl = &s->cl;
	struct bio *bio = &s->bio.bio;
	struct bkey start = KEY(dc->disk.id, bio->bi_iter.bi_sector, 0);
	struct bkey end = KEY(dc->disk.id, bio_end_sector(bio), 0);

	bch_keybuf_check_overlapping(&s->iop.c->moving_gc_keys, &start, &end);

	down_read_non_owner(&dc->writeback_lock);
	if (bch_keybuf_check_overlapping(&dc->writeback_keys, &start, &end)) {
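		/*
		 * We overlap with some dirty data undergoing background
		 * writeback, force this write to writeback
		 */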
		s->iop.bypass = false;
		s->iop.writeback = true;
	}

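	/*
	 * Discards aren't _required_ to do anything, so skipping if
	 * check_overlapping returned true is ok
	 *
	 * But check_overlapping drops dirty keys for which io hasn't started,
	 * so we still want to call it.
	 */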
	if (bio_op(bio) == REQ_OP_DISCARD)
		s->iop.bypass = true;

	if (should_writeback(dc, s->orig_bio,
			     cache_mode(dc),
			     s->iop.bypass)) {
		s->iop.bypass = false;
		s->iop.writeback = true;
	}

	if (s->iop.bypass) {
		s->iop.bio = s->orig_bio;
		bio_get(s->iop.bio);

		if (bio_op(bio) == REQ_OP_DISCARD &&
		    !bdev_max_discard_sectors(dc->bdev))
			goto insert_data;

		/* I/O request sent to backing device */
		bio->bi_end_io = backing_request_endio;
		closure_bio_submit(s->iop.c, bio, cl);

	} else if (s->iop.writeback) {
		bch_writeback_add(dc);
		s->iop.bio = bio;

		if (bio->bi_opf & REQ_PREFLUSH) {
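			/*
			 * Also need to send a flush to the backing
			 * device.
			 */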
			struct bio *flush;

			flush = bio_alloc_bioset(bio->bi_bdev, 0,
						 REQ_OP_WRITE | REQ_PREFLUSH,
						 GFP_NOIO, &dc->disk.bio_split);
			if (!flush) {
				s->iop.status = BLK_STS_RESOURCE;
				goto insert_data;
			}
			flush->bi_end_io = backing_request_endio;
			flush->bi_private = cl;
			/* I/O request sent to backing device */
			closure_bio_submit(s->iop.c, flush, cl);
		}
	} else {
		s->iop.bio = bio_alloc_clone(bio->bi_bdev, bio, GFP_NOIO,
					     &dc->disk.bio_split);
		/* I/O request sent to backing device */
		bio->bi_end_io = backing_request_endio;
		closure_bio_submit(s->iop.c, bio, cl);
	}

insert_data:
	closure_call(&s->iop.cl, bch_data_insert, NULL, cl);
	continue_at(cl, cached_dev_write_complete, NULL);
}

static void cached_dev_nodata(struct closure *cl)
{
	struct search *s = container_of(cl, struct search, cl);
	struct bio *bio = &s->bio.bio;

	if (s->iop.flush_journal)
		bch_journal_meta(s->iop.c, cl);

	/* If it's a flush, we send the flush to the backing device too */
	bio->bi_end_io = backing_request_endio;
	closure_bio_submit(s->iop.c, bio, cl);

	continue_at(cl, cached_dev_bio_complete, NULL);
}

struct detached_dev_io_private {
	struct bcache_device	*d;
	unsigned long		start_time;
	bio_end_io_t		*bi_end_io;
	void			*bi_private;
	struct block_device	*orig_bdev;
};

static void detached_dev_end_io(struct bio *bio)
{
	struct detached_dev_io_private *ddip;

	ddip = bio->bi_private;
	bio->bi_end_io = ddip->bi_end_io;
	bio->bi_private = ddip->bi_private;

	/* Count on the bcache device */
	bio_end_io_acct_remapped(bio, ddip->start_time, ddip->orig_bdev);

	if (bio->bi_status) {
		struct cached_dev *dc = container_of(ddip->d,
						     struct cached_dev, disk);
		/* should count I/O error for backing device here */
		bch_count_backing_io_errors(dc, bio);
	}

	kfree(ddip);
	bio->bi_end_io(bio);
}

static void detached_dev_do_request(struct bcache_device *d, struct bio *bio,
		struct block_device *orig_bdev, unsigned long start_time)
{
	struct detached_dev_io_private *ddip;
	struct cached_dev *dc = container_of(d, struct cached_dev, disk);

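	/*
	 * No need to call closure_get(&dc->disk.cl) here, because the upper
	 * layer has already opened the bcache device, which took that
	 * reference via closure_get(&dc->disk.cl).
	 */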
	ddip = kzalloc(sizeof(struct detached_dev_io_private), GFP_NOIO);
	if (!ddip) {
		bio->bi_status = BLK_STS_RESOURCE;
		bio->bi_end_io(bio);
		return;
	}

	ddip->d = d;
	/* Count on the bcache device */
	ddip->orig_bdev = orig_bdev;
	ddip->start_time = start_time;
	ddip->bi_end_io = bio->bi_end_io;
	ddip->bi_private = bio->bi_private;
	bio->bi_end_io = detached_dev_end_io;
	bio->bi_private = ddip;

	if ((bio_op(bio) == REQ_OP_DISCARD) &&
	    !bdev_max_discard_sectors(dc->bdev))
		bio->bi_end_io(bio);
	else
		submit_bio_noacct(bio);
}

static void quit_max_writeback_rate(struct cache_set *c,
				    struct cached_dev *this_dc)
{
	int i;
	struct bcache_device *d;
	struct cached_dev *dc;

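	/*
	 * bch_register_lock may be contended by other parallel requesters,
	 * or by attach/detach operations on other backing devices. Waiting
	 * for the mutex could add seconds of latency to this I/O. To avoid
	 * that, if mutex_trylock() fails, only the writeback rate of the
	 * current cached device is set to 1; update_writeback_rate() will
	 * later decide the writeback rate for the other cached devices.
	 */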
	if (mutex_trylock(&bch_register_lock)) {
		for (i = 0; i < c->devices_max_used; i++) {
			if (!c->devices[i])
				continue;

			if (UUID_FLASH_ONLY(&c->uuids[i]))
				continue;

			d = c->devices[i];
			dc = container_of(d, struct cached_dev, disk);
			/*
			 * set writeback rate to default minimum value,
			 * then let update_writeback_rate() decide the
			 * upcoming new writeback rate value
			 */
			atomic_long_set(&dc->writeback_rate.rate, 1);
		}
		mutex_unlock(&bch_register_lock);
	} else
		atomic_long_set(&this_dc->writeback_rate.rate, 1);
}

/* Cached devices - read & write stuff */

void cached_dev_submit_bio(struct bio *bio)
{
	struct search *s;
	struct block_device *orig_bdev = bio->bi_bdev;
	struct bcache_device *d = orig_bdev->bd_disk->private_data;
	struct cached_dev *dc = container_of(d, struct cached_dev, disk);
	unsigned long start_time;
	int rw = bio_data_dir(bio);

	if (unlikely((d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags)) ||
		     dc->io_disable)) {
		bio->bi_status = BLK_STS_IOERR;
		bio_endio(bio);
		return;
	}

	if (likely(d->c)) {
		if (atomic_read(&d->c->idle_counter))
			atomic_set(&d->c->idle_counter, 0);
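		/*
		 * If at_max_writeback_rate of the cache set is true and a new
		 * I/O comes in, quit the max writeback rate of all cached
		 * devices attached to this cache set, and set
		 * at_max_writeback_rate to false.
		 */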
		if (unlikely(atomic_read(&d->c->at_max_writeback_rate) == 1)) {
			atomic_set(&d->c->at_max_writeback_rate, 0);
			quit_max_writeback_rate(d->c, dc);
		}
	}

	start_time = bio_start_io_acct(bio);

	bio_set_dev(bio, dc->bdev);
	bio->bi_iter.bi_sector += dc->sb.data_offset;

	if (cached_dev_get(dc)) {
		s = search_alloc(bio, d, orig_bdev, start_time);
		trace_bcache_request_start(s->d, bio);

		if (!bio->bi_iter.bi_size) {
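			/*
			 * can't call bch_journal_meta from under
			 * submit_bio_noacct
			 */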
			continue_at_nobarrier(&s->cl,
					      cached_dev_nodata,
					      bcache_wq);
		} else {
			s->iop.bypass = check_should_bypass(dc, bio);

			if (rw)
				cached_dev_write(dc, s);
			else
				cached_dev_read(dc, s);
		}
	} else
		/* I/O request sent to backing device */
		detached_dev_do_request(d, bio, orig_bdev, start_time);
}

static int cached_dev_ioctl(struct bcache_device *d, fmode_t mode,
			    unsigned int cmd, unsigned long arg)
{
	struct cached_dev *dc = container_of(d, struct cached_dev, disk);

	if (dc->io_disable)
		return -EIO;
	if (!dc->bdev->bd_disk->fops->ioctl)
		return -ENOTTY;
	return dc->bdev->bd_disk->fops->ioctl(dc->bdev, mode, cmd, arg);
}

void bch_cached_dev_request_init(struct cached_dev *dc)
{
	dc->disk.cache_miss = cached_dev_cache_miss;
	dc->disk.ioctl = cached_dev_ioctl;
}

/* Flash backed devices */

static int flash_dev_cache_miss(struct btree *b, struct search *s,
				struct bio *bio, unsigned int sectors)
{
	unsigned int bytes = min(sectors, bio_sectors(bio)) << 9;

	swap(bio->bi_iter.bi_size, bytes);
	zero_fill_bio(bio);
	swap(bio->bi_iter.bi_size, bytes);

	bio_advance(bio, bytes);

	if (!bio->bi_iter.bi_size)
		return MAP_DONE;

	return MAP_CONTINUE;
}

static void flash_dev_nodata(struct closure *cl)
{
	struct search *s = container_of(cl, struct search, cl);

	if (s->iop.flush_journal)
		bch_journal_meta(s->iop.c, cl);

	continue_at(cl, search_free, NULL);
}

void flash_dev_submit_bio(struct bio *bio)
{
	struct search *s;
	struct closure *cl;
	struct bcache_device *d = bio->bi_bdev->bd_disk->private_data;

	if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
		bio->bi_status = BLK_STS_IOERR;
		bio_endio(bio);
		return;
	}

	s = search_alloc(bio, d, bio->bi_bdev, bio_start_io_acct(bio));
	cl = &s->cl;
	bio = &s->bio.bio;

	trace_bcache_request_start(s->d, bio);

	if (!bio->bi_iter.bi_size) {
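		/*
		 * can't call bch_journal_meta from under
		 * submit_bio_noacct
		 */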
		continue_at_nobarrier(&s->cl,
				      flash_dev_nodata,
				      bcache_wq);
		return;
	} else if (bio_data_dir(bio)) {
		bch_keybuf_check_overlapping(&s->iop.c->moving_gc_keys,
					&KEY(d->id, bio->bi_iter.bi_sector, 0),
					&KEY(d->id, bio_end_sector(bio), 0));

		s->iop.bypass = (bio_op(bio) == REQ_OP_DISCARD) != 0;
		s->iop.writeback = true;
		s->iop.bio = bio;

		closure_call(&s->iop.cl, bch_data_insert, NULL, cl);
	} else {
		closure_call(&s->iop.cl, cache_lookup, NULL, cl);
	}

	continue_at(cl, search_free, NULL);
}

static int flash_dev_ioctl(struct bcache_device *d, fmode_t mode,
			   unsigned int cmd, unsigned long arg)
{
	return -ENOTTY;
}

void bch_flash_dev_request_init(struct bcache_device *d)
{
	d->cache_miss = flash_dev_cache_miss;
	d->ioctl = flash_dev_ioctl;
}

void bch_request_exit(void)
{
	kmem_cache_destroy(bch_search_cache);
}

int __init bch_request_init(void)
{
	bch_search_cache = KMEM_CACHE(search, 0);
	if (!bch_search_cache)
		return -ENOMEM;

	return 0;
}