// SPDX-License-Identifier: GPL-2.0
/*
 * bcache setup/teardown code, and some metadata io - read a superblock and
 * figure out what to do with it.
 *
 * Copyright 2010, 2012 Kent Overstreet <kent.overstreet@gmail.com>
 * Copyright 2012 Google, Inc.
 */

0010 #include "bcache.h"
0011 #include "btree.h"
0012 #include "debug.h"
0013 #include "extents.h"
0014 #include "request.h"
0015 #include "writeback.h"
0016 #include "features.h"
0017
0018 #include <linux/blkdev.h>
0019 #include <linux/pagemap.h>
0020 #include <linux/debugfs.h>
0021 #include <linux/idr.h>
0022 #include <linux/kthread.h>
0023 #include <linux/workqueue.h>
0024 #include <linux/module.h>
0025 #include <linux/random.h>
0026 #include <linux/reboot.h>
0027 #include <linux/sysfs.h>
0028
0029 unsigned int bch_cutoff_writeback;
0030 unsigned int bch_cutoff_writeback_sync;
0031
0032 static const char bcache_magic[] = {
0033 0xc6, 0x85, 0x73, 0xf6, 0x4e, 0x1a, 0x45, 0xca,
0034 0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81
0035 };
0036
0037 static const char invalid_uuid[] = {
0038 0xa0, 0x3e, 0xf8, 0xed, 0x3e, 0xe1, 0xb8, 0x78,
0039 0xc8, 0x50, 0xfc, 0x5e, 0xcb, 0x16, 0xcd, 0x99
0040 };
0041
0042 static struct kobject *bcache_kobj;
0043 struct mutex bch_register_lock;
0044 bool bcache_is_reboot;
0045 LIST_HEAD(bch_cache_sets);
0046 static LIST_HEAD(uncached_devices);
0047
0048 static int bcache_major;
0049 static DEFINE_IDA(bcache_device_idx);
0050 static wait_queue_head_t unregister_wait;
0051 struct workqueue_struct *bcache_wq;
0052 struct workqueue_struct *bch_flush_wq;
0053 struct workqueue_struct *bch_journal_wq;
0054
0055
0056 #define BTREE_MAX_PAGES (256 * 1024 / PAGE_SIZE)
/* limitation of partitions number on single bcache device */
0058 #define BCACHE_MINORS 128
/* limitation of bcache devices number on single system */
0060 #define BCACHE_DEVICE_IDX_MAX ((1U << MINORBITS)/BCACHE_MINORS)
0061
/* Superblock */
0063
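/*
 * Decode the bucket size from the on-disk superblock. With the large_bucket
 * incompat feature the 16-bit field stores log2 of the bucket size; the
 * obsoleted variant stored the high 16 bits in a separate field.
 */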
0064 static unsigned int get_bucket_size(struct cache_sb *sb, struct cache_sb_disk *s)
0065 {
0066 unsigned int bucket_size = le16_to_cpu(s->bucket_size);
0067
0068 if (sb->version >= BCACHE_SB_VERSION_CDEV_WITH_FEATURES) {
0069 if (bch_has_feature_large_bucket(sb)) {
0070 unsigned int max, order;
0071
0072 max = sizeof(unsigned int) * BITS_PER_BYTE - 1;
0073 order = le16_to_cpu(s->bucket_size);
/*
 * bcache-tools makes sure this overflow won't happen; an error
 * message here is enough.
 */
0078 if (order > max)
0079 pr_err("Bucket size (1 << %u) overflows\n",
0080 order);
0081 bucket_size = 1 << order;
0082 } else if (bch_has_feature_obso_large_bucket(sb)) {
0083 bucket_size +=
0084 le16_to_cpu(s->obso_bucket_size_hi) << 16;
0085 }
0086 }
0087
0088 return bucket_size;
0089 }
0090
0091 static const char *read_super_common(struct cache_sb *sb, struct block_device *bdev,
0092 struct cache_sb_disk *s)
0093 {
0094 const char *err;
0095 unsigned int i;
0096
sb->first_bucket = le16_to_cpu(s->first_bucket);
0098 sb->nbuckets = le64_to_cpu(s->nbuckets);
0099 sb->bucket_size = get_bucket_size(sb, s);
0100
0101 sb->nr_in_set = le16_to_cpu(s->nr_in_set);
0102 sb->nr_this_dev = le16_to_cpu(s->nr_this_dev);
0103
0104 err = "Too many journal buckets";
0105 if (sb->keys > SB_JOURNAL_BUCKETS)
0106 goto err;
0107
0108 err = "Too many buckets";
0109 if (sb->nbuckets > LONG_MAX)
0110 goto err;
0111
0112 err = "Not enough buckets";
0113 if (sb->nbuckets < 1 << 7)
0114 goto err;
0115
0116 err = "Bad block size (not power of 2)";
0117 if (!is_power_of_2(sb->block_size))
0118 goto err;
0119
0120 err = "Bad block size (larger than page size)";
0121 if (sb->block_size > PAGE_SECTORS)
0122 goto err;
0123
0124 err = "Bad bucket size (not power of 2)";
0125 if (!is_power_of_2(sb->bucket_size))
0126 goto err;
0127
0128 err = "Bad bucket size (smaller than page size)";
0129 if (sb->bucket_size < PAGE_SECTORS)
0130 goto err;
0131
0132 err = "Invalid superblock: device too small";
0133 if (get_capacity(bdev->bd_disk) <
0134 sb->bucket_size * sb->nbuckets)
0135 goto err;
0136
0137 err = "Bad UUID";
0138 if (bch_is_zero(sb->set_uuid, 16))
0139 goto err;
0140
0141 err = "Bad cache device number in set";
0142 if (!sb->nr_in_set ||
0143 sb->nr_in_set <= sb->nr_this_dev ||
0144 sb->nr_in_set > MAX_CACHES_PER_SET)
0145 goto err;
0146
0147 err = "Journal buckets not sequential";
0148 for (i = 0; i < sb->keys; i++)
0149 if (sb->d[i] != sb->first_bucket + i)
0150 goto err;
0151
0152 err = "Too many journal buckets";
0153 if (sb->first_bucket + sb->keys > sb->nbuckets)
0154 goto err;
0155
0156 err = "Invalid superblock: first bucket comes before end of super";
0157 if (sb->first_bucket * sb->bucket_size < 16)
0158 goto err;
0159
0160 err = NULL;
0161 err:
0162 return err;
0163 }
0164
0165
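/*
 * Read the superblock at SB_OFFSET and perform per-version decoding and
 * validation. Returns NULL on success and an error string otherwise; on
 * success the page backing *res is kept referenced for later superblock
 * writes.
 */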
0166 static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
0167 struct cache_sb_disk **res)
0168 {
0169 const char *err;
0170 struct cache_sb_disk *s;
0171 struct page *page;
0172 unsigned int i;
0173
0174 page = read_cache_page_gfp(bdev->bd_inode->i_mapping,
0175 SB_OFFSET >> PAGE_SHIFT, GFP_KERNEL);
0176 if (IS_ERR(page))
0177 return "IO error";
0178 s = page_address(page) + offset_in_page(SB_OFFSET);
0179
0180 sb->offset = le64_to_cpu(s->offset);
0181 sb->version = le64_to_cpu(s->version);
0182
0183 memcpy(sb->magic, s->magic, 16);
0184 memcpy(sb->uuid, s->uuid, 16);
0185 memcpy(sb->set_uuid, s->set_uuid, 16);
0186 memcpy(sb->label, s->label, SB_LABEL_SIZE);
0187
0188 sb->flags = le64_to_cpu(s->flags);
0189 sb->seq = le64_to_cpu(s->seq);
0190 sb->last_mount = le32_to_cpu(s->last_mount);
0191 sb->keys = le16_to_cpu(s->keys);
0192
0193 for (i = 0; i < SB_JOURNAL_BUCKETS; i++)
0194 sb->d[i] = le64_to_cpu(s->d[i]);
0195
0196 pr_debug("read sb version %llu, flags %llu, seq %llu, journal size %u\n",
0197 sb->version, sb->flags, sb->seq, sb->keys);
0198
0199 err = "Not a bcache superblock (bad offset)";
0200 if (sb->offset != SB_SECTOR)
0201 goto err;
0202
0203 err = "Not a bcache superblock (bad magic)";
0204 if (memcmp(sb->magic, bcache_magic, 16))
0205 goto err;
0206
0207 err = "Bad checksum";
0208 if (s->csum != csum_set(s))
0209 goto err;
0210
0211 err = "Bad UUID";
0212 if (bch_is_zero(sb->uuid, 16))
0213 goto err;
0214
0215 sb->block_size = le16_to_cpu(s->block_size);
0216
0217 err = "Superblock block size smaller than device block size";
0218 if (sb->block_size << 9 < bdev_logical_block_size(bdev))
0219 goto err;
0220
0221 switch (sb->version) {
0222 case BCACHE_SB_VERSION_BDEV:
0223 sb->data_offset = BDEV_DATA_START_DEFAULT;
0224 break;
0225 case BCACHE_SB_VERSION_BDEV_WITH_OFFSET:
0226 case BCACHE_SB_VERSION_BDEV_WITH_FEATURES:
0227 sb->data_offset = le64_to_cpu(s->data_offset);
0228
0229 err = "Bad data offset";
0230 if (sb->data_offset < BDEV_DATA_START_DEFAULT)
0231 goto err;
0232
0233 break;
0234 case BCACHE_SB_VERSION_CDEV:
0235 case BCACHE_SB_VERSION_CDEV_WITH_UUID:
0236 err = read_super_common(sb, bdev, s);
0237 if (err)
0238 goto err;
0239 break;
0240 case BCACHE_SB_VERSION_CDEV_WITH_FEATURES:
/*
 * Feature bits are needed in read_super_common(),
 * convert them into struct cache_sb first.
 */
0245 sb->feature_compat = le64_to_cpu(s->feature_compat);
0246 sb->feature_incompat = le64_to_cpu(s->feature_incompat);
0247 sb->feature_ro_compat = le64_to_cpu(s->feature_ro_compat);

/* Check incompatible features */
0250 err = "Unsupported compatible feature found";
0251 if (bch_has_unknown_compat_features(sb))
0252 goto err;
0253
0254 err = "Unsupported read-only compatible feature found";
0255 if (bch_has_unknown_ro_compat_features(sb))
0256 goto err;
0257
0258 err = "Unsupported incompatible feature found";
0259 if (bch_has_unknown_incompat_features(sb))
0260 goto err;
0261
0262 err = read_super_common(sb, bdev, s);
0263 if (err)
0264 goto err;
0265 break;
0266 default:
0267 err = "Unsupported superblock version";
0268 goto err;
0269 }
0270
0271 sb->last_mount = (u32)ktime_get_real_seconds();
0272 *res = s;
0273 return NULL;
0274 err:
0275 put_page(page);
0276 return err;
0277 }
0278
0279 static void write_bdev_super_endio(struct bio *bio)
0280 {
0281 struct cached_dev *dc = bio->bi_private;
0282
0283 if (bio->bi_status)
0284 bch_count_backing_io_errors(dc, bio);
0285
0286 closure_put(&dc->sb_write);
0287 }
0288
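/*
 * Encode the in-memory struct cache_sb into the on-disk format and submit
 * the superblock write; the version field and checksum are filled in last.
 */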
0289 static void __write_super(struct cache_sb *sb, struct cache_sb_disk *out,
0290 struct bio *bio)
0291 {
0292 unsigned int i;
0293
0294 bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_META;
0295 bio->bi_iter.bi_sector = SB_SECTOR;
0296 __bio_add_page(bio, virt_to_page(out), SB_SIZE,
0297 offset_in_page(out));
0298
0299 out->offset = cpu_to_le64(sb->offset);
0300
0301 memcpy(out->uuid, sb->uuid, 16);
0302 memcpy(out->set_uuid, sb->set_uuid, 16);
0303 memcpy(out->label, sb->label, SB_LABEL_SIZE);
0304
0305 out->flags = cpu_to_le64(sb->flags);
0306 out->seq = cpu_to_le64(sb->seq);
0307
0308 out->last_mount = cpu_to_le32(sb->last_mount);
0309 out->first_bucket = cpu_to_le16(sb->first_bucket);
0310 out->keys = cpu_to_le16(sb->keys);
0311
0312 for (i = 0; i < sb->keys; i++)
0313 out->d[i] = cpu_to_le64(sb->d[i]);
0314
0315 if (sb->version >= BCACHE_SB_VERSION_CDEV_WITH_FEATURES) {
0316 out->feature_compat = cpu_to_le64(sb->feature_compat);
0317 out->feature_incompat = cpu_to_le64(sb->feature_incompat);
0318 out->feature_ro_compat = cpu_to_le64(sb->feature_ro_compat);
0319 }
0320
0321 out->version = cpu_to_le64(sb->version);
0322 out->csum = csum_set(out);
0323
0324 pr_debug("ver %llu, flags %llu, seq %llu\n",
0325 sb->version, sb->flags, sb->seq);
0326
0327 submit_bio(bio);
0328 }
0329
0330 static void bch_write_bdev_super_unlock(struct closure *cl)
0331 {
0332 struct cached_dev *dc = container_of(cl, struct cached_dev, sb_write);
0333
0334 up(&dc->sb_write_mutex);
0335 }
0336
0337 void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent)
0338 {
0339 struct closure *cl = &dc->sb_write;
0340 struct bio *bio = &dc->sb_bio;
0341
0342 down(&dc->sb_write_mutex);
0343 closure_init(cl, parent);
0344
0345 bio_init(bio, dc->bdev, dc->sb_bv, 1, 0);
0346 bio->bi_end_io = write_bdev_super_endio;
0347 bio->bi_private = dc;
0348
0349 closure_get(cl);
0350
0351 __write_super(&dc->sb, dc->sb_disk, bio);
0352
0353 closure_return_with_destructor(cl, bch_write_bdev_super_unlock);
0354 }
0355
0356 static void write_super_endio(struct bio *bio)
0357 {
0358 struct cache *ca = bio->bi_private;
0359
/* is_read = 0 */
0361 bch_count_io_errors(ca, bio->bi_status, 0,
0362 "writing superblock");
0363 closure_put(&ca->set->sb_write);
0364 }
0365
0366 static void bcache_write_super_unlock(struct closure *cl)
0367 {
0368 struct cache_set *c = container_of(cl, struct cache_set, sb_write);
0369
0370 up(&c->sb_write_mutex);
0371 }
0372
0373 void bcache_write_super(struct cache_set *c)
0374 {
0375 struct closure *cl = &c->sb_write;
0376 struct cache *ca = c->cache;
0377 struct bio *bio = &ca->sb_bio;
0378 unsigned int version = BCACHE_SB_VERSION_CDEV_WITH_UUID;
0379
0380 down(&c->sb_write_mutex);
0381 closure_init(cl, &c->cl);
0382
0383 ca->sb.seq++;
0384
0385 if (ca->sb.version < version)
0386 ca->sb.version = version;
0387
0388 bio_init(bio, ca->bdev, ca->sb_bv, 1, 0);
0389 bio->bi_end_io = write_super_endio;
0390 bio->bi_private = ca;
0391
0392 closure_get(cl);
0393 __write_super(&ca->sb, ca->sb_disk, bio);
0394
0395 closure_return_with_destructor(cl, bcache_write_super_unlock);
0396 }
0397
/* UUID io */
0399
0400 static void uuid_endio(struct bio *bio)
0401 {
0402 struct closure *cl = bio->bi_private;
0403 struct cache_set *c = container_of(cl, struct cache_set, uuid_write);
0404
0405 cache_set_err_on(bio->bi_status, c, "accessing uuids");
0406 bch_bbio_free(bio, c);
0407 closure_put(cl);
0408 }
0409
0410 static void uuid_io_unlock(struct closure *cl)
0411 {
0412 struct cache_set *c = container_of(cl, struct cache_set, uuid_write);
0413
0414 up(&c->uuid_write_mutex);
0415 }
0416
0417 static void uuid_io(struct cache_set *c, blk_opf_t opf, struct bkey *k,
0418 struct closure *parent)
0419 {
0420 struct closure *cl = &c->uuid_write;
0421 struct uuid_entry *u;
0422 unsigned int i;
0423 char buf[80];
0424
0425 BUG_ON(!parent);
0426 down(&c->uuid_write_mutex);
0427 closure_init(cl, parent);
0428
0429 for (i = 0; i < KEY_PTRS(k); i++) {
0430 struct bio *bio = bch_bbio_alloc(c);
0431
0432 bio->bi_opf = opf | REQ_SYNC | REQ_META;
0433 bio->bi_iter.bi_size = KEY_SIZE(k) << 9;
0434
0435 bio->bi_end_io = uuid_endio;
0436 bio->bi_private = cl;
0437 bch_bio_map(bio, c->uuids);
0438
0439 bch_submit_bbio(bio, c, k, i);
0440
0441 if ((opf & REQ_OP_MASK) != REQ_OP_WRITE)
0442 break;
0443 }
0444
0445 bch_extent_to_text(buf, sizeof(buf), k);
0446 pr_debug("%s UUIDs at %s\n", (opf & REQ_OP_MASK) == REQ_OP_WRITE ?
0447 "wrote" : "read", buf);
0448
0449 for (u = c->uuids; u < c->uuids + c->nr_uuids; u++)
0450 if (!bch_is_zero(u->uuid, 16))
0451 pr_debug("Slot %zi: %pU: %s: 1st: %u last: %u inv: %u\n",
0452 u - c->uuids, u->uuid, u->label,
0453 u->first_reg, u->last_reg, u->invalidated);
0454
0455 closure_return_with_destructor(cl, uuid_io_unlock);
0456 }
0457
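/*
 * Read the uuid array from the bucket referenced by the journal and, for
 * pre-UUIDv1 journals, convert the old uuid_entry_v0 layout to the current
 * struct uuid_entry in place.
 */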
0458 static char *uuid_read(struct cache_set *c, struct jset *j, struct closure *cl)
0459 {
0460 struct bkey *k = &j->uuid_bucket;
0461
0462 if (__bch_btree_ptr_invalid(c, k))
0463 return "bad uuid pointer";
0464
0465 bkey_copy(&c->uuid_bucket, k);
0466 uuid_io(c, REQ_OP_READ, k, cl);
0467
0468 if (j->version < BCACHE_JSET_VERSION_UUIDv1) {
0469 struct uuid_entry_v0 *u0 = (void *) c->uuids;
0470 struct uuid_entry *u1 = (void *) c->uuids;
0471 int i;
0472
0473 closure_sync(cl);
0474
/*
 * Since the new uuid entry is bigger than the old, we have to
 * convert starting at the highest memory address and work down
 * in order to do it in place.
 */

0481 for (i = c->nr_uuids - 1;
0482 i >= 0;
0483 --i) {
0484 memcpy(u1[i].uuid, u0[i].uuid, 16);
0485 memcpy(u1[i].label, u0[i].label, 32);
0486
0487 u1[i].first_reg = u0[i].first_reg;
0488 u1[i].last_reg = u0[i].last_reg;
0489 u1[i].invalidated = u0[i].invalidated;
0490
0491 u1[i].flags = 0;
0492 u1[i].sectors = 0;
0493 }
0494 }
0495
0496 return NULL;
0497 }
0498
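/*
 * Allocate a fresh metadata bucket and write the current uuid array to it;
 * the new location is recorded in c->uuid_bucket.
 */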
0499 static int __uuid_write(struct cache_set *c)
0500 {
0501 BKEY_PADDED(key) k;
0502 struct closure cl;
0503 struct cache *ca = c->cache;
0504 unsigned int size;
0505
0506 closure_init_stack(&cl);
0507 lockdep_assert_held(&bch_register_lock);
0508
0509 if (bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, true))
0510 return 1;
0511
0512 size = meta_bucket_pages(&ca->sb) * PAGE_SECTORS;
0513 SET_KEY_SIZE(&k.key, size);
0514 uuid_io(c, REQ_OP_WRITE, &k.key, &cl);
0515 closure_sync(&cl);
0516
/* Only one bucket used for uuid write */
0518 atomic_long_add(ca->sb.bucket_size, &ca->meta_sectors_written);
0519
0520 bkey_copy(&c->uuid_bucket, &k.key);
0521 bkey_put(c, &k.key);
0522 return 0;
0523 }
0524
0525 int bch_uuid_write(struct cache_set *c)
0526 {
0527 int ret = __uuid_write(c);
0528
0529 if (!ret)
0530 bch_journal_meta(c, NULL);
0531
0532 return ret;
0533 }
0534
0535 static struct uuid_entry *uuid_find(struct cache_set *c, const char *uuid)
0536 {
0537 struct uuid_entry *u;
0538
0539 for (u = c->uuids;
0540 u < c->uuids + c->nr_uuids; u++)
0541 if (!memcmp(u->uuid, uuid, 16))
0542 return u;
0543
0544 return NULL;
0545 }
0546
0547 static struct uuid_entry *uuid_find_empty(struct cache_set *c)
0548 {
0549 static const char zero_uuid[16] = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
0550
0551 return uuid_find(c, zero_uuid);
0552 }
0553
/*
 * Bucket priorities/gens:
 *
 * For each bucket, we store on disk its
 *   8 bit gen
 *  16 bit priority
 *
 * See alloc.c for an explanation of the gen. The priority is used to
 * implement an lru (and in the future other) cache replacement policies;
 * for most purposes it's just an opaque integer.
 *
 * The gens and the priorities don't have a whole lot to do with each other,
 * and it's actually the gens that must be written out at specific times -
 * it's no big deal if the priorities don't get written, if we lose them we
 * just reuse buckets in closer to lru order.
 *
 * On disk they're stored in a packed array, in as many buckets as are
 * required to fit them all. The buckets we use to store them form a list;
 * the journal header points to the first bucket, the first bucket points to
 * the second bucket, et cetera.
 *
 * This code is used by the allocation code; periodically (whenever it runs
 * out of buckets to allocate from) the allocation code will invalidate some
 * buckets, but it can't use those buckets until their new gens are safely
 * on disk.
 */

0581 static void prio_endio(struct bio *bio)
0582 {
0583 struct cache *ca = bio->bi_private;
0584
0585 cache_set_err_on(bio->bi_status, ca->set, "accessing priorities");
0586 bch_bbio_free(bio, ca->set);
0587 closure_put(&ca->prio);
0588 }
0589
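/* Synchronously read or write a single prio bucket. */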
0590 static void prio_io(struct cache *ca, uint64_t bucket, blk_opf_t opf)
0591 {
0592 struct closure *cl = &ca->prio;
0593 struct bio *bio = bch_bbio_alloc(ca->set);
0594
0595 closure_init_stack(cl);
0596
0597 bio->bi_iter.bi_sector = bucket * ca->sb.bucket_size;
0598 bio_set_dev(bio, ca->bdev);
0599 bio->bi_iter.bi_size = meta_bucket_bytes(&ca->sb);
0600
0601 bio->bi_end_io = prio_endio;
0602 bio->bi_private = ca;
0603 bio->bi_opf = opf | REQ_SYNC | REQ_META;
0604 bch_bio_map(bio, ca->disk_buckets);
0605
0606 closure_bio_submit(ca->set, bio, &ca->prio);
0607 closure_sync(cl);
0608 }
0609
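/*
 * Write out bucket priorities and gens: pack them into prio buckets,
 * allocate a new bucket for each, chain the buckets together, then journal
 * the update. In non-blocking mode this returns -ENOMEM if there are not
 * enough free buckets to do the write without sleeping.
 */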
0610 int bch_prio_write(struct cache *ca, bool wait)
0611 {
0612 int i;
0613 struct bucket *b;
0614 struct closure cl;
0615
0616 pr_debug("free_prio=%zu, free_none=%zu, free_inc=%zu\n",
0617 fifo_used(&ca->free[RESERVE_PRIO]),
0618 fifo_used(&ca->free[RESERVE_NONE]),
0619 fifo_used(&ca->free_inc));
0620
/*
 * Pre-check if there are enough free buckets. In the non-blocking
 * scenario it's better to fail early rather than starting to allocate
 * buckets and having to clean up later in case of failure.
 */
0626 if (!wait) {
0627 size_t avail = fifo_used(&ca->free[RESERVE_PRIO]) +
0628 fifo_used(&ca->free[RESERVE_NONE]);
0629 if (prio_buckets(ca) > avail)
0630 return -ENOMEM;
0631 }
0632
0633 closure_init_stack(&cl);
0634
0635 lockdep_assert_held(&ca->set->bucket_lock);
0636
0637 ca->disk_buckets->seq++;
0638
0639 atomic_long_add(ca->sb.bucket_size * prio_buckets(ca),
0640 &ca->meta_sectors_written);
0641
0642 for (i = prio_buckets(ca) - 1; i >= 0; --i) {
0643 long bucket;
0644 struct prio_set *p = ca->disk_buckets;
0645 struct bucket_disk *d = p->data;
0646 struct bucket_disk *end = d + prios_per_bucket(ca);
0647
0648 for (b = ca->buckets + i * prios_per_bucket(ca);
0649 b < ca->buckets + ca->sb.nbuckets && d < end;
0650 b++, d++) {
0651 d->prio = cpu_to_le16(b->prio);
0652 d->gen = b->gen;
0653 }
0654
0655 p->next_bucket = ca->prio_buckets[i + 1];
0656 p->magic = pset_magic(&ca->sb);
0657 p->csum = bch_crc64(&p->magic, meta_bucket_bytes(&ca->sb) - 8);
0658
0659 bucket = bch_bucket_alloc(ca, RESERVE_PRIO, wait);
0660 BUG_ON(bucket == -1);
0661
0662 mutex_unlock(&ca->set->bucket_lock);
0663 prio_io(ca, bucket, REQ_OP_WRITE);
0664 mutex_lock(&ca->set->bucket_lock);
0665
0666 ca->prio_buckets[i] = bucket;
0667 atomic_dec_bug(&ca->buckets[bucket].pin);
0668 }
0669
0670 mutex_unlock(&ca->set->bucket_lock);
0671
0672 bch_journal_meta(ca->set, &cl);
0673 closure_sync(&cl);
0674
0675 mutex_lock(&ca->set->bucket_lock);
0676
/*
 * Don't want the old priorities to get garbage collected until after we
 * finish writing the new ones, and they're journalled.
 */
0681 for (i = 0; i < prio_buckets(ca); i++) {
0682 if (ca->prio_last_buckets[i])
0683 __bch_bucket_free(ca,
0684 &ca->buckets[ca->prio_last_buckets[i]]);
0685
0686 ca->prio_last_buckets[i] = ca->prio_buckets[i];
0687 }
0688 return 0;
0689 }
0690
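/*
 * Walk the on-disk chain of prio buckets starting from the one recorded in
 * the journal, verifying checksum and magic, and restore the prio and gen
 * of every bucket.
 */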
0691 static int prio_read(struct cache *ca, uint64_t bucket)
0692 {
0693 struct prio_set *p = ca->disk_buckets;
0694 struct bucket_disk *d = p->data + prios_per_bucket(ca), *end = d;
0695 struct bucket *b;
0696 unsigned int bucket_nr = 0;
0697 int ret = -EIO;
0698
0699 for (b = ca->buckets;
0700 b < ca->buckets + ca->sb.nbuckets;
0701 b++, d++) {
0702 if (d == end) {
0703 ca->prio_buckets[bucket_nr] = bucket;
0704 ca->prio_last_buckets[bucket_nr] = bucket;
0705 bucket_nr++;
0706
0707 prio_io(ca, bucket, REQ_OP_READ);
0708
0709 if (p->csum !=
0710 bch_crc64(&p->magic, meta_bucket_bytes(&ca->sb) - 8)) {
0711 pr_warn("bad csum reading priorities\n");
0712 goto out;
0713 }
0714
0715 if (p->magic != pset_magic(&ca->sb)) {
0716 pr_warn("bad magic reading priorities\n");
0717 goto out;
0718 }
0719
0720 bucket = p->next_bucket;
0721 d = p->data;
0722 }
0723
0724 b->prio = le16_to_cpu(d->prio);
0725 b->gen = b->last_gc = d->gen;
0726 }
0727
0728 ret = 0;
0729 out:
0730 return ret;
0731 }
0732
/* Bcache device */
0734
0735 static int open_dev(struct block_device *b, fmode_t mode)
0736 {
0737 struct bcache_device *d = b->bd_disk->private_data;
0738
0739 if (test_bit(BCACHE_DEV_CLOSING, &d->flags))
0740 return -ENXIO;
0741
0742 closure_get(&d->cl);
0743 return 0;
0744 }
0745
0746 static void release_dev(struct gendisk *b, fmode_t mode)
0747 {
0748 struct bcache_device *d = b->private_data;
0749
0750 closure_put(&d->cl);
0751 }
0752
0753 static int ioctl_dev(struct block_device *b, fmode_t mode,
0754 unsigned int cmd, unsigned long arg)
0755 {
0756 struct bcache_device *d = b->bd_disk->private_data;
0757
0758 return d->ioctl(d, mode, cmd, arg);
0759 }
0760
0761 static const struct block_device_operations bcache_cached_ops = {
0762 .submit_bio = cached_dev_submit_bio,
0763 .open = open_dev,
0764 .release = release_dev,
0765 .ioctl = ioctl_dev,
0766 .owner = THIS_MODULE,
0767 };
0768
0769 static const struct block_device_operations bcache_flash_ops = {
0770 .submit_bio = flash_dev_submit_bio,
0771 .open = open_dev,
0772 .release = release_dev,
0773 .ioctl = ioctl_dev,
0774 .owner = THIS_MODULE,
0775 };
0776
0777 void bcache_device_stop(struct bcache_device *d)
0778 {
0779 if (!test_and_set_bit(BCACHE_DEV_CLOSING, &d->flags))
/*
 * closure_fn set to
 * - cached device: cached_dev_flush()
 * - flash dev: flash_dev_flush()
 */
0785 closure_queue(&d->cl);
0786 }
0787
0788 static void bcache_device_unlink(struct bcache_device *d)
0789 {
0790 lockdep_assert_held(&bch_register_lock);
0791
0792 if (d->c && !test_and_set_bit(BCACHE_DEV_UNLINK_DONE, &d->flags)) {
0793 struct cache *ca = d->c->cache;
0794
0795 sysfs_remove_link(&d->c->kobj, d->name);
0796 sysfs_remove_link(&d->kobj, "cache");
0797
0798 bd_unlink_disk_holder(ca->bdev, d->disk);
0799 }
0800 }
0801
0802 static void bcache_device_link(struct bcache_device *d, struct cache_set *c,
0803 const char *name)
0804 {
0805 struct cache *ca = c->cache;
0806 int ret;
0807
0808 bd_link_disk_holder(ca->bdev, d->disk);
0809
0810 snprintf(d->name, BCACHEDEVNAME_SIZE,
0811 "%s%u", name, d->id);
0812
0813 ret = sysfs_create_link(&d->kobj, &c->kobj, "cache");
0814 if (ret < 0)
0815 pr_err("Couldn't create device -> cache set symlink\n");
0816
0817 ret = sysfs_create_link(&c->kobj, &d->kobj, d->name);
0818 if (ret < 0)
0819 pr_err("Couldn't create cache set -> device symlink\n");
0820
0821 clear_bit(BCACHE_DEV_UNLINK_DONE, &d->flags);
0822 }
0823
0824 static void bcache_device_detach(struct bcache_device *d)
0825 {
0826 lockdep_assert_held(&bch_register_lock);
0827
0828 atomic_dec(&d->c->attached_dev_nr);
0829
0830 if (test_bit(BCACHE_DEV_DETACHING, &d->flags)) {
0831 struct uuid_entry *u = d->c->uuids + d->id;
0832
0833 SET_UUID_FLASH_ONLY(u, 0);
0834 memcpy(u->uuid, invalid_uuid, 16);
0835 u->invalidated = cpu_to_le32((u32)ktime_get_real_seconds());
0836 bch_uuid_write(d->c);
0837 }
0838
0839 bcache_device_unlink(d);
0840
0841 d->c->devices[d->id] = NULL;
0842 closure_put(&d->c->caching);
0843 d->c = NULL;
0844 }
0845
0846 static void bcache_device_attach(struct bcache_device *d, struct cache_set *c,
0847 unsigned int id)
0848 {
0849 d->id = id;
0850 d->c = c;
0851 c->devices[id] = d;
0852
0853 if (id >= c->devices_max_used)
0854 c->devices_max_used = id + 1;
0855
0856 closure_get(&c->caching);
0857 }
0858
0859 static inline int first_minor_to_idx(int first_minor)
0860 {
0861 return (first_minor/BCACHE_MINORS);
0862 }
0863
0864 static inline int idx_to_first_minor(int idx)
0865 {
0866 return (idx * BCACHE_MINORS);
0867 }
0868
0869 static void bcache_device_free(struct bcache_device *d)
0870 {
0871 struct gendisk *disk = d->disk;
0872
0873 lockdep_assert_held(&bch_register_lock);
0874
0875 if (disk)
0876 pr_info("%s stopped\n", disk->disk_name);
0877 else
0878 pr_err("bcache device (NULL gendisk) stopped\n");
0879
0880 if (d->c)
0881 bcache_device_detach(d);
0882
0883 if (disk) {
0884 ida_simple_remove(&bcache_device_idx,
0885 first_minor_to_idx(disk->first_minor));
0886 put_disk(disk);
0887 }
0888
0889 bioset_exit(&d->bio_split);
0890 kvfree(d->full_dirty_stripes);
0891 kvfree(d->stripe_sectors_dirty);
0892
0893 closure_debug_destroy(&d->cl);
0894 }
0895
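/*
 * Common initialization for cached and flash-only devices: dirty stripe
 * bookkeeping, minor number allocation, bio set, gendisk and request queue
 * limits.
 */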
0896 static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
0897 sector_t sectors, struct block_device *cached_bdev,
0898 const struct block_device_operations *ops)
0899 {
0900 struct request_queue *q;
0901 const size_t max_stripes = min_t(size_t, INT_MAX,
0902 SIZE_MAX / sizeof(atomic_t));
0903 uint64_t n;
0904 int idx;
0905
0906 if (!d->stripe_size)
0907 d->stripe_size = 1 << 31;
0908
0909 n = DIV_ROUND_UP_ULL(sectors, d->stripe_size);
0910 if (!n || n > max_stripes) {
0911 pr_err("nr_stripes too large or invalid: %llu (start sector beyond end of disk?)\n",
0912 n);
0913 return -ENOMEM;
0914 }
0915 d->nr_stripes = n;
0916
0917 n = d->nr_stripes * sizeof(atomic_t);
0918 d->stripe_sectors_dirty = kvzalloc(n, GFP_KERNEL);
0919 if (!d->stripe_sectors_dirty)
0920 return -ENOMEM;
0921
0922 n = BITS_TO_LONGS(d->nr_stripes) * sizeof(unsigned long);
0923 d->full_dirty_stripes = kvzalloc(n, GFP_KERNEL);
0924 if (!d->full_dirty_stripes)
0925 goto out_free_stripe_sectors_dirty;
0926
0927 idx = ida_simple_get(&bcache_device_idx, 0,
0928 BCACHE_DEVICE_IDX_MAX, GFP_KERNEL);
0929 if (idx < 0)
0930 goto out_free_full_dirty_stripes;
0931
0932 if (bioset_init(&d->bio_split, 4, offsetof(struct bbio, bio),
0933 BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER))
0934 goto out_ida_remove;
0935
0936 d->disk = blk_alloc_disk(NUMA_NO_NODE);
0937 if (!d->disk)
0938 goto out_bioset_exit;
0939
0940 set_capacity(d->disk, sectors);
0941 snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", idx);
0942
0943 d->disk->major = bcache_major;
0944 d->disk->first_minor = idx_to_first_minor(idx);
0945 d->disk->minors = BCACHE_MINORS;
0946 d->disk->fops = ops;
0947 d->disk->private_data = d;
0948
0949 q = d->disk->queue;
0950 q->limits.max_hw_sectors = UINT_MAX;
0951 q->limits.max_sectors = UINT_MAX;
0952 q->limits.max_segment_size = UINT_MAX;
0953 q->limits.max_segments = BIO_MAX_VECS;
0954 blk_queue_max_discard_sectors(q, UINT_MAX);
0955 q->limits.discard_granularity = 512;
0956 q->limits.io_min = block_size;
0957 q->limits.logical_block_size = block_size;
0958 q->limits.physical_block_size = block_size;
0959
0960 if (q->limits.logical_block_size > PAGE_SIZE && cached_bdev) {
/*
 * This should only happen with BCACHE_SB_VERSION_BDEV.
 * Block/page size is checked for BCACHE_SB_VERSION_CDEV.
 */
0965 pr_info("%s: sb/logical block size (%u) greater than page size (%lu) falling back to device logical block size (%u)\n",
0966 d->disk->disk_name, q->limits.logical_block_size,
0967 PAGE_SIZE, bdev_logical_block_size(cached_bdev));

/* This also adjusts physical block size/min io size if needed */
0970 blk_queue_logical_block_size(q, bdev_logical_block_size(cached_bdev));
0971 }
0972
0973 blk_queue_flag_set(QUEUE_FLAG_NONROT, d->disk->queue);
0974 blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, d->disk->queue);
0975
0976 blk_queue_write_cache(q, true, true);
0977
0978 return 0;
0979
0980 out_bioset_exit:
0981 bioset_exit(&d->bio_split);
0982 out_ida_remove:
0983 ida_simple_remove(&bcache_device_idx, idx);
0984 out_free_full_dirty_stripes:
0985 kvfree(d->full_dirty_stripes);
0986 out_free_stripe_sectors_dirty:
0987 kvfree(d->stripe_sectors_dirty);
0988 return -ENOMEM;
0989
0990 }
0991
/* Cached device */
0993
0994 static void calc_cached_dev_sectors(struct cache_set *c)
0995 {
0996 uint64_t sectors = 0;
0997 struct cached_dev *dc;
0998
0999 list_for_each_entry(dc, &c->cached_devs, list)
1000 sectors += bdev_nr_sectors(dc->bdev);
1001
1002 c->cached_dev_sectors = sectors;
1003 }
1004
1005 #define BACKING_DEV_OFFLINE_TIMEOUT 5
1006 static int cached_dev_status_update(void *arg)
1007 {
1008 struct cached_dev *dc = arg;
1009 struct request_queue *q;
1010
/*
 * Check the backing device once per second; if its request queue has
 * been dying for BACKING_DEV_OFFLINE_TIMEOUT seconds, disable further
 * I/O to the bcache device and stop it.
 */
1016 while (!kthread_should_stop() && !dc->io_disable) {
1017 q = bdev_get_queue(dc->bdev);
1018 if (blk_queue_dying(q))
1019 dc->offline_seconds++;
1020 else
1021 dc->offline_seconds = 0;
1022
1023 if (dc->offline_seconds >= BACKING_DEV_OFFLINE_TIMEOUT) {
1024 pr_err("%pg: device offline for %d seconds\n",
1025 dc->bdev,
1026 BACKING_DEV_OFFLINE_TIMEOUT);
1027 pr_err("%s: disable I/O request due to backing device offline\n",
1028 dc->disk.name);
1029 dc->io_disable = true;
1030
1031 smp_mb();
1032 bcache_device_stop(&dc->disk);
1033 break;
1034 }
1035 schedule_timeout_interruptible(HZ);
1036 }
1037
1038 wait_for_kthread_stop();
1039 return 0;
1040 }
1041
1042
1043 int bch_cached_dev_run(struct cached_dev *dc)
1044 {
1045 int ret = 0;
1046 struct bcache_device *d = &dc->disk;
1047 char *buf = kmemdup_nul(dc->sb.label, SB_LABEL_SIZE, GFP_KERNEL);
1048 char *env[] = {
1049 "DRIVER=bcache",
1050 kasprintf(GFP_KERNEL, "CACHED_UUID=%pU", dc->sb.uuid),
1051 kasprintf(GFP_KERNEL, "CACHED_LABEL=%s", buf ? : ""),
1052 NULL,
1053 };
1054
1055 if (dc->io_disable) {
1056 pr_err("I/O disabled on cached dev %pg\n", dc->bdev);
1057 ret = -EIO;
1058 goto out;
1059 }
1060
1061 if (atomic_xchg(&dc->running, 1)) {
1062 pr_info("cached dev %pg is running already\n", dc->bdev);
1063 ret = -EBUSY;
1064 goto out;
1065 }
1066
1067 if (!d->c &&
1068 BDEV_STATE(&dc->sb) != BDEV_STATE_NONE) {
1069 struct closure cl;
1070
1071 closure_init_stack(&cl);
1072
1073 SET_BDEV_STATE(&dc->sb, BDEV_STATE_STALE);
1074 bch_write_bdev_super(dc, &cl);
1075 closure_sync(&cl);
1076 }
1077
1078 ret = add_disk(d->disk);
1079 if (ret)
1080 goto out;
1081 bd_link_disk_holder(dc->bdev, dc->disk.disk);
1082
/*
 * Won't show up in the uevent file, use udevadm monitor -e instead;
 * only class / kset properties are persistent.
 */
1086 kobject_uevent_env(&disk_to_dev(d->disk)->kobj, KOBJ_CHANGE, env);
1087
1088 if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") ||
1089 sysfs_create_link(&disk_to_dev(d->disk)->kobj,
1090 &d->kobj, "bcache")) {
1091 pr_err("Couldn't create bcache dev <-> disk sysfs symlinks\n");
1092 ret = -ENOMEM;
1093 goto out;
1094 }
1095
1096 dc->status_update_thread = kthread_run(cached_dev_status_update,
1097 dc, "bcache_status_update");
1098 if (IS_ERR(dc->status_update_thread)) {
1099 pr_warn("failed to create bcache_status_update kthread, continue to run without monitoring backing device status\n");
1100 }
1101
1102 out:
1103 kfree(env[1]);
1104 kfree(env[2]);
1105 kfree(buf);
1106 return ret;
1107 }
1108
/*
 * If BCACHE_DEV_RATE_DW_RUNNING is set, the routine of the delayed
 * work dc->writeback_rate_update is running. Wait until the routine
 * quits (BCACHE_DEV_RATE_DW_RUNNING is cleared), then continue to
 * cancel it. If BCACHE_DEV_RATE_DW_RUNNING is still set after time_out
 * ticks, give up waiting and cancel it anyway.
 */
1116 static void cancel_writeback_rate_update_dwork(struct cached_dev *dc)
1117 {
1118 int time_out = WRITEBACK_RATE_UPDATE_SECS_MAX * HZ;
1119
1120 do {
1121 if (!test_bit(BCACHE_DEV_RATE_DW_RUNNING,
1122 &dc->disk.flags))
1123 break;
1124 time_out--;
1125 schedule_timeout_interruptible(1);
1126 } while (time_out > 0);
1127
1128 if (time_out == 0)
1129 pr_warn("give up waiting for dc->writeback_write_update to quit\n");
1130
1131 cancel_delayed_work_sync(&dc->writeback_rate_update);
1132 }
1133
1134 static void cached_dev_detach_finish(struct work_struct *w)
1135 {
1136 struct cached_dev *dc = container_of(w, struct cached_dev, detach);
1137 struct cache_set *c = dc->disk.c;
1138
1139 BUG_ON(!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags));
1140 BUG_ON(refcount_read(&dc->count));
1141
1142
1143 if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
1144 cancel_writeback_rate_update_dwork(dc);
1145
1146 if (!IS_ERR_OR_NULL(dc->writeback_thread)) {
1147 kthread_stop(dc->writeback_thread);
1148 dc->writeback_thread = NULL;
1149 }
1150
1151 mutex_lock(&bch_register_lock);
1152
1153 bcache_device_detach(&dc->disk);
1154 list_move(&dc->list, &uncached_devices);
1155 calc_cached_dev_sectors(c);
1156
1157 clear_bit(BCACHE_DEV_DETACHING, &dc->disk.flags);
1158 clear_bit(BCACHE_DEV_UNLINK_DONE, &dc->disk.flags);
1159
1160 mutex_unlock(&bch_register_lock);
1161
1162 pr_info("Caching disabled for %pg\n", dc->bdev);
1163
/* Drop the ref we took in cached_dev_detach() */
1165 closure_put(&dc->disk.cl);
1166 }
1167
1168 void bch_cached_dev_detach(struct cached_dev *dc)
1169 {
1170 lockdep_assert_held(&bch_register_lock);
1171
1172 if (test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags))
1173 return;
1174
1175 if (test_and_set_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
1176 return;
1177
/*
 * Block the device from being closed and freed until we're finished
 * detaching.
 */
1182 closure_get(&dc->disk.cl);
1183
1184 bch_writeback_queue(dc);
1185
1186 cached_dev_put(dc);
1187 }
1188
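/*
 * Attach a backing device to a cache set: validate the superblocks, find or
 * allocate a uuid entry, start the writeback machinery and create the sysfs
 * links.
 */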
1189 int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
1190 uint8_t *set_uuid)
1191 {
1192 uint32_t rtime = cpu_to_le32((u32)ktime_get_real_seconds());
1193 struct uuid_entry *u;
1194 struct cached_dev *exist_dc, *t;
1195 int ret = 0;
1196
1197 if ((set_uuid && memcmp(set_uuid, c->set_uuid, 16)) ||
1198 (!set_uuid && memcmp(dc->sb.set_uuid, c->set_uuid, 16)))
1199 return -ENOENT;
1200
1201 if (dc->disk.c) {
1202 pr_err("Can't attach %pg: already attached\n", dc->bdev);
1203 return -EINVAL;
1204 }
1205
1206 if (test_bit(CACHE_SET_STOPPING, &c->flags)) {
1207 pr_err("Can't attach %pg: shutting down\n", dc->bdev);
1208 return -EINVAL;
1209 }
1210
1211 if (dc->sb.block_size < c->cache->sb.block_size) {
1212
1213 pr_err("Couldn't attach %pg: block size less than set's block size\n",
1214 dc->bdev);
1215 return -EINVAL;
1216 }
1217
/* Check whether this device is already attached */
1219 list_for_each_entry_safe(exist_dc, t, &c->cached_devs, list) {
1220 if (!memcmp(dc->sb.uuid, exist_dc->sb.uuid, 16)) {
1221 pr_err("Tried to attach %pg but duplicate UUID already attached\n",
1222 dc->bdev);
1223
1224 return -EINVAL;
1225 }
1226 }
1227
1228 u = uuid_find(c, dc->sb.uuid);
1229
1230 if (u &&
1231 (BDEV_STATE(&dc->sb) == BDEV_STATE_STALE ||
1232 BDEV_STATE(&dc->sb) == BDEV_STATE_NONE)) {
1233 memcpy(u->uuid, invalid_uuid, 16);
1234 u->invalidated = cpu_to_le32((u32)ktime_get_real_seconds());
1235 u = NULL;
1236 }
1237
1238 if (!u) {
1239 if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
1240 pr_err("Couldn't find uuid for %pg in set\n", dc->bdev);
1241 return -ENOENT;
1242 }
1243
1244 u = uuid_find_empty(c);
1245 if (!u) {
1246 pr_err("Not caching %pg, no room for UUID\n", dc->bdev);
1247 return -EINVAL;
1248 }
1249 }
1250
1251
1252
1253
1254
1255
1256 if (bch_is_zero(u->uuid, 16)) {
1257 struct closure cl;
1258
1259 closure_init_stack(&cl);
1260
1261 memcpy(u->uuid, dc->sb.uuid, 16);
1262 memcpy(u->label, dc->sb.label, SB_LABEL_SIZE);
1263 u->first_reg = u->last_reg = rtime;
1264 bch_uuid_write(c);
1265
1266 memcpy(dc->sb.set_uuid, c->set_uuid, 16);
1267 SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN);
1268
1269 bch_write_bdev_super(dc, &cl);
1270 closure_sync(&cl);
1271 } else {
1272 u->last_reg = rtime;
1273 bch_uuid_write(c);
1274 }
1275
1276 bcache_device_attach(&dc->disk, c, u - c->uuids);
1277 list_move(&dc->list, &c->cached_devs);
1278 calc_cached_dev_sectors(c);
1279
/*
 * dc->c must be set before dc->count != 0 - paired with the mb in
 * cached_dev_get().
 */
1284 smp_wmb();
1285 refcount_set(&dc->count, 1);
1286
/* Block writeback thread, but spawn it */
1288 down_write(&dc->writeback_lock);
1289 if (bch_cached_dev_writeback_start(dc)) {
1290 up_write(&dc->writeback_lock);
1291 pr_err("Couldn't start writeback facilities for %s\n",
1292 dc->disk.disk->disk_name);
1293 return -ENOMEM;
1294 }
1295
1296 if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
1297 atomic_set(&dc->has_dirty, 1);
1298 bch_writeback_queue(dc);
1299 }
1300
1301 bch_sectors_dirty_init(&dc->disk);
1302
1303 ret = bch_cached_dev_run(dc);
1304 if (ret && (ret != -EBUSY)) {
1305 up_write(&dc->writeback_lock);
/*
 * bch_register_lock is held, so bcache_device_stop() cannot be
 * called directly here. The kthread and kworker created previously
 * in bch_cached_dev_writeback_start() have to be stopped manually.
 */
1312 kthread_stop(dc->writeback_thread);
1313 cancel_writeback_rate_update_dwork(dc);
1314 pr_err("Couldn't run cached device %pg\n", dc->bdev);
1315 return ret;
1316 }
1317
1318 bcache_device_link(&dc->disk, c, "bdev");
1319 atomic_inc(&c->attached_dev_nr);
1320
1321 if (bch_has_feature_obso_large_bucket(&(c->cache->sb))) {
1322 pr_err("The obsoleted large bucket layout is unsupported, set the bcache device into read-only\n");
1323 pr_err("Please update to the latest bcache-tools to create the cache device\n");
1324 set_disk_ro(dc->disk.disk, 1);
1325 }
1326
/* Allow the writeback thread to proceed */
1328 up_write(&dc->writeback_lock);
1329
1330 pr_info("Caching %pg as %s on set %pU\n",
1331 dc->bdev,
1332 dc->disk.disk->disk_name,
1333 dc->disk.c->set_uuid);
1334 return 0;
1335 }
1336
/* When dc->disk.kobj is released */
1338 void bch_cached_dev_release(struct kobject *kobj)
1339 {
1340 struct cached_dev *dc = container_of(kobj, struct cached_dev,
1341 disk.kobj);
1342 kfree(dc);
1343 module_put(THIS_MODULE);
1344 }
1345
1346 static void cached_dev_free(struct closure *cl)
1347 {
1348 struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
1349
1350 if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
1351 cancel_writeback_rate_update_dwork(dc);
1352
1353 if (!IS_ERR_OR_NULL(dc->writeback_thread))
1354 kthread_stop(dc->writeback_thread);
1355 if (!IS_ERR_OR_NULL(dc->status_update_thread))
1356 kthread_stop(dc->status_update_thread);
1357
1358 mutex_lock(&bch_register_lock);
1359
1360 if (atomic_read(&dc->running)) {
1361 bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
1362 del_gendisk(dc->disk.disk);
1363 }
1364 bcache_device_free(&dc->disk);
1365 list_del(&dc->list);
1366
1367 mutex_unlock(&bch_register_lock);
1368
1369 if (dc->sb_disk)
1370 put_page(virt_to_page(dc->sb_disk));
1371
1372 if (!IS_ERR_OR_NULL(dc->bdev))
1373 blkdev_put(dc->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1374
1375 wake_up(&unregister_wait);
1376
1377 kobject_put(&dc->disk.kobj);
1378 }
1379
1380 static void cached_dev_flush(struct closure *cl)
1381 {
1382 struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
1383 struct bcache_device *d = &dc->disk;
1384
1385 mutex_lock(&bch_register_lock);
1386 bcache_device_unlink(d);
1387 mutex_unlock(&bch_register_lock);
1388
1389 bch_cache_accounting_destroy(&dc->accounting);
1390 kobject_del(&d->kobj);
1391
1392 continue_at(cl, cached_dev_free, system_wq);
1393 }
1394
1395 static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
1396 {
1397 int ret;
1398 struct io *io;
1399 struct request_queue *q = bdev_get_queue(dc->bdev);
1400
1401 __module_get(THIS_MODULE);
1402 INIT_LIST_HEAD(&dc->list);
1403 closure_init(&dc->disk.cl, NULL);
1404 set_closure_fn(&dc->disk.cl, cached_dev_flush, system_wq);
1405 kobject_init(&dc->disk.kobj, &bch_cached_dev_ktype);
1406 INIT_WORK(&dc->detach, cached_dev_detach_finish);
1407 sema_init(&dc->sb_write_mutex, 1);
1408 INIT_LIST_HEAD(&dc->io_lru);
1409 spin_lock_init(&dc->io_lock);
1410 bch_cache_accounting_init(&dc->accounting, &dc->disk.cl);
1411
1412 dc->sequential_cutoff = 4 << 20;
1413
1414 for (io = dc->io; io < dc->io + RECENT_IO; io++) {
1415 list_add(&io->lru, &dc->io_lru);
1416 hlist_add_head(&io->hash, dc->io_hash + RECENT_IO);
1417 }
1418
1419 dc->disk.stripe_size = q->limits.io_opt >> 9;
1420
1421 if (dc->disk.stripe_size)
1422 dc->partial_stripes_expensive =
1423 q->limits.raid_partial_stripes_expensive;
1424
1425 ret = bcache_device_init(&dc->disk, block_size,
1426 bdev_nr_sectors(dc->bdev) - dc->sb.data_offset,
1427 dc->bdev, &bcache_cached_ops);
1428 if (ret)
1429 return ret;
1430
1431 blk_queue_io_opt(dc->disk.disk->queue,
1432 max(queue_io_opt(dc->disk.disk->queue), queue_io_opt(q)));
1433
1434 atomic_set(&dc->io_errors, 0);
1435 dc->io_disable = false;
1436 dc->error_limit = DEFAULT_CACHED_DEV_ERROR_LIMIT;
1437
1438 dc->stop_when_cache_set_failed = BCH_CACHED_DEV_STOP_AUTO;
1439
1440 bch_cached_dev_request_init(dc);
1441 bch_cached_dev_writeback_init(dc);
1442 return 0;
1443 }
1444
/* Cached device - bcache superblock */

1447 static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
1448 struct block_device *bdev,
1449 struct cached_dev *dc)
1450 {
1451 const char *err = "cannot allocate memory";
1452 struct cache_set *c;
1453 int ret = -ENOMEM;
1454
1455 memcpy(&dc->sb, sb, sizeof(struct cache_sb));
1456 dc->bdev = bdev;
1457 dc->bdev->bd_holder = dc;
1458 dc->sb_disk = sb_disk;
1459
1460 if (cached_dev_init(dc, sb->block_size << 9))
1461 goto err;
1462
1463 err = "error creating kobject";
1464 if (kobject_add(&dc->disk.kobj, bdev_kobj(bdev), "bcache"))
1465 goto err;
1466 if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj))
1467 goto err;
1468
1469 pr_info("registered backing device %pg\n", dc->bdev);
1470
1471 list_add(&dc->list, &uncached_devices);
1472
1473 list_for_each_entry(c, &bch_cache_sets, list)
1474 bch_cached_dev_attach(dc, c, NULL);
1475
1476 if (BDEV_STATE(&dc->sb) == BDEV_STATE_NONE ||
1477 BDEV_STATE(&dc->sb) == BDEV_STATE_STALE) {
1478 err = "failed to run cached device";
1479 ret = bch_cached_dev_run(dc);
1480 if (ret)
1481 goto err;
1482 }
1483
1484 return 0;
1485 err:
1486 pr_notice("error %pg: %s\n", dc->bdev, err);
1487 bcache_device_stop(&dc->disk);
1488 return ret;
1489 }
1490
/* Flash only volumes */

/* When d->kobj is released */
1494 void bch_flash_dev_release(struct kobject *kobj)
1495 {
1496 struct bcache_device *d = container_of(kobj, struct bcache_device,
1497 kobj);
1498 kfree(d);
1499 }
1500
1501 static void flash_dev_free(struct closure *cl)
1502 {
1503 struct bcache_device *d = container_of(cl, struct bcache_device, cl);
1504
1505 mutex_lock(&bch_register_lock);
1506 atomic_long_sub(bcache_dev_sectors_dirty(d),
1507 &d->c->flash_dev_dirty_sectors);
1508 del_gendisk(d->disk);
1509 bcache_device_free(d);
1510 mutex_unlock(&bch_register_lock);
1511 kobject_put(&d->kobj);
1512 }
1513
1514 static void flash_dev_flush(struct closure *cl)
1515 {
1516 struct bcache_device *d = container_of(cl, struct bcache_device, cl);
1517
1518 mutex_lock(&bch_register_lock);
1519 bcache_device_unlink(d);
1520 mutex_unlock(&bch_register_lock);
1521 kobject_del(&d->kobj);
1522 continue_at(cl, flash_dev_free, system_wq);
1523 }
1524
1525 static int flash_dev_run(struct cache_set *c, struct uuid_entry *u)
1526 {
1527 int err = -ENOMEM;
1528 struct bcache_device *d = kzalloc(sizeof(struct bcache_device),
1529 GFP_KERNEL);
1530 if (!d)
1531 goto err_ret;
1532
1533 closure_init(&d->cl, NULL);
1534 set_closure_fn(&d->cl, flash_dev_flush, system_wq);
1535
1536 kobject_init(&d->kobj, &bch_flash_dev_ktype);
1537
1538 if (bcache_device_init(d, block_bytes(c->cache), u->sectors,
1539 NULL, &bcache_flash_ops))
1540 goto err;
1541
1542 bcache_device_attach(d, c, u - c->uuids);
1543 bch_sectors_dirty_init(d);
1544 bch_flash_dev_request_init(d);
1545 err = add_disk(d->disk);
1546 if (err)
1547 goto err;
1548
1549 err = kobject_add(&d->kobj, &disk_to_dev(d->disk)->kobj, "bcache");
1550 if (err)
1551 goto err;
1552
1553 bcache_device_link(d, c, "volume");
1554
1555 if (bch_has_feature_obso_large_bucket(&c->cache->sb)) {
1556 pr_err("The obsoleted large bucket layout is unsupported, set the bcache device into read-only\n");
1557 pr_err("Please update to the latest bcache-tools to create the cache device\n");
1558 set_disk_ro(d->disk, 1);
1559 }
1560
1561 return 0;
1562 err:
1563 kobject_put(&d->kobj);
1564 err_ret:
1565 return err;
1566 }
1567
1568 static int flash_devs_run(struct cache_set *c)
1569 {
1570 int ret = 0;
1571 struct uuid_entry *u;
1572
1573 for (u = c->uuids;
1574 u < c->uuids + c->nr_uuids && !ret;
1575 u++)
1576 if (UUID_FLASH_ONLY(u))
1577 ret = flash_dev_run(c, u);
1578
1579 return ret;
1580 }
1581
1582 int bch_flash_dev_create(struct cache_set *c, uint64_t size)
1583 {
1584 struct uuid_entry *u;
1585
1586 if (test_bit(CACHE_SET_STOPPING, &c->flags))
1587 return -EINTR;
1588
1589 if (!test_bit(CACHE_SET_RUNNING, &c->flags))
1590 return -EPERM;
1591
1592 u = uuid_find_empty(c);
1593 if (!u) {
1594 pr_err("Can't create volume, no room for UUID\n");
1595 return -EINVAL;
1596 }
1597
1598 get_random_bytes(u->uuid, 16);
1599 memset(u->label, 0, 32);
1600 u->first_reg = u->last_reg = cpu_to_le32((u32)ktime_get_real_seconds());
1601
1602 SET_UUID_FLASH_ONLY(u, 1);
1603 u->sectors = size >> 9;
1604
1605 bch_uuid_write(c);
1606
1607 return flash_dev_run(c, u);
1608 }
1609
1610 bool bch_cached_dev_error(struct cached_dev *dc)
1611 {
1612 if (!dc || test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags))
1613 return false;
1614
1615 dc->io_disable = true;
/* make others know io_disable is true earlier */
1617 smp_mb();
1618
1619 pr_err("stop %s: too many IO errors on backing device %pg\n",
1620 dc->disk.disk->disk_name, dc->bdev);
1621
1622 bcache_device_stop(&dc->disk);
1623 return true;
1624 }
1625
1626
1627
1628 __printf(2, 3)
1629 bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...)
1630 {
1631 struct va_format vaf;
1632 va_list args;
1633
1634 if (c->on_error != ON_ERROR_PANIC &&
1635 test_bit(CACHE_SET_STOPPING, &c->flags))
1636 return false;
1637
1638 if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags))
1639 pr_info("CACHE_SET_IO_DISABLE already set\n");
1640
/* XXX: this may be called from atomic context */

1646 va_start(args, fmt);
1647
1648 vaf.fmt = fmt;
1649 vaf.va = &args;
1650
1651 pr_err("error on %pU: %pV, disabling caching\n",
1652 c->set_uuid, &vaf);
1653
1654 va_end(args);
1655
1656 if (c->on_error == ON_ERROR_PANIC)
1657 panic("panic forced after error\n");
1658
1659 bch_cache_set_unregister(c);
1660 return true;
1661 }
1662
1663
1664 void bch_cache_set_release(struct kobject *kobj)
1665 {
1666 struct cache_set *c = container_of(kobj, struct cache_set, kobj);
1667
1668 kfree(c);
1669 module_put(THIS_MODULE);
1670 }
1671
1672 static void cache_set_free(struct closure *cl)
1673 {
1674 struct cache_set *c = container_of(cl, struct cache_set, cl);
1675 struct cache *ca;
1676
1677 debugfs_remove(c->debug);
1678
1679 bch_open_buckets_free(c);
1680 bch_btree_cache_free(c);
1681 bch_journal_free(c);
1682
1683 mutex_lock(&bch_register_lock);
1684 bch_bset_sort_state_free(&c->sort);
1685 free_pages((unsigned long) c->uuids, ilog2(meta_bucket_pages(&c->cache->sb)));
1686
1687 ca = c->cache;
1688 if (ca) {
1689 ca->set = NULL;
1690 c->cache = NULL;
1691 kobject_put(&ca->kobj);
1692 }
1693
1694
1695 if (c->moving_gc_wq)
1696 destroy_workqueue(c->moving_gc_wq);
1697 bioset_exit(&c->bio_split);
1698 mempool_exit(&c->fill_iter);
1699 mempool_exit(&c->bio_meta);
1700 mempool_exit(&c->search);
1701 kfree(c->devices);
1702
1703 list_del(&c->list);
1704 mutex_unlock(&bch_register_lock);
1705
1706 pr_info("Cache set %pU unregistered\n", c->set_uuid);
1707 wake_up(&unregister_wait);
1708
1709 closure_debug_destroy(&c->cl);
1710 kobject_put(&c->kobj);
1711 }
1712
1713 static void cache_set_flush(struct closure *cl)
1714 {
1715 struct cache_set *c = container_of(cl, struct cache_set, caching);
1716 struct cache *ca = c->cache;
1717 struct btree *b;
1718
1719 bch_cache_accounting_destroy(&c->accounting);
1720
1721 kobject_put(&c->internal);
1722 kobject_del(&c->kobj);
1723
1724 if (!IS_ERR_OR_NULL(c->gc_thread))
1725 kthread_stop(c->gc_thread);
1726
1727 if (!IS_ERR_OR_NULL(c->root))
1728 list_add(&c->root->list, &c->btree_cache);
1729
/*
 * Avoid flushing cached nodes if the cache set is retiring
 * due to too many I/O errors detected.
 */
1734 if (!test_bit(CACHE_SET_IO_DISABLE, &c->flags))
1735 list_for_each_entry(b, &c->btree_cache, list) {
1736 mutex_lock(&b->write_lock);
1737 if (btree_node_dirty(b))
1738 __bch_btree_node_write(b, NULL);
1739 mutex_unlock(&b->write_lock);
1740 }
1741
1742 if (ca->alloc_thread)
1743 kthread_stop(ca->alloc_thread);
1744
1745 if (c->journal.cur) {
1746 cancel_delayed_work_sync(&c->journal.work);
/* flush last journal entry if needed */
1748 c->journal.work.work.func(&c->journal.work.work);
1749 }
1750
1751 closure_return(cl);
1752 }
1753
/*
 * This function is only called when CACHE_SET_IO_DISABLE is set, which means
 * the cache set is unregistering due to too many I/O errors. In this
 * condition, the bcache device might be stopped; it depends on the
 * stop_when_cache_set_failed value and whether the broken cache has dirty
 * data:
 *
 * dc->stop_when_cache_set_failed    dc->has_dirty   stop bcache device
 *  BCH_CACHED_DEV_STOP_AUTO           0               NO
 *  BCH_CACHED_DEV_STOP_AUTO           1               YES
 *  BCH_CACHED_DEV_STOP_ALWAYS         0               YES
 *  BCH_CACHED_DEV_STOP_ALWAYS         1               YES
 *
 * The expected behavior is: if stop_when_cache_set_failed is configured to
 * "auto" via sysfs, the bcache device is not stopped as long as the backing
 * device is clean on the broken cache device.
 */
1770 static void conditional_stop_bcache_device(struct cache_set *c,
1771 struct bcache_device *d,
1772 struct cached_dev *dc)
1773 {
1774 if (dc->stop_when_cache_set_failed == BCH_CACHED_DEV_STOP_ALWAYS) {
1775 pr_warn("stop_when_cache_set_failed of %s is \"always\", stop it for failed cache set %pU.\n",
1776 d->disk->disk_name, c->set_uuid);
1777 bcache_device_stop(d);
1778 } else if (atomic_read(&dc->has_dirty)) {
/*
 * The bcache device is attached to a broken cache set and has
 * dirty data; stop it to avoid potential data corruption.
 */
1783 pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is dirty, stop it to avoid potential data corruption.\n",
1784 d->disk->disk_name);
/*
 * There is a small time gap where the cache set is released
 * but the bcache device is not; regular I/O would then go
 * directly to the backing device, which may introduce
 * inconsistent data in writeback mode while the cache is dirty.
 * Therefore set dc->io_disable explicitly before calling
 * bcache_device_stop().
 */
1796 dc->io_disable = true;
/* make others know io_disable is true earlier */
1798 smp_mb();
1799 bcache_device_stop(d);
1800 } else {
/*
 * dc->stop_when_cache_set_failed == BCH_CACHED_DEV_STOP_AUTO
 * and dc->has_dirty == 0
 */
1805 pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is clean, keep it alive.\n",
1806 d->disk->disk_name);
1807 }
1808 }
1809
1810 static void __cache_set_unregister(struct closure *cl)
1811 {
1812 struct cache_set *c = container_of(cl, struct cache_set, caching);
1813 struct cached_dev *dc;
1814 struct bcache_device *d;
1815 size_t i;
1816
1817 mutex_lock(&bch_register_lock);
1818
1819 for (i = 0; i < c->devices_max_used; i++) {
1820 d = c->devices[i];
1821 if (!d)
1822 continue;
1823
1824 if (!UUID_FLASH_ONLY(&c->uuids[i]) &&
1825 test_bit(CACHE_SET_UNREGISTERING, &c->flags)) {
1826 dc = container_of(d, struct cached_dev, disk);
1827 bch_cached_dev_detach(dc);
1828 if (test_bit(CACHE_SET_IO_DISABLE, &c->flags))
1829 conditional_stop_bcache_device(c, d, dc);
1830 } else {
1831 bcache_device_stop(d);
1832 }
1833 }
1834
1835 mutex_unlock(&bch_register_lock);
1836
1837 continue_at(cl, cache_set_flush, system_wq);
1838 }
1839
1840 void bch_cache_set_stop(struct cache_set *c)
1841 {
1842 if (!test_and_set_bit(CACHE_SET_STOPPING, &c->flags))
/* closure_fn set to __cache_set_unregister() */
1844 closure_queue(&c->caching);
1845 }
1846
1847 void bch_cache_set_unregister(struct cache_set *c)
1848 {
1849 set_bit(CACHE_SET_UNREGISTERING, &c->flags);
1850 bch_cache_set_stop(c);
1851 }
1852
1853 #define alloc_meta_bucket_pages(gfp, sb) \
1854 ((void *) __get_free_pages(__GFP_ZERO|__GFP_COMP|gfp, ilog2(meta_bucket_pages(sb))))
1855
1856 struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
1857 {
1858 int iter_size;
1859 struct cache *ca = container_of(sb, struct cache, sb);
1860 struct cache_set *c = kzalloc(sizeof(struct cache_set), GFP_KERNEL);
1861
1862 if (!c)
1863 return NULL;
1864
1865 __module_get(THIS_MODULE);
1866 closure_init(&c->cl, NULL);
1867 set_closure_fn(&c->cl, cache_set_free, system_wq);
1868
1869 closure_init(&c->caching, &c->cl);
1870 set_closure_fn(&c->caching, __cache_set_unregister, system_wq);
1871
1872
1873 closure_set_stopped(&c->cl);
1874 closure_put(&c->cl);
1875
1876 kobject_init(&c->kobj, &bch_cache_set_ktype);
1877 kobject_init(&c->internal, &bch_cache_set_internal_ktype);
1878
1879 bch_cache_accounting_init(&c->accounting, &c->cl);
1880
1881 memcpy(c->set_uuid, sb->set_uuid, 16);
1882
1883 c->cache = ca;
1884 c->cache->set = c;
1885 c->bucket_bits = ilog2(sb->bucket_size);
1886 c->block_bits = ilog2(sb->block_size);
1887 c->nr_uuids = meta_bucket_bytes(sb) / sizeof(struct uuid_entry);
1888 c->devices_max_used = 0;
1889 atomic_set(&c->attached_dev_nr, 0);
1890 c->btree_pages = meta_bucket_pages(sb);
1891 if (c->btree_pages > BTREE_MAX_PAGES)
1892 c->btree_pages = max_t(int, c->btree_pages / 4,
1893 BTREE_MAX_PAGES);
1894
1895 sema_init(&c->sb_write_mutex, 1);
1896 mutex_init(&c->bucket_lock);
1897 init_waitqueue_head(&c->btree_cache_wait);
1898 spin_lock_init(&c->btree_cannibalize_lock);
1899 init_waitqueue_head(&c->bucket_wait);
1900 init_waitqueue_head(&c->gc_wait);
1901 sema_init(&c->uuid_write_mutex, 1);
1902
1903 spin_lock_init(&c->btree_gc_time.lock);
1904 spin_lock_init(&c->btree_split_time.lock);
1905 spin_lock_init(&c->btree_read_time.lock);
1906
1907 bch_moving_init_cache_set(c);
1908
1909 INIT_LIST_HEAD(&c->list);
1910 INIT_LIST_HEAD(&c->cached_devs);
1911 INIT_LIST_HEAD(&c->btree_cache);
1912 INIT_LIST_HEAD(&c->btree_cache_freeable);
1913 INIT_LIST_HEAD(&c->btree_cache_freed);
1914 INIT_LIST_HEAD(&c->data_buckets);
1915
1916 iter_size = ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size + 1) *
1917 sizeof(struct btree_iter_set);
1918
1919 c->devices = kcalloc(c->nr_uuids, sizeof(void *), GFP_KERNEL);
1920 if (!c->devices)
1921 goto err;
1922
1923 if (mempool_init_slab_pool(&c->search, 32, bch_search_cache))
1924 goto err;
1925
1926 if (mempool_init_kmalloc_pool(&c->bio_meta, 2,
1927 sizeof(struct bbio) +
1928 sizeof(struct bio_vec) * meta_bucket_pages(sb)))
1929 goto err;
1930
1931 if (mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size))
1932 goto err;
1933
1934 if (bioset_init(&c->bio_split, 4, offsetof(struct bbio, bio),
1935 BIOSET_NEED_RESCUER))
1936 goto err;
1937
1938 c->uuids = alloc_meta_bucket_pages(GFP_KERNEL, sb);
1939 if (!c->uuids)
1940 goto err;
1941
1942 c->moving_gc_wq = alloc_workqueue("bcache_gc", WQ_MEM_RECLAIM, 0);
1943 if (!c->moving_gc_wq)
1944 goto err;
1945
1946 if (bch_journal_alloc(c))
1947 goto err;
1948
1949 if (bch_btree_cache_alloc(c))
1950 goto err;
1951
1952 if (bch_open_buckets_alloc(c))
1953 goto err;
1954
1955 if (bch_bset_sort_state_init(&c->sort, ilog2(c->btree_pages)))
1956 goto err;
1957
1958 c->congested_read_threshold_us = 2000;
1959 c->congested_write_threshold_us = 20000;
1960 c->error_limit = DEFAULT_IO_ERROR_LIMIT;
1961 c->idle_max_writeback_rate_enabled = 1;
1962 WARN_ON(test_and_clear_bit(CACHE_SET_IO_DISABLE, &c->flags));
1963
1964 return c;
1965 err:
1966 bch_cache_set_unregister(c);
1967 return NULL;
1968 }
1969
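/*
 * Bring a cache set online: if it was cleanly synced, read the priorities,
 * the uuid array and the btree root via the journal and replay it;
 * otherwise invalidate existing data and write fresh metadata. Finally
 * start the gc thread and attach any waiting backing devices.
 */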
1970 static int run_cache_set(struct cache_set *c)
1971 {
1972 const char *err = "cannot allocate memory";
1973 struct cached_dev *dc, *t;
1974 struct cache *ca = c->cache;
1975 struct closure cl;
1976 LIST_HEAD(journal);
1977 struct journal_replay *l;
1978
1979 closure_init_stack(&cl);
1980
1981 c->nbuckets = ca->sb.nbuckets;
1982 set_gc_sectors(c);
1983
1984 if (CACHE_SYNC(&c->cache->sb)) {
1985 struct bkey *k;
1986 struct jset *j;
1987
1988 err = "cannot allocate memory for journal";
1989 if (bch_journal_read(c, &journal))
1990 goto err;
1991
1992 pr_debug("btree_journal_read() done\n");
1993
1994 err = "no journal entries found";
1995 if (list_empty(&journal))
1996 goto err;
1997
1998 j = &list_entry(journal.prev, struct journal_replay, list)->j;
1999
2000 err = "IO error reading priorities";
2001 if (prio_read(ca, j->prio_bucket[ca->sb.nr_this_dev]))
2002 goto err;
2003
/*
 * If prio_read() fails it'll call cache_set_error and we'll
 * tear everything down right away, but if we perhaps checked
 * sooner we could avoid journal replay.
 */

2010 k = &j->btree_root;
2011
2012 err = "bad btree root";
2013 if (__bch_btree_ptr_invalid(c, k))
2014 goto err;
2015
2016 err = "error reading btree root";
2017 c->root = bch_btree_node_get(c, NULL, k,
2018 j->btree_level,
2019 true, NULL);
2020 if (IS_ERR_OR_NULL(c->root))
2021 goto err;
2022
2023 list_del_init(&c->root->list);
2024 rw_unlock(true, c->root);
2025
2026 err = uuid_read(c, j, &cl);
2027 if (err)
2028 goto err;
2029
2030 err = "error in recovery";
2031 if (bch_btree_check(c))
2032 goto err;
2033
2034 bch_journal_mark(c, &journal);
2035 bch_initial_gc_finish(c);
2036 pr_debug("btree_check() done\n");
2037
/*
 * bch_journal_next() can't happen sooner, or
 * btree_gc_finish() will give spurious errors about last_gc >
 * gc_gen - this is a hack but oh well.
 */
2043 bch_journal_next(&c->journal);
2044
2045 err = "error starting allocator thread";
2046 if (bch_cache_allocator_start(ca))
2047 goto err;
2048
/*
 * First place it's safe to allocate: btree_check() and
 * btree_gc_finish() have to run before we have buckets to
 * allocate, and bch_bucket_alloc_set() might cause a journal
 * entry to be written so bch_journal_next() has to be called
 * first.
 *
 * If the uuids were in the old format we have to rewrite them
 * before the next journal entry is written:
 */
2059 if (j->version < BCACHE_JSET_VERSION_UUID)
2060 __uuid_write(c);
2061
2062 err = "bcache: replay journal failed";
2063 if (bch_journal_replay(c, &journal))
2064 goto err;
2065 } else {
2066 unsigned int j;
2067
2068 pr_notice("invalidating existing data\n");
2069 ca->sb.keys = clamp_t(int, ca->sb.nbuckets >> 7,
2070 2, SB_JOURNAL_BUCKETS);
2071
2072 for (j = 0; j < ca->sb.keys; j++)
2073 ca->sb.d[j] = ca->sb.first_bucket + j;
2074
2075 bch_initial_gc_finish(c);
2076
2077 err = "error starting allocator thread";
2078 if (bch_cache_allocator_start(ca))
2079 goto err;
2080
2081 mutex_lock(&c->bucket_lock);
2082 bch_prio_write(ca, true);
2083 mutex_unlock(&c->bucket_lock);
2084
2085 err = "cannot allocate new UUID bucket";
2086 if (__uuid_write(c))
2087 goto err;
2088
2089 err = "cannot allocate new btree root";
2090 c->root = __bch_btree_node_alloc(c, NULL, 0, true, NULL);
2091 if (IS_ERR_OR_NULL(c->root))
2092 goto err;
2093
2094 mutex_lock(&c->root->write_lock);
2095 bkey_copy_key(&c->root->key, &MAX_KEY);
2096 bch_btree_node_write(c->root, &cl);
2097 mutex_unlock(&c->root->write_lock);
2098
2099 bch_btree_set_root(c->root);
2100 rw_unlock(true, c->root);
2101
/*
 * We don't want to write the first journal entry until
 * everything is set up - fortunately journal entries won't be
 * written until the SET_CACHE_SYNC() here :)
 */
2107 SET_CACHE_SYNC(&c->cache->sb, true);
2108
2109 bch_journal_next(&c->journal);
2110 bch_journal_meta(c, &cl);
2111 }
2112
2113 err = "error starting gc thread";
2114 if (bch_gc_thread_start(c))
2115 goto err;
2116
2117 closure_sync(&cl);
2118 c->cache->sb.last_mount = (u32)ktime_get_real_seconds();
2119 bcache_write_super(c);
2120
2121 if (bch_has_feature_obso_large_bucket(&c->cache->sb))
2122 pr_err("Detect obsoleted large bucket layout, all attached bcache device will be read-only\n");
2123
2124 list_for_each_entry_safe(dc, t, &uncached_devices, list)
2125 bch_cached_dev_attach(dc, c, NULL);
2126
2127 flash_devs_run(c);
2128
2129 bch_journal_space_reserve(&c->journal);
2130 set_bit(CACHE_SET_RUNNING, &c->flags);
2131 return 0;
2132 err:
2133 while (!list_empty(&journal)) {
2134 l = list_first_entry(&journal, struct journal_replay, list);
2135 list_del(&l->list);
2136 kfree(l);
2137 }
2138
2139 closure_sync(&cl);
2140
2141 bch_cache_set_error(c, "%s", err);
2142
2143 return -EIO;
2144 }
2145
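/*
 * Find the cache set this cache belongs to by set UUID, allocating a new
 * one if necessary, link the two in sysfs and run the cache set.
 * Returns NULL on success or an error string.
 */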
2146 static const char *register_cache_set(struct cache *ca)
2147 {
2148 char buf[12];
2149 const char *err = "cannot allocate memory";
2150 struct cache_set *c;
2151
2152 list_for_each_entry(c, &bch_cache_sets, list)
2153 if (!memcmp(c->set_uuid, ca->sb.set_uuid, 16)) {
2154 if (c->cache)
2155 return "duplicate cache set member";
2156
2157 goto found;
2158 }
2159
2160 c = bch_cache_set_alloc(&ca->sb);
2161 if (!c)
2162 return err;
2163
2164 err = "error creating kobject";
2165 if (kobject_add(&c->kobj, bcache_kobj, "%pU", c->set_uuid) ||
2166 kobject_add(&c->internal, &c->kobj, "internal"))
2167 goto err;
2168
2169 if (bch_cache_accounting_add_kobjs(&c->accounting, &c->kobj))
2170 goto err;
2171
2172 bch_debug_init_cache_set(c);
2173
2174 list_add(&c->list, &bch_cache_sets);
2175 found:
2176 sprintf(buf, "cache%i", ca->sb.nr_this_dev);
2177 if (sysfs_create_link(&ca->kobj, &c->kobj, "set") ||
2178 sysfs_create_link(&c->kobj, &ca->kobj, buf))
2179 goto err;
2180
2181 kobject_get(&ca->kobj);
2182 ca->set = c;
2183 ca->set->cache = ca;
2184
2185 err = "failed to run cache set";
2186 if (run_cache_set(c) < 0)
2187 goto err;
2188
2189 return NULL;
2190 err:
2191 bch_cache_set_unregister(c);
2192 return err;
2193 }
2194
/* Cache device */

/* When ca->kobj is released */
2198 void bch_cache_release(struct kobject *kobj)
2199 {
2200 struct cache *ca = container_of(kobj, struct cache, kobj);
2201 unsigned int i;
2202
2203 if (ca->set) {
2204 BUG_ON(ca->set->cache != ca);
2205 ca->set->cache = NULL;
2206 }
2207
2208 free_pages((unsigned long) ca->disk_buckets, ilog2(meta_bucket_pages(&ca->sb)));
2209 kfree(ca->prio_buckets);
2210 vfree(ca->buckets);
2211
2212 free_heap(&ca->heap);
2213 free_fifo(&ca->free_inc);
2214
2215 for (i = 0; i < RESERVE_NR; i++)
2216 free_fifo(&ca->free[i]);
2217
2218 if (ca->sb_disk)
2219 put_page(virt_to_page(ca->sb_disk));
2220
2221 if (!IS_ERR_OR_NULL(ca->bdev))
2222 blkdev_put(ca->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
2223
2224 kfree(ca);
2225 module_put(THIS_MODULE);
2226 }
2227
2228 static int cache_alloc(struct cache *ca)
2229 {
2230 size_t free;
2231 size_t btree_buckets;
2232 struct bucket *b;
2233 int ret = -ENOMEM;
2234 const char *err = NULL;
2235
2236 __module_get(THIS_MODULE);
2237 kobject_init(&ca->kobj, &bch_cache_ktype);
2238
2239 bio_init(&ca->journal.bio, NULL, ca->journal.bio.bi_inline_vecs, 8, 0);
2240
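/*
 * When the superblock records journal buckets, journal replay may split
 * btree nodes and therefore needs buckets from the RESERVE_BTREE pool.
 * In the worst case every journal bucket holds valid entries that must be
 * replayed, so size that reserve to the number of journal buckets, with a
 * floor of 8 when no journal exists.
 */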
2250 btree_buckets = ca->sb.njournal_buckets ?: 8;
2251 free = roundup_pow_of_two(ca->sb.nbuckets) >> 10;
2252 if (!free) {
2253 ret = -EPERM;
2254 err = "ca->sb.nbuckets is too small";
2255 goto err_free;
2256 }
2257
2258 if (!init_fifo(&ca->free[RESERVE_BTREE], btree_buckets,
2259 GFP_KERNEL)) {
2260 err = "ca->free[RESERVE_BTREE] alloc failed";
2261 goto err_btree_alloc;
2262 }
2263
2264 if (!init_fifo_exact(&ca->free[RESERVE_PRIO], prio_buckets(ca),
2265 GFP_KERNEL)) {
2266 err = "ca->free[RESERVE_PRIO] alloc failed";
2267 goto err_prio_alloc;
2268 }
2269
2270 if (!init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL)) {
2271 err = "ca->free[RESERVE_MOVINGGC] alloc failed";
2272 goto err_movinggc_alloc;
2273 }
2274
2275 if (!init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL)) {
2276 err = "ca->free[RESERVE_NONE] alloc failed";
2277 goto err_none_alloc;
2278 }
2279
2280 if (!init_fifo(&ca->free_inc, free << 2, GFP_KERNEL)) {
2281 err = "ca->free_inc alloc failed";
2282 goto err_free_inc_alloc;
2283 }
2284
2285 if (!init_heap(&ca->heap, free << 3, GFP_KERNEL)) {
2286 err = "ca->heap alloc failed";
2287 goto err_heap_alloc;
2288 }
2289
2290 ca->buckets = vzalloc(array_size(sizeof(struct bucket),
2291 ca->sb.nbuckets));
2292 if (!ca->buckets) {
2293 err = "ca->buckets alloc failed";
2294 goto err_buckets_alloc;
2295 }
2296
2297 ca->prio_buckets = kzalloc(array3_size(sizeof(uint64_t),
2298 prio_buckets(ca), 2),
2299 GFP_KERNEL);
2300 if (!ca->prio_buckets) {
2301 err = "ca->prio_buckets alloc failed";
2302 goto err_prio_buckets_alloc;
2303 }
2304
2305 ca->disk_buckets = alloc_meta_bucket_pages(GFP_KERNEL, &ca->sb);
2306 if (!ca->disk_buckets) {
2307 err = "ca->disk_buckets alloc failed";
2308 goto err_disk_buckets_alloc;
2309 }
2310
2311 ca->prio_last_buckets = ca->prio_buckets + prio_buckets(ca);
2312
2313 for_each_bucket(b, ca)
2314 atomic_set(&b->pin, 0);
2315 return 0;
2316
2317 err_disk_buckets_alloc:
2318 kfree(ca->prio_buckets);
2319 err_prio_buckets_alloc:
2320 vfree(ca->buckets);
2321 err_buckets_alloc:
2322 free_heap(&ca->heap);
2323 err_heap_alloc:
2324 free_fifo(&ca->free_inc);
2325 err_free_inc_alloc:
2326 free_fifo(&ca->free[RESERVE_NONE]);
2327 err_none_alloc:
2328 free_fifo(&ca->free[RESERVE_MOVINGGC]);
2329 err_movinggc_alloc:
2330 free_fifo(&ca->free[RESERVE_PRIO]);
2331 err_prio_alloc:
2332 free_fifo(&ca->free[RESERVE_BTREE]);
2333 err_btree_alloc:
2334 err_free:
2335 module_put(THIS_MODULE);
2336 if (err)
2337 pr_notice("error %pg: %s\n", ca->bdev, err);
2338 return ret;
2339 }
2340
2341 static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
2342 struct block_device *bdev, struct cache *ca)
2343 {
2344 const char *err = NULL;
2345 int ret = 0;
2346
2347 memcpy(&ca->sb, sb, sizeof(struct cache_sb));
2348 ca->bdev = bdev;
2349 ca->bdev->bd_holder = ca;
2350 ca->sb_disk = sb_disk;
2351
2352 if (bdev_max_discard_sectors(bdev))
2353 ca->discard = CACHE_DISCARD(&ca->sb);
2354
2355 ret = cache_alloc(ca);
2356 if (ret != 0) {
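/*
 * The error path below jumps past kobject_put(), so bch_cache_release()
 * never runs for this cache and would not put the bdev for us; drop the
 * reference explicitly here to avoid leaking it.
 */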
2363 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
2364 if (ret == -ENOMEM)
2365 err = "cache_alloc(): -ENOMEM";
2366 else if (ret == -EPERM)
2367 err = "cache_alloc(): cache device is too small";
2368 else
2369 err = "cache_alloc(): unknown error";
2370 goto err;
2371 }
2372
2373 if (kobject_add(&ca->kobj, bdev_kobj(bdev), "bcache")) {
2374 err = "error calling kobject_add";
2375 ret = -ENOMEM;
2376 goto out;
2377 }
2378
2379 mutex_lock(&bch_register_lock);
2380 err = register_cache_set(ca);
2381 mutex_unlock(&bch_register_lock);
2382
2383 if (err) {
2384 ret = -ENODEV;
2385 goto out;
2386 }
2387
2388 pr_info("registered cache device %pg\n", ca->bdev);
2389
2390 out:
2391 kobject_put(&ca->kobj);
2392
2393 err:
2394 if (err)
2395 pr_notice("error %pg: %s\n", ca->bdev, err);
2396
2397 return ret;
2398 }
2399
2400
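/* Global interfaces/init */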
2401
2402 static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
2403 const char *buffer, size_t size);
2404 static ssize_t bch_pending_bdevs_cleanup(struct kobject *k,
2405 struct kobj_attribute *attr,
2406 const char *buffer, size_t size);
2407
2408 kobj_attribute_write(register, register_bcache);
2409 kobj_attribute_write(register_quiet, register_bcache);
2410 kobj_attribute_write(pendings_cleanup, bch_pending_bdevs_cleanup);
2411
2412 static bool bch_is_open_backing(dev_t dev)
2413 {
2414 struct cache_set *c, *tc;
2415 struct cached_dev *dc, *t;
2416
2417 list_for_each_entry_safe(c, tc, &bch_cache_sets, list)
2418 list_for_each_entry_safe(dc, t, &c->cached_devs, list)
2419 if (dc->bdev->bd_dev == dev)
2420 return true;
2421 list_for_each_entry_safe(dc, t, &uncached_devices, list)
2422 if (dc->bdev->bd_dev == dev)
2423 return true;
2424 return false;
2425 }
2426
2427 static bool bch_is_open_cache(dev_t dev)
2428 {
2429 struct cache_set *c, *tc;
2430
2431 list_for_each_entry_safe(c, tc, &bch_cache_sets, list) {
2432 struct cache *ca = c->cache;
2433
2434 if (ca->bdev->bd_dev == dev)
2435 return true;
2436 }
2437
2438 return false;
2439 }
2440
2441 static bool bch_is_open(dev_t dev)
2442 {
2443 return bch_is_open_cache(dev) || bch_is_open_backing(dev);
2444 }
2445
2446 struct async_reg_args {
2447 struct delayed_work reg_work;
2448 char *path;
2449 struct cache_sb *sb;
2450 struct cache_sb_disk *sb_disk;
2451 struct block_device *bdev;
2452 };
2453
2454 static void register_bdev_worker(struct work_struct *work)
2455 {
2456 bool fail = false;
2457 struct async_reg_args *args =
2458 container_of(work, struct async_reg_args, reg_work.work);
2459 struct cached_dev *dc;
2460
2461 dc = kzalloc(sizeof(*dc), GFP_KERNEL);
2462 if (!dc) {
2463 fail = true;
2464 put_page(virt_to_page(args->sb_disk));
2465 blkdev_put(args->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
2466 goto out;
2467 }
2468
2469 mutex_lock(&bch_register_lock);
2470 if (register_bdev(args->sb, args->sb_disk, args->bdev, dc) < 0)
2471 fail = true;
2472 mutex_unlock(&bch_register_lock);
2473
2474 out:
2475 if (fail)
2476 pr_info("error %s: failed to register backing device\n",
2477 args->path);
2478 kfree(args->sb);
2479 kfree(args->path);
2480 kfree(args);
2481 module_put(THIS_MODULE);
2482 }
2483
2484 static void register_cache_worker(struct work_struct *work)
2485 {
2486 bool fail = false;
2487 struct async_reg_args *args =
2488 container_of(work, struct async_reg_args, reg_work.work);
2489 struct cache *ca;
2490
2491 ca = kzalloc(sizeof(*ca), GFP_KERNEL);
2492 if (!ca) {
2493 fail = true;
2494 put_page(virt_to_page(args->sb_disk));
2495 blkdev_put(args->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
2496 goto out;
2497 }
2498
2499
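/* from here the bdev reference is released by register_cache() on failure or by bch_cache_release() at teardown */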
2500 if (register_cache(args->sb, args->sb_disk, args->bdev, ca) != 0)
2501 fail = true;
2502
2503 out:
2504 if (fail)
2505 pr_info("error %s: failed to register cache device\n",
2506 args->path);
2507 kfree(args->sb);
2508 kfree(args->path);
2509 kfree(args);
2510 module_put(THIS_MODULE);
2511 }
2512
2513 static void register_device_async(struct async_reg_args *args)
2514 {
2515 if (SB_IS_BDEV(args->sb))
2516 INIT_DELAYED_WORK(&args->reg_work, register_bdev_worker);
2517 else
2518 INIT_DELAYED_WORK(&args->reg_work, register_cache_worker);
2519
2520
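/* a small (10 jiffy) delay before the registration worker runs */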
2521 queue_delayed_work(system_wq, &args->reg_work, 10);
2522 }
2523
2524 static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
2525 const char *buffer, size_t size)
2526 {
2527 const char *err;
2528 char *path = NULL;
2529 struct cache_sb *sb;
2530 struct cache_sb_disk *sb_disk;
2531 struct block_device *bdev;
2532 ssize_t ret;
2533 bool async_registration = false;
2534
2535 #ifdef CONFIG_BCACHE_ASYNC_REGISTRATION
2536 async_registration = true;
2537 #endif
2538
2539 ret = -EBUSY;
2540 err = "failed to reference bcache module";
2541 if (!try_module_get(THIS_MODULE))
2542 goto out;
2543
2544
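/* pairs with the barrier in bcache_reboot() so the latest bcache_is_reboot is seen */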
2545 smp_mb();
2546 err = "bcache is in reboot";
2547 if (bcache_is_reboot)
2548 goto out_module_put;
2549
2550 ret = -ENOMEM;
2551 err = "cannot allocate memory";
2552 path = kstrndup(buffer, size, GFP_KERNEL);
2553 if (!path)
2554 goto out_module_put;
2555
2556 sb = kmalloc(sizeof(struct cache_sb), GFP_KERNEL);
2557 if (!sb)
2558 goto out_free_path;
2559
2560 ret = -EINVAL;
2561 err = "failed to open device";
2562 bdev = blkdev_get_by_path(strim(path),
2563 FMODE_READ|FMODE_WRITE|FMODE_EXCL,
2564 sb);
2565 if (IS_ERR(bdev)) {
2566 if (bdev == ERR_PTR(-EBUSY)) {
2567 dev_t dev;
2568
2569 mutex_lock(&bch_register_lock);
2570 if (lookup_bdev(strim(path), &dev) == 0 &&
2571 bch_is_open(dev))
2572 err = "device already registered";
2573 else
2574 err = "device busy";
2575 mutex_unlock(&bch_register_lock);
2576 if (attr == &ksysfs_register_quiet)
2577 goto done;
2578 }
2579 goto out_free_sb;
2580 }
2581
2582 err = "failed to set blocksize";
2583 if (set_blocksize(bdev, 4096))
2584 goto out_blkdev_put;
2585
2586 err = read_super(sb, bdev, &sb_disk);
2587 if (err)
2588 goto out_blkdev_put;
2589
2590 err = "failed to register device";
2591
2592 if (async_registration) {
2593
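/* asynchronous registration: hand the actual work off to a delayed worker */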
2594 struct async_reg_args *args =
2595 kzalloc(sizeof(struct async_reg_args), GFP_KERNEL);
2596
2597 if (!args) {
2598 ret = -ENOMEM;
2599 err = "cannot allocate memory";
2600 goto out_put_sb_page;
2601 }
2602
2603 args->path = path;
2604 args->sb = sb;
2605 args->sb_disk = sb_disk;
2606 args->bdev = bdev;
2607 register_device_async(args);
2608
2609 goto async_done;
2610 }
2611
2612 if (SB_IS_BDEV(sb)) {
2613 struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL);
2614
2615 if (!dc) {
2616 ret = -ENOMEM;
2617 err = "cannot allocate memory";
2618 goto out_put_sb_page;
2619 }
2620
2621 mutex_lock(&bch_register_lock);
2622 ret = register_bdev(sb, sb_disk, bdev, dc);
2623 mutex_unlock(&bch_register_lock);
2624
2625 if (ret < 0)
2626 goto out_free_sb;
2627 } else {
2628 struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
2629
2630 if (!ca) {
2631 ret = -ENOMEM;
2632 err = "cannot allocate memory";
2633 goto out_put_sb_page;
2634 }
2635
2636
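/* register_cache()/bch_cache_release() own the bdev reference from here, so the error path below must not put it again */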
2637 ret = register_cache(sb, sb_disk, bdev, ca);
2638 if (ret)
2639 goto out_free_sb;
2640 }
2641
2642 done:
2643 kfree(sb);
2644 kfree(path);
2645 module_put(THIS_MODULE);
2646 async_done:
2647 return size;
2648
2649 out_put_sb_page:
2650 put_page(virt_to_page(sb_disk));
2651 out_blkdev_put:
2652 blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
2653 out_free_sb:
2654 kfree(sb);
2655 out_free_path:
2656 kfree(path);
2657 path = NULL;
2658 out_module_put:
2659 module_put(THIS_MODULE);
2660 out:
2661 pr_info("error %s: %s\n", path ? path : "", err);
2662 return ret;
2663 }
2664
2665
2666 struct pdev {
2667 struct list_head list;
2668 struct cached_dev *dc;
2669 };
2670
2671 static ssize_t bch_pending_bdevs_cleanup(struct kobject *k,
2672 struct kobj_attribute *attr,
2673 const char *buffer,
2674 size_t size)
2675 {
2676 LIST_HEAD(pending_devs);
2677 ssize_t ret = size;
2678 struct cached_dev *dc, *tdc;
2679 struct pdev *pdev, *tpdev;
2680 struct cache_set *c, *tc;
2681
2682 mutex_lock(&bch_register_lock);
2683 list_for_each_entry_safe(dc, tdc, &uncached_devices, list) {
2684 pdev = kmalloc(sizeof(struct pdev), GFP_KERNEL);
2685 if (!pdev)
2686 break;
2687 pdev->dc = dc;
2688 list_add(&pdev->list, &pending_devs);
2689 }
2690
2691 list_for_each_entry_safe(pdev, tpdev, &pending_devs, list) {
2692 char *pdev_set_uuid = pdev->dc->sb.set_uuid;
2693 list_for_each_entry_safe(c, tc, &bch_cache_sets, list) {
2694 char *set_uuid = c->set_uuid;
2695
2696 if (!memcmp(pdev_set_uuid, set_uuid, 16)) {
2697 list_del(&pdev->list);
2698 kfree(pdev);
2699 break;
2700 }
2701 }
2702 }
2703 mutex_unlock(&bch_register_lock);
2704
2705 list_for_each_entry_safe(pdev, tpdev, &pending_devs, list) {
2706 pr_info("delete pdev %p\n", pdev);
2707 list_del(&pdev->list);
2708 bcache_device_stop(&pdev->dc->disk);
2709 kfree(pdev);
2710 }
2711
2712 return ret;
2713 }
2714
2715 static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x)
2716 {
2717 if (bcache_is_reboot)
2718 return NOTIFY_DONE;
2719
2720 if (code == SYS_DOWN ||
2721 code == SYS_HALT ||
2722 code == SYS_POWER_OFF) {
2723 DEFINE_WAIT(wait);
2724 unsigned long start = jiffies;
2725 bool stopped = false;
2726
2727 struct cache_set *c, *tc;
2728 struct cached_dev *dc, *tdc;
2729
2730 mutex_lock(&bch_register_lock);
2731
2732 if (bcache_is_reboot)
2733 goto out;
2734
2735
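/* reject any new registration from this point on */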
2736 bcache_is_reboot = true;
2737
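/*
 * Make sure a concurrent register_bcache() on another CPU observes
 * bcache_is_reboot == true before anything is torn down.
 */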
2741 smp_mb();
2742
2743 if (list_empty(&bch_cache_sets) &&
2744 list_empty(&uncached_devices))
2745 goto out;
2746
2747 mutex_unlock(&bch_register_lock);
2748
2749 pr_info("Stopping all devices:\n");
2750
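/*
 * bch_register_lock is deliberately not held while calling
 * bch_cache_set_stop() and bcache_device_stop(): both stop paths take
 * bch_register_lock themselves, so holding it here could deadlock during
 * reboot.  This is safe because bcache_is_reboot is already true, so
 * register_bcache() rejects new registrations and this notifier cannot
 * race with itself while walking the lists below.
 */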
2765 list_for_each_entry_safe(c, tc, &bch_cache_sets, list)
2766 bch_cache_set_stop(c);
2767
2768 list_for_each_entry_safe(dc, tdc, &uncached_devices, list)
2769 bcache_device_stop(&dc->disk);
2770
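/*
 * Give other kthreads and kworkers an early chance to stop themselves
 * before the wait loop below starts checking for them.
 */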
2776 schedule();
2777
2778
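/* wait up to ~10 seconds for every cache set and backing device to go away */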
2779 while (1) {
2780 long timeout = start + 10 * HZ - jiffies;
2781
2782 mutex_lock(&bch_register_lock);
2783 stopped = list_empty(&bch_cache_sets) &&
2784 list_empty(&uncached_devices);
2785
2786 if (timeout < 0 || stopped)
2787 break;
2788
2789 prepare_to_wait(&unregister_wait, &wait,
2790 TASK_UNINTERRUPTIBLE);
2791
2792 mutex_unlock(&bch_register_lock);
2793 schedule_timeout(timeout);
2794 }
2795
2796 finish_wait(&unregister_wait, &wait);
2797
2798 if (stopped)
2799 pr_info("All devices stopped\n");
2800 else
2801 pr_notice("Timeout waiting for devices to be closed\n");
2802 out:
2803 mutex_unlock(&bch_register_lock);
2804 }
2805
2806 return NOTIFY_DONE;
2807 }
2808
2809 static struct notifier_block reboot = {
2810 .notifier_call = bcache_reboot,
2811 .priority = INT_MAX,
2812 };
2813
2814 static void bcache_exit(void)
2815 {
2816 bch_debug_exit();
2817 bch_request_exit();
2818 if (bcache_kobj)
2819 kobject_put(bcache_kobj);
2820 if (bcache_wq)
2821 destroy_workqueue(bcache_wq);
2822 if (bch_journal_wq)
2823 destroy_workqueue(bch_journal_wq);
2824 if (bch_flush_wq)
2825 destroy_workqueue(bch_flush_wq);
2826 bch_btree_exit();
2827
2828 if (bcache_major)
2829 unregister_blkdev(bcache_major, "bcache");
2830 unregister_reboot_notifier(&reboot);
2831 mutex_destroy(&bch_register_lock);
2832 }
2833
2834
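/* check and fix up module parameters */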
2835 static void check_module_parameters(void)
2836 {
2837 if (bch_cutoff_writeback_sync == 0)
2838 bch_cutoff_writeback_sync = CUTOFF_WRITEBACK_SYNC;
2839 else if (bch_cutoff_writeback_sync > CUTOFF_WRITEBACK_SYNC_MAX) {
2840 pr_warn("set bch_cutoff_writeback_sync (%u) to max value %u\n",
2841 bch_cutoff_writeback_sync, CUTOFF_WRITEBACK_SYNC_MAX);
2842 bch_cutoff_writeback_sync = CUTOFF_WRITEBACK_SYNC_MAX;
2843 }
2844
2845 if (bch_cutoff_writeback == 0)
2846 bch_cutoff_writeback = CUTOFF_WRITEBACK;
2847 else if (bch_cutoff_writeback > CUTOFF_WRITEBACK_MAX) {
2848 pr_warn("set bch_cutoff_writeback (%u) to max value %u\n",
2849 bch_cutoff_writeback, CUTOFF_WRITEBACK_MAX);
2850 bch_cutoff_writeback = CUTOFF_WRITEBACK_MAX;
2851 }
2852
2853 if (bch_cutoff_writeback > bch_cutoff_writeback_sync) {
2854 pr_warn("set bch_cutoff_writeback (%u) to %u\n",
2855 bch_cutoff_writeback, bch_cutoff_writeback_sync);
2856 bch_cutoff_writeback = bch_cutoff_writeback_sync;
2857 }
2858 }
2859
2860 static int __init bcache_init(void)
2861 {
2862 static const struct attribute *files[] = {
2863 &ksysfs_register.attr,
2864 &ksysfs_register_quiet.attr,
2865 &ksysfs_pendings_cleanup.attr,
2866 NULL
2867 };
2868
2869 check_module_parameters();
2870
2871 mutex_init(&bch_register_lock);
2872 init_waitqueue_head(&unregister_wait);
2873 register_reboot_notifier(&reboot);
2874
2875 bcache_major = register_blkdev(0, "bcache");
2876 if (bcache_major < 0) {
2877 unregister_reboot_notifier(&reboot);
2878 mutex_destroy(&bch_register_lock);
2879 return bcache_major;
2880 }
2881
2882 if (bch_btree_init())
2883 goto err;
2884
2885 bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0);
2886 if (!bcache_wq)
2887 goto err;
2888
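/*
 * bch_flush_wq is deliberately not WQ_MEM_RECLAIM: the flush work used to
 * run on system_wq, which does no memory reclaim either, and marking it
 * WQ_MEM_RECLAIM reportedly caused desktop stalls and slower boots.  A
 * dedicated queue is still used so journal flushes do not congest
 * system_wq.
 */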
2898 bch_flush_wq = alloc_workqueue("bch_flush", 0, 0);
2899 if (!bch_flush_wq)
2900 goto err;
2901
2902 bch_journal_wq = alloc_workqueue("bch_journal", WQ_MEM_RECLAIM, 0);
2903 if (!bch_journal_wq)
2904 goto err;
2905
2906 bcache_kobj = kobject_create_and_add("bcache", fs_kobj);
2907 if (!bcache_kobj)
2908 goto err;
2909
2910 if (bch_request_init() ||
2911 sysfs_create_files(bcache_kobj, files))
2912 goto err;
2913
2914 bch_debug_init();
2915 closure_debug_init();
2916
2917 bcache_is_reboot = false;
2918
2919 return 0;
2920 err:
2921 bcache_exit();
2922 return -ENOMEM;
2923 }
2924
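/* Module hooks */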
2928 module_exit(bcache_exit);
2929 module_init(bcache_init);
2930
2931 module_param(bch_cutoff_writeback, uint, 0);
2932 MODULE_PARM_DESC(bch_cutoff_writeback, "threshold to cutoff writeback");
2933
2934 module_param(bch_cutoff_writeback_sync, uint, 0);
2935 MODULE_PARM_DESC(bch_cutoff_writeback_sync, "hard threshold to cutoff writeback");
2936
2937 MODULE_DESCRIPTION("Bcache: a Linux block layer cache");
2938 MODULE_AUTHOR("Kent Overstreet <kent.overstreet@gmail.com>");
2939 MODULE_LICENSE("GPL");