// SPDX-License-Identifier: GPL-2.0
/*
 * bcache sysfs interfaces
 *
 * Copyright 2010, 2012 Kent Overstreet <kent.overstreet@gmail.com>
 * Copyright 2012 Google, Inc.
 */
#include "bcache.h"
#include "sysfs.h"
#include "btree.h"
#include "request.h"
#include "writeback.h"
#include "features.h"

#include <linux/blkdev.h>
#include <linux/sort.h>
#include <linux/sched/clock.h>

extern bool bcache_is_reboot;

/* Default is 0 ("writethrough") */
static const char * const bch_cache_modes[] = {
	"writethrough",
	"writeback",
	"writearound",
	"none",
	NULL
};

static const char * const bch_reada_cache_policies[] = {
	"all",
	"meta-only",
	NULL
};

/* Default is 0 ("auto") */
static const char * const bch_stop_on_failure_modes[] = {
	"auto",
	"always",
	NULL
};

static const char * const cache_replacement_policies[] = {
	"lru",
	"fifo",
	"random",
	NULL
};

static const char * const error_actions[] = {
	"unregister",
	"panic",
	NULL
};

write_attribute(attach);
write_attribute(detach);
write_attribute(unregister);
write_attribute(stop);
write_attribute(clear_stats);
write_attribute(trigger_gc);
write_attribute(prune_cache);
write_attribute(flash_vol_create);

read_attribute(bucket_size);
read_attribute(block_size);
read_attribute(nbuckets);
read_attribute(tree_depth);
read_attribute(root_usage_percent);
read_attribute(priority_stats);
read_attribute(btree_cache_size);
read_attribute(btree_cache_max_chain);
read_attribute(cache_available_percent);
read_attribute(written);
read_attribute(btree_written);
read_attribute(metadata_written);
read_attribute(active_journal_entries);
read_attribute(backing_dev_name);
read_attribute(backing_dev_uuid);

sysfs_time_stats_attribute(btree_gc, sec, ms);
sysfs_time_stats_attribute(btree_split, sec, us);
sysfs_time_stats_attribute(btree_sort, ms, us);
sysfs_time_stats_attribute(btree_read, ms, us);

read_attribute(btree_nodes);
read_attribute(btree_used_percent);
read_attribute(average_key_size);
read_attribute(dirty_data);
read_attribute(bset_tree_stats);
read_attribute(feature_compat);
read_attribute(feature_ro_compat);
read_attribute(feature_incompat);

read_attribute(state);
read_attribute(cache_read_races);
read_attribute(reclaim);
read_attribute(reclaimed_journal_buckets);
read_attribute(flush_write);
read_attribute(writeback_keys_done);
read_attribute(writeback_keys_failed);
read_attribute(io_errors);
read_attribute(congested);
read_attribute(cutoff_writeback);
read_attribute(cutoff_writeback_sync);
rw_attribute(congested_read_threshold_us);
rw_attribute(congested_write_threshold_us);

rw_attribute(sequential_cutoff);
rw_attribute(data_csum);
rw_attribute(cache_mode);
rw_attribute(readahead_cache_policy);
rw_attribute(stop_when_cache_set_failed);
rw_attribute(writeback_metadata);
rw_attribute(writeback_running);
rw_attribute(writeback_percent);
rw_attribute(writeback_delay);
rw_attribute(writeback_rate);
rw_attribute(writeback_consider_fragment);

rw_attribute(writeback_rate_update_seconds);
rw_attribute(writeback_rate_i_term_inverse);
rw_attribute(writeback_rate_p_term_inverse);
rw_attribute(writeback_rate_fp_term_low);
rw_attribute(writeback_rate_fp_term_mid);
rw_attribute(writeback_rate_fp_term_high);
rw_attribute(writeback_rate_minimum);
read_attribute(writeback_rate_debug);

read_attribute(stripe_size);
read_attribute(partial_stripes_expensive);

rw_attribute(synchronous);
rw_attribute(journal_delay_ms);
rw_attribute(io_disable);
rw_attribute(discard);
rw_attribute(running);
rw_attribute(label);
rw_attribute(errors);
rw_attribute(io_error_limit);
rw_attribute(io_error_halflife);
rw_attribute(verify);
rw_attribute(bypass_torture_test);
rw_attribute(key_merging_disabled);
rw_attribute(gc_always_rewrite);
rw_attribute(expensive_debug_checks);
rw_attribute(cache_replacement_policy);
rw_attribute(btree_shrinker_disabled);
rw_attribute(copy_gc_enabled);
rw_attribute(idle_max_writeback_rate);
rw_attribute(gc_after_writeback);
rw_attribute(size);

static ssize_t bch_snprint_string_list(char *buf,
				       size_t size,
				       const char * const list[],
				       size_t selected)
{
	char *out = buf;
	size_t i;

	for (i = 0; list[i]; i++)
		out += scnprintf(out, buf + size - out,
				 i == selected ? "[%s] " : "%s ", list[i]);

	out[-1] = '\n';
	return out - buf;
}
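
/*
 * A hedged usage sketch: attributes rendered through the helper above show
 * every choice on one line with the current selection bracketed. Reading
 * cache_mode from userspace (the device name bcache0 is hypothetical)
 * would print something like:
 *
 *   $ cat /sys/block/bcache0/bcache/cache_mode
 *   writethrough [writeback] writearound none
 */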

SHOW(__bch_cached_dev)
{
	struct cached_dev *dc = container_of(kobj, struct cached_dev,
					     disk.kobj);
	char const *states[] = { "no cache", "clean", "dirty", "inconsistent" };
	int wb = dc->writeback_running;

#define var(stat)	(dc->stat)

	if (attr == &sysfs_cache_mode)
		return bch_snprint_string_list(buf, PAGE_SIZE,
					       bch_cache_modes,
					       BDEV_CACHE_MODE(&dc->sb));

	if (attr == &sysfs_readahead_cache_policy)
		return bch_snprint_string_list(buf, PAGE_SIZE,
					       bch_reada_cache_policies,
					       dc->cache_readahead_policy);

	if (attr == &sysfs_stop_when_cache_set_failed)
		return bch_snprint_string_list(buf, PAGE_SIZE,
					       bch_stop_on_failure_modes,
					       dc->stop_when_cache_set_failed);

	sysfs_printf(data_csum, "%i", dc->disk.data_csum);
	var_printf(verify, "%i");
	var_printf(bypass_torture_test, "%i");
	var_printf(writeback_metadata, "%i");
	var_printf(writeback_running, "%i");
	var_printf(writeback_consider_fragment, "%i");
	var_print(writeback_delay);
	var_print(writeback_percent);
	sysfs_hprint(writeback_rate,
		     wb ? atomic_long_read(&dc->writeback_rate.rate) << 9 : 0);
	sysfs_printf(io_errors, "%i", atomic_read(&dc->io_errors));
	sysfs_printf(io_error_limit, "%i", dc->error_limit);
	sysfs_printf(io_disable, "%i", dc->io_disable);
	var_print(writeback_rate_update_seconds);
	var_print(writeback_rate_i_term_inverse);
	var_print(writeback_rate_p_term_inverse);
	var_print(writeback_rate_fp_term_low);
	var_print(writeback_rate_fp_term_mid);
	var_print(writeback_rate_fp_term_high);
	var_print(writeback_rate_minimum);

	if (attr == &sysfs_writeback_rate_debug) {
		char rate[20];
		char dirty[20];
		char target[20];
		char proportional[20];
		char integral[20];
		char change[20];
		s64 next_io;

		/*
		 * Except for dirty and target, the remaining values are
		 * reported as 0 when writeback is not running.
		 */
		bch_hprint(rate,
			   wb ? atomic_long_read(&dc->writeback_rate.rate) << 9
			      : 0);
		bch_hprint(dirty, bcache_dev_sectors_dirty(&dc->disk) << 9);
		bch_hprint(target, dc->writeback_rate_target << 9);
		bch_hprint(proportional,
			   wb ? dc->writeback_rate_proportional << 9 : 0);
		bch_hprint(integral,
			   wb ? dc->writeback_rate_integral_scaled << 9 : 0);
		bch_hprint(change, wb ? dc->writeback_rate_change << 9 : 0);
		next_io = wb ? div64_s64(dc->writeback_rate.next - local_clock(),
					 NSEC_PER_MSEC) : 0;

		return sprintf(buf,
			       "rate:\t\t%s/sec\n"
			       "dirty:\t\t%s\n"
			       "target:\t\t%s\n"
			       "proportional:\t%s\n"
			       "integral:\t%s\n"
			       "change:\t\t%s/sec\n"
			       "next io:\t%llims\n",
			       rate, dirty, target, proportional,
			       integral, change, next_io);
	}

	sysfs_hprint(dirty_data,
		     bcache_dev_sectors_dirty(&dc->disk) << 9);

	sysfs_hprint(stripe_size, ((uint64_t)dc->disk.stripe_size) << 9);
	var_printf(partial_stripes_expensive, "%u");

	var_hprint(sequential_cutoff);

	sysfs_print(running, atomic_read(&dc->running));
	sysfs_print(state, states[BDEV_STATE(&dc->sb)]);

	if (attr == &sysfs_label) {
		memcpy(buf, dc->sb.label, SB_LABEL_SIZE);
		buf[SB_LABEL_SIZE] = '\0';
		strcat(buf, "\n");
		return strlen(buf);
	}

	if (attr == &sysfs_backing_dev_name) {
		snprintf(buf, BDEVNAME_SIZE + 1, "%pg", dc->bdev);
		strcat(buf, "\n");
		return strlen(buf);
	}

	if (attr == &sysfs_backing_dev_uuid) {
		/* convert binary uuid into 36-byte string plus '\0' */
		snprintf(buf, 36+1, "%pU", dc->sb.uuid);
		strcat(buf, "\n");
		return strlen(buf);
	}

#undef var
	return 0;
}
SHOW_LOCKED(bch_cached_dev)
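
/*
 * A hedged usage sketch for the debug knob built above (the device name
 * bcache0 is hypothetical): reading writeback_rate_debug prints the rate,
 * dirty, target, proportional, integral, change and "next io" lines:
 *
 *   $ cat /sys/block/bcache0/bcache/writeback_rate_debug
 */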

STORE(__cached_dev)
{
	struct cached_dev *dc = container_of(kobj, struct cached_dev,
					     disk.kobj);
	ssize_t v;
	struct cache_set *c;
	struct kobj_uevent_env *env;

	/* no user space access if system is rebooting */
	if (bcache_is_reboot)
		return -EBUSY;

#define d_strtoul(var)		sysfs_strtoul(var, dc->var)
#define d_strtoul_nonzero(var)	sysfs_strtoul_clamp(var, dc->var, 1, INT_MAX)
#define d_strtoi_h(var)		sysfs_hatoi(var, dc->var)

	sysfs_strtoul(data_csum, dc->disk.data_csum);
	d_strtoul(verify);
	sysfs_strtoul_bool(bypass_torture_test, dc->bypass_torture_test);
	sysfs_strtoul_bool(writeback_metadata, dc->writeback_metadata);
	sysfs_strtoul_bool(writeback_running, dc->writeback_running);
	sysfs_strtoul_bool(writeback_consider_fragment,
			   dc->writeback_consider_fragment);
	sysfs_strtoul_clamp(writeback_delay, dc->writeback_delay, 0, UINT_MAX);

	sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent,
			    0, bch_cutoff_writeback);

	if (attr == &sysfs_writeback_rate) {
		ssize_t ret;
		long int v = atomic_long_read(&dc->writeback_rate.rate);

		ret = strtoul_safe_clamp(buf, v, 1, INT_MAX);

		if (!ret) {
			atomic_long_set(&dc->writeback_rate.rate, v);
			ret = size;
		}

		return ret;
	}

	sysfs_strtoul_clamp(writeback_rate_update_seconds,
			    dc->writeback_rate_update_seconds,
			    1, WRITEBACK_RATE_UPDATE_SECS_MAX);
	sysfs_strtoul_clamp(writeback_rate_i_term_inverse,
			    dc->writeback_rate_i_term_inverse,
			    1, UINT_MAX);
	sysfs_strtoul_clamp(writeback_rate_p_term_inverse,
			    dc->writeback_rate_p_term_inverse,
			    1, UINT_MAX);
	sysfs_strtoul_clamp(writeback_rate_fp_term_low,
			    dc->writeback_rate_fp_term_low,
			    1, dc->writeback_rate_fp_term_mid - 1);
	sysfs_strtoul_clamp(writeback_rate_fp_term_mid,
			    dc->writeback_rate_fp_term_mid,
			    dc->writeback_rate_fp_term_low + 1,
			    dc->writeback_rate_fp_term_high - 1);
	sysfs_strtoul_clamp(writeback_rate_fp_term_high,
			    dc->writeback_rate_fp_term_high,
			    dc->writeback_rate_fp_term_mid + 1, UINT_MAX);
	sysfs_strtoul_clamp(writeback_rate_minimum,
			    dc->writeback_rate_minimum,
			    1, UINT_MAX);

	sysfs_strtoul_clamp(io_error_limit, dc->error_limit, 0, INT_MAX);

	if (attr == &sysfs_io_disable) {
		int v = strtoul_or_return(buf);

		dc->io_disable = v ? 1 : 0;
	}

	sysfs_strtoul_clamp(sequential_cutoff,
			    dc->sequential_cutoff,
			    0, UINT_MAX);

	if (attr == &sysfs_clear_stats)
		bch_cache_accounting_clear(&dc->accounting);

	if (attr == &sysfs_running &&
	    strtoul_or_return(buf)) {
		v = bch_cached_dev_run(dc);
		if (v)
			return v;
	}

	if (attr == &sysfs_cache_mode) {
		v = __sysfs_match_string(bch_cache_modes, -1, buf);
		if (v < 0)
			return v;

		if ((unsigned int) v != BDEV_CACHE_MODE(&dc->sb)) {
			SET_BDEV_CACHE_MODE(&dc->sb, v);
			bch_write_bdev_super(dc, NULL);
		}
	}

	if (attr == &sysfs_readahead_cache_policy) {
		v = __sysfs_match_string(bch_reada_cache_policies, -1, buf);
		if (v < 0)
			return v;

		if ((unsigned int) v != dc->cache_readahead_policy)
			dc->cache_readahead_policy = v;
	}

	if (attr == &sysfs_stop_when_cache_set_failed) {
		v = __sysfs_match_string(bch_stop_on_failure_modes, -1, buf);
		if (v < 0)
			return v;

		dc->stop_when_cache_set_failed = v;
	}

	if (attr == &sysfs_label) {
		if (size > SB_LABEL_SIZE)
			return -EINVAL;
		memcpy(dc->sb.label, buf, size);
		if (size < SB_LABEL_SIZE)
			dc->sb.label[size] = '\0';
		if (size && dc->sb.label[size - 1] == '\n')
			dc->sb.label[size - 1] = '\0';
		bch_write_bdev_super(dc, NULL);
		if (dc->disk.c) {
			memcpy(dc->disk.c->uuids[dc->disk.id].label,
			       buf, SB_LABEL_SIZE);
			bch_uuid_write(dc->disk.c);
		}
		env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL);
		if (!env)
			return -ENOMEM;
		add_uevent_var(env, "DRIVER=bcache");
		add_uevent_var(env, "CACHED_UUID=%pU", dc->sb.uuid);
		add_uevent_var(env, "CACHED_LABEL=%s", buf);
		kobject_uevent_env(&disk_to_dev(dc->disk.disk)->kobj,
				   KOBJ_CHANGE,
				   env->envp);
		kfree(env);
	}

	if (attr == &sysfs_attach) {
		uint8_t set_uuid[16];

		if (bch_parse_uuid(buf, set_uuid) < 16)
			return -EINVAL;

		v = -ENOENT;
		list_for_each_entry(c, &bch_cache_sets, list) {
			v = bch_cached_dev_attach(dc, c, set_uuid);
			if (!v)
				return size;
		}
		if (v == -ENOENT)
			pr_err("Can't attach %s: cache set not found\n", buf);
		return v;
	}

	if (attr == &sysfs_detach && dc->disk.c)
		bch_cached_dev_detach(dc);

	if (attr == &sysfs_stop)
		bcache_device_stop(&dc->disk);

	return size;
}
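
/*
 * Illustrative shell usage for the store hooks above (device name and
 * cache-set UUID are hypothetical):
 *
 *   # attach the backing device to a registered cache set
 *   $ echo 0226553a-37cf-41d5-b3ce-8b1e944543a8 \
 *       > /sys/block/bcache0/bcache/attach
 *   # switch the cache mode; matched against bch_cache_modes[]
 *   $ echo writeback > /sys/block/bcache0/bcache/cache_mode
 */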

STORE(bch_cached_dev)
{
	struct cached_dev *dc = container_of(kobj, struct cached_dev,
					     disk.kobj);

	/* no user space access if system is rebooting */
	if (bcache_is_reboot)
		return -EBUSY;

	mutex_lock(&bch_register_lock);
	size = __cached_dev_store(kobj, attr, buf, size);

	if (attr == &sysfs_writeback_running) {
		/* dc->writeback_running changed in __cached_dev_store() */
		if (IS_ERR_OR_NULL(dc->writeback_thread)) {
			/*
			 * reject setting it to 1 via sysfs if the writeback
			 * kthread is not created yet.
			 */
			if (dc->writeback_running) {
				dc->writeback_running = false;
				pr_err("%s: failed to run non-existent writeback thread\n",
				       dc->disk.disk->disk_name);
			}
		} else
			/*
			 * writeback kthread will check if dc->writeback_running
			 * is true or false.
			 */
			bch_writeback_queue(dc);
	}

	/*
	 * Only set BCACHE_DEV_WB_RUNNING when the cached device is attached
	 * to a cache set, otherwise it doesn't make sense.
	 */
	if (attr == &sysfs_writeback_percent)
		if ((dc->disk.c != NULL) &&
		    (!test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)))
			schedule_delayed_work(&dc->writeback_rate_update,
					      dc->writeback_rate_update_seconds * HZ);

	mutex_unlock(&bch_register_lock);
	return size;
}

static struct attribute *bch_cached_dev_attrs[] = {
	&sysfs_attach,
	&sysfs_detach,
	&sysfs_stop,
#if 0
	&sysfs_data_csum,
#endif
	&sysfs_cache_mode,
	&sysfs_readahead_cache_policy,
	&sysfs_stop_when_cache_set_failed,
	&sysfs_writeback_metadata,
	&sysfs_writeback_running,
	&sysfs_writeback_delay,
	&sysfs_writeback_percent,
	&sysfs_writeback_rate,
	&sysfs_writeback_consider_fragment,
	&sysfs_writeback_rate_update_seconds,
	&sysfs_writeback_rate_i_term_inverse,
	&sysfs_writeback_rate_p_term_inverse,
	&sysfs_writeback_rate_fp_term_low,
	&sysfs_writeback_rate_fp_term_mid,
	&sysfs_writeback_rate_fp_term_high,
	&sysfs_writeback_rate_minimum,
	&sysfs_writeback_rate_debug,
	&sysfs_io_errors,
	&sysfs_io_error_limit,
	&sysfs_io_disable,
	&sysfs_dirty_data,
	&sysfs_stripe_size,
	&sysfs_partial_stripes_expensive,
	&sysfs_sequential_cutoff,
	&sysfs_clear_stats,
	&sysfs_running,
	&sysfs_state,
	&sysfs_label,
#ifdef CONFIG_BCACHE_DEBUG
	&sysfs_verify,
	&sysfs_bypass_torture_test,
#endif
	&sysfs_backing_dev_name,
	&sysfs_backing_dev_uuid,
	NULL
};
ATTRIBUTE_GROUPS(bch_cached_dev);
KTYPE(bch_cached_dev);

SHOW(bch_flash_dev)
{
	struct bcache_device *d = container_of(kobj, struct bcache_device,
					       kobj);
	struct uuid_entry *u = &d->c->uuids[d->id];

	sysfs_printf(data_csum, "%i", d->data_csum);
	sysfs_hprint(size, u->sectors << 9);

	if (attr == &sysfs_label) {
		memcpy(buf, u->label, SB_LABEL_SIZE);
		buf[SB_LABEL_SIZE] = '\0';
		strcat(buf, "\n");
		return strlen(buf);
	}

	return 0;
}

STORE(__bch_flash_dev)
{
	struct bcache_device *d = container_of(kobj, struct bcache_device,
					       kobj);
	struct uuid_entry *u = &d->c->uuids[d->id];

	/* no user space access if system is rebooting */
	if (bcache_is_reboot)
		return -EBUSY;

	sysfs_strtoul(data_csum, d->data_csum);

	if (attr == &sysfs_size) {
		uint64_t v;

		strtoi_h_or_return(buf, v);

		u->sectors = v >> 9;
		bch_uuid_write(d->c);
		set_capacity(d->disk, u->sectors);
	}

	if (attr == &sysfs_label) {
		memcpy(u->label, buf, SB_LABEL_SIZE);
		bch_uuid_write(d->c);
	}

	if (attr == &sysfs_unregister) {
		set_bit(BCACHE_DEV_DETACHING, &d->flags);
		bcache_device_stop(d);
	}

	return size;
}
STORE_LOCKED(bch_flash_dev)
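
/*
 * Illustrative flash-volume workflow (device names and paths are
 * hypothetical; flash_vol_create is a cache set attribute registered in
 * bch_cache_set_attrs[] below, while size/label/unregister live here):
 *
 *   # create a 1G flash-only volume, then resize it through this kobject
 *   $ echo 1G > /sys/fs/bcache/<set-uuid>/flash_vol_create
 *   $ echo 2G > /sys/block/bcache1/bcache/size
 */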

static struct attribute *bch_flash_dev_attrs[] = {
	&sysfs_unregister,
#if 0
	&sysfs_data_csum,
#endif
	&sysfs_label,
	&sysfs_size,
	NULL
};
ATTRIBUTE_GROUPS(bch_flash_dev);
KTYPE(bch_flash_dev);

struct bset_stats_op {
	struct btree_op op;
	size_t nodes;
	struct bset_stats stats;
};

static int bch_btree_bset_stats(struct btree_op *b_op, struct btree *b)
{
	struct bset_stats_op *op = container_of(b_op, struct bset_stats_op, op);

	op->nodes++;
	bch_btree_keys_stats(&b->keys, &op->stats);

	return MAP_CONTINUE;
}

static int bch_bset_print_stats(struct cache_set *c, char *buf)
{
	struct bset_stats_op op;
	int ret;

	memset(&op, 0, sizeof(op));
	bch_btree_op_init(&op.op, -1);

	ret = bch_btree_map_nodes(&op.op, c, &ZERO_KEY, bch_btree_bset_stats);
	if (ret < 0)
		return ret;

	return snprintf(buf, PAGE_SIZE,
			"btree nodes: %zu\n"
			"written sets: %zu\n"
			"unwritten sets: %zu\n"
			"written key bytes: %zu\n"
			"unwritten key bytes: %zu\n"
			"floats: %zu\n"
			"failed: %zu\n",
			op.nodes,
			op.stats.sets_written, op.stats.sets_unwritten,
			op.stats.bytes_written, op.stats.bytes_unwritten,
			op.stats.floats, op.stats.failed);
}

static unsigned int bch_root_usage(struct cache_set *c)
{
	unsigned int bytes = 0;
	struct bkey *k;
	struct btree *b;
	struct btree_iter iter;

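	/*
	 * Take a read lock on the current root. If the root changed while
	 * we were waiting on the lock (e.g. it was replaced by a split),
	 * drop the lock and retry until the node we locked is still
	 * c->root.
	 */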
	goto lock_root;

	do {
		rw_unlock(false, b);
lock_root:
		b = c->root;
		rw_lock(false, b, b->level);
	} while (b != c->root);

	for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad)
		bytes += bkey_bytes(k);

	rw_unlock(false, b);

	return (bytes * 100) / btree_bytes(c);
}

static size_t bch_cache_size(struct cache_set *c)
{
	size_t ret = 0;
	struct btree *b;

	mutex_lock(&c->bucket_lock);
	list_for_each_entry(b, &c->btree_cache, list)
		ret += 1 << (b->keys.page_order + PAGE_SHIFT);

	mutex_unlock(&c->bucket_lock);
	return ret;
}

static unsigned int bch_cache_max_chain(struct cache_set *c)
{
	unsigned int ret = 0;
	struct hlist_head *h;

	mutex_lock(&c->bucket_lock);

	for (h = c->bucket_hash;
	     h < c->bucket_hash + (1 << BUCKET_HASH_BITS);
	     h++) {
		unsigned int i = 0;
		struct hlist_node *p;

		hlist_for_each(p, h)
			i++;

		ret = max(ret, i);
	}

	mutex_unlock(&c->bucket_lock);
	return ret;
}

static unsigned int bch_btree_used(struct cache_set *c)
{
	return div64_u64(c->gc_stats.key_bytes * 100,
			 (c->gc_stats.nodes ?: 1) * btree_bytes(c));
}

static unsigned int bch_average_key_size(struct cache_set *c)
{
	return c->gc_stats.nkeys
		? div64_u64(c->gc_stats.data, c->gc_stats.nkeys)
		: 0;
}

SHOW(__bch_cache_set)
{
	struct cache_set *c = container_of(kobj, struct cache_set, kobj);

	sysfs_print(synchronous, CACHE_SYNC(&c->cache->sb));
	sysfs_print(journal_delay_ms, c->journal_delay_ms);
	sysfs_hprint(bucket_size, bucket_bytes(c->cache));
	sysfs_hprint(block_size, block_bytes(c->cache));
	sysfs_print(tree_depth, c->root->level);
	sysfs_print(root_usage_percent, bch_root_usage(c));

	sysfs_hprint(btree_cache_size, bch_cache_size(c));
	sysfs_print(btree_cache_max_chain, bch_cache_max_chain(c));
	sysfs_print(cache_available_percent, 100 - c->gc_stats.in_use);

	sysfs_print_time_stats(&c->btree_gc_time, btree_gc, sec, ms);
	sysfs_print_time_stats(&c->btree_split_time, btree_split, sec, us);
	sysfs_print_time_stats(&c->sort.time, btree_sort, ms, us);
	sysfs_print_time_stats(&c->btree_read_time, btree_read, ms, us);

	sysfs_print(btree_used_percent, bch_btree_used(c));
	sysfs_print(btree_nodes, c->gc_stats.nodes);
	sysfs_hprint(average_key_size, bch_average_key_size(c));

	sysfs_print(cache_read_races,
		    atomic_long_read(&c->cache_read_races));

	sysfs_print(reclaim,
		    atomic_long_read(&c->reclaim));

	sysfs_print(reclaimed_journal_buckets,
		    atomic_long_read(&c->reclaimed_journal_buckets));

	sysfs_print(flush_write,
		    atomic_long_read(&c->flush_write));

	sysfs_print(writeback_keys_done,
		    atomic_long_read(&c->writeback_keys_done));
	sysfs_print(writeback_keys_failed,
		    atomic_long_read(&c->writeback_keys_failed));

	if (attr == &sysfs_errors)
		return bch_snprint_string_list(buf, PAGE_SIZE, error_actions,
					       c->on_error);

	/* See count_io_errors() for why 88 */
	sysfs_print(io_error_halflife, c->error_decay * 88);
	sysfs_print(io_error_limit, c->error_limit);

	sysfs_hprint(congested,
		     ((uint64_t) bch_get_congested(c)) << 9);
	sysfs_print(congested_read_threshold_us,
		    c->congested_read_threshold_us);
	sysfs_print(congested_write_threshold_us,
		    c->congested_write_threshold_us);

	sysfs_print(cutoff_writeback, bch_cutoff_writeback);
	sysfs_print(cutoff_writeback_sync, bch_cutoff_writeback_sync);

	sysfs_print(active_journal_entries, fifo_used(&c->journal.pin));
	sysfs_printf(verify, "%i", c->verify);
	sysfs_printf(key_merging_disabled, "%i", c->key_merging_disabled);
	sysfs_printf(expensive_debug_checks,
		     "%i", c->expensive_debug_checks);
	sysfs_printf(gc_always_rewrite, "%i", c->gc_always_rewrite);
	sysfs_printf(btree_shrinker_disabled, "%i", c->shrinker_disabled);
	sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
	sysfs_printf(idle_max_writeback_rate, "%i",
		     c->idle_max_writeback_rate_enabled);
	sysfs_printf(gc_after_writeback, "%i", c->gc_after_writeback);
	sysfs_printf(io_disable, "%i",
		     test_bit(CACHE_SET_IO_DISABLE, &c->flags));

	if (attr == &sysfs_bset_tree_stats)
		return bch_bset_print_stats(c, buf);

	if (attr == &sysfs_feature_compat)
		return bch_print_cache_set_feature_compat(c, buf, PAGE_SIZE);
	if (attr == &sysfs_feature_ro_compat)
		return bch_print_cache_set_feature_ro_compat(c, buf, PAGE_SIZE);
	if (attr == &sysfs_feature_incompat)
		return bch_print_cache_set_feature_incompat(c, buf, PAGE_SIZE);

	return 0;
}
SHOW_LOCKED(bch_cache_set)
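
/*
 * The cache set kobject is registered under /sys/fs/bcache/<set-uuid>/,
 * so (with a hypothetical UUID) the attributes above can be read as e.g.:
 *
 *   $ cat /sys/fs/bcache/<set-uuid>/cache_available_percent
 */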

STORE(__bch_cache_set)
{
	struct cache_set *c = container_of(kobj, struct cache_set, kobj);
	ssize_t v;

	/* no user space access if system is rebooting */
	if (bcache_is_reboot)
		return -EBUSY;

	if (attr == &sysfs_unregister)
		bch_cache_set_unregister(c);

	if (attr == &sysfs_stop)
		bch_cache_set_stop(c);

	if (attr == &sysfs_synchronous) {
		bool sync = strtoul_or_return(buf);

		if (sync != CACHE_SYNC(&c->cache->sb)) {
			SET_CACHE_SYNC(&c->cache->sb, sync);
			bcache_write_super(c);
		}
	}

	if (attr == &sysfs_flash_vol_create) {
		int r;
		uint64_t v;

		strtoi_h_or_return(buf, v);

		r = bch_flash_dev_create(c, v);
		if (r)
			return r;
	}

	if (attr == &sysfs_clear_stats) {
		atomic_long_set(&c->writeback_keys_done, 0);
		atomic_long_set(&c->writeback_keys_failed, 0);

		memset(&c->gc_stats, 0, sizeof(struct gc_stat));
		bch_cache_accounting_clear(&c->accounting);
	}

	if (attr == &sysfs_trigger_gc)
		force_wake_up_gc(c);

	if (attr == &sysfs_prune_cache) {
		struct shrink_control sc;

		sc.gfp_mask = GFP_KERNEL;
		sc.nr_to_scan = strtoul_or_return(buf);
		c->shrink.scan_objects(&c->shrink, &sc);
	}

	sysfs_strtoul_clamp(congested_read_threshold_us,
			    c->congested_read_threshold_us,
			    0, UINT_MAX);
	sysfs_strtoul_clamp(congested_write_threshold_us,
			    c->congested_write_threshold_us,
			    0, UINT_MAX);

	if (attr == &sysfs_errors) {
		v = __sysfs_match_string(error_actions, -1, buf);
		if (v < 0)
			return v;

		c->on_error = v;
	}

	sysfs_strtoul_clamp(io_error_limit, c->error_limit, 0, UINT_MAX);

	/* See count_io_errors() for why 88 */
	if (attr == &sysfs_io_error_halflife) {
		unsigned long v = 0;
		ssize_t ret;

		ret = strtoul_safe_clamp(buf, v, 0, UINT_MAX);
		if (!ret) {
			c->error_decay = v / 88;
			return size;
		}
		return ret;
	}

	if (attr == &sysfs_io_disable) {
		v = strtoul_or_return(buf);
		if (v) {
			if (test_and_set_bit(CACHE_SET_IO_DISABLE,
					     &c->flags))
				pr_warn("CACHE_SET_IO_DISABLE already set\n");
		} else {
			if (!test_and_clear_bit(CACHE_SET_IO_DISABLE,
						&c->flags))
				pr_warn("CACHE_SET_IO_DISABLE already cleared\n");
		}
	}

	sysfs_strtoul_clamp(journal_delay_ms,
			    c->journal_delay_ms,
			    0, USHRT_MAX);
	sysfs_strtoul_bool(verify, c->verify);
	sysfs_strtoul_bool(key_merging_disabled, c->key_merging_disabled);
	sysfs_strtoul(expensive_debug_checks, c->expensive_debug_checks);
	sysfs_strtoul_bool(gc_always_rewrite, c->gc_always_rewrite);
	sysfs_strtoul_bool(btree_shrinker_disabled, c->shrinker_disabled);
	sysfs_strtoul_bool(copy_gc_enabled, c->copy_gc_enabled);
	sysfs_strtoul_bool(idle_max_writeback_rate,
			   c->idle_max_writeback_rate_enabled);

	/*
	 * writing gc_after_writeback here may overwrite an already set
	 * BCH_DO_AUTO_GC; it doesn't matter because the flag will be
	 * set again on the next chance.
	 */
	sysfs_strtoul_clamp(gc_after_writeback, c->gc_after_writeback, 0, 1);

	return size;
}
STORE_LOCKED(bch_cache_set)
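
/*
 * Illustrative write-side usage (the set UUID is hypothetical); trigger_gc
 * and prune_cache are registered below under the "internal" sub-kobject:
 *
 *   $ echo 1 > /sys/fs/bcache/<set-uuid>/internal/trigger_gc
 */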

SHOW(bch_cache_set_internal)
{
	struct cache_set *c = container_of(kobj, struct cache_set, internal);

	return bch_cache_set_show(&c->kobj, attr, buf);
}

STORE(bch_cache_set_internal)
{
	struct cache_set *c = container_of(kobj, struct cache_set, internal);

	/* no user space access if system is rebooting */
	if (bcache_is_reboot)
		return -EBUSY;

	return bch_cache_set_store(&c->kobj, attr, buf, size);
}

static void bch_cache_set_internal_release(struct kobject *k)
{
}

static struct attribute *bch_cache_set_attrs[] = {
	&sysfs_unregister,
	&sysfs_stop,
	&sysfs_synchronous,
	&sysfs_journal_delay_ms,
	&sysfs_flash_vol_create,

	&sysfs_bucket_size,
	&sysfs_block_size,
	&sysfs_tree_depth,
	&sysfs_root_usage_percent,
	&sysfs_btree_cache_size,
	&sysfs_cache_available_percent,

	&sysfs_average_key_size,

	&sysfs_errors,
	&sysfs_io_error_limit,
	&sysfs_io_error_halflife,
	&sysfs_congested,
	&sysfs_congested_read_threshold_us,
	&sysfs_congested_write_threshold_us,
	&sysfs_clear_stats,
	NULL
};
ATTRIBUTE_GROUPS(bch_cache_set);
KTYPE(bch_cache_set);

static struct attribute *bch_cache_set_internal_attrs[] = {
	&sysfs_active_journal_entries,

	sysfs_time_stats_attribute_list(btree_gc, sec, ms)
	sysfs_time_stats_attribute_list(btree_split, sec, us)
	sysfs_time_stats_attribute_list(btree_sort, ms, us)
	sysfs_time_stats_attribute_list(btree_read, ms, us)

	&sysfs_btree_nodes,
	&sysfs_btree_used_percent,
	&sysfs_btree_cache_max_chain,

	&sysfs_bset_tree_stats,
	&sysfs_cache_read_races,
	&sysfs_reclaim,
	&sysfs_reclaimed_journal_buckets,
	&sysfs_flush_write,
	&sysfs_writeback_keys_done,
	&sysfs_writeback_keys_failed,

	&sysfs_trigger_gc,
	&sysfs_prune_cache,
#ifdef CONFIG_BCACHE_DEBUG
	&sysfs_verify,
	&sysfs_key_merging_disabled,
	&sysfs_expensive_debug_checks,
#endif
	&sysfs_gc_always_rewrite,
	&sysfs_btree_shrinker_disabled,
	&sysfs_copy_gc_enabled,
	&sysfs_idle_max_writeback_rate,
	&sysfs_gc_after_writeback,
	&sysfs_io_disable,
	&sysfs_cutoff_writeback,
	&sysfs_cutoff_writeback_sync,
	&sysfs_feature_compat,
	&sysfs_feature_ro_compat,
	&sysfs_feature_incompat,
	NULL
};
ATTRIBUTE_GROUPS(bch_cache_set_internal);
KTYPE(bch_cache_set_internal);

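/* for sort(): compares r against l, so priorities sort in descending order */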
static int __bch_cache_cmp(const void *l, const void *r)
{
	cond_resched();
	return *((uint16_t *)r) - *((uint16_t *)l);
}

SHOW(__bch_cache)
{
	struct cache *ca = container_of(kobj, struct cache, kobj);

	sysfs_hprint(bucket_size, bucket_bytes(ca));
	sysfs_hprint(block_size, block_bytes(ca));
	sysfs_print(nbuckets, ca->sb.nbuckets);
	sysfs_print(discard, ca->discard);
	sysfs_hprint(written, atomic_long_read(&ca->sectors_written) << 9);
	sysfs_hprint(btree_written,
		     atomic_long_read(&ca->btree_sectors_written) << 9);
	sysfs_hprint(metadata_written,
		     (atomic_long_read(&ca->meta_sectors_written) +
		      atomic_long_read(&ca->btree_sectors_written)) << 9);

	sysfs_print(io_errors,
		    atomic_read(&ca->io_errors) >> IO_ERROR_SHIFT);

	if (attr == &sysfs_cache_replacement_policy)
		return bch_snprint_string_list(buf, PAGE_SIZE,
					       cache_replacement_policies,
					       CACHE_REPLACEMENT(&ca->sb));

	if (attr == &sysfs_priority_stats) {
		struct bucket *b;
		size_t n = ca->sb.nbuckets, i;
		size_t unused = 0, available = 0, dirty = 0, meta = 0;
		uint64_t sum = 0;
		/* Compute 31 quantiles */
		uint16_t q[31], *p, *cached;
		ssize_t ret;

		cached = p = vmalloc(array_size(sizeof(uint16_t),
						ca->sb.nbuckets));
		if (!p)
			return -ENOMEM;

		mutex_lock(&ca->set->bucket_lock);
		for_each_bucket(b, ca) {
			if (!GC_SECTORS_USED(b))
				unused++;
			if (GC_MARK(b) == GC_MARK_RECLAIMABLE)
				available++;
			if (GC_MARK(b) == GC_MARK_DIRTY)
				dirty++;
			if (GC_MARK(b) == GC_MARK_METADATA)
				meta++;
		}

		for (i = ca->sb.first_bucket; i < n; i++)
			p[i] = ca->buckets[i].prio;
		mutex_unlock(&ca->set->bucket_lock);

		sort(p, n, sizeof(uint16_t), __bch_cache_cmp, NULL);

		while (n &&
		       !cached[n - 1])
			--n;

		while (cached < p + n &&
		       *cached == BTREE_PRIO) {
			cached++;
			n--;
		}

		for (i = 0; i < n; i++)
			sum += INITIAL_PRIO - cached[i];

		if (n)
			do_div(sum, n);

		for (i = 0; i < ARRAY_SIZE(q); i++)
			q[i] = INITIAL_PRIO - cached[n * (i + 1) /
				(ARRAY_SIZE(q) + 1)];

		vfree(p);

		ret = scnprintf(buf, PAGE_SIZE,
				"Unused: %zu%%\n"
				"Clean: %zu%%\n"
				"Dirty: %zu%%\n"
				"Metadata: %zu%%\n"
				"Average: %llu\n"
				"Sectors per Q: %zu\n"
				"Quantiles: [",
				unused * 100 / (size_t) ca->sb.nbuckets,
				available * 100 / (size_t) ca->sb.nbuckets,
				dirty * 100 / (size_t) ca->sb.nbuckets,
				meta * 100 / (size_t) ca->sb.nbuckets, sum,
				n * ca->sb.bucket_size / (ARRAY_SIZE(q) + 1));

		for (i = 0; i < ARRAY_SIZE(q); i++)
			ret += scnprintf(buf + ret, PAGE_SIZE - ret,
					 "%u ", q[i]);
		ret--;

		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "]\n");

		return ret;
	}

	return 0;
}
SHOW_LOCKED(bch_cache)
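
/*
 * Illustrative read of the per-cache statistics above (the cache device
 * kobject lives under the cache set directory; names are hypothetical):
 *
 *   $ cat /sys/fs/bcache/<set-uuid>/cache0/priority_stats
 */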

STORE(__bch_cache)
{
	struct cache *ca = container_of(kobj, struct cache, kobj);
	ssize_t v;

	/* no user space access if system is rebooting */
	if (bcache_is_reboot)
		return -EBUSY;

	if (attr == &sysfs_discard) {
		bool v = strtoul_or_return(buf);

		if (bdev_max_discard_sectors(ca->bdev))
			ca->discard = v;

		if (v != CACHE_DISCARD(&ca->sb)) {
			SET_CACHE_DISCARD(&ca->sb, v);
			bcache_write_super(ca->set);
		}
	}

	if (attr == &sysfs_cache_replacement_policy) {
		v = __sysfs_match_string(cache_replacement_policies, -1, buf);
		if (v < 0)
			return v;

		if ((unsigned int) v != CACHE_REPLACEMENT(&ca->sb)) {
			mutex_lock(&ca->set->bucket_lock);
			SET_CACHE_REPLACEMENT(&ca->sb, v);
			mutex_unlock(&ca->set->bucket_lock);

			bcache_write_super(ca->set);
		}
	}

	if (attr == &sysfs_clear_stats) {
		atomic_long_set(&ca->sectors_written, 0);
		atomic_long_set(&ca->btree_sectors_written, 0);
		atomic_long_set(&ca->meta_sectors_written, 0);
		atomic_set(&ca->io_count, 0);
		atomic_set(&ca->io_errors, 0);
	}

	return size;
}
STORE_LOCKED(bch_cache)

static struct attribute *bch_cache_attrs[] = {
	&sysfs_bucket_size,
	&sysfs_block_size,
	&sysfs_nbuckets,
	&sysfs_priority_stats,
	&sysfs_discard,
	&sysfs_written,
	&sysfs_btree_written,
	&sysfs_metadata_written,
	&sysfs_io_errors,
	&sysfs_clear_stats,
	&sysfs_cache_replacement_policy,
	NULL
};
ATTRIBUTE_GROUPS(bch_cache);
KTYPE(bch_cache);