// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com>
 *
 * Uses a block device as cache for other block devices; optimized for SSDs.
 * All allocation is done in buckets, which should match the erase block size
 * of the backing device.
 *
 * Buckets containing cached data are kept on a heap sorted by priority;
 * bucket priority is increased on cache hit, and periodically all the buckets
 * on the heap have their priority scaled down. This currently is just used as
 * an indication that the bucket is still in use; but it could be used to give
 * different buckets different priorities to users modifying data.
 *
 * Code & pointers to buckets is kept in a btree; bucket priorities and gens
 * (generations i.e. a counter for each bucket used to determine when the
 * bucket was last used, and garbage collection needs to check against it)
 * is in the journal buckets.
 *
 * This module handles bucket (re)allocation along with the btree operations,
 * and then at a higher level handles the device vfs / cache layer (it makes
 * sure references are correct, handles metadata accounting etc).
 */
#include "bcache.h"
#include "btree.h"
#include "debug.h"
#include "extents.h"
#include "writeback.h"

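/*
 * Advance an iterator set to its next key; when the set is exhausted,
 * drop it from the heap by overwriting it with the last heap entry.
 */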
static void sort_key_next(struct btree_iter *iter,
			  struct btree_iter_set *i)
{
	i->k = bkey_next(i->k);

	if (i->k == i->end)
		*i = iter->data[--iter->used];
}

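/*
 * Heap comparison for sorting a btree node's keys: orders by bkey_cmp(),
 * breaking ties so that the key from the newer set sorts first - keys that
 * compare equal must be processed newest to oldest.
 */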
static bool bch_key_sort_cmp(struct btree_iter_set l,
			     struct btree_iter_set r)
{
	int64_t c = bkey_cmp(l.k, r.k);

	return c ? c > 0 : l.k < r.k;
}

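/*
 * Returns true if any pointer in the key is obviously bogus: the referenced
 * bucket lies outside [first_bucket, nbuckets), or the extent would spill
 * past the end of its bucket.
 */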
static bool __ptr_invalid(struct cache_set *c, const struct bkey *k)
{
	unsigned int i;

	for (i = 0; i < KEY_PTRS(k); i++)
		if (ptr_available(c, k, i)) {
			struct cache *ca = c->cache;
			size_t bucket = PTR_BUCKET_NR(c, k, i);
			size_t r = bucket_remainder(c, PTR_OFFSET(k, i));

			if (KEY_SIZE(k) + r > ca->sb.bucket_size ||
			    bucket <  ca->sb.first_bucket ||
			    bucket >= ca->sb.nbuckets)
				return true;
		}

	return false;
}

/* Common among btree and extent ptrs */

static const char *bch_ptr_status(struct cache_set *c, const struct bkey *k)
{
	unsigned int i;

	for (i = 0; i < KEY_PTRS(k); i++)
		if (ptr_available(c, k, i)) {
			struct cache *ca = c->cache;
			size_t bucket = PTR_BUCKET_NR(c, k, i);
			size_t r = bucket_remainder(c, PTR_OFFSET(k, i));

			if (KEY_SIZE(k) + r > ca->sb.bucket_size)
				return "bad, length too big";
			if (bucket <  ca->sb.first_bucket)
				return "bad, short offset";
			if (bucket >= ca->sb.nbuckets)
				return "bad, offset past end of device";
			if (ptr_stale(c, k, i))
				return "stale";
		}

	if (!bkey_cmp(k, &ZERO_KEY))
		return "bad, null key";
	if (!KEY_PTRS(k))
		return "bad, no pointers";
	if (!KEY_SIZE(k))
		return "zeroed key";
	return "";
}

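/*
 * Format a key as "inode:start len n -> [dev:offset gen g, ...]" into buf,
 * appending " dirty" and checksum info when present; output is truncated
 * at size bytes.
 */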
void bch_extent_to_text(char *buf, size_t size, const struct bkey *k)
{
	unsigned int i = 0;
	char *out = buf, *end = buf + size;

#define p(...)	(out += scnprintf(out, end - out, __VA_ARGS__))

	p("%llu:%llu len %llu -> [", KEY_INODE(k), KEY_START(k), KEY_SIZE(k));

	for (i = 0; i < KEY_PTRS(k); i++) {
		if (i)
			p(", ");

		if (PTR_DEV(k, i) == PTR_CHECK_DEV)
			p("check dev");
		else
			p("%llu:%llu gen %llu", PTR_DEV(k, i),
			  PTR_OFFSET(k, i), PTR_GEN(k, i));
	}

	p("]");

	if (KEY_DIRTY(k))
		p(" dirty");
	if (KEY_CSUM(k))
		p(" cs%llu %llx", KEY_CSUM(k), k->ptr[1]);
#undef p
}

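/*
 * Debug dump: prints the key itself plus, for each pointer, the bucket
 * number and (when the bucket is in range) its priority.
 */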
static void bch_bkey_dump(struct btree_keys *keys, const struct bkey *k)
{
	struct btree *b = container_of(keys, struct btree, keys);
	unsigned int j;
	char buf[80];

	bch_extent_to_text(buf, sizeof(buf), k);
	pr_cont(" %s", buf);

	for (j = 0; j < KEY_PTRS(k); j++) {
		size_t n = PTR_BUCKET_NR(b->c, k, j);

		pr_cont(" bucket %zu", n);
		if (n >= b->c->cache->sb.first_bucket && n < b->c->cache->sb.nbuckets)
			pr_cont(" prio %i",
				PTR_BUCKET(b->c, k, j)->prio);
	}

	pr_cont(" %s\n", bch_ptr_status(b->c, k));
}

/* Btree ptrs */

bool __bch_btree_ptr_invalid(struct cache_set *c, const struct bkey *k)
{
	char buf[80];

	if (!KEY_PTRS(k) || !KEY_SIZE(k) || KEY_DIRTY(k))
		goto bad;

	if (__ptr_invalid(c, k))
		goto bad;

	return false;
bad:
	bch_extent_to_text(buf, sizeof(buf), k);
	cache_bug(c, "spotted btree ptr %s: %s", buf, bch_ptr_status(c, k));
	return true;
}

static bool bch_btree_ptr_invalid(struct btree_keys *bk, const struct bkey *k)
{
	struct btree *b = container_of(bk, struct btree, keys);

	return __bch_btree_ptr_invalid(b->c, k);
}

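/*
 * Cross-checks a btree node pointer against bucket state: a btree bucket
 * must not be dirty, must have BTREE_PRIO, and (when gc marks are valid)
 * must be marked as metadata. Only runs when bucket_lock can be taken
 * without blocking.
 */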
static bool btree_ptr_bad_expensive(struct btree *b, const struct bkey *k)
{
	unsigned int i;
	char buf[80];
	struct bucket *g;

	if (mutex_trylock(&b->c->bucket_lock)) {
		for (i = 0; i < KEY_PTRS(k); i++)
			if (ptr_available(b->c, k, i)) {
				g = PTR_BUCKET(b->c, k, i);

				if (KEY_DIRTY(k) ||
				    g->prio != BTREE_PRIO ||
				    (b->c->gc_mark_valid &&
				     GC_MARK(g) != GC_MARK_METADATA))
					goto err;
			}

		mutex_unlock(&b->c->bucket_lock);
	}

	return false;
err:
	mutex_unlock(&b->c->bucket_lock);
	bch_extent_to_text(buf, sizeof(buf), k);
	btree_bug(b,
"inconsistent btree pointer %s: bucket %zi pin %i prio %i gen %i last_gc %i mark %llu",
		  buf, PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin),
		  g->prio, g->gen, g->last_gc, GC_MARK(g));
	return true;
}

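/*
 * Returns true if a btree node pointer should not be followed: zero/empty
 * keys, invalid keys, unavailable or stale pointers, and (with expensive
 * debug checks enabled) pointers that fail the bucket cross-check above.
 */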
static bool bch_btree_ptr_bad(struct btree_keys *bk, const struct bkey *k)
{
	struct btree *b = container_of(bk, struct btree, keys);
	unsigned int i;

	if (!bkey_cmp(k, &ZERO_KEY) ||
	    !KEY_PTRS(k) ||
	    bch_ptr_invalid(bk, k))
		return true;

	for (i = 0; i < KEY_PTRS(k); i++)
		if (!ptr_available(b->c, k, i) ||
		    ptr_stale(b->c, k, i))
			return true;

	if (expensive_debug_checks(b->c) &&
	    btree_ptr_bad_expensive(b, k))
		return true;

	return false;
}

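/*
 * Btree node pointers never overlap, so the only work here is accounting:
 * an insert with offset 0 bumps the current write's prio_blocked count.
 * Always returns false, so the insert proceeds.
 */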
static bool bch_btree_ptr_insert_fixup(struct btree_keys *bk,
				       struct bkey *insert,
				       struct btree_iter *iter,
				       struct bkey *replace_key)
{
	struct btree *b = container_of(bk, struct btree, keys);

	if (!KEY_OFFSET(insert))
		btree_current_write(b)->prio_blocked++;

	return false;
}

const struct btree_keys_ops bch_btree_keys_ops = {
	.sort_cmp	= bch_key_sort_cmp,
	.insert_fixup	= bch_btree_ptr_insert_fixup,
	.key_invalid	= bch_btree_ptr_invalid,
	.key_bad	= bch_btree_ptr_bad,
	.key_to_text	= bch_extent_to_text,
	.key_dump	= bch_bkey_dump,
};

/* Extents */

/*
 * Returns true if l > r - unless l == r, in which case returns true if l is
 * older than r.
 *
 * Necessary for btree_sort_fixup() - if there are multiple keys that compare
 * equal in different sets, we have to process them newest to oldest.
 */
static bool bch_extent_sort_cmp(struct btree_iter_set l,
				struct btree_iter_set r)
{
	int64_t c = bkey_cmp(&START_KEY(l.k), &START_KEY(r.k));

	return c ? c > 0 : l.k < r.k;
}

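/*
 * Called during the merge sort to resolve overlapping extents from
 * different sets: the key from the newer set wins, so the older key is
 * trimmed (or split, with the front piece returned via tmp so the caller
 * can emit it) until nothing in the heap overlaps the key at the top.
 */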
static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter,
					  struct bkey *tmp)
{
	while (iter->used > 1) {
		struct btree_iter_set *top = iter->data, *i = top + 1;

		if (iter->used > 2 &&
		    bch_extent_sort_cmp(i[0], i[1]))
			i++;

		if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0)
			break;

		if (!KEY_SIZE(i->k)) {
			sort_key_next(iter, i);
			heap_sift(iter, i - top, bch_extent_sort_cmp);
			continue;
		}

		if (top->k > i->k) {
			if (bkey_cmp(top->k, i->k) >= 0)
				sort_key_next(iter, i);
			else
				bch_cut_front(top->k, i->k);

			heap_sift(iter, i - top, bch_extent_sort_cmp);
		} else {
			/* can't happen because of comparison func */
			BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k)));

			if (bkey_cmp(i->k, top->k) < 0) {
				bkey_copy(tmp, top->k);

				bch_cut_back(&START_KEY(i->k), tmp);
				bch_cut_front(i->k, top->k);
				heap_sift(iter, 0, bch_extent_sort_cmp);

				return tmp;
			} else {
				bch_cut_back(&START_KEY(i->k), top->k);
			}
		}
	}

	return NULL;
}

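/*
 * Helper for insert_fixup: when part of a dirty extent is overwritten,
 * subtract the overwritten sectors from the device's dirty counters.
 */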
static void bch_subtract_dirty(struct bkey *k,
			       struct cache_set *c,
			       uint64_t offset,
			       int sectors)
{
	if (KEY_DIRTY(k))
		bcache_dev_sectors_dirty_add(c, KEY_INODE(k),
					     offset, -sectors);
}

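/*
 * Handles overlapping extents at insert time: walks every existing key
 * that overlaps insert, trimming or splitting it, and keeps the dirty
 * sector counts consistent. For replace operations (replace_key != NULL)
 * it also verifies that what's being overwritten is what the caller
 * expects, shrinking insert to the sectors actually matched. Returns true
 * if the insert should be dropped entirely.
 */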
static bool bch_extent_insert_fixup(struct btree_keys *b,
				    struct bkey *insert,
				    struct btree_iter *iter,
				    struct bkey *replace_key)
{
	struct cache_set *c = container_of(b, struct btree, keys)->c;

	uint64_t old_offset;
	unsigned int old_size, sectors_found = 0;

	BUG_ON(!KEY_OFFSET(insert));
	BUG_ON(!KEY_SIZE(insert));

	while (1) {
		struct bkey *k = bch_btree_iter_next(iter);

		if (!k)
			break;

		if (bkey_cmp(&START_KEY(k), insert) >= 0) {
			if (KEY_SIZE(k))
				break;
			else
				continue;
		}

		if (bkey_cmp(k, &START_KEY(insert)) <= 0)
			continue;

		old_offset = KEY_START(k);
		old_size = KEY_SIZE(k);

		/*
		 * We might overlap with 0 size extents; we can't skip these
		 * because if they're in the set we're inserting to we have to
		 * adjust them so they don't overlap with the key we're
		 * inserting. But we don't want to check them for replace
		 * operations.
		 */

		if (replace_key && KEY_SIZE(k)) {
			/*
			 * k might have been split since we inserted/found the
			 * key we're replacing
			 */
			unsigned int i;
			uint64_t offset = KEY_START(k) -
				KEY_START(replace_key);

			/* But it must be a subset of the replace key */
			if (KEY_START(k) < KEY_START(replace_key) ||
			    KEY_OFFSET(k) > KEY_OFFSET(replace_key))
				goto check_failed;

			/* We didn't find a key that we were supposed to */
			if (KEY_START(k) > KEY_START(insert) + sectors_found)
				goto check_failed;

			if (!bch_bkey_equal_header(k, replace_key))
				goto check_failed;

			/* gen is the low 8 bits of a ptr, so shift the
			 * sector delta up past it before comparing raw ptrs
			 */
			offset <<= 8;

			BUG_ON(!KEY_PTRS(replace_key));

			for (i = 0; i < KEY_PTRS(replace_key); i++)
				if (k->ptr[i] != replace_key->ptr[i] + offset)
					goto check_failed;

			sectors_found = KEY_OFFSET(k) - KEY_START(insert);
		}

		if (bkey_cmp(insert, k) < 0 &&
		    bkey_cmp(&START_KEY(insert), &START_KEY(k)) > 0) {
			/*
			 * We overlapped in the middle of an existing key: that
			 * means we have to split the old key. But we have to do
			 * slightly different things depending on whether the
			 * old key has been written out yet.
			 */

			struct bkey *top;

			bch_subtract_dirty(k, c, KEY_START(insert),
					   KEY_SIZE(insert));

			if (bkey_written(b, k)) {
				/*
				 * We insert a new key to cover the top of the
				 * old key, and the old key is modified in place
				 * to represent the bottom split.
				 *
				 * It's completely arbitrary whether the new key
				 * is the top or the bottom, but it has to match
				 * up with what btree_sort_fixup() does - it
				 * doesn't check for this kind of overlap, it
				 * depends on us inserting a new key for the top
				 * here.
				 */
				top = bch_bset_search(b, bset_tree_last(b),
						      insert);
				bch_bset_insert(b, top, k);
			} else {
				BKEY_PADDED(key) temp;
				bkey_copy(&temp.key, k);
				bch_bset_insert(b, k, &temp.key);
				top = bkey_next(k);
			}

			bch_cut_front(insert, top);
			bch_cut_back(&START_KEY(insert), k);
			bch_bset_fix_invalidated_key(b, k);
			goto out;
		}

		if (bkey_cmp(insert, k) < 0) {
			bch_cut_front(insert, k);
		} else {
			if (bkey_cmp(&START_KEY(insert), &START_KEY(k)) > 0)
				old_offset = KEY_START(insert);

			if (bkey_written(b, k) &&
			    bkey_cmp(&START_KEY(insert), &START_KEY(k)) <= 0) {
				/*
				 * Completely overwrote, so we don't have to
				 * invalidate the binary search tree
				 */
				bch_cut_front(k, k);
			} else {
				__bch_cut_back(&START_KEY(insert), k);
				bch_bset_fix_invalidated_key(b, k);
			}
		}

		bch_subtract_dirty(k, c, old_offset, old_size - KEY_SIZE(k));
	}

check_failed:
	if (replace_key) {
		if (!sectors_found) {
			return true;
		} else if (sectors_found < KEY_SIZE(insert)) {
			SET_KEY_OFFSET(insert, KEY_OFFSET(insert) -
				       (KEY_SIZE(insert) - sectors_found));
			SET_KEY_SIZE(insert, sectors_found);
		}
	}
out:
	if (KEY_DIRTY(insert))
		bcache_dev_sectors_dirty_add(c, KEY_INODE(insert),
					     KEY_START(insert),
					     KEY_SIZE(insert));

	return false;
}

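/*
 * Returns true if the extent key is malformed: zero-size keys are invalid
 * (but not reported), as are keys whose size exceeds their offset (start
 * would be negative) or whose pointers fail __ptr_invalid(); those are
 * logged via cache_bug().
 */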
bool __bch_extent_invalid(struct cache_set *c, const struct bkey *k)
{
	char buf[80];

	if (!KEY_SIZE(k))
		return true;

	if (KEY_SIZE(k) > KEY_OFFSET(k))
		goto bad;

	if (__ptr_invalid(c, k))
		goto bad;

	return false;
bad:
	bch_extent_to_text(buf, sizeof(buf), k);
	cache_bug(c, "spotted extent %s: %s", buf, bch_ptr_status(c, k));
	return true;
}

static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k)
{
	struct btree *b = container_of(bk, struct btree, keys);

	return __bch_extent_invalid(b->c, k);
}

static bool bch_extent_bad_expensive(struct btree *b, const struct bkey *k,
				     unsigned int ptr)
{
	struct bucket *g = PTR_BUCKET(b->c, k, ptr);
	char buf[80];

	if (mutex_trylock(&b->c->bucket_lock)) {
		if (b->c->gc_mark_valid &&
		    (!GC_MARK(g) ||
		     GC_MARK(g) == GC_MARK_METADATA ||
		     (GC_MARK(g) != GC_MARK_DIRTY && KEY_DIRTY(k))))
			goto err;

		if (g->prio == BTREE_PRIO)
			goto err;

		mutex_unlock(&b->c->bucket_lock);
	}

	return false;
err:
	mutex_unlock(&b->c->bucket_lock);
	bch_extent_to_text(buf, sizeof(buf), k);
	btree_bug(b,
"inconsistent extent pointer %s:\nbucket %zu pin %i prio %i gen %i last_gc %i mark %llu",
		  buf, PTR_BUCKET_NR(b->c, k, ptr), atomic_read(&g->pin),
		  g->prio, g->gen, g->last_gc, GC_MARK(g));
	return true;
}

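/*
 * Returns true if an extent shouldn't be read from: missing or unavailable
 * pointers, invalid keys, or stale pointers (the bucket has been reused).
 * A stale pointer on a dirty key means lost writeback data, so it is
 * logged; excessive staleness trips a btree bug check.
 */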
static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k)
{
	struct btree *b = container_of(bk, struct btree, keys);
	unsigned int i, stale;
	char buf[80];

	if (!KEY_PTRS(k) ||
	    bch_extent_invalid(bk, k))
		return true;

	for (i = 0; i < KEY_PTRS(k); i++)
		if (!ptr_available(b->c, k, i))
			return true;

	for (i = 0; i < KEY_PTRS(k); i++) {
		stale = ptr_stale(b->c, k, i);

		if (stale && KEY_DIRTY(k)) {
			bch_extent_to_text(buf, sizeof(buf), k);
			pr_info("stale dirty pointer, stale %u, key: %s\n",
				stale, buf);
		}

		btree_bug_on(stale > BUCKET_GC_GEN_MAX, b,
			     "key too stale: %i, need_gc %u",
			     stale, b->c->need_gc);

		if (stale)
			return true;

		if (expensive_debug_checks(b->c) &&
		    bch_extent_bad_expensive(b, k, i))
			return true;
	}

	return false;
}

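/*
 * Checksums are stored in the u64 word following the last pointer;
 * merging adds the two values and clears the top bit.
 */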
static uint64_t merge_chksums(struct bkey *l, struct bkey *r)
{
	return (l->ptr[KEY_PTRS(l)] + r->ptr[KEY_PTRS(r)]) &
		~((uint64_t)1 << 63);
}

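/*
 * Attempts to merge r into l (l comes first). Mergeable extents must be
 * physically contiguous: each pointer in r must point at the data
 * immediately following l's, within the same bucket. Since KEY_SIZE is a
 * 16 bit field, a merge that would overflow USHRT_MAX instead grows l to
 * the maximum and cuts the merged part off the front of r. Returns true
 * if r was fully merged into l.
 */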
static bool bch_extent_merge(struct btree_keys *bk,
			     struct bkey *l,
			     struct bkey *r)
{
	struct btree *b = container_of(bk, struct btree, keys);
	unsigned int i;

	if (key_merging_disabled(b->c))
		return false;

	for (i = 0; i < KEY_PTRS(l); i++)
		if (l->ptr[i] + MAKE_PTR(0, KEY_SIZE(l), 0) != r->ptr[i] ||
		    PTR_BUCKET_NR(b->c, l, i) != PTR_BUCKET_NR(b->c, r, i))
			return false;

	/* Keys with no pointers aren't restricted to one bucket and could
	 * overflow KEY_SIZE
	 */
	if (KEY_SIZE(l) + KEY_SIZE(r) > USHRT_MAX) {
		SET_KEY_OFFSET(l, KEY_OFFSET(l) + USHRT_MAX - KEY_SIZE(l));
		SET_KEY_SIZE(l, USHRT_MAX);

		bch_cut_front(l, r);
		return false;
	}

	if (KEY_CSUM(l)) {
		if (KEY_CSUM(r))
			l->ptr[KEY_PTRS(l)] = merge_chksums(l, r);
		else
			SET_KEY_CSUM(l, 0);
	}

	SET_KEY_OFFSET(l, KEY_OFFSET(l) + KEY_SIZE(r));
	SET_KEY_SIZE(l, KEY_SIZE(l) + KEY_SIZE(r));

	return true;
}

const struct btree_keys_ops bch_extent_keys_ops = {
	.sort_cmp	= bch_extent_sort_cmp,
	.sort_fixup	= bch_extent_sort_fixup,
	.insert_fixup	= bch_extent_insert_fixup,
	.key_invalid	= bch_extent_invalid,
	.key_bad	= bch_extent_bad,
	.key_merge	= bch_extent_merge,
	.key_to_text	= bch_extent_to_text,
	.key_dump	= bch_bkey_dump,
	.is_extents	= true,
};