#include <linux/sched.h>
#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/raid/pq.h>
#include <linux/hash.h>
#include <linux/list_sort.h>
#include <linux/raid/xor.h>
#include <linux/mm.h>
#include "misc.h"
#include "ctree.h"
#include "disk-io.h"
#include "volumes.h"
#include "raid56.h"
#include "async-thread.h"

/* Set when additional merges into this rbio are no longer allowed. */
#define RBIO_RMW_LOCKED_BIT	1

/*
 * Set when this rbio is sitting on the stripe cache list, kept around as a
 * cache of a past read/modify/write.
 */
#define RBIO_CACHE_BIT		2

/* Set when the contents of the stripe pages are uptodate and safe to cache. */
#define RBIO_CACHE_READY_BIT	3

/* Maximum number of rbios we keep in the stripe cache at any time. */
#define RBIO_CACHE_SIZE 1024

#define BTRFS_STRIPE_HASH_TABLE_BITS	11

/* Used by the raid56 code to lock stripes for read/modify/write. */
struct btrfs_stripe_hash {
	struct list_head hash_list;
	spinlock_t lock;
};

/* Used by the raid56 code to lock stripes for read/modify/write. */
struct btrfs_stripe_hash_table {
	struct list_head stripe_cache;
	spinlock_t cache_lock;
	int cache_size;
	struct btrfs_stripe_hash table[];
};

/*
 * A bvec-like structure presenting a sector inside a page.
 *
 * Unlike bvec we don't need a length here, it is always sectorsize.
 */
struct sector_ptr {
	struct page *page;
	unsigned int pgoff:24;
	unsigned int uptodate:8;
};

static int __raid56_parity_recover(struct btrfs_raid_bio *rbio);
static noinline void finish_rmw(struct btrfs_raid_bio *rbio);
static void rmw_work(struct work_struct *work);
static void read_rebuild_work(struct work_struct *work);
static int fail_bio_stripe(struct btrfs_raid_bio *rbio, struct bio *bio);
static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed);
static void __free_raid_bio(struct btrfs_raid_bio *rbio);
static void index_rbio_pages(struct btrfs_raid_bio *rbio);
static int alloc_rbio_pages(struct btrfs_raid_bio *rbio);

static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
					 int need_check);
static void scrub_parity_work(struct work_struct *work);

static void start_async_work(struct btrfs_raid_bio *rbio, work_func_t work_func)
{
	INIT_WORK(&rbio->work, work_func);
	queue_work(rbio->bioc->fs_info->rmw_workers, &rbio->work);
}

/*
 * Allocate the hash table used to lock full stripes for read/modify/write.
 * If another caller raced us and already installed a table, keep theirs and
 * free ours.
 */
int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
{
	struct btrfs_stripe_hash_table *table;
	struct btrfs_stripe_hash_table *x;
	struct btrfs_stripe_hash *cur;
	struct btrfs_stripe_hash *h;
	int num_entries = 1 << BTRFS_STRIPE_HASH_TABLE_BITS;
	int i;

	if (info->stripe_hash_table)
		return 0;

	/*
	 * The table is large, so use kvzalloc() to avoid failing on
	 * higher-order allocations.
	 */
	table = kvzalloc(struct_size(table, table, num_entries), GFP_KERNEL);
	if (!table)
		return -ENOMEM;

	spin_lock_init(&table->cache_lock);
	INIT_LIST_HEAD(&table->stripe_cache);

	h = table->table;

	for (i = 0; i < num_entries; i++) {
		cur = h + i;
		INIT_LIST_HEAD(&cur->hash_list);
		spin_lock_init(&cur->lock);
	}

	x = cmpxchg(&info->stripe_hash_table, NULL, table);
	kvfree(x);
	return 0;
}

/*
 * Caching an rbio means copying anything from the bio_list into the
 * stripe_pages.  This is used to keep a fully built stripe around so that
 * later reads or read/modify/write cycles don't have to reread it from disk.
 *
 * Once the copy is done the cache ready bit is set.
 */
static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
{
	int i;
	int ret;

	ret = alloc_rbio_pages(rbio);
	if (ret)
		return;

	for (i = 0; i < rbio->nr_sectors; i++) {
		/* Some range not covered by bio (partial write), skip it. */
		if (!rbio->bio_sectors[i].page)
			continue;

		ASSERT(rbio->stripe_sectors[i].page);
		memcpy_page(rbio->stripe_sectors[i].page,
			    rbio->stripe_sectors[i].pgoff,
			    rbio->bio_sectors[i].page,
			    rbio->bio_sectors[i].pgoff,
			    rbio->bioc->fs_info->sectorsize);
		rbio->stripe_sectors[i].uptodate = 1;
	}
	set_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
}

/* Hash the first logical address of a full stripe into a hash table bucket. */
static int rbio_bucket(struct btrfs_raid_bio *rbio)
{
	u64 num = rbio->bioc->raid_map[0];

	/*
	 * Full stripe starts are aligned, so their low bits carry little
	 * information; shift them off before hashing.
	 */
	return hash_64(num >> 16, BTRFS_STRIPE_HASH_TABLE_BITS);
}

0180
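/* Return true if all sectors covered by stripe page @page_nr are uptodate. */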
0181 static bool full_page_sectors_uptodate(struct btrfs_raid_bio *rbio,
0182 unsigned int page_nr)
0183 {
0184 const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
0185 const u32 sectors_per_page = PAGE_SIZE / sectorsize;
0186 int i;
0187
0188 ASSERT(page_nr < rbio->nr_pages);
0189
0190 for (i = sectors_per_page * page_nr;
0191 i < sectors_per_page * page_nr + sectors_per_page;
0192 i++) {
0193 if (!rbio->stripe_sectors[i].uptodate)
0194 return false;
0195 }
0196 return true;
0197 }
/*
 * Update the stripe_sectors[] array to use the correct page and pgoff.
 *
 * Should be called every time any page pointer in stripe_pages[] is modified.
 */
static void index_stripe_sectors(struct btrfs_raid_bio *rbio)
{
	const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
	u32 offset;
	int i;

	for (i = 0, offset = 0; i < rbio->nr_sectors; i++, offset += sectorsize) {
		int page_index = offset >> PAGE_SHIFT;

		ASSERT(page_index < rbio->nr_pages);
		rbio->stripe_sectors[i].page = rbio->stripe_pages[page_index];
		rbio->stripe_sectors[i].pgoff = offset_in_page(offset);
	}
}

0218
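/*
 * Hand the page at @page_nr over from @src to @dest, and mark every sector
 * it covers as uptodate in @dest.
 */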
0219 static void steal_rbio_page(struct btrfs_raid_bio *src,
0220 struct btrfs_raid_bio *dest, int page_nr)
0221 {
0222 const u32 sectorsize = src->bioc->fs_info->sectorsize;
0223 const u32 sectors_per_page = PAGE_SIZE / sectorsize;
0224 int i;
0225
0226 if (dest->stripe_pages[page_nr])
0227 __free_page(dest->stripe_pages[page_nr]);
0228 dest->stripe_pages[page_nr] = src->stripe_pages[page_nr];
0229 src->stripe_pages[page_nr] = NULL;
0230
0231
0232 for (i = sectors_per_page * page_nr;
0233 i < sectors_per_page * page_nr + sectors_per_page; i++)
0234 dest->stripe_sectors[i].uptodate = true;
0235 }
/*
 * Stealing an rbio means taking all the uptodate pages from the stripe array
 * in the source rbio and putting them into the destination rbio.
 *
 * This also updates the involved stripe_sectors[] entries that still point at
 * the old pages.
 */
static void steal_rbio(struct btrfs_raid_bio *src, struct btrfs_raid_bio *dest)
{
	int i;
	struct page *s;

	if (!test_bit(RBIO_CACHE_READY_BIT, &src->flags))
		return;

	for (i = 0; i < dest->nr_pages; i++) {
		s = src->stripe_pages[i];
		if (!s || !full_page_sectors_uptodate(src, i))
			continue;

		steal_rbio_page(src, dest, i);
	}
	index_stripe_sectors(dest);
	index_stripe_sectors(src);
}

/*
 * Merging means we take the bio_list from the victim and splice it into the
 * destination.  The victim should be discarded afterwards.
 */
static void merge_rbio(struct btrfs_raid_bio *dest,
		       struct btrfs_raid_bio *victim)
{
	bio_list_merge(&dest->bio_list, &victim->bio_list);
	dest->bio_list_bytes += victim->bio_list_bytes;

	/* Also inherit the bitmaps from @victim. */
	bitmap_or(&dest->dbitmap, &victim->dbitmap, &dest->dbitmap,
		  dest->stripe_nsectors);
	dest->generic_bio_cnt += victim->generic_bio_cnt;
	bio_list_init(&victim->bio_list);
}

0281
0282
0283
0284
0285
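/*
 * Drop @rbio from the stripe cache.  The caller must hold the table's
 * cache_lock; the rbio's bio_list_lock is taken here so the hash list state
 * stays consistent.  The reference the cache held is dropped, which may free
 * the rbio.
 */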
0286 static void __remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
0287 {
0288 int bucket = rbio_bucket(rbio);
0289 struct btrfs_stripe_hash_table *table;
0290 struct btrfs_stripe_hash *h;
0291 int freeit = 0;
0292
0293
0294
0295
0296 if (!test_bit(RBIO_CACHE_BIT, &rbio->flags))
0297 return;
0298
0299 table = rbio->bioc->fs_info->stripe_hash_table;
0300 h = table->table + bucket;
0301
0302
0303
0304
0305 spin_lock(&h->lock);
0306
0307
0308
0309
0310
0311 spin_lock(&rbio->bio_list_lock);
0312
0313 if (test_and_clear_bit(RBIO_CACHE_BIT, &rbio->flags)) {
0314 list_del_init(&rbio->stripe_cache);
0315 table->cache_size -= 1;
0316 freeit = 1;
0317
0318
0319
0320
0321
0322
0323
0324
0325
0326
0327 if (bio_list_empty(&rbio->bio_list)) {
0328 if (!list_empty(&rbio->hash_list)) {
0329 list_del_init(&rbio->hash_list);
0330 refcount_dec(&rbio->refs);
0331 BUG_ON(!list_empty(&rbio->plug_list));
0332 }
0333 }
0334 }
0335
0336 spin_unlock(&rbio->bio_list_lock);
0337 spin_unlock(&h->lock);
0338
0339 if (freeit)
0340 __free_raid_bio(rbio);
0341 }
0342
0343
0344
0345
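/* Take the table's cache_lock and remove @rbio from the stripe cache. */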
0346 static void remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
0347 {
0348 struct btrfs_stripe_hash_table *table;
0349 unsigned long flags;
0350
0351 if (!test_bit(RBIO_CACHE_BIT, &rbio->flags))
0352 return;
0353
0354 table = rbio->bioc->fs_info->stripe_hash_table;
0355
0356 spin_lock_irqsave(&table->cache_lock, flags);
0357 __remove_rbio_from_cache(rbio);
0358 spin_unlock_irqrestore(&table->cache_lock, flags);
0359 }
0360
0361
0362
0363
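/* Remove everything in the stripe cache.  Used when freeing the hash table. */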
0364 static void btrfs_clear_rbio_cache(struct btrfs_fs_info *info)
0365 {
0366 struct btrfs_stripe_hash_table *table;
0367 unsigned long flags;
0368 struct btrfs_raid_bio *rbio;
0369
0370 table = info->stripe_hash_table;
0371
0372 spin_lock_irqsave(&table->cache_lock, flags);
0373 while (!list_empty(&table->stripe_cache)) {
0374 rbio = list_entry(table->stripe_cache.next,
0375 struct btrfs_raid_bio,
0376 stripe_cache);
0377 __remove_rbio_from_cache(rbio);
0378 }
0379 spin_unlock_irqrestore(&table->cache_lock, flags);
0380 }
0381
0382
0383
0384
0385
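/* Remove all cached rbios and free the stripe hash table itself. */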
0386 void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info)
0387 {
0388 if (!info->stripe_hash_table)
0389 return;
0390 btrfs_clear_rbio_cache(info);
0391 kvfree(info->stripe_hash_table);
0392 info->stripe_hash_table = NULL;
0393 }
0394
/*
 * Insert an rbio into the stripe cache.  It must have already been prepared
 * by calling cache_rbio_pages().
 *
 * If this rbio was already cached, it is moved to the front of the LRU.
 * If the cache has grown past RBIO_CACHE_SIZE, the oldest entry is pruned.
 */
static void cache_rbio(struct btrfs_raid_bio *rbio)
{
	struct btrfs_stripe_hash_table *table;
	unsigned long flags;

	if (!test_bit(RBIO_CACHE_READY_BIT, &rbio->flags))
		return;

	table = rbio->bioc->fs_info->stripe_hash_table;

	spin_lock_irqsave(&table->cache_lock, flags);
	spin_lock(&rbio->bio_list_lock);

	/* Bump our ref if we were not in the list before. */
	if (!test_and_set_bit(RBIO_CACHE_BIT, &rbio->flags))
		refcount_inc(&rbio->refs);

	if (!list_empty(&rbio->stripe_cache)) {
		list_move(&rbio->stripe_cache, &table->stripe_cache);
	} else {
		list_add(&rbio->stripe_cache, &table->stripe_cache);
		table->cache_size += 1;
	}

	spin_unlock(&rbio->bio_list_lock);

	if (table->cache_size > RBIO_CACHE_SIZE) {
		struct btrfs_raid_bio *found;

		found = list_entry(table->stripe_cache.prev,
				   struct btrfs_raid_bio,
				   stripe_cache);

		if (found != rbio)
			__remove_rbio_from_cache(found);
	}

	spin_unlock_irqrestore(&table->cache_lock, flags);
}

/*
 * Helper to XOR all source pages into the destination.  The last pointer in
 * @pages is the destination; at most MAX_XOR_BLOCKS sources are handed to
 * xor_blocks() per call.
 */
static void run_xor(void **pages, int src_cnt, ssize_t len)
{
	int src_off = 0;
	int xor_src_cnt = 0;
	void *dest = pages[src_cnt];

	while (src_cnt > 0) {
		xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS);
		xor_blocks(xor_src_cnt, len, dest, pages + src_off);

		src_cnt -= xor_src_cnt;
		src_off += xor_src_cnt;
	}
}

0465
0466
0467
0468
0469
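/*
 * Returns 1 if the bio_list of this rbio covers the full stripe
 * (nr_data * BTRFS_STRIPE_LEN bytes), 0 otherwise.
 */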
0470 static int rbio_is_full(struct btrfs_raid_bio *rbio)
0471 {
0472 unsigned long flags;
0473 unsigned long size = rbio->bio_list_bytes;
0474 int ret = 1;
0475
0476 spin_lock_irqsave(&rbio->bio_list_lock, flags);
0477 if (size != rbio->nr_data * BTRFS_STRIPE_LEN)
0478 ret = 0;
0479 BUG_ON(size > rbio->nr_data * BTRFS_STRIPE_LEN);
0480 spin_unlock_irqrestore(&rbio->bio_list_lock, flags);
0481
0482 return ret;
0483 }
0484
0485
0486
0487
0488
0489
0490
0491
0492
0493
0494
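/*
 * Returns 1 if @cur can be merged into @last.  Merging is only allowed when
 * both rbios target the same full stripe, are the same operation, and
 * neither one is already locked for RMW or sitting in the stripe cache.
 * Read/rebuild rbios additionally have to agree on which stripes failed.
 */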
0495 static int rbio_can_merge(struct btrfs_raid_bio *last,
0496 struct btrfs_raid_bio *cur)
0497 {
0498 if (test_bit(RBIO_RMW_LOCKED_BIT, &last->flags) ||
0499 test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags))
0500 return 0;
0501
0502
0503
0504
0505
0506
0507
0508
0509 if (test_bit(RBIO_CACHE_BIT, &last->flags) ||
0510 test_bit(RBIO_CACHE_BIT, &cur->flags))
0511 return 0;
0512
0513 if (last->bioc->raid_map[0] != cur->bioc->raid_map[0])
0514 return 0;
0515
0516
0517 if (last->operation != cur->operation)
0518 return 0;
0519
0520
0521
0522
0523
0524
0525
0526
0527 if (last->operation == BTRFS_RBIO_PARITY_SCRUB)
0528 return 0;
0529
0530 if (last->operation == BTRFS_RBIO_REBUILD_MISSING)
0531 return 0;
0532
0533 if (last->operation == BTRFS_RBIO_READ_REBUILD) {
0534 int fa = last->faila;
0535 int fb = last->failb;
0536 int cur_fa = cur->faila;
0537 int cur_fb = cur->failb;
0538
0539 if (last->faila >= last->failb) {
0540 fa = last->failb;
0541 fb = last->faila;
0542 }
0543
0544 if (cur->faila >= cur->failb) {
0545 cur_fa = cur->failb;
0546 cur_fb = cur->faila;
0547 }
0548
0549 if (fa != cur_fa || fb != cur_fb)
0550 return 0;
0551 }
0552 return 1;
0553 }
0554
0555 static unsigned int rbio_stripe_sector_index(const struct btrfs_raid_bio *rbio,
0556 unsigned int stripe_nr,
0557 unsigned int sector_nr)
0558 {
0559 ASSERT(stripe_nr < rbio->real_stripes);
0560 ASSERT(sector_nr < rbio->stripe_nsectors);
0561
0562 return stripe_nr * rbio->stripe_nsectors + sector_nr;
0563 }
0564
0565
0566 static struct sector_ptr *rbio_stripe_sector(const struct btrfs_raid_bio *rbio,
0567 unsigned int stripe_nr,
0568 unsigned int sector_nr)
0569 {
0570 return &rbio->stripe_sectors[rbio_stripe_sector_index(rbio, stripe_nr,
0571 sector_nr)];
0572 }
0573
0574
0575 static struct sector_ptr *rbio_pstripe_sector(const struct btrfs_raid_bio *rbio,
0576 unsigned int sector_nr)
0577 {
0578 return rbio_stripe_sector(rbio, rbio->nr_data, sector_nr);
0579 }
0580
0581
0582 static struct sector_ptr *rbio_qstripe_sector(const struct btrfs_raid_bio *rbio,
0583 unsigned int sector_nr)
0584 {
0585 if (rbio->nr_data + 1 == rbio->real_stripes)
0586 return NULL;
0587 return rbio_stripe_sector(rbio, rbio->nr_data + 1, sector_nr);
0588 }
0589
0590
0591
0592
0593
0594
0595
0596
0597
0598
0599
0600
0601
0602
0603
0604
0605
0606
0607
0608
0609
0610
0611
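/*
 * Lock the full stripe covered by this rbio.
 *
 * Returns 1 if the rbio was merged into an existing rbio or queued behind
 * the current lock holder (the caller no longer owns it), and 0 if the lock
 * was taken and the caller is responsible for doing the IO.  If an idle
 * cached rbio for this stripe is found, its pages are stolen and it is
 * dropped from the cache.
 */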
0612 static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
0613 {
0614 struct btrfs_stripe_hash *h;
0615 struct btrfs_raid_bio *cur;
0616 struct btrfs_raid_bio *pending;
0617 unsigned long flags;
0618 struct btrfs_raid_bio *freeit = NULL;
0619 struct btrfs_raid_bio *cache_drop = NULL;
0620 int ret = 0;
0621
0622 h = rbio->bioc->fs_info->stripe_hash_table->table + rbio_bucket(rbio);
0623
0624 spin_lock_irqsave(&h->lock, flags);
0625 list_for_each_entry(cur, &h->hash_list, hash_list) {
0626 if (cur->bioc->raid_map[0] != rbio->bioc->raid_map[0])
0627 continue;
0628
0629 spin_lock(&cur->bio_list_lock);
0630
0631
0632 if (bio_list_empty(&cur->bio_list) &&
0633 list_empty(&cur->plug_list) &&
0634 test_bit(RBIO_CACHE_BIT, &cur->flags) &&
0635 !test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags)) {
0636 list_del_init(&cur->hash_list);
0637 refcount_dec(&cur->refs);
0638
0639 steal_rbio(cur, rbio);
0640 cache_drop = cur;
0641 spin_unlock(&cur->bio_list_lock);
0642
0643 goto lockit;
0644 }
0645
0646
0647 if (rbio_can_merge(cur, rbio)) {
0648 merge_rbio(cur, rbio);
0649 spin_unlock(&cur->bio_list_lock);
0650 freeit = rbio;
0651 ret = 1;
0652 goto out;
0653 }
0654
0655
0656
0657
0658
0659
0660
0661 list_for_each_entry(pending, &cur->plug_list, plug_list) {
0662 if (rbio_can_merge(pending, rbio)) {
0663 merge_rbio(pending, rbio);
0664 spin_unlock(&cur->bio_list_lock);
0665 freeit = rbio;
0666 ret = 1;
0667 goto out;
0668 }
0669 }
0670
0671
0672
0673
0674
0675 list_add_tail(&rbio->plug_list, &cur->plug_list);
0676 spin_unlock(&cur->bio_list_lock);
0677 ret = 1;
0678 goto out;
0679 }
0680 lockit:
0681 refcount_inc(&rbio->refs);
0682 list_add(&rbio->hash_list, &h->hash_list);
0683 out:
0684 spin_unlock_irqrestore(&h->lock, flags);
0685 if (cache_drop)
0686 remove_rbio_from_cache(cache_drop);
0687 if (freeit)
0688 __free_raid_bio(freeit);
0689 return ret;
0690 }
0691
0692
0693
0694
0695
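/*
 * Called as RMW or reconstruction for a stripe completes.  Either hands the
 * stripe lock to the next rbio queued on our plug_list (starting its work
 * asynchronously), or drops the lock and, when possible, leaves this rbio in
 * the stripe cache for later reuse.
 */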
0696 static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
0697 {
0698 int bucket;
0699 struct btrfs_stripe_hash *h;
0700 unsigned long flags;
0701 int keep_cache = 0;
0702
0703 bucket = rbio_bucket(rbio);
0704 h = rbio->bioc->fs_info->stripe_hash_table->table + bucket;
0705
0706 if (list_empty(&rbio->plug_list))
0707 cache_rbio(rbio);
0708
0709 spin_lock_irqsave(&h->lock, flags);
0710 spin_lock(&rbio->bio_list_lock);
0711
0712 if (!list_empty(&rbio->hash_list)) {
0713
0714
0715
0716
0717
0718 if (list_empty(&rbio->plug_list) &&
0719 test_bit(RBIO_CACHE_BIT, &rbio->flags)) {
0720 keep_cache = 1;
0721 clear_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
0722 BUG_ON(!bio_list_empty(&rbio->bio_list));
0723 goto done;
0724 }
0725
0726 list_del_init(&rbio->hash_list);
0727 refcount_dec(&rbio->refs);
0728
0729
0730
0731
0732
0733
0734 if (!list_empty(&rbio->plug_list)) {
0735 struct btrfs_raid_bio *next;
0736 struct list_head *head = rbio->plug_list.next;
0737
0738 next = list_entry(head, struct btrfs_raid_bio,
0739 plug_list);
0740
0741 list_del_init(&rbio->plug_list);
0742
0743 list_add(&next->hash_list, &h->hash_list);
0744 refcount_inc(&next->refs);
0745 spin_unlock(&rbio->bio_list_lock);
0746 spin_unlock_irqrestore(&h->lock, flags);
0747
0748 if (next->operation == BTRFS_RBIO_READ_REBUILD)
0749 start_async_work(next, read_rebuild_work);
0750 else if (next->operation == BTRFS_RBIO_REBUILD_MISSING) {
0751 steal_rbio(rbio, next);
0752 start_async_work(next, read_rebuild_work);
0753 } else if (next->operation == BTRFS_RBIO_WRITE) {
0754 steal_rbio(rbio, next);
0755 start_async_work(next, rmw_work);
0756 } else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) {
0757 steal_rbio(rbio, next);
0758 start_async_work(next, scrub_parity_work);
0759 }
0760
0761 goto done_nolock;
0762 }
0763 }
0764 done:
0765 spin_unlock(&rbio->bio_list_lock);
0766 spin_unlock_irqrestore(&h->lock, flags);
0767
0768 done_nolock:
0769 if (!keep_cache)
0770 remove_rbio_from_cache(rbio);
0771 }
0772
0773 static void __free_raid_bio(struct btrfs_raid_bio *rbio)
0774 {
0775 int i;
0776
0777 if (!refcount_dec_and_test(&rbio->refs))
0778 return;
0779
0780 WARN_ON(!list_empty(&rbio->stripe_cache));
0781 WARN_ON(!list_empty(&rbio->hash_list));
0782 WARN_ON(!bio_list_empty(&rbio->bio_list));
0783
0784 for (i = 0; i < rbio->nr_pages; i++) {
0785 if (rbio->stripe_pages[i]) {
0786 __free_page(rbio->stripe_pages[i]);
0787 rbio->stripe_pages[i] = NULL;
0788 }
0789 }
0790
0791 btrfs_put_bioc(rbio->bioc);
0792 kfree(rbio);
0793 }
0794
0795 static void rbio_endio_bio_list(struct bio *cur, blk_status_t err)
0796 {
0797 struct bio *next;
0798
0799 while (cur) {
0800 next = cur->bi_next;
0801 cur->bi_next = NULL;
0802 cur->bi_status = err;
0803 bio_endio(cur);
0804 cur = next;
0805 }
0806 }
0807
0808
0809
0810
0811
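/*
 * This frees the rbio and completes every bio on its bio_list with the given
 * status.  The stripe lock is dropped first, and any bios that were merged
 * in while unlocking are completed as well.
 */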
0812 static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
0813 {
0814 struct bio *cur = bio_list_get(&rbio->bio_list);
0815 struct bio *extra;
0816
0817 if (rbio->generic_bio_cnt)
0818 btrfs_bio_counter_sub(rbio->bioc->fs_info, rbio->generic_bio_cnt);
0819
0820
0821
0822
0823
0824 bitmap_clear(&rbio->dbitmap, 0, rbio->stripe_nsectors);
0825
0826
0827
0828
0829
0830
0831
0832
0833
0834 unlock_stripe(rbio);
0835 extra = bio_list_get(&rbio->bio_list);
0836 __free_raid_bio(rbio);
0837
0838 rbio_endio_bio_list(cur, err);
0839 if (extra)
0840 rbio_endio_bio_list(extra, err);
0841 }
0842
0843
0844
0845
0846
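/*
 * End io for each write bio.  Individual stripe failures are recorded; once
 * all writes complete, the rbio only fails if more stripes failed than the
 * raid profile can tolerate (no failures are tolerated for parity scrub
 * writes).
 */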
0847 static void raid_write_end_io(struct bio *bio)
0848 {
0849 struct btrfs_raid_bio *rbio = bio->bi_private;
0850 blk_status_t err = bio->bi_status;
0851 int max_errors;
0852
0853 if (err)
0854 fail_bio_stripe(rbio, bio);
0855
0856 bio_put(bio);
0857
0858 if (!atomic_dec_and_test(&rbio->stripes_pending))
0859 return;
0860
0861 err = BLK_STS_OK;
0862
0863
0864 max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ?
0865 0 : rbio->bioc->max_errors;
0866 if (atomic_read(&rbio->error) > max_errors)
0867 err = BLK_STS_IOERR;
0868
0869 rbio_orig_end_io(rbio, err);
0870 }
0871
0872
0873
0874
0875
0876
0877
0878
0879
0880
0881
0882
0883
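/*
 * Return the sector to use for a given stripe/sector number.  Data from the
 * bio_list (the newest data from the upper layers) is preferred; if the
 * bio_list does not cover this sector, fall back to the copy in
 * stripe_sectors, unless @bio_list_only is set, in which case NULL is
 * returned.
 */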
0884 static struct sector_ptr *sector_in_rbio(struct btrfs_raid_bio *rbio,
0885 int stripe_nr, int sector_nr,
0886 bool bio_list_only)
0887 {
0888 struct sector_ptr *sector;
0889 int index;
0890
0891 ASSERT(stripe_nr >= 0 && stripe_nr < rbio->real_stripes);
0892 ASSERT(sector_nr >= 0 && sector_nr < rbio->stripe_nsectors);
0893
0894 index = stripe_nr * rbio->stripe_nsectors + sector_nr;
0895 ASSERT(index >= 0 && index < rbio->nr_sectors);
0896
0897 spin_lock_irq(&rbio->bio_list_lock);
0898 sector = &rbio->bio_sectors[index];
0899 if (sector->page || bio_list_only) {
0900
0901 if (!sector->page)
0902 sector = NULL;
0903 spin_unlock_irq(&rbio->bio_list_lock);
0904 return sector;
0905 }
0906 spin_unlock_irq(&rbio->bio_list_lock);
0907
0908 return &rbio->stripe_sectors[index];
0909 }
0910
0911
0912
0913
0914
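/*
 * Allocate an rbio for the given io context.  The rbio and its page and
 * sector pointer arrays are allocated as one chunk; the arrays are carved
 * out of the memory following the struct itself (see CONSUME_ALLOC() below).
 */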
0915 static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
0916 struct btrfs_io_context *bioc)
0917 {
0918 const unsigned int real_stripes = bioc->num_stripes - bioc->num_tgtdevs;
0919 const unsigned int stripe_npages = BTRFS_STRIPE_LEN >> PAGE_SHIFT;
0920 const unsigned int num_pages = stripe_npages * real_stripes;
0921 const unsigned int stripe_nsectors =
0922 BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits;
0923 const unsigned int num_sectors = stripe_nsectors * real_stripes;
0924 struct btrfs_raid_bio *rbio;
0925 void *p;
0926
0927
0928 ASSERT(IS_ALIGNED(PAGE_SIZE, fs_info->sectorsize));
0929
0930
0931
0932
0933 ASSERT(stripe_nsectors <= BITS_PER_LONG);
0934
0935 rbio = kzalloc(sizeof(*rbio) +
0936 sizeof(*rbio->stripe_pages) * num_pages +
0937 sizeof(*rbio->bio_sectors) * num_sectors +
0938 sizeof(*rbio->stripe_sectors) * num_sectors +
0939 sizeof(*rbio->finish_pointers) * real_stripes,
0940 GFP_NOFS);
0941 if (!rbio)
0942 return ERR_PTR(-ENOMEM);
0943
0944 bio_list_init(&rbio->bio_list);
0945 INIT_LIST_HEAD(&rbio->plug_list);
0946 spin_lock_init(&rbio->bio_list_lock);
0947 INIT_LIST_HEAD(&rbio->stripe_cache);
0948 INIT_LIST_HEAD(&rbio->hash_list);
0949 rbio->bioc = bioc;
0950 rbio->nr_pages = num_pages;
0951 rbio->nr_sectors = num_sectors;
0952 rbio->real_stripes = real_stripes;
0953 rbio->stripe_npages = stripe_npages;
0954 rbio->stripe_nsectors = stripe_nsectors;
0955 rbio->faila = -1;
0956 rbio->failb = -1;
0957 refcount_set(&rbio->refs, 1);
0958 atomic_set(&rbio->error, 0);
0959 atomic_set(&rbio->stripes_pending, 0);
0960
0961
0962
0963
0964
0965 p = rbio + 1;
0966 #define CONSUME_ALLOC(ptr, count) do { \
0967 ptr = p; \
0968 p = (unsigned char *)p + sizeof(*(ptr)) * (count); \
0969 } while (0)
0970 CONSUME_ALLOC(rbio->stripe_pages, num_pages);
0971 CONSUME_ALLOC(rbio->bio_sectors, num_sectors);
0972 CONSUME_ALLOC(rbio->stripe_sectors, num_sectors);
0973 CONSUME_ALLOC(rbio->finish_pointers, real_stripes);
0974 #undef CONSUME_ALLOC
0975
0976 ASSERT(btrfs_nr_parity_stripes(bioc->map_type));
0977 rbio->nr_data = real_stripes - btrfs_nr_parity_stripes(bioc->map_type);
0978
0979 return rbio;
0980 }
0981
0982
0983 static int alloc_rbio_pages(struct btrfs_raid_bio *rbio)
0984 {
0985 int ret;
0986
0987 ret = btrfs_alloc_page_array(rbio->nr_pages, rbio->stripe_pages);
0988 if (ret < 0)
0989 return ret;
0990
0991 index_stripe_sectors(rbio);
0992 return 0;
0993 }
0994
0995
0996 static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio)
0997 {
0998 const int data_pages = rbio->nr_data * rbio->stripe_npages;
0999 int ret;
1000
1001 ret = btrfs_alloc_page_array(rbio->nr_pages - data_pages,
1002 rbio->stripe_pages + data_pages);
1003 if (ret < 0)
1004 return ret;
1005
1006 index_stripe_sectors(rbio);
1007 return 0;
1008 }
1009
1010
1011
1012
1013
1014
1015
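/*
 * Add a single sector destined for @stripe_nr/@sector_nr to the bio list.
 * The sector is appended to the tail bio when it is physically contiguous on
 * the same device, otherwise a new bio is allocated.  If the target device
 * is missing, the stripe is marked as failed instead of issuing IO.
 */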
1016 static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
1017 struct bio_list *bio_list,
1018 struct sector_ptr *sector,
1019 unsigned int stripe_nr,
1020 unsigned int sector_nr,
1021 enum req_op op)
1022 {
1023 const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
1024 struct bio *last = bio_list->tail;
1025 int ret;
1026 struct bio *bio;
1027 struct btrfs_io_stripe *stripe;
1028 u64 disk_start;
1029
1030
1031
1032
1033
1034
1035 ASSERT(stripe_nr >= 0 && stripe_nr < rbio->bioc->num_stripes);
1036 ASSERT(sector_nr >= 0 && sector_nr < rbio->stripe_nsectors);
1037 ASSERT(sector->page);
1038
1039 stripe = &rbio->bioc->stripes[stripe_nr];
1040 disk_start = stripe->physical + sector_nr * sectorsize;
1041
1042
1043 if (!stripe->dev->bdev)
1044 return fail_rbio_index(rbio, stripe_nr);
1045
1046
1047 if (last) {
1048 u64 last_end = last->bi_iter.bi_sector << 9;
1049 last_end += last->bi_iter.bi_size;
1050
1051
1052
1053
1054
1055 if (last_end == disk_start && !last->bi_status &&
1056 last->bi_bdev == stripe->dev->bdev) {
1057 ret = bio_add_page(last, sector->page, sectorsize,
1058 sector->pgoff);
1059 if (ret == sectorsize)
1060 return 0;
1061 }
1062 }
1063
1064
1065 bio = bio_alloc(stripe->dev->bdev,
1066 max(BTRFS_STRIPE_LEN >> PAGE_SHIFT, 1),
1067 op, GFP_NOFS);
1068 bio->bi_iter.bi_sector = disk_start >> 9;
1069 bio->bi_private = rbio;
1070
1071 bio_add_page(bio, sector->page, sectorsize, sector->pgoff);
1072 bio_list_add(bio_list, bio);
1073 return 0;
1074 }
1075
1076
1077
1078
1079
1080
1081
1082
1083 static void validate_rbio_for_rmw(struct btrfs_raid_bio *rbio)
1084 {
1085 if (rbio->faila >= 0 || rbio->failb >= 0) {
1086 BUG_ON(rbio->faila == rbio->real_stripes - 1);
1087 __raid56_parity_recover(rbio);
1088 } else {
1089 finish_rmw(rbio);
1090 }
1091 }
1092
1093 static void index_one_bio(struct btrfs_raid_bio *rbio, struct bio *bio)
1094 {
1095 const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
1096 struct bio_vec bvec;
1097 struct bvec_iter iter;
1098 u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) -
1099 rbio->bioc->raid_map[0];
1100
1101 bio_for_each_segment(bvec, bio, iter) {
1102 u32 bvec_offset;
1103
1104 for (bvec_offset = 0; bvec_offset < bvec.bv_len;
1105 bvec_offset += sectorsize, offset += sectorsize) {
1106 int index = offset / sectorsize;
1107 struct sector_ptr *sector = &rbio->bio_sectors[index];
1108
1109 sector->page = bvec.bv_page;
1110 sector->pgoff = bvec.bv_offset + bvec_offset;
1111 ASSERT(sector->pgoff < PAGE_SIZE);
1112 }
1113 }
1114 }
1115
1116
1117
1118
1119
1120
1121
1122
1123
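/*
 * Map the sectors of every bio on the bio_list into bio_sectors[] so later
 * code can look up the newest data for a given stripe/sector pair.
 */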
1124 static void index_rbio_pages(struct btrfs_raid_bio *rbio)
1125 {
1126 struct bio *bio;
1127
1128 spin_lock_irq(&rbio->bio_list_lock);
1129 bio_list_for_each(bio, &rbio->bio_list)
1130 index_one_bio(rbio, bio);
1131
1132 spin_unlock_irq(&rbio->bio_list_lock);
1133 }
1134
1135 static void bio_get_trace_info(struct btrfs_raid_bio *rbio, struct bio *bio,
1136 struct raid56_bio_trace_info *trace_info)
1137 {
1138 const struct btrfs_io_context *bioc = rbio->bioc;
1139 int i;
1140
1141 ASSERT(bioc);
1142
1143
1144 if (!bio->bi_bdev)
1145 goto not_found;
1146
1147 for (i = 0; i < bioc->num_stripes; i++) {
1148 if (bio->bi_bdev != bioc->stripes[i].dev->bdev)
1149 continue;
1150 trace_info->stripe_nr = i;
1151 trace_info->devid = bioc->stripes[i].dev->devid;
1152 trace_info->offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) -
1153 bioc->stripes[i].physical;
1154 return;
1155 }
1156
1157 not_found:
1158 trace_info->devid = -1;
1159 trace_info->offset = -1;
1160 trace_info->stripe_nr = -1;
1161 }
1162
1163
1164
1165
1166
1167
1168
1169
1170
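/*
 * This runs once all the data for the full stripe is available (read from
 * disk or supplied by the bio_list).  It computes the P (and, for raid6, Q)
 * parity for every sector, then builds and submits the write bios for all
 * sectors set in the dbitmap, including copies to any replace target device.
 */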
1171 static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
1172 {
1173 struct btrfs_io_context *bioc = rbio->bioc;
1174 const u32 sectorsize = bioc->fs_info->sectorsize;
1175 void **pointers = rbio->finish_pointers;
1176 int nr_data = rbio->nr_data;
1177
1178 int total_sector_nr;
1179 int stripe;
1180
1181 int sectornr;
1182 bool has_qstripe;
1183 struct bio_list bio_list;
1184 struct bio *bio;
1185 int ret;
1186
1187 bio_list_init(&bio_list);
1188
1189 if (rbio->real_stripes - rbio->nr_data == 1)
1190 has_qstripe = false;
1191 else if (rbio->real_stripes - rbio->nr_data == 2)
1192 has_qstripe = true;
1193 else
1194 BUG();
1195
1196
1197 ASSERT(bitmap_weight(&rbio->dbitmap, rbio->stripe_nsectors));
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207 spin_lock_irq(&rbio->bio_list_lock);
1208 set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
1209 spin_unlock_irq(&rbio->bio_list_lock);
1210
1211 atomic_set(&rbio->error, 0);
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222 index_rbio_pages(rbio);
1223 if (!rbio_is_full(rbio))
1224 cache_rbio_pages(rbio);
1225 else
1226 clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
1227
1228 for (sectornr = 0; sectornr < rbio->stripe_nsectors; sectornr++) {
1229 struct sector_ptr *sector;
1230
1231
1232 for (stripe = 0; stripe < nr_data; stripe++) {
1233 sector = sector_in_rbio(rbio, stripe, sectornr, 0);
1234 pointers[stripe] = kmap_local_page(sector->page) +
1235 sector->pgoff;
1236 }
1237
1238
1239 sector = rbio_pstripe_sector(rbio, sectornr);
1240 sector->uptodate = 1;
1241 pointers[stripe++] = kmap_local_page(sector->page) + sector->pgoff;
1242
1243 if (has_qstripe) {
1244
1245
1246
1247
1248 sector = rbio_qstripe_sector(rbio, sectornr);
1249 sector->uptodate = 1;
1250 pointers[stripe++] = kmap_local_page(sector->page) +
1251 sector->pgoff;
1252
1253 raid6_call.gen_syndrome(rbio->real_stripes, sectorsize,
1254 pointers);
1255 } else {
1256
1257 memcpy(pointers[nr_data], pointers[0], sectorsize);
1258 run_xor(pointers + 1, nr_data - 1, sectorsize);
1259 }
1260 for (stripe = stripe - 1; stripe >= 0; stripe--)
1261 kunmap_local(pointers[stripe]);
1262 }
1263
1264
1265
1266
1267
1268 for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors;
1269 total_sector_nr++) {
1270 struct sector_ptr *sector;
1271
1272 stripe = total_sector_nr / rbio->stripe_nsectors;
1273 sectornr = total_sector_nr % rbio->stripe_nsectors;
1274
1275
1276 if (!test_bit(sectornr, &rbio->dbitmap))
1277 continue;
1278
1279 if (stripe < rbio->nr_data) {
1280 sector = sector_in_rbio(rbio, stripe, sectornr, 1);
1281 if (!sector)
1282 continue;
1283 } else {
1284 sector = rbio_stripe_sector(rbio, stripe, sectornr);
1285 }
1286
1287 ret = rbio_add_io_sector(rbio, &bio_list, sector, stripe,
1288 sectornr, REQ_OP_WRITE);
1289 if (ret)
1290 goto cleanup;
1291 }
1292
1293 if (likely(!bioc->num_tgtdevs))
1294 goto write_data;
1295
1296 for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors;
1297 total_sector_nr++) {
1298 struct sector_ptr *sector;
1299
1300 stripe = total_sector_nr / rbio->stripe_nsectors;
1301 sectornr = total_sector_nr % rbio->stripe_nsectors;
1302
1303 if (!bioc->tgtdev_map[stripe]) {
1304
1305
1306
1307
1308 ASSERT(sectornr == 0);
1309 total_sector_nr += rbio->stripe_nsectors - 1;
1310 continue;
1311 }
1312
1313
1314 if (!test_bit(sectornr, &rbio->dbitmap))
1315 continue;
1316
1317 if (stripe < rbio->nr_data) {
1318 sector = sector_in_rbio(rbio, stripe, sectornr, 1);
1319 if (!sector)
1320 continue;
1321 } else {
1322 sector = rbio_stripe_sector(rbio, stripe, sectornr);
1323 }
1324
1325 ret = rbio_add_io_sector(rbio, &bio_list, sector,
1326 rbio->bioc->tgtdev_map[stripe],
1327 sectornr, REQ_OP_WRITE);
1328 if (ret)
1329 goto cleanup;
1330 }
1331
1332 write_data:
1333 atomic_set(&rbio->stripes_pending, bio_list_size(&bio_list));
1334 BUG_ON(atomic_read(&rbio->stripes_pending) == 0);
1335
1336 while ((bio = bio_list_pop(&bio_list))) {
1337 bio->bi_end_io = raid_write_end_io;
1338
1339 if (trace_raid56_write_stripe_enabled()) {
1340 struct raid56_bio_trace_info trace_info = { 0 };
1341
1342 bio_get_trace_info(rbio, bio, &trace_info);
1343 trace_raid56_write_stripe(rbio, bio, &trace_info);
1344 }
1345 submit_bio(bio);
1346 }
1347 return;
1348
1349 cleanup:
1350 rbio_orig_end_io(rbio, BLK_STS_IOERR);
1351
1352 while ((bio = bio_list_pop(&bio_list)))
1353 bio_put(bio);
1354 }
1355
1356
1357
1358
1359
1360
1361 static int find_bio_stripe(struct btrfs_raid_bio *rbio,
1362 struct bio *bio)
1363 {
1364 u64 physical = bio->bi_iter.bi_sector;
1365 int i;
1366 struct btrfs_io_stripe *stripe;
1367
1368 physical <<= 9;
1369
1370 for (i = 0; i < rbio->bioc->num_stripes; i++) {
1371 stripe = &rbio->bioc->stripes[i];
1372 if (in_range(physical, stripe->physical, BTRFS_STRIPE_LEN) &&
1373 stripe->dev->bdev && bio->bi_bdev == stripe->dev->bdev) {
1374 return i;
1375 }
1376 }
1377 return -1;
1378 }
1379
1380
1381
1382
1383
1384
1385 static int find_logical_bio_stripe(struct btrfs_raid_bio *rbio,
1386 struct bio *bio)
1387 {
1388 u64 logical = bio->bi_iter.bi_sector << 9;
1389 int i;
1390
1391 for (i = 0; i < rbio->nr_data; i++) {
1392 u64 stripe_start = rbio->bioc->raid_map[i];
1393
1394 if (in_range(logical, stripe_start, BTRFS_STRIPE_LEN))
1395 return i;
1396 }
1397 return -1;
1398 }
1399
1400
1401
1402
1403 static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed)
1404 {
1405 unsigned long flags;
1406 int ret = 0;
1407
1408 spin_lock_irqsave(&rbio->bio_list_lock, flags);
1409
1410
1411 if (rbio->faila == failed || rbio->failb == failed)
1412 goto out;
1413
1414 if (rbio->faila == -1) {
1415
1416 rbio->faila = failed;
1417 atomic_inc(&rbio->error);
1418 } else if (rbio->failb == -1) {
1419
1420 rbio->failb = failed;
1421 atomic_inc(&rbio->error);
1422 } else {
1423 ret = -EIO;
1424 }
1425 out:
1426 spin_unlock_irqrestore(&rbio->bio_list_lock, flags);
1427
1428 return ret;
1429 }
1430
1431
1432
1433
1434
1435 static int fail_bio_stripe(struct btrfs_raid_bio *rbio,
1436 struct bio *bio)
1437 {
1438 int failed = find_bio_stripe(rbio, bio);
1439
1440 if (failed < 0)
1441 return -EIO;
1442
1443 return fail_rbio_index(rbio, failed);
1444 }
1445
1446
1447
1448
1449
1450 static struct sector_ptr *find_stripe_sector(struct btrfs_raid_bio *rbio,
1451 struct page *page,
1452 unsigned int pgoff)
1453 {
1454 int i;
1455
1456 for (i = 0; i < rbio->nr_sectors; i++) {
1457 struct sector_ptr *sector = &rbio->stripe_sectors[i];
1458
1459 if (sector->page == page && sector->pgoff == pgoff)
1460 return sector;
1461 }
1462 return NULL;
1463 }
1464
1465
1466
1467
1468
1469 static void set_bio_pages_uptodate(struct btrfs_raid_bio *rbio, struct bio *bio)
1470 {
1471 const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
1472 struct bio_vec *bvec;
1473 struct bvec_iter_all iter_all;
1474
1475 ASSERT(!bio_flagged(bio, BIO_CLONED));
1476
1477 bio_for_each_segment_all(bvec, bio, iter_all) {
1478 struct sector_ptr *sector;
1479 int pgoff;
1480
1481 for (pgoff = bvec->bv_offset; pgoff - bvec->bv_offset < bvec->bv_len;
1482 pgoff += sectorsize) {
1483 sector = find_stripe_sector(rbio, bvec->bv_page, pgoff);
1484 ASSERT(sector);
1485 if (sector)
1486 sector->uptodate = 1;
1487 }
1488 }
1489 }
1490
1491 static void raid56_bio_end_io(struct bio *bio)
1492 {
1493 struct btrfs_raid_bio *rbio = bio->bi_private;
1494
1495 if (bio->bi_status)
1496 fail_bio_stripe(rbio, bio);
1497 else
1498 set_bio_pages_uptodate(rbio, bio);
1499
1500 bio_put(bio);
1501
1502 if (atomic_dec_and_test(&rbio->stripes_pending))
1503 queue_work(rbio->bioc->fs_info->endio_raid56_workers,
1504 &rbio->end_io_work);
1505 }
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515 static void raid56_rmw_end_io_work(struct work_struct *work)
1516 {
1517 struct btrfs_raid_bio *rbio =
1518 container_of(work, struct btrfs_raid_bio, end_io_work);
1519
1520 if (atomic_read(&rbio->error) > rbio->bioc->max_errors) {
1521 rbio_orig_end_io(rbio, BLK_STS_IOERR);
1522 return;
1523 }
1524
1525
1526
1527
1528
1529 validate_rbio_for_rmw(rbio);
1530 }
1531
1532
1533
1534
1535
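/*
 * The first step of a read/modify/write cycle: read in every data sector
 * that is not already covered by the bio_list or by an uptodate cached copy.
 * Once the reads finish, parity is computed and the writes are submitted
 * from finish_rmw().
 */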
1536 static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
1537 {
1538 int bios_to_read = 0;
1539 struct bio_list bio_list;
1540 const int nr_data_sectors = rbio->stripe_nsectors * rbio->nr_data;
1541 int ret;
1542 int total_sector_nr;
1543 struct bio *bio;
1544
1545 bio_list_init(&bio_list);
1546
1547 ret = alloc_rbio_pages(rbio);
1548 if (ret)
1549 goto cleanup;
1550
1551 index_rbio_pages(rbio);
1552
1553 atomic_set(&rbio->error, 0);
1554
1555 for (total_sector_nr = 0; total_sector_nr < nr_data_sectors;
1556 total_sector_nr++) {
1557 struct sector_ptr *sector;
1558 int stripe = total_sector_nr / rbio->stripe_nsectors;
1559 int sectornr = total_sector_nr % rbio->stripe_nsectors;
1560
1561
1562
1563
1564
1565
1566 sector = sector_in_rbio(rbio, stripe, sectornr, 1);
1567 if (sector)
1568 continue;
1569
1570 sector = rbio_stripe_sector(rbio, stripe, sectornr);
1571
1572
1573
1574
1575 if (sector->uptodate)
1576 continue;
1577
1578 ret = rbio_add_io_sector(rbio, &bio_list, sector,
1579 stripe, sectornr, REQ_OP_READ);
1580 if (ret)
1581 goto cleanup;
1582 }
1583
1584 bios_to_read = bio_list_size(&bio_list);
1585 if (!bios_to_read) {
1586
1587
1588
1589
1590
1591
1592 goto finish;
1593 }
1594
1595
1596
1597
1598
1599 atomic_set(&rbio->stripes_pending, bios_to_read);
1600 INIT_WORK(&rbio->end_io_work, raid56_rmw_end_io_work);
1601 while ((bio = bio_list_pop(&bio_list))) {
1602 bio->bi_end_io = raid56_bio_end_io;
1603
1604 if (trace_raid56_read_partial_enabled()) {
1605 struct raid56_bio_trace_info trace_info = { 0 };
1606
1607 bio_get_trace_info(rbio, bio, &trace_info);
1608 trace_raid56_read_partial(rbio, bio, &trace_info);
1609 }
1610 submit_bio(bio);
1611 }
1612
1613 return 0;
1614
1615 cleanup:
1616 rbio_orig_end_io(rbio, BLK_STS_IOERR);
1617
1618 while ((bio = bio_list_pop(&bio_list)))
1619 bio_put(bio);
1620
1621 return -EIO;
1622
1623 finish:
1624 validate_rbio_for_rmw(rbio);
1625 return 0;
1626 }
1627
1628
1629
1630
1631
1632 static int full_stripe_write(struct btrfs_raid_bio *rbio)
1633 {
1634 int ret;
1635
1636 ret = alloc_rbio_parity_pages(rbio);
1637 if (ret) {
1638 __free_raid_bio(rbio);
1639 return ret;
1640 }
1641
1642 ret = lock_stripe_add(rbio);
1643 if (ret == 0)
1644 finish_rmw(rbio);
1645 return 0;
1646 }
1647
1648
1649
1650
1651
1652
1653 static int partial_stripe_write(struct btrfs_raid_bio *rbio)
1654 {
1655 int ret;
1656
1657 ret = lock_stripe_add(rbio);
1658 if (ret == 0)
1659 start_async_work(rbio, rmw_work);
1660 return 0;
1661 }
1662
1663
1664
1665
1666
1667
1668
1669 static int __raid56_parity_write(struct btrfs_raid_bio *rbio)
1670 {
1671
1672 if (!rbio_is_full(rbio))
1673 return partial_stripe_write(rbio);
1674 return full_stripe_write(rbio);
1675 }
1676
1677
1678
1679
1680
1681
1682
1683
1684 struct btrfs_plug_cb {
1685 struct blk_plug_cb cb;
1686 struct btrfs_fs_info *info;
1687 struct list_head rbio_list;
1688 struct work_struct work;
1689 };
1690
1691
1692
1693
1694 static int plug_cmp(void *priv, const struct list_head *a,
1695 const struct list_head *b)
1696 {
1697 const struct btrfs_raid_bio *ra = container_of(a, struct btrfs_raid_bio,
1698 plug_list);
1699 const struct btrfs_raid_bio *rb = container_of(b, struct btrfs_raid_bio,
1700 plug_list);
1701 u64 a_sector = ra->bio_list.head->bi_iter.bi_sector;
1702 u64 b_sector = rb->bio_list.head->bi_iter.bi_sector;
1703
1704 if (a_sector < b_sector)
1705 return -1;
1706 if (a_sector > b_sector)
1707 return 1;
1708 return 0;
1709 }
1710
1711 static void run_plug(struct btrfs_plug_cb *plug)
1712 {
1713 struct btrfs_raid_bio *cur;
1714 struct btrfs_raid_bio *last = NULL;
1715
1716
1717
1718
1719
1720
1721 list_sort(NULL, &plug->rbio_list, plug_cmp);
1722 while (!list_empty(&plug->rbio_list)) {
1723 cur = list_entry(plug->rbio_list.next,
1724 struct btrfs_raid_bio, plug_list);
1725 list_del_init(&cur->plug_list);
1726
1727 if (rbio_is_full(cur)) {
1728 int ret;
1729
1730
1731 ret = full_stripe_write(cur);
1732 BUG_ON(ret);
1733 continue;
1734 }
1735 if (last) {
1736 if (rbio_can_merge(last, cur)) {
1737 merge_rbio(last, cur);
1738 __free_raid_bio(cur);
1739 continue;
1740
1741 }
1742 __raid56_parity_write(last);
1743 }
1744 last = cur;
1745 }
1746 if (last) {
1747 __raid56_parity_write(last);
1748 }
1749 kfree(plug);
1750 }
1751
1752
1753
1754
1755
1756 static void unplug_work(struct work_struct *work)
1757 {
1758 struct btrfs_plug_cb *plug;
1759 plug = container_of(work, struct btrfs_plug_cb, work);
1760 run_plug(plug);
1761 }
1762
1763 static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
1764 {
1765 struct btrfs_plug_cb *plug;
1766 plug = container_of(cb, struct btrfs_plug_cb, cb);
1767
1768 if (from_schedule) {
1769 INIT_WORK(&plug->work, unplug_work);
1770 queue_work(plug->info->rmw_workers, &plug->work);
1771 return;
1772 }
1773 run_plug(plug);
1774 }
1775
1776
1777 static void rbio_add_bio(struct btrfs_raid_bio *rbio, struct bio *orig_bio)
1778 {
1779 const struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
1780 const u64 orig_logical = orig_bio->bi_iter.bi_sector << SECTOR_SHIFT;
1781 const u64 full_stripe_start = rbio->bioc->raid_map[0];
1782 const u32 orig_len = orig_bio->bi_iter.bi_size;
1783 const u32 sectorsize = fs_info->sectorsize;
1784 u64 cur_logical;
1785
1786 ASSERT(orig_logical >= full_stripe_start &&
1787 orig_logical + orig_len <= full_stripe_start +
1788 rbio->nr_data * BTRFS_STRIPE_LEN);
1789
1790 bio_list_add(&rbio->bio_list, orig_bio);
1791 rbio->bio_list_bytes += orig_bio->bi_iter.bi_size;
1792
1793
1794 for (cur_logical = orig_logical; cur_logical < orig_logical + orig_len;
1795 cur_logical += sectorsize) {
1796 int bit = ((u32)(cur_logical - full_stripe_start) >>
1797 fs_info->sectorsize_bits) % rbio->stripe_nsectors;
1798
1799 set_bit(bit, &rbio->dbitmap);
1800 }
1801 }
1802
1803
1804
1805
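/*
 * Our main entry point for writes to RAID5/6 stripes.  Full stripe writes
 * are started immediately; partial writes are either collected on the
 * current block plug so they can be sorted and merged, or go through the
 * read/modify/write path.
 */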
1806 void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
1807 {
1808 struct btrfs_fs_info *fs_info = bioc->fs_info;
1809 struct btrfs_raid_bio *rbio;
1810 struct btrfs_plug_cb *plug = NULL;
1811 struct blk_plug_cb *cb;
1812 int ret = 0;
1813
1814 rbio = alloc_rbio(fs_info, bioc);
1815 if (IS_ERR(rbio)) {
1816 btrfs_put_bioc(bioc);
1817 ret = PTR_ERR(rbio);
1818 goto out_dec_counter;
1819 }
1820 rbio->operation = BTRFS_RBIO_WRITE;
1821 rbio_add_bio(rbio, bio);
1822
1823 rbio->generic_bio_cnt = 1;
1824
1825
1826
1827
1828
1829 if (rbio_is_full(rbio)) {
1830 ret = full_stripe_write(rbio);
1831 if (ret)
1832 goto out_dec_counter;
1833 return;
1834 }
1835
1836 cb = blk_check_plugged(btrfs_raid_unplug, fs_info, sizeof(*plug));
1837 if (cb) {
1838 plug = container_of(cb, struct btrfs_plug_cb, cb);
1839 if (!plug->info) {
1840 plug->info = fs_info;
1841 INIT_LIST_HEAD(&plug->rbio_list);
1842 }
1843 list_add_tail(&rbio->plug_list, &plug->rbio_list);
1844 } else {
1845 ret = __raid56_parity_write(rbio);
1846 if (ret)
1847 goto out_dec_counter;
1848 }
1849
1850 return;
1851
1852 out_dec_counter:
1853 btrfs_bio_counter_dec(fs_info);
1854 bio->bi_status = errno_to_blk_status(ret);
1855 bio_endio(bio);
1856 }
1857
1858
1859
1860
1861
1862
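/*
 * All the reads needed for reconstruction are done (or were already cached);
 * do the actual per-sector recovery.  RAID6 double failures use the raid6
 * recovery helpers, everything else is rebuilt by xor against the remaining
 * stripes.
 */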
1863 static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
1864 {
1865 const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
1866 int sectornr, stripe;
1867 void **pointers;
1868 void **unmap_array;
1869 int faila = -1, failb = -1;
1870 blk_status_t err;
1871 int i;
1872
1873
1874
1875
1876
1877 pointers = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS);
1878 if (!pointers) {
1879 err = BLK_STS_RESOURCE;
1880 goto cleanup_io;
1881 }
1882
1883
1884
1885
1886
1887 unmap_array = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS);
1888 if (!unmap_array) {
1889 err = BLK_STS_RESOURCE;
1890 goto cleanup_pointers;
1891 }
1892
1893 faila = rbio->faila;
1894 failb = rbio->failb;
1895
1896 if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
1897 rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
1898 spin_lock_irq(&rbio->bio_list_lock);
1899 set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
1900 spin_unlock_irq(&rbio->bio_list_lock);
1901 }
1902
1903 index_rbio_pages(rbio);
1904
1905 for (sectornr = 0; sectornr < rbio->stripe_nsectors; sectornr++) {
1906 struct sector_ptr *sector;
1907
1908
1909
1910
1911
1912 if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB &&
1913 !test_bit(sectornr, &rbio->dbitmap))
1914 continue;
1915
1916
1917
1918
1919
1920
1921
1922 for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
1923
1924
1925
1926
1927 if ((rbio->operation == BTRFS_RBIO_READ_REBUILD ||
1928 rbio->operation == BTRFS_RBIO_REBUILD_MISSING) &&
1929 (stripe == faila || stripe == failb)) {
1930 sector = sector_in_rbio(rbio, stripe, sectornr, 0);
1931 } else {
1932 sector = rbio_stripe_sector(rbio, stripe, sectornr);
1933 }
1934 ASSERT(sector->page);
1935 pointers[stripe] = kmap_local_page(sector->page) +
1936 sector->pgoff;
1937 unmap_array[stripe] = pointers[stripe];
1938 }
1939
1940
1941 if (rbio->bioc->map_type & BTRFS_BLOCK_GROUP_RAID6) {
1942
1943 if (failb < 0) {
1944 if (faila == rbio->nr_data) {
1945
1946
1947
1948
1949
1950 err = BLK_STS_IOERR;
1951 goto cleanup;
1952 }
1953
1954
1955
1956
1957 goto pstripe;
1958 }
1959
1960
1961 if (faila > failb)
1962 swap(faila, failb);
1963
1964
1965
1966
1967
1968
1969
1970 if (rbio->bioc->raid_map[failb] == RAID6_Q_STRIPE) {
1971 if (rbio->bioc->raid_map[faila] ==
1972 RAID5_P_STRIPE) {
1973 err = BLK_STS_IOERR;
1974 goto cleanup;
1975 }
1976
1977
1978
1979
1980 goto pstripe;
1981 }
1982
1983 if (rbio->bioc->raid_map[failb] == RAID5_P_STRIPE) {
1984 raid6_datap_recov(rbio->real_stripes,
1985 sectorsize, faila, pointers);
1986 } else {
1987 raid6_2data_recov(rbio->real_stripes,
1988 sectorsize, faila, failb,
1989 pointers);
1990 }
1991 } else {
1992 void *p;
1993
1994
1995 BUG_ON(failb != -1);
1996 pstripe:
1997
1998 memcpy(pointers[faila], pointers[rbio->nr_data], sectorsize);
1999
2000
2001 p = pointers[faila];
2002 for (stripe = faila; stripe < rbio->nr_data - 1; stripe++)
2003 pointers[stripe] = pointers[stripe + 1];
2004 pointers[rbio->nr_data - 1] = p;
2005
2006
2007 run_xor(pointers, rbio->nr_data - 1, sectorsize);
2008 }
2009
2010
2011
2012
2013
2014
2015 if (rbio->operation == BTRFS_RBIO_WRITE) {
2016 for (i = 0; i < rbio->stripe_nsectors; i++) {
2017 if (faila != -1) {
2018 sector = rbio_stripe_sector(rbio, faila, i);
2019 sector->uptodate = 1;
2020 }
2021 if (failb != -1) {
2022 sector = rbio_stripe_sector(rbio, failb, i);
2023 sector->uptodate = 1;
2024 }
2025 }
2026 }
2027 for (stripe = rbio->real_stripes - 1; stripe >= 0; stripe--)
2028 kunmap_local(unmap_array[stripe]);
2029 }
2030
2031 err = BLK_STS_OK;
2032 cleanup:
2033 kfree(unmap_array);
2034 cleanup_pointers:
2035 kfree(pointers);
2036
2037 cleanup_io:
2038
2039
2040
2041
2042
2043 if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
2044 rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060 if (err == BLK_STS_OK && rbio->failb < 0)
2061 cache_rbio_pages(rbio);
2062 else
2063 clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
2064
2065 rbio_orig_end_io(rbio, err);
2066 } else if (err == BLK_STS_OK) {
2067 rbio->faila = -1;
2068 rbio->failb = -1;
2069
2070 if (rbio->operation == BTRFS_RBIO_WRITE)
2071 finish_rmw(rbio);
2072 else if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB)
2073 finish_parity_scrub(rbio, 0);
2074 else
2075 BUG();
2076 } else {
2077 rbio_orig_end_io(rbio, err);
2078 }
2079 }
2080
2081
2082
2083
2084
2085 static void raid_recover_end_io_work(struct work_struct *work)
2086 {
2087 struct btrfs_raid_bio *rbio =
2088 container_of(work, struct btrfs_raid_bio, end_io_work);
2089
2090 if (atomic_read(&rbio->error) > rbio->bioc->max_errors)
2091 rbio_orig_end_io(rbio, BLK_STS_IOERR);
2092 else
2093 __raid_recover_end_io(rbio);
2094 }
2095
2096
2097
2098
2099
2100
2101
2102
2103
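/*
 * Read every sector from all stripes that have not failed.  Once the reads
 * complete (or if everything was already uptodate), the rebuild itself is
 * performed by __raid_recover_end_io().
 */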
2104 static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
2105 {
2106 int bios_to_read = 0;
2107 struct bio_list bio_list;
2108 int ret;
2109 int total_sector_nr;
2110 struct bio *bio;
2111
2112 bio_list_init(&bio_list);
2113
2114 ret = alloc_rbio_pages(rbio);
2115 if (ret)
2116 goto cleanup;
2117
2118 atomic_set(&rbio->error, 0);
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128 for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors;
2129 total_sector_nr++) {
2130 int stripe = total_sector_nr / rbio->stripe_nsectors;
2131 int sectornr = total_sector_nr % rbio->stripe_nsectors;
2132 struct sector_ptr *sector;
2133
2134 if (rbio->faila == stripe || rbio->failb == stripe) {
2135 atomic_inc(&rbio->error);
2136
2137 ASSERT(sectornr == 0);
2138 total_sector_nr += rbio->stripe_nsectors - 1;
2139 continue;
2140 }
2141 sector = rbio_stripe_sector(rbio, stripe, sectornr);
2142 ret = rbio_add_io_sector(rbio, &bio_list, sector, stripe,
2143 sectornr, REQ_OP_READ);
2144 if (ret < 0)
2145 goto cleanup;
2146 }
2147
2148 bios_to_read = bio_list_size(&bio_list);
2149 if (!bios_to_read) {
2150
2151
2152
2153
2154
2155 if (atomic_read(&rbio->error) <= rbio->bioc->max_errors) {
2156 __raid_recover_end_io(rbio);
2157 return 0;
2158 } else {
2159 goto cleanup;
2160 }
2161 }
2162
2163
2164
2165
2166
2167 atomic_set(&rbio->stripes_pending, bios_to_read);
2168 INIT_WORK(&rbio->end_io_work, raid_recover_end_io_work);
2169 while ((bio = bio_list_pop(&bio_list))) {
2170 bio->bi_end_io = raid56_bio_end_io;
2171
2172 if (trace_raid56_scrub_read_recover_enabled()) {
2173 struct raid56_bio_trace_info trace_info = { 0 };
2174
2175 bio_get_trace_info(rbio, bio, &trace_info);
2176 trace_raid56_scrub_read_recover(rbio, bio, &trace_info);
2177 }
2178 submit_bio(bio);
2179 }
2180
2181 return 0;
2182
2183 cleanup:
2184 if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
2185 rbio->operation == BTRFS_RBIO_REBUILD_MISSING)
2186 rbio_orig_end_io(rbio, BLK_STS_IOERR);
2187
2188 while ((bio = bio_list_pop(&bio_list)))
2189 bio_put(bio);
2190
2191 return -EIO;
2192 }
2193
2194
2195
2196
2197
2198
2199
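/*
 * The main entry point for reads that need to be rebuilt.  The stripe to
 * rebuild is derived from the bio's logical address; for mirror_num > 2 an
 * additional stripe is treated as failed so that retries exercise different
 * rebuild combinations.
 */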
2200 void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
2201 int mirror_num, bool generic_io)
2202 {
2203 struct btrfs_fs_info *fs_info = bioc->fs_info;
2204 struct btrfs_raid_bio *rbio;
2205
2206 if (generic_io) {
2207 ASSERT(bioc->mirror_num == mirror_num);
2208 btrfs_bio(bio)->mirror_num = mirror_num;
2209 } else {
2210 btrfs_get_bioc(bioc);
2211 }
2212
2213 rbio = alloc_rbio(fs_info, bioc);
2214 if (IS_ERR(rbio)) {
2215 bio->bi_status = errno_to_blk_status(PTR_ERR(rbio));
2216 goto out_end_bio;
2217 }
2218
2219 rbio->operation = BTRFS_RBIO_READ_REBUILD;
2220 rbio_add_bio(rbio, bio);
2221
2222 rbio->faila = find_logical_bio_stripe(rbio, bio);
2223 if (rbio->faila == -1) {
2224 btrfs_warn(fs_info,
2225 "%s could not find the bad stripe in raid56 so that we cannot recover any more (bio has logical %llu len %llu, bioc has map_type %llu)",
2226 __func__, bio->bi_iter.bi_sector << 9,
2227 (u64)bio->bi_iter.bi_size, bioc->map_type);
2228 kfree(rbio);
2229 bio->bi_status = BLK_STS_IOERR;
2230 goto out_end_bio;
2231 }
2232
2233 if (generic_io)
2234 rbio->generic_bio_cnt = 1;
2235
2236
2237
2238
2239
2240
2241 if (mirror_num > 2) {
2242
2243
2244
2245
2246
2247 rbio->failb = rbio->real_stripes - (mirror_num - 1);
2248 ASSERT(rbio->failb > 0);
2249 if (rbio->failb <= rbio->faila)
2250 rbio->failb--;
2251 }
2252
2253 if (lock_stripe_add(rbio))
2254 return;
2255
2256
2257
2258
2259
2260 __raid56_parity_recover(rbio);
2261 return;
2262
2263 out_end_bio:
2264 btrfs_bio_counter_dec(fs_info);
2265 btrfs_put_bioc(bioc);
2266 bio_endio(bio);
2267 }
2268
2269 static void rmw_work(struct work_struct *work)
2270 {
2271 struct btrfs_raid_bio *rbio;
2272
2273 rbio = container_of(work, struct btrfs_raid_bio, work);
2274 raid56_rmw_stripe(rbio);
2275 }
2276
2277 static void read_rebuild_work(struct work_struct *work)
2278 {
2279 struct btrfs_raid_bio *rbio;
2280
2281 rbio = container_of(work, struct btrfs_raid_bio, work);
2282 __raid56_parity_recover(rbio);
2283 }
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
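/*
 * The following code is used to scrub/replace the parity stripe.
 *
 * @dbitmap marks the sectors whose parity on @scrub_dev needs checking; only
 * parity that turns out to be wrong is rewritten by finish_parity_scrub().
 */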
2295 struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio,
2296 struct btrfs_io_context *bioc,
2297 struct btrfs_device *scrub_dev,
2298 unsigned long *dbitmap, int stripe_nsectors)
2299 {
2300 struct btrfs_fs_info *fs_info = bioc->fs_info;
2301 struct btrfs_raid_bio *rbio;
2302 int i;
2303
2304 rbio = alloc_rbio(fs_info, bioc);
2305 if (IS_ERR(rbio))
2306 return NULL;
2307 bio_list_add(&rbio->bio_list, bio);
2308
2309
2310
2311
2312 ASSERT(!bio->bi_iter.bi_size);
2313 rbio->operation = BTRFS_RBIO_PARITY_SCRUB;
2314
2315
2316
2317
2318
2319
2320 for (i = rbio->nr_data; i < rbio->real_stripes; i++) {
2321 if (bioc->stripes[i].dev == scrub_dev) {
2322 rbio->scrubp = i;
2323 break;
2324 }
2325 }
2326 ASSERT(i < rbio->real_stripes);
2327
2328 bitmap_copy(&rbio->dbitmap, dbitmap, stripe_nsectors);
2329
2330
2331
2332
2333
2334 rbio->generic_bio_cnt = 1;
2335
2336 return rbio;
2337 }
2338
2339
2340 void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page,
2341 unsigned int pgoff, u64 logical)
2342 {
2343 const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
2344 int stripe_offset;
2345 int index;
2346
2347 ASSERT(logical >= rbio->bioc->raid_map[0]);
2348 ASSERT(logical + sectorsize <= rbio->bioc->raid_map[0] +
2349 BTRFS_STRIPE_LEN * rbio->nr_data);
2350 stripe_offset = (int)(logical - rbio->bioc->raid_map[0]);
2351 index = stripe_offset / sectorsize;
2352 rbio->bio_sectors[index].page = page;
2353 rbio->bio_sectors[index].pgoff = pgoff;
2354 }
2355
2356
2357
2358
2359
2360 static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio)
2361 {
2362 const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
2363 int total_sector_nr;
2364
2365 for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors;
2366 total_sector_nr++) {
2367 struct page *page;
2368 int sectornr = total_sector_nr % rbio->stripe_nsectors;
2369 int index = (total_sector_nr * sectorsize) >> PAGE_SHIFT;
2370
2371 if (!test_bit(sectornr, &rbio->dbitmap))
2372 continue;
2373 if (rbio->stripe_pages[index])
2374 continue;
2375 page = alloc_page(GFP_NOFS);
2376 if (!page)
2377 return -ENOMEM;
2378 rbio->stripe_pages[index] = page;
2379 }
2380 index_stripe_sectors(rbio);
2381 return 0;
2382 }
2383
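/*
 * Recompute the parity for every sector set in the dbitmap, compare it
 * against what is currently in the parity stripe pages and write back only
 * the sectors that differ (plus copies to a replace target if configured).
 * When @need_check is zero the recompute/compare step is skipped and the
 * dbitmap sectors are written out directly.
 */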
2384 static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
2385 int need_check)
2386 {
2387 struct btrfs_io_context *bioc = rbio->bioc;
2388 const u32 sectorsize = bioc->fs_info->sectorsize;
2389 void **pointers = rbio->finish_pointers;
2390 unsigned long *pbitmap = &rbio->finish_pbitmap;
2391 int nr_data = rbio->nr_data;
2392 int stripe;
2393 int sectornr;
2394 bool has_qstripe;
2395 struct sector_ptr p_sector = { 0 };
2396 struct sector_ptr q_sector = { 0 };
2397 struct bio_list bio_list;
2398 struct bio *bio;
2399 int is_replace = 0;
2400 int ret;
2401
2402 bio_list_init(&bio_list);
2403
2404 if (rbio->real_stripes - rbio->nr_data == 1)
2405 has_qstripe = false;
2406 else if (rbio->real_stripes - rbio->nr_data == 2)
2407 has_qstripe = true;
2408 else
2409 BUG();
2410
2411 if (bioc->num_tgtdevs && bioc->tgtdev_map[rbio->scrubp]) {
2412 is_replace = 1;
2413 bitmap_copy(pbitmap, &rbio->dbitmap, rbio->stripe_nsectors);
2414 }
2415
2416
2417
2418
2419
2420
2421 clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
2422
2423 if (!need_check)
2424 goto writeback;
2425
2426 p_sector.page = alloc_page(GFP_NOFS);
2427 if (!p_sector.page)
2428 goto cleanup;
2429 p_sector.pgoff = 0;
2430 p_sector.uptodate = 1;
2431
2432 if (has_qstripe) {
2433
2434 q_sector.page = alloc_page(GFP_NOFS);
2435 if (!q_sector.page) {
2436 __free_page(p_sector.page);
2437 p_sector.page = NULL;
2438 goto cleanup;
2439 }
2440 q_sector.pgoff = 0;
2441 q_sector.uptodate = 1;
2442 pointers[rbio->real_stripes - 1] = kmap_local_page(q_sector.page);
2443 }
2444
2445 atomic_set(&rbio->error, 0);
2446
2447
2448 pointers[nr_data] = kmap_local_page(p_sector.page);
2449
2450 for_each_set_bit(sectornr, &rbio->dbitmap, rbio->stripe_nsectors) {
2451 struct sector_ptr *sector;
2452 void *parity;
2453
2454
2455 for (stripe = 0; stripe < nr_data; stripe++) {
2456 sector = sector_in_rbio(rbio, stripe, sectornr, 0);
2457 pointers[stripe] = kmap_local_page(sector->page) +
2458 sector->pgoff;
2459 }
2460
2461 if (has_qstripe) {
2462
2463 raid6_call.gen_syndrome(rbio->real_stripes, sectorsize,
2464 pointers);
2465 } else {
2466
2467 memcpy(pointers[nr_data], pointers[0], sectorsize);
2468 run_xor(pointers + 1, nr_data - 1, sectorsize);
2469 }
2470
2471
2472 sector = rbio_stripe_sector(rbio, rbio->scrubp, sectornr);
2473 parity = kmap_local_page(sector->page) + sector->pgoff;
2474 if (memcmp(parity, pointers[rbio->scrubp], sectorsize) != 0)
2475 memcpy(parity, pointers[rbio->scrubp], sectorsize);
2476 else
2477
2478 bitmap_clear(&rbio->dbitmap, sectornr, 1);
2479 kunmap_local(parity);
2480
2481 for (stripe = nr_data - 1; stripe >= 0; stripe--)
2482 kunmap_local(pointers[stripe]);
2483 }
2484
2485 kunmap_local(pointers[nr_data]);
2486 __free_page(p_sector.page);
2487 p_sector.page = NULL;
2488 if (q_sector.page) {
2489 kunmap_local(pointers[rbio->real_stripes - 1]);
2490 __free_page(q_sector.page);
2491 q_sector.page = NULL;
2492 }
2493
2494 writeback:
2495
2496
2497
2498
2499
2500 for_each_set_bit(sectornr, &rbio->dbitmap, rbio->stripe_nsectors) {
2501 struct sector_ptr *sector;
2502
2503 sector = rbio_stripe_sector(rbio, rbio->scrubp, sectornr);
2504 ret = rbio_add_io_sector(rbio, &bio_list, sector, rbio->scrubp,
2505 sectornr, REQ_OP_WRITE);
2506 if (ret)
2507 goto cleanup;
2508 }
2509
2510 if (!is_replace)
2511 goto submit_write;
2512
2513 for_each_set_bit(sectornr, pbitmap, rbio->stripe_nsectors) {
2514 struct sector_ptr *sector;
2515
2516 sector = rbio_stripe_sector(rbio, rbio->scrubp, sectornr);
2517 ret = rbio_add_io_sector(rbio, &bio_list, sector,
2518 bioc->tgtdev_map[rbio->scrubp],
2519 sectornr, REQ_OP_WRITE);
2520 if (ret)
2521 goto cleanup;
2522 }
2523
2524 submit_write:
2525 nr_data = bio_list_size(&bio_list);
2526 if (!nr_data) {
2527
2528 rbio_orig_end_io(rbio, BLK_STS_OK);
2529 return;
2530 }
2531
2532 atomic_set(&rbio->stripes_pending, nr_data);
2533
2534 while ((bio = bio_list_pop(&bio_list))) {
2535 bio->bi_end_io = raid_write_end_io;
2536
2537 if (trace_raid56_scrub_write_stripe_enabled()) {
2538 struct raid56_bio_trace_info trace_info = { 0 };
2539
2540 bio_get_trace_info(rbio, bio, &trace_info);
2541 trace_raid56_scrub_write_stripe(rbio, bio, &trace_info);
2542 }
2543 submit_bio(bio);
2544 }
2545 return;
2546
2547 cleanup:
2548 rbio_orig_end_io(rbio, BLK_STS_IOERR);
2549
2550 while ((bio = bio_list_pop(&bio_list)))
2551 bio_put(bio);
2552 }
2553
2554 static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe)
2555 {
2556 if (stripe >= 0 && stripe < rbio->nr_data)
2557 return 1;
2558 return 0;
2559 }
2560
2561
2562
2563
2564
2565
2566
2567
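/*
 * Decide how to continue once the scrub reads are done: give up if more
 * stripes failed than the profile can tolerate, regenerate the parity
 * without checking it when no data stripe failed, or reconstruct the missing
 * data first, which is only possible when the other failure (if any) is the
 * parity stripe being scrubbed.
 */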
2568 static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio)
2569 {
2570 if (atomic_read(&rbio->error) > rbio->bioc->max_errors)
2571 goto cleanup;
2572
2573 if (rbio->faila >= 0 || rbio->failb >= 0) {
2574 int dfail = 0, failp = -1;
2575
2576 if (is_data_stripe(rbio, rbio->faila))
2577 dfail++;
2578 else if (is_parity_stripe(rbio->faila))
2579 failp = rbio->faila;
2580
2581 if (is_data_stripe(rbio, rbio->failb))
2582 dfail++;
2583 else if (is_parity_stripe(rbio->failb))
2584 failp = rbio->failb;
2585
2586
2587
2588
2589
2590
2591 if (dfail > rbio->bioc->max_errors - 1)
2592 goto cleanup;
2593
2594
2595
2596
2597
2598 if (dfail == 0) {
2599 finish_parity_scrub(rbio, 0);
2600 return;
2601 }
2602
2603
2604
2605
2606
2607
2608
2609 if (failp != rbio->scrubp)
2610 goto cleanup;
2611
2612 __raid_recover_end_io(rbio);
2613 } else {
2614 finish_parity_scrub(rbio, 1);
2615 }
2616 return;
2617
2618 cleanup:
2619 rbio_orig_end_io(rbio, BLK_STS_IOERR);
2620 }
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630 static void raid56_parity_scrub_end_io_work(struct work_struct *work)
2631 {
2632 struct btrfs_raid_bio *rbio =
2633 container_of(work, struct btrfs_raid_bio, end_io_work);
2634
2635
2636
2637
2638
2639 validate_rbio_for_parity_scrub(rbio);
2640 }
2641
2642 static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
2643 {
2644 int bios_to_read = 0;
2645 struct bio_list bio_list;
2646 int ret;
2647 int total_sector_nr;
2648 struct bio *bio;
2649
2650 bio_list_init(&bio_list);
2651
2652 ret = alloc_rbio_essential_pages(rbio);
2653 if (ret)
2654 goto cleanup;
2655
2656 atomic_set(&rbio->error, 0);
2657
2658 for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors;
2659 total_sector_nr++) {
2660 int sectornr = total_sector_nr % rbio->stripe_nsectors;
2661 int stripe = total_sector_nr / rbio->stripe_nsectors;
2662 struct sector_ptr *sector;
2663
2664
2665 if (!test_bit(sectornr, &rbio->dbitmap))
2666 continue;
2667
2668
2669
2670
2671
2672
2673 sector = sector_in_rbio(rbio, stripe, sectornr, 1);
2674 if (sector)
2675 continue;
2676
2677 sector = rbio_stripe_sector(rbio, stripe, sectornr);
2678
2679
2680
2681
2682 if (sector->uptodate)
2683 continue;
2684
2685 ret = rbio_add_io_sector(rbio, &bio_list, sector, stripe,
2686 sectornr, REQ_OP_READ);
2687 if (ret)
2688 goto cleanup;
2689 }
2690
2691 bios_to_read = bio_list_size(&bio_list);
2692 if (!bios_to_read) {
2693
2694
2695
2696
2697
2698
2699 goto finish;
2700 }
2701
2702
2703
2704
2705
2706 atomic_set(&rbio->stripes_pending, bios_to_read);
2707 INIT_WORK(&rbio->end_io_work, raid56_parity_scrub_end_io_work);
2708 while ((bio = bio_list_pop(&bio_list))) {
2709 bio->bi_end_io = raid56_bio_end_io;
2710
2711 if (trace_raid56_scrub_read_enabled()) {
2712 struct raid56_bio_trace_info trace_info = { 0 };
2713
2714 bio_get_trace_info(rbio, bio, &trace_info);
2715 trace_raid56_scrub_read(rbio, bio, &trace_info);
2716 }
2717 submit_bio(bio);
2718 }
2719
2720 return;
2721
2722 cleanup:
2723 rbio_orig_end_io(rbio, BLK_STS_IOERR);
2724
2725 while ((bio = bio_list_pop(&bio_list)))
2726 bio_put(bio);
2727
2728 return;
2729
2730 finish:
2731 validate_rbio_for_parity_scrub(rbio);
2732 }
2733
2734 static void scrub_parity_work(struct work_struct *work)
2735 {
2736 struct btrfs_raid_bio *rbio;
2737
2738 rbio = container_of(work, struct btrfs_raid_bio, work);
2739 raid56_parity_scrub_stripe(rbio);
2740 }
2741
2742 void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio)
2743 {
2744 if (!lock_stripe_add(rbio))
2745 start_async_work(rbio, scrub_parity_work);
2746 }
2747
2748
2749
2750 struct btrfs_raid_bio *
2751 raid56_alloc_missing_rbio(struct bio *bio, struct btrfs_io_context *bioc)
2752 {
2753 struct btrfs_fs_info *fs_info = bioc->fs_info;
2754 struct btrfs_raid_bio *rbio;
2755
2756 rbio = alloc_rbio(fs_info, bioc);
2757 if (IS_ERR(rbio))
2758 return NULL;
2759
2760 rbio->operation = BTRFS_RBIO_REBUILD_MISSING;
2761 bio_list_add(&rbio->bio_list, bio);
2762
2763
2764
2765
2766 ASSERT(!bio->bi_iter.bi_size);
2767
2768 rbio->faila = find_logical_bio_stripe(rbio, bio);
2769 if (rbio->faila == -1) {
2770 BUG();
2771 kfree(rbio);
2772 return NULL;
2773 }
2774
2775
2776
2777
2778
2779 rbio->generic_bio_cnt = 1;
2780
2781 return rbio;
2782 }
2783
2784 void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio)
2785 {
2786 if (!lock_stripe_add(rbio))
2787 start_async_work(rbio, read_rebuild_work);
2788 }