// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * raid10.c : Multiple Devices driver for Linux
 *
 * Copyright (C) 2000-2004 Neil Brown
 *
 * RAID-10 support for md.
 *
 * Based on code in raid1.c.  See raid1.c for further copyright information.
 */
0012 #include <linux/slab.h>
0013 #include <linux/delay.h>
0014 #include <linux/blkdev.h>
0015 #include <linux/module.h>
0016 #include <linux/seq_file.h>
0017 #include <linux/ratelimit.h>
0018 #include <linux/kthread.h>
0019 #include <linux/raid/md_p.h>
0020 #include <trace/events/block.h>
0021 #include "md.h"
0022 #include "raid10.h"
0023 #include "raid0.h"
0024 #include "md-bitmap.h"
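
/*
 * RAID-10 provides a combination of RAID-0 and RAID-1 functionality.
 * The layout of data is defined by
 *    chunk_size
 *    raid_disks
 *    near_copies (stored in low byte of layout)
 *    far_copies (stored in second byte of layout)
 *    far_offset (stored in bit 16 of layout)
 *    use_far_sets (stored in bit 17 of layout)
 *    use_far_sets_bugfixed (stored in bit 18 of layout)
 *
 * The data to be stored is divided into chunks using chunksize.  Each device
 * is divided into far_copies sections.  In each section, chunks are laid out
 * in a style similar to raid0, but near_copies copies of each chunk are stored
 * (each on a different drive).  The starting device for each section is offset
 * near_copies from the starting device of the previous section, other than the
 * last section which starts on the device after the starting device of the
 * first section.
 * Thus there are (near_copies * far_copies) copies of each chunk, each on a
 * different drive.  near_copies and far_copies must be at least one, and
 * their product must be at most raid_disks.
 *
 * If far_offset is true, then the far_copies are handled a bit differently.
 * The copies are still in different stripes, but instead of being very far
 * apart on disk, they are adjacent stripes.
 *
 * The far and offset algorithms are handled slightly differently if
 * 'use_far_sets' is true.  In this case, the array's devices are grouped into
 * sets that are (near_copies * far_copies) in size.  The far copied stripes
 * are still shifted by 'near_copies' devices, but this shifting stays confined
 * to the set rather than the entire array.  This is done to improve the number
 * of device combinations that can fail without causing the array to fail.
 */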
0067 static void allow_barrier(struct r10conf *conf);
0068 static void lower_barrier(struct r10conf *conf);
0069 static int _enough(struct r10conf *conf, int previous, int ignore);
0070 static int enough(struct r10conf *conf, int ignore);
0071 static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
0072 int *skipped);
0073 static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio);
0074 static void end_reshape_write(struct bio *bio);
0075 static void end_reshape(struct r10conf *conf);
0076
0077 #define raid10_log(md, fmt, args...) \
0078 do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid10 " fmt, ##args); } while (0)
0079
0080 #include "raid1-10.c"

/*
 * for resync bio, r10bio pointer can be retrieved from the per-bio
 * 'struct resync_pages'.
 */
0086 static inline struct r10bio *get_resync_r10bio(struct bio *bio)
0087 {
0088 return get_resync_pages(bio)->raid_bio;
0089 }

static void *r10bio_pool_alloc(gfp_t gfp_flags, void *data)
{
	struct r10conf *conf = data;
	int size = offsetof(struct r10bio, devs[conf->geo.raid_disks]);

	/* allocate an r10bio with room for raid_disks entries in the devs array */
	return kzalloc(size, gfp_flags);
0099 }

#define RESYNC_SECTORS (RESYNC_BLOCK_SIZE >> 9)

/* amount of memory to reserve for resync requests */
#define RESYNC_WINDOW (1024*1024)
/* maximum number of concurrent requests, memory permitting */
#define RESYNC_DEPTH (32*1024*1024/RESYNC_BLOCK_SIZE)
#define CLUSTER_RESYNC_WINDOW (32 * RESYNC_WINDOW)
#define CLUSTER_RESYNC_WINDOW_SECTORS (CLUSTER_RESYNC_WINDOW >> 9)
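
/*
 * When performing a resync, we need to read and compare, so
 * we need as many pages as there are copies.
 * When performing a recovery, we need 2 bios, one for read,
 * one for write (we recover only one drive per r10buf).
 */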
0116 static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
0117 {
0118 struct r10conf *conf = data;
0119 struct r10bio *r10_bio;
0120 struct bio *bio;
0121 int j;
0122 int nalloc, nalloc_rp;
0123 struct resync_pages *rps;
0124
0125 r10_bio = r10bio_pool_alloc(gfp_flags, conf);
0126 if (!r10_bio)
0127 return NULL;
0128
0129 if (test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery) ||
0130 test_bit(MD_RECOVERY_RESHAPE, &conf->mddev->recovery))
0131 nalloc = conf->copies;
0132 else
0133 nalloc = 2;
0134
0135
0136 if (!conf->have_replacement)
0137 nalloc_rp = nalloc;
0138 else
0139 nalloc_rp = nalloc * 2;
0140 rps = kmalloc_array(nalloc_rp, sizeof(struct resync_pages), gfp_flags);
0141 if (!rps)
0142 goto out_free_r10bio;
0143
0144
0145
0146
0147 for (j = nalloc ; j-- ; ) {
0148 bio = bio_kmalloc(RESYNC_PAGES, gfp_flags);
0149 if (!bio)
0150 goto out_free_bio;
0151 bio_init(bio, NULL, bio->bi_inline_vecs, RESYNC_PAGES, 0);
0152 r10_bio->devs[j].bio = bio;
0153 if (!conf->have_replacement)
0154 continue;
0155 bio = bio_kmalloc(RESYNC_PAGES, gfp_flags);
0156 if (!bio)
0157 goto out_free_bio;
0158 bio_init(bio, NULL, bio->bi_inline_vecs, RESYNC_PAGES, 0);
0159 r10_bio->devs[j].repl_bio = bio;
0160 }
0161
0162
0163
0164
0165 for (j = 0; j < nalloc; j++) {
0166 struct bio *rbio = r10_bio->devs[j].repl_bio;
0167 struct resync_pages *rp, *rp_repl;
0168
0169 rp = &rps[j];
0170 if (rbio)
0171 rp_repl = &rps[nalloc + j];
0172
0173 bio = r10_bio->devs[j].bio;
0174
0175 if (!j || test_bit(MD_RECOVERY_SYNC,
0176 &conf->mddev->recovery)) {
0177 if (resync_alloc_pages(rp, gfp_flags))
0178 goto out_free_pages;
0179 } else {
0180 memcpy(rp, &rps[0], sizeof(*rp));
0181 resync_get_all_pages(rp);
0182 }
0183
0184 rp->raid_bio = r10_bio;
0185 bio->bi_private = rp;
0186 if (rbio) {
0187 memcpy(rp_repl, rp, sizeof(*rp));
0188 rbio->bi_private = rp_repl;
0189 }
0190 }
0191
0192 return r10_bio;
0193
0194 out_free_pages:
0195 while (--j >= 0)
0196 resync_free_pages(&rps[j]);
0197
0198 j = 0;
0199 out_free_bio:
0200 for ( ; j < nalloc; j++) {
0201 if (r10_bio->devs[j].bio)
0202 bio_uninit(r10_bio->devs[j].bio);
0203 kfree(r10_bio->devs[j].bio);
0204 if (r10_bio->devs[j].repl_bio)
0205 bio_uninit(r10_bio->devs[j].repl_bio);
0206 kfree(r10_bio->devs[j].repl_bio);
0207 }
0208 kfree(rps);
0209 out_free_r10bio:
0210 rbio_pool_free(r10_bio, conf);
0211 return NULL;
0212 }
0213
0214 static void r10buf_pool_free(void *__r10_bio, void *data)
0215 {
0216 struct r10conf *conf = data;
0217 struct r10bio *r10bio = __r10_bio;
0218 int j;
0219 struct resync_pages *rp = NULL;
0220
0221 for (j = conf->copies; j--; ) {
0222 struct bio *bio = r10bio->devs[j].bio;
0223
0224 if (bio) {
0225 rp = get_resync_pages(bio);
0226 resync_free_pages(rp);
0227 bio_uninit(bio);
0228 kfree(bio);
0229 }
0230
0231 bio = r10bio->devs[j].repl_bio;
0232 if (bio) {
0233 bio_uninit(bio);
0234 kfree(bio);
0235 }
0236 }

	/* resync pages array stored in the 1st bio's .bi_private */
	kfree(rp);
0240
0241 rbio_pool_free(r10bio, conf);
0242 }
0243
0244 static void put_all_bios(struct r10conf *conf, struct r10bio *r10_bio)
0245 {
0246 int i;
0247
0248 for (i = 0; i < conf->geo.raid_disks; i++) {
		struct bio **bio = &r10_bio->devs[i].bio;
0250 if (!BIO_SPECIAL(*bio))
0251 bio_put(*bio);
0252 *bio = NULL;
0253 bio = &r10_bio->devs[i].repl_bio;
0254 if (r10_bio->read_slot < 0 && !BIO_SPECIAL(*bio))
0255 bio_put(*bio);
0256 *bio = NULL;
0257 }
0258 }
0259
0260 static void free_r10bio(struct r10bio *r10_bio)
0261 {
0262 struct r10conf *conf = r10_bio->mddev->private;
0263
0264 put_all_bios(conf, r10_bio);
0265 mempool_free(r10_bio, &conf->r10bio_pool);
0266 }
0267
0268 static void put_buf(struct r10bio *r10_bio)
0269 {
0270 struct r10conf *conf = r10_bio->mddev->private;
0271
0272 mempool_free(r10_bio, &conf->r10buf_pool);
0273
0274 lower_barrier(conf);
0275 }
0276
0277 static void reschedule_retry(struct r10bio *r10_bio)
0278 {
0279 unsigned long flags;
0280 struct mddev *mddev = r10_bio->mddev;
0281 struct r10conf *conf = mddev->private;
0282
0283 spin_lock_irqsave(&conf->device_lock, flags);
0284 list_add(&r10_bio->retry_list, &conf->retry_list);
	conf->nr_queued++;
0286 spin_unlock_irqrestore(&conf->device_lock, flags);
0287
0288
0289 wake_up(&conf->wait_barrier);
0290
0291 md_wakeup_thread(mddev->thread);
0292 }

/*
 * raid_end_bio_io() is called when we have finished servicing a mirrored
 * operation and are ready to return a success/failure code to the buffer
 * cache layer.
 */
0299 static void raid_end_bio_io(struct r10bio *r10_bio)
0300 {
0301 struct bio *bio = r10_bio->master_bio;
0302 struct r10conf *conf = r10_bio->mddev->private;
0303
0304 if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
0305 bio->bi_status = BLK_STS_IOERR;
0306
0307 if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
0308 bio_end_io_acct(bio, r10_bio->start_time);
0309 bio_endio(bio);
0310
0311
0312
0313
0314 allow_barrier(conf);
0315
0316 free_r10bio(r10_bio);
0317 }

/*
 * Update disk head position estimator based on IRQ completion info.
 */
0322 static inline void update_head_pos(int slot, struct r10bio *r10_bio)
0323 {
0324 struct r10conf *conf = r10_bio->mddev->private;
0325
0326 conf->mirrors[r10_bio->devs[slot].devnum].head_position =
0327 r10_bio->devs[slot].addr + (r10_bio->sectors);
0328 }

/*
 * Find the disk number which triggered given bio
 */
0333 static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio,
0334 struct bio *bio, int *slotp, int *replp)
0335 {
0336 int slot;
0337 int repl = 0;
0338
0339 for (slot = 0; slot < conf->geo.raid_disks; slot++) {
0340 if (r10_bio->devs[slot].bio == bio)
0341 break;
0342 if (r10_bio->devs[slot].repl_bio == bio) {
0343 repl = 1;
0344 break;
0345 }
0346 }
0347
0348 update_head_pos(slot, r10_bio);
0349
0350 if (slotp)
0351 *slotp = slot;
0352 if (replp)
0353 *replp = repl;
0354 return r10_bio->devs[slot].devnum;
0355 }
0356
0357 static void raid10_end_read_request(struct bio *bio)
0358 {
0359 int uptodate = !bio->bi_status;
0360 struct r10bio *r10_bio = bio->bi_private;
0361 int slot;
0362 struct md_rdev *rdev;
0363 struct r10conf *conf = r10_bio->mddev->private;
0364
0365 slot = r10_bio->read_slot;
0366 rdev = r10_bio->devs[slot].rdev;
0367
0368
0369
0370 update_head_pos(slot, r10_bio);
0371
0372 if (uptodate) {
		/*
		 * Set R10BIO_Uptodate in our master bio, so that
		 * we will return a good error code to the higher
		 * levels even if IO on some other mirrored buffer fails.
		 *
		 * The 'master' represents the composite IO operation to
		 * user-side. So if something waits for IO, then it will
		 * wait for the 'master' bio.
		 */
0382 set_bit(R10BIO_Uptodate, &r10_bio->state);
0383 } else {
		/*
		 * If all other devices that store this block have failed,
		 * we want to return the error upwards rather than fail
		 * the last device.  Here we redefine "uptodate" to mean
		 * "Don't want to retry".
		 */
0389 if (!_enough(conf, test_bit(R10BIO_Previous, &r10_bio->state),
0390 rdev->raid_disk))
0391 uptodate = 1;
0392 }
0393 if (uptodate) {
0394 raid_end_bio_io(r10_bio);
0395 rdev_dec_pending(rdev, conf->mddev);
0396 } else {
0397
0398
0399
0400 pr_err_ratelimited("md/raid10:%s: %pg: rescheduling sector %llu\n",
0401 mdname(conf->mddev),
0402 rdev->bdev,
0403 (unsigned long long)r10_bio->sector);
0404 set_bit(R10BIO_ReadError, &r10_bio->state);
0405 reschedule_retry(r10_bio);
0406 }
0407 }
0408
0409 static void close_write(struct r10bio *r10_bio)
0410 {
0411
0412 md_bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector,
0413 r10_bio->sectors,
0414 !test_bit(R10BIO_Degraded, &r10_bio->state),
0415 0);
0416 md_write_end(r10_bio->mddev);
0417 }
0418
0419 static void one_write_done(struct r10bio *r10_bio)
0420 {
0421 if (atomic_dec_and_test(&r10_bio->remaining)) {
0422 if (test_bit(R10BIO_WriteError, &r10_bio->state))
0423 reschedule_retry(r10_bio);
0424 else {
0425 close_write(r10_bio);
0426 if (test_bit(R10BIO_MadeGood, &r10_bio->state))
0427 reschedule_retry(r10_bio);
0428 else
0429 raid_end_bio_io(r10_bio);
0430 }
0431 }
0432 }
0433
0434 static void raid10_end_write_request(struct bio *bio)
0435 {
0436 struct r10bio *r10_bio = bio->bi_private;
0437 int dev;
0438 int dec_rdev = 1;
0439 struct r10conf *conf = r10_bio->mddev->private;
0440 int slot, repl;
0441 struct md_rdev *rdev = NULL;
0442 struct bio *to_put = NULL;
0443 bool discard_error;
0444
0445 discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD;
0446
0447 dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
0448
0449 if (repl)
0450 rdev = conf->mirrors[dev].replacement;
0451 if (!rdev) {
0452 smp_rmb();
0453 repl = 0;
0454 rdev = conf->mirrors[dev].rdev;
0455 }
0456
0457
0458
0459 if (bio->bi_status && !discard_error) {
0460 if (repl)
0461
0462
0463
0464 md_error(rdev->mddev, rdev);
0465 else {
0466 set_bit(WriteErrorSeen, &rdev->flags);
0467 if (!test_and_set_bit(WantReplacement, &rdev->flags))
0468 set_bit(MD_RECOVERY_NEEDED,
0469 &rdev->mddev->recovery);
0470
0471 dec_rdev = 0;
0472 if (test_bit(FailFast, &rdev->flags) &&
0473 (bio->bi_opf & MD_FAILFAST)) {
0474 md_error(rdev->mddev, rdev);
0475 }
0476
0477
0478
0479
0480
0481 if (!test_bit(Faulty, &rdev->flags))
0482 set_bit(R10BIO_WriteError, &r10_bio->state);
0483 else {
0484
0485 set_bit(R10BIO_Degraded, &r10_bio->state);
0486 r10_bio->devs[slot].bio = NULL;
0487 to_put = bio;
0488 dec_rdev = 1;
0489 }
0490 }
0491 } else {
		/*
		 * Set R10BIO_Uptodate in our master bio, so that
		 * we will return a good error code to the higher
		 * levels even if IO on some other mirrored buffer fails.
		 *
		 * The 'master' represents the composite IO operation to
		 * user-side. So if something waits for IO, then it will
		 * wait for the 'master' bio.
		 */
0501 sector_t first_bad;
0502 int bad_sectors;

		/*
		 * Do not set R10BIO_Uptodate if the current device is
		 * rebuilding or Faulty. This is because we cannot use
		 * such a device for properly reading the data back (we
		 * could potentially use it if the current write fell
		 * before rdev->recovery_offset, but for simplicity we
		 * don't check this here).
		 */
0512 if (test_bit(In_sync, &rdev->flags) &&
0513 !test_bit(Faulty, &rdev->flags))
0514 set_bit(R10BIO_Uptodate, &r10_bio->state);
0515
0516
0517 if (is_badblock(rdev,
0518 r10_bio->devs[slot].addr,
0519 r10_bio->sectors,
0520 &first_bad, &bad_sectors) && !discard_error) {
0521 bio_put(bio);
0522 if (repl)
0523 r10_bio->devs[slot].repl_bio = IO_MADE_GOOD;
0524 else
0525 r10_bio->devs[slot].bio = IO_MADE_GOOD;
0526 dec_rdev = 0;
0527 set_bit(R10BIO_MadeGood, &r10_bio->state);
0528 }
0529 }
0530
0531
0532
0533
0534
0535
0536 one_write_done(r10_bio);
0537 if (dec_rdev)
0538 rdev_dec_pending(rdev, conf->mddev);
0539 if (to_put)
0540 bio_put(to_put);
0541 }
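
/*
 * RAID10 layout manager
 * As well as the chunksize and raid_disks count, there are two
 * parameters: near_copies and far_copies.
 * near_copies * far_copies must be <= raid_disks.
 * Normally one of these will be 1.
 * If both are 1, we get raid0.
 * If near_copies == raid_disks, we get raid1.
 *
 * Chunks are laid out in raid0 style with near_copies copies of the
 * first chunk, followed by near_copies copies of the next chunk and
 * so on.
 * If far_copies > 1, then after 1/far_copies of the array has been assigned
 * as described above, we start again with a device offset of near_copies.
 * So we effectively have another copy of the whole array further down all
 * the drives, but with blocks on different drives.
 * With this layout, a block is never stored twice on the one device.
 *
 * raid10_find_phys finds the sector offset of a given virtual sector
 * on each device that it is on.
 *
 * raid10_find_virt does the reverse mapping, from a device and a
 * sector offset to a virtual address.
 */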
0568 static void __raid10_find_phys(struct geom *geo, struct r10bio *r10bio)
0569 {
0570 int n,f;
0571 sector_t sector;
0572 sector_t chunk;
0573 sector_t stripe;
0574 int dev;
0575 int slot = 0;
0576 int last_far_set_start, last_far_set_size;
0577
0578 last_far_set_start = (geo->raid_disks / geo->far_set_size) - 1;
0579 last_far_set_start *= geo->far_set_size;
0580
0581 last_far_set_size = geo->far_set_size;
0582 last_far_set_size += (geo->raid_disks % geo->far_set_size);
0583
0584
0585 chunk = r10bio->sector >> geo->chunk_shift;
0586 sector = r10bio->sector & geo->chunk_mask;
0587
0588 chunk *= geo->near_copies;
0589 stripe = chunk;
0590 dev = sector_div(stripe, geo->raid_disks);
0591 if (geo->far_offset)
0592 stripe *= geo->far_copies;
0593
0594 sector += stripe << geo->chunk_shift;
0595
0596
0597 for (n = 0; n < geo->near_copies; n++) {
0598 int d = dev;
0599 int set;
0600 sector_t s = sector;
0601 r10bio->devs[slot].devnum = d;
0602 r10bio->devs[slot].addr = s;
0603 slot++;
0604
0605 for (f = 1; f < geo->far_copies; f++) {
0606 set = d / geo->far_set_size;
0607 d += geo->near_copies;
0608
0609 if ((geo->raid_disks % geo->far_set_size) &&
0610 (d > last_far_set_start)) {
0611 d -= last_far_set_start;
0612 d %= last_far_set_size;
0613 d += last_far_set_start;
0614 } else {
0615 d %= geo->far_set_size;
0616 d += geo->far_set_size * set;
0617 }
0618 s += geo->stride;
0619 r10bio->devs[slot].devnum = d;
0620 r10bio->devs[slot].addr = s;
0621 slot++;
0622 }
0623 dev++;
0624 if (dev >= geo->raid_disks) {
0625 dev = 0;
0626 sector += (geo->chunk_mask + 1);
0627 }
0628 }
0629 }
0630
0631 static void raid10_find_phys(struct r10conf *conf, struct r10bio *r10bio)
0632 {
0633 struct geom *geo = &conf->geo;
0634
0635 if (conf->reshape_progress != MaxSector &&
0636 ((r10bio->sector >= conf->reshape_progress) !=
0637 conf->mddev->reshape_backwards)) {
0638 set_bit(R10BIO_Previous, &r10bio->state);
0639 geo = &conf->prev;
0640 } else
0641 clear_bit(R10BIO_Previous, &r10bio->state);
0642
0643 __raid10_find_phys(geo, r10bio);
0644 }
0645
0646 static sector_t raid10_find_virt(struct r10conf *conf, sector_t sector, int dev)
0647 {
0648 sector_t offset, chunk, vchunk;
0649
0650
0651
0652 struct geom *geo = &conf->geo;
0653 int far_set_start = (dev / geo->far_set_size) * geo->far_set_size;
0654 int far_set_size = geo->far_set_size;
0655 int last_far_set_start;
0656
0657 if (geo->raid_disks % geo->far_set_size) {
0658 last_far_set_start = (geo->raid_disks / geo->far_set_size) - 1;
0659 last_far_set_start *= geo->far_set_size;
0660
0661 if (dev >= last_far_set_start) {
0662 far_set_size = geo->far_set_size;
0663 far_set_size += (geo->raid_disks % geo->far_set_size);
0664 far_set_start = last_far_set_start;
0665 }
0666 }
0667
0668 offset = sector & geo->chunk_mask;
0669 if (geo->far_offset) {
0670 int fc;
0671 chunk = sector >> geo->chunk_shift;
0672 fc = sector_div(chunk, geo->far_copies);
0673 dev -= fc * geo->near_copies;
0674 if (dev < far_set_start)
0675 dev += far_set_size;
0676 } else {
0677 while (sector >= geo->stride) {
0678 sector -= geo->stride;
0679 if (dev < (geo->near_copies + far_set_start))
0680 dev += far_set_size - geo->near_copies;
0681 else
0682 dev -= geo->near_copies;
0683 }
0684 chunk = sector >> geo->chunk_shift;
0685 }
0686 vchunk = chunk * geo->raid_disks + dev;
0687 sector_div(vchunk, geo->near_copies);
0688 return (vchunk << geo->chunk_shift) + offset;
0689 }
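
/*
 * This routine returns the disk from which the requested read should be done.
 *
 * Reads are never balanced while a resync is in progress, or while the
 * requested area is being resynced by another cluster node.  Otherwise the
 * disk is chosen by preferring, in order: a non-rotational device with the
 * fewest pending requests, an idle device when near_copies > 1, and finally
 * the device whose recorded head position is closest to the request.
 *
 * The rdev for the device selected will have nr_pending incremented.
 */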
0710 static struct md_rdev *read_balance(struct r10conf *conf,
0711 struct r10bio *r10_bio,
0712 int *max_sectors)
0713 {
0714 const sector_t this_sector = r10_bio->sector;
0715 int disk, slot;
0716 int sectors = r10_bio->sectors;
0717 int best_good_sectors;
0718 sector_t new_distance, best_dist;
0719 struct md_rdev *best_dist_rdev, *best_pending_rdev, *rdev = NULL;
0720 int do_balance;
0721 int best_dist_slot, best_pending_slot;
0722 bool has_nonrot_disk = false;
0723 unsigned int min_pending;
0724 struct geom *geo = &conf->geo;
0725
0726 raid10_find_phys(conf, r10_bio);
0727 rcu_read_lock();
0728 best_dist_slot = -1;
0729 min_pending = UINT_MAX;
0730 best_dist_rdev = NULL;
0731 best_pending_rdev = NULL;
0732 best_dist = MaxSector;
0733 best_good_sectors = 0;
0734 do_balance = 1;
0735 clear_bit(R10BIO_FailFast, &r10_bio->state);
0736
0737
0738
0739
0740
0741
0742 if ((conf->mddev->recovery_cp < MaxSector
0743 && (this_sector + sectors >= conf->next_resync)) ||
0744 (mddev_is_clustered(conf->mddev) &&
0745 md_cluster_ops->area_resyncing(conf->mddev, READ, this_sector,
0746 this_sector + sectors)))
0747 do_balance = 0;
0748
0749 for (slot = 0; slot < conf->copies ; slot++) {
0750 sector_t first_bad;
0751 int bad_sectors;
0752 sector_t dev_sector;
0753 unsigned int pending;
0754 bool nonrot;
0755
0756 if (r10_bio->devs[slot].bio == IO_BLOCKED)
0757 continue;
0758 disk = r10_bio->devs[slot].devnum;
0759 rdev = rcu_dereference(conf->mirrors[disk].replacement);
0760 if (rdev == NULL || test_bit(Faulty, &rdev->flags) ||
0761 r10_bio->devs[slot].addr + sectors > rdev->recovery_offset)
0762 rdev = rcu_dereference(conf->mirrors[disk].rdev);
0763 if (rdev == NULL ||
0764 test_bit(Faulty, &rdev->flags))
0765 continue;
0766 if (!test_bit(In_sync, &rdev->flags) &&
0767 r10_bio->devs[slot].addr + sectors > rdev->recovery_offset)
0768 continue;
0769
0770 dev_sector = r10_bio->devs[slot].addr;
0771 if (is_badblock(rdev, dev_sector, sectors,
0772 &first_bad, &bad_sectors)) {
0773 if (best_dist < MaxSector)
0774
0775 continue;
0776 if (first_bad <= dev_sector) {
0777
0778
0779
0780
0781 bad_sectors -= (dev_sector - first_bad);
0782 if (!do_balance && sectors > bad_sectors)
0783 sectors = bad_sectors;
0784 if (best_good_sectors > sectors)
0785 best_good_sectors = sectors;
0786 } else {
0787 sector_t good_sectors =
0788 first_bad - dev_sector;
0789 if (good_sectors > best_good_sectors) {
0790 best_good_sectors = good_sectors;
0791 best_dist_slot = slot;
0792 best_dist_rdev = rdev;
0793 }
0794 if (!do_balance)
0795
0796 break;
0797 }
0798 continue;
0799 } else
0800 best_good_sectors = sectors;
0801
0802 if (!do_balance)
0803 break;
0804
0805 nonrot = bdev_nonrot(rdev->bdev);
0806 has_nonrot_disk |= nonrot;
0807 pending = atomic_read(&rdev->nr_pending);
0808 if (min_pending > pending && nonrot) {
0809 min_pending = pending;
0810 best_pending_slot = slot;
0811 best_pending_rdev = rdev;
0812 }
0813
0814 if (best_dist_slot >= 0)
0815
0816 set_bit(R10BIO_FailFast, &r10_bio->state);
0817
0818
0819
0820
0821 if (geo->near_copies > 1 && !pending)
0822 new_distance = 0;
0823
0824
0825 else if (geo->far_copies > 1)
0826 new_distance = r10_bio->devs[slot].addr;
0827 else
0828 new_distance = abs(r10_bio->devs[slot].addr -
0829 conf->mirrors[disk].head_position);
0830
0831 if (new_distance < best_dist) {
0832 best_dist = new_distance;
0833 best_dist_slot = slot;
0834 best_dist_rdev = rdev;
0835 }
0836 }
0837 if (slot >= conf->copies) {
0838 if (has_nonrot_disk) {
0839 slot = best_pending_slot;
0840 rdev = best_pending_rdev;
0841 } else {
0842 slot = best_dist_slot;
0843 rdev = best_dist_rdev;
0844 }
0845 }
0846
0847 if (slot >= 0) {
0848 atomic_inc(&rdev->nr_pending);
0849 r10_bio->read_slot = slot;
0850 } else
0851 rdev = NULL;
0852 rcu_read_unlock();
0853 *max_sectors = best_good_sectors;
0854
0855 return rdev;
0856 }
0857
0858 static void flush_pending_writes(struct r10conf *conf)
0859 {
0860
0861
0862
0863 spin_lock_irq(&conf->device_lock);
0864
0865 if (conf->pending_bio_list.head) {
0866 struct blk_plug plug;
0867 struct bio *bio;
0868
0869 bio = bio_list_get(&conf->pending_bio_list);
0870 spin_unlock_irq(&conf->device_lock);
0871
0872
0873
0874
0875
0876
0877
0878
0879
0880
0881 __set_current_state(TASK_RUNNING);
0882
0883 blk_start_plug(&plug);
0884
0885
0886 md_bitmap_unplug(conf->mddev->bitmap);
0887 wake_up(&conf->wait_barrier);
0888
0889 while (bio) {
0890 struct bio *next = bio->bi_next;
0891 struct md_rdev *rdev = (void*)bio->bi_bdev;
0892 bio->bi_next = NULL;
0893 bio_set_dev(bio, rdev->bdev);
0894 if (test_bit(Faulty, &rdev->flags)) {
0895 bio_io_error(bio);
0896 } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
0897 !bdev_max_discard_sectors(bio->bi_bdev)))
0898
0899 bio_endio(bio);
0900 else
0901 submit_bio_noacct(bio);
0902 bio = next;
0903 }
0904 blk_finish_plug(&plug);
0905 } else
0906 spin_unlock_irq(&conf->device_lock);
0907 }
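
/* Barriers....
 * Sometimes we need to suspend IO while we do something else,
 * either some resync/recovery, or reconfigure the array.
 * To do this we raise a 'barrier'.
 * The 'barrier' is a counter that can be raised multiple times
 * to count how many activities are happening which preclude
 * normal IO.
 * We can only raise the barrier if there is no pending IO,
 * i.e. if nr_pending == 0.
 * We choose only to raise the barrier if no-one is waiting for the
 * barrier to go down.  This means that as soon as an IO request
 * is ready, no other operations which require a barrier will start
 * until the IO request has had a chance.
 *
 * So: regular IO calls 'wait_barrier'.  When that returns there
 *    is no background IO happening.  It must arrange to call
 *    allow_barrier when it has finished its IO.
 * Background IO calls must call raise_barrier.  Once that returns
 *    there is no normal IO happening.  It must arrange to call
 *    lower_barrier when the IO is done.
 */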
0931 static void raise_barrier(struct r10conf *conf, int force)
0932 {
0933 BUG_ON(force && !conf->barrier);
0934 spin_lock_irq(&conf->resync_lock);
0935
0936
0937 wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
0938 conf->resync_lock);
0939
0940
0941 conf->barrier++;
0942
0943
0944 wait_event_lock_irq(conf->wait_barrier,
0945 !atomic_read(&conf->nr_pending) && conf->barrier < RESYNC_DEPTH,
0946 conf->resync_lock);
0947
0948 spin_unlock_irq(&conf->resync_lock);
0949 }
0950
0951 static void lower_barrier(struct r10conf *conf)
0952 {
0953 unsigned long flags;
0954 spin_lock_irqsave(&conf->resync_lock, flags);
0955 conf->barrier--;
0956 spin_unlock_irqrestore(&conf->resync_lock, flags);
0957 wake_up(&conf->wait_barrier);
0958 }
0959
0960 static bool wait_barrier(struct r10conf *conf, bool nowait)
0961 {
0962 bool ret = true;
0963
0964 spin_lock_irq(&conf->resync_lock);
0965 if (conf->barrier) {
0966 struct bio_list *bio_list = current->bio_list;
0967 conf->nr_waiting++;
0968
0969
0970
0971
0972
0973
0974
0975
0976
0977
0978 if (nowait) {
0979 ret = false;
0980 } else {
0981 raid10_log(conf->mddev, "wait barrier");
0982 wait_event_lock_irq(conf->wait_barrier,
0983 !conf->barrier ||
0984 (atomic_read(&conf->nr_pending) &&
0985 bio_list &&
0986 (!bio_list_empty(&bio_list[0]) ||
0987 !bio_list_empty(&bio_list[1]))) ||
0988
0989
0990
0991 (conf->mddev->thread->tsk == current &&
0992 test_bit(MD_RECOVERY_RUNNING,
0993 &conf->mddev->recovery) &&
0994 conf->nr_queued > 0),
0995 conf->resync_lock);
0996 }
0997 conf->nr_waiting--;
0998 if (!conf->nr_waiting)
0999 wake_up(&conf->wait_barrier);
1000 }
1001
1002 if (ret)
1003 atomic_inc(&conf->nr_pending);
1004 spin_unlock_irq(&conf->resync_lock);
1005 return ret;
1006 }
1007
1008 static void allow_barrier(struct r10conf *conf)
1009 {
1010 if ((atomic_dec_and_test(&conf->nr_pending)) ||
1011 (conf->array_freeze_pending))
1012 wake_up(&conf->wait_barrier);
1013 }
1014
1015 static void freeze_array(struct r10conf *conf, int extra)
1016 {
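	/* Stop syncio and normal IO and wait for everything to
	 * go quiet.
	 * We increment barrier and nr_waiting, and then
	 * wait until nr_pending matches nr_queued+extra.
	 * This is called in the context of one normal IO request
	 * that has failed. Thus any sync request that might be pending
	 * will be blocked by nr_pending, and we need to wait for
	 * pending IO requests to complete or be queued for re-try.
	 * Thus the number queued (nr_queued) plus this request (extra)
	 * must match the number of pending IOs (nr_pending) before
	 * we continue.
	 */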
1029 spin_lock_irq(&conf->resync_lock);
1030 conf->array_freeze_pending++;
1031 conf->barrier++;
1032 conf->nr_waiting++;
1033 wait_event_lock_irq_cmd(conf->wait_barrier,
1034 atomic_read(&conf->nr_pending) == conf->nr_queued+extra,
1035 conf->resync_lock,
1036 flush_pending_writes(conf));
1037
1038 conf->array_freeze_pending--;
1039 spin_unlock_irq(&conf->resync_lock);
1040 }
1041
1042 static void unfreeze_array(struct r10conf *conf)
1043 {
1044
1045 spin_lock_irq(&conf->resync_lock);
1046 conf->barrier--;
1047 conf->nr_waiting--;
1048 wake_up(&conf->wait_barrier);
1049 spin_unlock_irq(&conf->resync_lock);
1050 }
1051
1052 static sector_t choose_data_offset(struct r10bio *r10_bio,
1053 struct md_rdev *rdev)
1054 {
1055 if (!test_bit(MD_RECOVERY_RESHAPE, &rdev->mddev->recovery) ||
1056 test_bit(R10BIO_Previous, &r10_bio->state))
1057 return rdev->data_offset;
1058 else
1059 return rdev->new_data_offset;
1060 }
1061
1062 static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
1063 {
1064 struct raid1_plug_cb *plug = container_of(cb, struct raid1_plug_cb, cb);
1065 struct mddev *mddev = plug->cb.data;
1066 struct r10conf *conf = mddev->private;
1067 struct bio *bio;
1068
1069 if (from_schedule || current->bio_list) {
1070 spin_lock_irq(&conf->device_lock);
1071 bio_list_merge(&conf->pending_bio_list, &plug->pending);
1072 spin_unlock_irq(&conf->device_lock);
1073 wake_up(&conf->wait_barrier);
1074 md_wakeup_thread(mddev->thread);
1075 kfree(plug);
1076 return;
1077 }
1078
1079
1080 bio = bio_list_get(&plug->pending);
1081 md_bitmap_unplug(mddev->bitmap);
1082 wake_up(&conf->wait_barrier);
1083
1084 while (bio) {
1085 struct bio *next = bio->bi_next;
1086 struct md_rdev *rdev = (void*)bio->bi_bdev;
1087 bio->bi_next = NULL;
1088 bio_set_dev(bio, rdev->bdev);
1089 if (test_bit(Faulty, &rdev->flags)) {
1090 bio_io_error(bio);
1091 } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
1092 !bdev_max_discard_sectors(bio->bi_bdev)))
1093
1094 bio_endio(bio);
1095 else
1096 submit_bio_noacct(bio);
1097 bio = next;
1098 }
1099 kfree(plug);
1100 }
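
/*
 * 1. Register the new request and wait if the reconstruction thread has put
 *    up a bar for new requests. Continue immediately if no resync is active
 *    currently.
 * 2. If IO spans the reshape position, wait for the reshape to pass.
 */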
1108 static bool regular_request_wait(struct mddev *mddev, struct r10conf *conf,
1109 struct bio *bio, sector_t sectors)
1110 {
1111
1112 if (!wait_barrier(conf, bio->bi_opf & REQ_NOWAIT)) {
1113 bio_wouldblock_error(bio);
1114 return false;
1115 }
1116 while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
1117 bio->bi_iter.bi_sector < conf->reshape_progress &&
1118 bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
1119 allow_barrier(conf);
1120 if (bio->bi_opf & REQ_NOWAIT) {
1121 bio_wouldblock_error(bio);
1122 return false;
1123 }
1124 raid10_log(conf->mddev, "wait reshape");
1125 wait_event(conf->wait_barrier,
1126 conf->reshape_progress <= bio->bi_iter.bi_sector ||
1127 conf->reshape_progress >= bio->bi_iter.bi_sector +
1128 sectors);
1129 wait_barrier(conf, false);
1130 }
1131 return true;
1132 }
1133
1134 static void raid10_read_request(struct mddev *mddev, struct bio *bio,
1135 struct r10bio *r10_bio)
1136 {
1137 struct r10conf *conf = mddev->private;
1138 struct bio *read_bio;
1139 const enum req_op op = bio_op(bio);
1140 const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC;
1141 int max_sectors;
1142 struct md_rdev *rdev;
1143 char b[BDEVNAME_SIZE];
1144 int slot = r10_bio->read_slot;
1145 struct md_rdev *err_rdev = NULL;
1146 gfp_t gfp = GFP_NOIO;
1147
1148 if (slot >= 0 && r10_bio->devs[slot].rdev) {
1149
1150
1151
1152
1153
1154
1155
1156 int disk;
1157
1158
1159
1160
1161 gfp = GFP_NOIO | __GFP_HIGH;
1162
1163 rcu_read_lock();
1164 disk = r10_bio->devs[slot].devnum;
1165 err_rdev = rcu_dereference(conf->mirrors[disk].rdev);
1166 if (err_rdev)
1167 snprintf(b, sizeof(b), "%pg", err_rdev->bdev);
1168 else {
1169 strcpy(b, "???");
1170
1171 err_rdev = r10_bio->devs[slot].rdev;
1172 }
1173 rcu_read_unlock();
1174 }
1175
1176 if (!regular_request_wait(mddev, conf, bio, r10_bio->sectors))
1177 return;
1178 rdev = read_balance(conf, r10_bio, &max_sectors);
1179 if (!rdev) {
1180 if (err_rdev) {
1181 pr_crit_ratelimited("md/raid10:%s: %s: unrecoverable I/O read error for block %llu\n",
1182 mdname(mddev), b,
1183 (unsigned long long)r10_bio->sector);
1184 }
1185 raid_end_bio_io(r10_bio);
1186 return;
1187 }
1188 if (err_rdev)
1189 pr_err_ratelimited("md/raid10:%s: %pg: redirecting sector %llu to another mirror\n",
1190 mdname(mddev),
1191 rdev->bdev,
1192 (unsigned long long)r10_bio->sector);
1193 if (max_sectors < bio_sectors(bio)) {
1194 struct bio *split = bio_split(bio, max_sectors,
1195 gfp, &conf->bio_split);
1196 bio_chain(split, bio);
1197 allow_barrier(conf);
1198 submit_bio_noacct(bio);
1199 wait_barrier(conf, false);
1200 bio = split;
1201 r10_bio->master_bio = bio;
1202 r10_bio->sectors = max_sectors;
1203 }
1204 slot = r10_bio->read_slot;
1205
1206 if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
1207 r10_bio->start_time = bio_start_io_acct(bio);
1208 read_bio = bio_alloc_clone(rdev->bdev, bio, gfp, &mddev->bio_set);
1209
1210 r10_bio->devs[slot].bio = read_bio;
1211 r10_bio->devs[slot].rdev = rdev;
1212
1213 read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
1214 choose_data_offset(r10_bio, rdev);
1215 read_bio->bi_end_io = raid10_end_read_request;
1216 bio_set_op_attrs(read_bio, op, do_sync);
1217 if (test_bit(FailFast, &rdev->flags) &&
1218 test_bit(R10BIO_FailFast, &r10_bio->state))
1219 read_bio->bi_opf |= MD_FAILFAST;
1220 read_bio->bi_private = r10_bio;
1221
1222 if (mddev->gendisk)
1223 trace_block_bio_remap(read_bio, disk_devt(mddev->gendisk),
1224 r10_bio->sector);
1225 submit_bio_noacct(read_bio);
1226 return;
1227 }
1228
1229 static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
1230 struct bio *bio, bool replacement,
1231 int n_copy)
1232 {
1233 const enum req_op op = bio_op(bio);
1234 const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC;
1235 const blk_opf_t do_fua = bio->bi_opf & REQ_FUA;
1236 unsigned long flags;
1237 struct blk_plug_cb *cb;
1238 struct raid1_plug_cb *plug = NULL;
1239 struct r10conf *conf = mddev->private;
1240 struct md_rdev *rdev;
1241 int devnum = r10_bio->devs[n_copy].devnum;
1242 struct bio *mbio;
1243
1244 if (replacement) {
1245 rdev = conf->mirrors[devnum].replacement;
1246 if (rdev == NULL) {
1247
1248 smp_mb();
1249 rdev = conf->mirrors[devnum].rdev;
1250 }
1251 } else
1252 rdev = conf->mirrors[devnum].rdev;
1253
1254 mbio = bio_alloc_clone(rdev->bdev, bio, GFP_NOIO, &mddev->bio_set);
1255 if (replacement)
1256 r10_bio->devs[n_copy].repl_bio = mbio;
1257 else
1258 r10_bio->devs[n_copy].bio = mbio;
1259
1260 mbio->bi_iter.bi_sector = (r10_bio->devs[n_copy].addr +
1261 choose_data_offset(r10_bio, rdev));
1262 mbio->bi_end_io = raid10_end_write_request;
1263 bio_set_op_attrs(mbio, op, do_sync | do_fua);
1264 if (!replacement && test_bit(FailFast,
1265 &conf->mirrors[devnum].rdev->flags)
1266 && enough(conf, devnum))
1267 mbio->bi_opf |= MD_FAILFAST;
1268 mbio->bi_private = r10_bio;
1269
1270 if (conf->mddev->gendisk)
1271 trace_block_bio_remap(mbio, disk_devt(conf->mddev->gendisk),
1272 r10_bio->sector);
1273
1274 mbio->bi_bdev = (void *)rdev;
1275
1276 atomic_inc(&r10_bio->remaining);
1277
1278 cb = blk_check_plugged(raid10_unplug, mddev, sizeof(*plug));
1279 if (cb)
1280 plug = container_of(cb, struct raid1_plug_cb, cb);
1281 else
1282 plug = NULL;
1283 if (plug) {
1284 bio_list_add(&plug->pending, mbio);
1285 } else {
1286 spin_lock_irqsave(&conf->device_lock, flags);
1287 bio_list_add(&conf->pending_bio_list, mbio);
1288 spin_unlock_irqrestore(&conf->device_lock, flags);
1289 md_wakeup_thread(mddev->thread);
1290 }
1291 }
1292
1293 static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
1294 {
1295 int i;
1296 struct r10conf *conf = mddev->private;
1297 struct md_rdev *blocked_rdev;
1298
1299 retry_wait:
1300 blocked_rdev = NULL;
1301 rcu_read_lock();
1302 for (i = 0; i < conf->copies; i++) {
1303 struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
1304 struct md_rdev *rrdev = rcu_dereference(
1305 conf->mirrors[i].replacement);
1306 if (rdev == rrdev)
1307 rrdev = NULL;
1308 if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
1309 atomic_inc(&rdev->nr_pending);
1310 blocked_rdev = rdev;
1311 break;
1312 }
1313 if (rrdev && unlikely(test_bit(Blocked, &rrdev->flags))) {
1314 atomic_inc(&rrdev->nr_pending);
1315 blocked_rdev = rrdev;
1316 break;
1317 }
1318
1319 if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) {
1320 sector_t first_bad;
1321 sector_t dev_sector = r10_bio->devs[i].addr;
1322 int bad_sectors;
1323 int is_bad;
1324
1325
1326
1327
1328
1329 if (!r10_bio->sectors)
1330 continue;
1331
1332 is_bad = is_badblock(rdev, dev_sector, r10_bio->sectors,
1333 &first_bad, &bad_sectors);
1334 if (is_bad < 0) {
1335
1336
1337
1338
1339 atomic_inc(&rdev->nr_pending);
1340 set_bit(BlockedBadBlocks, &rdev->flags);
1341 blocked_rdev = rdev;
1342 break;
1343 }
1344 }
1345 }
1346 rcu_read_unlock();
1347
1348 if (unlikely(blocked_rdev)) {
1349
1350 allow_barrier(conf);
1351 raid10_log(conf->mddev, "%s wait rdev %d blocked",
1352 __func__, blocked_rdev->raid_disk);
1353 md_wait_for_blocked_rdev(blocked_rdev, mddev);
1354 wait_barrier(conf, false);
1355 goto retry_wait;
1356 }
1357 }
1358
1359 static void raid10_write_request(struct mddev *mddev, struct bio *bio,
1360 struct r10bio *r10_bio)
1361 {
1362 struct r10conf *conf = mddev->private;
1363 int i;
1364 sector_t sectors;
1365 int max_sectors;
1366
1367 if ((mddev_is_clustered(mddev) &&
1368 md_cluster_ops->area_resyncing(mddev, WRITE,
1369 bio->bi_iter.bi_sector,
1370 bio_end_sector(bio)))) {
1371 DEFINE_WAIT(w);
1372
1373 if (bio->bi_opf & REQ_NOWAIT) {
1374 bio_wouldblock_error(bio);
1375 return;
1376 }
1377 for (;;) {
1378 prepare_to_wait(&conf->wait_barrier,
1379 &w, TASK_IDLE);
1380 if (!md_cluster_ops->area_resyncing(mddev, WRITE,
1381 bio->bi_iter.bi_sector, bio_end_sector(bio)))
1382 break;
1383 schedule();
1384 }
1385 finish_wait(&conf->wait_barrier, &w);
1386 }
1387
1388 sectors = r10_bio->sectors;
1389 if (!regular_request_wait(mddev, conf, bio, sectors))
1390 return;
1391 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
1392 (mddev->reshape_backwards
1393 ? (bio->bi_iter.bi_sector < conf->reshape_safe &&
1394 bio->bi_iter.bi_sector + sectors > conf->reshape_progress)
1395 : (bio->bi_iter.bi_sector + sectors > conf->reshape_safe &&
1396 bio->bi_iter.bi_sector < conf->reshape_progress))) {
1397
1398 mddev->reshape_position = conf->reshape_progress;
1399 set_mask_bits(&mddev->sb_flags, 0,
1400 BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING));
1401 md_wakeup_thread(mddev->thread);
1402 if (bio->bi_opf & REQ_NOWAIT) {
1403 allow_barrier(conf);
1404 bio_wouldblock_error(bio);
1405 return;
1406 }
1407 raid10_log(conf->mddev, "wait reshape metadata");
1408 wait_event(mddev->sb_wait,
1409 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
1410
1411 conf->reshape_safe = mddev->reshape_position;
1412 }
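
	/* first select target devices under rcu_lock and
	 * inc refcount on their rdev.  Record them by setting
	 * bios[x] to bio.
	 * If there are known/acknowledged bad blocks on any device
	 * on which we have seen a write error, we want to avoid
	 * writing to those blocks.  This potentially requires several
	 * writes to write around the bad blocks.  Each set of writes
	 * gets its own r10_bio with a set of bios attached.
	 */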
1424 r10_bio->read_slot = -1;
1425 raid10_find_phys(conf, r10_bio);
1426
1427 wait_blocked_dev(mddev, r10_bio);
1428
1429 rcu_read_lock();
1430 max_sectors = r10_bio->sectors;
1431
1432 for (i = 0; i < conf->copies; i++) {
1433 int d = r10_bio->devs[i].devnum;
1434 struct md_rdev *rdev = rcu_dereference(conf->mirrors[d].rdev);
1435 struct md_rdev *rrdev = rcu_dereference(
1436 conf->mirrors[d].replacement);
1437 if (rdev == rrdev)
1438 rrdev = NULL;
1439 if (rdev && (test_bit(Faulty, &rdev->flags)))
1440 rdev = NULL;
1441 if (rrdev && (test_bit(Faulty, &rrdev->flags)))
1442 rrdev = NULL;
1443
1444 r10_bio->devs[i].bio = NULL;
1445 r10_bio->devs[i].repl_bio = NULL;
1446
1447 if (!rdev && !rrdev) {
1448 set_bit(R10BIO_Degraded, &r10_bio->state);
1449 continue;
1450 }
1451 if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) {
1452 sector_t first_bad;
1453 sector_t dev_sector = r10_bio->devs[i].addr;
1454 int bad_sectors;
1455 int is_bad;
1456
1457 is_bad = is_badblock(rdev, dev_sector, max_sectors,
1458 &first_bad, &bad_sectors);
1459 if (is_bad && first_bad <= dev_sector) {
1460
1461 bad_sectors -= (dev_sector - first_bad);
1462 if (bad_sectors < max_sectors)
1463
1464
1465
1466 max_sectors = bad_sectors;
1467
1468
1469
1470
1471
1472
1473
1474
1475 continue;
1476 }
1477 if (is_bad) {
1478 int good_sectors = first_bad - dev_sector;
1479 if (good_sectors < max_sectors)
1480 max_sectors = good_sectors;
1481 }
1482 }
1483 if (rdev) {
1484 r10_bio->devs[i].bio = bio;
1485 atomic_inc(&rdev->nr_pending);
1486 }
1487 if (rrdev) {
1488 r10_bio->devs[i].repl_bio = bio;
1489 atomic_inc(&rrdev->nr_pending);
1490 }
1491 }
1492 rcu_read_unlock();
1493
1494 if (max_sectors < r10_bio->sectors)
1495 r10_bio->sectors = max_sectors;
1496
1497 if (r10_bio->sectors < bio_sectors(bio)) {
1498 struct bio *split = bio_split(bio, r10_bio->sectors,
1499 GFP_NOIO, &conf->bio_split);
1500 bio_chain(split, bio);
1501 allow_barrier(conf);
1502 submit_bio_noacct(bio);
1503 wait_barrier(conf, false);
1504 bio = split;
1505 r10_bio->master_bio = bio;
1506 }
1507
1508 if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
1509 r10_bio->start_time = bio_start_io_acct(bio);
1510 atomic_set(&r10_bio->remaining, 1);
1511 md_bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0);
1512
1513 for (i = 0; i < conf->copies; i++) {
1514 if (r10_bio->devs[i].bio)
1515 raid10_write_one_disk(mddev, r10_bio, bio, false, i);
1516 if (r10_bio->devs[i].repl_bio)
1517 raid10_write_one_disk(mddev, r10_bio, bio, true, i);
1518 }
1519 one_write_done(r10_bio);
1520 }
1521
1522 static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
1523 {
1524 struct r10conf *conf = mddev->private;
1525 struct r10bio *r10_bio;
1526
1527 r10_bio = mempool_alloc(&conf->r10bio_pool, GFP_NOIO);
1528
1529 r10_bio->master_bio = bio;
1530 r10_bio->sectors = sectors;
1531
1532 r10_bio->mddev = mddev;
1533 r10_bio->sector = bio->bi_iter.bi_sector;
1534 r10_bio->state = 0;
1535 r10_bio->read_slot = -1;
1536 memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) *
1537 conf->geo.raid_disks);
1538
1539 if (bio_data_dir(bio) == READ)
1540 raid10_read_request(mddev, bio, r10_bio);
1541 else
1542 raid10_write_request(mddev, bio, r10_bio);
1543 }
1544
1545 static void raid_end_discard_bio(struct r10bio *r10bio)
1546 {
1547 struct r10conf *conf = r10bio->mddev->private;
1548 struct r10bio *first_r10bio;
1549
1550 while (atomic_dec_and_test(&r10bio->remaining)) {
1551
1552 allow_barrier(conf);
1553
1554 if (!test_bit(R10BIO_Discard, &r10bio->state)) {
1555 first_r10bio = (struct r10bio *)r10bio->master_bio;
1556 free_r10bio(r10bio);
1557 r10bio = first_r10bio;
1558 } else {
1559 md_write_end(r10bio->mddev);
1560 bio_endio(r10bio->master_bio);
1561 free_r10bio(r10bio);
1562 break;
1563 }
1564 }
1565 }
1566
1567 static void raid10_end_discard_request(struct bio *bio)
1568 {
1569 struct r10bio *r10_bio = bio->bi_private;
1570 struct r10conf *conf = r10_bio->mddev->private;
1571 struct md_rdev *rdev = NULL;
1572 int dev;
1573 int slot, repl;
1574
1575
1576
1577
1578 if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
1579 set_bit(R10BIO_Uptodate, &r10_bio->state);
1580
1581 dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
1582 if (repl)
1583 rdev = conf->mirrors[dev].replacement;
1584 if (!rdev) {
1585
1586
1587
1588
1589
1590 smp_rmb();
1591 rdev = conf->mirrors[dev].rdev;
1592 }
1593
1594 raid_end_discard_bio(r10_bio);
1595 rdev_dec_pending(rdev, conf->mddev);
1596 }
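
/*
 * There are some limitations for handling discard bios:
 * 1st, the discard region must cover at least stripe_size * 2 sectors.
 * 2nd, if the discard bio spans the reshape position, the request is handed
 * back (-EAGAIN) so the caller falls through to the regular write path.
 */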
1604 static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
1605 {
1606 struct r10conf *conf = mddev->private;
1607 struct geom *geo = &conf->geo;
1608 int far_copies = geo->far_copies;
1609 bool first_copy = true;
1610 struct r10bio *r10_bio, *first_r10bio;
1611 struct bio *split;
1612 int disk;
1613 sector_t chunk;
1614 unsigned int stripe_size;
1615 unsigned int stripe_data_disks;
1616 sector_t split_size;
1617 sector_t bio_start, bio_end;
1618 sector_t first_stripe_index, last_stripe_index;
1619 sector_t start_disk_offset;
1620 unsigned int start_disk_index;
1621 sector_t end_disk_offset;
1622 unsigned int end_disk_index;
1623 unsigned int remainder;
1624
1625 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
1626 return -EAGAIN;
1627
1628 if (WARN_ON_ONCE(bio->bi_opf & REQ_NOWAIT)) {
1629 bio_wouldblock_error(bio);
1630 return 0;
1631 }
1632 wait_barrier(conf, false);
1633
1634
1635
1636
1637
1638 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
1639 goto out;
1640
1641 if (geo->near_copies)
1642 stripe_data_disks = geo->raid_disks / geo->near_copies +
1643 geo->raid_disks % geo->near_copies;
1644 else
1645 stripe_data_disks = geo->raid_disks;
1646
1647 stripe_size = stripe_data_disks << geo->chunk_shift;
1648
1649 bio_start = bio->bi_iter.bi_sector;
1650 bio_end = bio_end_sector(bio);
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660 if (bio_sectors(bio) < stripe_size*2)
1661 goto out;
1662
1663
1664
1665
1666 div_u64_rem(bio_start, stripe_size, &remainder);
1667 if (remainder) {
1668 split_size = stripe_size - remainder;
1669 split = bio_split(bio, split_size, GFP_NOIO, &conf->bio_split);
1670 bio_chain(split, bio);
1671 allow_barrier(conf);
1672
1673 submit_bio_noacct(split);
1674 wait_barrier(conf, false);
1675 }
1676 div_u64_rem(bio_end, stripe_size, &remainder);
1677 if (remainder) {
1678 split_size = bio_sectors(bio) - remainder;
1679 split = bio_split(bio, split_size, GFP_NOIO, &conf->bio_split);
1680 bio_chain(split, bio);
1681 allow_barrier(conf);
1682
1683 submit_bio_noacct(bio);
1684 bio = split;
1685 wait_barrier(conf, false);
1686 }
1687
1688 bio_start = bio->bi_iter.bi_sector;
1689 bio_end = bio_end_sector(bio);
1690
1691
1692
1693
1694
1695
1696 chunk = bio_start >> geo->chunk_shift;
1697 chunk *= geo->near_copies;
1698 first_stripe_index = chunk;
1699 start_disk_index = sector_div(first_stripe_index, geo->raid_disks);
1700 if (geo->far_offset)
1701 first_stripe_index *= geo->far_copies;
1702 start_disk_offset = (bio_start & geo->chunk_mask) +
1703 (first_stripe_index << geo->chunk_shift);
1704
1705 chunk = bio_end >> geo->chunk_shift;
1706 chunk *= geo->near_copies;
1707 last_stripe_index = chunk;
1708 end_disk_index = sector_div(last_stripe_index, geo->raid_disks);
1709 if (geo->far_offset)
1710 last_stripe_index *= geo->far_copies;
1711 end_disk_offset = (bio_end & geo->chunk_mask) +
1712 (last_stripe_index << geo->chunk_shift);
1713
1714 retry_discard:
1715 r10_bio = mempool_alloc(&conf->r10bio_pool, GFP_NOIO);
1716 r10_bio->mddev = mddev;
1717 r10_bio->state = 0;
1718 r10_bio->sectors = 0;
1719 memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * geo->raid_disks);
1720 wait_blocked_dev(mddev, r10_bio);
1721
1722
1723
1724
1725
1726
1727
1728
1729 if (first_copy) {
1730 r10_bio->master_bio = bio;
1731 set_bit(R10BIO_Discard, &r10_bio->state);
1732 first_copy = false;
1733 first_r10bio = r10_bio;
1734 } else
1735 r10_bio->master_bio = (struct bio *)first_r10bio;
1736
1737
1738
1739
1740
1741
1742 rcu_read_lock();
1743 for (disk = 0; disk < geo->raid_disks; disk++) {
1744 struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev);
1745 struct md_rdev *rrdev = rcu_dereference(
1746 conf->mirrors[disk].replacement);
1747
1748 r10_bio->devs[disk].bio = NULL;
1749 r10_bio->devs[disk].repl_bio = NULL;
1750
1751 if (rdev && (test_bit(Faulty, &rdev->flags)))
1752 rdev = NULL;
1753 if (rrdev && (test_bit(Faulty, &rrdev->flags)))
1754 rrdev = NULL;
1755 if (!rdev && !rrdev)
1756 continue;
1757
1758 if (rdev) {
1759 r10_bio->devs[disk].bio = bio;
1760 atomic_inc(&rdev->nr_pending);
1761 }
1762 if (rrdev) {
1763 r10_bio->devs[disk].repl_bio = bio;
1764 atomic_inc(&rrdev->nr_pending);
1765 }
1766 }
1767 rcu_read_unlock();
1768
1769 atomic_set(&r10_bio->remaining, 1);
1770 for (disk = 0; disk < geo->raid_disks; disk++) {
1771 sector_t dev_start, dev_end;
1772 struct bio *mbio, *rbio = NULL;
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786 if (disk < start_disk_index)
1787 dev_start = (first_stripe_index + 1) * mddev->chunk_sectors;
1788 else if (disk > start_disk_index)
1789 dev_start = first_stripe_index * mddev->chunk_sectors;
1790 else
1791 dev_start = start_disk_offset;
1792
1793 if (disk < end_disk_index)
1794 dev_end = (last_stripe_index + 1) * mddev->chunk_sectors;
1795 else if (disk > end_disk_index)
1796 dev_end = last_stripe_index * mddev->chunk_sectors;
1797 else
1798 dev_end = end_disk_offset;
1799
1800
1801
1802
1803
1804
1805
1806 if (r10_bio->devs[disk].bio) {
1807 struct md_rdev *rdev = conf->mirrors[disk].rdev;
1808 mbio = bio_alloc_clone(bio->bi_bdev, bio, GFP_NOIO,
1809 &mddev->bio_set);
1810 mbio->bi_end_io = raid10_end_discard_request;
1811 mbio->bi_private = r10_bio;
1812 r10_bio->devs[disk].bio = mbio;
1813 r10_bio->devs[disk].devnum = disk;
1814 atomic_inc(&r10_bio->remaining);
1815 md_submit_discard_bio(mddev, rdev, mbio,
1816 dev_start + choose_data_offset(r10_bio, rdev),
1817 dev_end - dev_start);
1818 bio_endio(mbio);
1819 }
1820 if (r10_bio->devs[disk].repl_bio) {
1821 struct md_rdev *rrdev = conf->mirrors[disk].replacement;
1822 rbio = bio_alloc_clone(bio->bi_bdev, bio, GFP_NOIO,
1823 &mddev->bio_set);
1824 rbio->bi_end_io = raid10_end_discard_request;
1825 rbio->bi_private = r10_bio;
1826 r10_bio->devs[disk].repl_bio = rbio;
1827 r10_bio->devs[disk].devnum = disk;
1828 atomic_inc(&r10_bio->remaining);
1829 md_submit_discard_bio(mddev, rrdev, rbio,
1830 dev_start + choose_data_offset(r10_bio, rrdev),
1831 dev_end - dev_start);
1832 bio_endio(rbio);
1833 }
1834 }
1835
1836 if (!geo->far_offset && --far_copies) {
1837 first_stripe_index += geo->stride >> geo->chunk_shift;
1838 start_disk_offset += geo->stride;
1839 last_stripe_index += geo->stride >> geo->chunk_shift;
1840 end_disk_offset += geo->stride;
1841 atomic_inc(&first_r10bio->remaining);
1842 raid_end_discard_bio(r10_bio);
1843 wait_barrier(conf, false);
1844 goto retry_discard;
1845 }
1846
1847 raid_end_discard_bio(r10_bio);
1848
1849 return 0;
1850 out:
1851 allow_barrier(conf);
1852 return -EAGAIN;
1853 }
1854
1855 static bool raid10_make_request(struct mddev *mddev, struct bio *bio)
1856 {
1857 struct r10conf *conf = mddev->private;
1858 sector_t chunk_mask = (conf->geo.chunk_mask & conf->prev.chunk_mask);
1859 int chunk_sects = chunk_mask + 1;
1860 int sectors = bio_sectors(bio);
1861
1862 if (unlikely(bio->bi_opf & REQ_PREFLUSH)
1863 && md_flush_request(mddev, bio))
1864 return true;
1865
1866 if (!md_write_start(mddev, bio))
1867 return false;
1868
1869 if (unlikely(bio_op(bio) == REQ_OP_DISCARD))
1870 if (!raid10_handle_discard(mddev, bio))
1871 return true;

	/*
	 * If this request crosses a chunk boundary, we need to split
	 * it.
	 */
1877 if (unlikely((bio->bi_iter.bi_sector & chunk_mask) +
1878 sectors > chunk_sects
1879 && (conf->geo.near_copies < conf->geo.raid_disks
1880 || conf->prev.near_copies <
1881 conf->prev.raid_disks)))
1882 sectors = chunk_sects -
1883 (bio->bi_iter.bi_sector &
1884 (chunk_sects - 1));
1885 __make_request(mddev, bio, sectors);
1886
1887
1888 wake_up(&conf->wait_barrier);
1889 return true;
1890 }
1891
1892 static void raid10_status(struct seq_file *seq, struct mddev *mddev)
1893 {
1894 struct r10conf *conf = mddev->private;
1895 int i;
1896
1897 if (conf->geo.near_copies < conf->geo.raid_disks)
1898 seq_printf(seq, " %dK chunks", mddev->chunk_sectors / 2);
1899 if (conf->geo.near_copies > 1)
1900 seq_printf(seq, " %d near-copies", conf->geo.near_copies);
1901 if (conf->geo.far_copies > 1) {
1902 if (conf->geo.far_offset)
1903 seq_printf(seq, " %d offset-copies", conf->geo.far_copies);
1904 else
1905 seq_printf(seq, " %d far-copies", conf->geo.far_copies);
1906 if (conf->geo.far_set_size != conf->geo.raid_disks)
1907 seq_printf(seq, " %d devices per set", conf->geo.far_set_size);
1908 }
1909 seq_printf(seq, " [%d/%d] [", conf->geo.raid_disks,
1910 conf->geo.raid_disks - mddev->degraded);
1911 rcu_read_lock();
1912 for (i = 0; i < conf->geo.raid_disks; i++) {
1913 struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
1914 seq_printf(seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
1915 }
1916 rcu_read_unlock();
1917 seq_printf(seq, "]");
1918 }
1919
1920
1921
1922
1923
1924
1925 static int _enough(struct r10conf *conf, int previous, int ignore)
1926 {
1927 int first = 0;
1928 int has_enough = 0;
1929 int disks, ncopies;
1930 if (previous) {
1931 disks = conf->prev.raid_disks;
1932 ncopies = conf->prev.near_copies;
1933 } else {
1934 disks = conf->geo.raid_disks;
1935 ncopies = conf->geo.near_copies;
1936 }
1937
1938 rcu_read_lock();
1939 do {
1940 int n = conf->copies;
1941 int cnt = 0;
1942 int this = first;
1943 while (n--) {
1944 struct md_rdev *rdev;
1945 if (this != ignore &&
1946 (rdev = rcu_dereference(conf->mirrors[this].rdev)) &&
1947 test_bit(In_sync, &rdev->flags))
1948 cnt++;
1949 this = (this+1) % disks;
1950 }
1951 if (cnt == 0)
1952 goto out;
1953 first = (first + ncopies) % disks;
1954 } while (first != 0);
1955 has_enough = 1;
1956 out:
1957 rcu_read_unlock();
1958 return has_enough;
1959 }
1960
1961 static int enough(struct r10conf *conf, int ignore)
1962 {
	/* when calling 'enough', both 'prev' and 'geo' must
	 * be stable.
	 * This is ensured if ->reconfig_mutex or ->device_lock
	 * is held.
	 */
1968 return _enough(conf, 0, ignore) &&
1969 _enough(conf, 1, ignore);
1970 }
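
/**
 * raid10_error() - RAID10 error handler.
 * @mddev: affected md device.
 * @rdev: member device to fail.
 *
 * The routine acknowledges &rdev failure and determines new @mddev state.
 * If it failed, then:
 *	- &MD_BROKEN flag is set in &mddev->flags.
 * Otherwise, it must be degraded:
 *	- recovery is interrupted.
 *	- &mddev->degraded is bumped.
 *
 * @rdev is marked as &Faulty excluding the case when the array is failed
 * and &mddev->fail_last_dev is off.
 */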
1987 static void raid10_error(struct mddev *mddev, struct md_rdev *rdev)
1988 {
1989 struct r10conf *conf = mddev->private;
1990 unsigned long flags;
1991
1992 spin_lock_irqsave(&conf->device_lock, flags);
1993
1994 if (test_bit(In_sync, &rdev->flags) && !enough(conf, rdev->raid_disk)) {
1995 set_bit(MD_BROKEN, &mddev->flags);
1996
1997 if (!mddev->fail_last_dev) {
1998 spin_unlock_irqrestore(&conf->device_lock, flags);
1999 return;
2000 }
2001 }
2002 if (test_and_clear_bit(In_sync, &rdev->flags))
2003 mddev->degraded++;
2004
2005 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
2006 set_bit(Blocked, &rdev->flags);
2007 set_bit(Faulty, &rdev->flags);
2008 set_mask_bits(&mddev->sb_flags, 0,
2009 BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING));
2010 spin_unlock_irqrestore(&conf->device_lock, flags);
2011 pr_crit("md/raid10:%s: Disk failure on %pg, disabling device.\n"
2012 "md/raid10:%s: Operation continuing on %d devices.\n",
2013 mdname(mddev), rdev->bdev,
2014 mdname(mddev), conf->geo.raid_disks - mddev->degraded);
2015 }
2016
2017 static void print_conf(struct r10conf *conf)
2018 {
2019 int i;
2020 struct md_rdev *rdev;
2021
2022 pr_debug("RAID10 conf printout:\n");
2023 if (!conf) {
2024 pr_debug("(!conf)\n");
2025 return;
2026 }
2027 pr_debug(" --- wd:%d rd:%d\n", conf->geo.raid_disks - conf->mddev->degraded,
2028 conf->geo.raid_disks);
2029
2030
2031
2032 for (i = 0; i < conf->geo.raid_disks; i++) {
2033 rdev = conf->mirrors[i].rdev;
2034 if (rdev)
2035 pr_debug(" disk %d, wo:%d, o:%d, dev:%pg\n",
2036 i, !test_bit(In_sync, &rdev->flags),
2037 !test_bit(Faulty, &rdev->flags),
2038 rdev->bdev);
2039 }
2040 }
2041
2042 static void close_sync(struct r10conf *conf)
2043 {
2044 wait_barrier(conf, false);
2045 allow_barrier(conf);
2046
2047 mempool_exit(&conf->r10buf_pool);
2048 }
2049
2050 static int raid10_spare_active(struct mddev *mddev)
2051 {
2052 int i;
2053 struct r10conf *conf = mddev->private;
2054 struct raid10_info *tmp;
2055 int count = 0;
2056 unsigned long flags;

	/*
	 * Find all non-in_sync disks within the RAID10 configuration
	 * and mark them in_sync
	 */
2062 for (i = 0; i < conf->geo.raid_disks; i++) {
2063 tmp = conf->mirrors + i;
2064 if (tmp->replacement
2065 && tmp->replacement->recovery_offset == MaxSector
2066 && !test_bit(Faulty, &tmp->replacement->flags)
2067 && !test_and_set_bit(In_sync, &tmp->replacement->flags)) {
2068
2069 if (!tmp->rdev
2070 || !test_and_clear_bit(In_sync, &tmp->rdev->flags))
2071 count++;
2072 if (tmp->rdev) {
2073
2074
2075
2076
2077 set_bit(Faulty, &tmp->rdev->flags);
2078 sysfs_notify_dirent_safe(
2079 tmp->rdev->sysfs_state);
2080 }
2081 sysfs_notify_dirent_safe(tmp->replacement->sysfs_state);
2082 } else if (tmp->rdev
2083 && tmp->rdev->recovery_offset == MaxSector
2084 && !test_bit(Faulty, &tmp->rdev->flags)
2085 && !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
2086 count++;
2087 sysfs_notify_dirent_safe(tmp->rdev->sysfs_state);
2088 }
2089 }
2090 spin_lock_irqsave(&conf->device_lock, flags);
2091 mddev->degraded -= count;
2092 spin_unlock_irqrestore(&conf->device_lock, flags);
2093
2094 print_conf(conf);
2095 return count;
2096 }
2097
2098 static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
2099 {
2100 struct r10conf *conf = mddev->private;
2101 int err = -EEXIST;
2102 int mirror;
2103 int first = 0;
2104 int last = conf->geo.raid_disks - 1;
2105
2106 if (mddev->recovery_cp < MaxSector)
2107
2108
2109
2110 return -EBUSY;
2111 if (rdev->saved_raid_disk < 0 && !_enough(conf, 1, -1))
2112 return -EINVAL;
2113
2114 if (md_integrity_add_rdev(rdev, mddev))
2115 return -ENXIO;
2116
2117 if (rdev->raid_disk >= 0)
2118 first = last = rdev->raid_disk;
2119
2120 if (rdev->saved_raid_disk >= first &&
2121 rdev->saved_raid_disk < conf->geo.raid_disks &&
2122 conf->mirrors[rdev->saved_raid_disk].rdev == NULL)
2123 mirror = rdev->saved_raid_disk;
2124 else
2125 mirror = first;
2126 for ( ; mirror <= last ; mirror++) {
2127 struct raid10_info *p = &conf->mirrors[mirror];
2128 if (p->recovery_disabled == mddev->recovery_disabled)
2129 continue;
2130 if (p->rdev) {
2131 if (!test_bit(WantReplacement, &p->rdev->flags) ||
2132 p->replacement != NULL)
2133 continue;
2134 clear_bit(In_sync, &rdev->flags);
2135 set_bit(Replacement, &rdev->flags);
2136 rdev->raid_disk = mirror;
2137 err = 0;
2138 if (mddev->gendisk)
2139 disk_stack_limits(mddev->gendisk, rdev->bdev,
2140 rdev->data_offset << 9);
2141 conf->fullsync = 1;
2142 rcu_assign_pointer(p->replacement, rdev);
2143 break;
2144 }
2145
2146 if (mddev->gendisk)
2147 disk_stack_limits(mddev->gendisk, rdev->bdev,
2148 rdev->data_offset << 9);
2149
2150 p->head_position = 0;
2151 p->recovery_disabled = mddev->recovery_disabled - 1;
2152 rdev->raid_disk = mirror;
2153 err = 0;
2154 if (rdev->saved_raid_disk != mirror)
2155 conf->fullsync = 1;
2156 rcu_assign_pointer(p->rdev, rdev);
2157 break;
2158 }
2159
2160 print_conf(conf);
2161 return err;
2162 }
2163
2164 static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
2165 {
2166 struct r10conf *conf = mddev->private;
2167 int err = 0;
2168 int number = rdev->raid_disk;
2169 struct md_rdev **rdevp;
2170 struct raid10_info *p;
2171
2172 print_conf(conf);
2173 if (unlikely(number >= mddev->raid_disks))
2174 return 0;
2175 p = conf->mirrors + number;
2176 if (rdev == p->rdev)
2177 rdevp = &p->rdev;
2178 else if (rdev == p->replacement)
2179 rdevp = &p->replacement;
2180 else
2181 return 0;
2182
2183 if (test_bit(In_sync, &rdev->flags) ||
2184 atomic_read(&rdev->nr_pending)) {
2185 err = -EBUSY;
2186 goto abort;
2187 }

	/*
	 * Only remove non-faulty devices if recovery is not possible.
	 */
2191 if (!test_bit(Faulty, &rdev->flags) &&
2192 mddev->recovery_disabled != p->recovery_disabled &&
2193 (!p->replacement || p->replacement == rdev) &&
2194 number < conf->geo.raid_disks &&
2195 enough(conf, -1)) {
2196 err = -EBUSY;
2197 goto abort;
2198 }
2199 *rdevp = NULL;
2200 if (!test_bit(RemoveSynchronized, &rdev->flags)) {
2201 synchronize_rcu();
2202 if (atomic_read(&rdev->nr_pending)) {
2203
2204 err = -EBUSY;
2205 *rdevp = rdev;
2206 goto abort;
2207 }
2208 }
2209 if (p->replacement) {
2210
2211 p->rdev = p->replacement;
2212 clear_bit(Replacement, &p->replacement->flags);
2213 smp_mb();
2214
2215
2216 p->replacement = NULL;
2217 }
2218
2219 clear_bit(WantReplacement, &rdev->flags);
2220 err = md_integrity_register(mddev);
2221
2222 abort:
2223
2224 print_conf(conf);
2225 return err;
2226 }
2227
2228 static void __end_sync_read(struct r10bio *r10_bio, struct bio *bio, int d)
2229 {
2230 struct r10conf *conf = r10_bio->mddev->private;
2231
2232 if (!bio->bi_status)
2233 set_bit(R10BIO_Uptodate, &r10_bio->state);
2234 else
2235
2236
2237
2238 atomic_add(r10_bio->sectors,
2239 &conf->mirrors[d].rdev->corrected_errors);
2240
2241
2242
2243
2244 rdev_dec_pending(conf->mirrors[d].rdev, conf->mddev);
2245 if (test_bit(R10BIO_IsRecover, &r10_bio->state) ||
2246 atomic_dec_and_test(&r10_bio->remaining)) {
2247
2248
2249
2250 reschedule_retry(r10_bio);
2251 }
2252 }
2253
2254 static void end_sync_read(struct bio *bio)
2255 {
2256 struct r10bio *r10_bio = get_resync_r10bio(bio);
2257 struct r10conf *conf = r10_bio->mddev->private;
2258 int d = find_bio_disk(conf, r10_bio, bio, NULL, NULL);
2259
2260 __end_sync_read(r10_bio, bio, d);
2261 }
2262
2263 static void end_reshape_read(struct bio *bio)
2264 {
2265
2266 struct r10bio *r10_bio = bio->bi_private;
2267
2268 __end_sync_read(r10_bio, bio, r10_bio->read_slot);
2269 }
2270
2271 static void end_sync_request(struct r10bio *r10_bio)
2272 {
2273 struct mddev *mddev = r10_bio->mddev;
2274
2275 while (atomic_dec_and_test(&r10_bio->remaining)) {
2276 if (r10_bio->master_bio == NULL) {
2277
2278 sector_t s = r10_bio->sectors;
2279 if (test_bit(R10BIO_MadeGood, &r10_bio->state) ||
2280 test_bit(R10BIO_WriteError, &r10_bio->state))
2281 reschedule_retry(r10_bio);
2282 else
2283 put_buf(r10_bio);
2284 md_done_sync(mddev, s, 1);
2285 break;
2286 } else {
2287 struct r10bio *r10_bio2 = (struct r10bio *)r10_bio->master_bio;
2288 if (test_bit(R10BIO_MadeGood, &r10_bio->state) ||
2289 test_bit(R10BIO_WriteError, &r10_bio->state))
2290 reschedule_retry(r10_bio);
2291 else
2292 put_buf(r10_bio);
2293 r10_bio = r10_bio2;
2294 }
2295 }
2296 }
2297
2298 static void end_sync_write(struct bio *bio)
2299 {
2300 struct r10bio *r10_bio = get_resync_r10bio(bio);
2301 struct mddev *mddev = r10_bio->mddev;
2302 struct r10conf *conf = mddev->private;
2303 int d;
2304 sector_t first_bad;
2305 int bad_sectors;
2306 int slot;
2307 int repl;
2308 struct md_rdev *rdev = NULL;
2309
2310 d = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
2311 if (repl)
2312 rdev = conf->mirrors[d].replacement;
2313 else
2314 rdev = conf->mirrors[d].rdev;
2315
2316 if (bio->bi_status) {
2317 if (repl)
2318 md_error(mddev, rdev);
2319 else {
2320 set_bit(WriteErrorSeen, &rdev->flags);
2321 if (!test_and_set_bit(WantReplacement, &rdev->flags))
2322 set_bit(MD_RECOVERY_NEEDED,
2323 &rdev->mddev->recovery);
2324 set_bit(R10BIO_WriteError, &r10_bio->state);
2325 }
2326 } else if (is_badblock(rdev,
2327 r10_bio->devs[slot].addr,
2328 r10_bio->sectors,
2329 &first_bad, &bad_sectors))
2330 set_bit(R10BIO_MadeGood, &r10_bio->state);
2331
2332 rdev_dec_pending(rdev, mddev);
2333
2334 end_sync_request(r10_bio);
2335 }
2353 static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
2354 {
2355 struct r10conf *conf = mddev->private;
2356 int i, first;
2357 struct bio *tbio, *fbio;
2358 int vcnt;
2359 struct page **tpages, **fpages;
2360
2361 atomic_set(&r10_bio->remaining, 1);
2362
2363 /* find the first copy whose read succeeded; it becomes the reference */
2364 for (i=0; i<conf->copies; i++)
2365 if (!r10_bio->devs[i].bio->bi_status)
2366 break;
2367
2368 if (i == conf->copies)
2369 goto done;
2370
2371 first = i;
2372 fbio = r10_bio->devs[i].bio;
2373 fbio->bi_iter.bi_size = r10_bio->sectors << 9;
2374 fbio->bi_iter.bi_idx = 0;
2375 fpages = get_resync_pages(fbio)->pages;
2376
2377 vcnt = (r10_bio->sectors + (PAGE_SIZE >> 9) - 1) >> (PAGE_SHIFT - 9);
2378
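/*
 * Compare every other copy against the reference and rewrite any copy
 * that differs or whose read failed.
 */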
2379 for (i=0 ; i < conf->copies ; i++) {
2380 int j, d;
2381 struct md_rdev *rdev;
2382 struct resync_pages *rp;
2383
2384 tbio = r10_bio->devs[i].bio;
2385
2386 if (tbio->bi_end_io != end_sync_read)
2387 continue;
2388 if (i == first)
2389 continue;
2390
2391 tpages = get_resync_pages(tbio)->pages;
2392 d = r10_bio->devs[i].devnum;
2393 rdev = conf->mirrors[d].rdev;
2394 if (!r10_bio->devs[i].bio->bi_status) {
2395
2396
2397
2398
2399 int sectors = r10_bio->sectors;
2400 for (j = 0; j < vcnt; j++) {
2401 int len = PAGE_SIZE;
2402 if (sectors < (len / 512))
2403 len = sectors * 512;
2404 if (memcmp(page_address(fpages[j]),
2405 page_address(tpages[j]),
2406 len))
2407 break;
2408 sectors -= len/512;
2409 }
2410 if (j == vcnt)
2411 continue;
2412 atomic64_add(r10_bio->sectors, &mddev->resync_mismatches);
2413 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
2414
2415 continue;
2416 } else if (test_bit(FailFast, &rdev->flags)) {
2417
2418 md_error(rdev->mddev, rdev);
2419 continue;
2420 }
2421 /* Ok, we need to write this bio, either to correct an
2422  * inconsistency or to correct an unreadable block.
2423  * First fix up the resync pages and state of the bio so it
2424  * can be reused as a write to this device.
2425  */
2426 rp = get_resync_pages(tbio);
2427 bio_reset(tbio, conf->mirrors[d].rdev->bdev, REQ_OP_WRITE);
2428
2429 md_bio_reset_resync_pages(tbio, rp, fbio->bi_iter.bi_size);
2430
2431 rp->raid_bio = r10_bio;
2432 tbio->bi_private = rp;
2433 tbio->bi_iter.bi_sector = r10_bio->devs[i].addr;
2434 tbio->bi_end_io = end_sync_write;
2435
2436 bio_copy_data(tbio, fbio);
2437
2438 atomic_inc(&conf->mirrors[d].rdev->nr_pending);
2439 atomic_inc(&r10_bio->remaining);
2440 md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(tbio));
2441
2442 if (test_bit(FailFast, &conf->mirrors[d].rdev->flags))
2443 tbio->bi_opf |= MD_FAILFAST;
2444 tbio->bi_iter.bi_sector += conf->mirrors[d].rdev->data_offset;
2445 submit_bio_noacct(tbio);
2446 }
2447
2448 /*
2449  * Now write the data out to any replacement devices.
2450  */
2451 for (i = 0; i < conf->copies; i++) {
2452 int d;
2453
2454 tbio = r10_bio->devs[i].repl_bio;
2455 if (!tbio || !tbio->bi_end_io)
2456 continue;
2457 if (r10_bio->devs[i].bio->bi_end_io != end_sync_write
2458 && r10_bio->devs[i].bio != fbio)
2459 bio_copy_data(tbio, fbio);
2460 d = r10_bio->devs[i].devnum;
2461 atomic_inc(&r10_bio->remaining);
2462 md_sync_acct(conf->mirrors[d].replacement->bdev,
2463 bio_sectors(tbio));
2464 submit_bio_noacct(tbio);
2465 }
2466
2467 done:
2468 if (atomic_dec_and_test(&r10_bio->remaining)) {
2469 md_done_sync(mddev, r10_bio->sectors, 1);
2470 put_buf(r10_bio);
2471 }
2472 }
2473
2474 /*
2475  * Now for the recovery code.
2476  * Recovery happens across physical sectors.
2477  * We recover all non-in_sync drives by finding the virtual address of
2478  * each, and then choosing a working drive that also has that virtual
2479  * address.  There is a separate r10_bio for each non-in_sync drive.
2480  * Only the first two slots are in use - the first for reading,
2481  * the second for writing.
2482  */
2484 static void fix_recovery_read_error(struct r10bio *r10_bio)
2485 {
2486 /* We got a read error during recovery.
2487  * We repeat the read in smaller page-sized chunks, taking note
2488  * of errors, and then write the data back.
2489  * If a chunk cannot be read at all, record a bad block on both
2490  * the source and the destination device.
2491  */
2493 struct mddev *mddev = r10_bio->mddev;
2494 struct r10conf *conf = mddev->private;
2495 struct bio *bio = r10_bio->devs[0].bio;
2496 sector_t sect = 0;
2497 int sectors = r10_bio->sectors;
2498 int idx = 0;
2499 int dr = r10_bio->devs[0].devnum;
2500 int dw = r10_bio->devs[1].devnum;
2501 struct page **pages = get_resync_pages(bio)->pages;
2502
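/*
 * Walk the range in page-sized chunks: read each chunk from the source
 * device and write it to the device being recovered.
 */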
2503 while (sectors) {
2504 int s = sectors;
2505 struct md_rdev *rdev;
2506 sector_t addr;
2507 int ok;
2508
2509 if (s > (PAGE_SIZE>>9))
2510 s = PAGE_SIZE >> 9;
2511
2512 rdev = conf->mirrors[dr].rdev;
2513 addr = r10_bio->devs[0].addr + sect;
2514 ok = sync_page_io(rdev,
2515 addr,
2516 s << 9,
2517 pages[idx],
2518 REQ_OP_READ, false);
2519 if (ok) {
2520 rdev = conf->mirrors[dw].rdev;
2521 addr = r10_bio->devs[1].addr + sect;
2522 ok = sync_page_io(rdev,
2523 addr,
2524 s << 9,
2525 pages[idx],
2526 REQ_OP_WRITE, false);
2527 if (!ok) {
2528 set_bit(WriteErrorSeen, &rdev->flags);
2529 if (!test_and_set_bit(WantReplacement,
2530 &rdev->flags))
2531 set_bit(MD_RECOVERY_NEEDED,
2532 &rdev->mddev->recovery);
2533 }
2534 }
2535 if (!ok) {
2536 /* We don't worry if we cannot set a bad block -
2537  * it really is bad so there is no loss in not
2538  * recording it yet.
2539  */
2540 rdev_set_badblocks(rdev, addr, s, 0);
2541
2542 if (rdev != conf->mirrors[dw].rdev) {
2543
2544 struct md_rdev *rdev2 = conf->mirrors[dw].rdev;
2545 addr = r10_bio->devs[1].addr + sect;
2546 ok = rdev_set_badblocks(rdev2, addr, s, 0);
2547 if (!ok) {
2548
2549 pr_notice("md/raid10:%s: recovery aborted due to read error\n",
2550 mdname(mddev));
2551
2552 conf->mirrors[dw].recovery_disabled
2553 = mddev->recovery_disabled;
2554 set_bit(MD_RECOVERY_INTR,
2555 &mddev->recovery);
2556 break;
2557 }
2558 }
2559 }
2560
2561 sectors -= s;
2562 sect += s;
2563 idx++;
2564 }
2565 }
2566
2567 static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
2568 {
2569 struct r10conf *conf = mddev->private;
2570 int d;
2571 struct bio *wbio, *wbio2;
2572
2573 if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) {
2574 fix_recovery_read_error(r10_bio);
2575 end_sync_request(r10_bio);
2576 return;
2577 }
2578
2579 /*
2580  * share the pages with the first bio
2581  * and submit the write request
2582  */
2583 d = r10_bio->devs[1].devnum;
2584 wbio = r10_bio->devs[1].bio;
2585 wbio2 = r10_bio->devs[1].repl_bio;
2586
2587 /* Test wbio2->bi_end_io before submitting wbio: once wbio
2588  * completes it may free the r10bio, and wbio2 with it.
2589  */
2590 if (wbio2 && !wbio2->bi_end_io)
2591 wbio2 = NULL;
2592 if (wbio->bi_end_io) {
2593 atomic_inc(&conf->mirrors[d].rdev->nr_pending);
2594 md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(wbio));
2595 submit_bio_noacct(wbio);
2596 }
2597 if (wbio2) {
2598 atomic_inc(&conf->mirrors[d].replacement->nr_pending);
2599 md_sync_acct(conf->mirrors[d].replacement->bdev,
2600 bio_sectors(wbio2));
2601 submit_bio_noacct(wbio2);
2602 }
2603 }
2604
2605 /*
2606  * Used by fix_read_error() to decay the per-rdev read_errors count.
2607  * The count is halved for every hour that has elapsed since the
2608  * last recorded read error.
2609  */
2611 static void check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev)
2612 {
2613 long cur_time_mon;
2614 unsigned long hours_since_last;
2615 unsigned int read_errors = atomic_read(&rdev->read_errors);
2616
2617 cur_time_mon = ktime_get_seconds();
2618
2619 if (rdev->last_read_error == 0) {
2620
2621 rdev->last_read_error = cur_time_mon;
2622 return;
2623 }
2624
2625 hours_since_last = (long)(cur_time_mon -
2626 rdev->last_read_error) / 3600;
2627
2628 rdev->last_read_error = cur_time_mon;
2629
2630 /*
2631  * If hours_since_last is at least the number of bits in
2632  * read_errors, just reset the count to 0 so the shift
2633  * below cannot overflow.
2634  */
2635 if (hours_since_last >= 8 * sizeof(read_errors))
2636 atomic_set(&rdev->read_errors, 0);
2637 else
2638 atomic_set(&rdev->read_errors, read_errors >> hours_since_last);
2639 }
2640
2641 static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector,
2642 int sectors, struct page *page, enum req_op op)
2643 {
2644 sector_t first_bad;
2645 int bad_sectors;
2646
2647 if (is_badblock(rdev, sector, sectors, &first_bad, &bad_sectors)
2648 && (op == REQ_OP_READ || test_bit(WriteErrorSeen, &rdev->flags)))
2649 return -1;
2650 if (sync_page_io(rdev, sector, sectors << 9, page, op, false))
2651
2652 return 1;
2653 if (op == REQ_OP_WRITE) {
2654 set_bit(WriteErrorSeen, &rdev->flags);
2655 if (!test_and_set_bit(WantReplacement, &rdev->flags))
2656 set_bit(MD_RECOVERY_NEEDED,
2657 &rdev->mddev->recovery);
2658 }
2659
2660 if (!rdev_set_badblocks(rdev, sector, sectors, 0))
2661 md_error(rdev->mddev, rdev);
2662 return 0;
2663 }
2664
2665 /*
2666  * Handle a read error by re-reading from the other mirrors and, where
2667  * that succeeds, writing the good data back over the failing sectors
2668  * and re-reading to verify.  The array is frozen by the caller, so no
2669  * other I/O is in flight.  A device that accumulates too many read
2670  * errors is failed.
2671  */
2673 static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10bio *r10_bio)
2674 {
2675 int sect = 0;
2676 int sectors = r10_bio->sectors;
2677 struct md_rdev *rdev;
2678 int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
2679 int d = r10_bio->devs[r10_bio->read_slot].devnum;
2680
2681
2682
2683
2684 rdev = conf->mirrors[d].rdev;
2685
2686 if (test_bit(Faulty, &rdev->flags))
2687
2688
2689 return;
2690
2691 check_decay_read_errors(mddev, rdev);
2692 atomic_inc(&rdev->read_errors);
2693 if (atomic_read(&rdev->read_errors) > max_read_errors) {
2694 pr_notice("md/raid10:%s: %pg: Raid device exceeded read_error threshold [cur %d:max %d]\n",
2695 mdname(mddev), rdev->bdev,
2696 atomic_read(&rdev->read_errors), max_read_errors);
2697 pr_notice("md/raid10:%s: %pg: Failing raid device\n",
2698 mdname(mddev), rdev->bdev);
2699 md_error(mddev, rdev);
2700 r10_bio->devs[r10_bio->read_slot].bio = IO_BLOCKED;
2701 return;
2702 }
2703
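/*
 * Retry the failed range in page-sized chunks: find an in-sync copy
 * that reads cleanly, then write that data back over the other copies
 * and re-read to verify the correction.
 */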
2704 while(sectors) {
2705 int s = sectors;
2706 int sl = r10_bio->read_slot;
2707 int success = 0;
2708 int start;
2709
2710 if (s > (PAGE_SIZE>>9))
2711 s = PAGE_SIZE >> 9;
2712
2713 rcu_read_lock();
2714 do {
2715 sector_t first_bad;
2716 int bad_sectors;
2717
2718 d = r10_bio->devs[sl].devnum;
2719 rdev = rcu_dereference(conf->mirrors[d].rdev);
2720 if (rdev &&
2721 test_bit(In_sync, &rdev->flags) &&
2722 !test_bit(Faulty, &rdev->flags) &&
2723 is_badblock(rdev, r10_bio->devs[sl].addr + sect, s,
2724 &first_bad, &bad_sectors) == 0) {
2725 atomic_inc(&rdev->nr_pending);
2726 rcu_read_unlock();
2727 success = sync_page_io(rdev,
2728 r10_bio->devs[sl].addr +
2729 sect,
2730 s<<9,
2731 conf->tmppage,
2732 REQ_OP_READ, false);
2733 rdev_dec_pending(rdev, mddev);
2734 rcu_read_lock();
2735 if (success)
2736 break;
2737 }
2738 sl++;
2739 if (sl == conf->copies)
2740 sl = 0;
2741 } while (!success && sl != r10_bio->read_slot);
2742 rcu_read_unlock();
2743
2744 if (!success) {
2745 /* Cannot read from anywhere, just mark the block
2746  * as bad on the first device to discourage future
2747  * reads.
2748  */
2749 int dn = r10_bio->devs[r10_bio->read_slot].devnum;
2750 rdev = conf->mirrors[dn].rdev;
2751
2752 if (!rdev_set_badblocks(
2753 rdev,
2754 r10_bio->devs[r10_bio->read_slot].addr
2755 + sect,
2756 s, 0)) {
2757 md_error(mddev, rdev);
2758 r10_bio->devs[r10_bio->read_slot].bio
2759 = IO_BLOCKED;
2760 }
2761 break;
2762 }
2763
2764 start = sl;
2765
2766 rcu_read_lock();
2767 while (sl != r10_bio->read_slot) {
2768 if (sl==0)
2769 sl = conf->copies;
2770 sl--;
2771 d = r10_bio->devs[sl].devnum;
2772 rdev = rcu_dereference(conf->mirrors[d].rdev);
2773 if (!rdev ||
2774 test_bit(Faulty, &rdev->flags) ||
2775 !test_bit(In_sync, &rdev->flags))
2776 continue;
2777
2778 atomic_inc(&rdev->nr_pending);
2779 rcu_read_unlock();
2780 if (r10_sync_page_io(rdev,
2781 r10_bio->devs[sl].addr +
2782 sect,
2783 s, conf->tmppage, REQ_OP_WRITE)
2784 == 0) {
2785
2786 pr_notice("md/raid10:%s: read correction write failed (%d sectors at %llu on %pg)\n",
2787 mdname(mddev), s,
2788 (unsigned long long)(
2789 sect +
2790 choose_data_offset(r10_bio,
2791 rdev)),
2792 rdev->bdev);
2793 pr_notice("md/raid10:%s: %pg: failing drive\n",
2794 mdname(mddev),
2795 rdev->bdev);
2796 }
2797 rdev_dec_pending(rdev, mddev);
2798 rcu_read_lock();
2799 }
2800 sl = start;
2801 while (sl != r10_bio->read_slot) {
2802 if (sl==0)
2803 sl = conf->copies;
2804 sl--;
2805 d = r10_bio->devs[sl].devnum;
2806 rdev = rcu_dereference(conf->mirrors[d].rdev);
2807 if (!rdev ||
2808 test_bit(Faulty, &rdev->flags) ||
2809 !test_bit(In_sync, &rdev->flags))
2810 continue;
2811
2812 atomic_inc(&rdev->nr_pending);
2813 rcu_read_unlock();
2814 switch (r10_sync_page_io(rdev,
2815 r10_bio->devs[sl].addr +
2816 sect,
2817 s, conf->tmppage, REQ_OP_READ)) {
2818 case 0:
2819
2820 pr_notice("md/raid10:%s: unable to read back corrected sectors (%d sectors at %llu on %pg)\n",
2821 mdname(mddev), s,
2822 (unsigned long long)(
2823 sect +
2824 choose_data_offset(r10_bio, rdev)),
2825 rdev->bdev);
2826 pr_notice("md/raid10:%s: %pg: failing drive\n",
2827 mdname(mddev),
2828 rdev->bdev);
2829 break;
2830 case 1:
2831 pr_info("md/raid10:%s: read error corrected (%d sectors at %llu on %pg)\n",
2832 mdname(mddev), s,
2833 (unsigned long long)(
2834 sect +
2835 choose_data_offset(r10_bio, rdev)),
2836 rdev->bdev);
2837 atomic_add(s, &rdev->corrected_errors);
2838 }
2839
2840 rdev_dec_pending(rdev, mddev);
2841 rcu_read_lock();
2842 }
2843 rcu_read_unlock();
2844
2845 sectors -= s;
2846 sect += s;
2847 }
2848 }
2849
2850 static int narrow_write_error(struct r10bio *r10_bio, int i)
2851 {
2852 struct bio *bio = r10_bio->master_bio;
2853 struct mddev *mddev = r10_bio->mddev;
2854 struct r10conf *conf = mddev->private;
2855 struct md_rdev *rdev = conf->mirrors[r10_bio->devs[i].devnum].rdev;
2856
2857 /* bio has the data to be written to slot 'i' where
2858  * we just recently had a write error.
2859  * We repeatedly clone the bio and trim down to one block,
2860  * then try the write.  Where the write fails we record
2861  * a bad block.
2862  * It is conceivable that the bio doesn't exactly align with
2863  * blocks, in which case two extra blocks might also be written.
2864  * We currently own a reference to the rdev.
2865  */
2867 int block_sectors;
2868 sector_t sector;
2869 int sectors;
2870 int sect_to_write = r10_bio->sectors;
2871 int ok = 1;
2872
2873 if (rdev->badblocks.shift < 0)
2874 return 0;
2875
2876 block_sectors = roundup(1 << rdev->badblocks.shift,
2877 bdev_logical_block_size(rdev->bdev) >> 9);
2878 sector = r10_bio->sector;
2879 sectors = ((r10_bio->sector + block_sectors)
2880 & ~(sector_t)(block_sectors - 1))
2881 - sector;
2882
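/*
 * Re-issue the failed write one badblock-aligned chunk at a time; any
 * chunk that still fails is recorded as a bad block.
 */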
2883 while (sect_to_write) {
2884 struct bio *wbio;
2885 sector_t wsector;
2886 if (sectors > sect_to_write)
2887 sectors = sect_to_write;
2888
2889 wbio = bio_alloc_clone(rdev->bdev, bio, GFP_NOIO,
2890 &mddev->bio_set);
2891 bio_trim(wbio, sector - bio->bi_iter.bi_sector, sectors);
2892 wsector = r10_bio->devs[i].addr + (sector - r10_bio->sector);
2893 wbio->bi_iter.bi_sector = wsector +
2894 choose_data_offset(r10_bio, rdev);
2895 bio_set_op_attrs(wbio, REQ_OP_WRITE, 0);
2896
2897 if (submit_bio_wait(wbio) < 0)
2898
2899 ok = rdev_set_badblocks(rdev, wsector,
2900 sectors, 0)
2901 && ok;
2902
2903 bio_put(wbio);
2904 sect_to_write -= sectors;
2905 sector += sectors;
2906 sectors = block_sectors;
2907 }
2908 return ok;
2909 }
2910
2911 static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
2912 {
2913 int slot = r10_bio->read_slot;
2914 struct bio *bio;
2915 struct r10conf *conf = mddev->private;
2916 struct md_rdev *rdev = r10_bio->devs[slot].rdev;
2917
2918 /* We got a read error on a normal read request.
2919  * If the array is read-only, just give up on this copy.
2920  * Otherwise freeze the array and repair the sectors in place via
2921  * fix_read_error(); devices marked FailFast are failed immediately
2922  * instead.  Finally re-issue the original request so it can be
2923  * served from another copy.
2924  */
2925
2926 bio = r10_bio->devs[slot].bio;
2927 bio_put(bio);
2928 r10_bio->devs[slot].bio = NULL;
2929
2930 if (mddev->ro)
2931 r10_bio->devs[slot].bio = IO_BLOCKED;
2932 else if (!test_bit(FailFast, &rdev->flags)) {
2933 freeze_array(conf, 1);
2934 fix_read_error(conf, mddev, r10_bio);
2935 unfreeze_array(conf);
2936 } else
2937 md_error(mddev, rdev);
2938
2939 rdev_dec_pending(rdev, mddev);
2940 allow_barrier(conf);
2941 r10_bio->state = 0;
2942 raid10_read_request(mddev, r10_bio->master_bio, r10_bio);
2943 }
2944
2945 static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
2946 {
2947 /* Some sort of write request has finished and it
2948  * succeeded in writing where we thought there was a
2949  * bad block.  So forget the bad block.
2950  * Or possibly it failed and we need to record
2951  * a bad block.
2952  */
2953 int m;
2954 struct md_rdev *rdev;
2955
2956 if (test_bit(R10BIO_IsSync, &r10_bio->state) ||
2957 test_bit(R10BIO_IsRecover, &r10_bio->state)) {
2958 for (m = 0; m < conf->copies; m++) {
2959 int dev = r10_bio->devs[m].devnum;
2960 rdev = conf->mirrors[dev].rdev;
2961 if (r10_bio->devs[m].bio == NULL ||
2962 r10_bio->devs[m].bio->bi_end_io == NULL)
2963 continue;
2964 if (!r10_bio->devs[m].bio->bi_status) {
2965 rdev_clear_badblocks(
2966 rdev,
2967 r10_bio->devs[m].addr,
2968 r10_bio->sectors, 0);
2969 } else {
2970 if (!rdev_set_badblocks(
2971 rdev,
2972 r10_bio->devs[m].addr,
2973 r10_bio->sectors, 0))
2974 md_error(conf->mddev, rdev);
2975 }
2976 rdev = conf->mirrors[dev].replacement;
2977 if (r10_bio->devs[m].repl_bio == NULL ||
2978 r10_bio->devs[m].repl_bio->bi_end_io == NULL)
2979 continue;
2980
2981 if (!r10_bio->devs[m].repl_bio->bi_status) {
2982 rdev_clear_badblocks(
2983 rdev,
2984 r10_bio->devs[m].addr,
2985 r10_bio->sectors, 0);
2986 } else {
2987 if (!rdev_set_badblocks(
2988 rdev,
2989 r10_bio->devs[m].addr,
2990 r10_bio->sectors, 0))
2991 md_error(conf->mddev, rdev);
2992 }
2993 }
2994 put_buf(r10_bio);
2995 } else {
2996 bool fail = false;
2997 for (m = 0; m < conf->copies; m++) {
2998 int dev = r10_bio->devs[m].devnum;
2999 struct bio *bio = r10_bio->devs[m].bio;
3000 rdev = conf->mirrors[dev].rdev;
3001 if (bio == IO_MADE_GOOD) {
3002 rdev_clear_badblocks(
3003 rdev,
3004 r10_bio->devs[m].addr,
3005 r10_bio->sectors, 0);
3006 rdev_dec_pending(rdev, conf->mddev);
3007 } else if (bio != NULL && bio->bi_status) {
3008 fail = true;
3009 if (!narrow_write_error(r10_bio, m)) {
3010 md_error(conf->mddev, rdev);
3011 set_bit(R10BIO_Degraded,
3012 &r10_bio->state);
3013 }
3014 rdev_dec_pending(rdev, conf->mddev);
3015 }
3016 bio = r10_bio->devs[m].repl_bio;
3017 rdev = conf->mirrors[dev].replacement;
3018 if (rdev && bio == IO_MADE_GOOD) {
3019 rdev_clear_badblocks(
3020 rdev,
3021 r10_bio->devs[m].addr,
3022 r10_bio->sectors, 0);
3023 rdev_dec_pending(rdev, conf->mddev);
3024 }
3025 }
3026 if (fail) {
3027 spin_lock_irq(&conf->device_lock);
3028 list_add(&r10_bio->retry_list, &conf->bio_end_io_list);
3029 conf->nr_queued++;
3030 spin_unlock_irq(&conf->device_lock);
3031 /*
3032  * In case freeze_array() is waiting for the condition
3033  * nr_pending == nr_queued + extra to become true.
3034  */
3035 wake_up(&conf->wait_barrier);
3036 md_wakeup_thread(conf->mddev->thread);
3037 } else {
3038 if (test_bit(R10BIO_WriteError,
3039 &r10_bio->state))
3040 close_write(r10_bio);
3041 raid_end_bio_io(r10_bio);
3042 }
3043 }
3044 }
3045
3046 static void raid10d(struct md_thread *thread)
3047 {
3048 struct mddev *mddev = thread->mddev;
3049 struct r10bio *r10_bio;
3050 unsigned long flags;
3051 struct r10conf *conf = mddev->private;
3052 struct list_head *head = &conf->retry_list;
3053 struct blk_plug plug;
3054
3055 md_check_recovery(mddev);
3056
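/*
 * First, finish any writes queued on bio_end_io_list: they only need the
 * original bio ended, but that had to wait until the superblock was no
 * longer marked change-pending.
 */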
3057 if (!list_empty_careful(&conf->bio_end_io_list) &&
3058 !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
3059 LIST_HEAD(tmp);
3060 spin_lock_irqsave(&conf->device_lock, flags);
3061 if (!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
3062 while (!list_empty(&conf->bio_end_io_list)) {
3063 list_move(conf->bio_end_io_list.prev, &tmp);
3064 conf->nr_queued--;
3065 }
3066 }
3067 spin_unlock_irqrestore(&conf->device_lock, flags);
3068 while (!list_empty(&tmp)) {
3069 r10_bio = list_first_entry(&tmp, struct r10bio,
3070 retry_list);
3071 list_del(&r10_bio->retry_list);
3072 if (mddev->degraded)
3073 set_bit(R10BIO_Degraded, &r10_bio->state);
3074
3075 if (test_bit(R10BIO_WriteError,
3076 &r10_bio->state))
3077 close_write(r10_bio);
3078 raid_end_bio_io(r10_bio);
3079 }
3080 }
3081
3082 blk_start_plug(&plug);
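/*
 * Main retry loop: flush any queued writes, then dispatch each retried
 * r10bio according to its state flags.
 */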
3083 for (;;) {
3084
3085 flush_pending_writes(conf);
3086
3087 spin_lock_irqsave(&conf->device_lock, flags);
3088 if (list_empty(head)) {
3089 spin_unlock_irqrestore(&conf->device_lock, flags);
3090 break;
3091 }
3092 r10_bio = list_entry(head->prev, struct r10bio, retry_list);
3093 list_del(head->prev);
3094 conf->nr_queued--;
3095 spin_unlock_irqrestore(&conf->device_lock, flags);
3096
3097 mddev = r10_bio->mddev;
3098 conf = mddev->private;
3099 if (test_bit(R10BIO_MadeGood, &r10_bio->state) ||
3100 test_bit(R10BIO_WriteError, &r10_bio->state))
3101 handle_write_completed(conf, r10_bio);
3102 else if (test_bit(R10BIO_IsReshape, &r10_bio->state))
3103 reshape_request_write(mddev, r10_bio);
3104 else if (test_bit(R10BIO_IsSync, &r10_bio->state))
3105 sync_request_write(mddev, r10_bio);
3106 else if (test_bit(R10BIO_IsRecover, &r10_bio->state))
3107 recovery_request_write(mddev, r10_bio);
3108 else if (test_bit(R10BIO_ReadError, &r10_bio->state))
3109 handle_read_error(mddev, r10_bio);
3110 else
3111 WARN_ON_ONCE(1);
3112
3113 cond_resched();
3114 if (mddev->sb_flags & ~(1<<MD_SB_CHANGE_PENDING))
3115 md_check_recovery(mddev);
3116 }
3117 blk_finish_plug(&plug);
3118 }
3119
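/*
 * Set up the resync buffer pool (one resync window's worth of buffers)
 * and note whether any replacement devices are configured.
 */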
3120 static int init_resync(struct r10conf *conf)
3121 {
3122 int ret, buffs, i;
3123
3124 buffs = RESYNC_WINDOW / RESYNC_BLOCK_SIZE;
3125 BUG_ON(mempool_initialized(&conf->r10buf_pool));
3126 conf->have_replacement = 0;
3127 for (i = 0; i < conf->geo.raid_disks; i++)
3128 if (conf->mirrors[i].replacement)
3129 conf->have_replacement = 1;
3130 ret = mempool_init(&conf->r10buf_pool, buffs,
3131 r10buf_pool_alloc, r10buf_pool_free, conf);
3132 if (ret)
3133 return ret;
3134 conf->next_resync = 0;
3135 return 0;
3136 }
3137
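/*
 * Take a pre-allocated resync r10bio from the pool and reset its bios,
 * preserving the resync_pages pointer stashed in bi_private.
 */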
3138 static struct r10bio *raid10_alloc_init_r10buf(struct r10conf *conf)
3139 {
3140 struct r10bio *r10bio = mempool_alloc(&conf->r10buf_pool, GFP_NOIO);
3141 struct resync_pages *rp;
3142 struct bio *bio;
3143 int nalloc;
3144 int i;
3145
3146 if (test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery) ||
3147 test_bit(MD_RECOVERY_RESHAPE, &conf->mddev->recovery))
3148 nalloc = conf->copies;
3149 else
3150 nalloc = 2;
3151
3152 for (i = 0; i < nalloc; i++) {
3153 bio = r10bio->devs[i].bio;
3154 rp = bio->bi_private;
3155 bio_reset(bio, NULL, 0);
3156 bio->bi_private = rp;
3157 bio = r10bio->devs[i].repl_bio;
3158 if (bio) {
3159 rp = bio->bi_private;
3160 bio_reset(bio, NULL, 0);
3161 bio->bi_private = rp;
3162 }
3163 }
3164 return r10bio;
3165 }
3166
3167 /*
3168  * Set cluster_sync_high so other cluster nodes know to suspend I/O
3169  * to the range [cluster_sync_low, cluster_sync_high] being resynced.
3170  */
3171 static void raid10_set_cluster_sync_high(struct r10conf *conf)
3172 {
3173 sector_t window_size;
3174 int extra_chunk, chunks;
3175
3176 /*
3177  * The resync window is expressed in "stripes": one stripe covers
3178  * raid_disks / near_copies chunks of the array, plus one extra
3179  * chunk when raid_disks is not divisible by near_copies so the
3180  * whole stripe is covered.  The window is never allowed to be
3181  * smaller than CLUSTER_RESYNC_WINDOW_SECTORS.
3182  */
3188 chunks = conf->geo.raid_disks / conf->geo.near_copies;
3189 if (conf->geo.raid_disks % conf->geo.near_copies == 0)
3190 extra_chunk = 0;
3191 else
3192 extra_chunk = 1;
3193 window_size = (chunks + extra_chunk) * conf->mddev->chunk_sectors;
3194
3195
3196
3197
3198 window_size = (CLUSTER_RESYNC_WINDOW_SECTORS > window_size) ?
3199 CLUSTER_RESYNC_WINDOW_SECTORS : window_size;
3200
3201 conf->cluster_sync_high = conf->cluster_sync_low + window_size;
3202 }
3203
3204 /*
3205  * perform a "sync" on one "block"
3206  *
3207  * We need to make sure that no normal I/O request - particularly write
3208  * requests - conflict with active sync requests.
3209  *
3210  * This is achieved by tracking pending requests and a 'barrier' concept
3211  * that can be installed to exclude normal IO requests.
3212  *
3213  * Resync and recovery are handled very differently.
3214  * We differentiate by looking at MD_RECOVERY_SYNC in mddev->recovery.
3215  *
3216  * For resync, we iterate over virtual addresses, read all copies,
3217  * and update if there are differences.  If only one copy is live,
3218  * skip.
3219  * For recovery, we iterate over physical addresses, read a good
3220  * value for each non-in_sync drive, and over-write.
3221  *
3222  * So, for recovery we may have several outstanding complex requests for a
3223  * given address, one for each out-of-sync device.  We model this by
3224  * allocating a "r10_bio" structure which is linked into multiple copies
3225  * for the various out-of-sync devices.
3226  *
3227  * Returns the number of sectors for which sync was started.
3228  *
3229  * The tricky bit is calculating the correct devices to use, which can
3230  * change mid-resync.
3231  */
3236 static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
3237 int *skipped)
3238 {
3239 struct r10conf *conf = mddev->private;
3240 struct r10bio *r10_bio;
3241 struct bio *biolist = NULL, *bio;
3242 sector_t max_sector, nr_sectors;
3243 int i;
3244 int max_sync;
3245 sector_t sync_blocks;
3246 sector_t sectors_skipped = 0;
3247 int chunks_skipped = 0;
3248 sector_t chunk_mask = conf->geo.chunk_mask;
3249 int page_idx = 0;
3250
3251 if (!mempool_initialized(&conf->r10buf_pool))
3252 if (init_resync(conf))
3253 return 0;
3254
3255
3256
3257
3258
3259 if (mddev->bitmap == NULL &&
3260 mddev->recovery_cp == MaxSector &&
3261 mddev->reshape_position == MaxSector &&
3262 !test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
3263 !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
3264 !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
3265 conf->fullsync == 0) {
3266 *skipped = 1;
3267 return mddev->dev_sectors - sector_nr;
3268 }
3269
3270 skipped:
3271 max_sector = mddev->dev_sectors;
3272 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
3273 test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
3274 max_sector = mddev->resync_max_sectors;
3275 if (sector_nr >= max_sector) {
3276 conf->cluster_sync_low = 0;
3277 conf->cluster_sync_high = 0;
3278
3279
3280
3281
3282
3283
3284
3285
3286
3287
3288 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
3289 end_reshape(conf);
3290 close_sync(conf);
3291 return 0;
3292 }
3293
3294 if (mddev->curr_resync < max_sector) {
3295 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
3296 md_bitmap_end_sync(mddev->bitmap, mddev->curr_resync,
3297 &sync_blocks, 1);
3298 else for (i = 0; i < conf->geo.raid_disks; i++) {
3299 sector_t sect =
3300 raid10_find_virt(conf, mddev->curr_resync, i);
3301 md_bitmap_end_sync(mddev->bitmap, sect,
3302 &sync_blocks, 1);
3303 }
3304 } else {
3305
3306 if ((!mddev->bitmap || conf->fullsync)
3307 && conf->have_replacement
3308 && test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
3309
3310
3311
3312 rcu_read_lock();
3313 for (i = 0; i < conf->geo.raid_disks; i++) {
3314 struct md_rdev *rdev =
3315 rcu_dereference(conf->mirrors[i].replacement);
3316 if (rdev)
3317 rdev->recovery_offset = MaxSector;
3318 }
3319 rcu_read_unlock();
3320 }
3321 conf->fullsync = 0;
3322 }
3323 md_bitmap_close_sync(mddev->bitmap);
3324 close_sync(conf);
3325 *skipped = 1;
3326 return sectors_skipped;
3327 }
3328
3329 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
3330 return reshape_request(mddev, sector_nr, skipped);
3331
3332 if (chunks_skipped >= conf->geo.raid_disks) {
3333 /* If there has been nothing to do on any drive,
3334  * then there is nothing to do at all.
3335  */
3336 *skipped = 1;
3337 return (max_sector - sector_nr) + sectors_skipped;
3338 }
3339
3340 if (max_sector > mddev->resync_max)
3341 max_sector = mddev->resync_max;
3342
3343
3344
3345
3346 if (conf->geo.near_copies < conf->geo.raid_disks &&
3347 max_sector > (sector_nr | chunk_mask))
3348 max_sector = (sector_nr | chunk_mask) + 1;
3349
3350
3351
3352
3353
3354 if (conf->nr_waiting)
3355 schedule_timeout_uninterruptible(1);
3356
3357 /* Again, very different code for resync and recovery.
3358  * Both must result in an r10bio with a list of bios that
3359  * have bi_end_io, bi_sector, bi_bdev set,
3360  * and bi_private set to the r10bio.
3361  * For recovery, we may actually create several r10bios
3362  * with 2 bios in each, that correspond to the bios in the main one.
3363  * In this case, the subordinate r10bios link back through a
3364  * borrowed master_bio pointer, and the counter in the master
3365  * includes a ref from each subordinate.
3366  *
3367  * First, we decide what to do and set ->bi_end_io
3368  * to end_sync_read if we want to read, and
3369  * end_sync_write if we will want to write.
3370  */
3372 max_sync = RESYNC_PAGES << (PAGE_SHIFT-9);
3373 if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
3374
3375 int j;
3376 r10_bio = NULL;
3377
3378 for (i = 0 ; i < conf->geo.raid_disks; i++) {
3379 int still_degraded;
3380 struct r10bio *rb2;
3381 sector_t sect;
3382 int must_sync;
3383 int any_working;
3384 int need_recover = 0;
3385 int need_replace = 0;
3386 struct raid10_info *mirror = &conf->mirrors[i];
3387 struct md_rdev *mrdev, *mreplace;
3388
3389 rcu_read_lock();
3390 mrdev = rcu_dereference(mirror->rdev);
3391 mreplace = rcu_dereference(mirror->replacement);
3392
3393 if (mrdev != NULL &&
3394 !test_bit(Faulty, &mrdev->flags) &&
3395 !test_bit(In_sync, &mrdev->flags))
3396 need_recover = 1;
3397 if (mreplace != NULL &&
3398 !test_bit(Faulty, &mreplace->flags))
3399 need_replace = 1;
3400
3401 if (!need_recover && !need_replace) {
3402 rcu_read_unlock();
3403 continue;
3404 }
3405
3406 still_degraded = 0;
3407
3408 rb2 = r10_bio;
3409 sect = raid10_find_virt(conf, sector_nr, i);
3410 if (sect >= mddev->resync_max_sectors) {
3411
3412
3413
3414 rcu_read_unlock();
3415 continue;
3416 }
3417 if (mreplace && test_bit(Faulty, &mreplace->flags))
3418 mreplace = NULL;
3419
3420
3421
3422
3423 must_sync = md_bitmap_start_sync(mddev->bitmap, sect,
3424 &sync_blocks, 1);
3425 if (sync_blocks < max_sync)
3426 max_sync = sync_blocks;
3427 if (!must_sync &&
3428 mreplace == NULL &&
3429 !conf->fullsync) {
3430
3431
3432
3433 chunks_skipped = -1;
3434 rcu_read_unlock();
3435 continue;
3436 }
3437 atomic_inc(&mrdev->nr_pending);
3438 if (mreplace)
3439 atomic_inc(&mreplace->nr_pending);
3440 rcu_read_unlock();
3441
3442 r10_bio = raid10_alloc_init_r10buf(conf);
3443 r10_bio->state = 0;
3444 raise_barrier(conf, rb2 != NULL);
3445 atomic_set(&r10_bio->remaining, 0);
3446
3447 r10_bio->master_bio = (struct bio*)rb2;
3448 if (rb2)
3449 atomic_inc(&rb2->remaining);
3450 r10_bio->mddev = mddev;
3451 set_bit(R10BIO_IsRecover, &r10_bio->state);
3452 r10_bio->sector = sect;
3453
3454 raid10_find_phys(conf, r10_bio);
3455
3456
3457
3458
3459 rcu_read_lock();
3460 for (j = 0; j < conf->geo.raid_disks; j++) {
3461 struct md_rdev *rdev = rcu_dereference(
3462 conf->mirrors[j].rdev);
3463 if (rdev == NULL || test_bit(Faulty, &rdev->flags)) {
3464 still_degraded = 1;
3465 break;
3466 }
3467 }
3468
3469 must_sync = md_bitmap_start_sync(mddev->bitmap, sect,
3470 &sync_blocks, still_degraded);
3471
3472 any_working = 0;
3473 for (j=0; j<conf->copies;j++) {
3474 int k;
3475 int d = r10_bio->devs[j].devnum;
3476 sector_t from_addr, to_addr;
3477 struct md_rdev *rdev =
3478 rcu_dereference(conf->mirrors[d].rdev);
3479 sector_t sector, first_bad;
3480 int bad_sectors;
3481 if (!rdev ||
3482 !test_bit(In_sync, &rdev->flags))
3483 continue;
3484
3485 any_working = 1;
3486 sector = r10_bio->devs[j].addr;
3487
3488 if (is_badblock(rdev, sector, max_sync,
3489 &first_bad, &bad_sectors)) {
3490 if (first_bad > sector)
3491 max_sync = first_bad - sector;
3492 else {
3493 bad_sectors -= (sector
3494 - first_bad);
3495 if (max_sync > bad_sectors)
3496 max_sync = bad_sectors;
3497 continue;
3498 }
3499 }
3500 bio = r10_bio->devs[0].bio;
3501 bio->bi_next = biolist;
3502 biolist = bio;
3503 bio->bi_end_io = end_sync_read;
3504 bio_set_op_attrs(bio, REQ_OP_READ, 0);
3505 if (test_bit(FailFast, &rdev->flags))
3506 bio->bi_opf |= MD_FAILFAST;
3507 from_addr = r10_bio->devs[j].addr;
3508 bio->bi_iter.bi_sector = from_addr +
3509 rdev->data_offset;
3510 bio_set_dev(bio, rdev->bdev);
3511 atomic_inc(&rdev->nr_pending);
3512
3513
3514 for (k=0; k<conf->copies; k++)
3515 if (r10_bio->devs[k].devnum == i)
3516 break;
3517 BUG_ON(k == conf->copies);
3518 to_addr = r10_bio->devs[k].addr;
3519 r10_bio->devs[0].devnum = d;
3520 r10_bio->devs[0].addr = from_addr;
3521 r10_bio->devs[1].devnum = i;
3522 r10_bio->devs[1].addr = to_addr;
3523
3524 if (need_recover) {
3525 bio = r10_bio->devs[1].bio;
3526 bio->bi_next = biolist;
3527 biolist = bio;
3528 bio->bi_end_io = end_sync_write;
3529 bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
3530 bio->bi_iter.bi_sector = to_addr
3531 + mrdev->data_offset;
3532 bio_set_dev(bio, mrdev->bdev);
3533 atomic_inc(&r10_bio->remaining);
3534 } else
3535 r10_bio->devs[1].bio->bi_end_io = NULL;
3536
3537
3538 bio = r10_bio->devs[1].repl_bio;
3539 if (bio)
3540 bio->bi_end_io = NULL;
3541
3542
3543
3544
3545 if (!need_replace)
3546 break;
3547 bio->bi_next = biolist;
3548 biolist = bio;
3549 bio->bi_end_io = end_sync_write;
3550 bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
3551 bio->bi_iter.bi_sector = to_addr +
3552 mreplace->data_offset;
3553 bio_set_dev(bio, mreplace->bdev);
3554 atomic_inc(&r10_bio->remaining);
3555 break;
3556 }
3557 rcu_read_unlock();
3558 if (j == conf->copies) {
3559
3560
3561 if (any_working) {
3562
3563
3564
3565 int k;
3566 for (k = 0; k < conf->copies; k++)
3567 if (r10_bio->devs[k].devnum == i)
3568 break;
3569 if (!test_bit(In_sync,
3570 &mrdev->flags)
3571 && !rdev_set_badblocks(
3572 mrdev,
3573 r10_bio->devs[k].addr,
3574 max_sync, 0))
3575 any_working = 0;
3576 if (mreplace &&
3577 !rdev_set_badblocks(
3578 mreplace,
3579 r10_bio->devs[k].addr,
3580 max_sync, 0))
3581 any_working = 0;
3582 }
3583 if (!any_working) {
3584 if (!test_and_set_bit(MD_RECOVERY_INTR,
3585 &mddev->recovery))
3586 pr_warn("md/raid10:%s: insufficient working devices for recovery.\n",
3587 mdname(mddev));
3588 mirror->recovery_disabled
3589 = mddev->recovery_disabled;
3590 }
3591 put_buf(r10_bio);
3592 if (rb2)
3593 atomic_dec(&rb2->remaining);
3594 r10_bio = rb2;
3595 rdev_dec_pending(mrdev, mddev);
3596 if (mreplace)
3597 rdev_dec_pending(mreplace, mddev);
3598 break;
3599 }
3600 rdev_dec_pending(mrdev, mddev);
3601 if (mreplace)
3602 rdev_dec_pending(mreplace, mddev);
3603 if (r10_bio->devs[0].bio->bi_opf & MD_FAILFAST) {
3604
3605
3606
3607
3608 int targets = 1;
3609 for (; j < conf->copies; j++) {
3610 int d = r10_bio->devs[j].devnum;
3611 if (conf->mirrors[d].rdev &&
3612 test_bit(In_sync,
3613 &conf->mirrors[d].rdev->flags))
3614 targets++;
3615 }
3616 if (targets == 1)
3617 r10_bio->devs[0].bio->bi_opf
3618 &= ~MD_FAILFAST;
3619 }
3620 }
3621 if (biolist == NULL) {
3622 while (r10_bio) {
3623 struct r10bio *rb2 = r10_bio;
3624 r10_bio = (struct r10bio*) rb2->master_bio;
3625 rb2->master_bio = NULL;
3626 put_buf(rb2);
3627 }
3628 goto giveup;
3629 }
3630 } else {
3631
3632 int count = 0;
3633
3634
3635
3636
3637
3638
3639
3640
3641 md_bitmap_cond_end_sync(mddev->bitmap, sector_nr,
3642 mddev_is_clustered(mddev) &&
3643 (sector_nr + 2 * RESYNC_SECTORS > conf->cluster_sync_high));
3644
3645 if (!md_bitmap_start_sync(mddev->bitmap, sector_nr,
3646 &sync_blocks, mddev->degraded) &&
3647 !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED,
3648 &mddev->recovery)) {
3649
3650 *skipped = 1;
3651 return sync_blocks + sectors_skipped;
3652 }
3653 if (sync_blocks < max_sync)
3654 max_sync = sync_blocks;
3655 r10_bio = raid10_alloc_init_r10buf(conf);
3656 r10_bio->state = 0;
3657
3658 r10_bio->mddev = mddev;
3659 atomic_set(&r10_bio->remaining, 0);
3660 raise_barrier(conf, 0);
3661 conf->next_resync = sector_nr;
3662
3663 r10_bio->master_bio = NULL;
3664 r10_bio->sector = sector_nr;
3665 set_bit(R10BIO_IsSync, &r10_bio->state);
3666 raid10_find_phys(conf, r10_bio);
3667 r10_bio->sectors = (sector_nr | chunk_mask) - sector_nr + 1;
3668
3669 for (i = 0; i < conf->copies; i++) {
3670 int d = r10_bio->devs[i].devnum;
3671 sector_t first_bad, sector;
3672 int bad_sectors;
3673 struct md_rdev *rdev;
3674
3675 if (r10_bio->devs[i].repl_bio)
3676 r10_bio->devs[i].repl_bio->bi_end_io = NULL;
3677
3678 bio = r10_bio->devs[i].bio;
3679 bio->bi_status = BLK_STS_IOERR;
3680 rcu_read_lock();
3681 rdev = rcu_dereference(conf->mirrors[d].rdev);
3682 if (rdev == NULL || test_bit(Faulty, &rdev->flags)) {
3683 rcu_read_unlock();
3684 continue;
3685 }
3686 sector = r10_bio->devs[i].addr;
3687 if (is_badblock(rdev, sector, max_sync,
3688 &first_bad, &bad_sectors)) {
3689 if (first_bad > sector)
3690 max_sync = first_bad - sector;
3691 else {
3692 bad_sectors -= (sector - first_bad);
3693 if (max_sync > bad_sectors)
3694 max_sync = bad_sectors;
3695 rcu_read_unlock();
3696 continue;
3697 }
3698 }
3699 atomic_inc(&rdev->nr_pending);
3700 atomic_inc(&r10_bio->remaining);
3701 bio->bi_next = biolist;
3702 biolist = bio;
3703 bio->bi_end_io = end_sync_read;
3704 bio_set_op_attrs(bio, REQ_OP_READ, 0);
3705 if (test_bit(FailFast, &rdev->flags))
3706 bio->bi_opf |= MD_FAILFAST;
3707 bio->bi_iter.bi_sector = sector + rdev->data_offset;
3708 bio_set_dev(bio, rdev->bdev);
3709 count++;
3710
3711 rdev = rcu_dereference(conf->mirrors[d].replacement);
3712 if (rdev == NULL || test_bit(Faulty, &rdev->flags)) {
3713 rcu_read_unlock();
3714 continue;
3715 }
3716 atomic_inc(&rdev->nr_pending);
3717
3718
3719 bio = r10_bio->devs[i].repl_bio;
3720 bio->bi_status = BLK_STS_IOERR;
3721
3722 sector = r10_bio->devs[i].addr;
3723 bio->bi_next = biolist;
3724 biolist = bio;
3725 bio->bi_end_io = end_sync_write;
3726 bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
3727 if (test_bit(FailFast, &rdev->flags))
3728 bio->bi_opf |= MD_FAILFAST;
3729 bio->bi_iter.bi_sector = sector + rdev->data_offset;
3730 bio_set_dev(bio, rdev->bdev);
3731 count++;
3732 rcu_read_unlock();
3733 }
3734
3735 if (count < 2) {
3736 for (i=0; i<conf->copies; i++) {
3737 int d = r10_bio->devs[i].devnum;
3738 if (r10_bio->devs[i].bio->bi_end_io)
3739 rdev_dec_pending(conf->mirrors[d].rdev,
3740 mddev);
3741 if (r10_bio->devs[i].repl_bio &&
3742 r10_bio->devs[i].repl_bio->bi_end_io)
3743 rdev_dec_pending(
3744 conf->mirrors[d].replacement,
3745 mddev);
3746 }
3747 put_buf(r10_bio);
3748 biolist = NULL;
3749 goto giveup;
3750 }
3751 }
3752
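/*
 * Attach pages to every bio on the list, clipping the final page so the
 * request never runs past max_sector.
 */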
3753 nr_sectors = 0;
3754 if (sector_nr + max_sync < max_sector)
3755 max_sector = sector_nr + max_sync;
3756 do {
3757 struct page *page;
3758 int len = PAGE_SIZE;
3759 if (sector_nr + (len>>9) > max_sector)
3760 len = (max_sector - sector_nr) << 9;
3761 if (len == 0)
3762 break;
3763 for (bio= biolist ; bio ; bio=bio->bi_next) {
3764 struct resync_pages *rp = get_resync_pages(bio);
3765 page = resync_fetch_page(rp, page_idx);
3766
3767
3768
3769
3770 bio_add_page(bio, page, len, 0);
3771 }
3772 nr_sectors += len>>9;
3773 sector_nr += len>>9;
3774 } while (++page_idx < RESYNC_PAGES);
3775 r10_bio->sectors = nr_sectors;
3776
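/*
 * For clustered arrays, advertise the window currently being resynced or
 * recovered so the other nodes suspend conflicting I/O.
 */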
3777 if (mddev_is_clustered(mddev) &&
3778 test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
3779
3780 if (conf->cluster_sync_high < sector_nr + nr_sectors) {
3781 conf->cluster_sync_low = mddev->curr_resync_completed;
3782 raid10_set_cluster_sync_high(conf);
3783
3784 md_cluster_ops->resync_info_update(mddev,
3785 conf->cluster_sync_low,
3786 conf->cluster_sync_high);
3787 }
3788 } else if (mddev_is_clustered(mddev)) {
3789
3790 sector_t sect_va1, sect_va2;
3791 bool broadcast_msg = false;
3792
3793 for (i = 0; i < conf->geo.raid_disks; i++) {
3794
3795
3796
3797
3798
3799 sect_va1 = raid10_find_virt(conf, sector_nr, i);
3800
3801 if (conf->cluster_sync_high < sect_va1 + nr_sectors) {
3802 broadcast_msg = true;
3803
3804
3805
3806
3807 sect_va2 = raid10_find_virt(conf,
3808 mddev->curr_resync_completed, i);
3809
3810 if (conf->cluster_sync_low == 0 ||
3811 conf->cluster_sync_low > sect_va2)
3812 conf->cluster_sync_low = sect_va2;
3813 }
3814 }
3815 if (broadcast_msg) {
3816 raid10_set_cluster_sync_high(conf);
3817 md_cluster_ops->resync_info_update(mddev,
3818 conf->cluster_sync_low,
3819 conf->cluster_sync_high);
3820 }
3821 }
3822
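/*
 * Submit only the read bios here; the corresponding writes are issued
 * later from raid10d once the reads have completed.
 */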
3823 while (biolist) {
3824 bio = biolist;
3825 biolist = biolist->bi_next;
3826
3827 bio->bi_next = NULL;
3828 r10_bio = get_resync_r10bio(bio);
3829 r10_bio->sectors = nr_sectors;
3830
3831 if (bio->bi_end_io == end_sync_read) {
3832 md_sync_acct_bio(bio, nr_sectors);
3833 bio->bi_status = 0;
3834 submit_bio_noacct(bio);
3835 }
3836 }
3837
3838 if (sectors_skipped)
3839 /* pretend they weren't skipped, it makes
3840  * no important difference in this case
3841  */
3842 md_done_sync(mddev, sectors_skipped, 1);
3843
3844 return sectors_skipped + nr_sectors;
3845 giveup:
3846 /* There is nowhere to write, so all non-sync
3847  * drives must be failed or in resync, or all drives
3848  * have a bad block, so try the next chunk.
3849  */
3850 if (sector_nr + max_sync < max_sector)
3851 max_sector = sector_nr + max_sync;
3852
3853 sectors_skipped += (max_sector - sector_nr);
3854 chunks_skipped ++;
3855 sector_nr = max_sector;
3856 goto skipped;
3857 }
3858
3859 static sector_t
3860 raid10_size(struct mddev *mddev, sector_t sectors, int raid_disks)
3861 {
3862 sector_t size;
3863 struct r10conf *conf = mddev->private;
3864
3865 if (!raid_disks)
3866 raid_disks = min(conf->geo.raid_disks,
3867 conf->prev.raid_disks);
3868 if (!sectors)
3869 sectors = conf->dev_sectors;
3870
3871 size = sectors >> conf->geo.chunk_shift;
3872 sector_div(size, conf->geo.far_copies);
3873 size = size * raid_disks;
3874 sector_div(size, conf->geo.near_copies);
3875
3876 return size << conf->geo.chunk_shift;
3877 }
3878
3879 static void calc_sectors(struct r10conf *conf, sector_t size)
3880 {
3881
3882
3883
3884
3885
3886 size = size >> conf->geo.chunk_shift;
3887 sector_div(size, conf->geo.far_copies);
3888 size = size * conf->geo.raid_disks;
3889 sector_div(size, conf->geo.near_copies);
3890
3891
3892 size = size * conf->copies;
3893
3894
3895
3896
3897 size = DIV_ROUND_UP_SECTOR_T(size, conf->geo.raid_disks);
3898
3899 conf->dev_sectors = size << conf->geo.chunk_shift;
3900
3901 if (conf->geo.far_offset)
3902 conf->geo.stride = 1 << conf->geo.chunk_shift;
3903 else {
3904 sector_div(size, conf->geo.far_copies);
3905 conf->geo.stride = size << conf->geo.chunk_shift;
3906 }
3907 }
3908
3909 enum geo_type {geo_new, geo_old, geo_start};
3910 static int setup_geo(struct geom *geo, struct mddev *mddev, enum geo_type new)
3911 {
3912 int nc, fc, fo;
3913 int layout, chunk, disks;
3914 switch (new) {
3915 case geo_old:
3916 layout = mddev->layout;
3917 chunk = mddev->chunk_sectors;
3918 disks = mddev->raid_disks - mddev->delta_disks;
3919 break;
3920 case geo_new:
3921 layout = mddev->new_layout;
3922 chunk = mddev->new_chunk_sectors;
3923 disks = mddev->raid_disks;
3924 break;
3925 default:
3926 case geo_start:
3927
3928 layout = mddev->new_layout;
3929 chunk = mddev->new_chunk_sectors;
3930 disks = mddev->raid_disks + mddev->delta_disks;
3931 break;
3932 }
3933 if (layout >> 19)
3934 return -1;
3935 if (chunk < (PAGE_SIZE >> 9) ||
3936 !is_power_of_2(chunk))
3937 return -2;
3938 nc = layout & 255;
3939 fc = (layout >> 8) & 255;
3940 fo = layout & (1<<16);
3941 geo->raid_disks = disks;
3942 geo->near_copies = nc;
3943 geo->far_copies = fc;
3944 geo->far_offset = fo;
3945 switch (layout >> 17) {
3946 case 0:
3947 geo->far_set_size = disks;
3948 break;
3949 case 1:
3950
3951 geo->far_set_size = disks/fc;
3952 WARN(geo->far_set_size < fc,
3953 "This RAID10 layout does not provide data safety - please backup and create new array\n");
3954 break;
3955 case 2:
3956 geo->far_set_size = fc * nc;
3957 break;
3958 default:
3959 return -1;
3960 }
3961 geo->chunk_mask = chunk - 1;
3962 geo->chunk_shift = ffz(~chunk);
3963 return nc*fc;
3964 }
3965
3966 static struct r10conf *setup_conf(struct mddev *mddev)
3967 {
3968 struct r10conf *conf = NULL;
3969 int err = -EINVAL;
3970 struct geom geo;
3971 int copies;
3972
3973 copies = setup_geo(&geo, mddev, geo_new);
3974
3975 if (copies == -2) {
3976 pr_warn("md/raid10:%s: chunk size must be at least PAGE_SIZE(%ld) and be a power of 2.\n",
3977 mdname(mddev), PAGE_SIZE);
3978 goto out;
3979 }
3980
3981 if (copies < 2 || copies > mddev->raid_disks) {
3982 pr_warn("md/raid10:%s: unsupported raid10 layout: 0x%8x\n",
3983 mdname(mddev), mddev->new_layout);
3984 goto out;
3985 }
3986
3987 err = -ENOMEM;
3988 conf = kzalloc(sizeof(struct r10conf), GFP_KERNEL);
3989 if (!conf)
3990 goto out;
3991
3992
3993 conf->mirrors = kcalloc(mddev->raid_disks + max(0, -mddev->delta_disks),
3994 sizeof(struct raid10_info),
3995 GFP_KERNEL);
3996 if (!conf->mirrors)
3997 goto out;
3998
3999 conf->tmppage = alloc_page(GFP_KERNEL);
4000 if (!conf->tmppage)
4001 goto out;
4002
4003 conf->geo = geo;
4004 conf->copies = copies;
4005 err = mempool_init(&conf->r10bio_pool, NR_RAID_BIOS, r10bio_pool_alloc,
4006 rbio_pool_free, conf);
4007 if (err)
4008 goto out;
4009
4010 err = bioset_init(&conf->bio_split, BIO_POOL_SIZE, 0, 0);
4011 if (err)
4012 goto out;
4013
4014 calc_sectors(conf, mddev->dev_sectors);
4015 if (mddev->reshape_position == MaxSector) {
4016 conf->prev = conf->geo;
4017 conf->reshape_progress = MaxSector;
4018 } else {
4019 if (setup_geo(&conf->prev, mddev, geo_old) != conf->copies) {
4020 err = -EINVAL;
4021 goto out;
4022 }
4023 conf->reshape_progress = mddev->reshape_position;
4024 if (conf->prev.far_offset)
4025 conf->prev.stride = 1 << conf->prev.chunk_shift;
4026 else
4027
4028 conf->prev.stride = conf->dev_sectors;
4029 }
4030 conf->reshape_safe = conf->reshape_progress;
4031 spin_lock_init(&conf->device_lock);
4032 INIT_LIST_HEAD(&conf->retry_list);
4033 INIT_LIST_HEAD(&conf->bio_end_io_list);
4034
4035 spin_lock_init(&conf->resync_lock);
4036 init_waitqueue_head(&conf->wait_barrier);
4037 atomic_set(&conf->nr_pending, 0);
4038
4039 err = -ENOMEM;
4040 conf->thread = md_register_thread(raid10d, mddev, "raid10");
4041 if (!conf->thread)
4042 goto out;
4043
4044 conf->mddev = mddev;
4045 return conf;
4046
4047 out:
4048 if (conf) {
4049 mempool_exit(&conf->r10bio_pool);
4050 kfree(conf->mirrors);
4051 safe_put_page(conf->tmppage);
4052 bioset_exit(&conf->bio_split);
4053 kfree(conf);
4054 }
4055 return ERR_PTR(err);
4056 }
4057
4058 static void raid10_set_io_opt(struct r10conf *conf)
4059 {
4060 int raid_disks = conf->geo.raid_disks;
4061
4062 if (!(conf->geo.raid_disks % conf->geo.near_copies))
4063 raid_disks /= conf->geo.near_copies;
4064 blk_queue_io_opt(conf->mddev->queue, (conf->mddev->chunk_sectors << 9) *
4065 raid_disks);
4066 }
4067
4068 static int raid10_run(struct mddev *mddev)
4069 {
4070 struct r10conf *conf;
4071 int i, disk_idx;
4072 struct raid10_info *disk;
4073 struct md_rdev *rdev;
4074 sector_t size;
4075 sector_t min_offset_diff = 0;
4076 int first = 1;
4077
4078 if (mddev_init_writes_pending(mddev) < 0)
4079 return -ENOMEM;
4080
4081 if (mddev->private == NULL) {
4082 conf = setup_conf(mddev);
4083 if (IS_ERR(conf))
4084 return PTR_ERR(conf);
4085 mddev->private = conf;
4086 }
4087 conf = mddev->private;
4088 if (!conf)
4089 goto out;
4090
4091 if (mddev_is_clustered(conf->mddev)) {
4092 int fc, fo;
4093
4094 fc = (mddev->layout >> 8) & 255;
4095 fo = mddev->layout & (1<<16);
4096 if (fc > 1 || fo > 0) {
4097 pr_err("only near layout is supported by clustered raid10\n");
4099 goto out_free_conf;
4100 }
4101 }
4102
4103 mddev->thread = conf->thread;
4104 conf->thread = NULL;
4105
4106 if (mddev->queue) {
4107 blk_queue_max_discard_sectors(mddev->queue,
4108 UINT_MAX);
4109 blk_queue_max_write_zeroes_sectors(mddev->queue, 0);
4110 blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
4111 raid10_set_io_opt(conf);
4112 }
4113
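/*
 * Walk the member devices, slotting each into its mirror (or replacement)
 * position and tracking the smallest old/new data_offset gap needed for a
 * safe reshape.
 */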
4114 rdev_for_each(rdev, mddev) {
4115 long long diff;
4116
4117 disk_idx = rdev->raid_disk;
4118 if (disk_idx < 0)
4119 continue;
4120 if (disk_idx >= conf->geo.raid_disks &&
4121 disk_idx >= conf->prev.raid_disks)
4122 continue;
4123 disk = conf->mirrors + disk_idx;
4124
4125 if (test_bit(Replacement, &rdev->flags)) {
4126 if (disk->replacement)
4127 goto out_free_conf;
4128 disk->replacement = rdev;
4129 } else {
4130 if (disk->rdev)
4131 goto out_free_conf;
4132 disk->rdev = rdev;
4133 }
4134 diff = (rdev->new_data_offset - rdev->data_offset);
4135 if (!mddev->reshape_backwards)
4136 diff = -diff;
4137 if (diff < 0)
4138 diff = 0;
4139 if (first || diff < min_offset_diff)
4140 min_offset_diff = diff;
4141
4142 if (mddev->gendisk)
4143 disk_stack_limits(mddev->gendisk, rdev->bdev,
4144 rdev->data_offset << 9);
4145
4146 disk->head_position = 0;
4147 first = 0;
4148 }
4149
4150
4151 if (!enough(conf, -1)) {
4152 pr_err("md/raid10:%s: not enough operational mirrors.\n",
4153 mdname(mddev));
4154 goto out_free_conf;
4155 }
4156
4157 if (conf->reshape_progress != MaxSector) {
4158
4159 if (conf->geo.far_copies != 1 &&
4160 conf->geo.far_offset == 0)
4161 goto out_free_conf;
4162 if (conf->prev.far_copies != 1 &&
4163 conf->prev.far_offset == 0)
4164 goto out_free_conf;
4165 }
4166
4167 mddev->degraded = 0;
4168 for (i = 0;
4169 i < conf->geo.raid_disks
4170 || i < conf->prev.raid_disks;
4171 i++) {
4172
4173 disk = conf->mirrors + i;
4174
4175 if (!disk->rdev && disk->replacement) {
4176
4177 disk->rdev = disk->replacement;
4178 disk->replacement = NULL;
4179 clear_bit(Replacement, &disk->rdev->flags);
4180 }
4181
4182 if (!disk->rdev ||
4183 !test_bit(In_sync, &disk->rdev->flags)) {
4184 disk->head_position = 0;
4185 mddev->degraded++;
4186 if (disk->rdev &&
4187 disk->rdev->saved_raid_disk < 0)
4188 conf->fullsync = 1;
4189 }
4190
4191 if (disk->replacement &&
4192 !test_bit(In_sync, &disk->replacement->flags) &&
4193 disk->replacement->saved_raid_disk < 0) {
4194 conf->fullsync = 1;
4195 }
4196
4197 disk->recovery_disabled = mddev->recovery_disabled - 1;
4198 }
4199
4200 if (mddev->recovery_cp != MaxSector)
4201 pr_notice("md/raid10:%s: not clean -- starting background reconstruction\n",
4202 mdname(mddev));
4203 pr_info("md/raid10:%s: active with %d out of %d devices\n",
4204 mdname(mddev), conf->geo.raid_disks - mddev->degraded,
4205 conf->geo.raid_disks);
4206
4207
4208
4209 mddev->dev_sectors = conf->dev_sectors;
4210 size = raid10_size(mddev, 0, 0);
4211 md_set_array_sectors(mddev, size);
4212 mddev->resync_max_sectors = size;
4213 set_bit(MD_FAILFAST_SUPPORTED, &mddev->flags);
4214
4215 if (md_integrity_register(mddev))
4216 goto out_free_conf;
4217
4218 if (conf->reshape_progress != MaxSector) {
4219 unsigned long before_length, after_length;
4220
4221 before_length = ((1 << conf->prev.chunk_shift) *
4222 conf->prev.far_copies);
4223 after_length = ((1 << conf->geo.chunk_shift) *
4224 conf->geo.far_copies);
4225
4226 if (max(before_length, after_length) > min_offset_diff) {
4227
4228 pr_warn("md/raid10: offset difference not enough to continue reshape\n");
4229 goto out_free_conf;
4230 }
4231 conf->offset_diff = min_offset_diff;
4232
4233 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
4234 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
4235 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
4236 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
4237 mddev->sync_thread = md_register_thread(md_do_sync, mddev,
4238 "reshape");
4239 if (!mddev->sync_thread)
4240 goto out_free_conf;
4241 }
4242
4243 return 0;
4244
4245 out_free_conf:
4246 md_unregister_thread(&mddev->thread);
4247 mempool_exit(&conf->r10bio_pool);
4248 safe_put_page(conf->tmppage);
4249 kfree(conf->mirrors);
4250 kfree(conf);
4251 mddev->private = NULL;
4252 out:
4253 return -EIO;
4254 }
4255
4256 static void raid10_free(struct mddev *mddev, void *priv)
4257 {
4258 struct r10conf *conf = priv;
4259
4260 mempool_exit(&conf->r10bio_pool);
4261 safe_put_page(conf->tmppage);
4262 kfree(conf->mirrors);
4263 kfree(conf->mirrors_old);
4264 kfree(conf->mirrors_new);
4265 bioset_exit(&conf->bio_split);
4266 kfree(conf);
4267 }
4268
4269 static void raid10_quiesce(struct mddev *mddev, int quiesce)
4270 {
4271 struct r10conf *conf = mddev->private;
4272
4273 if (quiesce)
4274 raise_barrier(conf, 0);
4275 else
4276 lower_barrier(conf);
4277 }
4278
4279 static int raid10_resize(struct mddev *mddev, sector_t sectors)
4280 {
4281 /* Resize of 'far' arrays is not supported.
4282  * For 'near' and 'offset' arrays we can set the
4283  * number of sectors used to be an appropriate multiple
4284  * of the chunk size.
4285  * For 'offset', this is far_copies*chunksize.
4286  * For 'near' the multiplier is the LCM of
4287  * near_copies and raid_disks.
4288  * So if far_copies > 1 && !far_offset, fail.
4289  * Else find LCM(raid_disks, near_copy)*far_copies and
4290  * multiply by chunk_size.  Then round to this number.
4291  * This is mostly done by raid10_size().
4292  */
4293 struct r10conf *conf = mddev->private;
4294 sector_t oldsize, size;
4295
4296 if (mddev->reshape_position != MaxSector)
4297 return -EBUSY;
4298
4299 if (conf->geo.far_copies > 1 && !conf->geo.far_offset)
4300 return -EINVAL;
4301
4302 oldsize = raid10_size(mddev, 0, 0);
4303 size = raid10_size(mddev, sectors, 0);
4304 if (mddev->external_size &&
4305 mddev->array_sectors > size)
4306 return -EINVAL;
4307 if (mddev->bitmap) {
4308 int ret = md_bitmap_resize(mddev->bitmap, size, 0, 0);
4309 if (ret)
4310 return ret;
4311 }
4312 md_set_array_sectors(mddev, size);
4313 if (sectors > mddev->dev_sectors &&
4314 mddev->recovery_cp > oldsize) {
4315 mddev->recovery_cp = oldsize;
4316 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4317 }
4318 calc_sectors(conf, sectors);
4319 mddev->dev_sectors = conf->dev_sectors;
4320 mddev->resync_max_sectors = size;
4321 return 0;
4322 }
4323
4324 static void *raid10_takeover_raid0(struct mddev *mddev, sector_t size, int devs)
4325 {
4326 struct md_rdev *rdev;
4327 struct r10conf *conf;
4328
4329 if (mddev->degraded > 0) {
4330 pr_warn("md/raid10:%s: Error: degraded raid0!\n",
4331 mdname(mddev));
4332 return ERR_PTR(-EINVAL);
4333 }
4334 sector_div(size, devs);
4335
4336
4337 mddev->new_level = 10;
4338
4339 mddev->new_layout = (1<<8) + 2;
4340 mddev->new_chunk_sectors = mddev->chunk_sectors;
4341 mddev->delta_disks = mddev->raid_disks;
4342 mddev->raid_disks *= 2;
4343
4344 mddev->recovery_cp = MaxSector;
4345 mddev->dev_sectors = size;
4346
4347 conf = setup_conf(mddev);
4348 if (!IS_ERR(conf)) {
4349 rdev_for_each(rdev, mddev)
4350 if (rdev->raid_disk >= 0) {
4351 rdev->new_raid_disk = rdev->raid_disk * 2;
4352 rdev->sectors = size;
4353 }
4354 conf->barrier = 1;
4355 }
4356
4357 return conf;
4358 }
4359
4360 static void *raid10_takeover(struct mddev *mddev)
4361 {
4362 struct r0conf *raid0_conf;
4363
4364
4365
4366
4367 if (mddev->level == 0) {
4368
4369 raid0_conf = mddev->private;
4370 if (raid0_conf->nr_strip_zones > 1) {
4371 pr_warn("md/raid10:%s: cannot takeover raid 0 with more than one zone.\n",
4372 mdname(mddev));
4373 return ERR_PTR(-EINVAL);
4374 }
4375 return raid10_takeover_raid0(mddev,
4376 raid0_conf->strip_zone->zone_end,
4377 raid0_conf->strip_zone->nb_dev);
4378 }
4379 return ERR_PTR(-EINVAL);
4380 }
4381
4382 static int raid10_check_reshape(struct mddev *mddev)
4383 {
4384 /* Called when there is a request to change
4385  * - layout (to ->new_layout)
4386  * - chunk size (to ->new_chunk_sectors)
4387  * - raid_disks (by delta_disks)
4388  * or when trying to restart a reshape that was ongoing.
4389  *
4390  * We need to validate the request and possibly allocate
4391  * space if that might be an issue later.
4392  *
4393  * Currently we reject any reshape of a 'far' mode array,
4394  * allow chunk size to change if the new size is acceptable,
4395  * allow raid_disks to increase, and allow
4396  * a switch between 'near' and 'offset' modes.
4397  */
4398 struct r10conf *conf = mddev->private;
4399 struct geom geo;
4400
4401 if (conf->geo.far_copies != 1 && !conf->geo.far_offset)
4402 return -EINVAL;
4403
4404 if (setup_geo(&geo, mddev, geo_start) != conf->copies)
4405
4406 return -EINVAL;
4407 if (geo.far_copies > 1 && !geo.far_offset)
4408
4409 return -EINVAL;
4410
4411 if (mddev->array_sectors & geo.chunk_mask)
4412
4413 return -EINVAL;
4414
4415 if (!enough(conf, -1))
4416 return -EINVAL;
4417
4418 kfree(conf->mirrors_new);
4419 conf->mirrors_new = NULL;
4420 if (mddev->delta_disks > 0) {
4421
4422 conf->mirrors_new =
4423 kcalloc(mddev->raid_disks + mddev->delta_disks,
4424 sizeof(struct raid10_info),
4425 GFP_KERNEL);
4426 if (!conf->mirrors_new)
4427 return -ENOMEM;
4428 }
4429 return 0;
4430 }
4431
4432 /*
4433  * Need to check if the array has failed when deciding whether to:
4434  *  - start an array
4435  *  - remove non-faulty devices
4436  *  - add a spare
4437  *  - allow a reshape
4438  * This determination is simple when no reshape is happening.
4439  * However if there is a reshape, we need to carefully check
4440  * both the before and after sections.
4441  * This is because some failed devices may only affect one
4442  * of the two sections, and some non-in_sync devices may
4443  * be in_sync in the section most affected by failed devices.
4444  */
4445 static int calc_degraded(struct r10conf *conf)
4446 {
4447 int degraded, degraded2;
4448 int i;
4449
4450 rcu_read_lock();
4451 degraded = 0;
4452
4453 for (i = 0; i < conf->prev.raid_disks; i++) {
4454 struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
4455 if (!rdev || test_bit(Faulty, &rdev->flags))
4456 degraded++;
4457 else if (!test_bit(In_sync, &rdev->flags))
4458
4459
4460
4461
4462 degraded++;
4463 }
4464 rcu_read_unlock();
4465 if (conf->geo.raid_disks == conf->prev.raid_disks)
4466 return degraded;
4467 rcu_read_lock();
4468 degraded2 = 0;
4469 for (i = 0; i < conf->geo.raid_disks; i++) {
4470 struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
4471 if (!rdev || test_bit(Faulty, &rdev->flags))
4472 degraded2++;
4473 else if (!test_bit(In_sync, &rdev->flags)) {
4474
4475
4476
4477
4478
4479 if (conf->geo.raid_disks <= conf->prev.raid_disks)
4480 degraded2++;
4481 }
4482 }
4483 rcu_read_unlock();
4484 if (degraded2 > degraded)
4485 return degraded2;
4486 return degraded;
4487 }
4488
4489 static int raid10_start_reshape(struct mddev *mddev)
4490 {
4491 /* A 'reshape' has been requested. This commits
4492  * the various 'new' fields and sets MD_RECOVERY_RESHAPE.
4493  * This also checks if there are enough spares and adds them
4494  * to the list.
4495  * We currently require enough spares to make the final
4496  * array non-degraded.  We also require that the difference
4497  * between old and new data_offset - on each device - is
4498  * enough that we never risk over-writing.
4499  */
4501 unsigned long before_length, after_length;
4502 sector_t min_offset_diff = 0;
4503 int first = 1;
4504 struct geom new;
4505 struct r10conf *conf = mddev->private;
4506 struct md_rdev *rdev;
4507 int spares = 0;
4508 int ret;
4509
4510 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4511 return -EBUSY;
4512
4513 if (setup_geo(&new, mddev, geo_start) != conf->copies)
4514 return -EINVAL;
4515
4516 before_length = ((1 << conf->prev.chunk_shift) *
4517 conf->prev.far_copies);
4518 after_length = ((1 << conf->geo.chunk_shift) *
4519 conf->geo.far_copies);
4520
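/*
 * Count usable spares and find the minimum old/new data_offset difference
 * across members; the reshape must never risk the new layout overwriting
 * the old one.
 */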
4521 rdev_for_each(rdev, mddev) {
4522 if (!test_bit(In_sync, &rdev->flags)
4523 && !test_bit(Faulty, &rdev->flags))
4524 spares++;
4525 if (rdev->raid_disk >= 0) {
4526 long long diff = (rdev->new_data_offset
4527 - rdev->data_offset);
4528 if (!mddev->reshape_backwards)
4529 diff = -diff;
4530 if (diff < 0)
4531 diff = 0;
4532 if (first || diff < min_offset_diff)
4533 min_offset_diff = diff;
4534 first = 0;
4535 }
4536 }
4537
4538 if (max(before_length, after_length) > min_offset_diff)
4539 return -EINVAL;
4540
4541 if (spares < mddev->delta_disks)
4542 return -EINVAL;
4543
4544 conf->offset_diff = min_offset_diff;
4545 spin_lock_irq(&conf->device_lock);
4546 if (conf->mirrors_new) {
4547 memcpy(conf->mirrors_new, conf->mirrors,
4548 sizeof(struct raid10_info)*conf->prev.raid_disks);
4549 smp_mb();
4550 kfree(conf->mirrors_old);
4551 conf->mirrors_old = conf->mirrors;
4552 conf->mirrors = conf->mirrors_new;
4553 conf->mirrors_new = NULL;
4554 }
4555 setup_geo(&conf->geo, mddev, geo_start);
4556 smp_mb();
4557 if (mddev->reshape_backwards) {
4558 sector_t size = raid10_size(mddev, 0, 0);
4559 if (size < mddev->array_sectors) {
4560 spin_unlock_irq(&conf->device_lock);
4561 pr_warn("md/raid10:%s: array size must be reduced before the number of disks\n",
4562 mdname(mddev));
4563 return -EINVAL;
4564 }
4565 mddev->resync_max_sectors = size;
4566 conf->reshape_progress = size;
4567 } else
4568 conf->reshape_progress = 0;
4569 conf->reshape_safe = conf->reshape_progress;
4570 spin_unlock_irq(&conf->device_lock);
4571
4572 if (mddev->delta_disks && mddev->bitmap) {
4573 struct mdp_superblock_1 *sb = NULL;
4574 sector_t oldsize, newsize;
4575
4576 oldsize = raid10_size(mddev, 0, 0);
4577 newsize = raid10_size(mddev, 0, conf->geo.raid_disks);
4578
4579 if (!mddev_is_clustered(mddev)) {
4580 ret = md_bitmap_resize(mddev->bitmap, newsize, 0, 0);
4581 if (ret)
4582 goto abort;
4583 else
4584 goto out;
4585 }
4586
4587 rdev_for_each(rdev, mddev) {
4588 if (rdev->raid_disk > -1 &&
4589 !test_bit(Faulty, &rdev->flags))
4590 sb = page_address(rdev->sb_page);
4591 }
4592
4593
4594
4595
4596
4597
4598 if ((sb && (le32_to_cpu(sb->feature_map) &
4599 MD_FEATURE_RESHAPE_ACTIVE)) || (oldsize == newsize))
4600 goto out;
4601
4602 ret = md_bitmap_resize(mddev->bitmap, newsize, 0, 0);
4603 if (ret)
4604 goto abort;
4605
4606 ret = md_cluster_ops->resize_bitmaps(mddev, newsize, oldsize);
4607 if (ret) {
4608 md_bitmap_resize(mddev->bitmap, oldsize, 0, 0);
4609 goto abort;
4610 }
4611 }
4612 out:
4613 if (mddev->delta_disks > 0) {
4614 rdev_for_each(rdev, mddev)
4615 if (rdev->raid_disk < 0 &&
4616 !test_bit(Faulty, &rdev->flags)) {
4617 if (raid10_add_disk(mddev, rdev) == 0) {
4618 if (rdev->raid_disk >=
4619 conf->prev.raid_disks)
4620 set_bit(In_sync, &rdev->flags);
4621 else
4622 rdev->recovery_offset = 0;
4623
4624
4625 sysfs_link_rdev(mddev, rdev);
4626 }
4627 } else if (rdev->raid_disk >= conf->prev.raid_disks
4628 && !test_bit(Faulty, &rdev->flags)) {
4629
4630 set_bit(In_sync, &rdev->flags);
4631 }
4632 }
4633
4634
4635
4636
4637 spin_lock_irq(&conf->device_lock);
4638 mddev->degraded = calc_degraded(conf);
4639 spin_unlock_irq(&conf->device_lock);
4640 mddev->raid_disks = conf->geo.raid_disks;
4641 mddev->reshape_position = conf->reshape_progress;
4642 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
4643
4644 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
4645 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
4646 clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
4647 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
4648 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
4649
4650 mddev->sync_thread = md_register_thread(md_do_sync, mddev,
4651 "reshape");
4652 if (!mddev->sync_thread) {
4653 ret = -EAGAIN;
4654 goto abort;
4655 }
4656 conf->reshape_checkpoint = jiffies;
4657 md_wakeup_thread(mddev->sync_thread);
4658 md_new_event();
4659 return 0;
4660
4661 abort:
4662 mddev->recovery = 0;
4663 spin_lock_irq(&conf->device_lock);
4664 conf->geo = conf->prev;
4665 mddev->raid_disks = conf->geo.raid_disks;
4666 rdev_for_each(rdev, mddev)
4667 rdev->new_data_offset = rdev->data_offset;
4668 smp_wmb();
4669 conf->reshape_progress = MaxSector;
4670 conf->reshape_safe = MaxSector;
4671 mddev->reshape_position = MaxSector;
4672 spin_unlock_irq(&conf->device_lock);
4673 return ret;
4674 }
4675
/*
 * Calculate the last device address that could contain any block from
 * the chunk (in the given geometry) that includes array address 's',
 * and return the next chunk-aligned device address after it.
 */
4682 static sector_t last_dev_address(sector_t s, struct geom *geo)
4683 {
4684 s = (s | geo->chunk_mask) + 1;
4685 s >>= geo->chunk_shift;
4686 s *= geo->near_copies;
4687 s = DIV_ROUND_UP_SECTOR_T(s, geo->raid_disks);
4688 s *= geo->far_copies;
4689 s <<= geo->chunk_shift;
4690 return s;
4691 }
4692
/*
 * Calculate the first device address that could contain any block from
 * the chunk (in the given geometry) that includes array address 's'.
 * The result is chunk-aligned.
 */
4697 static sector_t first_dev_address(sector_t s, struct geom *geo)
4698 {
4699 s >>= geo->chunk_shift;
4700 s *= geo->near_copies;
4701 sector_div(s, geo->raid_disks);
4702 s *= geo->far_copies;
4703 s <<= geo->chunk_shift;
4704 return s;
4705 }
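
/*
 * For illustration only: a worked example with a hypothetical geometry,
 * not taken from any configuration in this file.  With 128-sector chunks
 * (chunk_shift = 7), near_copies = 2, far_copies = 1 and 4 raid disks,
 * the chunk containing array sector 300 is array chunk 2, stored on two
 * of the disks at device sectors 128..255.  For s = 300,
 * first_dev_address() returns 128 and last_dev_address() returns 256,
 * i.e. the chunk-aligned device addresses that bracket that data.
 */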
4706
4707 static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
4708 int *skipped)
4709 {
/*
 * Reshape moves the array's data from the old ('prev') geometry to the
 * new ('geo') geometry.  Each call handles one window of array sectors:
 *
 *  - If we are restarting part-way through, first report the sectors
 *    that are already done so the generic resync code can skip them.
 *  - Pick the next window of at most RESYNC_SECTORS.  For a backwards
 *    reshape the window is taken from the top of the unprocessed region
 *    and reshape_progress counts down; otherwise it counts up from zero.
 *  - If the device addresses about to be written in the new layout could
 *    overlap addresses that a crash-restart might still read in the old
 *    layout (tracked by reshape_safe), or the checkpoint is stale, write
 *    out the superblock first so reshape_position on disk is current.
 *  - For each r10bio in the window, read the data from one device using
 *    the old layout; when that read completes, reshape_request_write()
 *    writes it out to every copy in the new layout.
 */
4747 struct r10conf *conf = mddev->private;
4748 struct r10bio *r10_bio;
4749 sector_t next, safe, last;
4750 int max_sectors;
4751 int nr_sectors;
4752 int s;
4753 struct md_rdev *rdev;
4754 int need_flush = 0;
4755 struct bio *blist;
4756 struct bio *bio, *read_bio;
4757 int sectors_done = 0;
4758 struct page **pages;
4759
4760 if (sector_nr == 0) {
/* If restarting in the middle, skip the initial sectors. */
4762 if (mddev->reshape_backwards &&
4763 conf->reshape_progress < raid10_size(mddev, 0, 0)) {
4764 sector_nr = (raid10_size(mddev, 0, 0)
4765 - conf->reshape_progress);
4766 } else if (!mddev->reshape_backwards &&
4767 conf->reshape_progress > 0)
4768 sector_nr = conf->reshape_progress;
4769 if (sector_nr) {
4770 mddev->curr_resync_completed = sector_nr;
4771 sysfs_notify_dirent_safe(mddev->sysfs_completed);
4772 *skipped = 1;
4773 return sector_nr;
4774 }
4775 }
4776
/*
 * Progress across calls is tracked in conf->reshape_progress rather
 * than in sector_nr, as sector_nr does not work well for a backwards
 * reshape.
 */
4781 if (mddev->reshape_backwards) {
/* 'next' is the lowest device address that we might write to for this window in the new layout. */
4785 next = first_dev_address(conf->reshape_progress - 1,
4786 &conf->geo);
4787
/* 'safe' bounds the device addresses that a restart might still need to read from in the old layout. */
4791 safe = last_dev_address(conf->reshape_safe - 1,
4792 &conf->prev);
4793
4794 if (next + conf->offset_diff < safe)
4795 need_flush = 1;
4796
4797 last = conf->reshape_progress - 1;
4798 sector_nr = last & ~(sector_t)(conf->geo.chunk_mask
4799 & conf->prev.chunk_mask);
4800 if (sector_nr + RESYNC_SECTORS < last)
4801 sector_nr = last + 1 - RESYNC_SECTORS;
4802 } else {
/* 'next' is just past the last device address that we might write to for this window in the new layout. */
4806 next = last_dev_address(conf->reshape_progress, &conf->geo);
4807
/* 'safe' is the first device address that we might read from in the old layout after a restart. */
4811 safe = first_dev_address(conf->reshape_safe, &conf->prev);
4812
/* If 'next' might reach beyond 'safe', writing could clobber data a restart would still need, so update the metadata first. */
4816 if (next > safe + conf->offset_diff)
4817 need_flush = 1;
4818
4819 sector_nr = conf->reshape_progress;
4820 last = sector_nr | (conf->geo.chunk_mask
4821 & conf->prev.chunk_mask);
4822
4823 if (sector_nr + RESYNC_SECTORS <= last)
4824 last = sector_nr + RESYNC_SECTORS - 1;
4825 }
4826
4827 if (need_flush ||
4828 time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) {
/* The on-disk reshape_position needs updating before we continue. */
4830 wait_barrier(conf, false);
4831 mddev->reshape_position = conf->reshape_progress;
4832 if (mddev->reshape_backwards)
4833 mddev->curr_resync_completed = raid10_size(mddev, 0, 0)
4834 - conf->reshape_progress;
4835 else
4836 mddev->curr_resync_completed = conf->reshape_progress;
4837 conf->reshape_checkpoint = jiffies;
4838 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
4839 md_wakeup_thread(mddev->thread);
4840 wait_event(mddev->sb_wait, mddev->sb_flags == 0 ||
4841 test_bit(MD_RECOVERY_INTR, &mddev->recovery));
4842 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
4843 allow_barrier(conf);
4844 return sectors_done;
4845 }
4846 conf->reshape_safe = mddev->reshape_position;
4847 allow_barrier(conf);
4848 }
4849
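/* Keep regular array I/O out of the way while this window is relocated. */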
4850 raise_barrier(conf, 0);
4851 read_more:
/* Now schedule reads for blocks from sector_nr to last. */
4853 r10_bio = raid10_alloc_init_r10buf(conf);
4854 r10_bio->state = 0;
4855 raise_barrier(conf, 1);
4856 atomic_set(&r10_bio->remaining, 0);
4857 r10_bio->mddev = mddev;
4858 r10_bio->sector = sector_nr;
4859 set_bit(R10BIO_IsReshape, &r10_bio->state);
4860 r10_bio->sectors = last - sector_nr + 1;
4861 rdev = read_balance(conf, r10_bio, &max_sectors);
4862 BUG_ON(!test_bit(R10BIO_Previous, &r10_bio->state));
4863
4864 if (!rdev) {
/*
 * No device can supply this range, so the reshape cannot continue;
 * record the interruption and report what was done so far.
 */
4869 mempool_free(r10_bio, &conf->r10buf_pool);
4870 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4871 return sectors_done;
4872 }
4873
4874 read_bio = bio_alloc_bioset(rdev->bdev, RESYNC_PAGES, REQ_OP_READ,
4875 GFP_KERNEL, &mddev->bio_set);
4876 read_bio->bi_iter.bi_sector = (r10_bio->devs[r10_bio->read_slot].addr
4877 + rdev->data_offset);
4878 read_bio->bi_private = r10_bio;
4879 read_bio->bi_end_io = end_reshape_read;
4880 r10_bio->master_bio = read_bio;
4881 r10_bio->read_slot = r10_bio->devs[r10_bio->read_slot].devnum;
4882
/*
 * For clustered arrays, advertise the window that is being reshaped so
 * the other nodes keep their I/O away from it.
 */
4887 if (mddev_is_clustered(mddev) && conf->cluster_sync_high <= sector_nr) {
4888 struct mdp_superblock_1 *sb = NULL;
4889 int sb_reshape_pos = 0;
4890
4891 conf->cluster_sync_low = sector_nr;
4892 conf->cluster_sync_high = sector_nr + CLUSTER_RESYNC_WINDOW_SECTORS;
4893 sb = page_address(rdev->sb_page);
4894 if (sb) {
4895 sb_reshape_pos = le64_to_cpu(sb->reshape_position);
/*
 * The reshape position recorded on disk may lag our in-memory progress,
 * and that is all the other nodes can see, so start the suspended
 * window from the on-disk position if it is lower.
 */
4901 if (sb_reshape_pos < conf->cluster_sync_low)
4902 conf->cluster_sync_low = sb_reshape_pos;
4903 }
4904
4905 md_cluster_ops->resync_info_update(mddev, conf->cluster_sync_low,
4906 conf->cluster_sync_high);
4907 }
4908
/* Now work out where each block lives in the new layout. */
4910 __raid10_find_phys(&conf->geo, r10_bio);
4911
4912 blist = read_bio;
4913 read_bio->bi_next = NULL;
4914
4915 rcu_read_lock();
4916 for (s = 0; s < conf->copies*2; s++) {
4917 struct bio *b;
4918 int d = r10_bio->devs[s/2].devnum;
4919 struct md_rdev *rdev2;
4920 if (s&1) {
4921 rdev2 = rcu_dereference(conf->mirrors[d].replacement);
4922 b = r10_bio->devs[s/2].repl_bio;
4923 } else {
4924 rdev2 = rcu_dereference(conf->mirrors[d].rdev);
4925 b = r10_bio->devs[s/2].bio;
4926 }
4927 if (!rdev2 || test_bit(Faulty, &rdev2->flags))
4928 continue;
4929
4930 bio_set_dev(b, rdev2->bdev);
4931 b->bi_iter.bi_sector = r10_bio->devs[s/2].addr +
4932 rdev2->new_data_offset;
4933 b->bi_end_io = end_reshape_write;
4934 bio_set_op_attrs(b, REQ_OP_WRITE, 0);
4935 b->bi_next = blist;
4936 blist = b;
4937 }
4938
/* Now add as many pages as possible to all of these bios. */
4941 nr_sectors = 0;
4942 pages = get_resync_pages(r10_bio->devs[0].bio)->pages;
4943 for (s = 0 ; s < max_sectors; s += PAGE_SIZE >> 9) {
4944 struct page *page = pages[s / (PAGE_SIZE >> 9)];
4945 int len = (max_sectors - s) << 9;
4946 if (len > PAGE_SIZE)
4947 len = PAGE_SIZE;
4948 for (bio = blist; bio ; bio = bio->bi_next) {
/*
 * This cannot fail: each bio was sized to hold RESYNC_PAGES
 * pages when it was allocated.
 */
4953 bio_add_page(bio, page, len, 0);
4954 }
4955 sector_nr += len >> 9;
4956 nr_sectors += len >> 9;
4957 }
4958 rcu_read_unlock();
4959 r10_bio->sectors = nr_sectors;
4960
/* Submit the read for this window; the matching writes are issued from reshape_request_write() once it completes. */
4962 md_sync_acct_bio(read_bio, r10_bio->sectors);
4963 atomic_inc(&r10_bio->remaining);
4964 read_bio->bi_next = NULL;
4965 submit_bio_noacct(read_bio);
4966 sectors_done += nr_sectors;
4967 if (sector_nr <= last)
4968 goto read_more;
4969
4970 lower_barrier(conf);
4971
/* Advance (or, for a backwards reshape, retreat) reshape_progress by the sectors handled in this call. */
4975 if (mddev->reshape_backwards)
4976 conf->reshape_progress -= sectors_done;
4977 else
4978 conf->reshape_progress += sectors_done;
4979
4980 return sectors_done;
4981 }
4982
4983 static void end_reshape_request(struct r10bio *r10_bio);
4984 static int handle_reshape_read_error(struct mddev *mddev,
4985 struct r10bio *r10_bio);
4986 static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio)
4987 {
/*
 * The reshape read has completed; write the data out to every copy in
 * the new layout.  If the read failed, first try to re-read the data a
 * page at a time from any other copy in the old layout, or give up.
 */
4993 struct r10conf *conf = mddev->private;
4994 int s;
4995
4996 if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
4997 if (handle_reshape_read_error(mddev, r10_bio) < 0) {
/* Reshape has been aborted. */
4999 md_done_sync(mddev, r10_bio->sectors, 0);
5000 return;
5001 }
5002
/* The data is definitely in the pages; schedule the writes. */
5006 atomic_set(&r10_bio->remaining, 1);
5007 for (s = 0; s < conf->copies*2; s++) {
5008 struct bio *b;
5009 int d = r10_bio->devs[s/2].devnum;
5010 struct md_rdev *rdev;
5011 rcu_read_lock();
5012 if (s&1) {
5013 rdev = rcu_dereference(conf->mirrors[d].replacement);
5014 b = r10_bio->devs[s/2].repl_bio;
5015 } else {
5016 rdev = rcu_dereference(conf->mirrors[d].rdev);
5017 b = r10_bio->devs[s/2].bio;
5018 }
5019 if (!rdev || test_bit(Faulty, &rdev->flags)) {
5020 rcu_read_unlock();
5021 continue;
5022 }
5023 atomic_inc(&rdev->nr_pending);
5024 rcu_read_unlock();
5025 md_sync_acct_bio(b, r10_bio->sectors);
5026 atomic_inc(&r10_bio->remaining);
5027 b->bi_next = NULL;
5028 submit_bio_noacct(b);
5029 }
5030 end_reshape_request(r10_bio);
5031 }
5032
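/*
 * Called when the data copy for the reshape has finished (unless it was
 * interrupted): make the new geometry the only geometry, clear the
 * progress markers and refresh the queue's optimal I/O size.
 */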
5033 static void end_reshape(struct r10conf *conf)
5034 {
5035 if (test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery))
5036 return;
5037
5038 spin_lock_irq(&conf->device_lock);
5039 conf->prev = conf->geo;
5040 md_finish_reshape(conf->mddev);
5041 smp_wmb();
5042 conf->reshape_progress = MaxSector;
5043 conf->reshape_safe = MaxSector;
5044 spin_unlock_irq(&conf->device_lock);
5045
5046 if (conf->mddev->queue)
5047 raid10_set_io_opt(conf);
5048 conf->fullsync = 0;
5049 }
5050
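/*
 * Used on clustered arrays: adopt the reshape position broadcast by the
 * node performing the reshape, provided it lies within the advertised
 * resync window (or the reshape has completed).
 */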
5051 static void raid10_update_reshape_pos(struct mddev *mddev)
5052 {
5053 struct r10conf *conf = mddev->private;
5054 sector_t lo, hi;
5055
5056 md_cluster_ops->resync_info_get(mddev, &lo, &hi);
5057 if (((mddev->reshape_position <= hi) && (mddev->reshape_position >= lo))
5058 || mddev->reshape_position == MaxSector)
5059 conf->reshape_progress = mddev->reshape_position;
5060 else
5061 WARN_ON_ONCE(1);
5062 }
5063
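/*
 * The reshape read failed.  Try to recover the data one page at a time
 * from any other in-sync copy in the old layout.  Returns 0 on success;
 * otherwise the reshape is marked as interrupted and an error returned.
 */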
5064 static int handle_reshape_read_error(struct mddev *mddev,
5065 struct r10bio *r10_bio)
5066 {
/* Use synchronous reads to fetch the blocks from another copy. */
5068 int sectors = r10_bio->sectors;
5069 struct r10conf *conf = mddev->private;
5070 struct r10bio *r10b;
5071 int slot = 0;
5072 int idx = 0;
5073 struct page **pages;
5074
5075 r10b = kmalloc(struct_size(r10b, devs, conf->copies), GFP_NOIO);
5076 if (!r10b) {
5077 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
5078 return -ENOMEM;
5079 }
5080
/* Reshape bios share the resync pages attached to devs[0].bio. */
5082 pages = get_resync_pages(r10_bio->devs[0].bio)->pages;
5083
5084 r10b->sector = r10_bio->sector;
5085 __raid10_find_phys(&conf->prev, r10b);
5086
5087 while (sectors) {
5088 int s = sectors;
5089 int success = 0;
5090 int first_slot = slot;
5091
5092 if (s > (PAGE_SIZE >> 9))
5093 s = PAGE_SIZE >> 9;
5094
5095 rcu_read_lock();
5096 while (!success) {
5097 int d = r10b->devs[slot].devnum;
5098 struct md_rdev *rdev = rcu_dereference(conf->mirrors[d].rdev);
5099 sector_t addr;
5100 if (rdev == NULL ||
5101 test_bit(Faulty, &rdev->flags) ||
5102 !test_bit(In_sync, &rdev->flags))
5103 goto failed;
5104
5105 addr = r10b->devs[slot].addr + idx * PAGE_SIZE;
5106 atomic_inc(&rdev->nr_pending);
5107 rcu_read_unlock();
5108 success = sync_page_io(rdev,
5109 addr,
5110 s << 9,
5111 pages[idx],
5112 REQ_OP_READ, false);
5113 rdev_dec_pending(rdev, mddev);
5114 rcu_read_lock();
5115 if (success)
5116 break;
5117 failed:
5118 slot++;
5119 if (slot >= conf->copies)
5120 slot = 0;
5121 if (slot == first_slot)
5122 break;
5123 }
5124 rcu_read_unlock();
5125 if (!success) {
/* Could not read this block from any copy; give up. */
5127 set_bit(MD_RECOVERY_INTR,
5128 &mddev->recovery);
5129 kfree(r10b);
5130 return -EIO;
5131 }
5132 sectors -= s;
5133 idx++;
5134 }
5135 kfree(r10b);
5136 return 0;
5137 }
5138
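/*
 * Completion handler for the writes issued during reshape.  A write
 * error is passed to md_error(); in all cases the pending count is
 * dropped and the r10bio reference released.
 */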
5139 static void end_reshape_write(struct bio *bio)
5140 {
5141 struct r10bio *r10_bio = get_resync_r10bio(bio);
5142 struct mddev *mddev = r10_bio->mddev;
5143 struct r10conf *conf = mddev->private;
5144 int d;
5145 int slot;
5146 int repl;
5147 struct md_rdev *rdev = NULL;
5148
5149 d = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
5150 if (repl)
5151 rdev = conf->mirrors[d].replacement;
5152 if (!rdev) {
5153 smp_mb();
5154 rdev = conf->mirrors[d].rdev;
5155 }
5156
5157 if (bio->bi_status) {
/* A write error during reshape: report it so md can fail the device. */
5159 md_error(mddev, rdev);
5160 }
5161
5162 rdev_dec_pending(rdev, mddev);
5163 end_reshape_request(r10_bio);
5164 }
5165
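/*
 * Drop one reference on a reshape r10bio; when the last I/O completes,
 * report the sectors as done and release the read bio and the buffer.
 */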
5166 static void end_reshape_request(struct r10bio *r10_bio)
5167 {
5168 if (!atomic_dec_and_test(&r10_bio->remaining))
5169 return;
5170 md_done_sync(r10_bio->mddev, r10_bio->sectors, 1);
5171 bio_put(r10_bio->master_bio);
5172 put_buf(r10_bio);
5173 }
5174
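/*
 * Called by md when the reshape thread finishes.  If devices were added,
 * extend the resync range to cover the grown array; if devices were
 * removed, mark the now-unused slots out of sync.  Finally make the new
 * layout and chunk size permanent.
 */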
5175 static void raid10_finish_reshape(struct mddev *mddev)
5176 {
5177 struct r10conf *conf = mddev->private;
5178
5179 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
5180 return;
5181
5182 if (mddev->delta_disks > 0) {
5183 if (mddev->recovery_cp > mddev->resync_max_sectors) {
5184 mddev->recovery_cp = mddev->resync_max_sectors;
5185 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
5186 }
5187 mddev->resync_max_sectors = mddev->array_sectors;
5188 } else {
5189 int d;
5190 rcu_read_lock();
5191 for (d = conf->geo.raid_disks ;
5192 d < conf->geo.raid_disks - mddev->delta_disks;
5193 d++) {
5194 struct md_rdev *rdev = rcu_dereference(conf->mirrors[d].rdev);
5195 if (rdev)
5196 clear_bit(In_sync, &rdev->flags);
5197 rdev = rcu_dereference(conf->mirrors[d].replacement);
5198 if (rdev)
5199 clear_bit(In_sync, &rdev->flags);
5200 }
5201 rcu_read_unlock();
5202 }
5203 mddev->layout = mddev->new_layout;
5204 mddev->chunk_sectors = 1 << conf->geo.chunk_shift;
5205 mddev->reshape_position = MaxSector;
5206 mddev->delta_disks = 0;
5207 mddev->reshape_backwards = 0;
5208 }
5209
5210 static struct md_personality raid10_personality =
5211 {
5212 .name = "raid10",
5213 .level = 10,
5214 .owner = THIS_MODULE,
5215 .make_request = raid10_make_request,
5216 .run = raid10_run,
5217 .free = raid10_free,
5218 .status = raid10_status,
5219 .error_handler = raid10_error,
5220 .hot_add_disk = raid10_add_disk,
5221 .hot_remove_disk= raid10_remove_disk,
5222 .spare_active = raid10_spare_active,
5223 .sync_request = raid10_sync_request,
5224 .quiesce = raid10_quiesce,
5225 .size = raid10_size,
5226 .resize = raid10_resize,
5227 .takeover = raid10_takeover,
5228 .check_reshape = raid10_check_reshape,
5229 .start_reshape = raid10_start_reshape,
5230 .finish_reshape = raid10_finish_reshape,
5231 .update_reshape_pos = raid10_update_reshape_pos,
5232 };
5233
5234 static int __init raid_init(void)
5235 {
5236 return register_md_personality(&raid10_personality);
5237 }
5238
5239 static void raid_exit(void)
5240 {
5241 unregister_md_personality(&raid10_personality);
5242 }
5243
5244 module_init(raid_init);
5245 module_exit(raid_exit);
5246 MODULE_LICENSE("GPL");
5247 MODULE_DESCRIPTION("RAID10 (striped mirror) personality for MD");
5248 MODULE_ALIAS("md-personality-9");
5249 MODULE_ALIAS("md-raid10");
5250 MODULE_ALIAS("md-level-10");