0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014 #include <linux/blkdev.h>
0015 #include <linux/module.h>
0016 #include <linux/raid/md_u.h>
0017 #include <linux/seq_file.h>
0018 #include <linux/slab.h>
0019 #include "md.h"
0020 #include "md-multipath.h"
0021
0022 #define MAX_WORK_PER_DISK 128
0023
0024 #define NR_RESERVED_BUFS 32
0025
0026 static int multipath_map (struct mpconf *conf)
0027 {
0028 int i, disks = conf->raid_disks;
0029
0030
0031
0032
0033
0034
0035 rcu_read_lock();
0036 for (i = 0; i < disks; i++) {
0037 struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev);
0038 if (rdev && test_bit(In_sync, &rdev->flags) &&
0039 !test_bit(Faulty, &rdev->flags)) {
0040 atomic_inc(&rdev->nr_pending);
0041 rcu_read_unlock();
0042 return i;
0043 }
0044 }
0045 rcu_read_unlock();
0046
0047 pr_crit_ratelimited("multipath_map(): no more operational IO paths?\n");
0048 return (-1);
0049 }
0050
0051 static void multipath_reschedule_retry (struct multipath_bh *mp_bh)
0052 {
0053 unsigned long flags;
0054 struct mddev *mddev = mp_bh->mddev;
0055 struct mpconf *conf = mddev->private;
0056
0057 spin_lock_irqsave(&conf->device_lock, flags);
0058 list_add(&mp_bh->retry_list, &conf->retry_list);
0059 spin_unlock_irqrestore(&conf->device_lock, flags);
0060 md_wakeup_thread(mddev->thread);
0061 }
0062
0063
0064
0065
0066
0067
0068 static void multipath_end_bh_io(struct multipath_bh *mp_bh, blk_status_t status)
0069 {
0070 struct bio *bio = mp_bh->master_bio;
0071 struct mpconf *conf = mp_bh->mddev->private;
0072
0073 bio->bi_status = status;
0074 bio_endio(bio);
0075 mempool_free(mp_bh, &conf->pool);
0076 }
0077
0078 static void multipath_end_request(struct bio *bio)
0079 {
0080 struct multipath_bh *mp_bh = bio->bi_private;
0081 struct mpconf *conf = mp_bh->mddev->private;
0082 struct md_rdev *rdev = conf->multipaths[mp_bh->path].rdev;
0083
0084 if (!bio->bi_status)
0085 multipath_end_bh_io(mp_bh, 0);
0086 else if (!(bio->bi_opf & REQ_RAHEAD)) {
0087
0088
0089
0090 md_error (mp_bh->mddev, rdev);
0091 pr_info("multipath: %pg: rescheduling sector %llu\n",
0092 rdev->bdev,
0093 (unsigned long long)bio->bi_iter.bi_sector);
0094 multipath_reschedule_retry(mp_bh);
0095 } else
0096 multipath_end_bh_io(mp_bh, bio->bi_status);
0097 rdev_dec_pending(rdev, conf->mddev);
0098 }
0099
0100 static bool multipath_make_request(struct mddev *mddev, struct bio * bio)
0101 {
0102 struct mpconf *conf = mddev->private;
0103 struct multipath_bh * mp_bh;
0104 struct multipath_info *multipath;
0105
0106 if (unlikely(bio->bi_opf & REQ_PREFLUSH)
0107 && md_flush_request(mddev, bio))
0108 return true;
0109
0110 mp_bh = mempool_alloc(&conf->pool, GFP_NOIO);
0111
0112 mp_bh->master_bio = bio;
0113 mp_bh->mddev = mddev;
0114
0115 mp_bh->path = multipath_map(conf);
0116 if (mp_bh->path < 0) {
0117 bio_io_error(bio);
0118 mempool_free(mp_bh, &conf->pool);
0119 return true;
0120 }
0121 multipath = conf->multipaths + mp_bh->path;
0122
0123 bio_init_clone(multipath->rdev->bdev, &mp_bh->bio, bio, GFP_NOIO);
0124
0125 mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset;
0126 mp_bh->bio.bi_opf |= REQ_FAILFAST_TRANSPORT;
0127 mp_bh->bio.bi_end_io = multipath_end_request;
0128 mp_bh->bio.bi_private = mp_bh;
0129 mddev_check_write_zeroes(mddev, &mp_bh->bio);
0130 submit_bio_noacct(&mp_bh->bio);
0131 return true;
0132 }
0133
0134 static void multipath_status(struct seq_file *seq, struct mddev *mddev)
0135 {
0136 struct mpconf *conf = mddev->private;
0137 int i;
0138
0139 seq_printf (seq, " [%d/%d] [", conf->raid_disks,
0140 conf->raid_disks - mddev->degraded);
0141 rcu_read_lock();
0142 for (i = 0; i < conf->raid_disks; i++) {
0143 struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev);
0144 seq_printf (seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
0145 }
0146 rcu_read_unlock();
0147 seq_putc(seq, ']');
0148 }
0149
0150
0151
0152
0153 static void multipath_error (struct mddev *mddev, struct md_rdev *rdev)
0154 {
0155 struct mpconf *conf = mddev->private;
0156
0157 if (conf->raid_disks - mddev->degraded <= 1) {
0158
0159
0160
0161
0162
0163 pr_warn("multipath: only one IO path left and IO error.\n");
0164
0165 return;
0166 }
0167
0168
0169
0170 if (test_and_clear_bit(In_sync, &rdev->flags)) {
0171 unsigned long flags;
0172 spin_lock_irqsave(&conf->device_lock, flags);
0173 mddev->degraded++;
0174 spin_unlock_irqrestore(&conf->device_lock, flags);
0175 }
0176 set_bit(Faulty, &rdev->flags);
0177 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
0178 pr_err("multipath: IO failure on %pg, disabling IO path.\n"
0179 "multipath: Operation continuing on %d IO paths.\n",
0180 rdev->bdev,
0181 conf->raid_disks - mddev->degraded);
0182 }
0183
0184 static void print_multipath_conf (struct mpconf *conf)
0185 {
0186 int i;
0187 struct multipath_info *tmp;
0188
0189 pr_debug("MULTIPATH conf printout:\n");
0190 if (!conf) {
0191 pr_debug("(conf==NULL)\n");
0192 return;
0193 }
0194 pr_debug(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded,
0195 conf->raid_disks);
0196
0197 for (i = 0; i < conf->raid_disks; i++) {
0198 tmp = conf->multipaths + i;
0199 if (tmp->rdev)
0200 pr_debug(" disk%d, o:%d, dev:%pg\n",
0201 i,!test_bit(Faulty, &tmp->rdev->flags),
0202 tmp->rdev->bdev);
0203 }
0204 }
0205
0206 static int multipath_add_disk(struct mddev *mddev, struct md_rdev *rdev)
0207 {
0208 struct mpconf *conf = mddev->private;
0209 int err = -EEXIST;
0210 int path;
0211 struct multipath_info *p;
0212 int first = 0;
0213 int last = mddev->raid_disks - 1;
0214
0215 if (rdev->raid_disk >= 0)
0216 first = last = rdev->raid_disk;
0217
0218 print_multipath_conf(conf);
0219
0220 for (path = first; path <= last; path++)
0221 if ((p=conf->multipaths+path)->rdev == NULL) {
0222 disk_stack_limits(mddev->gendisk, rdev->bdev,
0223 rdev->data_offset << 9);
0224
0225 err = md_integrity_add_rdev(rdev, mddev);
0226 if (err)
0227 break;
0228 spin_lock_irq(&conf->device_lock);
0229 mddev->degraded--;
0230 rdev->raid_disk = path;
0231 set_bit(In_sync, &rdev->flags);
0232 spin_unlock_irq(&conf->device_lock);
0233 rcu_assign_pointer(p->rdev, rdev);
0234 err = 0;
0235 break;
0236 }
0237
0238 print_multipath_conf(conf);
0239
0240 return err;
0241 }
0242
0243 static int multipath_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
0244 {
0245 struct mpconf *conf = mddev->private;
0246 int err = 0;
0247 int number = rdev->raid_disk;
0248 struct multipath_info *p = conf->multipaths + number;
0249
0250 print_multipath_conf(conf);
0251
0252 if (rdev == p->rdev) {
0253 if (test_bit(In_sync, &rdev->flags) ||
0254 atomic_read(&rdev->nr_pending)) {
0255 pr_warn("hot-remove-disk, slot %d is identified but is still operational!\n", number);
0256 err = -EBUSY;
0257 goto abort;
0258 }
0259 p->rdev = NULL;
0260 if (!test_bit(RemoveSynchronized, &rdev->flags)) {
0261 synchronize_rcu();
0262 if (atomic_read(&rdev->nr_pending)) {
0263
0264 err = -EBUSY;
0265 p->rdev = rdev;
0266 goto abort;
0267 }
0268 }
0269 err = md_integrity_register(mddev);
0270 }
0271 abort:
0272
0273 print_multipath_conf(conf);
0274 return err;
0275 }
0276
0277
0278
0279
0280
0281
0282
0283
0284
0285 static void multipathd(struct md_thread *thread)
0286 {
0287 struct mddev *mddev = thread->mddev;
0288 struct multipath_bh *mp_bh;
0289 struct bio *bio;
0290 unsigned long flags;
0291 struct mpconf *conf = mddev->private;
0292 struct list_head *head = &conf->retry_list;
0293
0294 md_check_recovery(mddev);
0295 for (;;) {
0296 spin_lock_irqsave(&conf->device_lock, flags);
0297 if (list_empty(head))
0298 break;
0299 mp_bh = list_entry(head->prev, struct multipath_bh, retry_list);
0300 list_del(head->prev);
0301 spin_unlock_irqrestore(&conf->device_lock, flags);
0302
0303 bio = &mp_bh->bio;
0304 bio->bi_iter.bi_sector = mp_bh->master_bio->bi_iter.bi_sector;
0305
0306 if ((mp_bh->path = multipath_map (conf))<0) {
0307 pr_err("multipath: %pg: unrecoverable IO read error for block %llu\n",
0308 bio->bi_bdev,
0309 (unsigned long long)bio->bi_iter.bi_sector);
0310 multipath_end_bh_io(mp_bh, BLK_STS_IOERR);
0311 } else {
0312 pr_err("multipath: %pg: redirecting sector %llu to another IO path\n",
0313 bio->bi_bdev,
0314 (unsigned long long)bio->bi_iter.bi_sector);
0315 *bio = *(mp_bh->master_bio);
0316 bio->bi_iter.bi_sector +=
0317 conf->multipaths[mp_bh->path].rdev->data_offset;
0318 bio_set_dev(bio, conf->multipaths[mp_bh->path].rdev->bdev);
0319 bio->bi_opf |= REQ_FAILFAST_TRANSPORT;
0320 bio->bi_end_io = multipath_end_request;
0321 bio->bi_private = mp_bh;
0322 submit_bio_noacct(bio);
0323 }
0324 }
0325 spin_unlock_irqrestore(&conf->device_lock, flags);
0326 }
0327
0328 static sector_t multipath_size(struct mddev *mddev, sector_t sectors, int raid_disks)
0329 {
0330 WARN_ONCE(sectors || raid_disks,
0331 "%s does not support generic reshape\n", __func__);
0332
0333 return mddev->dev_sectors;
0334 }
0335
0336 static int multipath_run (struct mddev *mddev)
0337 {
0338 struct mpconf *conf;
0339 int disk_idx;
0340 struct multipath_info *disk;
0341 struct md_rdev *rdev;
0342 int working_disks;
0343 int ret;
0344
0345 if (md_check_no_bitmap(mddev))
0346 return -EINVAL;
0347
0348 if (mddev->level != LEVEL_MULTIPATH) {
0349 pr_warn("multipath: %s: raid level not set to multipath IO (%d)\n",
0350 mdname(mddev), mddev->level);
0351 goto out;
0352 }
0353
0354
0355
0356
0357
0358
0359 conf = kzalloc(sizeof(struct mpconf), GFP_KERNEL);
0360 mddev->private = conf;
0361 if (!conf)
0362 goto out;
0363
0364 conf->multipaths = kcalloc(mddev->raid_disks,
0365 sizeof(struct multipath_info),
0366 GFP_KERNEL);
0367 if (!conf->multipaths)
0368 goto out_free_conf;
0369
0370 working_disks = 0;
0371 rdev_for_each(rdev, mddev) {
0372 disk_idx = rdev->raid_disk;
0373 if (disk_idx < 0 ||
0374 disk_idx >= mddev->raid_disks)
0375 continue;
0376
0377 disk = conf->multipaths + disk_idx;
0378 disk->rdev = rdev;
0379 disk_stack_limits(mddev->gendisk, rdev->bdev,
0380 rdev->data_offset << 9);
0381
0382 if (!test_bit(Faulty, &rdev->flags))
0383 working_disks++;
0384 }
0385
0386 conf->raid_disks = mddev->raid_disks;
0387 conf->mddev = mddev;
0388 spin_lock_init(&conf->device_lock);
0389 INIT_LIST_HEAD(&conf->retry_list);
0390
0391 if (!working_disks) {
0392 pr_warn("multipath: no operational IO paths for %s\n",
0393 mdname(mddev));
0394 goto out_free_conf;
0395 }
0396 mddev->degraded = conf->raid_disks - working_disks;
0397
0398 ret = mempool_init_kmalloc_pool(&conf->pool, NR_RESERVED_BUFS,
0399 sizeof(struct multipath_bh));
0400 if (ret)
0401 goto out_free_conf;
0402
0403 mddev->thread = md_register_thread(multipathd, mddev,
0404 "multipath");
0405 if (!mddev->thread)
0406 goto out_free_conf;
0407
0408 pr_info("multipath: array %s active with %d out of %d IO paths\n",
0409 mdname(mddev), conf->raid_disks - mddev->degraded,
0410 mddev->raid_disks);
0411
0412
0413
0414 md_set_array_sectors(mddev, multipath_size(mddev, 0, 0));
0415
0416 if (md_integrity_register(mddev))
0417 goto out_free_conf;
0418
0419 return 0;
0420
0421 out_free_conf:
0422 mempool_exit(&conf->pool);
0423 kfree(conf->multipaths);
0424 kfree(conf);
0425 mddev->private = NULL;
0426 out:
0427 return -EIO;
0428 }
0429
0430 static void multipath_free(struct mddev *mddev, void *priv)
0431 {
0432 struct mpconf *conf = priv;
0433
0434 mempool_exit(&conf->pool);
0435 kfree(conf->multipaths);
0436 kfree(conf);
0437 }
0438
0439 static struct md_personality multipath_personality =
0440 {
0441 .name = "multipath",
0442 .level = LEVEL_MULTIPATH,
0443 .owner = THIS_MODULE,
0444 .make_request = multipath_make_request,
0445 .run = multipath_run,
0446 .free = multipath_free,
0447 .status = multipath_status,
0448 .error_handler = multipath_error,
0449 .hot_add_disk = multipath_add_disk,
0450 .hot_remove_disk= multipath_remove_disk,
0451 .size = multipath_size,
0452 };
0453
0454 static int __init multipath_init (void)
0455 {
0456 return register_md_personality (&multipath_personality);
0457 }
0458
0459 static void __exit multipath_exit (void)
0460 {
0461 unregister_md_personality (&multipath_personality);
0462 }
0463
0464 module_init(multipath_init);
0465 module_exit(multipath_exit);
0466 MODULE_LICENSE("GPL");
0467 MODULE_DESCRIPTION("simple multi-path personality for MD (deprecated)");
0468 MODULE_ALIAS("md-personality-7");
0469 MODULE_ALIAS("md-multipath");
0470 MODULE_ALIAS("md-level--4");