// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * multipath.c : Multiple Devices driver for Linux
 *
 * Copyright (C) 1999, 2000, 2001 Ingo Molnar, Red Hat
 *
 * Copyright (C) 1996, 1997, 1998 Ingo Molnar, Miguel de Icaza, Gadi Oxman
 *
 * MULTIPATH management functions.
 *
 * derived from raid1.c.
 */

#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/raid/md_u.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include "md.h"
#include "md-multipath.h"

#define MAX_WORK_PER_DISK 128

#define NR_RESERVED_BUFS    32

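/*
 * Pick the first operational path and take a reference on its rdev
 * (nr_pending) so it cannot be removed while the I/O is in flight.
 * Returns the path index, or -1 if no path is usable.
 */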
static int multipath_map(struct mpconf *conf)
{
    int i, disks = conf->raid_disks;

    /*
     * Later we could do read balancing here; for now we simply use
     * the first operational disk.
     */

    rcu_read_lock();
    for (i = 0; i < disks; i++) {
        struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev);
        if (rdev && test_bit(In_sync, &rdev->flags) &&
            !test_bit(Faulty, &rdev->flags)) {
            atomic_inc(&rdev->nr_pending);
            rcu_read_unlock();
            return i;
        }
    }
    rcu_read_unlock();

    pr_crit_ratelimited("multipath_map(): no more operational IO paths?\n");
    return -1;
}

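/*
 * Queue a failed request on the retry list and wake the multipathd
 * thread so it can resubmit the I/O on another path.
 */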
static void multipath_reschedule_retry(struct multipath_bh *mp_bh)
{
    unsigned long flags;
    struct mddev *mddev = mp_bh->mddev;
    struct mpconf *conf = mddev->private;

    spin_lock_irqsave(&conf->device_lock, flags);
    list_add(&mp_bh->retry_list, &conf->retry_list);
    spin_unlock_irqrestore(&conf->device_lock, flags);
    md_wakeup_thread(mddev->thread);
}

/*
 * multipath_end_bh_io() is called when we have finished servicing a multipathed
 * operation and are ready to return a success/failure code to the buffer
 * cache layer.
 */
static void multipath_end_bh_io(struct multipath_bh *mp_bh, blk_status_t status)
{
    struct bio *bio = mp_bh->master_bio;
    struct mpconf *conf = mp_bh->mddev->private;

    bio->bi_status = status;
    bio_endio(bio);
    mempool_free(mp_bh, &conf->pool);
}

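/*
 * Per-path bio completion handler: on success (or a failed readahead)
 * complete the original bio; on a real I/O error mark the path faulty
 * via md_error() and reschedule the request on a surviving path.
 */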
static void multipath_end_request(struct bio *bio)
{
    struct multipath_bh *mp_bh = bio->bi_private;
    struct mpconf *conf = mp_bh->mddev->private;
    struct md_rdev *rdev = conf->multipaths[mp_bh->path].rdev;

    if (!bio->bi_status)
        multipath_end_bh_io(mp_bh, BLK_STS_OK);
    else if (!(bio->bi_opf & REQ_RAHEAD)) {
        /*
         * oops, IO error:
         */
        md_error(mp_bh->mddev, rdev);
        pr_info("multipath: %pg: rescheduling sector %llu\n",
            rdev->bdev,
            (unsigned long long)bio->bi_iter.bi_sector);
        multipath_reschedule_retry(mp_bh);
    } else
        multipath_end_bh_io(mp_bh, bio->bi_status);
    rdev_dec_pending(rdev, conf->mddev);
}

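/*
 * Entry point for all I/O to the array: clone the incoming bio,
 * redirect it at the first operational path and submit it. Flushes
 * are handed off to the generic md flush machinery.
 */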
static bool multipath_make_request(struct mddev *mddev, struct bio *bio)
{
    struct mpconf *conf = mddev->private;
    struct multipath_bh *mp_bh;
    struct multipath_info *multipath;

    if (unlikely(bio->bi_opf & REQ_PREFLUSH)
        && md_flush_request(mddev, bio))
        return true;

    mp_bh = mempool_alloc(&conf->pool, GFP_NOIO);

    mp_bh->master_bio = bio;
    mp_bh->mddev = mddev;

    mp_bh->path = multipath_map(conf);
    if (mp_bh->path < 0) {
        bio_io_error(bio);
        mempool_free(mp_bh, &conf->pool);
        return true;
    }
    multipath = conf->multipaths + mp_bh->path;

    bio_init_clone(multipath->rdev->bdev, &mp_bh->bio, bio, GFP_NOIO);

    mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset;
    mp_bh->bio.bi_opf |= REQ_FAILFAST_TRANSPORT;
    mp_bh->bio.bi_end_io = multipath_end_request;
    mp_bh->bio.bi_private = mp_bh;
    mddev_check_write_zeroes(mddev, &mp_bh->bio);
    submit_bio_noacct(&mp_bh->bio);
    return true;
}

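/* Emit the usual md status line: [total/working] followed by a U/_ path map. */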
static void multipath_status(struct seq_file *seq, struct mddev *mddev)
{
    struct mpconf *conf = mddev->private;
    int i;

    seq_printf(seq, " [%d/%d] [", conf->raid_disks,
           conf->raid_disks - mddev->degraded);
    rcu_read_lock();
    for (i = 0; i < conf->raid_disks; i++) {
        struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev);
        seq_printf(seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
    }
    rcu_read_unlock();
    seq_putc(seq, ']');
}

/*
 * Careful, this can execute in IRQ contexts as well!
 */
static void multipath_error(struct mddev *mddev, struct md_rdev *rdev)
{
    struct mpconf *conf = mddev->private;

    if (conf->raid_disks - mddev->degraded <= 1) {
        /*
         * Uh oh, we can do nothing if this is our last path, but
         * first check if this is a queued request for a device
         * which has just failed.
         */
        pr_warn("multipath: only one IO path left and IO error.\n");
        /* leave it active... it's all we have */
        return;
    }
    /*
     * Mark disk as unusable
     */
    if (test_and_clear_bit(In_sync, &rdev->flags)) {
        unsigned long flags;
        spin_lock_irqsave(&conf->device_lock, flags);
        mddev->degraded++;
        spin_unlock_irqrestore(&conf->device_lock, flags);
    }
    set_bit(Faulty, &rdev->flags);
    set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
    pr_err("multipath: IO failure on %pg, disabling IO path.\n"
           "multipath: Operation continuing on %d IO paths.\n",
           rdev->bdev,
           conf->raid_disks - mddev->degraded);
}

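/* Dump the current path table to the debug log (wd = working disks, rd = raid disks). */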
static void print_multipath_conf(struct mpconf *conf)
{
    int i;
    struct multipath_info *tmp;

    pr_debug("MULTIPATH conf printout:\n");
    if (!conf) {
        pr_debug("(conf==NULL)\n");
        return;
    }
    pr_debug(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded,
         conf->raid_disks);

    for (i = 0; i < conf->raid_disks; i++) {
        tmp = conf->multipaths + i;
        if (tmp->rdev)
            pr_debug(" disk%d, o:%d, dev:%pg\n",
                 i, !test_bit(Faulty, &tmp->rdev->flags),
                 tmp->rdev->bdev);
    }
}

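/*
 * Hot-add a new path: find a free slot (or the slot requested via
 * rdev->raid_disk), stack the queue limits and publish the rdev with
 * rcu_assign_pointer() so multipath_map() can start using it.
 */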
static int multipath_add_disk(struct mddev *mddev, struct md_rdev *rdev)
{
    struct mpconf *conf = mddev->private;
    int err = -EEXIST;
    int path;
    struct multipath_info *p;
    int first = 0;
    int last = mddev->raid_disks - 1;

    if (rdev->raid_disk >= 0)
        first = last = rdev->raid_disk;

    print_multipath_conf(conf);

    for (path = first; path <= last; path++)
        if ((p = conf->multipaths + path)->rdev == NULL) {
            disk_stack_limits(mddev->gendisk, rdev->bdev,
                      rdev->data_offset << 9);

            err = md_integrity_add_rdev(rdev, mddev);
            if (err)
                break;
            spin_lock_irq(&conf->device_lock);
            mddev->degraded--;
            rdev->raid_disk = path;
            set_bit(In_sync, &rdev->flags);
            spin_unlock_irq(&conf->device_lock);
            rcu_assign_pointer(p->rdev, rdev);
            err = 0;
            break;
        }

    print_multipath_conf(conf);

    return err;
}

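/*
 * Hot-remove a path. The slot is cleared first, then we wait for an
 * RCU grace period and re-check nr_pending to make sure no in-flight
 * I/O still holds a reference; if it does, the removal is backed out
 * with -EBUSY.
 */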
static int multipath_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
{
    struct mpconf *conf = mddev->private;
    int err = 0;
    int number = rdev->raid_disk;
    struct multipath_info *p = conf->multipaths + number;

    print_multipath_conf(conf);

    if (rdev == p->rdev) {
        if (test_bit(In_sync, &rdev->flags) ||
            atomic_read(&rdev->nr_pending)) {
            pr_warn("hot-remove-disk, slot %d is identified but is still operational!\n", number);
            err = -EBUSY;
            goto abort;
        }
        p->rdev = NULL;
        if (!test_bit(RemoveSynchronized, &rdev->flags)) {
            synchronize_rcu();
            if (atomic_read(&rdev->nr_pending)) {
                /* lost the race, try later */
                err = -EBUSY;
                p->rdev = rdev;
                goto abort;
            }
        }
        err = md_integrity_register(mddev);
    }
abort:

    print_multipath_conf(conf);
    return err;
}

/*
 * This is a kernel thread which:
 *
 *  1.  Retries failed read operations on working multipaths.
 *  2.  Updates the raid superblock when problems are encountered.
 *  3.  Performs writes following reads for array synchronising.
 */

static void multipathd(struct md_thread *thread)
{
    struct mddev *mddev = thread->mddev;
    struct multipath_bh *mp_bh;
    struct bio *bio;
    unsigned long flags;
    struct mpconf *conf = mddev->private;
    struct list_head *head = &conf->retry_list;

    md_check_recovery(mddev);
    for (;;) {
        spin_lock_irqsave(&conf->device_lock, flags);
        if (list_empty(head))
            break;
        mp_bh = list_entry(head->prev, struct multipath_bh, retry_list);
        list_del(head->prev);
        spin_unlock_irqrestore(&conf->device_lock, flags);

        bio = &mp_bh->bio;
        bio->bi_iter.bi_sector = mp_bh->master_bio->bi_iter.bi_sector;

        if ((mp_bh->path = multipath_map(conf)) < 0) {
            pr_err("multipath: %pg: unrecoverable IO read error for block %llu\n",
                   bio->bi_bdev,
                   (unsigned long long)bio->bi_iter.bi_sector);
            multipath_end_bh_io(mp_bh, BLK_STS_IOERR);
        } else {
            pr_err("multipath: %pg: redirecting sector %llu to another IO path\n",
                   bio->bi_bdev,
                   (unsigned long long)bio->bi_iter.bi_sector);
            *bio = *(mp_bh->master_bio);
            bio->bi_iter.bi_sector +=
                conf->multipaths[mp_bh->path].rdev->data_offset;
            bio_set_dev(bio, conf->multipaths[mp_bh->path].rdev->bdev);
            bio->bi_opf |= REQ_FAILFAST_TRANSPORT;
            bio->bi_end_io = multipath_end_request;
            bio->bi_private = mp_bh;
            submit_bio_noacct(bio);
        }
    }
    spin_unlock_irqrestore(&conf->device_lock, flags);
}

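/*
 * Every path holds a full copy of the data, so the array size is just
 * the component device size; reshaping is not supported.
 */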
static sector_t multipath_size(struct mddev *mddev, sector_t sectors, int raid_disks)
{
    WARN_ONCE(sectors || raid_disks,
          "%s does not support generic reshape\n", __func__);

    return mddev->dev_sectors;
}

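/*
 * Set up the personality at array start: allocate the per-array
 * configuration, register the working paths, create the retry mempool
 * and start the multipathd thread.
 */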
static int multipath_run(struct mddev *mddev)
{
    struct mpconf *conf;
    int disk_idx;
    struct multipath_info *disk;
    struct md_rdev *rdev;
    int working_disks;
    int ret;

    if (md_check_no_bitmap(mddev))
        return -EINVAL;

    if (mddev->level != LEVEL_MULTIPATH) {
        pr_warn("multipath: %s: raid level not set to multipath IO (%d)\n",
            mdname(mddev), mddev->level);
        goto out;
    }
    /*
     * copy the already verified devices into our private MULTIPATH
     * bookkeeping area. [whatever we allocate in multipath_run()
     * should be freed in multipath_free()]
     */

    conf = kzalloc(sizeof(struct mpconf), GFP_KERNEL);
    mddev->private = conf;
    if (!conf)
        goto out;

    conf->multipaths = kcalloc(mddev->raid_disks,
                   sizeof(struct multipath_info),
                   GFP_KERNEL);
    if (!conf->multipaths)
        goto out_free_conf;

    working_disks = 0;
    rdev_for_each(rdev, mddev) {
        disk_idx = rdev->raid_disk;
        if (disk_idx < 0 ||
            disk_idx >= mddev->raid_disks)
            continue;

        disk = conf->multipaths + disk_idx;
        disk->rdev = rdev;
        disk_stack_limits(mddev->gendisk, rdev->bdev,
                  rdev->data_offset << 9);

        if (!test_bit(Faulty, &rdev->flags))
            working_disks++;
    }

    conf->raid_disks = mddev->raid_disks;
    conf->mddev = mddev;
    spin_lock_init(&conf->device_lock);
    INIT_LIST_HEAD(&conf->retry_list);

    if (!working_disks) {
        pr_warn("multipath: no operational IO paths for %s\n",
            mdname(mddev));
        goto out_free_conf;
    }
    mddev->degraded = conf->raid_disks - working_disks;

    ret = mempool_init_kmalloc_pool(&conf->pool, NR_RESERVED_BUFS,
                    sizeof(struct multipath_bh));
    if (ret)
        goto out_free_conf;

    mddev->thread = md_register_thread(multipathd, mddev,
                       "multipath");
    if (!mddev->thread)
        goto out_free_conf;

    pr_info("multipath: array %s active with %d out of %d IO paths\n",
        mdname(mddev), conf->raid_disks - mddev->degraded,
        mddev->raid_disks);
    /*
     * Ok, everything is just fine now
     */
    md_set_array_sectors(mddev, multipath_size(mddev, 0, 0));

    if (md_integrity_register(mddev))
        goto out_free_conf;

    return 0;

out_free_conf:
    mempool_exit(&conf->pool);
    kfree(conf->multipaths);
    kfree(conf);
    mddev->private = NULL;
out:
    return -EIO;
}

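/* Tear down everything allocated in multipath_run(). */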
static void multipath_free(struct mddev *mddev, void *priv)
{
    struct mpconf *conf = priv;

    mempool_exit(&conf->pool);
    kfree(conf->multipaths);
    kfree(conf);
}

static struct md_personality multipath_personality = {
    .name       = "multipath",
    .level      = LEVEL_MULTIPATH,
    .owner      = THIS_MODULE,
    .make_request   = multipath_make_request,
    .run        = multipath_run,
    .free       = multipath_free,
    .status     = multipath_status,
    .error_handler  = multipath_error,
    .hot_add_disk   = multipath_add_disk,
    .hot_remove_disk = multipath_remove_disk,
    .size       = multipath_size,
};

static int __init multipath_init(void)
{
    return register_md_personality(&multipath_personality);
}

static void __exit multipath_exit(void)
{
    unregister_md_personality(&multipath_personality);
}

module_init(multipath_init);
module_exit(multipath_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("simple multi-path personality for MD (deprecated)");
MODULE_ALIAS("md-personality-7"); /* MULTIPATH */
MODULE_ALIAS("md-multipath");
MODULE_ALIAS("md-level--4");