Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Copyright (C) 2001-2003 Sistina Software (UK) Limited.
0003  *
0004  * This file is released under the GPL.
0005  */
0006 
0007 #include "dm.h"
0008 #include <linux/device-mapper.h>
0009 
0010 #include <linux/module.h>
0011 #include <linux/init.h>
0012 #include <linux/blkdev.h>
0013 #include <linux/bio.h>
0014 #include <linux/dax.h>
0015 #include <linux/slab.h>
0016 #include <linux/log2.h>
0017 
0018 #define DM_MSG_PREFIX "striped"
0019 #define DM_IO_ERROR_THRESHOLD 15
0020 
/*
 * Per-member state: one backing device participating in the stripe set.
 */
struct stripe {
    struct dm_dev *dev;
    /* Sector offset into @dev where this stripe's data starts */
    sector_t physical_start;

    /* I/O error count for this device; throttles event generation */
    atomic_t error_count;
};
0027 
/*
 * Context for one striped target instance.
 */
struct stripe_c {
    uint32_t stripes;
    /* log2(stripes) when stripes is a power of 2, else -1 (use division) */
    int stripes_shift;

    /* The size of this target / num. stripes */
    sector_t stripe_width;

    /* Chunk size in sectors */
    uint32_t chunk_size;
    /* log2(chunk_size) when chunk_size is a power of 2, else -1 */
    int chunk_size_shift;

    /* Needed for handling events */
    struct dm_target *ti;

    /* Work struct used for triggering events*/
    struct work_struct trigger_event;

    /* Flexible array: one entry per stripe, allocated with the context */
    struct stripe stripe[];
};
0046 
0047 /*
0048  * An event is triggered whenever a drive
0049  * drops out of a stripe volume.
0050  */
0051 static void trigger_event(struct work_struct *work)
0052 {
0053     struct stripe_c *sc = container_of(work, struct stripe_c,
0054                        trigger_event);
0055     dm_table_event(sc->ti->table);
0056 }
0057 
0058 /*
0059  * Parse a single <dev> <sector> pair
0060  */
0061 static int get_stripe(struct dm_target *ti, struct stripe_c *sc,
0062               unsigned int stripe, char **argv)
0063 {
0064     unsigned long long start;
0065     char dummy;
0066     int ret;
0067 
0068     if (sscanf(argv[1], "%llu%c", &start, &dummy) != 1)
0069         return -EINVAL;
0070 
0071     ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table),
0072                 &sc->stripe[stripe].dev);
0073     if (ret)
0074         return ret;
0075 
0076     sc->stripe[stripe].physical_start = start;
0077 
0078     return 0;
0079 }
0080 
0081 /*
0082  * Construct a striped mapping.
0083  * <number of stripes> <chunk size> [<dev_path> <offset>]+
0084  */
/*
 * Construct a striped mapping.
 * <number of stripes> <chunk size> [<dev_path> <offset>]+
 *
 * Validates the geometry (target length must divide evenly into
 * stripes and chunks), allocates the context with its flexible
 * stripe[] array, then parses and opens each member device.
 * On any failure after devices have been acquired, the already
 * acquired ones are released before returning.
 */
static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
    struct stripe_c *sc;
    sector_t width, tmp_len;
    uint32_t stripes;
    uint32_t chunk_size;
    int r;
    unsigned int i;

    if (argc < 2) {
        ti->error = "Not enough arguments";
        return -EINVAL;
    }

    if (kstrtouint(argv[0], 10, &stripes) || !stripes) {
        ti->error = "Invalid stripe count";
        return -EINVAL;
    }

    if (kstrtouint(argv[1], 10, &chunk_size) || !chunk_size) {
        ti->error = "Invalid chunk_size";
        return -EINVAL;
    }

    /* Each stripe must get an equal share of the target length */
    width = ti->len;
    if (sector_div(width, stripes)) {
        ti->error = "Target length not divisible by "
            "number of stripes";
        return -EINVAL;
    }

    /* ...and that share must be a whole number of chunks */
    tmp_len = width;
    if (sector_div(tmp_len, chunk_size)) {
        ti->error = "Target length not divisible by "
            "chunk size";
        return -EINVAL;
    }

    /*
     * Do we have enough arguments for that many stripes ?
     */
    if (argc != (2 + 2 * stripes)) {
        ti->error = "Not enough destinations "
            "specified";
        return -EINVAL;
    }

    /* struct_size() accounts for the trailing stripe[] array */
    sc = kmalloc(struct_size(sc, stripe, stripes), GFP_KERNEL);
    if (!sc) {
        ti->error = "Memory allocation for striped context "
            "failed";
        return -ENOMEM;
    }

    INIT_WORK(&sc->trigger_event, trigger_event);

    /* Set pointer to dm target; used in trigger_event */
    sc->ti = ti;
    sc->stripes = stripes;
    sc->stripe_width = width;

    /* Power-of-2 stripe counts can be mapped with shifts/masks */
    if (stripes & (stripes - 1))
        sc->stripes_shift = -1;
    else
        sc->stripes_shift = __ffs(stripes);

    r = dm_set_target_max_io_len(ti, chunk_size);
    if (r) {
        kfree(sc);
        return r;
    }

    /* Flush/discard/etc. bios are cloned once per member device */
    ti->num_flush_bios = stripes;
    ti->num_discard_bios = stripes;
    ti->num_secure_erase_bios = stripes;
    ti->num_write_zeroes_bios = stripes;

    sc->chunk_size = chunk_size;
    /* Same power-of-2 fast path for the chunk size */
    if (chunk_size & (chunk_size - 1))
        sc->chunk_size_shift = -1;
    else
        sc->chunk_size_shift = __ffs(chunk_size);

    /*
     * Get the stripe destinations.
     */
    for (i = 0; i < stripes; i++) {
        argv += 2;

        r = get_stripe(ti, sc, i, argv);
        if (r < 0) {
            ti->error = "Couldn't parse stripe destination";
            /* Release the devices acquired so far */
            while (i--)
                dm_put_device(ti, sc->stripe[i].dev);
            kfree(sc);
            return r;
        }
        atomic_set(&(sc->stripe[i].error_count), 0);
    }

    ti->private = sc;

    return 0;
}
0189 
0190 static void stripe_dtr(struct dm_target *ti)
0191 {
0192     unsigned int i;
0193     struct stripe_c *sc = (struct stripe_c *) ti->private;
0194 
0195     for (i = 0; i < sc->stripes; i++)
0196         dm_put_device(ti, sc->stripe[i].dev);
0197 
0198     flush_work(&sc->trigger_event);
0199     kfree(sc);
0200 }
0201 
/*
 * Map a sector in the target's logical space to a stripe index and a
 * sector offset within that stripe's device.
 *
 * Logical chunk c lives on stripe (c % stripes) at chunk index
 * (c / stripes) on that device.  When the chunk size / stripe count
 * is a power of 2 the *_shift fields are >= 0 and masks/shifts are
 * used; otherwise the slower sector_div() path is taken.
 */
static void stripe_map_sector(struct stripe_c *sc, sector_t sector,
                  uint32_t *stripe, sector_t *result)
{
    sector_t chunk = dm_target_offset(sc->ti, sector);
    sector_t chunk_offset;

    /* Split into chunk number and offset within the chunk */
    if (sc->chunk_size_shift < 0)
        chunk_offset = sector_div(chunk, sc->chunk_size);
    else {
        chunk_offset = chunk & (sc->chunk_size - 1);
        chunk >>= sc->chunk_size_shift;
    }

    /* Which stripe owns this chunk; chunk becomes the per-device index */
    if (sc->stripes_shift < 0)
        *stripe = sector_div(chunk, sc->stripes);
    else {
        *stripe = chunk & (sc->stripes - 1);
        chunk >>= sc->stripes_shift;
    }

    /* Convert the per-device chunk index back to sectors */
    if (sc->chunk_size_shift < 0)
        chunk *= sc->chunk_size;
    else
        chunk <<= sc->chunk_size_shift;

    *result = chunk + chunk_offset;
}
0229 
/*
 * Like stripe_map_sector(), but constrained to @target_stripe: if the
 * sector maps to a different stripe, snap the result to the nearest
 * chunk boundary on @target_stripe (the start of its next chunk when
 * the owning stripe comes after it, else the start of the current one).
 * Used to compute per-stripe sub-ranges for discard-style operations.
 */
static void stripe_map_range_sector(struct stripe_c *sc, sector_t sector,
                    uint32_t target_stripe, sector_t *result)
{
    uint32_t stripe;

    stripe_map_sector(sc, sector, &stripe, result);
    if (stripe == target_stripe)
        return;

    /* round down */
    sector = *result;
    if (sc->chunk_size_shift < 0)
        *result -= sector_div(sector, sc->chunk_size);
    else
        *result = sector & ~(sector_t)(sc->chunk_size - 1);

    if (target_stripe < stripe)
        *result += sc->chunk_size;      /* next chunk */
}
0249 
/*
 * Remap @bio to the portion of its range that falls on @target_stripe.
 * If no part of the range lands on that stripe, complete the bio here
 * and report it as already submitted.
 */
static int stripe_map_range(struct stripe_c *sc, struct bio *bio,
                uint32_t target_stripe)
{
    sector_t begin, end;

    stripe_map_range_sector(sc, bio->bi_iter.bi_sector,
                target_stripe, &begin);
    stripe_map_range_sector(sc, bio_end_sector(bio),
                target_stripe, &end);
    if (begin < end) {
        /* Shrink the bio to just this stripe's slice of the range */
        bio_set_dev(bio, sc->stripe[target_stripe].dev->bdev);
        bio->bi_iter.bi_sector = begin +
            sc->stripe[target_stripe].physical_start;
        bio->bi_iter.bi_size = to_bytes(end - begin);
        return DM_MAPIO_REMAPPED;
    } else {
        /* The range doesn't map to the target stripe */
        bio_endio(bio);
        return DM_MAPIO_SUBMITTED;
    }
}
0271 
/*
 * Map one bio onto the appropriate member device.
 *
 * Flush bios and range operations (discard / secure erase / write
 * zeroes) were cloned once per stripe by the dm core (see the
 * num_*_bios settings in stripe_ctr); each clone is routed to its
 * stripe by target bio number.  Ordinary bios are remapped by sector.
 */
static int stripe_map(struct dm_target *ti, struct bio *bio)
{
    struct stripe_c *sc = ti->private;
    uint32_t stripe;
    unsigned target_bio_nr;

    if (bio->bi_opf & REQ_PREFLUSH) {
        /* One flush clone per stripe; send it to its own device */
        target_bio_nr = dm_bio_get_target_bio_nr(bio);
        BUG_ON(target_bio_nr >= sc->stripes);
        bio_set_dev(bio, sc->stripe[target_bio_nr].dev->bdev);
        return DM_MAPIO_REMAPPED;
    }
    if (unlikely(bio_op(bio) == REQ_OP_DISCARD) ||
        unlikely(bio_op(bio) == REQ_OP_SECURE_ERASE) ||
        unlikely(bio_op(bio) == REQ_OP_WRITE_ZEROES)) {
        /* Range ops: trim each clone to its stripe's sub-range */
        target_bio_nr = dm_bio_get_target_bio_nr(bio);
        BUG_ON(target_bio_nr >= sc->stripes);
        return stripe_map_range(sc, bio, target_bio_nr);
    }

    stripe_map_sector(sc, bio->bi_iter.bi_sector,
              &stripe, &bio->bi_iter.bi_sector);

    bio->bi_iter.bi_sector += sc->stripe[stripe].physical_start;
    bio_set_dev(bio, sc->stripe[stripe].dev->bdev);

    return DM_MAPIO_REMAPPED;
}
0300 
0301 #if IS_ENABLED(CONFIG_FS_DAX)
/*
 * Translate a page offset in the target to the backing stripe device.
 * @pgoff is in/out: on return it is the page offset within the
 * returned dax device (including the partition start of its bdev).
 */
static struct dax_device *stripe_dax_pgoff(struct dm_target *ti, pgoff_t *pgoff)
{
    struct stripe_c *sc = ti->private;
    struct block_device *bdev;
    sector_t dev_sector;
    uint32_t stripe;

    stripe_map_sector(sc, *pgoff * PAGE_SECTORS, &stripe, &dev_sector);
    dev_sector += sc->stripe[stripe].physical_start;
    bdev = sc->stripe[stripe].dev->bdev;

    *pgoff = (get_start_sect(bdev) + dev_sector) >> PAGE_SECTORS_SHIFT;
    return sc->stripe[stripe].dev->dax_dev;
}
0316 
/*
 * DAX direct access: redirect to the stripe that owns @pgoff.
 * Note: stripe_dax_pgoff() must be called before the dax call because
 * it rewrites @pgoff to device-relative terms.
 */
static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
        long nr_pages, enum dax_access_mode mode, void **kaddr,
        pfn_t *pfn)
{
    struct dax_device *dax_dev = stripe_dax_pgoff(ti, &pgoff);

    return dax_direct_access(dax_dev, pgoff, nr_pages, mode, kaddr, pfn);
}
0325 
/*
 * DAX page-range zeroing: redirect to the stripe that owns @pgoff.
 */
static int stripe_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
                      size_t nr_pages)
{
    struct dax_device *dax_dev = stripe_dax_pgoff(ti, &pgoff);

    return dax_zero_page_range(dax_dev, pgoff, nr_pages);
}
0333 
/*
 * DAX recovery write: redirect to the stripe that owns @pgoff.
 */
static size_t stripe_dax_recovery_write(struct dm_target *ti, pgoff_t pgoff,
        void *addr, size_t bytes, struct iov_iter *i)
{
    struct dax_device *dax_dev = stripe_dax_pgoff(ti, &pgoff);

    return dax_recovery_write(dax_dev, pgoff, addr, bytes, i);
}
0341 
0342 #else
0343 #define stripe_dax_direct_access NULL
0344 #define stripe_dax_zero_page_range NULL
0345 #define stripe_dax_recovery_write NULL
0346 #endif
0347 
0348 /*
0349  * Stripe status:
0350  *
0351  * INFO
0352  * #stripes [stripe_name <stripe_name>] [group word count]
0353  * [error count 'A|D' <error count 'A|D'>]
0354  *
0355  * TABLE
0356  * #stripes [stripe chunk size]
0357  * [stripe_name physical_start <stripe_name physical_start>]
0358  *
0359  */
0360 
/*
 * Emit target status (see the format description above).  The exact
 * strings are consumed by userspace tooling, so the output format is
 * effectively ABI and must not change.
 */
static void stripe_status(struct dm_target *ti, status_type_t type,
              unsigned status_flags, char *result, unsigned maxlen)
{
    struct stripe_c *sc = (struct stripe_c *) ti->private;
    unsigned int sz = 0;   /* consumed by the DMEMIT() macro */
    unsigned int i;

    switch (type) {
    case STATUSTYPE_INFO:
        DMEMIT("%d ", sc->stripes);
        for (i = 0; i < sc->stripes; i++)  {
            DMEMIT("%s ", sc->stripe[i].dev->name);
        }
        /* One status group follows: per-stripe health flags */
        DMEMIT("1 ");
        for (i = 0; i < sc->stripes; i++) {
            /* 'D' = device has seen errors, 'A' = alive */
            DMEMIT("%c", atomic_read(&(sc->stripe[i].error_count)) ?
                   'D' : 'A');
        }
        break;

    case STATUSTYPE_TABLE:
        DMEMIT("%d %llu", sc->stripes,
            (unsigned long long)sc->chunk_size);
        for (i = 0; i < sc->stripes; i++)
            DMEMIT(" %s %llu", sc->stripe[i].dev->name,
                (unsigned long long)sc->stripe[i].physical_start);
        break;

    case STATUSTYPE_IMA:
        /* Measurement record for the IMA subsystem */
        DMEMIT_TARGET_NAME_VERSION(ti->type);
        DMEMIT(",stripes=%d,chunk_size=%llu", sc->stripes,
               (unsigned long long)sc->chunk_size);

        for (i = 0; i < sc->stripes; i++) {
            DMEMIT(",stripe_%d_device_name=%s", i, sc->stripe[i].dev->name);
            DMEMIT(",stripe_%d_physical_start=%llu", i,
                   (unsigned long long)sc->stripe[i].physical_start);
            DMEMIT(",stripe_%d_status=%c", i,
                   atomic_read(&(sc->stripe[i].error_count)) ? 'D' : 'A');
        }
        DMEMIT(";");
        break;
    }
}
0405 
/*
 * Completion hook: account I/O errors against the member device that
 * produced them and (rate-limited) notify userspace via a table event.
 * Always returns DM_ENDIO_DONE — errors are recorded, not retried.
 */
static int stripe_end_io(struct dm_target *ti, struct bio *bio,
        blk_status_t *error)
{
    unsigned i;
    char major_minor[16];
    struct stripe_c *sc = ti->private;

    if (!*error)
        return DM_ENDIO_DONE; /* I/O complete */

    /* Readahead failures are advisory; ignore them */
    if (bio->bi_opf & REQ_RAHEAD)
        return DM_ENDIO_DONE;

    if (*error == BLK_STS_NOTSUPP)
        return DM_ENDIO_DONE;

    /* Build "major:minor" of the failing device for comparison */
    memset(major_minor, 0, sizeof(major_minor));
    sprintf(major_minor, "%d:%d", MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)));

    /*
     * Test to see which stripe drive triggered the event
     * and increment error count for all stripes on that device.
     * If the error count for a given device exceeds the threshold
     * value we will no longer trigger any further events.
     */
    for (i = 0; i < sc->stripes; i++)
        if (!strcmp(sc->stripe[i].dev->name, major_minor)) {
            atomic_inc(&(sc->stripe[i].error_count));
            if (atomic_read(&(sc->stripe[i].error_count)) <
                DM_IO_ERROR_THRESHOLD)
                schedule_work(&sc->trigger_event);
        }

    return DM_ENDIO_DONE;
}
0441 
/*
 * Invoke @fn for each member device, stopping at the first non-zero
 * return.  The do-while calls @fn at least once, which is safe because
 * stripe_ctr() rejects a zero stripe count.
 */
static int stripe_iterate_devices(struct dm_target *ti,
                  iterate_devices_callout_fn fn, void *data)
{
    struct stripe_c *sc = ti->private;
    int ret = 0;
    unsigned i = 0;

    do {
        ret = fn(ti, sc->stripe[i].dev,
             sc->stripe[i].physical_start,
             sc->stripe_width, data);
    } while (!ret && ++i < sc->stripes);

    return ret;
}
0457 
0458 static void stripe_io_hints(struct dm_target *ti,
0459                 struct queue_limits *limits)
0460 {
0461     struct stripe_c *sc = ti->private;
0462     unsigned chunk_size = sc->chunk_size << SECTOR_SHIFT;
0463 
0464     blk_limits_io_min(limits, chunk_size);
0465     blk_limits_io_opt(limits, chunk_size * sc->stripes);
0466 }
0467 
/*
 * Target-type registration record: wires the "striped" target's
 * callbacks into the device-mapper core.
 */
static struct target_type stripe_target = {
    .name   = "striped",
    .version = {1, 6, 0},
    .features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_NOWAIT,
    .module = THIS_MODULE,
    .ctr    = stripe_ctr,
    .dtr    = stripe_dtr,
    .map    = stripe_map,
    .end_io = stripe_end_io,
    .status = stripe_status,
    .iterate_devices = stripe_iterate_devices,
    .io_hints = stripe_io_hints,
    .direct_access = stripe_dax_direct_access,
    .dax_zero_page_range = stripe_dax_zero_page_range,
    .dax_recovery_write = stripe_dax_recovery_write,
};
0484 
0485 int __init dm_stripe_init(void)
0486 {
0487     int r;
0488 
0489     r = dm_register_target(&stripe_target);
0490     if (r < 0)
0491         DMWARN("target registration failed");
0492 
0493     return r;
0494 }
0495 
/*
 * Unregister the "striped" target on teardown.
 */
void dm_stripe_exit(void)
{
    dm_unregister_target(&stripe_target);
}