/*
 * Copyright (C) 2005-2007 Red Hat GmbH
 *
 * A target that delays reads and/or writes and can send
 * them to different devices.
 *
 * This file is released under the GPL.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/slab.h>

#include <linux/device-mapper.h>

#define DM_MSG_PREFIX "delay"

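/*
 * One delay class per bio type (read, write, flush): the backing device,
 * the start sector on that device, the delay in milliseconds, and the
 * number of bios currently being delayed (reported via STATUSTYPE_INFO).
 */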
struct delay_class {
	struct dm_dev *dev;
	sector_t start;
	unsigned delay;
	unsigned ops;
};

struct delay_c {
	struct timer_list delay_timer;
	struct mutex timer_lock;
	struct workqueue_struct *kdelayd_wq;
	struct work_struct flush_expired_bios;
	struct list_head delayed_bios;
	atomic_t may_delay;

	struct delay_class read;
	struct delay_class write;
	struct delay_class flush;

	int argc;
};

struct dm_delay_info {
	struct delay_c *context;
	struct delay_class *class;
	struct list_head list;
	unsigned long expires;
};

static DEFINE_MUTEX(delayed_bios_lock);

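/*
 * Timer callback: runs in softirq context, so defer the actual bio
 * submission to the workqueue.
 */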
static void handle_delayed_timer(struct timer_list *t)
{
	struct delay_c *dc = from_timer(dc, t, delay_timer);

	queue_work(dc->kdelayd_wq, &dc->flush_expired_bios);
}

static void queue_timeout(struct delay_c *dc, unsigned long expires)
{
	mutex_lock(&dc->timer_lock);

	if (!timer_pending(&dc->delay_timer) || expires < dc->delay_timer.expires)
		mod_timer(&dc->delay_timer, expires);

	mutex_unlock(&dc->timer_lock);
}

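/* Submit a bi_next-chained list of bios to their (already remapped) devices. */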
static void flush_bios(struct bio *bio)
{
	struct bio *n;

	while (bio) {
		n = bio->bi_next;
		bio->bi_next = NULL;
		dm_submit_bio_remap(bio, NULL);
		bio = n;
	}
}

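/*
 * Move expired bios (or all of them, if flush_all is set) off the delayed
 * list and return them as a chain; re-arm the timer for the earliest
 * expiry still pending.
 */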
static struct bio *flush_delayed_bios(struct delay_c *dc, int flush_all)
{
	struct dm_delay_info *delayed, *next;
	unsigned long next_expires = 0;
	unsigned long start_timer = 0;
	struct bio_list flush_bios = { };

	mutex_lock(&delayed_bios_lock);
	list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
		if (flush_all || time_after_eq(jiffies, delayed->expires)) {
			struct bio *bio = dm_bio_from_per_bio_data(delayed,
					sizeof(struct dm_delay_info));
			list_del(&delayed->list);
			bio_list_add(&flush_bios, bio);
			delayed->class->ops--;
			continue;
		}

		if (!start_timer) {
			start_timer = 1;
			next_expires = delayed->expires;
		} else {
			next_expires = min(next_expires, delayed->expires);
		}
	}
	mutex_unlock(&delayed_bios_lock);

	if (start_timer)
		queue_timeout(dc, next_expires);

	return bio_list_get(&flush_bios);
}

static void flush_expired_bios(struct work_struct *work)
{
	struct delay_c *dc;

	dc = container_of(work, struct delay_c, flush_expired_bios);
	flush_bios(flush_delayed_bios(dc, 0));
}

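/* Tear down the target; also called from the constructor's error path. */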
static void delay_dtr(struct dm_target *ti)
{
	struct delay_c *dc = ti->private;

	if (dc->kdelayd_wq)
		destroy_workqueue(dc->kdelayd_wq);

	if (dc->read.dev)
		dm_put_device(ti, dc->read.dev);
	if (dc->write.dev)
		dm_put_device(ti, dc->write.dev);
	if (dc->flush.dev)
		dm_put_device(ti, dc->flush.dev);

	mutex_destroy(&dc->timer_lock);

	kfree(dc);
}

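/* Parse one <device> <offset> <delay> argument triple into class @c. */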
static int delay_class_ctr(struct dm_target *ti, struct delay_class *c, char **argv)
{
	int ret;
	unsigned long long tmpll;
	char dummy;

	if (sscanf(argv[1], "%llu%c", &tmpll, &dummy) != 1 || tmpll != (sector_t)tmpll) {
		ti->error = "Invalid device sector";
		return -EINVAL;
	}
	c->start = tmpll;

	if (sscanf(argv[2], "%u%c", &c->delay, &dummy) != 1) {
		ti->error = "Invalid delay";
		return -EINVAL;
	}

	ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &c->dev);
	if (ret) {
		ti->error = "Device lookup failed";
		return ret;
	}

	return 0;
}

/*
 * Mapping parameters:
 *    <device> <offset> <delay>
 *    [<write device> <write offset> <write delay>
 *     [<flush device> <flush offset> <flush delay>]]
 *
 * With separate write parameters, the first set is only used for reads.
 * With separate flush parameters, the second set is only used for writes.
 * Offsets are specified in sectors. Delays are specified in milliseconds.
 */
static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	struct delay_c *dc;
	int ret;

	if (argc != 3 && argc != 6 && argc != 9) {
		ti->error = "Requires exactly 3, 6 or 9 arguments";
		return -EINVAL;
	}

	dc = kzalloc(sizeof(*dc), GFP_KERNEL);
	if (!dc) {
		ti->error = "Cannot allocate context";
		return -ENOMEM;
	}

	ti->private = dc;
	timer_setup(&dc->delay_timer, handle_delayed_timer, 0);
	INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
	INIT_LIST_HEAD(&dc->delayed_bios);
	mutex_init(&dc->timer_lock);
	atomic_set(&dc->may_delay, 1);
	dc->argc = argc;

	ret = delay_class_ctr(ti, &dc->read, argv);
	if (ret)
		goto bad;

	if (argc == 3) {
		ret = delay_class_ctr(ti, &dc->write, argv);
		if (ret)
			goto bad;
		ret = delay_class_ctr(ti, &dc->flush, argv);
		if (ret)
			goto bad;
		goto out;
	}

	ret = delay_class_ctr(ti, &dc->write, argv + 3);
	if (ret)
		goto bad;
	if (argc == 6) {
		ret = delay_class_ctr(ti, &dc->flush, argv + 3);
		if (ret)
			goto bad;
		goto out;
	}

	ret = delay_class_ctr(ti, &dc->flush, argv + 6);
	if (ret)
		goto bad;

out:
	dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
	if (!dc->kdelayd_wq) {
		ret = -EINVAL;
		DMERR("Couldn't start kdelayd");
		goto bad;
	}

	ti->num_flush_bios = 1;
	ti->num_discard_bios = 1;
	ti->accounts_remapped_io = true;
	ti->per_io_data_size = sizeof(struct dm_delay_info);
	return 0;

bad:
	delay_dtr(ti);
	return ret;
}

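/*
 * Queue a bio for delayed submission. Returns DM_MAPIO_SUBMITTED once the
 * bio is queued, or DM_MAPIO_REMAPPED if it should be issued immediately
 * (zero delay, or the target is suspending).
 */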
static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio)
{
	struct dm_delay_info *delayed;
	unsigned long expires = 0;

	if (!c->delay || !atomic_read(&dc->may_delay))
		return DM_MAPIO_REMAPPED;

	delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info));

	delayed->context = dc;
	delayed->expires = expires = jiffies + msecs_to_jiffies(c->delay);

	mutex_lock(&delayed_bios_lock);
	c->ops++;
	list_add_tail(&delayed->list, &dc->delayed_bios);
	mutex_unlock(&delayed_bios_lock);

	queue_timeout(dc, expires);

	return DM_MAPIO_SUBMITTED;
}

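/* Stop delaying new bios and push out everything already queued. */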
static void delay_presuspend(struct dm_target *ti)
{
	struct delay_c *dc = ti->private;

	atomic_set(&dc->may_delay, 0);
	del_timer_sync(&dc->delay_timer);
	flush_bios(flush_delayed_bios(dc, 1));
}

static void delay_resume(struct dm_target *ti)
{
	struct delay_c *dc = ti->private;

	atomic_set(&dc->may_delay, 1);
}

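/*
 * Pick the delay class by bio type (flush, write or read), remap the bio
 * to that class's device and offset, then hand it to delay_bio().
 */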
static int delay_map(struct dm_target *ti, struct bio *bio)
{
	struct delay_c *dc = ti->private;
	struct delay_class *c;
	struct dm_delay_info *delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info));

	if (bio_data_dir(bio) == WRITE) {
		if (unlikely(bio->bi_opf & REQ_PREFLUSH))
			c = &dc->flush;
		else
			c = &dc->write;
	} else {
		c = &dc->read;
	}
	delayed->class = c;
	bio_set_dev(bio, c->dev->bdev);
	bio->bi_iter.bi_sector = c->start + dm_target_offset(ti, bio->bi_iter.bi_sector);

	return delay_bio(dc, c, bio);
}

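/* Emit "<device> <offset> <delay>" for one delay class in the table line. */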
#define DMEMIT_DELAY_CLASS(c) \
	DMEMIT("%s %llu %u", (c)->dev->name, (unsigned long long)(c)->start, (c)->delay)

static void delay_status(struct dm_target *ti, status_type_t type,
			 unsigned status_flags, char *result, unsigned maxlen)
{
	struct delay_c *dc = ti->private;
	int sz = 0;

	switch (type) {
	case STATUSTYPE_INFO:
		DMEMIT("%u %u %u", dc->read.ops, dc->write.ops, dc->flush.ops);
		break;

	case STATUSTYPE_TABLE:
		DMEMIT_DELAY_CLASS(&dc->read);
		if (dc->argc >= 6) {
			DMEMIT(" ");
			DMEMIT_DELAY_CLASS(&dc->write);
		}
		if (dc->argc >= 9) {
			DMEMIT(" ");
			DMEMIT_DELAY_CLASS(&dc->flush);
		}
		break;

	case STATUSTYPE_IMA:
		*result = '\0';
		break;
	}
}

static int delay_iterate_devices(struct dm_target *ti,
				 iterate_devices_callout_fn fn, void *data)
{
	struct delay_c *dc = ti->private;
	int ret = 0;

	ret = fn(ti, dc->read.dev, dc->read.start, ti->len, data);
	if (ret)
		goto out;
	ret = fn(ti, dc->write.dev, dc->write.start, ti->len, data);
	if (ret)
		goto out;
	ret = fn(ti, dc->flush.dev, dc->flush.start, ti->len, data);
	if (ret)
		goto out;

out:
	return ret;
}

static struct target_type delay_target = {
	.name = "delay",
	.version = {1, 3, 0},
	.features = DM_TARGET_PASSES_INTEGRITY,
	.module = THIS_MODULE,
	.ctr = delay_ctr,
	.dtr = delay_dtr,
	.map = delay_map,
	.presuspend = delay_presuspend,
	.resume = delay_resume,
	.status = delay_status,
	.iterate_devices = delay_iterate_devices,
};

static int __init dm_delay_init(void)
{
	int r;

	r = dm_register_target(&delay_target);
	if (r < 0) {
		DMERR("register failed %d", r);
		return r;
	}

	return 0;
}

static void __exit dm_delay_exit(void)
{
	dm_unregister_target(&delay_target);
}

module_init(dm_delay_init);
module_exit(dm_delay_exit);

MODULE_DESCRIPTION(DM_NAME " delay target");
MODULE_AUTHOR("Heinz Mauelshagen <mauelshagen@redhat.com>");
MODULE_LICENSE("GPL");