/*
 * Device-mapper I/O helper: provides synchronous and asynchronous
 * block I/O services to device-mapper targets through dm_io().
 */
#include "dm-core.h"

#include <linux/device-mapper.h>

#include <linux/bio.h>
#include <linux/completion.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/dm-io.h>

#define DM_MSG_PREFIX "io"

#define DM_IO_MAX_REGIONS	BITS_PER_LONG

struct dm_io_client {
	mempool_t pool;
	struct bio_set bios;
};

/*
 * Aligning 'struct io' reduces the number of bits required to store
 * its address.  Refer to store_io_and_region_in_bio() below.
 */
struct io {
	unsigned long error_bits;
	atomic_t count;
	struct dm_io_client *client;
	io_notify_fn callback;
	void *context;
	void *vma_invalidate_address;
	unsigned long vma_invalidate_size;
} __attribute__((aligned(DM_IO_MAX_REGIONS)));

static struct kmem_cache *_dm_io_cache;

/*
 * Create a client with mempool and bioset.
 */
struct dm_io_client *dm_io_client_create(void)
{
	struct dm_io_client *client;
	unsigned min_ios = dm_get_reserved_bio_based_ios();
	int ret;

	client = kzalloc(sizeof(*client), GFP_KERNEL);
	if (!client)
		return ERR_PTR(-ENOMEM);

	ret = mempool_init_slab_pool(&client->pool, min_ios, _dm_io_cache);
	if (ret)
		goto bad;

	ret = bioset_init(&client->bios, min_ios, 0, BIOSET_NEED_BVECS);
	if (ret)
		goto bad;

	return client;

bad:
	mempool_exit(&client->pool);
	kfree(client);
	return ERR_PTR(ret);
}
EXPORT_SYMBOL(dm_io_client_create);

void dm_io_client_destroy(struct dm_io_client *client)
{
	mempool_exit(&client->pool);
	bioset_exit(&client->bios);
	kfree(client);
}
EXPORT_SYMBOL(dm_io_client_destroy);
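
/*
 * Usage sketch (illustration only, not part of the original file): a
 * caller typically creates one dm_io_client at setup time and destroys
 * it on teardown.  The function name below is hypothetical.
 */
#if 0
static int example_client_usage(void)
{
	struct dm_io_client *client;

	client = dm_io_client_create();
	if (IS_ERR(client))
		return PTR_ERR(client);

	/* ... issue I/O through dm_io() with this client ... */

	dm_io_client_destroy(client);
	return 0;
}
#endif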

/*
 *-------------------------------------------------------------------
 * We need to keep track of which region a bio is doing io for.
 * To avoid a memory allocation to store just 5 or 6 bits, we
 * ensure the 'struct io' pointer is aligned so enough low bits are
 * available to store the region number.  And we'll put the region
 * number in the last few bits of the bi_private field.
 *-------------------------------------------------------------------
 */
static void store_io_and_region_in_bio(struct bio *bio, struct io *io,
				       unsigned region)
{
	if (unlikely(!IS_ALIGNED((unsigned long)io, DM_IO_MAX_REGIONS))) {
		DMCRIT("Unaligned struct io pointer %p", io);
		BUG();
	}

	bio->bi_private = (void *)((unsigned long)io | region);
}

static void retrieve_io_and_region_from_bio(struct bio *bio, struct io **io,
					    unsigned *region)
{
	unsigned long val = (unsigned long)bio->bi_private;

	*io = (void *)(val & -(unsigned long)DM_IO_MAX_REGIONS);
	*region = val & (DM_IO_MAX_REGIONS - 1);
}
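
/*
 * Illustration only (not part of the original file): because every
 * 'struct io' is aligned to DM_IO_MAX_REGIONS, the encode/decode pair
 * above round-trips any region number below DM_IO_MAX_REGIONS.  The
 * function and variable names below are hypothetical.
 */
#if 0
static void example_round_trip(struct bio *bio, struct io *io)
{
	struct io *decoded_io;
	unsigned decoded_region;

	store_io_and_region_in_bio(bio, io, 5);
	retrieve_io_and_region_from_bio(bio, &decoded_io, &decoded_region);
	/* Here decoded_io == io and decoded_region == 5. */
}
#endif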

/*
 *-------------------------------------------------------------------
 * We need an io object to keep track of the number of bios that
 * have been dispatched for a particular io.
 *-------------------------------------------------------------------
 */
static void complete_io(struct io *io)
{
	unsigned long error_bits = io->error_bits;
	io_notify_fn fn = io->callback;
	void *context = io->context;

	if (io->vma_invalidate_size)
		invalidate_kernel_vmap_range(io->vma_invalidate_address,
					     io->vma_invalidate_size);

	mempool_free(io, &io->client->pool);
	fn(error_bits, context);
}

static void dec_count(struct io *io, unsigned int region, blk_status_t error)
{
	if (error)
		set_bit(region, &io->error_bits);

	if (atomic_dec_and_test(&io->count))
		complete_io(io);
}

static void endio(struct bio *bio)
{
	struct io *io;
	unsigned region;
	blk_status_t error;

	if (bio->bi_status && bio_data_dir(bio) == READ)
		zero_fill_bio(bio);

	/*
	 * The bio destructor in bio_put() may use the io object.
	 */
	retrieve_io_and_region_from_bio(bio, &io, &region);

	error = bio->bi_status;
	bio_put(bio);

	dec_count(io, region, error);
}

/*
 *-------------------------------------------------------------------
 * These little objects provide an abstraction for getting a new
 * destination page for io.
 *-------------------------------------------------------------------
 */
struct dpages {
	void (*get_page)(struct dpages *dp,
			 struct page **p, unsigned long *len, unsigned *offset);
	void (*next_page)(struct dpages *dp);

	union {
		unsigned context_u;
		struct bvec_iter context_bi;
	};
	void *context_ptr;

	void *vma_invalidate_address;
	unsigned long vma_invalidate_size;
};

/*
 * Functions for getting the pages from a list.
 */
static void list_get_page(struct dpages *dp,
			  struct page **p, unsigned long *len, unsigned *offset)
{
	unsigned o = dp->context_u;
	struct page_list *pl = (struct page_list *) dp->context_ptr;

	*p = pl->page;
	*len = PAGE_SIZE - o;
	*offset = o;
}

static void list_next_page(struct dpages *dp)
{
	struct page_list *pl = (struct page_list *) dp->context_ptr;

	dp->context_ptr = pl->next;
	dp->context_u = 0;
}

static void list_dp_init(struct dpages *dp, struct page_list *pl, unsigned offset)
{
	dp->get_page = list_get_page;
	dp->next_page = list_next_page;
	dp->context_u = offset;
	dp->context_ptr = pl;
}

/*
 * Functions for getting the pages from a bvec.
 */
static void bio_get_page(struct dpages *dp, struct page **p,
			 unsigned long *len, unsigned *offset)
{
	struct bio_vec bvec = bvec_iter_bvec((struct bio_vec *)dp->context_ptr,
					     dp->context_bi);

	*p = bvec.bv_page;
	*len = bvec.bv_len;
	*offset = bvec.bv_offset;

	/* avoid figuring it again next time */
	dp->context_bi.bi_sector = (sector_t)bvec.bv_len;
}

static void bio_next_page(struct dpages *dp)
{
	unsigned int len = (unsigned int)dp->context_bi.bi_sector;

	bvec_iter_advance((struct bio_vec *)dp->context_ptr,
			  &dp->context_bi, len);
}

static void bio_dp_init(struct dpages *dp, struct bio *bio)
{
	dp->get_page = bio_get_page;
	dp->next_page = bio_next_page;

	/*
	 * We just use the bvec iterator to retrieve pages, so it is ok to
	 * access the bvec table directly here.
	 */
	dp->context_ptr = bio->bi_io_vec;
	dp->context_bi = bio->bi_iter;
}

/*
 * Functions for getting the pages from a vma.
 */
static void vm_get_page(struct dpages *dp,
			struct page **p, unsigned long *len, unsigned *offset)
{
	*p = vmalloc_to_page(dp->context_ptr);
	*offset = dp->context_u;
	*len = PAGE_SIZE - dp->context_u;
}

static void vm_next_page(struct dpages *dp)
{
	dp->context_ptr += PAGE_SIZE - dp->context_u;
	dp->context_u = 0;
}

static void vm_dp_init(struct dpages *dp, void *data)
{
	dp->get_page = vm_get_page;
	dp->next_page = vm_next_page;
	dp->context_u = offset_in_page(data);
	dp->context_ptr = data;
}

/*
 * Functions for getting the pages from a kernel memory (kmalloc'd) area.
 */
static void km_get_page(struct dpages *dp, struct page **p, unsigned long *len,
			unsigned *offset)
{
	*p = virt_to_page(dp->context_ptr);
	*offset = dp->context_u;
	*len = PAGE_SIZE - dp->context_u;
}

static void km_next_page(struct dpages *dp)
{
	dp->context_ptr += PAGE_SIZE - dp->context_u;
	dp->context_u = 0;
}

static void km_dp_init(struct dpages *dp, void *data)
{
	dp->get_page = km_get_page;
	dp->next_page = km_next_page;
	dp->context_u = offset_in_page(data);
	dp->context_ptr = data;
}

/*
 *-------------------------------------------------------------------
 * IO routines that accept a list of pages.
 *-------------------------------------------------------------------
 */
static void do_region(const blk_opf_t opf, unsigned region,
		      struct dm_io_region *where, struct dpages *dp,
		      struct io *io)
{
	struct bio *bio;
	struct page *page;
	unsigned long len;
	unsigned offset;
	unsigned num_bvecs;
	sector_t remaining = where->count;
	struct request_queue *q = bdev_get_queue(where->bdev);
	sector_t num_sectors;
	unsigned int special_cmd_max_sectors;
	const enum req_op op = opf & REQ_OP_MASK;

	/*
	 * Reject unsupported discard and write zeroes requests.
	 */
	if (op == REQ_OP_DISCARD)
		special_cmd_max_sectors = bdev_max_discard_sectors(where->bdev);
	else if (op == REQ_OP_WRITE_ZEROES)
		special_cmd_max_sectors = q->limits.max_write_zeroes_sectors;
	if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES) &&
	    special_cmd_max_sectors == 0) {
		atomic_inc(&io->count);
		dec_count(io, region, BLK_STS_NOTSUPP);
		return;
	}

	/*
	 * where->count may be zero if op holds a flush and we need to
	 * send a zero-sized flush.
	 */
	do {
		/*
		 * Allocate a suitably sized bio.
		 */
		switch (op) {
		case REQ_OP_DISCARD:
		case REQ_OP_WRITE_ZEROES:
			num_bvecs = 0;
			break;
		default:
			num_bvecs = bio_max_segs(dm_sector_div_up(remaining,
						(PAGE_SIZE >> SECTOR_SHIFT)));
		}

		bio = bio_alloc_bioset(where->bdev, num_bvecs, opf, GFP_NOIO,
				       &io->client->bios);
		bio->bi_iter.bi_sector = where->sector + (where->count - remaining);
		bio->bi_end_io = endio;
		store_io_and_region_in_bio(bio, io, region);

		if (op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES) {
			num_sectors = min_t(sector_t, special_cmd_max_sectors, remaining);
			bio->bi_iter.bi_size = num_sectors << SECTOR_SHIFT;
			remaining -= num_sectors;
		} else while (remaining) {
			/*
			 * Try and add as many pages as possible.
			 */
			dp->get_page(dp, &page, &len, &offset);
			len = min(len, to_bytes(remaining));
			if (!bio_add_page(bio, page, len, offset))
				break;

			offset = 0;
			remaining -= to_sector(len);
			dp->next_page(dp);
		}

		atomic_inc(&io->count);
		submit_bio(bio);
	} while (remaining);
}

static void dispatch_io(blk_opf_t opf, unsigned int num_regions,
			struct dm_io_region *where, struct dpages *dp,
			struct io *io, int sync)
{
	int i;
	struct dpages old_pages = *dp;

	BUG_ON(num_regions > DM_IO_MAX_REGIONS);

	if (sync)
		opf |= REQ_SYNC;

	/*
	 * For multiple regions we need to be careful to rewind
	 * the dp object for each call to do_region.
	 */
	for (i = 0; i < num_regions; i++) {
		*dp = old_pages;
		if (where[i].count || (opf & REQ_PREFLUSH))
			do_region(opf, i, where + i, dp, io);
	}

	/*
	 * Drop the extra reference that we were holding to avoid
	 * the io being completed too early.
	 */
	dec_count(io, 0, 0);
}

struct sync_io {
	unsigned long error_bits;
	struct completion wait;
};

static void sync_io_complete(unsigned long error, void *context)
{
	struct sync_io *sio = context;

	sio->error_bits = error;
	complete(&sio->wait);
}

static int sync_io(struct dm_io_client *client, unsigned int num_regions,
		   struct dm_io_region *where, blk_opf_t opf, struct dpages *dp,
		   unsigned long *error_bits)
{
	struct io *io;
	struct sync_io sio;

	if (num_regions > 1 && !op_is_write(opf)) {
		WARN_ON(1);
		return -EIO;
	}

	init_completion(&sio.wait);

	io = mempool_alloc(&client->pool, GFP_NOIO);
	io->error_bits = 0;
	atomic_set(&io->count, 1); /* see dispatch_io() */
	io->client = client;
	io->callback = sync_io_complete;
	io->context = &sio;

	io->vma_invalidate_address = dp->vma_invalidate_address;
	io->vma_invalidate_size = dp->vma_invalidate_size;

	dispatch_io(opf, num_regions, where, dp, io, 1);

	wait_for_completion_io(&sio.wait);

	if (error_bits)
		*error_bits = sio.error_bits;

	return sio.error_bits ? -EIO : 0;
}

static int async_io(struct dm_io_client *client, unsigned int num_regions,
		    struct dm_io_region *where, blk_opf_t opf,
		    struct dpages *dp, io_notify_fn fn, void *context)
{
	struct io *io;

	if (num_regions > 1 && !op_is_write(opf)) {
		WARN_ON(1);
		fn(1, context);
		return -EIO;
	}

	io = mempool_alloc(&client->pool, GFP_NOIO);
	io->error_bits = 0;
	atomic_set(&io->count, 1); /* see dispatch_io() */
	io->client = client;
	io->callback = fn;
	io->context = context;

	io->vma_invalidate_address = dp->vma_invalidate_address;
	io->vma_invalidate_size = dp->vma_invalidate_size;

	dispatch_io(opf, num_regions, where, dp, io, 0);
	return 0;
}

static int dp_init(struct dm_io_request *io_req, struct dpages *dp,
		   unsigned long size)
{
	/* Set up dpages based on memory type */

	dp->vma_invalidate_address = NULL;
	dp->vma_invalidate_size = 0;

	switch (io_req->mem.type) {
	case DM_IO_PAGE_LIST:
		list_dp_init(dp, io_req->mem.ptr.pl, io_req->mem.offset);
		break;

	case DM_IO_BIO:
		bio_dp_init(dp, io_req->mem.ptr.bio);
		break;

	case DM_IO_VMA:
		flush_kernel_vmap_range(io_req->mem.ptr.vma, size);
		if ((io_req->bi_opf & REQ_OP_MASK) == REQ_OP_READ) {
			dp->vma_invalidate_address = io_req->mem.ptr.vma;
			dp->vma_invalidate_size = size;
		}
		vm_dp_init(dp, io_req->mem.ptr.vma);
		break;

	case DM_IO_KMEM:
		km_dp_init(dp, io_req->mem.ptr.addr);
		break;

	default:
		return -EINVAL;
	}

	return 0;
}

int dm_io(struct dm_io_request *io_req, unsigned num_regions,
	  struct dm_io_region *where, unsigned long *sync_error_bits)
{
	int r;
	struct dpages dp;

	r = dp_init(io_req, &dp, (unsigned long)where->count << SECTOR_SHIFT);
	if (r)
		return r;

	if (!io_req->notify.fn)
		return sync_io(io_req->client, num_regions, where,
			       io_req->bi_opf, &dp, sync_error_bits);

	return async_io(io_req->client, num_regions, where,
			io_req->bi_opf, &dp, io_req->notify.fn,
			io_req->notify.context);
}
EXPORT_SYMBOL(dm_io);
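
/*
 * Usage sketch (illustration only, not part of the original file): a
 * synchronous 4 KiB read into a kmalloc'd buffer.  "bdev", "buf",
 * "client" and the function name are hypothetical; a NULL notify.fn
 * selects the synchronous sync_io() path above.
 */
#if 0
static int example_sync_read(struct dm_io_client *client,
			     struct block_device *bdev, void *buf)
{
	struct dm_io_region where = {
		.bdev = bdev,
		.sector = 0,
		.count = 8,			/* 8 * 512-byte sectors */
	};
	struct dm_io_request io_req = {
		.bi_opf = REQ_OP_READ,
		.mem.type = DM_IO_KMEM,
		.mem.ptr.addr = buf,
		.notify.fn = NULL,		/* NULL => synchronous */
		.client = client,
	};
	unsigned long error_bits;

	return dm_io(&io_req, 1, &where, &error_bits);
}
#endif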

int __init dm_io_init(void)
{
	_dm_io_cache = KMEM_CACHE(io, 0);
	if (!_dm_io_cache)
		return -ENOMEM;

	return 0;
}

void dm_io_exit(void)
{
	kmem_cache_destroy(_dm_io_cache);
	_dm_io_cache = NULL;
}