/*
 *  linux/fs/nfs/blocklayout/blocklayout.c
 *
 *  Module for the NFSv4.1 pNFS block layout driver.
 *
 *  Copyright (c) 2006 The Regents of the University of Michigan.
 *  All rights reserved.
 *
 *  Andy Adamson <andros@citi.umich.edu>
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/bio.h>		/* struct bio */
#include <linux/prefetch.h>
#include <linux/pagevec.h>

#include "../pnfs.h"
#include "../nfs4session.h"
#include "../internal.h"
#include "blocklayout.h"

#define NFSDBG_FACILITY	NFSDBG_PNFS_LD

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Andy Adamson <andros@citi.umich.edu>");
MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver");

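/*
 * Extents in state PNFS_BLOCK_NONE_DATA have no data on disk and always
 * read as zeroes.  INVALID_DATA extents also read as zeroes until they
 * have been written, which is indicated by be_tag being set.
 */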
static bool is_hole(struct pnfs_block_extent *be)
{
	switch (be->be_state) {
	case PNFS_BLOCK_NONE_DATA:
		return true;
	case PNFS_BLOCK_INVALID_DATA:
		return be->be_tag ? false : true;
	default:
		return false;
	}
}

/* The data we are handed might be spread across several bios.  We need
 * to track when the last one is finished.
 */
struct parallel_io {
	struct kref refcnt;
	void (*pnfs_callback) (void *data);
	void *data;
};

static inline struct parallel_io *alloc_parallel(void *data)
{
	struct parallel_io *rv;

	rv = kmalloc(sizeof(*rv), GFP_NOFS);
	if (rv) {
		rv->data = data;
		kref_init(&rv->refcnt);
	}
	return rv;
}

static inline void get_parallel(struct parallel_io *p)
{
	kref_get(&p->refcnt);
}

static void destroy_parallel(struct kref *kref)
{
	struct parallel_io *p = container_of(kref, struct parallel_io, refcnt);

	dprintk("%s enter\n", __func__);
	p->pnfs_callback(p->data);
	kfree(p);
}

static inline void put_parallel(struct parallel_io *p)
{
	kref_put(&p->refcnt, destroy_parallel);
}

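/*
 * Submit the current bio, if any, taking a reference on the parallel_io
 * tracker first so the pnfs callback only fires after every submitted bio
 * has completed.  Always returns NULL so callers can reset their bio
 * pointer in one step.
 */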
static struct bio *
bl_submit_bio(struct bio *bio)
{
	if (bio) {
		get_parallel(bio->bi_private);
		dprintk("%s submitting %s bio %u@%llu\n", __func__,
			bio_op(bio) == READ ? "read" : "write",
			bio->bi_iter.bi_size,
			(unsigned long long)bio->bi_iter.bi_sector);
		submit_bio(bio);
	}
	return NULL;
}

static bool offset_in_map(u64 offset, struct pnfs_block_dev_map *map)
{
	return offset >= map->start && offset < map->start + map->len;
}

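/*
 * Add a page to the current bio at the correct disk offset, translating
 * the file-relative sector @isect through the extent and the device map.
 * *len is trimmed to what the current device mapping allows, and a fresh
 * bio is allocated (after submitting the old one) whenever the mapping
 * changes or the page cannot be appended to the current bio.
 */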
static struct bio *
do_add_page_to_bio(struct bio *bio, int npg, enum req_op op, sector_t isect,
		struct page *page, struct pnfs_block_dev_map *map,
		struct pnfs_block_extent *be, bio_end_io_t end_io,
		struct parallel_io *par, unsigned int offset, int *len)
{
	struct pnfs_block_dev *dev =
		container_of(be->be_device, struct pnfs_block_dev, node);
	u64 disk_addr, end;

	dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__,
		npg, (__force u32)op, (unsigned long long)isect, offset, *len);

	/* translate to device offset */
	isect += be->be_v_offset;
	isect -= be->be_f_offset;

	/* translate to physical disk offset */
	disk_addr = (u64)isect << SECTOR_SHIFT;
	if (!offset_in_map(disk_addr, map)) {
		if (!dev->map(dev, disk_addr, map) ||
		    !offset_in_map(disk_addr, map))
			return ERR_PTR(-EIO);
		bio = bl_submit_bio(bio);
	}
	disk_addr += map->disk_offset;
	disk_addr -= map->start;

	/* limit length to what the device mapping allows */
	end = disk_addr + *len;
	if (end >= map->start + map->len)
		*len = map->start + map->len - disk_addr;

retry:
	if (!bio) {
		bio = bio_alloc(map->bdev, bio_max_segs(npg), op, GFP_NOIO);
		bio->bi_iter.bi_sector = disk_addr >> SECTOR_SHIFT;
		bio->bi_end_io = end_io;
		bio->bi_private = par;
	}
	if (bio_add_page(bio, page, *len, offset) < *len) {
		bio = bl_submit_bio(bio);
		goto retry;
	}
	return bio;
}

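/*
 * Walk the extents backing a failed I/O and mark each extent's device id
 * unavailable, so that further I/O avoids the broken device and falls
 * back to the MDS.
 */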
static void bl_mark_devices_unavailable(struct nfs_pgio_header *header, bool rw)
{
	struct pnfs_block_layout *bl = BLK_LSEG2EXT(header->lseg);
	size_t bytes_left = header->args.count;
	sector_t isect, extent_length = 0;
	struct pnfs_block_extent be;

	isect = header->args.offset >> SECTOR_SHIFT;
	bytes_left += header->args.offset - (isect << SECTOR_SHIFT);

	while (bytes_left > 0) {
		if (!ext_tree_lookup(bl, isect, &be, rw))
			return;
		extent_length = be.be_length - (isect - be.be_f_offset);
		nfs4_mark_deviceid_unavailable(be.be_device);
		isect += extent_length;
		if (bytes_left > extent_length << SECTOR_SHIFT)
			bytes_left -= extent_length << SECTOR_SHIFT;
		else
			bytes_left = 0;
	}
}

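/*
 * bio completion for reads: on error, record -EIO in the header, fail the
 * layout segment and mark the affected devices unavailable; drop our
 * reference on the parallel_io tracker either way.
 */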
static void bl_end_io_read(struct bio *bio)
{
	struct parallel_io *par = bio->bi_private;

	if (bio->bi_status) {
		struct nfs_pgio_header *header = par->data;

		if (!header->pnfs_error)
			header->pnfs_error = -EIO;
		pnfs_set_lo_fail(header->lseg);
		bl_mark_devices_unavailable(header, false);
	}

	bio_put(bio);
	put_parallel(par);
}

static void bl_read_cleanup(struct work_struct *work)
{
	struct rpc_task *task;
	struct nfs_pgio_header *hdr;

	dprintk("%s enter\n", __func__);
	task = container_of(work, struct rpc_task, u.tk_work);
	hdr = container_of(task, struct nfs_pgio_header, task);
	pnfs_ld_read_done(hdr);
}

static void
bl_end_par_io_read(void *data)
{
	struct nfs_pgio_header *hdr = data;

	hdr->task.tk_status = hdr->pnfs_error;
	INIT_WORK(&hdr->task.u.tk_work, bl_read_cleanup);
	schedule_work(&hdr->task.u.tk_work);
}

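/*
 * Read a list of pages via the block layout: walk the pages, looking up
 * the extent covering each, zero-fill holes and queue bios for everything
 * else.  Completion is funnelled through parallel_io so pnfs_ld_read_done
 * runs only after the last bio has finished.
 */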
static enum pnfs_try_status
bl_read_pagelist(struct nfs_pgio_header *header)
{
	struct pnfs_block_layout *bl = BLK_LSEG2EXT(header->lseg);
	struct pnfs_block_dev_map map = { .start = NFS4_MAX_UINT64 };
	struct bio *bio = NULL;
	struct pnfs_block_extent be;
	sector_t isect, extent_length = 0;
	struct parallel_io *par;
	loff_t f_offset = header->args.offset;
	size_t bytes_left = header->args.count;
	unsigned int pg_offset = header->args.pgbase, pg_len;
	struct page **pages = header->args.pages;
	int pg_index = header->args.pgbase >> PAGE_SHIFT;
	const bool is_dio = (header->dreq != NULL);
	struct blk_plug plug;
	int i;

	dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
		header->page_array.npages, f_offset,
		(unsigned int)header->args.count);

	par = alloc_parallel(header);
	if (!par)
		return PNFS_NOT_ATTEMPTED;
	par->pnfs_callback = bl_end_par_io_read;

	blk_start_plug(&plug);

	isect = (sector_t) (f_offset >> SECTOR_SHIFT);
	/* Code assumes extents are page-aligned */
	for (i = pg_index; i < header->page_array.npages; i++) {
		if (extent_length <= 0) {
			/* We've used up the previous extent */
			bio = bl_submit_bio(bio);

			/* Get the next one */
			if (!ext_tree_lookup(bl, isect, &be, false)) {
				header->pnfs_error = -EIO;
				goto out;
			}
			extent_length = be.be_length - (isect - be.be_f_offset);
		}

		if (is_dio) {
			if (pg_offset + bytes_left > PAGE_SIZE)
				pg_len = PAGE_SIZE - pg_offset;
			else
				pg_len = bytes_left;
		} else {
			BUG_ON(pg_offset != 0);
			pg_len = PAGE_SIZE;
		}

		if (is_hole(&be)) {
			bio = bl_submit_bio(bio);
			/* Fill hole w/ zeroes w/o accessing device */
			dprintk("%s Zeroing page for hole\n", __func__);
			zero_user_segment(pages[i], pg_offset, pg_len);

			/* invalidate map */
			map.start = NFS4_MAX_UINT64;
		} else {
			bio = do_add_page_to_bio(bio,
						 header->page_array.npages - i,
						 REQ_OP_READ,
						 isect, pages[i], &map, &be,
						 bl_end_io_read, par,
						 pg_offset, &pg_len);
			if (IS_ERR(bio)) {
				header->pnfs_error = PTR_ERR(bio);
				bio = NULL;
				goto out;
			}
		}
		isect += (pg_len >> SECTOR_SHIFT);
		extent_length -= (pg_len >> SECTOR_SHIFT);
		f_offset += pg_len;
		bytes_left -= pg_len;
		pg_offset = 0;
	}
	if ((isect << SECTOR_SHIFT) >= header->inode->i_size) {
		header->res.eof = 1;
		header->res.count = header->inode->i_size - header->args.offset;
	} else {
		header->res.count = (isect << SECTOR_SHIFT) - header->args.offset;
	}
out:
	bl_submit_bio(bio);
	blk_finish_plug(&plug);
	put_parallel(par);
	return PNFS_ATTEMPTED;
}

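/*
 * bio completion for writes: the mirror image of bl_end_io_read, but
 * looks up the read-write extent tree when marking devices unavailable.
 */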
static void bl_end_io_write(struct bio *bio)
{
	struct parallel_io *par = bio->bi_private;
	struct nfs_pgio_header *header = par->data;

	if (bio->bi_status) {
		if (!header->pnfs_error)
			header->pnfs_error = -EIO;
		pnfs_set_lo_fail(header->lseg);
		bl_mark_devices_unavailable(header, true);
	}
	bio_put(bio);
	put_parallel(par);
}

/* Function scheduled for call during bl_end_par_io_write,
 * it marks sectors as written and extends the commitlist.
 */
static void bl_write_cleanup(struct work_struct *work)
{
	struct rpc_task *task = container_of(work, struct rpc_task, u.tk_work);
	struct nfs_pgio_header *hdr =
		container_of(task, struct nfs_pgio_header, task);

	dprintk("%s enter\n", __func__);

	if (likely(!hdr->pnfs_error)) {
		struct pnfs_block_layout *bl = BLK_LSEG2EXT(hdr->lseg);
		u64 start = hdr->args.offset & (loff_t)PAGE_MASK;
		u64 end = (hdr->args.offset + hdr->args.count +
			PAGE_SIZE - 1) & (loff_t)PAGE_MASK;
		u64 lwb = hdr->args.offset + hdr->args.count;

		ext_tree_mark_written(bl, start >> SECTOR_SHIFT,
					(end - start) >> SECTOR_SHIFT, lwb);
	}

	pnfs_ld_write_done(hdr);
}

static void bl_end_par_io_write(void *data)
{
	struct nfs_pgio_header *hdr = data;

	hdr->task.tk_status = hdr->pnfs_error;
	hdr->verf.committed = NFS_FILE_SYNC;
	INIT_WORK(&hdr->task.u.tk_work, bl_write_cleanup);
	schedule_work(&hdr->task.u.tk_work);
}

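/*
 * Write a list of pages via the block layout.  Whole pages are always
 * written: unaligned direct writes have already been redirected to the
 * MDS by bl_pg_init_write(), and the starting offset is rounded down to
 * a page boundary below.
 */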
static enum pnfs_try_status
bl_write_pagelist(struct nfs_pgio_header *header, int sync)
{
	struct pnfs_block_layout *bl = BLK_LSEG2EXT(header->lseg);
	struct pnfs_block_dev_map map = { .start = NFS4_MAX_UINT64 };
	struct bio *bio = NULL;
	struct pnfs_block_extent be;
	sector_t isect, extent_length = 0;
	struct parallel_io *par = NULL;
	loff_t offset = header->args.offset;
	size_t count = header->args.count;
	struct page **pages = header->args.pages;
	int pg_index = header->args.pgbase >> PAGE_SHIFT;
	unsigned int pg_len;
	struct blk_plug plug;
	int i;

	dprintk("%s enter, %zu@%lld\n", __func__, count, offset);

	/* At this point, header->page_array is a (sequential) list of nfs_pages.
	 * We want to write each, and if there is an error set pnfs_error
	 * to have it redone using nfs.
	 */
	par = alloc_parallel(header);
	if (!par)
		return PNFS_NOT_ATTEMPTED;
	par->pnfs_callback = bl_end_par_io_write;

	blk_start_plug(&plug);

	/* we always write out the whole page */
	offset = offset & (loff_t)PAGE_MASK;
	isect = offset >> SECTOR_SHIFT;

	for (i = pg_index; i < header->page_array.npages; i++) {
		if (extent_length <= 0) {
			/* We've used up the previous extent */
			bio = bl_submit_bio(bio);
			/* Get the next one */
			if (!ext_tree_lookup(bl, isect, &be, true)) {
				header->pnfs_error = -EINVAL;
				goto out;
			}

			extent_length = be.be_length - (isect - be.be_f_offset);
		}

		pg_len = PAGE_SIZE;
		bio = do_add_page_to_bio(bio, header->page_array.npages - i,
					 REQ_OP_WRITE, isect, pages[i], &map,
					 &be, bl_end_io_write, par, 0, &pg_len);
		if (IS_ERR(bio)) {
			header->pnfs_error = PTR_ERR(bio);
			bio = NULL;
			goto out;
		}

		offset += pg_len;
		count -= pg_len;
		isect += (pg_len >> SECTOR_SHIFT);
		extent_length -= (pg_len >> SECTOR_SHIFT);
	}

	header->res.count = header->args.count;
out:
	bl_submit_bio(bio);
	blk_finish_plug(&plug);
	put_parallel(par);
	return PNFS_ATTEMPTED;
}

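/*
 * Free a layout header, first dropping any extents still tracked in its
 * extent trees.  All I/O against the layout has completed by this point,
 * so ext_tree_remove() is not expected to fail.
 */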
static void bl_free_layout_hdr(struct pnfs_layout_hdr *lo)
{
	struct pnfs_block_layout *bl = BLK_LO2EXT(lo);
	int err;

	dprintk("%s enter\n", __func__);

	err = ext_tree_remove(bl, true, 0, LLONG_MAX);
	WARN_ON(err);

	kfree_rcu(bl, bl_layout.plh_rcu);
}

static struct pnfs_layout_hdr *__bl_alloc_layout_hdr(struct inode *inode,
		gfp_t gfp_flags, bool is_scsi_layout)
{
	struct pnfs_block_layout *bl;

	dprintk("%s enter\n", __func__);
	bl = kzalloc(sizeof(*bl), gfp_flags);
	if (!bl)
		return NULL;

	bl->bl_ext_rw = RB_ROOT;
	bl->bl_ext_ro = RB_ROOT;
	spin_lock_init(&bl->bl_ext_lock);

	bl->bl_scsi_layout = is_scsi_layout;
	return &bl->bl_layout;
}

static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode,
						   gfp_t gfp_flags)
{
	return __bl_alloc_layout_hdr(inode, gfp_flags, false);
}

static struct pnfs_layout_hdr *sl_alloc_layout_hdr(struct inode *inode,
						   gfp_t gfp_flags)
{
	return __bl_alloc_layout_hdr(inode, gfp_flags, true);
}

static void bl_free_lseg(struct pnfs_layout_segment *lseg)
{
	dprintk("%s enter\n", __func__);
	kfree(lseg);
}

/* Tracks info needed to ensure extents in layout obey constraints of spec */
struct layout_verification {
	u32 mode;	/* R or RW */
	u64 start;	/* Expected start of next non-COW extent */
	u64 inval;	/* Start of INVAL coverage */
	u64 cowread;	/* End of COW read coverage */
};

/* Verify the extent meets the layout requirements of the pnfs-block draft,
 * section 2.3.1.
 */
static int verify_extent(struct pnfs_block_extent *be,
			 struct layout_verification *lv)
{
	if (lv->mode == IOMODE_READ) {
		if (be->be_state == PNFS_BLOCK_READWRITE_DATA ||
		    be->be_state == PNFS_BLOCK_INVALID_DATA)
			return -EIO;
		if (be->be_f_offset != lv->start)
			return -EIO;
		lv->start += be->be_length;
		return 0;
	}

	/* lv->mode == IOMODE_RW */
	if (be->be_state == PNFS_BLOCK_READWRITE_DATA) {
		if (be->be_f_offset != lv->start)
			return -EIO;
		if (lv->cowread > lv->start)
			return -EIO;
		lv->start += be->be_length;
		lv->inval = lv->start;
		return 0;
	} else if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
		if (be->be_f_offset != lv->start)
			return -EIO;
		lv->start += be->be_length;
		return 0;
	} else if (be->be_state == PNFS_BLOCK_READ_DATA) {
		if (be->be_f_offset > lv->start)
			return -EIO;
		if (be->be_f_offset < lv->inval)
			return -EIO;
		if (be->be_f_offset < lv->cowread)
			return -EIO;
		/* It looks like you might want to min this with lv->start,
		 * but you really don't.
		 */
		lv->inval = lv->inval + be->be_length;
		lv->cowread = be->be_f_offset + be->be_length;
		return 0;
	} else
		return -EIO;
}

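/*
 * Decode a 64-bit byte offset from the XDR stream and convert it to a
 * 512-byte sector number, rejecting values that are not sector-aligned.
 */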
static int decode_sector_number(__be32 **rp, sector_t *sp)
{
	uint64_t s;

	*rp = xdr_decode_hyper(*rp, &s);
	if (s & 0x1ff) {
		printk(KERN_WARNING "NFS: %s: sector not aligned\n", __func__);
		return -1;
	}
	*sp = s >> SECTOR_SHIFT;
	return 0;
}

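/*
 * Look up (or instantiate) a device id node.  A device marked unavailable
 * is retried once PNFS_DEVICE_RETRY_TIMEOUT has passed since the failure
 * was recorded, by deleting the stale node and fetching a fresh one;
 * otherwise -ENODEV is returned.
 */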
static struct nfs4_deviceid_node *
bl_find_get_deviceid(struct nfs_server *server,
		const struct nfs4_deviceid *id, const struct cred *cred,
		gfp_t gfp_mask)
{
	struct nfs4_deviceid_node *node;
	unsigned long start, end;

retry:
	node = nfs4_find_get_deviceid(server, id, cred, gfp_mask);
	if (!node)
		return ERR_PTR(-ENODEV);

	if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags) == 0)
		return node;

	end = jiffies;
	start = end - PNFS_DEVICE_RETRY_TIMEOUT;
	if (!time_in_range(node->timestamp_unavailable, start, end)) {
		nfs4_delete_deviceid(node->ld, node->nfs_client, id);
		goto retry;
	}
	return ERR_PTR(-ENODEV);
}

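/*
 * Decode one extent from the layout body: device id, file offset, length,
 * volume offset and state, in that order.  Each decoded extent is checked
 * with verify_extent() before being staged on @extents.
 */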
static int
bl_alloc_extent(struct xdr_stream *xdr, struct pnfs_layout_hdr *lo,
		struct layout_verification *lv, struct list_head *extents,
		gfp_t gfp_mask)
{
	struct pnfs_block_extent *be;
	struct nfs4_deviceid id;
	int error;
	__be32 *p;

	p = xdr_inline_decode(xdr, 28 + NFS4_DEVICEID4_SIZE);
	if (!p)
		return -EIO;

	be = kzalloc(sizeof(*be), GFP_NOFS);
	if (!be)
		return -ENOMEM;

	memcpy(&id, p, NFS4_DEVICEID4_SIZE);
	p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);

	be->be_device = bl_find_get_deviceid(NFS_SERVER(lo->plh_inode), &id,
						lo->plh_lc_cred, gfp_mask);
	if (IS_ERR(be->be_device)) {
		error = PTR_ERR(be->be_device);
		goto out_free_be;
	}

	/*
	 * The next three values are read in as bytes, but stored in the
	 * extent structure in 512-byte granularity.
	 */
	error = -EIO;
	if (decode_sector_number(&p, &be->be_f_offset) < 0)
		goto out_put_deviceid;
	if (decode_sector_number(&p, &be->be_length) < 0)
		goto out_put_deviceid;
	if (decode_sector_number(&p, &be->be_v_offset) < 0)
		goto out_put_deviceid;
	be->be_state = be32_to_cpup(p++);

	error = verify_extent(be, lv);
	if (error) {
		dprintk("%s: extent verification failed\n", __func__);
		goto out_put_deviceid;
	}

	list_add_tail(&be->be_list, extents);
	return 0;

out_put_deviceid:
	nfs4_put_deviceid_node(be->be_device);
out_free_be:
	kfree(be);
	return error;
}

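/*
 * Decode a LAYOUTGET reply into a layout segment.  Extents are decoded
 * into a temporary list first; only if the whole body decodes and passes
 * the coverage checks are they inserted into the layout's extent trees.
 */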
static struct pnfs_layout_segment *
bl_alloc_lseg(struct pnfs_layout_hdr *lo, struct nfs4_layoutget_res *lgr,
		gfp_t gfp_mask)
{
	struct layout_verification lv = {
		.mode = lgr->range.iomode,
		.start = lgr->range.offset >> SECTOR_SHIFT,
		.inval = lgr->range.offset >> SECTOR_SHIFT,
		.cowread = lgr->range.offset >> SECTOR_SHIFT,
	};
	struct pnfs_block_layout *bl = BLK_LO2EXT(lo);
	struct pnfs_layout_segment *lseg;
	struct xdr_buf buf;
	struct xdr_stream xdr;
	struct page *scratch;
	int status, i;
	uint32_t count;
	__be32 *p;
	LIST_HEAD(extents);

	dprintk("---> %s\n", __func__);

	lseg = kzalloc(sizeof(*lseg), gfp_mask);
	if (!lseg)
		return ERR_PTR(-ENOMEM);

	status = -ENOMEM;
	scratch = alloc_page(gfp_mask);
	if (!scratch)
		goto out;

	xdr_init_decode_pages(&xdr, &buf,
			lgr->layoutp->pages, lgr->layoutp->len);
	xdr_set_scratch_page(&xdr, scratch);

	status = -EIO;
	p = xdr_inline_decode(&xdr, 4);
	if (unlikely(!p))
		goto out_free_scratch;

	count = be32_to_cpup(p++);
	dprintk("%s: number of extents %d\n", __func__, count);

	/*
	 * Decode individual extents, putting them in temporary staging area
	 * until whole layout is decoded to make error recovery easier.
	 */
	for (i = 0; i < count; i++) {
		status = bl_alloc_extent(&xdr, lo, &lv, &extents, gfp_mask);
		if (status)
			goto process_extents;
	}

	if (lgr->range.offset + lgr->range.length !=
			lv.start << SECTOR_SHIFT) {
		dprintk("%s Final length mismatch\n", __func__);
		status = -EIO;
		goto process_extents;
	}

	if (lv.start < lv.cowread) {
		dprintk("%s Final uncovered COW extent\n", __func__);
		status = -EIO;
	}

process_extents:
	while (!list_empty(&extents)) {
		struct pnfs_block_extent *be =
			list_first_entry(&extents, struct pnfs_block_extent,
					 be_list);
		list_del(&be->be_list);

		if (!status)
			status = ext_tree_insert(bl, be);

		if (status) {
			nfs4_put_deviceid_node(be->be_device);
			kfree(be);
		}
	}

out_free_scratch:
	__free_page(scratch);
out:
	dprintk("%s returns %d\n", __func__, status);
	switch (status) {
	case -ENODEV:
		/* Our extent block devices are unavailable */
		set_bit(NFS_LSEG_UNAVAILABLE, &lseg->pls_flags);
		fallthrough;
	case 0:
		return lseg;
	default:
		kfree(lseg);
		return ERR_PTR(status);
	}
}

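/*
 * Remove the extents covering a returned range from the layout.  Ranges
 * that are not block-size aligned are ignored rather than partially
 * returned.
 */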
static void
bl_return_range(struct pnfs_layout_hdr *lo,
		struct pnfs_layout_range *range)
{
	struct pnfs_block_layout *bl = BLK_LO2EXT(lo);
	sector_t offset = range->offset >> SECTOR_SHIFT, end;

	if (range->offset % 8) {
		dprintk("%s: offset %lld not block size aligned\n",
			__func__, range->offset);
		return;
	}

	if (range->length != NFS4_MAX_UINT64) {
		if (range->length % 8) {
			dprintk("%s: length %lld not block size aligned\n",
				__func__, range->length);
			return;
		}

		end = offset + (range->length >> SECTOR_SHIFT);
	} else {
		end = round_down(NFS4_MAX_UINT64, PAGE_SIZE);
	}

	ext_tree_remove(bl, range->iomode & IOMODE_RW, offset, end);
}

static int
bl_prepare_layoutcommit(struct nfs4_layoutcommit_args *arg)
{
	return ext_tree_prepare_commit(arg);
}

static void
bl_cleanup_layoutcommit(struct nfs4_layoutcommit_data *lcdata)
{
	ext_tree_mark_committed(&lcdata->args, lcdata->res.status);
}

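/*
 * Sanity-check the server's block size at mount time: it must be present
 * and no larger than PAGE_SIZE, since the I/O paths above operate on
 * whole pages.
 */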
static int
bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh)
{
	dprintk("%s enter\n", __func__);

	if (server->pnfs_blksize == 0) {
		dprintk("%s Server did not return blksize\n", __func__);
		return -EINVAL;
	}
	if (server->pnfs_blksize > PAGE_SIZE) {
		printk(KERN_ERR "%s: pNFS blksize %d not supported.\n",
			__func__, server->pnfs_blksize);
		return -EINVAL;
	}

	return 0;
}

static bool
is_aligned_req(struct nfs_pageio_descriptor *pgio,
		struct nfs_page *req, unsigned int alignment, bool is_write)
{
	/*
	 * Always accept buffered writes, higher layers take care of the
	 * right alignment.
	 */
	if (pgio->pg_dreq == NULL)
		return true;

	if (!IS_ALIGNED(req->wb_offset, alignment))
		return false;

	if (IS_ALIGNED(req->wb_bytes, alignment))
		return true;

	if (is_write &&
	    (req_offset(req) + req->wb_bytes == i_size_read(pgio->pg_inode))) {
		/*
		 * If the write goes up to the inode size, just write
		 * the full page.  Data past the inode size is
		 * guaranteed to be zeroed by the higher level client
		 * code, and this behaviour is mandated by RFC 5663
		 * section 2.3.2.
		 */
		return true;
	}

	return false;
}

static void
bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
{
	if (!is_aligned_req(pgio, req, SECTOR_SIZE, false)) {
		nfs_pageio_reset_read_mds(pgio);
		return;
	}

	pnfs_generic_pg_init_read(pgio, req);

	if (pgio->pg_lseg &&
		test_bit(NFS_LSEG_UNAVAILABLE, &pgio->pg_lseg->pls_flags)) {
		pnfs_error_mark_layout_for_return(pgio->pg_inode, pgio->pg_lseg);
		pnfs_set_lo_fail(pgio->pg_lseg);
		nfs_pageio_reset_read_mds(pgio);
	}
}

/*
 * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
 * of bytes (maximum @req->wb_bytes) that can be coalesced.
 */
static size_t
bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
		struct nfs_page *req)
{
	if (!is_aligned_req(pgio, req, SECTOR_SIZE, false))
		return 0;
	return pnfs_generic_pg_test(pgio, prev, req);
}

/*
 * Return the number of contiguous bytes for a given inode
 * starting at page frame idx.
 */
static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx)
{
	struct address_space *mapping = inode->i_mapping;
	pgoff_t end;

	/* Optimize common case that writes from 0 to end of file */
	end = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
	if (end != inode->i_mapping->nrpages) {
		rcu_read_lock();
		end = page_cache_next_miss(mapping, idx + 1, ULONG_MAX);
		rcu_read_unlock();
	}

	if (!end)
		return i_size_read(inode) - (idx << PAGE_SHIFT);
	else
		return (end - idx) << PAGE_SHIFT;
}

static void
bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
{
	u64 wb_size;

	if (!is_aligned_req(pgio, req, PAGE_SIZE, true)) {
		nfs_pageio_reset_write_mds(pgio);
		return;
	}

	if (pgio->pg_dreq == NULL)
		wb_size = pnfs_num_cont_bytes(pgio->pg_inode,
					      req->wb_index);
	else
		wb_size = nfs_dreq_bytes_left(pgio->pg_dreq);

	pnfs_generic_pg_init_write(pgio, req, wb_size);

	if (pgio->pg_lseg &&
		test_bit(NFS_LSEG_UNAVAILABLE, &pgio->pg_lseg->pls_flags)) {

		pnfs_error_mark_layout_for_return(pgio->pg_inode, pgio->pg_lseg);
		pnfs_set_lo_fail(pgio->pg_lseg);
		nfs_pageio_reset_write_mds(pgio);
	}
}

/*
 * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
 * of bytes (maximum @req->wb_bytes) that can be coalesced.
 */
static size_t
bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
		 struct nfs_page *req)
{
	if (!is_aligned_req(pgio, req, PAGE_SIZE, true))
		return 0;
	return pnfs_generic_pg_test(pgio, prev, req);
}

static const struct nfs_pageio_ops bl_pg_read_ops = {
	.pg_init = bl_pg_init_read,
	.pg_test = bl_pg_test_read,
	.pg_doio = pnfs_generic_pg_readpages,
	.pg_cleanup = pnfs_generic_pg_cleanup,
};

static const struct nfs_pageio_ops bl_pg_write_ops = {
	.pg_init = bl_pg_init_write,
	.pg_test = bl_pg_test_write,
	.pg_doio = pnfs_generic_pg_writepages,
	.pg_cleanup = pnfs_generic_pg_cleanup,
};

static struct pnfs_layoutdriver_type blocklayout_type = {
	.id				= LAYOUT_BLOCK_VOLUME,
	.name				= "LAYOUT_BLOCK_VOLUME",
	.owner				= THIS_MODULE,
	.flags				= PNFS_LAYOUTRET_ON_SETATTR |
					  PNFS_LAYOUTRET_ON_ERROR |
					  PNFS_READ_WHOLE_PAGE,
	.read_pagelist			= bl_read_pagelist,
	.write_pagelist			= bl_write_pagelist,
	.alloc_layout_hdr		= bl_alloc_layout_hdr,
	.free_layout_hdr		= bl_free_layout_hdr,
	.alloc_lseg			= bl_alloc_lseg,
	.free_lseg			= bl_free_lseg,
	.return_range			= bl_return_range,
	.prepare_layoutcommit		= bl_prepare_layoutcommit,
	.cleanup_layoutcommit		= bl_cleanup_layoutcommit,
	.set_layoutdriver		= bl_set_layoutdriver,
	.alloc_deviceid_node		= bl_alloc_deviceid_node,
	.free_deviceid_node		= bl_free_deviceid_node,
	.pg_read_ops			= &bl_pg_read_ops,
	.pg_write_ops			= &bl_pg_write_ops,
	.sync				= pnfs_generic_sync,
};

static struct pnfs_layoutdriver_type scsilayout_type = {
	.id				= LAYOUT_SCSI,
	.name				= "LAYOUT_SCSI",
	.owner				= THIS_MODULE,
	.flags				= PNFS_LAYOUTRET_ON_SETATTR |
					  PNFS_LAYOUTRET_ON_ERROR |
					  PNFS_READ_WHOLE_PAGE,
	.read_pagelist			= bl_read_pagelist,
	.write_pagelist			= bl_write_pagelist,
	.alloc_layout_hdr		= sl_alloc_layout_hdr,
	.free_layout_hdr		= bl_free_layout_hdr,
	.alloc_lseg			= bl_alloc_lseg,
	.free_lseg			= bl_free_lseg,
	.return_range			= bl_return_range,
	.prepare_layoutcommit		= bl_prepare_layoutcommit,
	.cleanup_layoutcommit		= bl_cleanup_layoutcommit,
	.set_layoutdriver		= bl_set_layoutdriver,
	.alloc_deviceid_node		= bl_alloc_deviceid_node,
	.free_deviceid_node		= bl_free_deviceid_node,
	.pg_read_ops			= &bl_pg_read_ops,
	.pg_write_ops			= &bl_pg_write_ops,
	.sync				= pnfs_generic_sync,
};

static int __init nfs4blocklayout_init(void)
{
	int ret;

	dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__);

	ret = bl_init_pipefs();
	if (ret)
		goto out;

	ret = pnfs_register_layoutdriver(&blocklayout_type);
	if (ret)
		goto out_cleanup_pipe;

	ret = pnfs_register_layoutdriver(&scsilayout_type);
	if (ret)
		goto out_unregister_block;
	return 0;

out_unregister_block:
	pnfs_unregister_layoutdriver(&blocklayout_type);
out_cleanup_pipe:
	bl_cleanup_pipefs();
out:
	return ret;
}

static void __exit nfs4blocklayout_exit(void)
{
	dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n",
	       __func__);

	pnfs_unregister_layoutdriver(&scsilayout_type);
	pnfs_unregister_layoutdriver(&blocklayout_type);
	bl_cleanup_pipefs();
}

MODULE_ALIAS("nfs-layouttype4-3");	/* LAYOUT_BLOCK_VOLUME */
MODULE_ALIAS("nfs-layouttype4-5");	/* LAYOUT_SCSI */

module_init(nfs4blocklayout_init);
module_exit(nfs4blocklayout_exit);