// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 1991, 1992  Linus Torvalds
 * Copyright (C) 2001  Andrea Arcangeli <andrea@suse.de> SuSE
 * Copyright (C) 2016 - 2020 Christoph Hellwig
 */
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/uio.h>
#include <linux/namei.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/falloc.h>
#include <linux/suspend.h>
#include <linux/fs.h>
#include <linux/module.h>
#include "blk.h"

static inline struct inode *bdev_file_inode(struct file *file)
{
    return file->f_mapping->host;
}

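/*
 * A block device maps file offsets to device blocks 1:1, so get_block
 * only has to fill in the bdev and the block number; no lookup or
 * allocation is needed and the buffer can always be marked mapped.
 */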
static int blkdev_get_block(struct inode *inode, sector_t iblock,
        struct buffer_head *bh, int create)
{
    bh->b_bdev = I_BDEV(inode);
    bh->b_blocknr = iblock;
    set_buffer_mapped(bh);
    return 0;
}

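/*
 * Build the request flags for a direct-I/O write bio.  O_DSYNC-style
 * writes get REQ_FUA so the data is durable when the bio completes,
 * which avoids having to issue a separate flush from completion context.
 */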
static blk_opf_t dio_bio_write_op(struct kiocb *iocb)
{
    blk_opf_t opf = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;

    /* avoid the need for an I/O completion work item */
    if (iocb_is_dsync(iocb))
        opf |= REQ_FUA;
    return opf;
}

static bool blkdev_dio_unaligned(struct block_device *bdev, loff_t pos,
                  struct iov_iter *iter)
{
    return pos & (bdev_logical_block_size(bdev) - 1) ||
        !bdev_iter_is_aligned(bdev, iter);
}

#define DIO_INLINE_BIO_VECS 4

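/*
 * Fast path for small synchronous direct I/O: the request fits in a
 * single bio, so use an on-stack bio (and an on-stack bio_vec array for
 * up to DIO_INLINE_BIO_VECS segments) and wait for completion inline
 * with submit_bio_wait() instead of allocating a blkdev_dio.
 */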
static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
        struct iov_iter *iter, unsigned int nr_pages)
{
    struct block_device *bdev = iocb->ki_filp->private_data;
    struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs;
    loff_t pos = iocb->ki_pos;
    bool should_dirty = false;
    struct bio bio;
    ssize_t ret;

    if (blkdev_dio_unaligned(bdev, pos, iter))
        return -EINVAL;

    if (nr_pages <= DIO_INLINE_BIO_VECS)
        vecs = inline_vecs;
    else {
        vecs = kmalloc_array(nr_pages, sizeof(struct bio_vec),
                     GFP_KERNEL);
        if (!vecs)
            return -ENOMEM;
    }

    if (iov_iter_rw(iter) == READ) {
        bio_init(&bio, bdev, vecs, nr_pages, REQ_OP_READ);
        if (user_backed_iter(iter))
            should_dirty = true;
    } else {
        bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb));
    }
    bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT;
    bio.bi_ioprio = iocb->ki_ioprio;

    ret = bio_iov_iter_get_pages(&bio, iter);
    if (unlikely(ret))
        goto out;
    ret = bio.bi_iter.bi_size;

    if (iov_iter_rw(iter) == WRITE)
        task_io_account_write(ret);

    if (iocb->ki_flags & IOCB_NOWAIT)
        bio.bi_opf |= REQ_NOWAIT;

    submit_bio_wait(&bio);

    bio_release_pages(&bio, should_dirty);
    if (unlikely(bio.bi_status))
        ret = blk_status_to_errno(bio.bi_status);

out:
    if (vecs != inline_vecs)
        kfree(vecs);

    bio_uninit(&bio);

    return ret;
}

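/*
 * State for direct I/O that needs a heap-allocated bio: the blkdev_dio
 * is allocated from blkdev_dio_pool together with the first bio, which
 * is embedded at the end of the structure.  "ref" counts outstanding
 * bios on the multi-bio path, and "flags" records whether the request
 * is synchronous and whether user pages must be dirtied on completion.
 */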
enum {
    DIO_SHOULD_DIRTY    = 1,
    DIO_IS_SYNC     = 2,
};

struct blkdev_dio {
    union {
        struct kiocb        *iocb;
        struct task_struct  *waiter;
    };
    size_t          size;
    atomic_t        ref;
    unsigned int        flags;
    struct bio      bio ____cacheline_aligned_in_smp;
};

static struct bio_set blkdev_dio_pool;

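/*
 * Completion handler for the multi-bio path.  Each bio drops one
 * reference on the dio; the last one either completes the aio iocb or
 * wakes the task sleeping in __blkdev_direct_IO() for synchronous I/O.
 */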
static void blkdev_bio_end_io(struct bio *bio)
{
    struct blkdev_dio *dio = bio->bi_private;
    bool should_dirty = dio->flags & DIO_SHOULD_DIRTY;

    if (bio->bi_status && !dio->bio.bi_status)
        dio->bio.bi_status = bio->bi_status;

    if (atomic_dec_and_test(&dio->ref)) {
        if (!(dio->flags & DIO_IS_SYNC)) {
            struct kiocb *iocb = dio->iocb;
            ssize_t ret;

            WRITE_ONCE(iocb->private, NULL);

            if (likely(!dio->bio.bi_status)) {
                ret = dio->size;
                iocb->ki_pos += ret;
            } else {
                ret = blk_status_to_errno(dio->bio.bi_status);
            }

            dio->iocb->ki_complete(iocb, ret);
            bio_put(&dio->bio);
        } else {
            struct task_struct *waiter = dio->waiter;

            WRITE_ONCE(dio->waiter, NULL);
            blk_wake_io_task(waiter);
        }
    }

    if (should_dirty) {
        bio_check_pages_dirty(bio);
    } else {
        bio_release_pages(bio, false);
        bio_put(bio);
    }
}

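/*
 * General direct I/O path for requests that may need more than one bio:
 * split the iov_iter into BIO_MAX_VECS-sized bios inside a plug, track
 * them with a shared blkdev_dio, and either return -EIOCBQUEUED for aio
 * or sleep until the last bio completes for synchronous callers.
 */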
static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
        unsigned int nr_pages)
{
    struct block_device *bdev = iocb->ki_filp->private_data;
    struct blk_plug plug;
    struct blkdev_dio *dio;
    struct bio *bio;
    bool is_read = (iov_iter_rw(iter) == READ), is_sync;
    blk_opf_t opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb);
    loff_t pos = iocb->ki_pos;
    int ret = 0;

    if (blkdev_dio_unaligned(bdev, pos, iter))
        return -EINVAL;

    if (iocb->ki_flags & IOCB_ALLOC_CACHE)
        opf |= REQ_ALLOC_CACHE;
    bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL,
                   &blkdev_dio_pool);
    dio = container_of(bio, struct blkdev_dio, bio);
    atomic_set(&dio->ref, 1);
    /*
     * Grab an extra reference to ensure the dio structure which is embedded
     * into the first bio stays around.
     */
    bio_get(bio);

    is_sync = is_sync_kiocb(iocb);
    if (is_sync) {
        dio->flags = DIO_IS_SYNC;
        dio->waiter = current;
    } else {
        dio->flags = 0;
        dio->iocb = iocb;
    }

    dio->size = 0;
    if (is_read && user_backed_iter(iter))
        dio->flags |= DIO_SHOULD_DIRTY;

    blk_start_plug(&plug);

    for (;;) {
        bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
        bio->bi_private = dio;
        bio->bi_end_io = blkdev_bio_end_io;
        bio->bi_ioprio = iocb->ki_ioprio;

        ret = bio_iov_iter_get_pages(bio, iter);
        if (unlikely(ret)) {
            bio->bi_status = BLK_STS_IOERR;
            bio_endio(bio);
            break;
        }

        if (is_read) {
            if (dio->flags & DIO_SHOULD_DIRTY)
                bio_set_pages_dirty(bio);
        } else {
            task_io_account_write(bio->bi_iter.bi_size);
        }
        if (iocb->ki_flags & IOCB_NOWAIT)
            bio->bi_opf |= REQ_NOWAIT;

        dio->size += bio->bi_iter.bi_size;
        pos += bio->bi_iter.bi_size;

        nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS);
        if (!nr_pages) {
            submit_bio(bio);
            break;
        }
        atomic_inc(&dio->ref);
        submit_bio(bio);
        bio = bio_alloc(bdev, nr_pages, opf, GFP_KERNEL);
    }

    blk_finish_plug(&plug);

    if (!is_sync)
        return -EIOCBQUEUED;

    for (;;) {
        set_current_state(TASK_UNINTERRUPTIBLE);
        if (!READ_ONCE(dio->waiter))
            break;
        blk_io_schedule();
    }
    __set_current_state(TASK_RUNNING);

    if (!ret)
        ret = blk_status_to_errno(dio->bio.bi_status);
    if (likely(!ret))
        ret = dio->size;

    bio_put(&dio->bio);
    return ret;
}

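/*
 * Completion handler for the single-bio async path: complete the iocb
 * directly from bio completion and release or dirty the pinned pages.
 */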
static void blkdev_bio_end_io_async(struct bio *bio)
{
    struct blkdev_dio *dio = container_of(bio, struct blkdev_dio, bio);
    struct kiocb *iocb = dio->iocb;
    ssize_t ret;

    WRITE_ONCE(iocb->private, NULL);

    if (likely(!bio->bi_status)) {
        ret = dio->size;
        iocb->ki_pos += ret;
    } else {
        ret = blk_status_to_errno(bio->bi_status);
    }

    iocb->ki_complete(iocb, ret);

    if (dio->flags & DIO_SHOULD_DIRTY) {
        bio_check_pages_dirty(bio);
    } else {
        bio_release_pages(bio, false);
        bio_put(bio);
    }
}

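/*
 * Async direct I/O that fits in a single bio: no per-bio refcounting is
 * needed, the bio is submitted and -EIOCBQUEUED returned immediately.
 * IOCB_HIPRI requests are marked REQ_POLLED and the bio is stashed in
 * iocb->private so iocb_bio_iopoll() can poll for its completion.
 */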
static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
                    struct iov_iter *iter,
                    unsigned int nr_pages)
{
    struct block_device *bdev = iocb->ki_filp->private_data;
    bool is_read = iov_iter_rw(iter) == READ;
    blk_opf_t opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb);
    struct blkdev_dio *dio;
    struct bio *bio;
    loff_t pos = iocb->ki_pos;
    int ret = 0;

    if (blkdev_dio_unaligned(bdev, pos, iter))
        return -EINVAL;

    if (iocb->ki_flags & IOCB_ALLOC_CACHE)
        opf |= REQ_ALLOC_CACHE;
    bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL,
                   &blkdev_dio_pool);
    dio = container_of(bio, struct blkdev_dio, bio);
    dio->flags = 0;
    dio->iocb = iocb;
    bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
    bio->bi_end_io = blkdev_bio_end_io_async;
    bio->bi_ioprio = iocb->ki_ioprio;

    if (iov_iter_is_bvec(iter)) {
        /*
         * Users don't rely on the iterator being in any particular
         * state for async I/O returning -EIOCBQUEUED, hence we can
         * avoid expensive iov_iter_advance(). Bypass
         * bio_iov_iter_get_pages() and set the bvec directly.
         */
        bio_iov_bvec_set(bio, iter);
    } else {
        ret = bio_iov_iter_get_pages(bio, iter);
        if (unlikely(ret)) {
            bio_put(bio);
            return ret;
        }
    }
    dio->size = bio->bi_iter.bi_size;

    if (is_read) {
        if (user_backed_iter(iter)) {
            dio->flags |= DIO_SHOULD_DIRTY;
            bio_set_pages_dirty(bio);
        }
    } else {
        task_io_account_write(bio->bi_iter.bi_size);
    }

    if (iocb->ki_flags & IOCB_HIPRI) {
        bio->bi_opf |= REQ_POLLED | REQ_NOWAIT;
        submit_bio(bio);
        WRITE_ONCE(iocb->private, bio);
    } else {
        if (iocb->ki_flags & IOCB_NOWAIT)
            bio->bi_opf |= REQ_NOWAIT;
        submit_bio(bio);
    }
    return -EIOCBQUEUED;
}

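/*
 * Dispatch direct I/O to one of the three implementations above: the
 * on-stack single-bio path for small synchronous requests, the async
 * single-bio path when the data still fits in BIO_MAX_VECS segments,
 * and the general multi-bio path for everything larger.
 */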
static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
    unsigned int nr_pages;

    if (!iov_iter_count(iter))
        return 0;

    nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1);
    if (likely(nr_pages <= BIO_MAX_VECS)) {
        if (is_sync_kiocb(iocb))
            return __blkdev_direct_IO_simple(iocb, iter, nr_pages);
        return __blkdev_direct_IO_async(iocb, iter, nr_pages);
    }
    return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages));
}

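/*
 * Buffered I/O on the block device node goes through the generic
 * buffer_head/mpage helpers; blkdev_get_block() supplies the trivial
 * 1:1 block mapping they need.
 */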
static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
{
    return block_write_full_page(page, blkdev_get_block, wbc);
}

static int blkdev_read_folio(struct file *file, struct folio *folio)
{
    return block_read_full_folio(folio, blkdev_get_block);
}

static void blkdev_readahead(struct readahead_control *rac)
{
    mpage_readahead(rac, blkdev_get_block);
}

static int blkdev_write_begin(struct file *file, struct address_space *mapping,
        loff_t pos, unsigned len, struct page **pagep, void **fsdata)
{
    return block_write_begin(mapping, pos, len, pagep, blkdev_get_block);
}

static int blkdev_write_end(struct file *file, struct address_space *mapping,
        loff_t pos, unsigned len, unsigned copied, struct page *page,
        void *fsdata)
{
    int ret;
    ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);

    unlock_page(page);
    put_page(page);

    return ret;
}

static int blkdev_writepages(struct address_space *mapping,
                 struct writeback_control *wbc)
{
    return generic_writepages(mapping, wbc);
}

const struct address_space_operations def_blk_aops = {
    .dirty_folio    = block_dirty_folio,
    .invalidate_folio = block_invalidate_folio,
    .read_folio = blkdev_read_folio,
    .readahead  = blkdev_readahead,
    .writepage  = blkdev_writepage,
    .write_begin    = blkdev_write_begin,
    .write_end  = blkdev_write_end,
    .writepages = blkdev_writepages,
    .direct_IO  = blkdev_direct_IO,
    .migrate_folio  = buffer_migrate_folio_norefs,
    .is_dirty_writeback = buffer_check_dirty_writeback,
};

/*
 * For a block special file, file_inode(file)->i_size is zero, so we
 * compute the size by hand (just as in block_read/write above).
 */
static loff_t blkdev_llseek(struct file *file, loff_t offset, int whence)
{
    struct inode *bd_inode = bdev_file_inode(file);
    loff_t retval;

    inode_lock(bd_inode);
    retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode));
    inode_unlock(bd_inode);
    return retval;
}

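/*
 * fsync on a block device: write back and wait on the dirty page cache
 * for the requested range, then flush the device's volatile write cache.
 */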
static int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
        int datasync)
{
    struct block_device *bdev = filp->private_data;
    int error;

    error = file_write_and_wait_range(filp, start, end);
    if (error)
        return error;

    /*
     * There is no need to serialise calls to blkdev_issue_flush with
     * i_mutex and doing so causes performance issues with concurrent
     * O_SYNC writers to a block device.
     */
    error = blkdev_issue_flush(bdev);
    if (error == -EOPNOTSUPP)
        error = 0;

    return error;
}

static int blkdev_open(struct inode *inode, struct file *filp)
{
    struct block_device *bdev;

    /*
     * Preserve backwards compatibility and allow large file access
     * even if userspace doesn't ask for it explicitly. Some mkfs
     * binaries need it. We might want to drop this workaround
     * during an unstable branch.
     */
    filp->f_flags |= O_LARGEFILE;
    filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;

    if (filp->f_flags & O_NDELAY)
        filp->f_mode |= FMODE_NDELAY;
    if (filp->f_flags & O_EXCL)
        filp->f_mode |= FMODE_EXCL;
    if ((filp->f_flags & O_ACCMODE) == 3)
        filp->f_mode |= FMODE_WRITE_IOCTL;

    bdev = blkdev_get_by_dev(inode->i_rdev, filp->f_mode, filp);
    if (IS_ERR(bdev))
        return PTR_ERR(bdev);

    filp->private_data = bdev;
    filp->f_mapping = bdev->bd_inode->i_mapping;
    filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
    return 0;
}

static int blkdev_close(struct inode *inode, struct file *filp)
{
    struct block_device *bdev = filp->private_data;

    blkdev_put(bdev, filp->f_mode);
    return 0;
}

/*
 * Write data to the block device.  Only intended for the block device itself
 * and the raw driver, which basically is a fake block device.
 *
 * Does not take i_mutex for the write and thus is not for general purpose
 * use.
 */
static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
    struct block_device *bdev = iocb->ki_filp->private_data;
    struct inode *bd_inode = bdev->bd_inode;
    loff_t size = bdev_nr_bytes(bdev);
    struct blk_plug plug;
    size_t shorted = 0;
    ssize_t ret;

    if (bdev_read_only(bdev))
        return -EPERM;

    if (IS_SWAPFILE(bd_inode) && !is_hibernate_resume_dev(bd_inode->i_rdev))
        return -ETXTBSY;

    if (!iov_iter_count(from))
        return 0;

    if (iocb->ki_pos >= size)
        return -ENOSPC;

    if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT)
        return -EOPNOTSUPP;

    size -= iocb->ki_pos;
    if (iov_iter_count(from) > size) {
        shorted = iov_iter_count(from) - size;
        iov_iter_truncate(from, size);
    }

    blk_start_plug(&plug);
    ret = __generic_file_write_iter(iocb, from);
    if (ret > 0)
        ret = generic_write_sync(iocb, ret);
    iov_iter_reexpand(from, iov_iter_count(from) + shorted);
    blk_finish_plug(&plug);
    return ret;
}

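/*
 * Read from the block device.  The iterator is truncated at the device
 * size, O_DIRECT reads go through blkdev_direct_IO() (falling back to
 * buffered reading for any remainder the direct path did not cover),
 * and the iterator is re-expanded before returning.
 */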
static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
    struct block_device *bdev = iocb->ki_filp->private_data;
    loff_t size = bdev_nr_bytes(bdev);
    loff_t pos = iocb->ki_pos;
    size_t shorted = 0;
    ssize_t ret = 0;
    size_t count;

    if (unlikely(pos + iov_iter_count(to) > size)) {
        if (pos >= size)
            return 0;
        size -= pos;
        shorted = iov_iter_count(to) - size;
        iov_iter_truncate(to, size);
    }

    count = iov_iter_count(to);
    if (!count)
        goto reexpand; /* skip atime */

    if (iocb->ki_flags & IOCB_DIRECT) {
        struct address_space *mapping = iocb->ki_filp->f_mapping;

        if (iocb->ki_flags & IOCB_NOWAIT) {
            if (filemap_range_needs_writeback(mapping, pos,
                              pos + count - 1)) {
                ret = -EAGAIN;
                goto reexpand;
            }
        } else {
            ret = filemap_write_and_wait_range(mapping, pos,
                               pos + count - 1);
            if (ret < 0)
                goto reexpand;
        }

        file_accessed(iocb->ki_filp);

        ret = blkdev_direct_IO(iocb, to);
        if (ret >= 0) {
            iocb->ki_pos += ret;
            count -= ret;
        }
        iov_iter_revert(to, count - iov_iter_count(to));
        if (ret < 0 || !count)
            goto reexpand;
    }

    ret = filemap_read(iocb, to, ret);

reexpand:
    if (unlikely(shorted))
        iov_iter_reexpand(to, iov_iter_count(to) + shorted);
    return ret;
}

#define BLKDEV_FALLOC_FL_SUPPORTED                  \
        (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |       \
         FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE)

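/*
 * fallocate() on a block device: after invalidating the page cache for
 * the range, FALLOC_FL_ZERO_RANGE is implemented by writing zeroes,
 * FALLOC_FL_PUNCH_HOLE by zeroing without a write fallback, and
 * FALLOC_FL_PUNCH_HOLE | FALLOC_FL_NO_HIDE_STALE by discarding the
 * range.  Offsets and lengths must be aligned to the logical block size.
 */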
static long blkdev_fallocate(struct file *file, int mode, loff_t start,
                 loff_t len)
{
    struct inode *inode = bdev_file_inode(file);
    struct block_device *bdev = I_BDEV(inode);
    loff_t end = start + len - 1;
    loff_t isize;
    int error;

    /* Fail if we don't recognize the flags. */
    if (mode & ~BLKDEV_FALLOC_FL_SUPPORTED)
        return -EOPNOTSUPP;

    /* Don't go off the end of the device. */
    isize = bdev_nr_bytes(bdev);
    if (start >= isize)
        return -EINVAL;
    if (end >= isize) {
        if (mode & FALLOC_FL_KEEP_SIZE) {
            len = isize - start;
            end = start + len - 1;
        } else
            return -EINVAL;
    }

    /*
     * Don't allow IO that isn't aligned to logical block size.
     */
    if ((start | len) & (bdev_logical_block_size(bdev) - 1))
        return -EINVAL;

    filemap_invalidate_lock(inode->i_mapping);

    /* Invalidate the page cache, including dirty pages. */
    error = truncate_bdev_range(bdev, file->f_mode, start, end);
    if (error)
        goto fail;

    switch (mode) {
    case FALLOC_FL_ZERO_RANGE:
    case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE:
        error = blkdev_issue_zeroout(bdev, start >> SECTOR_SHIFT,
                         len >> SECTOR_SHIFT, GFP_KERNEL,
                         BLKDEV_ZERO_NOUNMAP);
        break;
    case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE:
        error = blkdev_issue_zeroout(bdev, start >> SECTOR_SHIFT,
                         len >> SECTOR_SHIFT, GFP_KERNEL,
                         BLKDEV_ZERO_NOFALLBACK);
        break;
    case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
        error = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT,
                         len >> SECTOR_SHIFT, GFP_KERNEL);
        break;
    default:
        error = -EOPNOTSUPP;
    }

 fail:
    filemap_invalidate_unlock(inode->i_mapping);
    return error;
}

const struct file_operations def_blk_fops = {
    .open       = blkdev_open,
    .release    = blkdev_close,
    .llseek     = blkdev_llseek,
    .read_iter  = blkdev_read_iter,
    .write_iter = blkdev_write_iter,
    .iopoll     = iocb_bio_iopoll,
    .mmap       = generic_file_mmap,
    .fsync      = blkdev_fsync,
    .unlocked_ioctl = blkdev_ioctl,
#ifdef CONFIG_COMPAT
    .compat_ioctl   = compat_blkdev_ioctl,
#endif
    .splice_read    = generic_file_splice_read,
    .splice_write   = iter_file_splice_write,
    .fallocate  = blkdev_fallocate,
};

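/*
 * The bio_set backing blkdev_dio_pool reserves space in front of each
 * bio for the containing struct blkdev_dio (hence the offsetof()), and
 * enables the per-cpu bio cache used with REQ_ALLOC_CACHE above.
 */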
static __init int blkdev_init(void)
{
    return bioset_init(&blkdev_dio_pool, 4,
                offsetof(struct blkdev_dio, bio),
                BIOSET_NEED_BVECS|BIOSET_PERCPU_CACHE);
}
module_init(blkdev_init);