// SPDX-License-Identifier: GPL-2.0
/*
 * Functions related to mapping data to requests
 */
#include <linux/kernel.h>
#include <linux/sched/task_stack.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/uio.h>

#include "blk.h"

struct bio_map_data {
    bool is_our_pages : 1;
    bool is_null_mapped : 1;
    struct iov_iter iter;
    struct iovec iov[];
};

static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data,
                           gfp_t gfp_mask)
{
    struct bio_map_data *bmd;

    if (data->nr_segs > UIO_MAXIOV)
        return NULL;

    bmd = kmalloc(struct_size(bmd, iov, data->nr_segs), gfp_mask);
    if (!bmd)
        return NULL;
    memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs);
    bmd->iter = *data;
    bmd->iter.iov = bmd->iov;
    return bmd;
}

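/*
 * Illustrative sketch (hypothetical, not part of the upstream file): the
 * allocation above relies on a C99 flexible array member plus struct_size()
 * so that the header and the copied iovecs live in a single kmalloc'd block.
 * A minimal stand-alone version of the same pattern, with made-up names,
 * would look roughly like this:
 */
#if 0
struct vec_copy {
    unsigned int nr;        /* number of elements in vec[] */
    struct iovec vec[];     /* flexible array member, sized at runtime */
};

static struct vec_copy *vec_copy_alloc(const struct iovec *src,
                       unsigned int nr, gfp_t gfp)
{
    /* struct_size() is sizeof(*vc) + nr * sizeof(vc->vec[0]), overflow-checked */
    struct vec_copy *vc = kmalloc(struct_size(vc, vec, nr), gfp);

    if (!vc)
        return NULL;
    vc->nr = nr;
    memcpy(vc->vec, src, nr * sizeof(*src));
    return vc;
}
#endif
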
/**
 * bio_copy_from_iter - copy all pages from iov_iter to bio
 * @bio: The &struct bio which describes the I/O as destination
 * @iter: iov_iter as source
 *
 * Copy all pages from iov_iter to bio.
 * Returns 0 on success, or error on failure.
 */
static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter)
{
    struct bio_vec *bvec;
    struct bvec_iter_all iter_all;

    bio_for_each_segment_all(bvec, bio, iter_all) {
        ssize_t ret;

        ret = copy_page_from_iter(bvec->bv_page,
                      bvec->bv_offset,
                      bvec->bv_len,
                      iter);

        if (!iov_iter_count(iter))
            break;

        if (ret < bvec->bv_len)
            return -EFAULT;
    }

    return 0;
}

/**
 * bio_copy_to_iter - copy all pages from bio to iov_iter
 * @bio: The &struct bio which describes the I/O as source
 * @iter: iov_iter as destination
 *
 * Copy all pages from bio to iov_iter.
 * Returns 0 on success, or error on failure.
 */
static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
{
    struct bio_vec *bvec;
    struct bvec_iter_all iter_all;

    bio_for_each_segment_all(bvec, bio, iter_all) {
        ssize_t ret;

        ret = copy_page_to_iter(bvec->bv_page,
                    bvec->bv_offset,
                    bvec->bv_len,
                    &iter);

        if (!iov_iter_count(&iter))
            break;

        if (ret < bvec->bv_len)
            return -EFAULT;
    }

    return 0;
}

/**
 *  bio_uncopy_user -   finish previously mapped bio
 *  @bio: bio being terminated
 *
 *  Free pages allocated from bio_copy_user_iov() and write back data
 *  to user space in case of a read.
 */
static int bio_uncopy_user(struct bio *bio)
{
    struct bio_map_data *bmd = bio->bi_private;
    int ret = 0;

    if (!bmd->is_null_mapped) {
        /*
         * if we're in a workqueue, the request is orphaned, so
         * don't copy into a random user address space, just free
         * and return -EINTR so user space doesn't expect any data.
         */
        if (!current->mm)
            ret = -EINTR;
        else if (bio_data_dir(bio) == READ)
            ret = bio_copy_to_iter(bio, bmd->iter);
        if (bmd->is_our_pages)
            bio_free_pages(bio);
    }
    kfree(bmd);
    return ret;
}

static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
        struct iov_iter *iter, gfp_t gfp_mask)
{
    struct bio_map_data *bmd;
    struct page *page;
    struct bio *bio;
    int i = 0, ret;
    int nr_pages;
    unsigned int len = iter->count;
    unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0;

    bmd = bio_alloc_map_data(iter, gfp_mask);
    if (!bmd)
        return -ENOMEM;

    /*
     * We need to do a deep copy of the iov_iter including the iovecs.
     * The caller provided iov might point to an on-stack or otherwise
     * shortlived one.
     */
    bmd->is_our_pages = !map_data;
    bmd->is_null_mapped = (map_data && map_data->null_mapped);

    nr_pages = bio_max_segs(DIV_ROUND_UP(offset + len, PAGE_SIZE));

    ret = -ENOMEM;
    bio = bio_kmalloc(nr_pages, gfp_mask);
    if (!bio)
        goto out_bmd;
    bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, req_op(rq));

    if (map_data) {
        nr_pages = 1 << map_data->page_order;
        i = map_data->offset / PAGE_SIZE;
    }
    while (len) {
        unsigned int bytes = PAGE_SIZE;

        bytes -= offset;

        if (bytes > len)
            bytes = len;

        if (map_data) {
            if (i == map_data->nr_entries * nr_pages) {
                ret = -ENOMEM;
                goto cleanup;
            }

            page = map_data->pages[i / nr_pages];
            page += (i % nr_pages);

            i++;
        } else {
            page = alloc_page(GFP_NOIO | gfp_mask);
            if (!page) {
                ret = -ENOMEM;
                goto cleanup;
            }
        }

        if (bio_add_pc_page(rq->q, bio, page, bytes, offset) < bytes) {
            if (!map_data)
                __free_page(page);
            break;
        }

        len -= bytes;
        offset = 0;
    }

    if (map_data)
        map_data->offset += bio->bi_iter.bi_size;

    /*
     * success
     */
    if ((iov_iter_rw(iter) == WRITE &&
         (!map_data || !map_data->null_mapped)) ||
        (map_data && map_data->from_user)) {
        ret = bio_copy_from_iter(bio, iter);
        if (ret)
            goto cleanup;
    } else {
        if (bmd->is_our_pages)
            zero_fill_bio(bio);
        iov_iter_advance(iter, bio->bi_iter.bi_size);
    }

    bio->bi_private = bmd;

    ret = blk_rq_append_bio(rq, bio);
    if (ret)
        goto cleanup;
    return 0;
cleanup:
    if (!map_data)
        bio_free_pages(bio);
    bio_uninit(bio);
    kfree(bio);
out_bmd:
    kfree(bmd);
    return ret;
}

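/*
 * Illustrative sketch (hypothetical, not part of the upstream file): how a
 * caller can drive the copy path above with its own preallocated pages via
 * struct rq_map_data, similar in spirit to what the SCSI generic driver does
 * for its reserved buffer. Field names follow the usage in
 * bio_copy_user_iov(); the request setup and page allocation are assumed to
 * have happened elsewhere.
 */
#if 0
static int example_map_with_preallocated_pages(struct request_queue *q,
                        struct request *rq,
                        struct page **pages,
                        unsigned short nr_pages,
                        void __user *ubuf,
                        unsigned long len)
{
    struct rq_map_data map_data = {
        .pages      = pages,    /* caller-owned page array */
        .page_order = 0,        /* allocation order of each entry */
        .nr_entries = nr_pages,
        .offset     = 0,        /* byte offset into pages[] */
        .null_mapped    = false,    /* ubuf is a real user address */
        .from_user  = false,
    };

    /* data is bounced through the supplied pages instead of pinning ubuf */
    return blk_rq_map_user(q, rq, &map_data, ubuf, len, GFP_KERNEL);
}
#endif
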
static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
        gfp_t gfp_mask)
{
    unsigned int max_sectors = queue_max_hw_sectors(rq->q);
    unsigned int nr_vecs = iov_iter_npages(iter, BIO_MAX_VECS);
    struct bio *bio;
    int ret;
    int j;

    if (!iov_iter_count(iter))
        return -EINVAL;

    bio = bio_kmalloc(nr_vecs, gfp_mask);
    if (!bio)
        return -ENOMEM;
    bio_init(bio, NULL, bio->bi_inline_vecs, nr_vecs, req_op(rq));

    while (iov_iter_count(iter)) {
        struct page **pages;
        ssize_t bytes;
        size_t offs, added = 0;
        int npages;

        bytes = iov_iter_get_pages_alloc2(iter, &pages, LONG_MAX, &offs);
        if (unlikely(bytes <= 0)) {
            ret = bytes ? bytes : -EFAULT;
            goto out_unmap;
        }

        npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);

        if (unlikely(offs & queue_dma_alignment(rq->q)))
            j = 0;
        else {
            for (j = 0; j < npages; j++) {
                struct page *page = pages[j];
                unsigned int n = PAGE_SIZE - offs;
                bool same_page = false;

                if (n > bytes)
                    n = bytes;

                if (!bio_add_hw_page(rq->q, bio, page, n, offs,
                             max_sectors, &same_page)) {
                    if (same_page)
                        put_page(page);
                    break;
                }

                added += n;
                bytes -= n;
                offs = 0;
            }
        }
        /*
         * release the pages we didn't map into the bio, if any
         */
        while (j < npages)
            put_page(pages[j++]);
        kvfree(pages);
        /* couldn't stuff something into bio? */
        if (bytes) {
            iov_iter_revert(iter, bytes);
            break;
        }
    }

    ret = blk_rq_append_bio(rq, bio);
    if (ret)
        goto out_unmap;
    return 0;

 out_unmap:
    bio_release_pages(bio, false);
    bio_uninit(bio);
    kfree(bio);
    return ret;
}

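/*
 * Note: the user pages pinned by bio_map_user_iov() above are not released
 * on the success path here; blk_rq_unmap_user() drops them later via
 * bio_release_pages(), marking them dirty for READ requests so data written
 * by the device is not lost.
 */
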
static void bio_invalidate_vmalloc_pages(struct bio *bio)
{
#ifdef ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE
    if (bio->bi_private && !op_is_write(bio_op(bio))) {
        unsigned long i, len = 0;

        for (i = 0; i < bio->bi_vcnt; i++)
            len += bio->bi_io_vec[i].bv_len;
        invalidate_kernel_vmap_range(bio->bi_private, len);
    }
#endif
}

static void bio_map_kern_endio(struct bio *bio)
{
    bio_invalidate_vmalloc_pages(bio);
    bio_uninit(bio);
    kfree(bio);
}

/**
 *  bio_map_kern    -   map kernel address into bio
 *  @q: the struct request_queue for the bio
 *  @data: pointer to buffer to map
 *  @len: length in bytes
 *  @gfp_mask: allocation flags for bio allocation
 *
 *  Map the kernel address into a bio suitable for io to a block
 *  device. Returns an error pointer in case of error.
 */
static struct bio *bio_map_kern(struct request_queue *q, void *data,
        unsigned int len, gfp_t gfp_mask)
{
    unsigned long kaddr = (unsigned long)data;
    unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
    unsigned long start = kaddr >> PAGE_SHIFT;
    const int nr_pages = end - start;
    bool is_vmalloc = is_vmalloc_addr(data);
    struct page *page;
    int offset, i;
    struct bio *bio;

    bio = bio_kmalloc(nr_pages, gfp_mask);
    if (!bio)
        return ERR_PTR(-ENOMEM);
    bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, 0);

    if (is_vmalloc) {
        flush_kernel_vmap_range(data, len);
        bio->bi_private = data;
    }

    offset = offset_in_page(kaddr);
    for (i = 0; i < nr_pages; i++) {
        unsigned int bytes = PAGE_SIZE - offset;

        if (len <= 0)
            break;

        if (bytes > len)
            bytes = len;

        if (!is_vmalloc)
            page = virt_to_page(data);
        else
            page = vmalloc_to_page(data);
        if (bio_add_pc_page(q, bio, page, bytes,
                    offset) < bytes) {
            /* we don't support partial mappings */
            bio_uninit(bio);
            kfree(bio);
            return ERR_PTR(-EINVAL);
        }

        data += bytes;
        len -= bytes;
        offset = 0;
    }

    bio->bi_end_io = bio_map_kern_endio;
    return bio;
}

static void bio_copy_kern_endio(struct bio *bio)
{
    bio_free_pages(bio);
    bio_uninit(bio);
    kfree(bio);
}

static void bio_copy_kern_endio_read(struct bio *bio)
{
    char *p = bio->bi_private;
    struct bio_vec *bvec;
    struct bvec_iter_all iter_all;

    bio_for_each_segment_all(bvec, bio, iter_all) {
        memcpy_from_bvec(p, bvec);
        p += bvec->bv_len;
    }

    bio_copy_kern_endio(bio);
}

/**
 *  bio_copy_kern   -   copy kernel address into bio
 *  @q: the struct request_queue for the bio
 *  @data: pointer to buffer to copy
 *  @len: length in bytes
 *  @gfp_mask: allocation flags for bio and page allocation
 *  @reading: data direction is READ
 *
 *  copy the kernel address into a bio suitable for io to a block
 *  device. Returns an error pointer in case of error.
 */
static struct bio *bio_copy_kern(struct request_queue *q, void *data,
        unsigned int len, gfp_t gfp_mask, int reading)
{
    unsigned long kaddr = (unsigned long)data;
    unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
    unsigned long start = kaddr >> PAGE_SHIFT;
    struct bio *bio;
    void *p = data;
    int nr_pages = 0;

    /*
     * Overflow, abort
     */
    if (end < start)
        return ERR_PTR(-EINVAL);

    nr_pages = end - start;
    bio = bio_kmalloc(nr_pages, gfp_mask);
    if (!bio)
        return ERR_PTR(-ENOMEM);
    bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, 0);

    while (len) {
        struct page *page;
        unsigned int bytes = PAGE_SIZE;

        if (bytes > len)
            bytes = len;

        page = alloc_page(GFP_NOIO | __GFP_ZERO | gfp_mask);
        if (!page)
            goto cleanup;

        if (!reading)
            memcpy(page_address(page), p, bytes);

        if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes)
            break;

        len -= bytes;
        p += bytes;
    }

    if (reading) {
        bio->bi_end_io = bio_copy_kern_endio_read;
        bio->bi_private = data;
    } else {
        bio->bi_end_io = bio_copy_kern_endio;
    }

    return bio;

cleanup:
    bio_free_pages(bio);
    bio_uninit(bio);
    kfree(bio);
    return ERR_PTR(-ENOMEM);
}

/*
 * Append a bio to a passthrough request.  Only works if the bio can be merged
 * into the request based on the driver constraints.
 */
int blk_rq_append_bio(struct request *rq, struct bio *bio)
{
    struct bvec_iter iter;
    struct bio_vec bv;
    unsigned int nr_segs = 0;

    bio_for_each_bvec(bv, bio, iter)
        nr_segs++;

    if (!rq->bio) {
        blk_rq_bio_prep(rq, bio, nr_segs);
    } else {
        if (!ll_back_merge_fn(rq, bio, nr_segs))
            return -EINVAL;
        rq->biotail->bi_next = bio;
        rq->biotail = bio;
        rq->__data_len += (bio)->bi_iter.bi_size;
        bio_crypt_free_ctx(bio);
    }

    return 0;
}
EXPORT_SYMBOL(blk_rq_append_bio);

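/*
 * Illustrative sketch (hypothetical, not part of the upstream file): attaching
 * a caller-built bio to a passthrough request with blk_rq_append_bio(). The
 * request allocation and the page being added are assumptions for the
 * example; the caller remains responsible for the bio if the append fails.
 */
#if 0
static int example_append_one_page(struct request *rq, struct page *page,
                   unsigned int len)
{
    struct bio *bio;
    int ret;

    bio = bio_alloc(NULL, 1, req_op(rq), GFP_KERNEL);
    if (!bio)
        return -ENOMEM;

    if (bio_add_page(bio, page, len, 0) != len) {
        bio_put(bio);
        return -EINVAL;
    }

    /* fails if the bio cannot be merged within the driver limits */
    ret = blk_rq_append_bio(rq, bio);
    if (ret)
        bio_put(bio);
    return ret;
}
#endif
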
/**
 * blk_rq_map_user_iov - map user data to a request, for passthrough requests
 * @q:      request queue where request should be inserted
 * @rq:     request to map data to
 * @map_data:   pointer to the rq_map_data holding pages (if necessary)
 * @iter:   iovec iterator
 * @gfp_mask:   memory allocation flags
 *
 * Description:
 *    Data will be mapped directly for zero copy I/O, if possible. Otherwise
 *    a kernel bounce buffer is used.
 *
 *    A matching blk_rq_unmap_user() must be issued at the end of I/O, while
 *    still in process context.
 */
int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
            struct rq_map_data *map_data,
            const struct iov_iter *iter, gfp_t gfp_mask)
{
    bool copy = false;
    unsigned long align = q->dma_pad_mask | queue_dma_alignment(q);
    struct bio *bio = NULL;
    struct iov_iter i;
    int ret = -EINVAL;

    if (!iter_is_iovec(iter))
        goto fail;

    if (map_data)
        copy = true;
    else if (blk_queue_may_bounce(q))
        copy = true;
    else if (iov_iter_alignment(iter) & align)
        copy = true;
    else if (queue_virt_boundary(q))
        copy = queue_virt_boundary(q) & iov_iter_gap_alignment(iter);

    i = *iter;
    do {
        if (copy)
            ret = bio_copy_user_iov(rq, map_data, &i, gfp_mask);
        else
            ret = bio_map_user_iov(rq, &i, gfp_mask);
        if (ret)
            goto unmap_rq;
        if (!bio)
            bio = rq->bio;
    } while (iov_iter_count(&i));

    return 0;

unmap_rq:
    blk_rq_unmap_user(bio);
fail:
    rq->bio = NULL;
    return ret;
}
EXPORT_SYMBOL(blk_rq_map_user_iov);

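/*
 * Illustrative sketch (hypothetical, not part of the upstream file): mapping a
 * user iovec array onto a passthrough request and tearing it down again. The
 * opcode, queue and uvec/nr_segs parameters are assumptions for the example;
 * a real caller (e.g. an ioctl handler) would also fill in the command
 * payload and check the execution status.
 */
#if 0
static int example_rw_user_iov(struct request_queue *q,
                   const struct iovec __user *uvec,
                   unsigned int nr_segs, bool write)
{
    struct iovec fast_iov[UIO_FASTIOV], *iov = fast_iov;
    struct iov_iter iter;
    struct request *rq;
    struct bio *bio;
    ssize_t ret;
    int err;

    ret = import_iovec(write ? WRITE : READ, uvec, nr_segs,
               ARRAY_SIZE(fast_iov), &iov, &iter);
    if (ret < 0)
        return ret;

    rq = blk_mq_alloc_request(q, write ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0);
    if (IS_ERR(rq)) {
        err = PTR_ERR(rq);
        goto out_free_iov;
    }

    err = blk_rq_map_user_iov(q, rq, NULL, &iter, GFP_KERNEL);
    if (err)
        goto out_put_rq;

    bio = rq->bio;              /* save for blk_rq_unmap_user() */
    blk_execute_rq(rq, false);  /* synchronous; status check omitted */
    err = blk_rq_unmap_user(bio);

out_put_rq:
    blk_mq_free_request(rq);
out_free_iov:
    kfree(iov);
    return err;
}
#endif
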
int blk_rq_map_user(struct request_queue *q, struct request *rq,
            struct rq_map_data *map_data, void __user *ubuf,
            unsigned long len, gfp_t gfp_mask)
{
    struct iovec iov;
    struct iov_iter i;
    int ret = import_single_range(rq_data_dir(rq), ubuf, len, &iov, &i);

    if (unlikely(ret < 0))
        return ret;

    return blk_rq_map_user_iov(q, rq, map_data, &i, gfp_mask);
}
EXPORT_SYMBOL(blk_rq_map_user);

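/*
 * Illustrative sketch (hypothetical, not part of the upstream file): the
 * single-buffer case, as used by SG_IO-style ioctls. Request allocation and
 * command setup are assumed to have happened already; note that rq->bio must
 * be saved before execution, as documented for blk_rq_unmap_user() below.
 */
#if 0
static int example_map_single_buffer(struct request_queue *q,
                     struct request *rq,
                     void __user *ubuf, unsigned long len)
{
    struct bio *bio;
    int ret;

    ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
    if (ret)
        return ret;

    bio = rq->bio;              /* completion may change rq->bio */
    blk_execute_rq(rq, false);  /* status check omitted */
    return blk_rq_unmap_user(bio);
}
#endif
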
/**
 * blk_rq_unmap_user - unmap a request with user data
 * @bio:           start of bio list
 *
 * Description:
 *    Unmap a rq previously mapped by blk_rq_map_user(). The caller must
 *    supply the original rq->bio from the blk_rq_map_user() return, since
 *    the I/O completion may have changed rq->bio.
 */
int blk_rq_unmap_user(struct bio *bio)
{
    struct bio *next_bio;
    int ret = 0, ret2;

    while (bio) {
        if (bio->bi_private) {
            ret2 = bio_uncopy_user(bio);
            if (ret2 && !ret)
                ret = ret2;
        } else {
            bio_release_pages(bio, bio_data_dir(bio) == READ);
        }

        next_bio = bio;
        bio = bio->bi_next;
        bio_uninit(next_bio);
        kfree(next_bio);
    }

    return ret;
}
EXPORT_SYMBOL(blk_rq_unmap_user);

/**
 * blk_rq_map_kern - map kernel data to a request, for passthrough requests
 * @q:      request queue where request should be inserted
 * @rq:     request to fill
 * @kbuf:   the kernel buffer
 * @len:    length of user data
 * @gfp_mask:   memory allocation flags
 *
 * Description:
 *    Data will be mapped directly if possible. Otherwise a bounce
 *    buffer is used. Can be called multiple times to append multiple
 *    buffers.
 */
int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
            unsigned int len, gfp_t gfp_mask)
{
    int reading = rq_data_dir(rq) == READ;
    unsigned long addr = (unsigned long) kbuf;
    struct bio *bio;
    int ret;

    if (len > (queue_max_hw_sectors(q) << 9))
        return -EINVAL;
    if (!len || !kbuf)
        return -EINVAL;

    if (!blk_rq_aligned(q, addr, len) || object_is_on_stack(kbuf) ||
        blk_queue_may_bounce(q))
        bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading);
    else
        bio = bio_map_kern(q, kbuf, len, gfp_mask);

    if (IS_ERR(bio))
        return PTR_ERR(bio);

    bio->bi_opf &= ~REQ_OP_MASK;
    bio->bi_opf |= req_op(rq);

    ret = blk_rq_append_bio(rq, bio);
    if (unlikely(ret)) {
        bio_uninit(bio);
        kfree(bio);
    }
    return ret;
}
EXPORT_SYMBOL(blk_rq_map_kern);
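
/*
 * Illustrative sketch (hypothetical, not part of the upstream file): issuing a
 * passthrough command on a kernel buffer with blk_rq_map_kern(). The buffer
 * may be kmalloc'd or vmalloc'd; unaligned or on-stack buffers are bounced by
 * the code above, and the bio's end_io releases any bounce pages at
 * completion. The queue, opcode and buffer are assumptions for the example.
 */
#if 0
static int example_kern_passthrough(struct request_queue *q,
                    void *buf, unsigned int len, bool write)
{
    struct request *rq;
    int ret;

    rq = blk_mq_alloc_request(q, write ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0);
    if (IS_ERR(rq))
        return PTR_ERR(rq);

    ret = blk_rq_map_kern(q, rq, buf, len, GFP_KERNEL);
    if (!ret)
        blk_execute_rq(rq, false);  /* status check omitted */

    blk_mq_free_request(rq);
    return ret;
}
#endif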