// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/fs/nfs/read.c
 *
 * Block I/O for NFS
 *
 * Partial copy of Linus' read cache modifications to fs/nfs/file.c
 * modified for async RPC by okir@monad.swb.de
 */

#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/module.h>

#include "nfs4_fs.h"
#include "internal.h"
#include "iostat.h"
#include "fscache.h"
#include "pnfs.h"
#include "nfstrace.h"

#define NFSDBG_FACILITY     NFSDBG_PAGECACHE

static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops;
static const struct nfs_rw_ops nfs_rw_read_ops;

static struct kmem_cache *nfs_rdata_cachep;

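/* Allocate a zeroed read header from the slab cache and mark it FMODE_READ. */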
static struct nfs_pgio_header *nfs_readhdr_alloc(void)
{
    struct nfs_pgio_header *p = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);

    if (p)
        p->rw_mode = FMODE_READ;
    return p;
}

static void nfs_readhdr_free(struct nfs_pgio_header *rhdr)
{
    kmem_cache_free(nfs_rdata_cachep, rhdr);
}

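/*
 * Complete a read of a page that lies entirely beyond EOF:
 * zero-fill it, mark it up to date, and unlock it.
 */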
static
int nfs_return_empty_page(struct page *page)
{
    zero_user(page, 0, PAGE_SIZE);
    SetPageUptodate(page);
    unlock_page(page);
    return 0;
}

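/*
 * Set up a pageio descriptor for reads: use the pNFS layout driver's
 * read ops when one is attached (unless force_mds), otherwise fall
 * back to plain RPC, with I/O sized by the server's rsize.
 */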
void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
              struct inode *inode, bool force_mds,
              const struct nfs_pgio_completion_ops *compl_ops)
{
    struct nfs_server *server = NFS_SERVER(inode);
    const struct nfs_pageio_ops *pg_ops = &nfs_pgio_rw_ops;

#ifdef CONFIG_NFS_V4_1
    if (server->pnfs_curr_ld && !force_mds)
        pg_ops = server->pnfs_curr_ld->pg_read_ops;
#endif
    nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_read_ops,
            server->rsize, 0);
}
EXPORT_SYMBOL_GPL(nfs_pageio_init_read);

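/* Flush the descriptor, then account the bytes and pages that were read. */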
static void nfs_pageio_complete_read(struct nfs_pageio_descriptor *pgio)
{
    struct nfs_pgio_mirror *pgm;
    unsigned long npages;

    nfs_pageio_complete(pgio);

    /* It doesn't make sense to do mirrored reads! */
    WARN_ON_ONCE(pgio->pg_mirror_count != 1);

    pgm = &pgio->pg_mirrors[0];
    NFS_I(pgio->pg_inode)->read_io += pgm->pg_bytes_written;
    npages = (pgm->pg_bytes_written + PAGE_SIZE - 1) >> PAGE_SHIFT;
    nfs_add_stats(pgio->pg_inode, NFSIOS_READPAGES, npages);
}

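/*
 * Redirect subsequent reads through the MDS: clean up any layout
 * driver state on the descriptor and swap in the plain RPC ops.
 */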
void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
{
    struct nfs_pgio_mirror *mirror;

    if (pgio->pg_ops && pgio->pg_ops->pg_cleanup)
        pgio->pg_ops->pg_cleanup(pgio);

    pgio->pg_ops = &nfs_pgio_rw_ops;

    /* read path should never have more than one mirror */
    WARN_ON_ONCE(pgio->pg_mirror_count != 1);

    mirror = &pgio->pg_mirrors[0];
    mirror->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize;
}
EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);

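/*
 * Release a request whose I/O is done: flag the page on a fatal server
 * error, write an up-to-date page into fscache, and unlock the page
 * once the last subrequest in the page group completes.
 */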
static void nfs_readpage_release(struct nfs_page *req, int error)
{
    struct inode *inode = d_inode(nfs_req_openctx(req)->dentry);
    struct page *page = req->wb_page;

    dprintk("NFS: read done (%s/%llu %d@%lld)\n", inode->i_sb->s_id,
        (unsigned long long)NFS_FILEID(inode), req->wb_bytes,
        (long long)req_offset(req));

    if (nfs_error_is_fatal_on_server(error) && error != -ETIMEDOUT)
        SetPageError(page);
    if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) {
        if (PageUptodate(page))
            nfs_fscache_write_page(inode, page);
        unlock_page(page);
    }
    nfs_release_request(req);
}

struct nfs_readdesc {
    struct nfs_pageio_descriptor pgio;
    struct nfs_open_context *ctx;
};

static void nfs_page_group_set_uptodate(struct nfs_page *req)
{
    if (nfs_page_group_sync_on_bit(req, PG_UPTODATE))
        SetPageUptodate(req->wb_page);
}

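/*
 * Per-header completion: walk the finished requests, zero any region
 * the server did not return (short read at EOF), propagate errors to
 * the open context, and release each request.
 */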
static void nfs_read_completion(struct nfs_pgio_header *hdr)
{
    unsigned long bytes = 0;
    int error;

    if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
        goto out;
    while (!list_empty(&hdr->pages)) {
        struct nfs_page *req = nfs_list_entry(hdr->pages.next);
        struct page *page = req->wb_page;
        unsigned long start = req->wb_pgbase;
        unsigned long end = req->wb_pgbase + req->wb_bytes;

        if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
            /* note: regions of the page not covered by a
             * request are zeroed in readpage_async_filler */
            if (bytes > hdr->good_bytes) {
                /* nothing in this request was good, so zero
                 * the full extent of the request */
                zero_user_segment(page, start, end);

            } else if (hdr->good_bytes - bytes < req->wb_bytes) {
                /* part of this request has good bytes, but
                 * not all. zero the bad bytes */
                start += hdr->good_bytes - bytes;
                WARN_ON(start < req->wb_pgbase);
                zero_user_segment(page, start, end);
            }
        }
        error = 0;
        bytes += req->wb_bytes;
        if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
            if (bytes <= hdr->good_bytes)
                nfs_page_group_set_uptodate(req);
            else {
                error = hdr->error;
                xchg(&nfs_req_openctx(req)->error, error);
            }
        } else
            nfs_page_group_set_uptodate(req);
        nfs_list_remove_request(req);
        nfs_readpage_release(req, error);
    }
out:
    hdr->release(hdr);
}

static void nfs_initiate_read(struct nfs_pgio_header *hdr,
              struct rpc_message *msg,
              const struct nfs_rpc_ops *rpc_ops,
              struct rpc_task_setup *task_setup_data, int how)
{
    rpc_ops->read_setup(hdr, msg);
    trace_nfs_initiate_read(hdr);
}

static void
nfs_async_read_error(struct list_head *head, int error)
{
    struct nfs_page *req;

    while (!list_empty(head)) {
        req = nfs_list_entry(head->next);
        nfs_list_remove_request(req);
        nfs_readpage_release(req, error);
    }
}

static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = {
    .error_cleanup = nfs_async_read_error,
    .completion = nfs_read_completion,
};

/*
 * This is the callback from RPC telling us whether a reply was
 * received or some error occurred (timeout or socket shutdown).
 */
static int nfs_readpage_done(struct rpc_task *task,
             struct nfs_pgio_header *hdr,
             struct inode *inode)
{
    int status = NFS_PROTO(inode)->read_done(task, hdr);
    if (status != 0)
        return status;

    nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, hdr->res.count);
    trace_nfs_readpage_done(task, hdr);

    if (task->tk_status == -ESTALE) {
        nfs_set_inode_stale(inode);
        nfs_mark_for_revalidate(inode);
    }
    return 0;
}

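/*
 * Handle a short read: fail with -EIO if the server made no progress,
 * fall back to the MDS for non rpc-based layout drivers, otherwise
 * advance the arguments past the bytes received and restart the RPC.
 */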
static void nfs_readpage_retry(struct rpc_task *task,
               struct nfs_pgio_header *hdr)
{
    struct nfs_pgio_args *argp = &hdr->args;
    struct nfs_pgio_res  *resp = &hdr->res;

    /* This is a short read! */
    nfs_inc_stats(hdr->inode, NFSIOS_SHORTREAD);
    trace_nfs_readpage_short(task, hdr);

    /* Has the server at least made some progress? */
    if (resp->count == 0) {
        nfs_set_pgio_error(hdr, -EIO, argp->offset);
        return;
    }

    /* For non rpc-based layout drivers, retry-through-MDS */
    if (!task->tk_ops) {
        hdr->pnfs_error = -EAGAIN;
        return;
    }

    /* Yes, so retry the read at the end of the hdr */
    hdr->mds_offset += resp->count;
    argp->offset += resp->count;
    argp->pgbase += resp->count;
    argp->count -= resp->count;
    resp->count = 0;
    resp->eof = 0;
    rpc_restart_call_prepare(task);
}

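/*
 * On EOF, trim good_bytes to what the server actually returned and
 * clear any error; on a short read without EOF, retry for the rest.
 */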
static void nfs_readpage_result(struct rpc_task *task,
                struct nfs_pgio_header *hdr)
{
    if (hdr->res.eof) {
        loff_t pos = hdr->args.offset + hdr->res.count;
        unsigned int new = pos - hdr->io_start;

        if (hdr->good_bytes > new) {
            hdr->good_bytes = new;
            set_bit(NFS_IOHDR_EOF, &hdr->flags);
            clear_bit(NFS_IOHDR_ERROR, &hdr->flags);
        }
    } else if (hdr->res.count < hdr->args.count)
        nfs_readpage_retry(task, hdr);
}

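/*
 * Queue one page for an async read: try fscache first for non-sync
 * inodes, otherwise build an nfs_page request (zeroing any tail beyond
 * the file length) and add it to the pageio descriptor.
 */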
static int
readpage_async_filler(struct nfs_readdesc *desc, struct page *page)
{
    struct inode *inode = page_file_mapping(page)->host;
    unsigned int rsize = NFS_SERVER(inode)->rsize;
    struct nfs_page *new;
    unsigned int len, aligned_len;
    int error;

    len = nfs_page_length(page);
    if (len == 0)
        return nfs_return_empty_page(page);

    aligned_len = min_t(unsigned int, ALIGN(len, rsize), PAGE_SIZE);

    if (!IS_SYNC(page->mapping->host)) {
        error = nfs_fscache_read_page(page->mapping->host, page);
        if (error == 0)
            goto out_unlock;
    }

    new = nfs_create_request(desc->ctx, page, 0, aligned_len);
    if (IS_ERR(new))
        goto out_error;

    if (len < PAGE_SIZE)
        zero_user_segment(page, len, PAGE_SIZE);
    if (!nfs_pageio_add_request(&desc->pgio, new)) {
        nfs_list_remove_request(new);
        error = desc->pgio.pg_error;
        nfs_readpage_release(new, error);
        goto out;
    }
    return 0;
out_error:
    error = PTR_ERR(new);
out_unlock:
    unlock_page(page);
out:
    return error;
}

/*
 * Read a page over NFS.
 * We read the page synchronously in the following case:
 *  -   The error flag is set for this page. This happens only when a
 *  previous async read operation failed.
 */
int nfs_read_folio(struct file *file, struct folio *folio)
{
    struct page *page = &folio->page;
    struct nfs_readdesc desc;
    struct inode *inode = page_file_mapping(page)->host;
    int ret;

    trace_nfs_aop_readpage(inode, page);
    nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);

    /*
     * Try to flush any pending writes to the file.
     *
     * NOTE! Because we own the page lock, there cannot
     * be any new pending writes generated at this point
     * for this page (other pages can be written to).
     */
    ret = nfs_wb_page(inode, page);
    if (ret)
        goto out_unlock;
    if (PageUptodate(page))
        goto out_unlock;

    ret = -ESTALE;
    if (NFS_STALE(inode))
        goto out_unlock;

    if (file == NULL) {
        ret = -EBADF;
        desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
        if (desc.ctx == NULL)
            goto out_unlock;
    } else
        desc.ctx = get_nfs_open_context(nfs_file_open_context(file));

    xchg(&desc.ctx->error, 0);
    nfs_pageio_init_read(&desc.pgio, inode, false,
                 &nfs_async_read_completion_ops);

    ret = readpage_async_filler(&desc, page);
    if (ret)
        goto out;

    nfs_pageio_complete_read(&desc.pgio);
    ret = desc.pgio.pg_error < 0 ? desc.pgio.pg_error : 0;
    if (!ret) {
        ret = wait_on_page_locked_killable(page);
        if (!PageUptodate(page) && !ret)
            ret = xchg(&desc.ctx->error, 0);
    }
out:
    put_nfs_open_context(desc.ctx);
    trace_nfs_aop_readpage_done(inode, page, ret);
    return ret;
out_unlock:
    unlock_page(page);
    trace_nfs_aop_readpage_done(inode, page, ret);
    return ret;
}

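/*
 * ->readahead: feed each page in the readahead window through
 * readpage_async_filler so that contiguous pages can be coalesced
 * into larger read RPCs.
 */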
void nfs_readahead(struct readahead_control *ractl)
{
    unsigned int nr_pages = readahead_count(ractl);
    struct file *file = ractl->file;
    struct nfs_readdesc desc;
    struct inode *inode = ractl->mapping->host;
    struct page *page;
    int ret;

    trace_nfs_aop_readahead(inode, readahead_pos(ractl), nr_pages);
    nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);

    ret = -ESTALE;
    if (NFS_STALE(inode))
        goto out;

    if (file == NULL) {
        ret = -EBADF;
        desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
        if (desc.ctx == NULL)
            goto out;
    } else
        desc.ctx = get_nfs_open_context(nfs_file_open_context(file));

    nfs_pageio_init_read(&desc.pgio, inode, false,
                 &nfs_async_read_completion_ops);

    while ((page = readahead_page(ractl)) != NULL) {
        ret = readpage_async_filler(&desc, page);
        put_page(page);
        if (ret)
            break;
    }

    nfs_pageio_complete_read(&desc.pgio);

    put_nfs_open_context(desc.ctx);
out:
    trace_nfs_aop_readahead_done(inode, nr_pages, ret);
}

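/* Module init/exit: create and destroy the slab cache for read headers. */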
int __init nfs_init_readpagecache(void)
{
    nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
                         sizeof(struct nfs_pgio_header),
                         0, SLAB_HWCACHE_ALIGN,
                         NULL);
    if (nfs_rdata_cachep == NULL)
        return -ENOMEM;

    return 0;
}

void nfs_destroy_readpagecache(void)
{
    kmem_cache_destroy(nfs_rdata_cachep);
}

static const struct nfs_rw_ops nfs_rw_read_ops = {
    .rw_alloc_header    = nfs_readhdr_alloc,
    .rw_free_header     = nfs_readhdr_free,
    .rw_done        = nfs_readpage_done,
    .rw_result      = nfs_readpage_result,
    .rw_initiate        = nfs_initiate_read,
};