// SPDX-License-Identifier: GPL-2.0-or-later
/* Network filesystem high-level buffered read support.
 *
 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/task_io_accounting_ops.h>
#include "internal.h"

/*
 * Unlock the folios in a read operation.  We need to set PG_fscache on any
 * folios we're going to write back before we unlock them.
 */
void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
{
    struct netfs_io_subrequest *subreq;
    struct folio *folio;
    unsigned int iopos, account = 0;
    pgoff_t start_page = rreq->start / PAGE_SIZE;
    pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
    bool subreq_failed = false;

    XA_STATE(xas, &rreq->mapping->i_pages, start_page);

    if (test_bit(NETFS_RREQ_FAILED, &rreq->flags)) {
        __clear_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags);
        list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
            __clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
        }
    }

    /* Walk through the pagecache and the I/O request lists simultaneously.
     * We may have a mixture of cached and uncached sections and we only
     * really want to write out the uncached sections.  This is slightly
     * complicated by the possibility that we might have huge pages with a
     * mixture inside.
     */
    subreq = list_first_entry(&rreq->subrequests,
                  struct netfs_io_subrequest, rreq_link);
    iopos = 0;
    subreq_failed = (subreq->error < 0);

    trace_netfs_rreq(rreq, netfs_rreq_trace_unlock);

    rcu_read_lock();
    xas_for_each(&xas, folio, last_page) {
        unsigned int pgpos = (folio_index(folio) - start_page) * PAGE_SIZE;
        unsigned int pgend = pgpos + folio_size(folio);
        bool pg_failed = false;

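        /* Walk every subrequest that overlaps this folio: the folio can
         * only be marked uptodate if all of them succeeded, and any
         * subrequest marked for copying starts fscache writeback on it.
         */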
        for (;;) {
            if (!subreq) {
                pg_failed = true;
                break;
            }
            if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags))
                folio_start_fscache(folio);
            pg_failed |= subreq_failed;
            if (pgend < iopos + subreq->len)
                break;

            account += subreq->transferred;
            iopos += subreq->len;
            if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
                subreq = list_next_entry(subreq, rreq_link);
                subreq_failed = (subreq->error < 0);
            } else {
                subreq = NULL;
                subreq_failed = false;
            }
            if (pgend == iopos)
                break;
        }

        if (!pg_failed) {
            flush_dcache_folio(folio);
            folio_mark_uptodate(folio);
        }

        if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
            if (folio_index(folio) == rreq->no_unlock_folio &&
                test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
                _debug("no unlock");
            else
                folio_unlock(folio);
        }
    }
    rcu_read_unlock();

    task_io_account_read(account);
    if (rreq->netfs_ops->done)
        rreq->netfs_ops->done(rreq);
}

static void netfs_cache_expand_readahead(struct netfs_io_request *rreq,
                     loff_t *_start, size_t *_len, loff_t i_size)
{
    struct netfs_cache_resources *cres = &rreq->cache_resources;

    if (cres->ops && cres->ops->expand_readahead)
        cres->ops->expand_readahead(cres, _start, _len, i_size);
}

static void netfs_rreq_expand(struct netfs_io_request *rreq,
                  struct readahead_control *ractl)
{
    /* Give the cache a chance to change the request parameters.  The
     * resultant request must contain the original region.
     */
    netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size);

    /* Give the netfs a chance to change the request parameters.  The
     * resultant request must contain the original region.
     */
    if (rreq->netfs_ops->expand_readahead)
        rreq->netfs_ops->expand_readahead(rreq);

    /* Expand the request if the cache wants it to start earlier.  Note
     * that the expansion may get further extended if the VM wishes to
     * insert THPs and the preferred start and/or end wind up in the middle
     * of THPs.
     *
     * If this is the case, however, the THP size should be an integer
     * multiple of the cache granule size, so we get a whole number of
     * granules to deal with.
     */
    if (rreq->start != readahead_pos(ractl) ||
        rreq->len != readahead_length(ractl)) {
        readahead_expand(ractl, rreq->start, rreq->len);
        rreq->start = readahead_pos(ractl);
        rreq->len = readahead_length(ractl);

        trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
                 netfs_read_trace_expanded);
    }
}
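
/* Worked example (illustrative, not part of the original file): if the cache
 * stores data in, say, 256KiB granules and the VM asks for 16KiB at file
 * offset 0x104000, the cache's expand_readahead() would round the start down
 * and the end up, widening the request to 0x100000-0x140000 so that a whole
 * granule can be read and later written back to the cache in one piece.
 */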

/**
 * netfs_readahead - Helper to manage a read request
 * @ractl: The description of the readahead request
 *
 * Fulfil a readahead request by drawing data from the cache if possible, or
 * the netfs if not.  Space beyond the EOF is zero-filled.  Multiple I/O
 * requests from different sources will get munged together.  If necessary, the
 * readahead window can be expanded in either direction to a more convenient
 * alignment for RPC efficiency or to make storage in the cache feasible.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
void netfs_readahead(struct readahead_control *ractl)
{
    struct netfs_io_request *rreq;
    struct netfs_inode *ctx = netfs_inode(ractl->mapping->host);
    int ret;

    _enter("%lx,%x", readahead_index(ractl), readahead_count(ractl));

    if (readahead_count(ractl) == 0)
        return;

    rreq = netfs_alloc_request(ractl->mapping, ractl->file,
                   readahead_pos(ractl),
                   readahead_length(ractl),
                   NETFS_READAHEAD);
    if (IS_ERR(rreq))
        return;

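    /* Ask the cache to set itself up for this request.  Only resource
     * exhaustion or a signal is treated as fatal; any other error (such as
     * no cache being available) is ignored and the read simply proceeds
     * from the netfs without the cache.
     */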
    if (ctx->ops->begin_cache_operation) {
        ret = ctx->ops->begin_cache_operation(rreq);
        if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
            goto cleanup_free;
    }

    netfs_stat(&netfs_n_rh_readahead);
    trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
             netfs_read_trace_readahead);

    netfs_rreq_expand(rreq, ractl);

    /* Drop the refs on the folios here rather than in the cache or
     * filesystem.  The locks will be dropped in netfs_rreq_unlock_folios().
     */
    while (readahead_folio(ractl))
        ;

    netfs_begin_read(rreq, false);
    return;

cleanup_free:
    netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
    return;
}
EXPORT_SYMBOL(netfs_readahead);

/**
 * netfs_read_folio - Helper to manage a read_folio request
 * @file: The file to read from
 * @folio: The folio to read
 *
 * Fulfil a read_folio request by drawing data from the cache if
 * possible, or the netfs if not.  Space beyond the EOF is zero-filled.
 * Multiple I/O requests from different sources will get munged together.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
int netfs_read_folio(struct file *file, struct folio *folio)
{
    struct address_space *mapping = folio_file_mapping(folio);
    struct netfs_io_request *rreq;
    struct netfs_inode *ctx = netfs_inode(mapping->host);
    int ret;

    _enter("%lx", folio_index(folio));

    rreq = netfs_alloc_request(mapping, file,
                   folio_file_pos(folio), folio_size(folio),
                   NETFS_READPAGE);
    if (IS_ERR(rreq)) {
        ret = PTR_ERR(rreq);
        goto alloc_error;
    }

    if (ctx->ops->begin_cache_operation) {
        ret = ctx->ops->begin_cache_operation(rreq);
        if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
            goto discard;
    }

    netfs_stat(&netfs_n_rh_readpage);
    trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);
    return netfs_begin_read(rreq, true);

discard:
    netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
alloc_error:
    folio_unlock(folio);
    return ret;
}
EXPORT_SYMBOL(netfs_read_folio);
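
/* Illustrative sketch (not part of the original file): a network filesystem
 * would typically plug the two helpers above straight into its
 * address_space_operations, roughly as below.  "myfs" is a hypothetical
 * filesystem name used purely for illustration.
 *
 *	const struct address_space_operations myfs_aops = {
 *		.read_folio	= netfs_read_folio,
 *		.readahead	= netfs_readahead,
 *		...
 *	};
 */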

/*
 * Prepare a folio for writing without reading first
 * @folio: The folio being prepared
 * @pos: starting position for the write
 * @len: length of write
 * @always_fill: T if the folio should always be completely filled/cleared
 *
 * In some cases, write_begin doesn't need to read at all:
 * - full folio write
 * - write that lies in a folio that is completely beyond EOF
 * - write that covers the folio from start to EOF or beyond it
 *
 * If any of these criteria are met, then zero out the unwritten parts
 * of the folio and return true. Otherwise, return false.
 */
static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len,
                 bool always_fill)
{
    struct inode *inode = folio_inode(folio);
    loff_t i_size = i_size_read(inode);
    size_t offset = offset_in_folio(folio, pos);
    size_t plen = folio_size(folio);

    if (unlikely(always_fill)) {
        if (pos - offset + len <= i_size)
            return false; /* Page entirely before EOF */
        zero_user_segment(&folio->page, 0, plen);
        folio_mark_uptodate(folio);
        return true;
    }

    /* Full folio write */
    if (offset == 0 && len >= plen)
        return true;

    /* Page entirely beyond the end of the file */
    if (pos - offset >= i_size)
        goto zero_out;

    /* Write that covers from the start of the folio to EOF or beyond */
    if (offset == 0 && (pos + len) >= i_size)
        goto zero_out;

    return false;
zero_out:
    zero_user_segments(&folio->page, 0, offset, offset + len, plen);
    return true;
}

/**
 * netfs_write_begin - Helper to prepare for writing
 * @ctx: The netfs context
 * @file: The file to read from
 * @mapping: The mapping to read from
 * @pos: File position at which the write will begin
 * @len: The length of the write (may extend beyond the end of the folio chosen)
 * @_folio: Where to put the resultant folio
 * @_fsdata: Place for the netfs to store a cookie
 *
 * Pre-read data for a write-begin request by drawing data from the cache if
 * possible, or the netfs if not.  Space beyond the EOF is zero-filled.
 * Multiple I/O requests from different sources will get munged together.  If
 * necessary, the readahead window can be expanded in either direction to a
 * more convenient alignment for RPC efficiency or to make storage in the cache
 * feasible.
 *
 * The calling netfs must provide a table of operations, only one of which,
 * issue_read, is mandatory.
 *
 * The check_write_begin() operation can be provided to check for and flush
 * conflicting writes once the folio is grabbed and locked.  It is passed a
 * pointer to the fsdata cookie that gets returned to the VM to be passed to
 * write_end.  It is permitted to sleep.  It should return 0 if the request
 * should go ahead or it may return an error.  It may also unlock and put the
 * folio, provided it sets ``*foliop`` to NULL, in which case a return of 0
 * will cause the folio to be re-got and the process to be retried.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
int netfs_write_begin(struct netfs_inode *ctx,
              struct file *file, struct address_space *mapping,
              loff_t pos, unsigned int len, struct folio **_folio,
              void **_fsdata)
{
    struct netfs_io_request *rreq;
    struct folio *folio;
    unsigned int fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;
    pgoff_t index = pos >> PAGE_SHIFT;
    int ret;

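    /* A readahead_control is set up locally only so that the request can be
     * expanded with netfs_rreq_expand() and the folio references dropped
     * with readahead_folio() further down.
     */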
    DEFINE_READAHEAD(ractl, file, NULL, mapping, index);

retry:
    folio = __filemap_get_folio(mapping, index, fgp_flags,
                    mapping_gfp_mask(mapping));
    if (!folio)
        return -ENOMEM;

    if (ctx->ops->check_write_begin) {
        /* Allow the netfs (eg. ceph) to flush conflicts. */
        ret = ctx->ops->check_write_begin(file, pos, len, &folio, _fsdata);
        if (ret < 0) {
            trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin);
            goto error;
        }
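        /* check_write_begin() may have unlocked and dropped the folio, in
         * which case get it again and retry.
         */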
        if (!folio)
            goto retry;
    }

    if (folio_test_uptodate(folio))
        goto have_folio;

    /* If the page is beyond the EOF, we want to clear it - unless it's
     * within the cache granule containing the EOF, in which case we need
     * to preload the granule.
     */
    if (!netfs_is_cache_enabled(ctx) &&
        netfs_skip_folio_read(folio, pos, len, false)) {
        netfs_stat(&netfs_n_rh_write_zskip);
        goto have_folio_no_wait;
    }

    rreq = netfs_alloc_request(mapping, file,
                   folio_file_pos(folio), folio_size(folio),
                   NETFS_READ_FOR_WRITE);
    if (IS_ERR(rreq)) {
        ret = PTR_ERR(rreq);
        goto error;
    }
    rreq->no_unlock_folio   = folio_index(folio);
    __set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);

    if (ctx->ops->begin_cache_operation) {
        ret = ctx->ops->begin_cache_operation(rreq);
        if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
            goto error_put;
    }

    netfs_stat(&netfs_n_rh_write_begin);
    trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);

    /* Expand the request to meet caching requirements and download
     * preferences.
     */
    ractl._nr_pages = folio_nr_pages(folio);
    netfs_rreq_expand(rreq, &ractl);

    /* We hold the folio locks, so we can drop the references */
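    /* readahead_folio() puts a reference on each folio it hands back, so
     * take an extra ref first to keep the target folio pinned for the
     * caller.
     */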
    folio_get(folio);
    while (readahead_folio(&ractl))
        ;

    ret = netfs_begin_read(rreq, true);
    if (ret < 0)
        goto error;

have_folio:
    ret = folio_wait_fscache_killable(folio);
    if (ret < 0)
        goto error;
have_folio_no_wait:
    *_folio = folio;
    _leave(" = 0");
    return 0;

error_put:
    netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
error:
    if (folio) {
        folio_unlock(folio);
        folio_put(folio);
    }
    _leave(" = %d", ret);
    return ret;
}
EXPORT_SYMBOL(netfs_write_begin);
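
/* Illustrative sketch (not part of the original file): a netfs would
 * typically call netfs_write_begin() from its own ->write_begin()
 * implementation, roughly as below.  The "myfs" names are hypothetical and
 * error handling is reduced to the bare minimum.
 *
 *	static int myfs_write_begin(struct file *file,
 *				    struct address_space *mapping,
 *				    loff_t pos, unsigned int len,
 *				    struct page **pagep, void **fsdata)
 *	{
 *		struct folio *folio;
 *		int ret;
 *
 *		ret = netfs_write_begin(netfs_inode(mapping->host), file,
 *					mapping, pos, len, &folio, fsdata);
 *		if (ret == 0)
 *			*pagep = &folio->page;
 *		return ret;
 *	}
 */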