Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * linux/fs/nfs/write.c
0004  *
0005  * Write file data over NFS.
0006  *
0007  * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
0008  */
0009 
0010 #include <linux/types.h>
0011 #include <linux/slab.h>
0012 #include <linux/mm.h>
0013 #include <linux/pagemap.h>
0014 #include <linux/file.h>
0015 #include <linux/writeback.h>
0016 #include <linux/swap.h>
0017 #include <linux/migrate.h>
0018 
0019 #include <linux/sunrpc/clnt.h>
0020 #include <linux/nfs_fs.h>
0021 #include <linux/nfs_mount.h>
0022 #include <linux/nfs_page.h>
0023 #include <linux/backing-dev.h>
0024 #include <linux/export.h>
0025 #include <linux/freezer.h>
0026 #include <linux/wait.h>
0027 #include <linux/iversion.h>
0028 
0029 #include <linux/uaccess.h>
0030 #include <linux/sched/mm.h>
0031 
0032 #include "delegation.h"
0033 #include "internal.h"
0034 #include "iostat.h"
0035 #include "nfs4_fs.h"
0036 #include "fscache.h"
0037 #include "pnfs.h"
0038 
0039 #include "nfstrace.h"
0040 
/* Debug facility tag for this file; consumed by debug macros defined elsewhere. */
#define NFSDBG_FACILITY     NFSDBG_PAGECACHE

/*
 * Pool sizing constants. Their users are outside this excerpt;
 * presumably the minimum reserves of the write and commit mempools
 * (TODO confirm against the pool setup code).
 */
#define MIN_POOL_WRITE      (32)
#define MIN_POOL_COMMIT     (4)
0045 
0046 struct nfs_io_completion {
0047     void (*complete)(void *data);
0048     void *data;
0049     struct kref refcount;
0050 };
0051 
0052 /*
0053  * Local function declarations
0054  */
0055 static void nfs_redirty_request(struct nfs_page *req);
0056 static const struct rpc_call_ops nfs_commit_ops;
0057 static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
0058 static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
0059 static const struct nfs_rw_ops nfs_rw_write_ops;
0060 static void nfs_inode_remove_request(struct nfs_page *req);
0061 static void nfs_clear_request_commit(struct nfs_page *req);
0062 static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
0063                       struct inode *inode);
0064 static struct nfs_page *
0065 nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi,
0066                         struct page *page);
0067 
0068 static struct kmem_cache *nfs_wdata_cachep;
0069 static mempool_t *nfs_wdata_mempool;
0070 static struct kmem_cache *nfs_cdata_cachep;
0071 static mempool_t *nfs_commit_mempool;
0072 
0073 struct nfs_commit_data *nfs_commitdata_alloc(void)
0074 {
0075     struct nfs_commit_data *p;
0076 
0077     p = kmem_cache_zalloc(nfs_cdata_cachep, nfs_io_gfp_mask());
0078     if (!p) {
0079         p = mempool_alloc(nfs_commit_mempool, GFP_NOWAIT);
0080         if (!p)
0081             return NULL;
0082         memset(p, 0, sizeof(*p));
0083     }
0084     INIT_LIST_HEAD(&p->pages);
0085     return p;
0086 }
0087 EXPORT_SYMBOL_GPL(nfs_commitdata_alloc);
0088 
0089 void nfs_commit_free(struct nfs_commit_data *p)
0090 {
0091     mempool_free(p, nfs_commit_mempool);
0092 }
0093 EXPORT_SYMBOL_GPL(nfs_commit_free);
0094 
0095 static struct nfs_pgio_header *nfs_writehdr_alloc(void)
0096 {
0097     struct nfs_pgio_header *p;
0098 
0099     p = kmem_cache_zalloc(nfs_wdata_cachep, nfs_io_gfp_mask());
0100     if (!p) {
0101         p = mempool_alloc(nfs_wdata_mempool, GFP_NOWAIT);
0102         if (!p)
0103             return NULL;
0104         memset(p, 0, sizeof(*p));
0105     }
0106     p->rw_mode = FMODE_WRITE;
0107     return p;
0108 }
0109 
0110 static void nfs_writehdr_free(struct nfs_pgio_header *hdr)
0111 {
0112     mempool_free(hdr, nfs_wdata_mempool);
0113 }
0114 
0115 static struct nfs_io_completion *nfs_io_completion_alloc(gfp_t gfp_flags)
0116 {
0117     return kmalloc(sizeof(struct nfs_io_completion), gfp_flags);
0118 }
0119 
0120 static void nfs_io_completion_init(struct nfs_io_completion *ioc,
0121         void (*complete)(void *), void *data)
0122 {
0123     ioc->complete = complete;
0124     ioc->data = data;
0125     kref_init(&ioc->refcount);
0126 }
0127 
0128 static void nfs_io_completion_release(struct kref *kref)
0129 {
0130     struct nfs_io_completion *ioc = container_of(kref,
0131             struct nfs_io_completion, refcount);
0132     ioc->complete(ioc->data);
0133     kfree(ioc);
0134 }
0135 
0136 static void nfs_io_completion_get(struct nfs_io_completion *ioc)
0137 {
0138     if (ioc != NULL)
0139         kref_get(&ioc->refcount);
0140 }
0141 
0142 static void nfs_io_completion_put(struct nfs_io_completion *ioc)
0143 {
0144     if (ioc != NULL)
0145         kref_put(&ioc->refcount, nfs_io_completion_release);
0146 }
0147 
0148 static void
0149 nfs_page_set_inode_ref(struct nfs_page *req, struct inode *inode)
0150 {
0151     if (!test_and_set_bit(PG_INODE_REF, &req->wb_flags)) {
0152         kref_get(&req->wb_kref);
0153         atomic_long_inc(&NFS_I(inode)->nrequests);
0154     }
0155 }
0156 
0157 static int
0158 nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode)
0159 {
0160     int ret;
0161 
0162     if (!test_bit(PG_REMOVE, &req->wb_flags))
0163         return 0;
0164     ret = nfs_page_group_lock(req);
0165     if (ret)
0166         return ret;
0167     if (test_and_clear_bit(PG_REMOVE, &req->wb_flags))
0168         nfs_page_set_inode_ref(req, inode);
0169     nfs_page_group_unlock(req);
0170     return 0;
0171 }
0172 
0173 static struct nfs_page *
0174 nfs_page_private_request(struct page *page)
0175 {
0176     if (!PagePrivate(page))
0177         return NULL;
0178     return (struct nfs_page *)page_private(page);
0179 }
0180 
0181 /*
0182  * nfs_page_find_head_request_locked - find head request associated with @page
0183  *
0184  * must be called while holding the inode lock.
0185  *
0186  * returns matching head request with reference held, or NULL if not found.
0187  */
0188 static struct nfs_page *
0189 nfs_page_find_private_request(struct page *page)
0190 {
0191     struct address_space *mapping = page_file_mapping(page);
0192     struct nfs_page *req;
0193 
0194     if (!PagePrivate(page))
0195         return NULL;
0196     spin_lock(&mapping->private_lock);
0197     req = nfs_page_private_request(page);
0198     if (req) {
0199         WARN_ON_ONCE(req->wb_head != req);
0200         kref_get(&req->wb_kref);
0201     }
0202     spin_unlock(&mapping->private_lock);
0203     return req;
0204 }
0205 
0206 static struct nfs_page *
0207 nfs_page_find_swap_request(struct page *page)
0208 {
0209     struct inode *inode = page_file_mapping(page)->host;
0210     struct nfs_inode *nfsi = NFS_I(inode);
0211     struct nfs_page *req = NULL;
0212     if (!PageSwapCache(page))
0213         return NULL;
0214     mutex_lock(&nfsi->commit_mutex);
0215     if (PageSwapCache(page)) {
0216         req = nfs_page_search_commits_for_head_request_locked(nfsi,
0217             page);
0218         if (req) {
0219             WARN_ON_ONCE(req->wb_head != req);
0220             kref_get(&req->wb_kref);
0221         }
0222     }
0223     mutex_unlock(&nfsi->commit_mutex);
0224     return req;
0225 }
0226 
0227 /*
0228  * nfs_page_find_head_request - find head request associated with @page
0229  *
0230  * returns matching head request with reference held, or NULL if not found.
0231  */
0232 static struct nfs_page *nfs_page_find_head_request(struct page *page)
0233 {
0234     struct nfs_page *req;
0235 
0236     req = nfs_page_find_private_request(page);
0237     if (!req)
0238         req = nfs_page_find_swap_request(page);
0239     return req;
0240 }
0241 
0242 static struct nfs_page *nfs_find_and_lock_page_request(struct page *page)
0243 {
0244     struct inode *inode = page_file_mapping(page)->host;
0245     struct nfs_page *req, *head;
0246     int ret;
0247 
0248     for (;;) {
0249         req = nfs_page_find_head_request(page);
0250         if (!req)
0251             return req;
0252         head = nfs_page_group_lock_head(req);
0253         if (head != req)
0254             nfs_release_request(req);
0255         if (IS_ERR(head))
0256             return head;
0257         ret = nfs_cancel_remove_inode(head, inode);
0258         if (ret < 0) {
0259             nfs_unlock_and_release_request(head);
0260             return ERR_PTR(ret);
0261         }
0262         /* Ensure that nobody removed the request before we locked it */
0263         if (head == nfs_page_private_request(page))
0264             break;
0265         if (PageSwapCache(page))
0266             break;
0267         nfs_unlock_and_release_request(head);
0268     }
0269     return head;
0270 }
0271 
0272 /* Adjust the file length if we're writing beyond the end */
0273 static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
0274 {
0275     struct inode *inode = page_file_mapping(page)->host;
0276     loff_t end, i_size;
0277     pgoff_t end_index;
0278 
0279     spin_lock(&inode->i_lock);
0280     i_size = i_size_read(inode);
0281     end_index = (i_size - 1) >> PAGE_SHIFT;
0282     if (i_size > 0 && page_index(page) < end_index)
0283         goto out;
0284     end = page_file_offset(page) + ((loff_t)offset+count);
0285     if (i_size >= end)
0286         goto out;
0287     trace_nfs_size_grow(inode, end);
0288     i_size_write(inode, end);
0289     NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE;
0290     nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
0291 out:
0292     spin_unlock(&inode->i_lock);
0293     nfs_fscache_invalidate(inode, 0);
0294 }
0295 
0296 /* A writeback failed: mark the page as bad, and invalidate the page cache */
0297 static void nfs_set_pageerror(struct address_space *mapping)
0298 {
0299     struct inode *inode = mapping->host;
0300 
0301     nfs_zap_mapping(mapping->host, mapping);
0302     /* Force file size revalidation */
0303     spin_lock(&inode->i_lock);
0304     nfs_set_cache_invalid(inode, NFS_INO_REVAL_FORCED |
0305                          NFS_INO_INVALID_CHANGE |
0306                          NFS_INO_INVALID_SIZE);
0307     spin_unlock(&inode->i_lock);
0308 }
0309 
0310 static void nfs_mapping_set_error(struct page *page, int error)
0311 {
0312     struct address_space *mapping = page_file_mapping(page);
0313 
0314     SetPageError(page);
0315     filemap_set_wb_err(mapping, error);
0316     if (mapping->host)
0317         errseq_set(&mapping->host->i_sb->s_wb_err,
0318                error == -ENOSPC ? -ENOSPC : -EIO);
0319     nfs_set_pageerror(mapping);
0320 }
0321 
0322 /*
0323  * nfs_page_group_search_locked
0324  * @head - head request of page group
0325  * @page_offset - offset into page
0326  *
0327  * Search page group with head @head to find a request that contains the
0328  * page offset @page_offset.
0329  *
0330  * Returns a pointer to the first matching nfs request, or NULL if no
0331  * match is found.
0332  *
0333  * Must be called with the page group lock held
0334  */
0335 static struct nfs_page *
0336 nfs_page_group_search_locked(struct nfs_page *head, unsigned int page_offset)
0337 {
0338     struct nfs_page *req;
0339 
0340     req = head;
0341     do {
0342         if (page_offset >= req->wb_pgbase &&
0343             page_offset < (req->wb_pgbase + req->wb_bytes))
0344             return req;
0345 
0346         req = req->wb_this_page;
0347     } while (req != head);
0348 
0349     return NULL;
0350 }
0351 
0352 /*
0353  * nfs_page_group_covers_page
0354  * @head - head request of page group
0355  *
0356  * Return true if the page group with head @head covers the whole page,
0357  * returns false otherwise
0358  */
0359 static bool nfs_page_group_covers_page(struct nfs_page *req)
0360 {
0361     struct nfs_page *tmp;
0362     unsigned int pos = 0;
0363     unsigned int len = nfs_page_length(req->wb_page);
0364 
0365     nfs_page_group_lock(req);
0366 
0367     for (;;) {
0368         tmp = nfs_page_group_search_locked(req->wb_head, pos);
0369         if (!tmp)
0370             break;
0371         pos = tmp->wb_pgbase + tmp->wb_bytes;
0372     }
0373 
0374     nfs_page_group_unlock(req);
0375     return pos >= len;
0376 }
0377 
0378 /* We can set the PG_uptodate flag if we see that a write request
0379  * covers the full page.
0380  */
0381 static void nfs_mark_uptodate(struct nfs_page *req)
0382 {
0383     if (PageUptodate(req->wb_page))
0384         return;
0385     if (!nfs_page_group_covers_page(req))
0386         return;
0387     SetPageUptodate(req->wb_page);
0388 }
0389 
0390 static int wb_priority(struct writeback_control *wbc)
0391 {
0392     int ret = 0;
0393 
0394     if (wbc->sync_mode == WB_SYNC_ALL)
0395         ret = FLUSH_COND_STABLE;
0396     return ret;
0397 }
0398 
/*
 * NFS congestion control
 *
 * nfs_congestion_kb is shifted right by (PAGE_SHIFT - 10) to obtain the
 * "on" threshold, which is compared against the per-server writeback
 * counter. The "off" threshold is three quarters of the "on" threshold.
 */

int nfs_congestion_kb;

#define NFS_CONGESTION_ON_THRESH    (nfs_congestion_kb >> (PAGE_SHIFT-10))
#define NFS_CONGESTION_OFF_THRESH   \
    (NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2))
0408 
0409 static void nfs_set_page_writeback(struct page *page)
0410 {
0411     struct inode *inode = page_file_mapping(page)->host;
0412     struct nfs_server *nfss = NFS_SERVER(inode);
0413     int ret = test_set_page_writeback(page);
0414 
0415     WARN_ON_ONCE(ret != 0);
0416 
0417     if (atomic_long_inc_return(&nfss->writeback) >
0418             NFS_CONGESTION_ON_THRESH)
0419         nfss->write_congested = 1;
0420 }
0421 
0422 static void nfs_end_page_writeback(struct nfs_page *req)
0423 {
0424     struct inode *inode = page_file_mapping(req->wb_page)->host;
0425     struct nfs_server *nfss = NFS_SERVER(inode);
0426     bool is_done;
0427 
0428     is_done = nfs_page_group_sync_on_bit(req, PG_WB_END);
0429     nfs_unlock_request(req);
0430     if (!is_done)
0431         return;
0432 
0433     end_page_writeback(req->wb_page);
0434     if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
0435         nfss->write_congested = 0;
0436 }
0437 
0438 /*
0439  * nfs_destroy_unlinked_subrequests - destroy recently unlinked subrequests
0440  *
0441  * @destroy_list - request list (using wb_this_page) terminated by @old_head
0442  * @old_head - the old head of the list
0443  *
0444  * All subrequests must be locked and removed from all lists, so at this point
0445  * they are only "active" in this function, and possibly in nfs_wait_on_request
0446  * with a reference held by some other context.
0447  */
0448 static void
0449 nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
0450                  struct nfs_page *old_head,
0451                  struct inode *inode)
0452 {
0453     while (destroy_list) {
0454         struct nfs_page *subreq = destroy_list;
0455 
0456         destroy_list = (subreq->wb_this_page == old_head) ?
0457                    NULL : subreq->wb_this_page;
0458 
0459         /* Note: lock subreq in order to change subreq->wb_head */
0460         nfs_page_set_headlock(subreq);
0461         WARN_ON_ONCE(old_head != subreq->wb_head);
0462 
0463         /* make sure old group is not used */
0464         subreq->wb_this_page = subreq;
0465         subreq->wb_head = subreq;
0466 
0467         clear_bit(PG_REMOVE, &subreq->wb_flags);
0468 
0469         /* Note: races with nfs_page_group_destroy() */
0470         if (!kref_read(&subreq->wb_kref)) {
0471             /* Check if we raced with nfs_page_group_destroy() */
0472             if (test_and_clear_bit(PG_TEARDOWN, &subreq->wb_flags)) {
0473                 nfs_page_clear_headlock(subreq);
0474                 nfs_free_request(subreq);
0475             } else
0476                 nfs_page_clear_headlock(subreq);
0477             continue;
0478         }
0479         nfs_page_clear_headlock(subreq);
0480 
0481         nfs_release_request(old_head);
0482 
0483         if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags)) {
0484             nfs_release_request(subreq);
0485             atomic_long_dec(&NFS_I(inode)->nrequests);
0486         }
0487 
0488         /* subreq is now totally disconnected from page group or any
0489          * write / commit lists. last chance to wake any waiters */
0490         nfs_unlock_and_release_request(subreq);
0491     }
0492 }
0493 
0494 /*
0495  * nfs_join_page_group - destroy subrequests of the head req
0496  * @head: the page used to lookup the "page group" of nfs_page structures
0497  * @inode: Inode to which the request belongs.
0498  *
0499  * This function joins all sub requests to the head request by first
0500  * locking all requests in the group, cancelling any pending operations
0501  * and finally updating the head request to cover the whole range covered by
0502  * the (former) group.  All subrequests are removed from any write or commit
0503  * lists, unlinked from the group and destroyed.
0504  */
0505 void
0506 nfs_join_page_group(struct nfs_page *head, struct inode *inode)
0507 {
0508     struct nfs_page *subreq;
0509     struct nfs_page *destroy_list = NULL;
0510     unsigned int pgbase, off, bytes;
0511 
0512     pgbase = head->wb_pgbase;
0513     bytes = head->wb_bytes;
0514     off = head->wb_offset;
0515     for (subreq = head->wb_this_page; subreq != head;
0516             subreq = subreq->wb_this_page) {
0517         /* Subrequests should always form a contiguous range */
0518         if (pgbase > subreq->wb_pgbase) {
0519             off -= pgbase - subreq->wb_pgbase;
0520             bytes += pgbase - subreq->wb_pgbase;
0521             pgbase = subreq->wb_pgbase;
0522         }
0523         bytes = max(subreq->wb_pgbase + subreq->wb_bytes
0524                 - pgbase, bytes);
0525     }
0526 
0527     /* Set the head request's range to cover the former page group */
0528     head->wb_pgbase = pgbase;
0529     head->wb_bytes = bytes;
0530     head->wb_offset = off;
0531 
0532     /* Now that all requests are locked, make sure they aren't on any list.
0533      * Commit list removal accounting is done after locks are dropped */
0534     subreq = head;
0535     do {
0536         nfs_clear_request_commit(subreq);
0537         subreq = subreq->wb_this_page;
0538     } while (subreq != head);
0539 
0540     /* unlink subrequests from head, destroy them later */
0541     if (head->wb_this_page != head) {
0542         /* destroy list will be terminated by head */
0543         destroy_list = head->wb_this_page;
0544         head->wb_this_page = head;
0545     }
0546 
0547     nfs_destroy_unlinked_subrequests(destroy_list, head, inode);
0548 }
0549 
0550 /*
0551  * nfs_lock_and_join_requests - join all subreqs to the head req
0552  * @page: the page used to lookup the "page group" of nfs_page structures
0553  *
0554  * This function joins all sub requests to the head request by first
0555  * locking all requests in the group, cancelling any pending operations
0556  * and finally updating the head request to cover the whole range covered by
0557  * the (former) group.  All subrequests are removed from any write or commit
0558  * lists, unlinked from the group and destroyed.
0559  *
0560  * Returns a locked, referenced pointer to the head request - which after
0561  * this call is guaranteed to be the only request associated with the page.
0562  * Returns NULL if no requests are found for @page, or a ERR_PTR if an
0563  * error was encountered.
0564  */
0565 static struct nfs_page *
0566 nfs_lock_and_join_requests(struct page *page)
0567 {
0568     struct inode *inode = page_file_mapping(page)->host;
0569     struct nfs_page *head;
0570     int ret;
0571 
0572     /*
0573      * A reference is taken only on the head request which acts as a
0574      * reference to the whole page group - the group will not be destroyed
0575      * until the head reference is released.
0576      */
0577     head = nfs_find_and_lock_page_request(page);
0578     if (IS_ERR_OR_NULL(head))
0579         return head;
0580 
0581     /* lock each request in the page group */
0582     ret = nfs_page_group_lock_subrequests(head);
0583     if (ret < 0) {
0584         nfs_unlock_and_release_request(head);
0585         return ERR_PTR(ret);
0586     }
0587 
0588     nfs_join_page_group(head, inode);
0589 
0590     return head;
0591 }
0592 
0593 static void nfs_write_error(struct nfs_page *req, int error)
0594 {
0595     trace_nfs_write_error(page_file_mapping(req->wb_page)->host, req,
0596                   error);
0597     nfs_mapping_set_error(req->wb_page, error);
0598     nfs_inode_remove_request(req);
0599     nfs_end_page_writeback(req);
0600     nfs_release_request(req);
0601 }
0602 
0603 /*
0604  * Find an associated nfs write request, and prepare to flush it out
0605  * May return an error if the user signalled nfs_wait_on_request().
0606  */
0607 static int nfs_page_async_flush(struct page *page,
0608                 struct writeback_control *wbc,
0609                 struct nfs_pageio_descriptor *pgio)
0610 {
0611     struct nfs_page *req;
0612     int ret = 0;
0613 
0614     req = nfs_lock_and_join_requests(page);
0615     if (!req)
0616         goto out;
0617     ret = PTR_ERR(req);
0618     if (IS_ERR(req))
0619         goto out;
0620 
0621     nfs_set_page_writeback(page);
0622     WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags));
0623 
0624     /* If there is a fatal error that covers this write, just exit */
0625     ret = pgio->pg_error;
0626     if (nfs_error_is_fatal_on_server(ret))
0627         goto out_launder;
0628 
0629     ret = 0;
0630     if (!nfs_pageio_add_request(pgio, req)) {
0631         ret = pgio->pg_error;
0632         /*
0633          * Remove the problematic req upon fatal errors on the server
0634          */
0635         if (nfs_error_is_fatal_on_server(ret))
0636             goto out_launder;
0637         if (wbc->sync_mode == WB_SYNC_NONE)
0638             ret = AOP_WRITEPAGE_ACTIVATE;
0639         redirty_page_for_writepage(wbc, page);
0640         nfs_redirty_request(req);
0641         pgio->pg_error = 0;
0642     } else
0643         nfs_add_stats(page_file_mapping(page)->host,
0644                 NFSIOS_WRITEPAGES, 1);
0645 out:
0646     return ret;
0647 out_launder:
0648     nfs_write_error(req, ret);
0649     return 0;
0650 }
0651 
0652 static int nfs_do_writepage(struct page *page, struct writeback_control *wbc,
0653                 struct nfs_pageio_descriptor *pgio)
0654 {
0655     nfs_pageio_cond_complete(pgio, page_index(page));
0656     return nfs_page_async_flush(page, wbc, pgio);
0657 }
0658 
0659 /*
0660  * Write an mmapped page to the server.
0661  */
0662 static int nfs_writepage_locked(struct page *page,
0663                 struct writeback_control *wbc)
0664 {
0665     struct nfs_pageio_descriptor pgio;
0666     struct inode *inode = page_file_mapping(page)->host;
0667     int err;
0668 
0669     if (wbc->sync_mode == WB_SYNC_NONE &&
0670         NFS_SERVER(inode)->write_congested)
0671         return AOP_WRITEPAGE_ACTIVATE;
0672 
0673     nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
0674     nfs_pageio_init_write(&pgio, inode, 0,
0675                 false, &nfs_async_write_completion_ops);
0676     err = nfs_do_writepage(page, wbc, &pgio);
0677     pgio.pg_error = 0;
0678     nfs_pageio_complete(&pgio);
0679     return err;
0680 }
0681 
0682 int nfs_writepage(struct page *page, struct writeback_control *wbc)
0683 {
0684     int ret;
0685 
0686     ret = nfs_writepage_locked(page, wbc);
0687     if (ret != AOP_WRITEPAGE_ACTIVATE)
0688         unlock_page(page);
0689     return ret;
0690 }
0691 
0692 static int nfs_writepages_callback(struct page *page, struct writeback_control *wbc, void *data)
0693 {
0694     int ret;
0695 
0696     ret = nfs_do_writepage(page, wbc, data);
0697     if (ret != AOP_WRITEPAGE_ACTIVATE)
0698         unlock_page(page);
0699     return ret;
0700 }
0701 
/*
 * I/O completion callback: issue a commit on the inode passed as the
 * opaque argument, with no flags.
 */
static void nfs_io_completion_commit(void *inode)
{
    nfs_commit_inode(inode, 0);
}
0706 
0707 int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
0708 {
0709     struct inode *inode = mapping->host;
0710     struct nfs_pageio_descriptor pgio;
0711     struct nfs_io_completion *ioc = NULL;
0712     unsigned int mntflags = NFS_SERVER(inode)->flags;
0713     int priority = 0;
0714     int err;
0715 
0716     if (wbc->sync_mode == WB_SYNC_NONE &&
0717         NFS_SERVER(inode)->write_congested)
0718         return 0;
0719 
0720     nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
0721 
0722     if (!(mntflags & NFS_MOUNT_WRITE_EAGER) || wbc->for_kupdate ||
0723         wbc->for_background || wbc->for_sync || wbc->for_reclaim) {
0724         ioc = nfs_io_completion_alloc(GFP_KERNEL);
0725         if (ioc)
0726             nfs_io_completion_init(ioc, nfs_io_completion_commit,
0727                            inode);
0728         priority = wb_priority(wbc);
0729     }
0730 
0731     do {
0732         nfs_pageio_init_write(&pgio, inode, priority, false,
0733                       &nfs_async_write_completion_ops);
0734         pgio.pg_io_completion = ioc;
0735         err = write_cache_pages(mapping, wbc, nfs_writepages_callback,
0736                     &pgio);
0737         pgio.pg_error = 0;
0738         nfs_pageio_complete(&pgio);
0739     } while (err < 0 && !nfs_error_is_fatal(err));
0740     nfs_io_completion_put(ioc);
0741 
0742     if (err < 0)
0743         goto out_err;
0744     return 0;
0745 out_err:
0746     return err;
0747 }
0748 
0749 /*
0750  * Insert a write request into an inode
0751  */
0752 static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
0753 {
0754     struct address_space *mapping = page_file_mapping(req->wb_page);
0755     struct nfs_inode *nfsi = NFS_I(inode);
0756 
0757     WARN_ON_ONCE(req->wb_this_page != req);
0758 
0759     /* Lock the request! */
0760     nfs_lock_request(req);
0761 
0762     /*
0763      * Swap-space should not get truncated. Hence no need to plug the race
0764      * with invalidate/truncate.
0765      */
0766     spin_lock(&mapping->private_lock);
0767     if (likely(!PageSwapCache(req->wb_page))) {
0768         set_bit(PG_MAPPED, &req->wb_flags);
0769         SetPagePrivate(req->wb_page);
0770         set_page_private(req->wb_page, (unsigned long)req);
0771     }
0772     spin_unlock(&mapping->private_lock);
0773     atomic_long_inc(&nfsi->nrequests);
0774     /* this a head request for a page group - mark it as having an
0775      * extra reference so sub groups can follow suit.
0776      * This flag also informs pgio layer when to bump nrequests when
0777      * adding subrequests. */
0778     WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags));
0779     kref_get(&req->wb_kref);
0780 }
0781 
0782 /*
0783  * Remove a write request from an inode
0784  */
0785 static void nfs_inode_remove_request(struct nfs_page *req)
0786 {
0787     struct address_space *mapping = page_file_mapping(req->wb_page);
0788     struct inode *inode = mapping->host;
0789     struct nfs_inode *nfsi = NFS_I(inode);
0790     struct nfs_page *head;
0791 
0792     if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) {
0793         head = req->wb_head;
0794 
0795         spin_lock(&mapping->private_lock);
0796         if (likely(head->wb_page && !PageSwapCache(head->wb_page))) {
0797             set_page_private(head->wb_page, 0);
0798             ClearPagePrivate(head->wb_page);
0799             clear_bit(PG_MAPPED, &head->wb_flags);
0800         }
0801         spin_unlock(&mapping->private_lock);
0802     }
0803 
0804     if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) {
0805         nfs_release_request(req);
0806         atomic_long_dec(&nfsi->nrequests);
0807     }
0808 }
0809 
0810 static void
0811 nfs_mark_request_dirty(struct nfs_page *req)
0812 {
0813     if (req->wb_page)
0814         __set_page_dirty_nobuffers(req->wb_page);
0815 }
0816 
0817 /*
0818  * nfs_page_search_commits_for_head_request_locked
0819  *
0820  * Search through commit lists on @inode for the head request for @page.
0821  * Must be called while holding the inode (which is cinfo) lock.
0822  *
0823  * Returns the head request if found, or NULL if not found.
0824  */
0825 static struct nfs_page *
0826 nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi,
0827                         struct page *page)
0828 {
0829     struct nfs_page *freq, *t;
0830     struct nfs_commit_info cinfo;
0831     struct inode *inode = &nfsi->vfs_inode;
0832 
0833     nfs_init_cinfo_from_inode(&cinfo, inode);
0834 
0835     /* search through pnfs commit lists */
0836     freq = pnfs_search_commit_reqs(inode, &cinfo, page);
0837     if (freq)
0838         return freq->wb_head;
0839 
0840     /* Linearly search the commit list for the correct request */
0841     list_for_each_entry_safe(freq, t, &cinfo.mds->list, wb_list) {
0842         if (freq->wb_page == page)
0843             return freq->wb_head;
0844     }
0845 
0846     return NULL;
0847 }
0848 
0849 /**
0850  * nfs_request_add_commit_list_locked - add request to a commit list
0851  * @req: pointer to a struct nfs_page
0852  * @dst: commit list head
0853  * @cinfo: holds list lock and accounting info
0854  *
0855  * This sets the PG_CLEAN bit, updates the cinfo count of
0856  * number of outstanding requests requiring a commit as well as
0857  * the MM page stats.
0858  *
0859  * The caller must hold NFS_I(cinfo->inode)->commit_mutex, and the
0860  * nfs_page lock.
0861  */
0862 void
0863 nfs_request_add_commit_list_locked(struct nfs_page *req, struct list_head *dst,
0864                 struct nfs_commit_info *cinfo)
0865 {
0866     set_bit(PG_CLEAN, &req->wb_flags);
0867     nfs_list_add_request(req, dst);
0868     atomic_long_inc(&cinfo->mds->ncommit);
0869 }
0870 EXPORT_SYMBOL_GPL(nfs_request_add_commit_list_locked);
0871 
0872 /**
0873  * nfs_request_add_commit_list - add request to a commit list
0874  * @req: pointer to a struct nfs_page
0875  * @cinfo: holds list lock and accounting info
0876  *
0877  * This sets the PG_CLEAN bit, updates the cinfo count of
0878  * number of outstanding requests requiring a commit as well as
0879  * the MM page stats.
0880  *
0881  * The caller must _not_ hold the cinfo->lock, but must be
0882  * holding the nfs_page lock.
0883  */
0884 void
0885 nfs_request_add_commit_list(struct nfs_page *req, struct nfs_commit_info *cinfo)
0886 {
0887     mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
0888     nfs_request_add_commit_list_locked(req, &cinfo->mds->list, cinfo);
0889     mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
0890     if (req->wb_page)
0891         nfs_mark_page_unstable(req->wb_page, cinfo);
0892 }
0893 EXPORT_SYMBOL_GPL(nfs_request_add_commit_list);
0894 
0895 /**
0896  * nfs_request_remove_commit_list - Remove request from a commit list
0897  * @req: pointer to a nfs_page
0898  * @cinfo: holds list lock and accounting info
0899  *
0900  * This clears the PG_CLEAN bit, and updates the cinfo's count of
0901  * number of outstanding requests requiring a commit
0902  * It does not update the MM page stats.
0903  *
0904  * The caller _must_ hold the cinfo->lock and the nfs_page lock.
0905  */
0906 void
0907 nfs_request_remove_commit_list(struct nfs_page *req,
0908                    struct nfs_commit_info *cinfo)
0909 {
0910     if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags))
0911         return;
0912     nfs_list_remove_request(req);
0913     atomic_long_dec(&cinfo->mds->ncommit);
0914 }
0915 EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list);
0916 
0917 static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
0918                       struct inode *inode)
0919 {
0920     cinfo->inode = inode;
0921     cinfo->mds = &NFS_I(inode)->commit_info;
0922     cinfo->ds = pnfs_get_ds_info(inode);
0923     cinfo->dreq = NULL;
0924     cinfo->completion_ops = &nfs_commit_completion_ops;
0925 }
0926 
/*
 * Initialise @cinfo from @dreq when a direct request is supplied,
 * otherwise from @inode.
 */
void nfs_init_cinfo(struct nfs_commit_info *cinfo,
            struct inode *inode,
            struct nfs_direct_req *dreq)
{
    if (dreq) {
        nfs_init_cinfo_from_dreq(cinfo, dreq);
        return;
    }
    nfs_init_cinfo_from_inode(cinfo, inode);
}
EXPORT_SYMBOL_GPL(nfs_init_cinfo);
0937 
0938 /*
0939  * Add a request to the inode's commit list.
0940  */
0941 void
0942 nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
0943             struct nfs_commit_info *cinfo, u32 ds_commit_idx)
0944 {
0945     if (pnfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx))
0946         return;
0947     nfs_request_add_commit_list(req, cinfo);
0948 }
0949 
0950 static void
0951 nfs_clear_page_commit(struct page *page)
0952 {
0953     dec_node_page_state(page, NR_WRITEBACK);
0954     dec_wb_stat(&inode_to_bdi(page_file_mapping(page)->host)->wb,
0955             WB_WRITEBACK);
0956 }
0957 
0958 /* Called holding the request lock on @req */
0959 static void
0960 nfs_clear_request_commit(struct nfs_page *req)
0961 {
0962     if (test_bit(PG_CLEAN, &req->wb_flags)) {
0963         struct nfs_open_context *ctx = nfs_req_openctx(req);
0964         struct inode *inode = d_inode(ctx->dentry);
0965         struct nfs_commit_info cinfo;
0966 
0967         nfs_init_cinfo_from_inode(&cinfo, inode);
0968         mutex_lock(&NFS_I(inode)->commit_mutex);
0969         if (!pnfs_clear_request_commit(req, &cinfo)) {
0970             nfs_request_remove_commit_list(req, &cinfo);
0971         }
0972         mutex_unlock(&NFS_I(inode)->commit_mutex);
0973         nfs_clear_page_commit(req->wb_page);
0974     }
0975 }
0976 
0977 int nfs_write_need_commit(struct nfs_pgio_header *hdr)
0978 {
0979     if (hdr->verf.committed == NFS_DATA_SYNC)
0980         return hdr->lseg == NULL;
0981     return hdr->verf.committed != NFS_FILE_SYNC;
0982 }
0983 
0984 static void nfs_async_write_init(struct nfs_pgio_header *hdr)
0985 {
0986     nfs_io_completion_get(hdr->io_completion);
0987 }
0988 
0989 static void nfs_write_completion(struct nfs_pgio_header *hdr)
0990 {
0991     struct nfs_commit_info cinfo;
0992     unsigned long bytes = 0;
0993 
0994     if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
0995         goto out;
0996     nfs_init_cinfo_from_inode(&cinfo, hdr->inode);
0997     while (!list_empty(&hdr->pages)) {
0998         struct nfs_page *req = nfs_list_entry(hdr->pages.next);
0999 
1000         bytes += req->wb_bytes;
1001         nfs_list_remove_request(req);
1002         if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) &&
1003             (hdr->good_bytes < bytes)) {
1004             trace_nfs_comp_error(hdr->inode, req, hdr->error);
1005             nfs_mapping_set_error(req->wb_page, hdr->error);
1006             goto remove_req;
1007         }
1008         if (nfs_write_need_commit(hdr)) {
1009             /* Reset wb_nio, since the write was successful. */
1010             req->wb_nio = 0;
1011             memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf));
1012             nfs_mark_request_commit(req, hdr->lseg, &cinfo,
1013                 hdr->pgio_mirror_idx);
1014             goto next;
1015         }
1016 remove_req:
1017         nfs_inode_remove_request(req);
1018 next:
1019         nfs_end_page_writeback(req);
1020         nfs_release_request(req);
1021     }
1022 out:
1023     nfs_io_completion_put(hdr->io_completion);
1024     hdr->release(hdr);
1025 }
1026 
1027 unsigned long
1028 nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
1029 {
1030     return atomic_long_read(&cinfo->mds->ncommit);
1031 }
1032 
1033 /* NFS_I(cinfo->inode)->commit_mutex held by caller */
1034 int
1035 nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
1036              struct nfs_commit_info *cinfo, int max)
1037 {
1038     struct nfs_page *req, *tmp;
1039     int ret = 0;
1040 
1041     list_for_each_entry_safe(req, tmp, src, wb_list) {
1042         kref_get(&req->wb_kref);
1043         if (!nfs_lock_request(req)) {
1044             nfs_release_request(req);
1045             continue;
1046         }
1047         nfs_request_remove_commit_list(req, cinfo);
1048         clear_bit(PG_COMMIT_TO_DS, &req->wb_flags);
1049         nfs_list_add_request(req, dst);
1050         ret++;
1051         if ((ret == max) && !cinfo->dreq)
1052             break;
1053         cond_resched();
1054     }
1055     return ret;
1056 }
1057 EXPORT_SYMBOL_GPL(nfs_scan_commit_list);
1058 
1059 /*
1060  * nfs_scan_commit - Scan an inode for commit requests
1061  * @inode: NFS inode to scan
1062  * @dst: mds destination list
1063  * @cinfo: mds and ds lists of reqs ready to commit
1064  *
1065  * Moves requests from the inode's 'commit' request list.
1066  * The requests are *not* checked to ensure that they form a contiguous set.
1067  */
1068 int
1069 nfs_scan_commit(struct inode *inode, struct list_head *dst,
1070         struct nfs_commit_info *cinfo)
1071 {
1072     int ret = 0;
1073 
1074     if (!atomic_long_read(&cinfo->mds->ncommit))
1075         return 0;
1076     mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
1077     if (atomic_long_read(&cinfo->mds->ncommit) > 0) {
1078         const int max = INT_MAX;
1079 
1080         ret = nfs_scan_commit_list(&cinfo->mds->list, dst,
1081                        cinfo, max);
1082         ret += pnfs_scan_commit_lists(inode, cinfo, max - ret);
1083     }
1084     mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
1085     return ret;
1086 }
1087 
1088 /*
1089  * Search for an existing write request, and attempt to update
1090  * it to reflect a new dirty region on a given page.
1091  *
1092  * If the attempt fails, then the existing request is flushed out
1093  * to disk.
1094  */
1095 static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
1096         struct page *page,
1097         unsigned int offset,
1098         unsigned int bytes)
1099 {
1100     struct nfs_page *req;
1101     unsigned int rqend;
1102     unsigned int end;
1103     int error;
1104 
1105     end = offset + bytes;
1106 
1107     req = nfs_lock_and_join_requests(page);
1108     if (IS_ERR_OR_NULL(req))
1109         return req;
1110 
1111     rqend = req->wb_offset + req->wb_bytes;
1112     /*
1113      * Tell the caller to flush out the request if
1114      * the offsets are non-contiguous.
1115      * Note: nfs_flush_incompatible() will already
1116      * have flushed out requests having wrong owners.
1117      */
1118     if (offset > rqend || end < req->wb_offset)
1119         goto out_flushme;
1120 
1121     /* Okay, the request matches. Update the region */
1122     if (offset < req->wb_offset) {
1123         req->wb_offset = offset;
1124         req->wb_pgbase = offset;
1125     }
1126     if (end > rqend)
1127         req->wb_bytes = end - req->wb_offset;
1128     else
1129         req->wb_bytes = rqend - req->wb_offset;
1130     req->wb_nio = 0;
1131     return req;
1132 out_flushme:
1133     /*
1134      * Note: we mark the request dirty here because
1135      * nfs_lock_and_join_requests() cannot preserve
1136      * commit flags, so we have to replay the write.
1137      */
1138     nfs_mark_request_dirty(req);
1139     nfs_unlock_and_release_request(req);
1140     error = nfs_wb_page(inode, page);
1141     return (error < 0) ? ERR_PTR(error) : NULL;
1142 }
1143 
1144 /*
1145  * Try to update an existing write request, or create one if there is none.
1146  *
1147  * Note: Should always be called with the Page Lock held to prevent races
1148  * if we have to add a new request. Also assumes that the caller has
1149  * already called nfs_flush_incompatible() if necessary.
1150  */
1151 static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
1152         struct page *page, unsigned int offset, unsigned int bytes)
1153 {
1154     struct inode *inode = page_file_mapping(page)->host;
1155     struct nfs_page *req;
1156 
1157     req = nfs_try_to_update_request(inode, page, offset, bytes);
1158     if (req != NULL)
1159         goto out;
1160     req = nfs_create_request(ctx, page, offset, bytes);
1161     if (IS_ERR(req))
1162         goto out;
1163     nfs_inode_add_request(inode, req);
1164 out:
1165     return req;
1166 }
1167 
/*
 * Set up (or extend) the write request for a region of @page, then update
 * the file length, mark the request up to date and dirty, and unlock and
 * release it.  Returns 0, or the error from setting up the request.
 */
static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
		unsigned int offset, unsigned int count)
{
	struct nfs_page *req = nfs_setup_write_request(ctx, page, offset, count);

	if (IS_ERR(req))
		return PTR_ERR(req);

	/* Update file length */
	nfs_grow_file(page, offset, count);
	nfs_mark_uptodate(req);
	nfs_mark_request_dirty(req);
	nfs_unlock_and_release_request(req);
	return 0;
}
1183 
1184 int nfs_flush_incompatible(struct file *file, struct page *page)
1185 {
1186     struct nfs_open_context *ctx = nfs_file_open_context(file);
1187     struct nfs_lock_context *l_ctx;
1188     struct file_lock_context *flctx = file_inode(file)->i_flctx;
1189     struct nfs_page *req;
1190     int do_flush, status;
1191     /*
1192      * Look for a request corresponding to this page. If there
1193      * is one, and it belongs to another file, we flush it out
1194      * before we try to copy anything into the page. Do this
1195      * due to the lack of an ACCESS-type call in NFSv2.
1196      * Also do the same if we find a request from an existing
1197      * dropped page.
1198      */
1199     do {
1200         req = nfs_page_find_head_request(page);
1201         if (req == NULL)
1202             return 0;
1203         l_ctx = req->wb_lock_context;
1204         do_flush = req->wb_page != page ||
1205             !nfs_match_open_context(nfs_req_openctx(req), ctx);
1206         if (l_ctx && flctx &&
1207             !(list_empty_careful(&flctx->flc_posix) &&
1208               list_empty_careful(&flctx->flc_flock))) {
1209             do_flush |= l_ctx->lockowner != current->files;
1210         }
1211         nfs_release_request(req);
1212         if (!do_flush)
1213             return 0;
1214         status = nfs_wb_page(page_file_mapping(page)->host, page);
1215     } while (status == 0);
1216     return status;
1217 }
1218 
1219 /*
1220  * Avoid buffered writes when a open context credential's key would
1221  * expire soon.
1222  *
1223  * Returns -EACCES if the key will expire within RPC_KEY_EXPIRE_FAIL.
1224  *
1225  * Return 0 and set a credential flag which triggers the inode to flush
1226  * and performs  NFS_FILE_SYNC writes if the key will expired within
1227  * RPC_KEY_EXPIRE_TIMEO.
1228  */
1229 int
1230 nfs_key_timeout_notify(struct file *filp, struct inode *inode)
1231 {
1232     struct nfs_open_context *ctx = nfs_file_open_context(filp);
1233 
1234     if (nfs_ctx_key_to_expire(ctx, inode) &&
1235         !rcu_access_pointer(ctx->ll_cred))
1236         /* Already expired! */
1237         return -EACCES;
1238     return 0;
1239 }
1240 
1241 /*
1242  * Test if the open context credential key is marked to expire soon.
1243  */
1244 bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx, struct inode *inode)
1245 {
1246     struct rpc_auth *auth = NFS_SERVER(inode)->client->cl_auth;
1247     struct rpc_cred *cred, *new, *old = NULL;
1248     struct auth_cred acred = {
1249         .cred = ctx->cred,
1250     };
1251     bool ret = false;
1252 
1253     rcu_read_lock();
1254     cred = rcu_dereference(ctx->ll_cred);
1255     if (cred && !(cred->cr_ops->crkey_timeout &&
1256               cred->cr_ops->crkey_timeout(cred)))
1257         goto out;
1258     rcu_read_unlock();
1259 
1260     new = auth->au_ops->lookup_cred(auth, &acred, 0);
1261     if (new == cred) {
1262         put_rpccred(new);
1263         return true;
1264     }
1265     if (IS_ERR_OR_NULL(new)) {
1266         new = NULL;
1267         ret = true;
1268     } else if (new->cr_ops->crkey_timeout &&
1269            new->cr_ops->crkey_timeout(new))
1270         ret = true;
1271 
1272     rcu_read_lock();
1273     old = rcu_dereference_protected(xchg(&ctx->ll_cred,
1274                          RCU_INITIALIZER(new)), 1);
1275 out:
1276     rcu_read_unlock();
1277     put_rpccred(old);
1278     return ret;
1279 }
1280 
1281 /*
1282  * If the page cache is marked as unsafe or invalid, then we can't rely on
1283  * the PageUptodate() flag. In this case, we will need to turn off
1284  * write optimisations that depend on the page contents being correct.
1285  */
1286 static bool nfs_write_pageuptodate(struct page *page, struct inode *inode,
1287                    unsigned int pagelen)
1288 {
1289     struct nfs_inode *nfsi = NFS_I(inode);
1290 
1291     if (nfs_have_delegated_attributes(inode))
1292         goto out;
1293     if (nfsi->cache_validity &
1294         (NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE))
1295         return false;
1296     smp_rmb();
1297     if (test_bit(NFS_INO_INVALIDATING, &nfsi->flags) && pagelen != 0)
1298         return false;
1299 out:
1300     if (nfsi->cache_validity & NFS_INO_INVALID_DATA && pagelen != 0)
1301         return false;
1302     return PageUptodate(page) != 0;
1303 }
1304 
1305 static bool
1306 is_whole_file_wrlock(struct file_lock *fl)
1307 {
1308     return fl->fl_start == 0 && fl->fl_end == OFFSET_MAX &&
1309             fl->fl_type == F_WRLCK;
1310 }
1311 
1312 /* If we know the page is up to date, and we're not using byte range locks (or
1313  * if we have the whole file locked for writing), it may be more efficient to
1314  * extend the write to cover the entire page in order to avoid fragmentation
1315  * inefficiencies.
1316  *
1317  * If the file is opened for synchronous writes then we can just skip the rest
1318  * of the checks.
1319  */
1320 static int nfs_can_extend_write(struct file *file, struct page *page,
1321                 struct inode *inode, unsigned int pagelen)
1322 {
1323     int ret;
1324     struct file_lock_context *flctx = inode->i_flctx;
1325     struct file_lock *fl;
1326 
1327     if (file->f_flags & O_DSYNC)
1328         return 0;
1329     if (!nfs_write_pageuptodate(page, inode, pagelen))
1330         return 0;
1331     if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
1332         return 1;
1333     if (!flctx || (list_empty_careful(&flctx->flc_flock) &&
1334                list_empty_careful(&flctx->flc_posix)))
1335         return 1;
1336 
1337     /* Check to see if there are whole file write locks */
1338     ret = 0;
1339     spin_lock(&flctx->flc_lock);
1340     if (!list_empty(&flctx->flc_posix)) {
1341         fl = list_first_entry(&flctx->flc_posix, struct file_lock,
1342                     fl_list);
1343         if (is_whole_file_wrlock(fl))
1344             ret = 1;
1345     } else if (!list_empty(&flctx->flc_flock)) {
1346         fl = list_first_entry(&flctx->flc_flock, struct file_lock,
1347                     fl_list);
1348         if (fl->fl_type == F_WRLCK)
1349             ret = 1;
1350     }
1351     spin_unlock(&flctx->flc_lock);
1352     return ret;
1353 }
1354 
1355 /*
1356  * Update and possibly write a cached page of an NFS file.
1357  *
1358  * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
1359  * things with a page scheduled for an RPC call (e.g. invalidate it).
1360  */
1361 int nfs_updatepage(struct file *file, struct page *page,
1362         unsigned int offset, unsigned int count)
1363 {
1364     struct nfs_open_context *ctx = nfs_file_open_context(file);
1365     struct address_space *mapping = page_file_mapping(page);
1366     struct inode    *inode = mapping->host;
1367     unsigned int    pagelen = nfs_page_length(page);
1368     int     status = 0;
1369 
1370     nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
1371 
1372     dprintk("NFS:       nfs_updatepage(%pD2 %d@%lld)\n",
1373         file, count, (long long)(page_file_offset(page) + offset));
1374 
1375     if (!count)
1376         goto out;
1377 
1378     if (nfs_can_extend_write(file, page, inode, pagelen)) {
1379         count = max(count + offset, pagelen);
1380         offset = 0;
1381     }
1382 
1383     status = nfs_writepage_setup(ctx, page, offset, count);
1384     if (status < 0)
1385         nfs_set_pageerror(mapping);
1386 out:
1387     dprintk("NFS:       nfs_updatepage returns %d (isize %lld)\n",
1388             status, (long long)i_size_read(inode));
1389     return status;
1390 }
1391 
1392 static int flush_task_priority(int how)
1393 {
1394     switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) {
1395         case FLUSH_HIGHPRI:
1396             return RPC_PRIORITY_HIGH;
1397         case FLUSH_LOWPRI:
1398             return RPC_PRIORITY_LOW;
1399     }
1400     return RPC_PRIORITY_NORMAL;
1401 }
1402 
1403 static void nfs_initiate_write(struct nfs_pgio_header *hdr,
1404                    struct rpc_message *msg,
1405                    const struct nfs_rpc_ops *rpc_ops,
1406                    struct rpc_task_setup *task_setup_data, int how)
1407 {
1408     int priority = flush_task_priority(how);
1409 
1410     if (IS_SWAPFILE(hdr->inode))
1411         task_setup_data->flags |= RPC_TASK_SWAPPER;
1412     task_setup_data->priority = priority;
1413     rpc_ops->write_setup(hdr, msg, &task_setup_data->rpc_client);
1414     trace_nfs_initiate_write(hdr);
1415 }
1416 
1417 /* If a nfs_flush_* function fails, it should remove reqs from @head and
1418  * call this on each, which will prepare them to be retried on next
1419  * writeback using standard nfs.
1420  */
1421 static void nfs_redirty_request(struct nfs_page *req)
1422 {
1423     struct nfs_inode *nfsi = NFS_I(page_file_mapping(req->wb_page)->host);
1424 
1425     /* Bump the transmission count */
1426     req->wb_nio++;
1427     nfs_mark_request_dirty(req);
1428     atomic_long_inc(&nfsi->redirtied_pages);
1429     nfs_end_page_writeback(req);
1430     nfs_release_request(req);
1431 }
1432 
1433 static void nfs_async_write_error(struct list_head *head, int error)
1434 {
1435     struct nfs_page *req;
1436 
1437     while (!list_empty(head)) {
1438         req = nfs_list_entry(head->next);
1439         nfs_list_remove_request(req);
1440         if (nfs_error_is_fatal_on_server(error))
1441             nfs_write_error(req, error);
1442         else
1443             nfs_redirty_request(req);
1444     }
1445 }
1446 
1447 static void nfs_async_write_reschedule_io(struct nfs_pgio_header *hdr)
1448 {
1449     nfs_async_write_error(&hdr->pages, 0);
1450 }
1451 
1452 static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
1453     .init_hdr = nfs_async_write_init,
1454     .error_cleanup = nfs_async_write_error,
1455     .completion = nfs_write_completion,
1456     .reschedule_io = nfs_async_write_reschedule_io,
1457 };
1458 
1459 void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
1460                    struct inode *inode, int ioflags, bool force_mds,
1461                    const struct nfs_pgio_completion_ops *compl_ops)
1462 {
1463     struct nfs_server *server = NFS_SERVER(inode);
1464     const struct nfs_pageio_ops *pg_ops = &nfs_pgio_rw_ops;
1465 
1466 #ifdef CONFIG_NFS_V4_1
1467     if (server->pnfs_curr_ld && !force_mds)
1468         pg_ops = server->pnfs_curr_ld->pg_write_ops;
1469 #endif
1470     nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_write_ops,
1471             server->wsize, ioflags);
1472 }
1473 EXPORT_SYMBOL_GPL(nfs_pageio_init_write);
1474 
1475 void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
1476 {
1477     struct nfs_pgio_mirror *mirror;
1478 
1479     if (pgio->pg_ops && pgio->pg_ops->pg_cleanup)
1480         pgio->pg_ops->pg_cleanup(pgio);
1481 
1482     pgio->pg_ops = &nfs_pgio_rw_ops;
1483 
1484     nfs_pageio_stop_mirroring(pgio);
1485 
1486     mirror = &pgio->pg_mirrors[0];
1487     mirror->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize;
1488 }
1489 EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);
1490 
1491 
1492 void nfs_commit_prepare(struct rpc_task *task, void *calldata)
1493 {
1494     struct nfs_commit_data *data = calldata;
1495 
1496     NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
1497 }
1498 
1499 static void nfs_writeback_check_extend(struct nfs_pgio_header *hdr,
1500         struct nfs_fattr *fattr)
1501 {
1502     struct nfs_pgio_args *argp = &hdr->args;
1503     struct nfs_pgio_res *resp = &hdr->res;
1504     u64 size = argp->offset + resp->count;
1505 
1506     if (!(fattr->valid & NFS_ATTR_FATTR_SIZE))
1507         fattr->size = size;
1508     if (nfs_size_to_loff_t(fattr->size) < i_size_read(hdr->inode)) {
1509         fattr->valid &= ~NFS_ATTR_FATTR_SIZE;
1510         return;
1511     }
1512     if (size != fattr->size)
1513         return;
1514     /* Set attribute barrier */
1515     nfs_fattr_set_barrier(fattr);
1516     /* ...and update size */
1517     fattr->valid |= NFS_ATTR_FATTR_SIZE;
1518 }
1519 
1520 void nfs_writeback_update_inode(struct nfs_pgio_header *hdr)
1521 {
1522     struct nfs_fattr *fattr = &hdr->fattr;
1523     struct inode *inode = hdr->inode;
1524 
1525     spin_lock(&inode->i_lock);
1526     nfs_writeback_check_extend(hdr, fattr);
1527     nfs_post_op_update_inode_force_wcc_locked(inode, fattr);
1528     spin_unlock(&inode->i_lock);
1529 }
1530 EXPORT_SYMBOL_GPL(nfs_writeback_update_inode);
1531 
1532 /*
1533  * This function is called when the WRITE call is complete.
1534  */
1535 static int nfs_writeback_done(struct rpc_task *task,
1536                   struct nfs_pgio_header *hdr,
1537                   struct inode *inode)
1538 {
1539     int status;
1540 
1541     /*
1542      * ->write_done will attempt to use post-op attributes to detect
1543      * conflicting writes by other clients.  A strict interpretation
1544      * of close-to-open would allow us to continue caching even if
1545      * another writer had changed the file, but some applications
1546      * depend on tighter cache coherency when writing.
1547      */
1548     status = NFS_PROTO(inode)->write_done(task, hdr);
1549     if (status != 0)
1550         return status;
1551 
1552     nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count);
1553     trace_nfs_writeback_done(task, hdr);
1554 
1555     if (task->tk_status >= 0) {
1556         enum nfs3_stable_how committed = hdr->res.verf->committed;
1557 
1558         if (committed == NFS_UNSTABLE) {
1559             /*
1560              * We have some uncommitted data on the server at
1561              * this point, so ensure that we keep track of that
1562              * fact irrespective of what later writes do.
1563              */
1564             set_bit(NFS_IOHDR_UNSTABLE_WRITES, &hdr->flags);
1565         }
1566 
1567         if (committed < hdr->args.stable) {
1568             /* We tried a write call, but the server did not
1569              * commit data to stable storage even though we
1570              * requested it.
1571              * Note: There is a known bug in Tru64 < 5.0 in which
1572              *   the server reports NFS_DATA_SYNC, but performs
1573              *   NFS_FILE_SYNC. We therefore implement this checking
1574              *   as a dprintk() in order to avoid filling syslog.
1575              */
1576             static unsigned long    complain;
1577 
1578             /* Note this will print the MDS for a DS write */
1579             if (time_before(complain, jiffies)) {
1580                 dprintk("NFS:       faulty NFS server %s:"
1581                     " (committed = %d) != (stable = %d)\n",
1582                     NFS_SERVER(inode)->nfs_client->cl_hostname,
1583                     committed, hdr->args.stable);
1584                 complain = jiffies + 300 * HZ;
1585             }
1586         }
1587     }
1588 
1589     /* Deal with the suid/sgid bit corner case */
1590     if (nfs_should_remove_suid(inode)) {
1591         spin_lock(&inode->i_lock);
1592         nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE);
1593         spin_unlock(&inode->i_lock);
1594     }
1595     return 0;
1596 }
1597 
1598 /*
1599  * This function is called when the WRITE call is complete.
1600  */
1601 static void nfs_writeback_result(struct rpc_task *task,
1602                  struct nfs_pgio_header *hdr)
1603 {
1604     struct nfs_pgio_args    *argp = &hdr->args;
1605     struct nfs_pgio_res *resp = &hdr->res;
1606 
1607     if (resp->count < argp->count) {
1608         static unsigned long    complain;
1609 
1610         /* This a short write! */
1611         nfs_inc_stats(hdr->inode, NFSIOS_SHORTWRITE);
1612 
1613         /* Has the server at least made some progress? */
1614         if (resp->count == 0) {
1615             if (time_before(complain, jiffies)) {
1616                 printk(KERN_WARNING
1617                        "NFS: Server wrote zero bytes, expected %u.\n",
1618                        argp->count);
1619                 complain = jiffies + 300 * HZ;
1620             }
1621             nfs_set_pgio_error(hdr, -EIO, argp->offset);
1622             task->tk_status = -EIO;
1623             return;
1624         }
1625 
1626         /* For non rpc-based layout drivers, retry-through-MDS */
1627         if (!task->tk_ops) {
1628             hdr->pnfs_error = -EAGAIN;
1629             return;
1630         }
1631 
1632         /* Was this an NFSv2 write or an NFSv3 stable write? */
1633         if (resp->verf->committed != NFS_UNSTABLE) {
1634             /* Resend from where the server left off */
1635             hdr->mds_offset += resp->count;
1636             argp->offset += resp->count;
1637             argp->pgbase += resp->count;
1638             argp->count -= resp->count;
1639         } else {
1640             /* Resend as a stable write in order to avoid
1641              * headaches in the case of a server crash.
1642              */
1643             argp->stable = NFS_FILE_SYNC;
1644         }
1645         resp->count = 0;
1646         resp->verf->committed = 0;
1647         rpc_restart_call_prepare(task);
1648     }
1649 }
1650 
1651 static int wait_on_commit(struct nfs_mds_commit_info *cinfo)
1652 {
1653     return wait_var_event_killable(&cinfo->rpcs_out,
1654                        !atomic_read(&cinfo->rpcs_out));
1655 }
1656 
1657 static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo)
1658 {
1659     atomic_inc(&cinfo->rpcs_out);
1660 }
1661 
1662 bool nfs_commit_end(struct nfs_mds_commit_info *cinfo)
1663 {
1664     if (atomic_dec_and_test(&cinfo->rpcs_out)) {
1665         wake_up_var(&cinfo->rpcs_out);
1666         return true;
1667     }
1668     return false;
1669 }
1670 
1671 void nfs_commitdata_release(struct nfs_commit_data *data)
1672 {
1673     put_nfs_open_context(data->context);
1674     nfs_commit_free(data);
1675 }
1676 EXPORT_SYMBOL_GPL(nfs_commitdata_release);
1677 
1678 int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data,
1679             const struct nfs_rpc_ops *nfs_ops,
1680             const struct rpc_call_ops *call_ops,
1681             int how, int flags)
1682 {
1683     struct rpc_task *task;
1684     int priority = flush_task_priority(how);
1685     struct rpc_message msg = {
1686         .rpc_argp = &data->args,
1687         .rpc_resp = &data->res,
1688         .rpc_cred = data->cred,
1689     };
1690     struct rpc_task_setup task_setup_data = {
1691         .task = &data->task,
1692         .rpc_client = clnt,
1693         .rpc_message = &msg,
1694         .callback_ops = call_ops,
1695         .callback_data = data,
1696         .workqueue = nfsiod_workqueue,
1697         .flags = RPC_TASK_ASYNC | flags,
1698         .priority = priority,
1699     };
1700 
1701     if (nfs_server_capable(data->inode, NFS_CAP_MOVEABLE))
1702         task_setup_data.flags |= RPC_TASK_MOVEABLE;
1703 
1704     /* Set up the initial task struct.  */
1705     nfs_ops->commit_setup(data, &msg, &task_setup_data.rpc_client);
1706     trace_nfs_initiate_commit(data);
1707 
1708     dprintk("NFS: initiated commit call\n");
1709 
1710     task = rpc_run_task(&task_setup_data);
1711     if (IS_ERR(task))
1712         return PTR_ERR(task);
1713     if (how & FLUSH_SYNC)
1714         rpc_wait_for_completion_task(task);
1715     rpc_put_task(task);
1716     return 0;
1717 }
1718 EXPORT_SYMBOL_GPL(nfs_initiate_commit);
1719 
1720 static loff_t nfs_get_lwb(struct list_head *head)
1721 {
1722     loff_t lwb = 0;
1723     struct nfs_page *req;
1724 
1725     list_for_each_entry(req, head, wb_list)
1726         if (lwb < (req_offset(req) + req->wb_bytes))
1727             lwb = req_offset(req) + req->wb_bytes;
1728 
1729     return lwb;
1730 }
1731 
1732 /*
1733  * Set up the argument/result storage required for the RPC call.
1734  */
1735 void nfs_init_commit(struct nfs_commit_data *data,
1736              struct list_head *head,
1737              struct pnfs_layout_segment *lseg,
1738              struct nfs_commit_info *cinfo)
1739 {
1740     struct nfs_page *first;
1741     struct nfs_open_context *ctx;
1742     struct inode *inode;
1743 
1744     /* Set up the RPC argument and reply structs
1745      * NB: take care not to mess about with data->commit et al. */
1746 
1747     if (head)
1748         list_splice_init(head, &data->pages);
1749 
1750     first = nfs_list_entry(data->pages.next);
1751     ctx = nfs_req_openctx(first);
1752     inode = d_inode(ctx->dentry);
1753 
1754     data->inode   = inode;
1755     data->cred    = ctx->cred;
1756     data->lseg    = lseg; /* reference transferred */
1757     /* only set lwb for pnfs commit */
1758     if (lseg)
1759         data->lwb = nfs_get_lwb(&data->pages);
1760     data->mds_ops     = &nfs_commit_ops;
1761     data->completion_ops = cinfo->completion_ops;
1762     data->dreq    = cinfo->dreq;
1763 
1764     data->args.fh     = NFS_FH(data->inode);
1765     /* Note: we always request a commit of the entire inode */
1766     data->args.offset = 0;
1767     data->args.count  = 0;
1768     data->context     = get_nfs_open_context(ctx);
1769     data->res.fattr   = &data->fattr;
1770     data->res.verf    = &data->verf;
1771     nfs_fattr_init(&data->fattr);
1772     nfs_commit_begin(cinfo->mds);
1773 }
1774 EXPORT_SYMBOL_GPL(nfs_init_commit);
1775 
1776 void nfs_retry_commit(struct list_head *page_list,
1777               struct pnfs_layout_segment *lseg,
1778               struct nfs_commit_info *cinfo,
1779               u32 ds_commit_idx)
1780 {
1781     struct nfs_page *req;
1782 
1783     while (!list_empty(page_list)) {
1784         req = nfs_list_entry(page_list->next);
1785         nfs_list_remove_request(req);
1786         nfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx);
1787         if (!cinfo->dreq)
1788             nfs_clear_page_commit(req->wb_page);
1789         nfs_unlock_and_release_request(req);
1790     }
1791 }
1792 EXPORT_SYMBOL_GPL(nfs_retry_commit);
1793 
1794 static void
1795 nfs_commit_resched_write(struct nfs_commit_info *cinfo,
1796         struct nfs_page *req)
1797 {
1798     __set_page_dirty_nobuffers(req->wb_page);
1799 }
1800 
1801 /*
1802  * Commit dirty pages
1803  */
1804 static int
1805 nfs_commit_list(struct inode *inode, struct list_head *head, int how,
1806         struct nfs_commit_info *cinfo)
1807 {
1808     struct nfs_commit_data  *data;
1809     unsigned short task_flags = 0;
1810 
1811     /* another commit raced with us */
1812     if (list_empty(head))
1813         return 0;
1814 
1815     data = nfs_commitdata_alloc();
1816     if (!data) {
1817         nfs_retry_commit(head, NULL, cinfo, -1);
1818         return -ENOMEM;
1819     }
1820 
1821     /* Set up the argument struct */
1822     nfs_init_commit(data, head, NULL, cinfo);
1823     if (NFS_SERVER(inode)->nfs_client->cl_minorversion)
1824         task_flags = RPC_TASK_MOVEABLE;
1825     return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode),
1826                    data->mds_ops, how,
1827                    RPC_TASK_CRED_NOREF | task_flags);
1828 }
1829 
1830 /*
1831  * COMMIT call returned
1832  */
1833 static void nfs_commit_done(struct rpc_task *task, void *calldata)
1834 {
1835     struct nfs_commit_data  *data = calldata;
1836 
1837     /* Call the NFS version-specific code */
1838     NFS_PROTO(data->inode)->commit_done(task, data);
1839     trace_nfs_commit_done(task, data);
1840 }
1841 
1842 static void nfs_commit_release_pages(struct nfs_commit_data *data)
1843 {
1844     const struct nfs_writeverf *verf = data->res.verf;
1845     struct nfs_page *req;
1846     int status = data->task.tk_status;
1847     struct nfs_commit_info cinfo;
1848     struct nfs_server *nfss;
1849 
1850     while (!list_empty(&data->pages)) {
1851         req = nfs_list_entry(data->pages.next);
1852         nfs_list_remove_request(req);
1853         if (req->wb_page)
1854             nfs_clear_page_commit(req->wb_page);
1855 
1856         dprintk("NFS:       commit (%s/%llu %d@%lld)",
1857             nfs_req_openctx(req)->dentry->d_sb->s_id,
1858             (unsigned long long)NFS_FILEID(d_inode(nfs_req_openctx(req)->dentry)),
1859             req->wb_bytes,
1860             (long long)req_offset(req));
1861         if (status < 0) {
1862             if (req->wb_page) {
1863                 trace_nfs_commit_error(data->inode, req,
1864                                status);
1865                 nfs_mapping_set_error(req->wb_page, status);
1866                 nfs_inode_remove_request(req);
1867             }
1868             dprintk_cont(", error = %d\n", status);
1869             goto next;
1870         }
1871 
1872         /* Okay, COMMIT succeeded, apparently. Check the verifier
1873          * returned by the server against all stored verfs. */
1874         if (nfs_write_match_verf(verf, req)) {
1875             /* We have a match */
1876             if (req->wb_page)
1877                 nfs_inode_remove_request(req);
1878             dprintk_cont(" OK\n");
1879             goto next;
1880         }
1881         /* We have a mismatch. Write the page again */
1882         dprintk_cont(" mismatch\n");
1883         nfs_mark_request_dirty(req);
1884         atomic_long_inc(&NFS_I(data->inode)->redirtied_pages);
1885     next:
1886         nfs_unlock_and_release_request(req);
1887         /* Latency breaker */
1888         cond_resched();
1889     }
1890     nfss = NFS_SERVER(data->inode);
1891     if (atomic_long_read(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
1892         nfss->write_congested = 0;
1893 
1894     nfs_init_cinfo(&cinfo, data->inode, data->dreq);
1895     nfs_commit_end(cinfo.mds);
1896 }
1897 
1898 static void nfs_commit_release(void *calldata)
1899 {
1900     struct nfs_commit_data *data = calldata;
1901 
1902     data->completion_ops->completion(data);
1903     nfs_commitdata_release(calldata);
1904 }
1905 
1906 static const struct rpc_call_ops nfs_commit_ops = {
1907     .rpc_call_prepare = nfs_commit_prepare,
1908     .rpc_call_done = nfs_commit_done,
1909     .rpc_release = nfs_commit_release,
1910 };
1911 
1912 static const struct nfs_commit_completion_ops nfs_commit_completion_ops = {
1913     .completion = nfs_commit_release_pages,
1914     .resched_write = nfs_commit_resched_write,
1915 };
1916 
1917 int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
1918                 int how, struct nfs_commit_info *cinfo)
1919 {
1920     int status;
1921 
1922     status = pnfs_commit_list(inode, head, how, cinfo);
1923     if (status == PNFS_NOT_ATTEMPTED)
1924         status = nfs_commit_list(inode, head, how, cinfo);
1925     return status;
1926 }
1927 
1928 static int __nfs_commit_inode(struct inode *inode, int how,
1929         struct writeback_control *wbc)
1930 {
1931     LIST_HEAD(head);
1932     struct nfs_commit_info cinfo;
1933     int may_wait = how & FLUSH_SYNC;
1934     int ret, nscan;
1935 
1936     how &= ~FLUSH_SYNC;
1937     nfs_init_cinfo_from_inode(&cinfo, inode);
1938     nfs_commit_begin(cinfo.mds);
1939     for (;;) {
1940         ret = nscan = nfs_scan_commit(inode, &head, &cinfo);
1941         if (ret <= 0)
1942             break;
1943         ret = nfs_generic_commit_list(inode, &head, how, &cinfo);
1944         if (ret < 0)
1945             break;
1946         ret = 0;
1947         if (wbc && wbc->sync_mode == WB_SYNC_NONE) {
1948             if (nscan < wbc->nr_to_write)
1949                 wbc->nr_to_write -= nscan;
1950             else
1951                 wbc->nr_to_write = 0;
1952         }
1953         if (nscan < INT_MAX)
1954             break;
1955         cond_resched();
1956     }
1957     nfs_commit_end(cinfo.mds);
1958     if (ret || !may_wait)
1959         return ret;
1960     return wait_on_commit(cinfo.mds);
1961 }
1962 
1963 int nfs_commit_inode(struct inode *inode, int how)
1964 {
1965     return __nfs_commit_inode(inode, how, NULL);
1966 }
1967 EXPORT_SYMBOL_GPL(nfs_commit_inode);
1968 
1969 int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
1970 {
1971     struct nfs_inode *nfsi = NFS_I(inode);
1972     int flags = FLUSH_SYNC;
1973     int ret = 0;
1974 
1975     if (wbc->sync_mode == WB_SYNC_NONE) {
1976         /* no commits means nothing needs to be done */
1977         if (!atomic_long_read(&nfsi->commit_info.ncommit))
1978             goto check_requests_outstanding;
1979 
1980         /* Don't commit yet if this is a non-blocking flush and there
1981          * are a lot of outstanding writes for this mapping.
1982          */
1983         if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))
1984             goto out_mark_dirty;
1985 
1986         /* don't wait for the COMMIT response */
1987         flags = 0;
1988     }
1989 
1990     ret = __nfs_commit_inode(inode, flags, wbc);
1991     if (!ret) {
1992         if (flags & FLUSH_SYNC)
1993             return 0;
1994     } else if (atomic_long_read(&nfsi->commit_info.ncommit))
1995         goto out_mark_dirty;
1996 
1997 check_requests_outstanding:
1998     if (!atomic_read(&nfsi->commit_info.rpcs_out))
1999         return ret;
2000 out_mark_dirty:
2001     __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
2002     return ret;
2003 }
2004 EXPORT_SYMBOL_GPL(nfs_write_inode);
2005 
2006 /*
2007  * Wrapper for filemap_write_and_wait_range()
2008  *
2009  * Needed for pNFS in order to ensure data becomes visible to the
2010  * client.
2011  */
2012 int nfs_filemap_write_and_wait_range(struct address_space *mapping,
2013         loff_t lstart, loff_t lend)
2014 {
2015     int ret;
2016 
2017     ret = filemap_write_and_wait_range(mapping, lstart, lend);
2018     if (ret == 0)
2019         ret = pnfs_sync_inode(mapping->host, true);
2020     return ret;
2021 }
2022 EXPORT_SYMBOL_GPL(nfs_filemap_write_and_wait_range);
2023 
2024 /*
2025  * flush the inode to disk.
2026  */
2027 int nfs_wb_all(struct inode *inode)
2028 {
2029     int ret;
2030 
2031     trace_nfs_writeback_inode_enter(inode);
2032 
2033     ret = filemap_write_and_wait(inode->i_mapping);
2034     if (ret)
2035         goto out;
2036     ret = nfs_commit_inode(inode, FLUSH_SYNC);
2037     if (ret < 0)
2038         goto out;
2039     pnfs_sync_inode(inode, true);
2040     ret = 0;
2041 
2042 out:
2043     trace_nfs_writeback_inode_exit(inode, ret);
2044     return ret;
2045 }
2046 EXPORT_SYMBOL_GPL(nfs_wb_all);
2047 
2048 int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio)
2049 {
2050     struct nfs_page *req;
2051     int ret = 0;
2052 
2053     folio_wait_writeback(folio);
2054 
2055     /* blocking call to cancel all requests and join to a single (head)
2056      * request */
2057     req = nfs_lock_and_join_requests(&folio->page);
2058 
2059     if (IS_ERR(req)) {
2060         ret = PTR_ERR(req);
2061     } else if (req) {
2062         /* all requests from this folio have been cancelled by
2063          * nfs_lock_and_join_requests, so just remove the head
2064          * request from the inode / page_private pointer and
2065          * release it */
2066         nfs_inode_remove_request(req);
2067         nfs_unlock_and_release_request(req);
2068     }
2069 
2070     return ret;
2071 }
2072 
2073 /*
2074  * Write back all requests on one page - we do this before reading it.
2075  */
2076 int nfs_wb_page(struct inode *inode, struct page *page)
2077 {
2078     loff_t range_start = page_file_offset(page);
2079     loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
2080     struct writeback_control wbc = {
2081         .sync_mode = WB_SYNC_ALL,
2082         .nr_to_write = 0,
2083         .range_start = range_start,
2084         .range_end = range_end,
2085     };
2086     int ret;
2087 
2088     trace_nfs_writeback_page_enter(inode);
2089 
2090     for (;;) {
2091         wait_on_page_writeback(page);
2092         if (clear_page_dirty_for_io(page)) {
2093             ret = nfs_writepage_locked(page, &wbc);
2094             if (ret < 0)
2095                 goto out_error;
2096             continue;
2097         }
2098         ret = 0;
2099         if (!PagePrivate(page))
2100             break;
2101         ret = nfs_commit_inode(inode, FLUSH_SYNC);
2102         if (ret < 0)
2103             goto out_error;
2104     }
2105 out_error:
2106     trace_nfs_writeback_page_exit(inode, ret);
2107     return ret;
2108 }
2109 
2110 #ifdef CONFIG_MIGRATION
2111 int nfs_migrate_folio(struct address_space *mapping, struct folio *dst,
2112         struct folio *src, enum migrate_mode mode)
2113 {
2114     /*
2115      * If the private flag is set, the folio is currently associated with
2116      * an in-progress read or write request. Don't try to migrate it.
2117      *
2118      * FIXME: we could do this in principle, but we'll need a way to ensure
2119      *        that we can safely release the inode reference while holding
2120      *        the folio lock.
2121      */
2122     if (folio_test_private(src))
2123         return -EBUSY;
2124 
2125     if (folio_test_fscache(src)) {
2126         if (mode == MIGRATE_ASYNC)
2127             return -EBUSY;
2128         folio_wait_fscache(src);
2129     }
2130 
2131     return migrate_folio(mapping, dst, src, mode);
2132 }
2133 #endif
2134 
2135 int __init nfs_init_writepagecache(void)
2136 {
2137     nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
2138                          sizeof(struct nfs_pgio_header),
2139                          0, SLAB_HWCACHE_ALIGN,
2140                          NULL);
2141     if (nfs_wdata_cachep == NULL)
2142         return -ENOMEM;
2143 
2144     nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE,
2145                              nfs_wdata_cachep);
2146     if (nfs_wdata_mempool == NULL)
2147         goto out_destroy_write_cache;
2148 
2149     nfs_cdata_cachep = kmem_cache_create("nfs_commit_data",
2150                          sizeof(struct nfs_commit_data),
2151                          0, SLAB_HWCACHE_ALIGN,
2152                          NULL);
2153     if (nfs_cdata_cachep == NULL)
2154         goto out_destroy_write_mempool;
2155 
2156     nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT,
2157                               nfs_cdata_cachep);
2158     if (nfs_commit_mempool == NULL)
2159         goto out_destroy_commit_cache;
2160 
2161     /*
2162      * NFS congestion size, scale with available memory.
2163      *
2164      *  64MB:    8192k
2165      * 128MB:   11585k
2166      * 256MB:   16384k
2167      * 512MB:   23170k
2168      *   1GB:   32768k
2169      *   2GB:   46340k
2170      *   4GB:   65536k
2171      *   8GB:   92681k
2172      *  16GB:  131072k
2173      *
2174      * This allows larger machines to have larger/more transfers.
2175      * Limit the default to 256M
2176      */
2177     nfs_congestion_kb = (16*int_sqrt(totalram_pages())) << (PAGE_SHIFT-10);
2178     if (nfs_congestion_kb > 256*1024)
2179         nfs_congestion_kb = 256*1024;
2180 
2181     return 0;
2182 
2183 out_destroy_commit_cache:
2184     kmem_cache_destroy(nfs_cdata_cachep);
2185 out_destroy_write_mempool:
2186     mempool_destroy(nfs_wdata_mempool);
2187 out_destroy_write_cache:
2188     kmem_cache_destroy(nfs_wdata_cachep);
2189     return -ENOMEM;
2190 }
2191 
2192 void nfs_destroy_writepagecache(void)
2193 {
2194     mempool_destroy(nfs_commit_mempool);
2195     kmem_cache_destroy(nfs_cdata_cachep);
2196     mempool_destroy(nfs_wdata_mempool);
2197     kmem_cache_destroy(nfs_wdata_cachep);
2198 }
2199 
2200 static const struct nfs_rw_ops nfs_rw_write_ops = {
2201     .rw_alloc_header    = nfs_writehdr_alloc,
2202     .rw_free_header     = nfs_writehdr_free,
2203     .rw_done        = nfs_writeback_done,
2204     .rw_result      = nfs_writeback_result,
2205     .rw_initiate        = nfs_initiate_write,
2206 };