fs/ubifs/file.c

0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * This file is part of UBIFS.
0004  *
0005  * Copyright (C) 2006-2008 Nokia Corporation.
0006  *
0007  * Authors: Artem Bityutskiy (Битюцкий Артём)
0008  *          Adrian Hunter
0009  */
0010
0011 /*
0012  * This file implements VFS file and inode operations for regular files, device
0013  * nodes and symlinks as well as address space operations.
0014  *
0015  * UBIFS uses 2 page flags: @PG_private and @PG_checked. @PG_private is set if
0016  * the page is dirty and is used for optimization purposes - dirty pages are
0017  * not budgeted so the flag shows that 'ubifs_write_end()' should not release
0018  * the budget for this page. The @PG_checked flag is set if full budgeting is
0019  * required for the page e.g., when it corresponds to a file hole or it is
0020  * beyond the file size. The budgeting is done in 'ubifs_write_begin()', because
0021  * it is OK to fail in this function, and the budget is released in
0022  * 'ubifs_write_end()'. So the @PG_private and @PG_checked flags carry
0023  * information about how the page was budgeted, to make it possible to release
0024  * the budget properly.
0025  *
0026  * A thing to keep in mind: inode @i_mutex is locked in most VFS operations we
0027  * implement. However, this is not true for 'ubifs_writepage()', which may be
0028  * called with @i_mutex unlocked. For example, when flusher thread is doing
0029  * background write-back, it calls 'ubifs_writepage()' with unlocked @i_mutex.
0030  * At "normal" work-paths the @i_mutex is locked in 'ubifs_writepage()', e.g.
0031  * in the "sys_write -> alloc_pages -> direct reclaim path". So, in
0032  * 'ubifs_writepage()' we are only guaranteed that the page is locked.
0033  *
0034  * Similarly, @i_mutex is not always locked in 'ubifs_read_folio()', e.g., the
0035  * read-ahead path does not lock it ("sys_read -> generic_file_aio_read ->
0036  * ondemand_readahead -> read_folio"). In case of readahead, @I_SYNC flag is not
0037  * set as well. However, UBIFS disables readahead.
0038  */
0039
0040 #include "ubifs.h"
0041 #include <linux/mount.h>
0042 #include <linux/slab.h>
0043 #include <linux/migrate.h>
0044
0045 static int read_block(struct inode *inode, void *addr, unsigned int block,
0046               struct ubifs_data_node *dn)
0047 {
0048     struct ubifs_info *c = inode->i_sb->s_fs_info;
0049     int err, len, out_len;
0050     union ubifs_key key;
0051     unsigned int dlen;
0052
0053     data_key_init(c, &key, inode->i_ino, block);
0054     err = ubifs_tnc_lookup(c, &key, dn);
0055     if (err) {
0056         if (err == -ENOENT)
0057             /* Not found, so it must be a hole */
0058             memset(addr, 0, UBIFS_BLOCK_SIZE);
0059         return err;
0060     }
0061
0062     ubifs_assert(c, le64_to_cpu(dn->ch.sqnum) >
0063              ubifs_inode(inode)->creat_sqnum);
0064     len = le32_to_cpu(dn->size);
0065     if (len <= 0 || len > UBIFS_BLOCK_SIZE)
0066         goto dump;
0067
0068     dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ;
0069
0070     if (IS_ENCRYPTED(inode)) {
0071         err = ubifs_decrypt(inode, dn, &dlen, block);
0072         if (err)
0073             goto dump;
0074     }
0075
0076     out_len = UBIFS_BLOCK_SIZE;
0077     err = ubifs_decompress(c, &dn->data, dlen, addr, &out_len,
0078                    le16_to_cpu(dn->compr_type));
0079     if (err || len != out_len)
0080         goto dump;
0081
0082     /*
0083      * Data length can be less than a full block, even for blocks that are
0084      * not the last in the file (e.g., as a result of making a hole and
0085      * appending data). Ensure that the remainder is zeroed out.
0086      */
0087     if (len < UBIFS_BLOCK_SIZE)
0088         memset(addr + len, 0, UBIFS_BLOCK_SIZE - len);
0089
0090     return 0;
0091
0092 dump:
0093     ubifs_err(c, "bad data node (block %u, inode %lu)",
0094           block, inode->i_ino);
0095     ubifs_dump_node(c, dn, UBIFS_MAX_DATA_NODE_SZ);
0096     return -EINVAL;
0097 }
0098
0099 static int do_readpage(struct page *page)
0100 {
0101     void *addr;
0102     int err = 0, i;
0103     unsigned int block, beyond;
0104     struct ubifs_data_node *dn;
0105     struct inode *inode = page->mapping->host;
0106     struct ubifs_info *c = inode->i_sb->s_fs_info;
0107     loff_t i_size = i_size_read(inode);
0108
0109     dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx",
0110         inode->i_ino, page->index, i_size, page->flags);
0111     ubifs_assert(c, !PageChecked(page));
0112     ubifs_assert(c, !PagePrivate(page));
0113
0114     addr = kmap(page);
0115
0116     block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
0117     beyond = (i_size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT;
0118     if (block >= beyond) {
0119         /* Reading beyond inode */
0120         SetPageChecked(page);
0121         memset(addr, 0, PAGE_SIZE);
0122         goto out;
0123     }
0124
0125     dn = kmalloc(UBIFS_MAX_DATA_NODE_SZ, GFP_NOFS);
0126     if (!dn) {
0127         err = -ENOMEM;
0128         goto error;
0129     }
0130
0131     i = 0;
0132     while (1) {
0133         int ret;
0134
0135         if (block >= beyond) {
0136             /* Reading beyond inode */
0137             err = -ENOENT;
0138             memset(addr, 0, UBIFS_BLOCK_SIZE);
0139         } else {
0140             ret = read_block(inode, addr, block, dn);
0141             if (ret) {
0142                 err = ret;
0143                 if (err != -ENOENT)
0144                     break;
0145             } else if (block + 1 == beyond) {
0146                 int dlen = le32_to_cpu(dn->size);
0147                 int ilen = i_size & (UBIFS_BLOCK_SIZE - 1);
0148
0149                 if (ilen && ilen < dlen)
0150                     memset(addr + ilen, 0, dlen - ilen);
0151             }
0152         }
0153         if (++i >= UBIFS_BLOCKS_PER_PAGE)
0154             break;
0155         block += 1;
0156         addr += UBIFS_BLOCK_SIZE;
0157     }
0158     if (err) {
0159         struct ubifs_info *c = inode->i_sb->s_fs_info;
0160         if (err == -ENOENT) {
0161             /* Not found, so it must be a hole */
0162             SetPageChecked(page);
0163             dbg_gen("hole");
0164             goto out_free;
0165         }
0166         ubifs_err(c, "cannot read page %lu of inode %lu, error %d",
0167               page->index, inode->i_ino, err);
0168         goto error;
0169     }
0170
0171 out_free:
0172     kfree(dn);
0173 out:
0174     SetPageUptodate(page);
0175     ClearPageError(page);
0176     flush_dcache_page(page);
0177     kunmap(page);
0178     return 0;
0179
0180 error:
0181     kfree(dn);
0182     ClearPageUptodate(page);
0183     SetPageError(page);
0184     flush_dcache_page(page);
0185     kunmap(page);
0186     return err;
0187 }
0188
0189 /**
0190  * release_new_page_budget - release budget of a new page.
0191  * @c: UBIFS file-system description object
0192  *
0193  * This is a helper function which releases budget corresponding to the budget
0194  * of one new page of data.
0195  */
0196 static void release_new_page_budget(struct ubifs_info *c)
0197 {
0198     struct ubifs_budget_req req = { .recalculate = 1, .new_page = 1 };
0199
0200     ubifs_release_budget(c, &req);
0201 }
0202
0203 /**
0204  * release_existing_page_budget - release budget of an existing page.
0205  * @c: UBIFS file-system description object
0206  *
0207  * This is a helper function which releases budget corresponding to the budget
0208  * of changing one page of data which already exists on the flash media.
0209  */
0210 static void release_existing_page_budget(struct ubifs_info *c)
0211 {
0212     struct ubifs_budget_req req = { .dd_growth = c->bi.page_budget};
0213
0214     ubifs_release_budget(c, &req);
0215 }
0216
0217 static int write_begin_slow(struct address_space *mapping,
0218                 loff_t pos, unsigned len, struct page **pagep)
0219 {
0220     struct inode *inode = mapping->host;
0221     struct ubifs_info *c = inode->i_sb->s_fs_info;
0222     pgoff_t index = pos >> PAGE_SHIFT;
0223     struct ubifs_budget_req req = { .new_page = 1 };
0224     int err, appending = !!(pos + len > inode->i_size);
0225     struct page *page;
0226
0227     dbg_gen("ino %lu, pos %llu, len %u, i_size %lld",
0228         inode->i_ino, pos, len, inode->i_size);
0229
0230     /*
0231      * At the slow path we have to budget before locking the page, because
0232      * budgeting may force write-back, which would wait on locked pages and
0233      * deadlock if we had the page locked. At this point we do not know
0234      * anything about the page, so assume that this is a new page which is
0235      * written to a hole. This corresponds to largest budget. Later the
0236      * budget will be amended if this is not true.
0237      */
0238     if (appending)
0239         /* We are appending data, budget for inode change */
0240         req.dirtied_ino = 1;
0241
0242     err = ubifs_budget_space(c, &req);
0243     if (unlikely(err))
0244         return err;
0245
0246     page = grab_cache_page_write_begin(mapping, index);
0247     if (unlikely(!page)) {
0248         ubifs_release_budget(c, &req);
0249         return -ENOMEM;
0250     }
0251
0252     if (!PageUptodate(page)) {
0253         if (!(pos & ~PAGE_MASK) && len == PAGE_SIZE)
0254             SetPageChecked(page);
0255         else {
0256             err = do_readpage(page);
0257             if (err) {
0258                 unlock_page(page);
0259                 put_page(page);
0260                 ubifs_release_budget(c, &req);
0261                 return err;
0262             }
0263         }
0264
0265         SetPageUptodate(page);
0266         ClearPageError(page);
0267     }
0268
0269     if (PagePrivate(page))
0270         /*
0271          * The page is dirty, which means it was budgeted twice:
0272          *   o first time the budget was allocated by the task which
0273          *     made the page dirty and set the PG_private flag;
0274          *   o and then we budgeted for it for the second time at the
0275          *     very beginning of this function.
0276          *
0277          * So what we have to do is to release the page budget we
0278          * allocated.
0279          */
0280         release_new_page_budget(c);
0281     else if (!PageChecked(page))
0282         /*
0283          * We are changing a page which already exists on the media.
0284          * This means that changing the page does not make the amount
0285          * of indexing information larger, and this part of the budget
0286          * which we have already acquired may be released.
0287          */
0288         ubifs_convert_page_budget(c);
0289
0290     if (appending) {
0291         struct ubifs_inode *ui = ubifs_inode(inode);
0292
0293         /*
0294          * 'ubifs_write_end()' is optimized from the fast-path part of
0295          * 'ubifs_write_begin()' and expects the @ui_mutex to be locked
0296          * if data is appended.
0297          */
0298         mutex_lock(&ui->ui_mutex);
0299         if (ui->dirty)
0300             /*
0301              * The inode is dirty already, so we may free the
0302              * budget we allocated.
0303              */
0304             ubifs_release_dirty_inode_budget(c, ui);
0305     }
0306
0307     *pagep = page;
0308     return 0;
0309 }
0310
0311 /**
0312  * allocate_budget - allocate budget for 'ubifs_write_begin()'.
0313  * @c: UBIFS file-system description object
0314  * @page: page to allocate budget for
0315  * @ui: UBIFS inode object the page belongs to
0316  * @appending: non-zero if the page is appended
0317  *
0318  * This is a helper function for 'ubifs_write_begin()' which allocates budget
0319  * for the operation. The budget is allocated differently depending on whether
0320  * this is appending, whether the page is dirty or not, and so on. This
0321  * function leaves the @ui->ui_mutex locked in case of appending. Returns zero
0322  * in case of success and %-ENOSPC in case of failure.
0323  */
0324 static int allocate_budget(struct ubifs_info *c, struct page *page,
0325                struct ubifs_inode *ui, int appending)
0326 {
0327     struct ubifs_budget_req req = { .fast = 1 };
0328
0329     if (PagePrivate(page)) {
0330         if (!appending)
0331             /*
0332              * The page is dirty and we are not appending, which
0333              * means no budget is needed at all.
0334              */
0335             return 0;
0336
0337         mutex_lock(&ui->ui_mutex);
0338         if (ui->dirty)
0339             /*
0340              * The page is dirty and we are appending, so the inode
0341              * has to be marked as dirty. However, it is already
0342              * dirty, so we do not need any budget. We may return,
0343              * but @ui->ui_mutex hast to be left locked because we
0344              * should prevent write-back from flushing the inode
0345              * and freeing the budget. The lock will be released in
0346              * 'ubifs_write_end()'.
0347              */
0348             return 0;
0349
0350         /*
0351          * The page is dirty, we are appending, the inode is clean, so
0352          * we need to budget the inode change.
0353          */
0354         req.dirtied_ino = 1;
0355     } else {
0356         if (PageChecked(page))
0357             /*
0358              * The page corresponds to a hole and does not
0359              * exist on the media. So changing it makes
0360              * make the amount of indexing information
0361              * larger, and we have to budget for a new
0362              * page.
0363              */
0364             req.new_page = 1;
0365         else
0366             /*
0367              * Not a hole, the change will not add any new
0368              * indexing information, budget for page
0369              * change.
0370              */
0371             req.dirtied_page = 1;
0372
0373         if (appending) {
0374             mutex_lock(&ui->ui_mutex);
0375             if (!ui->dirty)
0376                 /*
0377                  * The inode is clean but we will have to mark
0378                  * it as dirty because we are appending. This
0379                  * needs a budget.
0380                  */
0381                 req.dirtied_ino = 1;
0382         }
0383     }
0384
0385     return ubifs_budget_space(c, &req);
0386 }
0387
0388 /*
0389  * This function is called when a page of data is going to be written. Since
0390  * the page of data will not necessarily go to the flash straight away, UBIFS
0391  * has to reserve space on the media for it, which is done by means of
0392  * budgeting.
0393  *
0394  * This is the hot-path of the file-system and we are trying to optimize it as
0395  * much as possible. For this reasons it is split on 2 parts - slow and fast.
0396  *
0397  * There many budgeting cases:
0398  *     o a new page is appended - we have to budget for a new page and for
0399  *       changing the inode; however, if the inode is already dirty, there is
0400  *       no need to budget for it;
0401  *     o an existing clean page is changed - we have budget for it; if the page
0402  *       does not exist on the media (a hole), we have to budget for a new
0403  *       page; otherwise, we may budget for changing an existing page; the
0404  *       difference between these cases is that changing an existing page does
0405  *       not introduce anything new to the FS indexing information, so it does
0406  *       not grow, and smaller budget is acquired in this case;
0407  *     o an existing dirty page is changed - no need to budget at all, because
0408  *       the page budget has been acquired by earlier, when the page has been
0409  *       marked dirty.
0410  *
0411  * UBIFS budgeting sub-system may force write-back if it thinks there is no
0412  * space to reserve. This imposes some locking restrictions and makes it
0413  * impossible to take into account the above cases, and makes it impossible to
0414  * optimize budgeting.
0415  *
0416  * The solution for this is that the fast path of 'ubifs_write_begin()' assumes
0417  * there is a plenty of flash space and the budget will be acquired quickly,
0418  * without forcing write-back. The slow path does not make this assumption.
0419  */
0420 static int ubifs_write_begin(struct file *file, struct address_space *mapping,
0421                  loff_t pos, unsigned len,
0422                  struct page **pagep, void **fsdata)
0423 {
0424     struct inode *inode = mapping->host;
0425     struct ubifs_info *c = inode->i_sb->s_fs_info;
0426     struct ubifs_inode *ui = ubifs_inode(inode);
0427     pgoff_t index = pos >> PAGE_SHIFT;
0428     int err, appending = !!(pos + len > inode->i_size);
0429     int skipped_read = 0;
0430     struct page *page;
0431
0432     ubifs_assert(c, ubifs_inode(inode)->ui_size == inode->i_size);
0433     ubifs_assert(c, !c->ro_media && !c->ro_mount);
0434
0435     if (unlikely(c->ro_error))
0436         return -EROFS;
0437
0438     /* Try out the fast-path part first */
0439     page = grab_cache_page_write_begin(mapping, index);
0440     if (unlikely(!page))
0441         return -ENOMEM;
0442
0443     if (!PageUptodate(page)) {
0444         /* The page is not loaded from the flash */
0445         if (!(pos & ~PAGE_MASK) && len == PAGE_SIZE) {
0446             /*
0447              * We change whole page so no need to load it. But we
0448              * do not know whether this page exists on the media or
0449              * not, so we assume the latter because it requires
0450              * larger budget. The assumption is that it is better
0451              * to budget a bit more than to read the page from the
0452              * media. Thus, we are setting the @PG_checked flag
0453              * here.
0454              */
0455             SetPageChecked(page);
0456             skipped_read = 1;
0457         } else {
0458             err = do_readpage(page);
0459             if (err) {
0460                 unlock_page(page);
0461                 put_page(page);
0462                 return err;
0463             }
0464         }
0465
0466         SetPageUptodate(page);
0467         ClearPageError(page);
0468     }
0469
0470     err = allocate_budget(c, page, ui, appending);
0471     if (unlikely(err)) {
0472         ubifs_assert(c, err == -ENOSPC);
0473         /*
0474          * If we skipped reading the page because we were going to
0475          * write all of it, then it is not up to date.
0476          */
0477         if (skipped_read) {
0478             ClearPageChecked(page);
0479             ClearPageUptodate(page);
0480         }
0481         /*
0482          * Budgeting failed which means it would have to force
0483          * write-back but didn't, because we set the @fast flag in the
0484          * request. Write-back cannot be done now, while we have the
0485          * page locked, because it would deadlock. Unlock and free
0486          * everything and fall-back to slow-path.
0487          */
0488         if (appending) {
0489             ubifs_assert(c, mutex_is_locked(&ui->ui_mutex));
0490             mutex_unlock(&ui->ui_mutex);
0491         }
0492         unlock_page(page);
0493         put_page(page);
0494
0495         return write_begin_slow(mapping, pos, len, pagep);
0496     }
0497
0498     /*
0499      * Whee, we acquired budgeting quickly - without involving
0500      * garbage-collection, committing or forcing write-back. We return
0501      * with @ui->ui_mutex locked if we are appending pages, and unlocked
0502      * otherwise. This is an optimization (slightly hacky though).
0503      */
0504     *pagep = page;
0505     return 0;
0506
0507 }
0508
0509 /**
0510  * cancel_budget - cancel budget.
0511  * @c: UBIFS file-system description object
0512  * @page: page to cancel budget for
0513  * @ui: UBIFS inode object the page belongs to
0514  * @appending: non-zero if the page is appended
0515  *
0516  * This is a helper function for a page write operation. It unlocks the
0517  * @ui->ui_mutex in case of appending.
0518  */
0519 static void cancel_budget(struct ubifs_info *c, struct page *page,
0520               struct ubifs_inode *ui, int appending)
0521 {
0522     if (appending) {
0523         if (!ui->dirty)
0524             ubifs_release_dirty_inode_budget(c, ui);
0525         mutex_unlock(&ui->ui_mutex);
0526     }
0527     if (!PagePrivate(page)) {
0528         if (PageChecked(page))
0529             release_new_page_budget(c);
0530         else
0531             release_existing_page_budget(c);
0532     }
0533 }
0534
0535 static int ubifs_write_end(struct file *file, struct address_space *mapping,
0536                loff_t pos, unsigned len, unsigned copied,
0537                struct page *page, void *fsdata)
0538 {
0539     struct inode *inode = mapping->host;
0540     struct ubifs_inode *ui = ubifs_inode(inode);
0541     struct ubifs_info *c = inode->i_sb->s_fs_info;
0542     loff_t end_pos = pos + len;
0543     int appending = !!(end_pos > inode->i_size);
0544
0545     dbg_gen("ino %lu, pos %llu, pg %lu, len %u, copied %d, i_size %lld",
0546         inode->i_ino, pos, page->index, len, copied, inode->i_size);
0547
0548     if (unlikely(copied < len && len == PAGE_SIZE)) {
0549         /*
0550          * VFS copied less data to the page that it intended and
0551          * declared in its '->write_begin()' call via the @len
0552          * argument. If the page was not up-to-date, and @len was
0553          * @PAGE_SIZE, the 'ubifs_write_begin()' function did
0554          * not load it from the media (for optimization reasons). This
0555          * means that part of the page contains garbage. So read the
0556          * page now.
0557          */
0558         dbg_gen("copied %d instead of %d, read page and repeat",
0559             copied, len);
0560         cancel_budget(c, page, ui, appending);
0561         ClearPageChecked(page);
0562
0563         /*
0564          * Return 0 to force VFS to repeat the whole operation, or the
0565          * error code if 'do_readpage()' fails.
0566          */
0567         copied = do_readpage(page);
0568         goto out;
0569     }
0570
0571     if (!PagePrivate(page)) {
0572         attach_page_private(page, (void *)1);
0573         atomic_long_inc(&c->dirty_pg_cnt);
0574         __set_page_dirty_nobuffers(page);
0575     }
0576
0577     if (appending) {
0578         i_size_write(inode, end_pos);
0579         ui->ui_size = end_pos;
0580         /*
0581          * Note, we do not set @I_DIRTY_PAGES (which means that the
0582          * inode has dirty pages), this has been done in
0583          * '__set_page_dirty_nobuffers()'.
0584          */
0585         __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
0586         ubifs_assert(c, mutex_is_locked(&ui->ui_mutex));
0587         mutex_unlock(&ui->ui_mutex);
0588     }
0589
0590 out:
0591     unlock_page(page);
0592     put_page(page);
0593     return copied;
0594 }
0595
0596 /**
0597  * populate_page - copy data nodes into a page for bulk-read.
0598  * @c: UBIFS file-system description object
0599  * @page: page
0600  * @bu: bulk-read information
0601  * @n: next zbranch slot
0602  *
0603  * This function returns %0 on success and a negative error code on failure.
0604  */
0605 static int populate_page(struct ubifs_info *c, struct page *page,
0606              struct bu_info *bu, int *n)
0607 {
0608     int i = 0, nn = *n, offs = bu->zbranch[0].offs, hole = 0, read = 0;
0609     struct inode *inode = page->mapping->host;
0610     loff_t i_size = i_size_read(inode);
0611     unsigned int page_block;
0612     void *addr, *zaddr;
0613     pgoff_t end_index;
0614
0615     dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx",
0616         inode->i_ino, page->index, i_size, page->flags);
0617
0618     addr = zaddr = kmap(page);
0619
0620     end_index = (i_size - 1) >> PAGE_SHIFT;
0621     if (!i_size || page->index > end_index) {
0622         hole = 1;
0623         memset(addr, 0, PAGE_SIZE);
0624         goto out_hole;
0625     }
0626
0627     page_block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
0628     while (1) {
0629         int err, len, out_len, dlen;
0630
0631         if (nn >= bu->cnt) {
0632             hole = 1;
0633             memset(addr, 0, UBIFS_BLOCK_SIZE);
0634         } else if (key_block(c, &bu->zbranch[nn].key) == page_block) {
0635             struct ubifs_data_node *dn;
0636
0637             dn = bu->buf + (bu->zbranch[nn].offs - offs);
0638
0639             ubifs_assert(c, le64_to_cpu(dn->ch.sqnum) >
0640                      ubifs_inode(inode)->creat_sqnum);
0641
0642             len = le32_to_cpu(dn->size);
0643             if (len <= 0 || len > UBIFS_BLOCK_SIZE)
0644                 goto out_err;
0645
0646             dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ;
0647             out_len = UBIFS_BLOCK_SIZE;
0648
0649             if (IS_ENCRYPTED(inode)) {
0650                 err = ubifs_decrypt(inode, dn, &dlen, page_block);
0651                 if (err)
0652                     goto out_err;
0653             }
0654
0655             err = ubifs_decompress(c, &dn->data, dlen, addr, &out_len,
0656                            le16_to_cpu(dn->compr_type));
0657             if (err || len != out_len)
0658                 goto out_err;
0659
0660             if (len < UBIFS_BLOCK_SIZE)
0661                 memset(addr + len, 0, UBIFS_BLOCK_SIZE - len);
0662
0663             nn += 1;
0664             read = (i << UBIFS_BLOCK_SHIFT) + len;
0665         } else if (key_block(c, &bu->zbranch[nn].key) < page_block) {
0666             nn += 1;
0667             continue;
0668         } else {
0669             hole = 1;
0670             memset(addr, 0, UBIFS_BLOCK_SIZE);
0671         }
0672         if (++i >= UBIFS_BLOCKS_PER_PAGE)
0673             break;
0674         addr += UBIFS_BLOCK_SIZE;
0675         page_block += 1;
0676     }
0677
0678     if (end_index == page->index) {
0679         int len = i_size & (PAGE_SIZE - 1);
0680
0681         if (len && len < read)
0682             memset(zaddr + len, 0, read - len);
0683     }
0684
0685 out_hole:
0686     if (hole) {
0687         SetPageChecked(page);
0688         dbg_gen("hole");
0689     }
0690
0691     SetPageUptodate(page);
0692     ClearPageError(page);
0693     flush_dcache_page(page);
0694     kunmap(page);
0695     *n = nn;
0696     return 0;
0697
0698 out_err:
0699     ClearPageUptodate(page);
0700     SetPageError(page);
0701     flush_dcache_page(page);
0702     kunmap(page);
0703     ubifs_err(c, "bad data node (block %u, inode %lu)",
0704           page_block, inode->i_ino);
0705     return -EINVAL;
0706 }
0707
0708 /**
0709  * ubifs_do_bulk_read - do bulk-read.
0710  * @c: UBIFS file-system description object
0711  * @bu: bulk-read information
0712  * @page1: first page to read
0713  *
0714  * This function returns %1 if the bulk-read is done, otherwise %0 is returned.
0715  */
0716 static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu,
0717                   struct page *page1)
0718 {
0719     pgoff_t offset = page1->index, end_index;
0720     struct address_space *mapping = page1->mapping;
0721     struct inode *inode = mapping->host;
0722     struct ubifs_inode *ui = ubifs_inode(inode);
0723     int err, page_idx, page_cnt, ret = 0, n = 0;
0724     int allocate = bu->buf ? 0 : 1;
0725     loff_t isize;
0726     gfp_t ra_gfp_mask = readahead_gfp_mask(mapping) & ~__GFP_FS;
0727
0728     err = ubifs_tnc_get_bu_keys(c, bu);
0729     if (err)
0730         goto out_warn;
0731
0732     if (bu->eof) {
0733         /* Turn off bulk-read at the end of the file */
0734         ui->read_in_a_row = 1;
0735         ui->bulk_read = 0;
0736     }
0737
0738     page_cnt = bu->blk_cnt >> UBIFS_BLOCKS_PER_PAGE_SHIFT;
0739     if (!page_cnt) {
0740         /*
0741          * This happens when there are multiple blocks per page and the
0742          * blocks for the first page we are looking for, are not
0743          * together. If all the pages were like this, bulk-read would
0744          * reduce performance, so we turn it off for a while.
0745          */
0746         goto out_bu_off;
0747     }
0748
0749     if (bu->cnt) {
0750         if (allocate) {
0751             /*
0752              * Allocate bulk-read buffer depending on how many data
0753              * nodes we are going to read.
0754              */
0755             bu->buf_len = bu->zbranch[bu->cnt - 1].offs +
0756                       bu->zbranch[bu->cnt - 1].len -
0757                       bu->zbranch[0].offs;
0758             ubifs_assert(c, bu->buf_len > 0);
0759             ubifs_assert(c, bu->buf_len <= c->leb_size);
0760             bu->buf = kmalloc(bu->buf_len, GFP_NOFS | __GFP_NOWARN);
0761             if (!bu->buf)
0762                 goto out_bu_off;
0763         }
0764
0765         err = ubifs_tnc_bulk_read(c, bu);
0766         if (err)
0767             goto out_warn;
0768     }
0769
0770     err = populate_page(c, page1, bu, &n);
0771     if (err)
0772         goto out_warn;
0773
0774     unlock_page(page1);
0775     ret = 1;
0776
0777     isize = i_size_read(inode);
0778     if (isize == 0)
0779         goto out_free;
0780     end_index = ((isize - 1) >> PAGE_SHIFT);
0781
0782     for (page_idx = 1; page_idx < page_cnt; page_idx++) {
0783         pgoff_t page_offset = offset + page_idx;
0784         struct page *page;
0785
0786         if (page_offset > end_index)
0787             break;
0788         page = pagecache_get_page(mapping, page_offset,
0789                  FGP_LOCK|FGP_ACCESSED|FGP_CREAT|FGP_NOWAIT,
0790                  ra_gfp_mask);
0791         if (!page)
0792             break;
0793         if (!PageUptodate(page))
0794             err = populate_page(c, page, bu, &n);
0795         unlock_page(page);
0796         put_page(page);
0797         if (err)
0798             break;
0799     }
0800
0801     ui->last_page_read = offset + page_idx - 1;
0802
0803 out_free:
0804     if (allocate)
0805         kfree(bu->buf);
0806     return ret;
0807
0808 out_warn:
0809     ubifs_warn(c, "ignoring error %d and skipping bulk-read", err);
0810     goto out_free;
0811
0812 out_bu_off:
0813     ui->read_in_a_row = ui->bulk_read = 0;
0814     goto out_free;
0815 }
0816
0817 /**
0818  * ubifs_bulk_read - determine whether to bulk-read and, if so, do it.
0819  * @page: page from which to start bulk-read.
0820  *
0821  * Some flash media are capable of reading sequentially at faster rates. UBIFS
0822  * bulk-read facility is designed to take advantage of that, by reading in one
0823  * go consecutive data nodes that are also located consecutively in the same
0824  * LEB. This function returns %1 if a bulk-read is done and %0 otherwise.
0825  */
0826 static int ubifs_bulk_read(struct page *page)
0827 {
0828     struct inode *inode = page->mapping->host;
0829     struct ubifs_info *c = inode->i_sb->s_fs_info;
0830     struct ubifs_inode *ui = ubifs_inode(inode);
0831     pgoff_t index = page->index, last_page_read = ui->last_page_read;
0832     struct bu_info *bu;
0833     int err = 0, allocated = 0;
0834
0835     ui->last_page_read = index;
0836     if (!c->bulk_read)
0837         return 0;
0838
0839     /*
0840      * Bulk-read is protected by @ui->ui_mutex, but it is an optimization,
0841      * so don't bother if we cannot lock the mutex.
0842      */
0843     if (!mutex_trylock(&ui->ui_mutex))
0844         return 0;
0845
0846     if (index != last_page_read + 1) {
0847         /* Turn off bulk-read if we stop reading sequentially */
0848         ui->read_in_a_row = 1;
0849         if (ui->bulk_read)
0850             ui->bulk_read = 0;
0851         goto out_unlock;
0852     }
0853
0854     if (!ui->bulk_read) {
0855         ui->read_in_a_row += 1;
0856         if (ui->read_in_a_row < 3)
0857             goto out_unlock;
0858         /* Three reads in a row, so switch on bulk-read */
0859         ui->bulk_read = 1;
0860     }
0861
0862     /*
0863      * If possible, try to use pre-allocated bulk-read information, which
0864      * is protected by @c->bu_mutex.
0865      */
0866     if (mutex_trylock(&c->bu_mutex))
0867         bu = &c->bu;
0868     else {
0869         bu = kmalloc(sizeof(struct bu_info), GFP_NOFS | __GFP_NOWARN);
0870         if (!bu)
0871             goto out_unlock;
0872
0873         bu->buf = NULL;
0874         allocated = 1;
0875     }
0876
0877     bu->buf_len = c->max_bu_buf_len;
0878     data_key_init(c, &bu->key, inode->i_ino,
0879               page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT);
0880     err = ubifs_do_bulk_read(c, bu, page);
0881
0882     if (!allocated)
0883         mutex_unlock(&c->bu_mutex);
0884     else
0885         kfree(bu);
0886
0887 out_unlock:
0888     mutex_unlock(&ui->ui_mutex);
0889     return err;
0890 }
0891
0892 static int ubifs_read_folio(struct file *file, struct folio *folio)
0893 {
0894     struct page *page = &folio->page;
0895
0896     if (ubifs_bulk_read(page))
0897         return 0;
0898     do_readpage(page);
0899     folio_unlock(folio);
0900     return 0;
0901 }
0902
0903 static int do_writepage(struct page *page, int len)
0904 {
0905     int err = 0, i, blen;
0906     unsigned int block;
0907     void *addr;
0908     union ubifs_key key;
0909     struct inode *inode = page->mapping->host;
0910     struct ubifs_info *c = inode->i_sb->s_fs_info;
0911
0912 #ifdef UBIFS_DEBUG
0913     struct ubifs_inode *ui = ubifs_inode(inode);
0914     spin_lock(&ui->ui_lock);
0915     ubifs_assert(c, page->index <= ui->synced_i_size >> PAGE_SHIFT);
0916     spin_unlock(&ui->ui_lock);
0917 #endif
0918
0919     /* Update radix tree tags */
0920     set_page_writeback(page);
0921
0922     addr = kmap(page);
0923     block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
0924     i = 0;
0925     while (len) {
0926         blen = min_t(int, len, UBIFS_BLOCK_SIZE);
0927         data_key_init(c, &key, inode->i_ino, block);
0928         err = ubifs_jnl_write_data(c, inode, &key, addr, blen);
0929         if (err)
0930             break;
0931         if (++i >= UBIFS_BLOCKS_PER_PAGE)
0932             break;
0933         block += 1;
0934         addr += blen;
0935         len -= blen;
0936     }
0937     if (err) {
0938         SetPageError(page);
0939         ubifs_err(c, "cannot write page %lu of inode %lu, error %d",
0940               page->index, inode->i_ino, err);
0941         ubifs_ro_mode(c, err);
0942     }
0943
0944     ubifs_assert(c, PagePrivate(page));
0945     if (PageChecked(page))
0946         release_new_page_budget(c);
0947     else
0948         release_existing_page_budget(c);
0949
0950     atomic_long_dec(&c->dirty_pg_cnt);
0951     detach_page_private(page);
0952     ClearPageChecked(page);
0953
0954     kunmap(page);
0955     unlock_page(page);
0956     end_page_writeback(page);
0957     return err;
0958 }
0959
0960 /*
0961  * When writing-back dirty inodes, VFS first writes-back pages belonging to the
0962  * inode, then the inode itself. For UBIFS this may cause a problem. Consider a
0963  * situation when a we have an inode with size 0, then a megabyte of data is
0964  * appended to the inode, then write-back starts and flushes some amount of the
0965  * dirty pages, the journal becomes full, commit happens and finishes, and then
0966  * an unclean reboot happens. When the file system is mounted next time, the
0967  * inode size would still be 0, but there would be many pages which are beyond
0968  * the inode size, they would be indexed and consume flash space. Because the
0969  * journal has been committed, the replay would not be able to detect this
0970  * situation and correct the inode size. This means UBIFS would have to scan
0971  * whole index and correct all inode sizes, which is long an unacceptable.
0972  *
0973  * To prevent situations like this, UBIFS writes pages back only if they are
0974  * within the last synchronized inode size, i.e. the size which has been
0975  * written to the flash media last time. Otherwise, UBIFS forces inode
0976  * write-back, thus making sure the on-flash inode contains current inode size,
0977  * and then keeps writing pages back.
0978  *
0979  * Some locking issues explanation. 'ubifs_writepage()' first is called with
0980  * the page locked, and it locks @ui_mutex. However, write-back does take inode
0981  * @i_mutex, which means other VFS operations may be run on this inode at the
0982  * same time. And the problematic one is truncation to smaller size, from where
0983  * we have to call 'truncate_setsize()', which first changes @inode->i_size,
0984  * then drops the truncated pages. And while dropping the pages, it takes the
0985  * page lock. This means that 'do_truncation()' cannot call 'truncate_setsize()'
0986  * with @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'.
0987  * This means that @inode->i_size is changed while @ui_mutex is unlocked.
0988  *
0989  * XXX(truncate): with the new truncate sequence this is not true anymore,
0990  * and the calls to truncate_setsize can be move around freely.  They should
0991  * be moved to the very end of the truncate sequence.
0992  *
0993  * But in 'ubifs_writepage()' we have to guarantee that we do not write beyond
0994  * inode size. How do we do this if @inode->i_size may became smaller while we
0995  * are in the middle of 'ubifs_writepage()'? The UBIFS solution is the
0996  * @ui->ui_isize "shadow" field which UBIFS uses instead of @inode->i_size
0997  * internally and updates it under @ui_mutex.
0998  *
0999  * Q: why we do not worry that if we race with truncation, we may end up with a
1000  * situation when the inode is truncated while we are in the middle of
1001  * 'do_writepage()', so we do write beyond inode size?
1002  * A: If we are in the middle of 'do_writepage()', truncation would be locked
1003  * on the page lock and it would not write the truncated inode node to the
1004  * journal before we have finished.
1005  */
1006 static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
1007 {
1008     struct inode *inode = page->mapping->host;
1009     struct ubifs_info *c = inode->i_sb->s_fs_info;
1010     struct ubifs_inode *ui = ubifs_inode(inode);
1011     loff_t i_size =  i_size_read(inode), synced_i_size;
1012     pgoff_t end_index = i_size >> PAGE_SHIFT;
1013     int err, len = i_size & (PAGE_SIZE - 1);
1014     void *kaddr;
1015
1016     dbg_gen("ino %lu, pg %lu, pg flags %#lx",
1017         inode->i_ino, page->index, page->flags);
1018     ubifs_assert(c, PagePrivate(page));
1019
1020     /* Is the page fully outside @i_size? (truncate in progress) */
1021     if (page->index > end_index || (page->index == end_index && !len)) {
1022         err = 0;
1023         goto out_unlock;
1024     }
1025
1026     spin_lock(&ui->ui_lock);
1027     synced_i_size = ui->synced_i_size;
1028     spin_unlock(&ui->ui_lock);
1029
1030     /* Is the page fully inside @i_size? */
1031     if (page->index < end_index) {
1032         if (page->index >= synced_i_size >> PAGE_SHIFT) {
1033             err = inode->i_sb->s_op->write_inode(inode, NULL);
1034             if (err)
1035                 goto out_unlock;
1036             /*
1037              * The inode has been written, but the write-buffer has
1038              * not been synchronized, so in case of an unclean
1039              * reboot we may end up with some pages beyond inode
1040              * size, but they would be in the journal (because
1041              * commit flushes write buffers) and recovery would deal
1042              * with this.
1043              */
1044         }
1045         return do_writepage(page, PAGE_SIZE);
1046     }
1047
1048     /*
1049      * The page straddles @i_size. It must be zeroed out on each and every
1050      * writepage invocation because it may be mmapped. "A file is mapped
1051      * in multiples of the page size. For a file that is not a multiple of
1052      * the page size, the remaining memory is zeroed when mapped, and
1053      * writes to that region are not written out to the file."
1054      */
1055     kaddr = kmap_atomic(page);
1056     memset(kaddr + len, 0, PAGE_SIZE - len);
1057     flush_dcache_page(page);
1058     kunmap_atomic(kaddr);
1059
1060     if (i_size > synced_i_size) {
1061         err = inode->i_sb->s_op->write_inode(inode, NULL);
1062         if (err)
1063             goto out_unlock;
1064     }
1065
1066     return do_writepage(page, len);
1067
1068 out_unlock:
1069     unlock_page(page);
1070     return err;
1071 }
1072
1073 /**
1074  * do_attr_changes - change inode attributes.
1075  * @inode: inode to change attributes for
1076  * @attr: describes attributes to change
1077  */
1078 static void do_attr_changes(struct inode *inode, const struct iattr *attr)
1079 {
1080     if (attr->ia_valid & ATTR_UID)
1081         inode->i_uid = attr->ia_uid;
1082     if (attr->ia_valid & ATTR_GID)
1083         inode->i_gid = attr->ia_gid;
1084     if (attr->ia_valid & ATTR_ATIME)
1085         inode->i_atime = attr->ia_atime;
1086     if (attr->ia_valid & ATTR_MTIME)
1087         inode->i_mtime = attr->ia_mtime;
1088     if (attr->ia_valid & ATTR_CTIME)
1089         inode->i_ctime = attr->ia_ctime;
1090     if (attr->ia_valid & ATTR_MODE) {
1091         umode_t mode = attr->ia_mode;
1092
1093         if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
1094             mode &= ~S_ISGID;
1095         inode->i_mode = mode;
1096     }
1097 }
1098
1099 /**
1100  * do_truncation - truncate an inode.
1101  * @c: UBIFS file-system description object
1102  * @inode: inode to truncate
1103  * @attr: inode attribute changes description
1104  *
1105  * This function implements VFS '->setattr()' call when the inode is truncated
1106  * to a smaller size. Returns zero in case of success and a negative error code
1107  * in case of failure.
1108  */
1109 static int do_truncation(struct ubifs_info *c, struct inode *inode,
1110              const struct iattr *attr)
1111 {
1112     int err;
1113     struct ubifs_budget_req req;
1114     loff_t old_size = inode->i_size, new_size = attr->ia_size;
1115     int offset = new_size & (UBIFS_BLOCK_SIZE - 1), budgeted = 1;
1116     struct ubifs_inode *ui = ubifs_inode(inode);
1117
1118     dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size);
1119     memset(&req, 0, sizeof(struct ubifs_budget_req));
1120
1121     /*
1122      * If this is truncation to a smaller size, and we do not truncate on a
1123      * block boundary, budget for changing one data block, because the last
1124      * block will be re-written.
1125      */
1126     if (new_size & (UBIFS_BLOCK_SIZE - 1))
1127         req.dirtied_page = 1;
1128
1129     req.dirtied_ino = 1;
1130     /* A funny way to budget for truncation node */
1131     req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ;
1132     err = ubifs_budget_space(c, &req);
1133     if (err) {
1134         /*
1135          * Treat truncations to zero as deletion and always allow them,
1136          * just like we do for '->unlink()'.
1137          */
1138         if (new_size || err != -ENOSPC)
1139             return err;
1140         budgeted = 0;
1141     }
1142
1143     truncate_setsize(inode, new_size);
1144
1145     if (offset) {
1146         pgoff_t index = new_size >> PAGE_SHIFT;
1147         struct page *page;
1148
1149         page = find_lock_page(inode->i_mapping, index);
1150         if (page) {
1151             if (PageDirty(page)) {
1152                 /*
1153                  * 'ubifs_jnl_truncate()' will try to truncate
1154                  * the last data node, but it contains
1155                  * out-of-date data because the page is dirty.
1156                  * Write the page now, so that
1157                  * 'ubifs_jnl_truncate()' will see an already
1158                  * truncated (and up to date) data node.
1159                  */
1160                 ubifs_assert(c, PagePrivate(page));
1161
1162                 clear_page_dirty_for_io(page);
1163                 if (UBIFS_BLOCKS_PER_PAGE_SHIFT)
1164                     offset = new_size &
1165                          (PAGE_SIZE - 1);
1166                 err = do_writepage(page, offset);
1167                 put_page(page);
1168                 if (err)
1169                     goto out_budg;
1170                 /*
1171                  * We could now tell 'ubifs_jnl_truncate()' not
1172                  * to read the last block.
1173                  */
1174             } else {
1175                 /*
1176                  * We could 'kmap()' the page and pass the data
1177                  * to 'ubifs_jnl_truncate()' to save it from
1178                  * having to read it.
1179                  */
1180                 unlock_page(page);
1181                 put_page(page);
1182             }
1183         }
1184     }
1185
1186     mutex_lock(&ui->ui_mutex);
1187     ui->ui_size = inode->i_size;
1188     /* Truncation changes inode [mc]time */
1189     inode->i_mtime = inode->i_ctime = current_time(inode);
1190     /* Other attributes may be changed at the same time as well */
1191     do_attr_changes(inode, attr);
1192     err = ubifs_jnl_truncate(c, inode, old_size, new_size);
1193     mutex_unlock(&ui->ui_mutex);
1194
1195 out_budg:
1196     if (budgeted)
1197         ubifs_release_budget(c, &req);
1198     else {
1199         c->bi.nospace = c->bi.nospace_rp = 0;
1200         smp_wmb();
1201     }
1202     return err;
1203 }
1204
1205 /**
1206  * do_setattr - change inode attributes.
1207  * @c: UBIFS file-system description object
1208  * @inode: inode to change attributes for
1209  * @attr: inode attribute changes description
1210  *
1211  * This function implements VFS '->setattr()' call for all cases except
1212  * truncations to smaller size. Returns zero in case of success and a negative
1213  * error code in case of failure.
1214  */
1215 static int do_setattr(struct ubifs_info *c, struct inode *inode,
1216               const struct iattr *attr)
1217 {
1218     int err, release;
1219     loff_t new_size = attr->ia_size;
1220     struct ubifs_inode *ui = ubifs_inode(inode);
1221     struct ubifs_budget_req req = { .dirtied_ino = 1,
1222                 .dirtied_ino_d = ALIGN(ui->data_len, 8) };
1223
1224     err = ubifs_budget_space(c, &req);
1225     if (err)
1226         return err;
1227
1228     if (attr->ia_valid & ATTR_SIZE) {
1229         dbg_gen("size %lld -> %lld", inode->i_size, new_size);
1230         truncate_setsize(inode, new_size);
1231     }
1232
1233     mutex_lock(&ui->ui_mutex);
1234     if (attr->ia_valid & ATTR_SIZE) {
1235         /* Truncation changes inode [mc]time */
1236         inode->i_mtime = inode->i_ctime = current_time(inode);
1237         /* 'truncate_setsize()' changed @i_size, update @ui_size */
1238         ui->ui_size = inode->i_size;
1239     }
1240
1241     do_attr_changes(inode, attr);
1242
1243     release = ui->dirty;
1244     if (attr->ia_valid & ATTR_SIZE)
1245         /*
1246          * Inode length changed, so we have to make sure
1247          * @I_DIRTY_DATASYNC is set.
1248          */
1249          __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
1250     else
1251         mark_inode_dirty_sync(inode);
1252     mutex_unlock(&ui->ui_mutex);
1253
1254     if (release)
1255         ubifs_release_budget(c, &req);
1256     if (IS_SYNC(inode))
1257         err = inode->i_sb->s_op->write_inode(inode, NULL);
1258     return err;
1259 }
1260
1261 int ubifs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
1262           struct iattr *attr)
1263 {
1264     int err;
1265     struct inode *inode = d_inode(dentry);
1266     struct ubifs_info *c = inode->i_sb->s_fs_info;
1267
1268     dbg_gen("ino %lu, mode %#x, ia_valid %#x",
1269         inode->i_ino, inode->i_mode, attr->ia_valid);
1270     err = setattr_prepare(&init_user_ns, dentry, attr);
1271     if (err)
1272         return err;
1273
1274     err = dbg_check_synced_i_size(c, inode);
1275     if (err)
1276         return err;
1277
1278     err = fscrypt_prepare_setattr(dentry, attr);
1279     if (err)
1280         return err;
1281
1282     if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size < inode->i_size)
1283         /* Truncation to a smaller size */
1284         err = do_truncation(c, inode, attr);
1285     else
1286         err = do_setattr(c, inode, attr);
1287
1288     return err;
1289 }
1290
1291 static void ubifs_invalidate_folio(struct folio *folio, size_t offset,
1292                  size_t length)
1293 {
1294     struct inode *inode = folio->mapping->host;
1295     struct ubifs_info *c = inode->i_sb->s_fs_info;
1296
1297     ubifs_assert(c, folio_test_private(folio));
1298     if (offset || length < folio_size(folio))
1299         /* Partial folio remains dirty */
1300         return;
1301
1302     if (folio_test_checked(folio))
1303         release_new_page_budget(c);
1304     else
1305         release_existing_page_budget(c);
1306
1307     atomic_long_dec(&c->dirty_pg_cnt);
1308     folio_detach_private(folio);
1309     folio_clear_checked(folio);
1310 }
1311
1312 int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
1313 {
1314     struct inode *inode = file->f_mapping->host;
1315     struct ubifs_info *c = inode->i_sb->s_fs_info;
1316     int err;
1317
1318     dbg_gen("syncing inode %lu", inode->i_ino);
1319
1320     if (c->ro_mount)
1321         /*
1322          * For some really strange reasons VFS does not filter out
1323          * 'fsync()' for R/O mounted file-systems as per 2.6.39.
1324          */
1325         return 0;
1326
1327     err = file_write_and_wait_range(file, start, end);
1328     if (err)
1329         return err;
1330     inode_lock(inode);
1331
1332     /* Synchronize the inode unless this is a 'datasync()' call. */
1333     if (!datasync || (inode->i_state & I_DIRTY_DATASYNC)) {
1334         err = inode->i_sb->s_op->write_inode(inode, NULL);
1335         if (err)
1336             goto out;
1337     }
1338
1339     /*
1340      * Nodes related to this inode may still sit in a write-buffer. Flush
1341      * them.
1342      */
1343     err = ubifs_sync_wbufs_by_inode(c, inode);
1344 out:
1345     inode_unlock(inode);
1346     return err;
1347 }
1348
1349 /**
1350  * mctime_update_needed - check if mtime or ctime update is needed.
1351  * @inode: the inode to do the check for
1352  * @now: current time
1353  *
1354  * This helper function checks if the inode mtime/ctime should be updated or
1355  * not. If current values of the time-stamps are within the UBIFS inode time
1356  * granularity, they are not updated. This is an optimization.
1357  */
1358 static inline int mctime_update_needed(const struct inode *inode,
1359                        const struct timespec64 *now)
1360 {
1361     if (!timespec64_equal(&inode->i_mtime, now) ||
1362         !timespec64_equal(&inode->i_ctime, now))
1363         return 1;
1364     return 0;
1365 }
1366
1367 /**
1368  * ubifs_update_time - update time of inode.
1369  * @inode: inode to update
1370  *
1371  * This function updates time of the inode.
1372  */
1373 int ubifs_update_time(struct inode *inode, struct timespec64 *time,
1374                  int flags)
1375 {
1376     struct ubifs_inode *ui = ubifs_inode(inode);
1377     struct ubifs_info *c = inode->i_sb->s_fs_info;
1378     struct ubifs_budget_req req = { .dirtied_ino = 1,
1379             .dirtied_ino_d = ALIGN(ui->data_len, 8) };
1380     int err, release;
1381
1382     if (!IS_ENABLED(CONFIG_UBIFS_ATIME_SUPPORT))
1383         return generic_update_time(inode, time, flags);
1384
1385     err = ubifs_budget_space(c, &req);
1386     if (err)
1387         return err;
1388
1389     mutex_lock(&ui->ui_mutex);
1390     if (flags & S_ATIME)
1391         inode->i_atime = *time;
1392     if (flags & S_CTIME)
1393         inode->i_ctime = *time;
1394     if (flags & S_MTIME)
1395         inode->i_mtime = *time;
1396
1397     release = ui->dirty;
1398     __mark_inode_dirty(inode, I_DIRTY_SYNC);
1399     mutex_unlock(&ui->ui_mutex);
1400     if (release)
1401         ubifs_release_budget(c, &req);
1402     return 0;
1403 }
1404
1405 /**
1406  * update_mctime - update mtime and ctime of an inode.
1407  * @inode: inode to update
1408  *
1409  * This function updates mtime and ctime of the inode if it is not equivalent to
1410  * current time. Returns zero in case of success and a negative error code in
1411  * case of failure.
1412  */
1413 static int update_mctime(struct inode *inode)
1414 {
1415     struct timespec64 now = current_time(inode);
1416     struct ubifs_inode *ui = ubifs_inode(inode);
1417     struct ubifs_info *c = inode->i_sb->s_fs_info;
1418
1419     if (mctime_update_needed(inode, &now)) {
1420         int err, release;
1421         struct ubifs_budget_req req = { .dirtied_ino = 1,
1422                 .dirtied_ino_d = ALIGN(ui->data_len, 8) };
1423
1424         err = ubifs_budget_space(c, &req);
1425         if (err)
1426             return err;
1427
1428         mutex_lock(&ui->ui_mutex);
1429         inode->i_mtime = inode->i_ctime = current_time(inode);
1430         release = ui->dirty;
1431         mark_inode_dirty_sync(inode);
1432         mutex_unlock(&ui->ui_mutex);
1433         if (release)
1434             ubifs_release_budget(c, &req);
1435     }
1436
1437     return 0;
1438 }
1439
1440 static ssize_t ubifs_write_iter(struct kiocb *iocb, struct iov_iter *from)
1441 {
1442     int err = update_mctime(file_inode(iocb->ki_filp));
1443     if (err)
1444         return err;
1445
1446     return generic_file_write_iter(iocb, from);
1447 }
1448
1449 static bool ubifs_dirty_folio(struct address_space *mapping,
1450         struct folio *folio)
1451 {
1452     bool ret;
1453     struct ubifs_info *c = mapping->host->i_sb->s_fs_info;
1454
1455     ret = filemap_dirty_folio(mapping, folio);
1456     /*
1457      * An attempt to dirty a page without budgeting for it - should not
1458      * happen.
1459      */
1460     ubifs_assert(c, ret == false);
1461     return ret;
1462 }
1463
1464 static bool ubifs_release_folio(struct folio *folio, gfp_t unused_gfp_flags)
1465 {
1466     struct inode *inode = folio->mapping->host;
1467     struct ubifs_info *c = inode->i_sb->s_fs_info;
1468
1469     /*
1470      * An attempt to release a dirty page without budgeting for it - should
1471      * not happen.
1472      */
1473     if (folio_test_writeback(folio))
1474         return false;
1475     ubifs_assert(c, folio_test_private(folio));
1476     ubifs_assert(c, 0);
1477     folio_detach_private(folio);
1478     folio_clear_checked(folio);
1479     return true;
1480 }
1481
1482 /*
1483  * mmap()d file has taken write protection fault and is being made writable.
1484  * UBIFS must ensure page is budgeted for.
1485  */
1486 static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf)
1487 {
1488     struct page *page = vmf->page;
1489     struct inode *inode = file_inode(vmf->vma->vm_file);
1490     struct ubifs_info *c = inode->i_sb->s_fs_info;
1491     struct timespec64 now = current_time(inode);
1492     struct ubifs_budget_req req = { .new_page = 1 };
1493     int err, update_time;
1494
1495     dbg_gen("ino %lu, pg %lu, i_size %lld", inode->i_ino, page->index,
1496         i_size_read(inode));
1497     ubifs_assert(c, !c->ro_media && !c->ro_mount);
1498
1499     if (unlikely(c->ro_error))
1500         return VM_FAULT_SIGBUS; /* -EROFS */
1501
1502     /*
1503      * We have not locked @page so far so we may budget for changing the
1504      * page. Note, we cannot do this after we locked the page, because
1505      * budgeting may cause write-back which would cause deadlock.
1506      *
1507      * At the moment we do not know whether the page is dirty or not, so we
1508      * assume that it is not and budget for a new page. We could look at
1509      * the @PG_private flag and figure this out, but we may race with write
1510      * back and the page state may change by the time we lock it, so this
1511      * would need additional care. We do not bother with this at the
1512      * moment, although it might be good idea to do. Instead, we allocate
1513      * budget for a new page and amend it later on if the page was in fact
1514      * dirty.
1515      *
1516      * The budgeting-related logic of this function is similar to what we
1517      * do in 'ubifs_write_begin()' and 'ubifs_write_end()'. Glance there
1518      * for more comments.
1519      */
1520     update_time = mctime_update_needed(inode, &now);
1521     if (update_time)
1522         /*
1523          * We have to change inode time stamp which requires extra
1524          * budgeting.
1525          */
1526         req.dirtied_ino = 1;
1527
1528     err = ubifs_budget_space(c, &req);
1529     if (unlikely(err)) {
1530         if (err == -ENOSPC)
1531             ubifs_warn(c, "out of space for mmapped file (inode number %lu)",
1532                    inode->i_ino);
1533         return VM_FAULT_SIGBUS;
1534     }
1535
1536     lock_page(page);
1537     if (unlikely(page->mapping != inode->i_mapping ||
1538              page_offset(page) > i_size_read(inode))) {
1539         /* Page got truncated out from underneath us */
1540         goto sigbus;
1541     }
1542
1543     if (PagePrivate(page))
1544         release_new_page_budget(c);
1545     else {
1546         if (!PageChecked(page))
1547             ubifs_convert_page_budget(c);
1548         attach_page_private(page, (void *)1);
1549         atomic_long_inc(&c->dirty_pg_cnt);
1550         __set_page_dirty_nobuffers(page);
1551     }
1552
1553     if (update_time) {
1554         int release;
1555         struct ubifs_inode *ui = ubifs_inode(inode);
1556
1557         mutex_lock(&ui->ui_mutex);
1558         inode->i_mtime = inode->i_ctime = current_time(inode);
1559         release = ui->dirty;
1560         mark_inode_dirty_sync(inode);
1561         mutex_unlock(&ui->ui_mutex);
1562         if (release)
1563             ubifs_release_dirty_inode_budget(c, ui);
1564     }
1565
1566     wait_for_stable_page(page);
1567     return VM_FAULT_LOCKED;
1568
1569 sigbus:
1570     unlock_page(page);
1571     ubifs_release_budget(c, &req);
1572     return VM_FAULT_SIGBUS;
1573 }
1574
1575 static const struct vm_operations_struct ubifs_file_vm_ops = {
1576     .fault        = filemap_fault,
1577     .map_pages = filemap_map_pages,
1578     .page_mkwrite = ubifs_vm_page_mkwrite,
1579 };
1580
1581 static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma)
1582 {
1583     int err;
1584
1585     err = generic_file_mmap(file, vma);
1586     if (err)
1587         return err;
1588     vma->vm_ops = &ubifs_file_vm_ops;
1589
1590     if (IS_ENABLED(CONFIG_UBIFS_ATIME_SUPPORT))
1591         file_accessed(file);
1592
1593     return 0;
1594 }
1595
1596 static const char *ubifs_get_link(struct dentry *dentry,
1597                         struct inode *inode,
1598                         struct delayed_call *done)
1599 {
1600     struct ubifs_inode *ui = ubifs_inode(inode);
1601
1602     if (!IS_ENCRYPTED(inode))
1603         return ui->data;
1604
1605     if (!dentry)
1606         return ERR_PTR(-ECHILD);
1607
1608     return fscrypt_get_symlink(inode, ui->data, ui->data_len, done);
1609 }
1610
1611 static int ubifs_symlink_getattr(struct user_namespace *mnt_userns,
1612                  const struct path *path, struct kstat *stat,
1613                  u32 request_mask, unsigned int query_flags)
1614 {
1615     ubifs_getattr(mnt_userns, path, stat, request_mask, query_flags);
1616
1617     if (IS_ENCRYPTED(d_inode(path->dentry)))
1618         return fscrypt_symlink_getattr(path, stat);
1619     return 0;
1620 }
1621
1622 const struct address_space_operations ubifs_file_address_operations = {
1623     .read_folio     = ubifs_read_folio,
1624     .writepage      = ubifs_writepage,
1625     .write_begin    = ubifs_write_begin,
1626     .write_end      = ubifs_write_end,
1627     .invalidate_folio = ubifs_invalidate_folio,
1628     .dirty_folio    = ubifs_dirty_folio,
1629     .migrate_folio  = filemap_migrate_folio,
1630     .release_folio  = ubifs_release_folio,
1631 };
1632
1633 const struct inode_operations ubifs_file_inode_operations = {
1634     .setattr     = ubifs_setattr,
1635     .getattr     = ubifs_getattr,
1636     .listxattr   = ubifs_listxattr,
1637     .update_time = ubifs_update_time,
1638     .fileattr_get = ubifs_fileattr_get,
1639     .fileattr_set = ubifs_fileattr_set,
1640 };
1641
1642 const struct inode_operations ubifs_symlink_inode_operations = {
1643     .get_link    = ubifs_get_link,
1644     .setattr     = ubifs_setattr,
1645     .getattr     = ubifs_symlink_getattr,
1646     .listxattr   = ubifs_listxattr,
1647     .update_time = ubifs_update_time,
1648 };
1649
1650 const struct file_operations ubifs_file_operations = {
1651     .llseek         = generic_file_llseek,
1652     .read_iter      = generic_file_read_iter,
1653     .write_iter     = ubifs_write_iter,
1654     .mmap           = ubifs_file_mmap,
1655     .fsync          = ubifs_fsync,
1656     .unlocked_ioctl = ubifs_ioctl,
1657     .splice_read    = generic_file_splice_read,
1658     .splice_write   = iter_file_splice_write,
1659     .open       = fscrypt_file_open,
1660 #ifdef CONFIG_COMPAT
1661     .compat_ioctl   = ubifs_compat_ioctl,
1662 #endif
1663 };