// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
 */

#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/compat.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/pagemap.h>
#include <linux/uio.h>
#include <linux/blkdev.h>
#include <linux/mm.h>
#include <linux/mount.h>
#include <linux/fs.h>
#include <linux/gfs2_ondisk.h>
#include <linux/falloc.h>
#include <linux/swap.h>
#include <linux/crc32.h>
#include <linux/writeback.h>
#include <linux/uaccess.h>
#include <linux/dlm.h>
#include <linux/dlm_plock.h>
#include <linux/delay.h>
#include <linux/backing-dev.h>
#include <linux/fileattr.h>

#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "aops.h"
#include "dir.h"
#include "glock.h"
#include "glops.h"
#include "inode.h"
#include "log.h"
#include "meta_io.h"
#include "quota.h"
#include "rgrp.h"
#include "trans.h"
#include "util.h"

/**
 * gfs2_llseek - seek to a location in a file
 * @file: the file
 * @offset: the offset
 * @whence: Where to seek from (SEEK_SET, SEEK_CUR, SEEK_END, SEEK_DATA,
 *          or SEEK_HOLE)
 *
 * SEEK_END requires the glock for the file because it references the
 * file's size.
 *
 * Returns: The new offset, or errno
 */

static loff_t gfs2_llseek(struct file *file, loff_t offset, int whence)
{
    struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
    struct gfs2_holder i_gh;
    loff_t error;

    switch (whence) {
    case SEEK_END:
        error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
                       &i_gh);
        if (!error) {
            error = generic_file_llseek(file, offset, whence);
            gfs2_glock_dq_uninit(&i_gh);
        }
        break;

    case SEEK_DATA:
        error = gfs2_seek_data(file, offset);
        break;

    case SEEK_HOLE:
        error = gfs2_seek_hole(file, offset);
        break;

    case SEEK_CUR:
    case SEEK_SET:
        /*
         * These don't reference inode->i_size and don't depend on the
         * block mapping, so we don't need the glock.
         */
        error = generic_file_llseek(file, offset, whence);
        break;
    default:
        error = -EINVAL;
    }

    return error;
}
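
/*
 * A small user-space illustration (editor's sketch, not part of this file):
 * of the cases above, only SEEK_END takes the inode glock here, because it
 * is the only one that must observe an up-to-date inode size; SEEK_DATA and
 * SEEK_HOLE are delegated to gfs2_seek_data() and gfs2_seek_hole():
 *
 *     off_t end = lseek(fd, 0, SEEK_END);    // shared glock taken
 *     off_t cur = lseek(fd, 0, SEEK_CUR);    // purely local, no glock
 */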

/**
 * gfs2_readdir - Iterator for a directory
 * @file: The directory to read from
 * @ctx: What to feed directory entries to
 *
 * Returns: errno
 */

static int gfs2_readdir(struct file *file, struct dir_context *ctx)
{
    struct inode *dir = file->f_mapping->host;
    struct gfs2_inode *dip = GFS2_I(dir);
    struct gfs2_holder d_gh;
    int error;

    error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
    if (error)
        return error;

    error = gfs2_dir_read(dir, ctx, &file->f_ra);

    gfs2_glock_dq_uninit(&d_gh);

    return error;
}

/*
 * struct fsflag_gfs2flag
 *
 * The FS_JOURNAL_DATA_FL flag maps to GFS2_DIF_INHERIT_JDATA for directories,
 * and to GFS2_DIF_JDATA for non-directories.
 */
static struct {
    u32 fsflag;
    u32 gfsflag;
} fsflag_gfs2flag[] = {
    {FS_SYNC_FL, GFS2_DIF_SYNC},
    {FS_IMMUTABLE_FL, GFS2_DIF_IMMUTABLE},
    {FS_APPEND_FL, GFS2_DIF_APPENDONLY},
    {FS_NOATIME_FL, GFS2_DIF_NOATIME},
    {FS_INDEX_FL, GFS2_DIF_EXHASH},
    {FS_TOPDIR_FL, GFS2_DIF_TOPDIR},
    {FS_JOURNAL_DATA_FL, GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA},
};

static inline u32 gfs2_gfsflags_to_fsflags(struct inode *inode, u32 gfsflags)
{
    int i;
    u32 fsflags = 0;

    if (S_ISDIR(inode->i_mode))
        gfsflags &= ~GFS2_DIF_JDATA;
    else
        gfsflags &= ~GFS2_DIF_INHERIT_JDATA;

    for (i = 0; i < ARRAY_SIZE(fsflag_gfs2flag); i++)
        if (gfsflags & fsflag_gfs2flag[i].gfsflag)
            fsflags |= fsflag_gfs2flag[i].fsflag;
    return fsflags;
}
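
/*
 * Worked example for the conversion above: a directory with
 * GFS2_DIF_INHERIT_JDATA set reports FS_JOURNAL_DATA_FL (its GFS2_DIF_JDATA
 * bit, if any, is masked off first), while a regular file reports
 * FS_JOURNAL_DATA_FL only for GFS2_DIF_JDATA. This is why both gfs2 bits
 * share the single FS_JOURNAL_DATA_FL entry in fsflag_gfs2flag[].
 */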

int gfs2_fileattr_get(struct dentry *dentry, struct fileattr *fa)
{
    struct inode *inode = d_inode(dentry);
    struct gfs2_inode *ip = GFS2_I(inode);
    struct gfs2_holder gh;
    int error;
    u32 fsflags;

    if (d_is_special(dentry))
        return -ENOTTY;

    gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
    error = gfs2_glock_nq(&gh);
    if (error)
        goto out_uninit;

    fsflags = gfs2_gfsflags_to_fsflags(inode, ip->i_diskflags);

    fileattr_fill_flags(fa, fsflags);

    gfs2_glock_dq(&gh);
out_uninit:
    gfs2_holder_uninit(&gh);
    return error;
}

void gfs2_set_inode_flags(struct inode *inode)
{
    struct gfs2_inode *ip = GFS2_I(inode);
    unsigned int flags = inode->i_flags;

    flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_NOSEC);
    if ((ip->i_eattr == 0) && !is_sxid(inode->i_mode))
        flags |= S_NOSEC;
    if (ip->i_diskflags & GFS2_DIF_IMMUTABLE)
        flags |= S_IMMUTABLE;
    if (ip->i_diskflags & GFS2_DIF_APPENDONLY)
        flags |= S_APPEND;
    if (ip->i_diskflags & GFS2_DIF_NOATIME)
        flags |= S_NOATIME;
    if (ip->i_diskflags & GFS2_DIF_SYNC)
        flags |= S_SYNC;
    inode->i_flags = flags;
}

/* Flags that can be set by user space */
#define GFS2_FLAGS_USER_SET (GFS2_DIF_JDATA|            \
                 GFS2_DIF_IMMUTABLE|        \
                 GFS2_DIF_APPENDONLY|       \
                 GFS2_DIF_NOATIME|          \
                 GFS2_DIF_SYNC|             \
                 GFS2_DIF_TOPDIR|           \
                 GFS2_DIF_INHERIT_JDATA)

/**
 * do_gfs2_set_flags - set flags on an inode
 * @inode: The inode
 * @reqflags: The flags to set
 * @mask: Indicates which flags are valid
 *
 */
static int do_gfs2_set_flags(struct inode *inode, u32 reqflags, u32 mask)
{
    struct gfs2_inode *ip = GFS2_I(inode);
    struct gfs2_sbd *sdp = GFS2_SB(inode);
    struct buffer_head *bh;
    struct gfs2_holder gh;
    int error;
    u32 new_flags, flags;

    error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
    if (error)
        return error;

    error = 0;
    flags = ip->i_diskflags;
    new_flags = (flags & ~mask) | (reqflags & mask);
    if ((new_flags ^ flags) == 0)
        goto out;

    if (!IS_IMMUTABLE(inode)) {
        error = gfs2_permission(&init_user_ns, inode, MAY_WRITE);
        if (error)
            goto out;
    }
    if ((flags ^ new_flags) & GFS2_DIF_JDATA) {
        if (new_flags & GFS2_DIF_JDATA)
            gfs2_log_flush(sdp, ip->i_gl,
                       GFS2_LOG_HEAD_FLUSH_NORMAL |
                       GFS2_LFC_SET_FLAGS);
        error = filemap_fdatawrite(inode->i_mapping);
        if (error)
            goto out;
        error = filemap_fdatawait(inode->i_mapping);
        if (error)
            goto out;
        if (new_flags & GFS2_DIF_JDATA)
            gfs2_ordered_del_inode(ip);
    }
    error = gfs2_trans_begin(sdp, RES_DINODE, 0);
    if (error)
        goto out;
    error = gfs2_meta_inode_buffer(ip, &bh);
    if (error)
        goto out_trans_end;
    inode->i_ctime = current_time(inode);
    gfs2_trans_add_meta(ip->i_gl, bh);
    ip->i_diskflags = new_flags;
    gfs2_dinode_out(ip, bh->b_data);
    brelse(bh);
    gfs2_set_inode_flags(inode);
    gfs2_set_aops(inode);
out_trans_end:
    gfs2_trans_end(sdp);
out:
    gfs2_glock_dq_uninit(&gh);
    return error;
}
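
/*
 * How this is reached from user space (a hedged sketch using the generic
 * VFS flag ioctls; fd setup and error handling elided):
 *
 *     int attr;
 *     ioctl(fd, FS_IOC_GETFLAGS, &attr);
 *     attr |= FS_JOURNAL_DATA_FL;         // chattr +j
 *     ioctl(fd, FS_IOC_SETFLAGS, &attr);
 *
 * The VFS translates this into a struct fileattr and calls
 * gfs2_fileattr_set() below, which ends up here.
 */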

int gfs2_fileattr_set(struct user_namespace *mnt_userns,
              struct dentry *dentry, struct fileattr *fa)
{
    struct inode *inode = d_inode(dentry);
    u32 fsflags = fa->flags, gfsflags = 0;
    u32 mask;
    int i;

    if (d_is_special(dentry))
        return -ENOTTY;

    if (fileattr_has_fsx(fa))
        return -EOPNOTSUPP;

    for (i = 0; i < ARRAY_SIZE(fsflag_gfs2flag); i++) {
        if (fsflags & fsflag_gfs2flag[i].fsflag) {
            fsflags &= ~fsflag_gfs2flag[i].fsflag;
            gfsflags |= fsflag_gfs2flag[i].gfsflag;
        }
    }
    if (fsflags || gfsflags & ~GFS2_FLAGS_USER_SET)
        return -EINVAL;

    mask = GFS2_FLAGS_USER_SET;
    if (S_ISDIR(inode->i_mode)) {
        mask &= ~GFS2_DIF_JDATA;
    } else {
        /* The GFS2_DIF_TOPDIR flag is only valid for directories. */
        if (gfsflags & GFS2_DIF_TOPDIR)
            return -EINVAL;
        mask &= ~(GFS2_DIF_TOPDIR | GFS2_DIF_INHERIT_JDATA);
    }

    return do_gfs2_set_flags(inode, gfsflags, mask);
}

static int gfs2_getlabel(struct file *filp, char __user *label)
{
    struct inode *inode = file_inode(filp);
    struct gfs2_sbd *sdp = GFS2_SB(inode);

    if (copy_to_user(label, sdp->sd_sb.sb_locktable, GFS2_LOCKNAME_LEN))
        return -EFAULT;

    return 0;
}

static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
    switch(cmd) {
    case FITRIM:
        return gfs2_fitrim(filp, (void __user *)arg);
    case FS_IOC_GETFSLABEL:
        return gfs2_getlabel(filp, (char __user *)arg);
    }

    return -ENOTTY;
}
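
/*
 * Example of the FITRIM path from user space (the fstrim(8)-style pattern;
 * illustrative only, with error handling elided):
 *
 *     struct fstrim_range r = { .start = 0, .len = ULLONG_MAX, .minlen = 0 };
 *     ioctl(fd, FITRIM, &r);        // handled by gfs2_fitrim()
 */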

#ifdef CONFIG_COMPAT
static long gfs2_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
    switch(cmd) {
    /* Keep this list in sync with gfs2_ioctl */
    case FITRIM:
    case FS_IOC_GETFSLABEL:
        break;
    default:
        return -ENOIOCTLCMD;
    }

    return gfs2_ioctl(filp, cmd, (unsigned long)compat_ptr(arg));
}
#else
#define gfs2_compat_ioctl NULL
#endif

/**
 * gfs2_size_hint - Give a hint to the size of a write request
 * @filep: The struct file
 * @offset: The file offset of the write
 * @size: The length of the write
 *
 * When we are about to do a write, this function records the total
 * write size in order to provide a suitable hint to the lower layers
 * about how many blocks will be required.
 *
 */

static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size)
{
    struct inode *inode = file_inode(filep);
    struct gfs2_sbd *sdp = GFS2_SB(inode);
    struct gfs2_inode *ip = GFS2_I(inode);
    size_t blks = (size + sdp->sd_sb.sb_bsize - 1) >> sdp->sd_sb.sb_bsize_shift;
    int hint = min_t(size_t, INT_MAX, blks);

    if (hint > atomic_read(&ip->i_sizehint))
        atomic_set(&ip->i_sizehint, hint);
}
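
/*
 * Worked example: with 4 KiB blocks (sb_bsize == 4096, sb_bsize_shift == 12),
 * a 1 MiB write gives blks = (1048576 + 4095) >> 12 == 256, so i_sizehint is
 * raised to at least 256. The hint only ever grows here; it is a rough
 * "how big might this file get" signal for the block allocator, not a
 * guarantee.
 */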

/**
 * gfs2_allocate_page_backing - Allocate blocks for a write fault
 * @page: The (locked) page to allocate backing for
 * @length: Size of the allocation
 *
 * We try to allocate all the blocks required for the page in one go.  This
 * might fail for various reasons, so we keep trying until all the blocks to
 * back this page are allocated.  If some of the blocks are already allocated,
 * that is ok too.
 */
static int gfs2_allocate_page_backing(struct page *page, unsigned int length)
{
    u64 pos = page_offset(page);

    do {
        struct iomap iomap = { };

        if (gfs2_iomap_alloc(page->mapping->host, pos, length, &iomap))
            return -EIO;

        if (length < iomap.length)
            iomap.length = length;
        length -= iomap.length;
        pos += iomap.length;
    } while (length > 0);

    return 0;
}

/**
 * gfs2_page_mkwrite - Make a shared, mmap()ed, page writable
 * @vmf: The virtual memory fault containing the page to become writable
 *
 * When the page becomes writable, we need to ensure that we have
 * blocks allocated on disk to back that page.
 */

static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf)
{
    struct page *page = vmf->page;
    struct inode *inode = file_inode(vmf->vma->vm_file);
    struct gfs2_inode *ip = GFS2_I(inode);
    struct gfs2_sbd *sdp = GFS2_SB(inode);
    struct gfs2_alloc_parms ap = { .aflags = 0, };
    u64 offset = page_offset(page);
    unsigned int data_blocks, ind_blocks, rblocks;
    vm_fault_t ret = VM_FAULT_LOCKED;
    struct gfs2_holder gh;
    unsigned int length;
    loff_t size;
    int err;

    sb_start_pagefault(inode->i_sb);

    gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
    err = gfs2_glock_nq(&gh);
    if (err) {
        ret = block_page_mkwrite_return(err);
        goto out_uninit;
    }

    /* Check page index against inode size */
    size = i_size_read(inode);
    if (offset >= size) {
        ret = VM_FAULT_SIGBUS;
        goto out_unlock;
    }

    /* Update file times before taking page lock */
    file_update_time(vmf->vma->vm_file);

    /* page is wholly or partially inside EOF */
    if (size - offset < PAGE_SIZE)
        length = size - offset;
    else
        length = PAGE_SIZE;

    gfs2_size_hint(vmf->vma->vm_file, offset, length);

    set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
    set_bit(GIF_SW_PAGED, &ip->i_flags);

    /*
     * iomap_writepage / iomap_writepages currently don't support inline
     * files, so always unstuff here.
     */

    if (!gfs2_is_stuffed(ip) &&
        !gfs2_write_alloc_required(ip, offset, length)) {
        lock_page(page);
        if (!PageUptodate(page) || page->mapping != inode->i_mapping) {
            ret = VM_FAULT_NOPAGE;
            unlock_page(page);
        }
        goto out_unlock;
    }

    err = gfs2_rindex_update(sdp);
    if (err) {
        ret = block_page_mkwrite_return(err);
        goto out_unlock;
    }

    gfs2_write_calc_reserv(ip, length, &data_blocks, &ind_blocks);
    ap.target = data_blocks + ind_blocks;
    err = gfs2_quota_lock_check(ip, &ap);
    if (err) {
        ret = block_page_mkwrite_return(err);
        goto out_unlock;
    }
    err = gfs2_inplace_reserve(ip, &ap);
    if (err) {
        ret = block_page_mkwrite_return(err);
        goto out_quota_unlock;
    }

    rblocks = RES_DINODE + ind_blocks;
    if (gfs2_is_jdata(ip))
        rblocks += data_blocks ? data_blocks : 1;
    if (ind_blocks || data_blocks) {
        rblocks += RES_STATFS + RES_QUOTA;
        rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks);
    }
    err = gfs2_trans_begin(sdp, rblocks, 0);
    if (err) {
        ret = block_page_mkwrite_return(err);
        goto out_trans_fail;
    }

    /* Unstuff, if required, and allocate backing blocks for page */
    if (gfs2_is_stuffed(ip)) {
        err = gfs2_unstuff_dinode(ip);
        if (err) {
            ret = block_page_mkwrite_return(err);
            goto out_trans_end;
        }
    }

    lock_page(page);
    /* If truncated, we must retry the operation; we may have raced
     * with the glock demotion code.
     */
    if (!PageUptodate(page) || page->mapping != inode->i_mapping) {
        ret = VM_FAULT_NOPAGE;
        goto out_page_locked;
    }

    err = gfs2_allocate_page_backing(page, length);
    if (err)
        ret = block_page_mkwrite_return(err);

out_page_locked:
    if (ret != VM_FAULT_LOCKED)
        unlock_page(page);
out_trans_end:
    gfs2_trans_end(sdp);
out_trans_fail:
    gfs2_inplace_release(ip);
out_quota_unlock:
    gfs2_quota_unlock(ip);
out_unlock:
    gfs2_glock_dq(&gh);
out_uninit:
    gfs2_holder_uninit(&gh);
    if (ret == VM_FAULT_LOCKED) {
        set_page_dirty(page);
        wait_for_stable_page(page);
    }
    sb_end_pagefault(inode->i_sb);
    return ret;
}
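
/*
 * Illustration of the reservation arithmetic above for a whole-page fault
 * on a 4 KiB-block filesystem (numbers are indicative only): length ==
 * PAGE_SIZE gives data_blocks == 1 plus whatever indirect blocks
 * gfs2_write_calc_reserv() decides the metadata tree may need, so rblocks
 * covers the dinode, the indirect blocks, statfs and quota changes, and the
 * resource-group bitmaps from gfs2_rg_blocks(). jdata inodes also add the
 * data blocks themselves, since their data is journaled too.
 */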

static vm_fault_t gfs2_fault(struct vm_fault *vmf)
{
    struct inode *inode = file_inode(vmf->vma->vm_file);
    struct gfs2_inode *ip = GFS2_I(inode);
    struct gfs2_holder gh;
    vm_fault_t ret;
    int err;

    gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
    err = gfs2_glock_nq(&gh);
    if (err) {
        ret = block_page_mkwrite_return(err);
        goto out_uninit;
    }
    ret = filemap_fault(vmf);
    gfs2_glock_dq(&gh);
out_uninit:
    gfs2_holder_uninit(&gh);
    return ret;
}

static const struct vm_operations_struct gfs2_vm_ops = {
    .fault = gfs2_fault,
    .map_pages = filemap_map_pages,
    .page_mkwrite = gfs2_page_mkwrite,
};

/**
 * gfs2_mmap
 * @file: The file to map
 * @vma: The VMA which describes the mapping
 *
 * There is no need to get a lock here unless we should be updating
 * atime. We ignore any locking errors since the only consequence is
 * a missed atime update (which will just be deferred until later).
 *
 * Returns: 0
 */

static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
{
    struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);

    if (!(file->f_flags & O_NOATIME) &&
        !IS_NOATIME(&ip->i_inode)) {
        struct gfs2_holder i_gh;
        int error;

        error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
                       &i_gh);
        if (error)
            return error;
        /* grab lock to update inode */
        gfs2_glock_dq_uninit(&i_gh);
        file_accessed(file);
    }
    vma->vm_ops = &gfs2_vm_ops;

    return 0;
}

/**
 * gfs2_open_common - This is common to open and atomic_open
 * @inode: The inode being opened
 * @file: The file being opened
 *
 * This may be called under a glock or not depending upon how it has
 * been called. We must always be called under a glock for regular
 * files, however. For other file types, it does not matter whether
 * we hold the glock or not.
 *
 * Returns: Error code or 0 for success
 */

int gfs2_open_common(struct inode *inode, struct file *file)
{
    struct gfs2_file *fp;
    int ret;

    if (S_ISREG(inode->i_mode)) {
        ret = generic_file_open(inode, file);
        if (ret)
            return ret;
    }

    fp = kzalloc(sizeof(struct gfs2_file), GFP_NOFS);
    if (!fp)
        return -ENOMEM;

    mutex_init(&fp->f_fl_mutex);

    gfs2_assert_warn(GFS2_SB(inode), !file->private_data);
    file->private_data = fp;
    if (file->f_mode & FMODE_WRITE) {
        ret = gfs2_qa_get(GFS2_I(inode));
        if (ret)
            goto fail;
    }
    return 0;

fail:
    kfree(file->private_data);
    file->private_data = NULL;
    return ret;
}

/**
 * gfs2_open - open a file
 * @inode: the inode to open
 * @file: the struct file for this opening
 *
 * After atomic_open, this function is only used for opening files
 * which are already cached. We must still get the glock for regular
 * files to ensure that we have the file size uptodate for the large
 * file check which is in the common code. That is only an issue for
 * regular files though.
 *
 * Returns: errno
 */

static int gfs2_open(struct inode *inode, struct file *file)
{
    struct gfs2_inode *ip = GFS2_I(inode);
    struct gfs2_holder i_gh;
    int error;
    bool need_unlock = false;

    if (S_ISREG(ip->i_inode.i_mode)) {
        error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
                       &i_gh);
        if (error)
            return error;
        need_unlock = true;
    }

    error = gfs2_open_common(inode, file);

    if (need_unlock)
        gfs2_glock_dq_uninit(&i_gh);

    return error;
}

/**
 * gfs2_release - called to close a struct file
 * @inode: the inode the struct file belongs to
 * @file: the struct file being closed
 *
 * Returns: errno
 */

static int gfs2_release(struct inode *inode, struct file *file)
{
    struct gfs2_inode *ip = GFS2_I(inode);

    kfree(file->private_data);
    file->private_data = NULL;

    if (file->f_mode & FMODE_WRITE) {
        if (gfs2_rs_active(&ip->i_res))
            gfs2_rs_delete(ip);
        gfs2_qa_put(ip);
    }
    return 0;
}

/**
 * gfs2_fsync - sync the dirty data for a file (across the cluster)
 * @file: the file that points to the dentry
 * @start: the start position in the file to sync
 * @end: the end position in the file to sync
 * @datasync: set if we can ignore timestamp changes
 *
 * We split the data flushing here so that we don't wait for the data
 * until after we've also sent the metadata to disk. Note that for
 * data=ordered, we will write & wait for the data at the log flush
 * stage anyway, so this is unlikely to make much of a difference
 * except in the data=writeback case.
 *
 * If the fdatawrite fails due to any reason except -EIO, we will
 * continue the remainder of the fsync, although we'll still report
 * the error at the end. This is to match filemap_write_and_wait_range()
 * behaviour.
 *
 * Returns: errno
 */

static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
              int datasync)
{
    struct address_space *mapping = file->f_mapping;
    struct inode *inode = mapping->host;
    int sync_state = inode->i_state & I_DIRTY;
    struct gfs2_inode *ip = GFS2_I(inode);
    int ret = 0, ret1 = 0;

    if (mapping->nrpages) {
        ret1 = filemap_fdatawrite_range(mapping, start, end);
        if (ret1 == -EIO)
            return ret1;
    }

    if (!gfs2_is_jdata(ip))
        sync_state &= ~I_DIRTY_PAGES;
    if (datasync)
        sync_state &= ~I_DIRTY_SYNC;

    if (sync_state) {
        ret = sync_inode_metadata(inode, 1);
        if (ret)
            return ret;
        if (gfs2_is_jdata(ip))
            ret = file_write_and_wait(file);
        if (ret)
            return ret;
        gfs2_ail_flush(ip->i_gl, 1);
    }

    if (mapping->nrpages)
        ret = file_fdatawait_range(file, start, end);

    return ret ? ret : ret1;
}
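
/*
 * Note on the ordering above: data writeback is started first, the inode
 * metadata is synced while that I/O is in flight, and only at the end do we
 * wait for the data. A user-space fdatasync(fd) arrives here with
 * datasync == 1, so pure timestamp dirtiness (I_DIRTY_SYNC) does not force
 * a metadata sync.
 */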

static inline bool should_fault_in_pages(struct iov_iter *i,
                     struct kiocb *iocb,
                     size_t *prev_count,
                     size_t *window_size)
{
    size_t count = iov_iter_count(i);
    size_t size, offs;

    if (!count)
        return false;
    if (!user_backed_iter(i))
        return false;

    size = PAGE_SIZE;
    offs = offset_in_page(iocb->ki_pos);
    if (*prev_count != count || !*window_size) {
        size_t nr_dirtied;

        nr_dirtied = max(current->nr_dirtied_pause -
                 current->nr_dirtied, 8);
        size = min_t(size_t, SZ_1M, nr_dirtied << PAGE_SHIFT);
    }

    *prev_count = count;
    *window_size = size - offs;
    return true;
}
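
/*
 * Worked example: on the first pass (*window_size == 0) with ki_pos 768
 * bytes into a page, size is recomputed from the dirty-page budget (capped
 * at SZ_1M), so *window_size can be up to 1048576 - 768 bytes. On a retry
 * where the residual count is unchanged, size stays at PAGE_SIZE and the
 * window shrinks to PAGE_SIZE - offs: just enough to guarantee forward
 * progress.
 */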

static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to,
                     struct gfs2_holder *gh)
{
    struct file *file = iocb->ki_filp;
    struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
    size_t prev_count = 0, window_size = 0;
    size_t read = 0;
    ssize_t ret;

    /*
     * In this function, we disable page faults when we're holding the
     * inode glock while doing I/O.  If a page fault occurs, we indicate
     * that the inode glock may be dropped, fault in the pages manually,
     * and retry.
     *
     * Unlike generic_file_read_iter, for reads, iomap_dio_rw can trigger
     * physical as well as manual page faults, and we need to disable both
     * kinds.
     *
     * For direct I/O, gfs2 takes the inode glock in deferred mode.  This
     * locking mode is compatible with other deferred holders, so multiple
     * processes and nodes can do direct I/O to a file at the same time.
     * There's no guarantee that reads or writes will be atomic.  Any
     * coordination among readers and writers needs to happen externally.
     */

    if (!iov_iter_count(to))
        return 0; /* skip atime */

    gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, gh);
retry:
    ret = gfs2_glock_nq(gh);
    if (ret)
        goto out_uninit;
    pagefault_disable();
    to->nofault = true;
    ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL,
               IOMAP_DIO_PARTIAL, NULL, read);
    to->nofault = false;
    pagefault_enable();
    if (ret <= 0 && ret != -EFAULT)
        goto out_unlock;
    /* No increment (+=) because iomap_dio_rw returns a cumulative value. */
    if (ret > 0)
        read = ret;

    if (should_fault_in_pages(to, iocb, &prev_count, &window_size)) {
        gfs2_glock_dq(gh);
        window_size -= fault_in_iov_iter_writeable(to, window_size);
        if (window_size)
            goto retry;
    }
out_unlock:
    if (gfs2_holder_queued(gh))
        gfs2_glock_dq(gh);
out_uninit:
    gfs2_holder_uninit(gh);
    /* User space doesn't expect partial success. */
    if (ret < 0)
        return ret;
    return read;
}

static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
                      struct gfs2_holder *gh)
{
    struct file *file = iocb->ki_filp;
    struct inode *inode = file->f_mapping->host;
    struct gfs2_inode *ip = GFS2_I(inode);
    size_t prev_count = 0, window_size = 0;
    size_t written = 0;
    ssize_t ret;

    /*
     * In this function, we disable page faults when we're holding the
     * inode glock while doing I/O.  If a page fault occurs, we indicate
     * that the inode glock may be dropped, fault in the pages manually,
     * and retry.
     *
     * For writes, iomap_dio_rw only triggers manual page faults, so we
     * don't need to disable physical ones.
     */

    /*
     * Deferred lock, even if it's a write, since we do no allocation on
     * this path. All we need to change is the atime, and this lock mode
     * ensures that other nodes have flushed their buffered read caches
     * (i.e. their page cache entries for this inode). We do not,
     * unfortunately, have the option of only flushing a range like the
     * VFS does.
     */
    gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, gh);
retry:
    ret = gfs2_glock_nq(gh);
    if (ret)
        goto out_uninit;
    /* Silently fall back to buffered I/O when writing beyond EOF */
    if (iocb->ki_pos + iov_iter_count(from) > i_size_read(&ip->i_inode))
        goto out_unlock;

    from->nofault = true;
    ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL,
               IOMAP_DIO_PARTIAL, NULL, written);
    from->nofault = false;
    if (ret <= 0) {
        if (ret == -ENOTBLK)
            ret = 0;
        if (ret != -EFAULT)
            goto out_unlock;
    }
    /* No increment (+=) because iomap_dio_rw returns a cumulative value. */
    if (ret > 0)
        written = ret;

    if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) {
        gfs2_glock_dq(gh);
        window_size -= fault_in_iov_iter_readable(from, window_size);
        if (window_size)
            goto retry;
    }
out_unlock:
    if (gfs2_holder_queued(gh))
        gfs2_glock_dq(gh);
out_uninit:
    gfs2_holder_uninit(gh);
    /* User space doesn't expect partial success. */
    if (ret < 0)
        return ret;
    return written;
}

static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
    struct gfs2_inode *ip;
    struct gfs2_holder gh;
    size_t prev_count = 0, window_size = 0;
    size_t read = 0;
    ssize_t ret;

    /*
     * In this function, we disable page faults when we're holding the
     * inode glock while doing I/O.  If a page fault occurs, we indicate
     * that the inode glock may be dropped, fault in the pages manually,
     * and retry.
     */

    if (iocb->ki_flags & IOCB_DIRECT)
        return gfs2_file_direct_read(iocb, to, &gh);

    pagefault_disable();
    iocb->ki_flags |= IOCB_NOIO;
    ret = generic_file_read_iter(iocb, to);
    iocb->ki_flags &= ~IOCB_NOIO;
    pagefault_enable();
    if (ret >= 0) {
        if (!iov_iter_count(to))
            return ret;
        read = ret;
    } else if (ret != -EFAULT) {
        if (ret != -EAGAIN)
            return ret;
        if (iocb->ki_flags & IOCB_NOWAIT)
            return ret;
    }
    ip = GFS2_I(iocb->ki_filp->f_mapping->host);
    gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
retry:
    ret = gfs2_glock_nq(&gh);
    if (ret)
        goto out_uninit;
    pagefault_disable();
    ret = generic_file_read_iter(iocb, to);
    pagefault_enable();
    if (ret <= 0 && ret != -EFAULT)
        goto out_unlock;
    if (ret > 0)
        read += ret;

    if (should_fault_in_pages(to, iocb, &prev_count, &window_size)) {
        gfs2_glock_dq(&gh);
        window_size -= fault_in_iov_iter_writeable(to, window_size);
        if (window_size)
            goto retry;
    }
out_unlock:
    if (gfs2_holder_queued(&gh))
        gfs2_glock_dq(&gh);
out_uninit:
    gfs2_holder_uninit(&gh);
    return read ? read : ret;
}

static ssize_t gfs2_file_buffered_write(struct kiocb *iocb,
                    struct iov_iter *from,
                    struct gfs2_holder *gh)
{
    struct file *file = iocb->ki_filp;
    struct inode *inode = file_inode(file);
    struct gfs2_inode *ip = GFS2_I(inode);
    struct gfs2_sbd *sdp = GFS2_SB(inode);
    struct gfs2_holder *statfs_gh = NULL;
    size_t prev_count = 0, window_size = 0;
    size_t orig_count = iov_iter_count(from);
    size_t written = 0;
    ssize_t ret;

    /*
     * In this function, we disable page faults when we're holding the
     * inode glock while doing I/O.  If a page fault occurs, we indicate
     * that the inode glock may be dropped, fault in the pages manually,
     * and retry.
     */

    if (inode == sdp->sd_rindex) {
        statfs_gh = kmalloc(sizeof(*statfs_gh), GFP_NOFS);
        if (!statfs_gh)
            return -ENOMEM;
    }

    gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, gh);
retry:
    if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) {
        window_size -= fault_in_iov_iter_readable(from, window_size);
        if (!window_size) {
            ret = -EFAULT;
            goto out_uninit;
        }
        from->count = min(from->count, window_size);
    }
    ret = gfs2_glock_nq(gh);
    if (ret)
        goto out_uninit;

    if (inode == sdp->sd_rindex) {
        struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);

        ret = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE,
                     GL_NOCACHE, statfs_gh);
        if (ret)
            goto out_unlock;
    }

    current->backing_dev_info = inode_to_bdi(inode);
    pagefault_disable();
    ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
    pagefault_enable();
    current->backing_dev_info = NULL;
    if (ret > 0) {
        iocb->ki_pos += ret;
        written += ret;
    }

    if (inode == sdp->sd_rindex)
        gfs2_glock_dq_uninit(statfs_gh);

    if (ret <= 0 && ret != -EFAULT)
        goto out_unlock;

    from->count = orig_count - written;
    if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) {
        gfs2_glock_dq(gh);
        goto retry;
    }
out_unlock:
    if (gfs2_holder_queued(gh))
        gfs2_glock_dq(gh);
out_uninit:
    gfs2_holder_uninit(gh);
    kfree(statfs_gh);
    from->count = orig_count - written;
    return written ? written : ret;
}

/**
 * gfs2_file_write_iter - Perform a write to a file
 * @iocb: The io context
 * @from: The data to write
 *
 * We have to do a lock/unlock here to refresh the inode size for
 * O_APPEND writes, otherwise we can end up writing at the wrong
 * offset. There is still a race, but provided the app is using its
 * own file locking, this will make O_APPEND work as expected.
 *
 */

static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
    struct file *file = iocb->ki_filp;
    struct inode *inode = file_inode(file);
    struct gfs2_inode *ip = GFS2_I(inode);
    struct gfs2_holder gh;
    ssize_t ret;

    gfs2_size_hint(file, iocb->ki_pos, iov_iter_count(from));

    if (iocb->ki_flags & IOCB_APPEND) {
        ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
        if (ret)
            return ret;
        gfs2_glock_dq_uninit(&gh);
    }

    inode_lock(inode);
    ret = generic_write_checks(iocb, from);
    if (ret <= 0)
        goto out_unlock;

    ret = file_remove_privs(file);
    if (ret)
        goto out_unlock;

    ret = file_update_time(file);
    if (ret)
        goto out_unlock;

    if (iocb->ki_flags & IOCB_DIRECT) {
        struct address_space *mapping = file->f_mapping;
        ssize_t buffered, ret2;

        ret = gfs2_file_direct_write(iocb, from, &gh);
        if (ret < 0 || !iov_iter_count(from))
            goto out_unlock;

        iocb->ki_flags |= IOCB_DSYNC;
        buffered = gfs2_file_buffered_write(iocb, from, &gh);
        if (unlikely(buffered <= 0)) {
            if (!ret)
                ret = buffered;
            goto out_unlock;
        }

        /*
         * We need to ensure that the page cache pages are written to
         * disk and invalidated to preserve the expected O_DIRECT
         * semantics.  If the writeback or invalidate fails, only report
         * the direct I/O range as we don't know if the buffered pages
         * made it to disk.
         */
        ret2 = generic_write_sync(iocb, buffered);
        invalidate_mapping_pages(mapping,
                (iocb->ki_pos - buffered) >> PAGE_SHIFT,
                (iocb->ki_pos - 1) >> PAGE_SHIFT);
        if (!ret || ret2 > 0)
            ret += ret2;
    } else {
        ret = gfs2_file_buffered_write(iocb, from, &gh);
        if (likely(ret > 0))
            ret = generic_write_sync(iocb, ret);
    }

out_unlock:
    inode_unlock(inode);
    return ret;
}

static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
               int mode)
{
    struct super_block *sb = inode->i_sb;
    struct gfs2_inode *ip = GFS2_I(inode);
    loff_t end = offset + len;
    struct buffer_head *dibh;
    int error;

    error = gfs2_meta_inode_buffer(ip, &dibh);
    if (unlikely(error))
        return error;

    gfs2_trans_add_meta(ip->i_gl, dibh);

    if (gfs2_is_stuffed(ip)) {
        error = gfs2_unstuff_dinode(ip);
        if (unlikely(error))
            goto out;
    }

    while (offset < end) {
        struct iomap iomap = { };

        error = gfs2_iomap_alloc(inode, offset, end - offset, &iomap);
        if (error)
            goto out;
        offset = iomap.offset + iomap.length;
        if (!(iomap.flags & IOMAP_F_NEW))
            continue;
        error = sb_issue_zeroout(sb, iomap.addr >> inode->i_blkbits,
                     iomap.length >> inode->i_blkbits,
                     GFP_NOFS);
        if (error) {
            fs_err(GFS2_SB(inode), "Failed to zero data buffers\n");
            goto out;
        }
    }
out:
    brelse(dibh);
    return error;
}

/**
 * calc_max_reserv() - Reverse of write_calc_reserv. Given a number of
 *                     blocks, determine how many bytes can be written.
 * @ip:          The inode in question.
 * @len:         Max cap of bytes. What we return in *len must be <= this.
 * @data_blocks: Compute and return the number of data blocks needed
 * @ind_blocks:  Compute and return the number of indirect blocks needed
 * @max_blocks:  The total blocks available to work with.
 *
 * Returns: void, but @len, @data_blocks and @ind_blocks are filled in.
 */
static void calc_max_reserv(struct gfs2_inode *ip, loff_t *len,
                unsigned int *data_blocks, unsigned int *ind_blocks,
                unsigned int max_blocks)
{
    loff_t max = *len;
    const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
    unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1);

    for (tmp = max_data; tmp > sdp->sd_diptrs;) {
        tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
        max_data -= tmp;
    }

    *data_blocks = max_data;
    *ind_blocks = max_blocks - max_data;
    *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift;
    if (*len > max) {
        *len = max;
        gfs2_write_calc_reserv(ip, max, data_blocks, ind_blocks);
    }
}
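
/*
 * Worked example (4 KiB blocks, where typically sd_inptrs == 509 pointers
 * per indirect block and sd_diptrs == 483 direct pointers in the dinode;
 * the exact values depend on the on-disk header sizes): starting from
 * max_blocks == 10000, the loop repeatedly peels off one level's worth of
 * indirect blocks (DIV_ROUND_UP(tmp, 509)) until tmp fits in the dinode's
 * direct pointers, leaving max_data data blocks, max_blocks - max_data
 * metadata blocks, and *len just under max_data blocks' worth of bytes.
 */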

static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{
    struct inode *inode = file_inode(file);
    struct gfs2_sbd *sdp = GFS2_SB(inode);
    struct gfs2_inode *ip = GFS2_I(inode);
    struct gfs2_alloc_parms ap = { .aflags = 0, };
    unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
    loff_t bytes, max_bytes, max_blks;
    int error;
    const loff_t pos = offset;
    const loff_t count = len;
    loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1);
    loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;
    loff_t max_chunk_size = UINT_MAX & bsize_mask;

    next = (next + 1) << sdp->sd_sb.sb_bsize_shift;

    offset &= bsize_mask;

    len = next - offset;
    bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2;
    if (!bytes)
        bytes = UINT_MAX;
    bytes &= bsize_mask;
    if (bytes == 0)
        bytes = sdp->sd_sb.sb_bsize;

    gfs2_size_hint(file, offset, len);

    gfs2_write_calc_reserv(ip, PAGE_SIZE, &data_blocks, &ind_blocks);
    ap.min_target = data_blocks + ind_blocks;

    while (len > 0) {
        if (len < bytes)
            bytes = len;
        if (!gfs2_write_alloc_required(ip, offset, bytes)) {
            len -= bytes;
            offset += bytes;
            continue;
        }

        /* We need to determine how many bytes we can actually
         * fallocate without exceeding quota or going over the
         * end of the fs. We start off optimistically by assuming
         * we can write max_bytes */
        max_bytes = (len > max_chunk_size) ? max_chunk_size : len;

        /* Since max_bytes is most likely a theoretical max, we
         * calculate a more realistic 'bytes' to serve as a good
         * starting point for the number of bytes we may be able
         * to write */
        gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
        ap.target = data_blocks + ind_blocks;

        error = gfs2_quota_lock_check(ip, &ap);
        if (error)
            return error;
        /* ap.allowed tells us how many blocks quota will allow
         * us to write. Check if this reduces max_blks */
        max_blks = UINT_MAX;
        if (ap.allowed)
            max_blks = ap.allowed;

        error = gfs2_inplace_reserve(ip, &ap);
        if (error)
            goto out_qunlock;

        /* check if the selected rgrp limits our max_blks further */
        if (ip->i_res.rs_reserved < max_blks)
            max_blks = ip->i_res.rs_reserved;

        /* Almost done. Calculate bytes that can be written using
         * max_blks. We also recompute max_bytes, data_blocks and
         * ind_blocks */
        calc_max_reserv(ip, &max_bytes, &data_blocks,
                &ind_blocks, max_blks);

        rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
              RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + ind_blocks);
        if (gfs2_is_jdata(ip))
            rblocks += data_blocks ? data_blocks : 1;

        error = gfs2_trans_begin(sdp, rblocks,
                     PAGE_SIZE >> inode->i_blkbits);
        if (error)
            goto out_trans_fail;

        error = fallocate_chunk(inode, offset, max_bytes, mode);
        gfs2_trans_end(sdp);

        if (error)
            goto out_trans_fail;

        len -= max_bytes;
        offset += max_bytes;
        gfs2_inplace_release(ip);
        gfs2_quota_unlock(ip);
    }

    if (!(mode & FALLOC_FL_KEEP_SIZE) && (pos + count) > inode->i_size)
        i_size_write(inode, pos + count);
    file_update_time(file);
    mark_inode_dirty(inode);

    if ((file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host))
        return vfs_fsync_range(file, pos, pos + count - 1,
                   (file->f_flags & __O_SYNC) ? 0 : 1);
    return 0;

out_trans_fail:
    gfs2_inplace_release(ip);
out_qunlock:
    gfs2_quota_unlock(ip);
    return error;
}

static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{
    struct inode *inode = file_inode(file);
    struct gfs2_sbd *sdp = GFS2_SB(inode);
    struct gfs2_inode *ip = GFS2_I(inode);
    struct gfs2_holder gh;
    int ret;

    if (mode & ~(FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE))
        return -EOPNOTSUPP;
    /* fallocate is needed by gfs2_grow to reserve space in the rindex */
    if (gfs2_is_jdata(ip) && inode != sdp->sd_rindex)
        return -EOPNOTSUPP;

    inode_lock(inode);

    gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
    ret = gfs2_glock_nq(&gh);
    if (ret)
        goto out_uninit;

    if (!(mode & FALLOC_FL_KEEP_SIZE) &&
        (offset + len) > inode->i_size) {
        ret = inode_newsize_ok(inode, offset + len);
        if (ret)
            goto out_unlock;
    }

    ret = get_write_access(inode);
    if (ret)
        goto out_unlock;

    if (mode & FALLOC_FL_PUNCH_HOLE) {
        ret = __gfs2_punch_hole(file, offset, len);
    } else {
        ret = __gfs2_fallocate(file, mode, offset, len);
        if (ret)
            gfs2_rs_deltree(&ip->i_res);
    }

    put_write_access(inode);
out_unlock:
    gfs2_glock_dq(&gh);
out_uninit:
    gfs2_holder_uninit(&gh);
    inode_unlock(inode);
    return ret;
}

static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe,
                      struct file *out, loff_t *ppos,
                      size_t len, unsigned int flags)
{
    ssize_t ret;

    gfs2_size_hint(out, *ppos, len);

    ret = iter_file_splice_write(pipe, out, ppos, len, flags);
    return ret;
}

#ifdef CONFIG_GFS2_FS_LOCKING_DLM

/**
 * gfs2_lock - acquire/release a posix lock on a file
 * @file: the file pointer
 * @cmd: either modify or retrieve lock state, possibly wait
 * @fl: type and range of lock
 *
 * Returns: errno
 */

static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
{
    struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
    struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);
    struct lm_lockstruct *ls = &sdp->sd_lockstruct;

    if (!(fl->fl_flags & FL_POSIX))
        return -ENOLCK;
    if (cmd == F_CANCELLK) {
        /* Hack: */
        cmd = F_SETLK;
        fl->fl_type = F_UNLCK;
    }
    if (unlikely(gfs2_withdrawn(sdp))) {
        if (fl->fl_type == F_UNLCK)
            locks_lock_file_wait(file, fl);
        return -EIO;
    }
    if (IS_GETLK(cmd))
        return dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl);
    else if (fl->fl_type == F_UNLCK)
        return dlm_posix_unlock(ls->ls_dlm, ip->i_no_addr, file, fl);
    else
        return dlm_posix_lock(ls->ls_dlm, ip->i_no_addr, file, cmd, fl);
}

static int do_flock(struct file *file, int cmd, struct file_lock *fl)
{
    struct gfs2_file *fp = file->private_data;
    struct gfs2_holder *fl_gh = &fp->f_fl_gh;
    struct gfs2_inode *ip = GFS2_I(file_inode(file));
    struct gfs2_glock *gl;
    unsigned int state;
    u16 flags;
    int error = 0;
    int sleeptime;

    state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
    flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY_1CB) | GL_EXACT;

    mutex_lock(&fp->f_fl_mutex);

    if (gfs2_holder_initialized(fl_gh)) {
        struct file_lock request;
        if (fl_gh->gh_state == state)
            goto out;
        locks_init_lock(&request);
        request.fl_type = F_UNLCK;
        request.fl_flags = FL_FLOCK;
        locks_lock_file_wait(file, &request);
        gfs2_glock_dq(fl_gh);
        gfs2_holder_reinit(state, flags, fl_gh);
    } else {
        error = gfs2_glock_get(GFS2_SB(&ip->i_inode), ip->i_no_addr,
                       &gfs2_flock_glops, CREATE, &gl);
        if (error)
            goto out;
        gfs2_holder_init(gl, state, flags, fl_gh);
        gfs2_glock_put(gl);
    }
    for (sleeptime = 1; sleeptime <= 4; sleeptime <<= 1) {
        error = gfs2_glock_nq(fl_gh);
        if (error != GLR_TRYFAILED)
            break;
        fl_gh->gh_flags = LM_FLAG_TRY | GL_EXACT;
        msleep(sleeptime);
    }
    if (error) {
        gfs2_holder_uninit(fl_gh);
        if (error == GLR_TRYFAILED)
            error = -EAGAIN;
    } else {
        error = locks_lock_file_wait(file, fl);
        gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error);
    }

out:
    mutex_unlock(&fp->f_fl_mutex);
    return error;
}
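
/*
 * The retry loop above is a small exponential backoff for non-blocking
 * flock() requests: the first attempt uses LM_FLAG_TRY_1CB so conflicting
 * holders get one demote callback, and failed tries are repeated with
 * LM_FLAG_TRY after sleeping 1, 2, then 4 ms. Only when every attempt
 * returns GLR_TRYFAILED does the caller see -EAGAIN; blocking requests
 * (IS_SETLKW) pass flags == 0 and simply wait in gfs2_glock_nq().
 */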

static void do_unflock(struct file *file, struct file_lock *fl)
{
    struct gfs2_file *fp = file->private_data;
    struct gfs2_holder *fl_gh = &fp->f_fl_gh;

    mutex_lock(&fp->f_fl_mutex);
    locks_lock_file_wait(file, fl);
    if (gfs2_holder_initialized(fl_gh)) {
        gfs2_glock_dq(fl_gh);
        gfs2_holder_uninit(fl_gh);
    }
    mutex_unlock(&fp->f_fl_mutex);
}

/**
 * gfs2_flock - acquire/release a flock lock on a file
 * @file: the file pointer
 * @cmd: either modify or retrieve lock state, possibly wait
 * @fl: type and range of lock
 *
 * Returns: errno
 */

static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
{
    if (!(fl->fl_flags & FL_FLOCK))
        return -ENOLCK;

    if (fl->fl_type == F_UNLCK) {
        do_unflock(file, fl);
        return 0;
    } else {
        return do_flock(file, cmd, fl);
    }
}

const struct file_operations gfs2_file_fops = {
    .llseek     = gfs2_llseek,
    .read_iter  = gfs2_file_read_iter,
    .write_iter = gfs2_file_write_iter,
    .iopoll     = iocb_bio_iopoll,
    .unlocked_ioctl = gfs2_ioctl,
    .compat_ioctl   = gfs2_compat_ioctl,
    .mmap       = gfs2_mmap,
    .open       = gfs2_open,
    .release    = gfs2_release,
    .fsync      = gfs2_fsync,
    .lock       = gfs2_lock,
    .flock      = gfs2_flock,
    .splice_read    = generic_file_splice_read,
    .splice_write   = gfs2_file_splice_write,
    .setlease   = simple_nosetlease,
    .fallocate  = gfs2_fallocate,
};

const struct file_operations gfs2_dir_fops = {
    .iterate_shared = gfs2_readdir,
    .unlocked_ioctl = gfs2_ioctl,
    .compat_ioctl   = gfs2_compat_ioctl,
    .open       = gfs2_open,
    .release    = gfs2_release,
    .fsync      = gfs2_fsync,
    .lock       = gfs2_lock,
    .flock      = gfs2_flock,
    .llseek     = default_llseek,
};

#endif /* CONFIG_GFS2_FS_LOCKING_DLM */

const struct file_operations gfs2_file_fops_nolock = {
    .llseek     = gfs2_llseek,
    .read_iter  = gfs2_file_read_iter,
    .write_iter = gfs2_file_write_iter,
    .iopoll     = iocb_bio_iopoll,
    .unlocked_ioctl = gfs2_ioctl,
    .compat_ioctl   = gfs2_compat_ioctl,
    .mmap       = gfs2_mmap,
    .open       = gfs2_open,
    .release    = gfs2_release,
    .fsync      = gfs2_fsync,
    .splice_read    = generic_file_splice_read,
    .splice_write   = gfs2_file_splice_write,
    .setlease   = generic_setlease,
    .fallocate  = gfs2_fallocate,
};

const struct file_operations gfs2_dir_fops_nolock = {
    .iterate_shared = gfs2_readdir,
    .unlocked_ioctl = gfs2_ioctl,
    .compat_ioctl   = gfs2_compat_ioctl,
    .open       = gfs2_open,
    .release    = gfs2_release,
    .fsync      = gfs2_fsync,
    .llseek     = default_llseek,
};