Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /**
0003  * inode.c - NTFS kernel inode handling.
0004  *
0005  * Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc.
0006  */
0007 
0008 #include <linux/buffer_head.h>
0009 #include <linux/fs.h>
0010 #include <linux/mm.h>
0011 #include <linux/mount.h>
0012 #include <linux/mutex.h>
0013 #include <linux/pagemap.h>
0014 #include <linux/quotaops.h>
0015 #include <linux/slab.h>
0016 #include <linux/log2.h>
0017 
0018 #include "aops.h"
0019 #include "attrib.h"
0020 #include "bitmap.h"
0021 #include "dir.h"
0022 #include "debug.h"
0023 #include "inode.h"
0024 #include "lcnalloc.h"
0025 #include "malloc.h"
0026 #include "mft.h"
0027 #include "time.h"
0028 #include "ntfs.h"
0029 
0030 /**
0031  * ntfs_test_inode - compare two (possibly fake) inodes for equality
0032  * @vi:     vfs inode which to test
0033  * @data:   data which is being tested with
0034  *
0035  * Compare the ntfs attribute embedded in the ntfs specific part of the vfs
0036  * inode @vi for equality with the ntfs attribute @data.
0037  *
0038  * If searching for the normal file/directory inode, set @na->type to AT_UNUSED.
0039  * @na->name and @na->name_len are then ignored.
0040  *
0041  * Return 1 if the attributes match and 0 if not.
0042  *
0043  * NOTE: This function runs with the inode_hash_lock spin lock held so it is not
0044  * allowed to sleep.
0045  */
0046 int ntfs_test_inode(struct inode *vi, void *data)
0047 {
0048     ntfs_attr *na = (ntfs_attr *)data;
0049     ntfs_inode *ni;
0050 
0051     if (vi->i_ino != na->mft_no)
0052         return 0;
0053     ni = NTFS_I(vi);
0054     /* If !NInoAttr(ni), @vi is a normal file or directory inode. */
0055     if (likely(!NInoAttr(ni))) {
0056         /* If not looking for a normal inode this is a mismatch. */
0057         if (unlikely(na->type != AT_UNUSED))
0058             return 0;
0059     } else {
0060         /* A fake inode describing an attribute. */
0061         if (ni->type != na->type)
0062             return 0;
0063         if (ni->name_len != na->name_len)
0064             return 0;
0065         if (na->name_len && memcmp(ni->name, na->name,
0066                 na->name_len * sizeof(ntfschar)))
0067             return 0;
0068     }
0069     /* Match! */
0070     return 1;
0071 }
0072 
0073 /**
0074  * ntfs_init_locked_inode - initialize an inode
0075  * @vi:     vfs inode to initialize
0076  * @data:   data which to initialize @vi to
0077  *
0078  * Initialize the vfs inode @vi with the values from the ntfs attribute @data in
0079  * order to enable ntfs_test_inode() to do its work.
0080  *
0081  * If initializing the normal file/directory inode, set @na->type to AT_UNUSED.
0082  * In that case, @na->name and @na->name_len should be set to NULL and 0,
0083  * respectively. Although that is not strictly necessary as
0084  * ntfs_read_locked_inode() will fill them in later.
0085  *
0086  * Return 0 on success and -errno on error.
0087  *
0088  * NOTE: This function runs with the inode->i_lock spin lock held so it is not
0089  * allowed to sleep. (Hence the GFP_ATOMIC allocation.)
0090  */
0091 static int ntfs_init_locked_inode(struct inode *vi, void *data)
0092 {
0093     ntfs_attr *na = (ntfs_attr *)data;
0094     ntfs_inode *ni = NTFS_I(vi);
0095 
0096     vi->i_ino = na->mft_no;
0097 
0098     ni->type = na->type;
0099     if (na->type == AT_INDEX_ALLOCATION)
0100         NInoSetMstProtected(ni);
0101 
0102     ni->name = na->name;
0103     ni->name_len = na->name_len;
0104 
0105     /* If initializing a normal inode, we are done. */
0106     if (likely(na->type == AT_UNUSED)) {
0107         BUG_ON(na->name);
0108         BUG_ON(na->name_len);
0109         return 0;
0110     }
0111 
0112     /* It is a fake inode. */
0113     NInoSetAttr(ni);
0114 
0115     /*
0116      * We have I30 global constant as an optimization as it is the name
0117      * in >99.9% of named attributes! The other <0.1% incur a GFP_ATOMIC
0118      * allocation but that is ok. And most attributes are unnamed anyway,
0119      * thus the fraction of named attributes with name != I30 is actually
0120      * absolutely tiny.
0121      */
0122     if (na->name_len && na->name != I30) {
0123         unsigned int i;
0124 
0125         BUG_ON(!na->name);
0126         i = na->name_len * sizeof(ntfschar);
0127         ni->name = kmalloc(i + sizeof(ntfschar), GFP_ATOMIC);
0128         if (!ni->name)
0129             return -ENOMEM;
0130         memcpy(ni->name, na->name, i);
0131         ni->name[na->name_len] = 0;
0132     }
0133     return 0;
0134 }
0135 
0136 static int ntfs_read_locked_inode(struct inode *vi);
0137 static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi);
0138 static int ntfs_read_locked_index_inode(struct inode *base_vi,
0139         struct inode *vi);
0140 
0141 /**
0142  * ntfs_iget - obtain a struct inode corresponding to a specific normal inode
0143  * @sb:     super block of mounted volume
0144  * @mft_no: mft record number / inode number to obtain
0145  *
0146  * Obtain the struct inode corresponding to a specific normal inode (i.e. a
0147  * file or directory).
0148  *
0149  * If the inode is in the cache, it is just returned with an increased
0150  * reference count. Otherwise, a new struct inode is allocated and initialized,
0151  * and finally ntfs_read_locked_inode() is called to read in the inode and
0152  * fill in the remainder of the inode structure.
0153  *
0154  * Return the struct inode on success. Check the return value with IS_ERR() and
0155  * if true, the function failed and the error code is obtained from PTR_ERR().
0156  */
0157 struct inode *ntfs_iget(struct super_block *sb, unsigned long mft_no)
0158 {
0159     struct inode *vi;
0160     int err;
0161     ntfs_attr na;
0162 
0163     na.mft_no = mft_no;
0164     na.type = AT_UNUSED;
0165     na.name = NULL;
0166     na.name_len = 0;
0167 
0168     vi = iget5_locked(sb, mft_no, ntfs_test_inode,
0169             ntfs_init_locked_inode, &na);
0170     if (unlikely(!vi))
0171         return ERR_PTR(-ENOMEM);
0172 
0173     err = 0;
0174 
0175     /* If this is a freshly allocated inode, need to read it now. */
0176     if (vi->i_state & I_NEW) {
0177         err = ntfs_read_locked_inode(vi);
0178         unlock_new_inode(vi);
0179     }
0180     /*
0181      * There is no point in keeping bad inodes around if the failure was
0182      * due to ENOMEM. We want to be able to retry again later.
0183      */
0184     if (unlikely(err == -ENOMEM)) {
0185         iput(vi);
0186         vi = ERR_PTR(err);
0187     }
0188     return vi;
0189 }
0190 
0191 /**
0192  * ntfs_attr_iget - obtain a struct inode corresponding to an attribute
0193  * @base_vi:    vfs base inode containing the attribute
0194  * @type:   attribute type
0195  * @name:   Unicode name of the attribute (NULL if unnamed)
0196  * @name_len:   length of @name in Unicode characters (0 if unnamed)
0197  *
0198  * Obtain the (fake) struct inode corresponding to the attribute specified by
0199  * @type, @name, and @name_len, which is present in the base mft record
0200  * specified by the vfs inode @base_vi.
0201  *
0202  * If the attribute inode is in the cache, it is just returned with an
0203  * increased reference count. Otherwise, a new struct inode is allocated and
0204  * initialized, and finally ntfs_read_locked_attr_inode() is called to read the
0205  * attribute and fill in the inode structure.
0206  *
0207  * Note, for index allocation attributes, you need to use ntfs_index_iget()
0208  * instead of ntfs_attr_iget() as working with indices is a lot more complex.
0209  *
0210  * Return the struct inode of the attribute inode on success. Check the return
0211  * value with IS_ERR() and if true, the function failed and the error code is
0212  * obtained from PTR_ERR().
0213  */
0214 struct inode *ntfs_attr_iget(struct inode *base_vi, ATTR_TYPE type,
0215         ntfschar *name, u32 name_len)
0216 {
0217     struct inode *vi;
0218     int err;
0219     ntfs_attr na;
0220 
0221     /* Make sure no one calls ntfs_attr_iget() for indices. */
0222     BUG_ON(type == AT_INDEX_ALLOCATION);
0223 
0224     na.mft_no = base_vi->i_ino;
0225     na.type = type;
0226     na.name = name;
0227     na.name_len = name_len;
0228 
0229     vi = iget5_locked(base_vi->i_sb, na.mft_no, ntfs_test_inode,
0230             ntfs_init_locked_inode, &na);
0231     if (unlikely(!vi))
0232         return ERR_PTR(-ENOMEM);
0233 
0234     err = 0;
0235 
0236     /* If this is a freshly allocated inode, need to read it now. */
0237     if (vi->i_state & I_NEW) {
0238         err = ntfs_read_locked_attr_inode(base_vi, vi);
0239         unlock_new_inode(vi);
0240     }
0241     /*
0242      * There is no point in keeping bad attribute inodes around. This also
0243      * simplifies things in that we never need to check for bad attribute
0244      * inodes elsewhere.
0245      */
0246     if (unlikely(err)) {
0247         iput(vi);
0248         vi = ERR_PTR(err);
0249     }
0250     return vi;
0251 }
0252 
0253 /**
0254  * ntfs_index_iget - obtain a struct inode corresponding to an index
0255  * @base_vi:    vfs base inode containing the index related attributes
0256  * @name:   Unicode name of the index
0257  * @name_len:   length of @name in Unicode characters
0258  *
0259  * Obtain the (fake) struct inode corresponding to the index specified by @name
0260  * and @name_len, which is present in the base mft record specified by the vfs
0261  * inode @base_vi.
0262  *
0263  * If the index inode is in the cache, it is just returned with an increased
0264  * reference count.  Otherwise, a new struct inode is allocated and
0265  * initialized, and finally ntfs_read_locked_index_inode() is called to read
0266  * the index related attributes and fill in the inode structure.
0267  *
0268  * Return the struct inode of the index inode on success. Check the return
0269  * value with IS_ERR() and if true, the function failed and the error code is
0270  * obtained from PTR_ERR().
0271  */
0272 struct inode *ntfs_index_iget(struct inode *base_vi, ntfschar *name,
0273         u32 name_len)
0274 {
0275     struct inode *vi;
0276     int err;
0277     ntfs_attr na;
0278 
0279     na.mft_no = base_vi->i_ino;
0280     na.type = AT_INDEX_ALLOCATION;
0281     na.name = name;
0282     na.name_len = name_len;
0283 
0284     vi = iget5_locked(base_vi->i_sb, na.mft_no, ntfs_test_inode,
0285             ntfs_init_locked_inode, &na);
0286     if (unlikely(!vi))
0287         return ERR_PTR(-ENOMEM);
0288 
0289     err = 0;
0290 
0291     /* If this is a freshly allocated inode, need to read it now. */
0292     if (vi->i_state & I_NEW) {
0293         err = ntfs_read_locked_index_inode(base_vi, vi);
0294         unlock_new_inode(vi);
0295     }
0296     /*
0297      * There is no point in keeping bad index inodes around.  This also
0298      * simplifies things in that we never need to check for bad index
0299      * inodes elsewhere.
0300      */
0301     if (unlikely(err)) {
0302         iput(vi);
0303         vi = ERR_PTR(err);
0304     }
0305     return vi;
0306 }
0307 
0308 struct inode *ntfs_alloc_big_inode(struct super_block *sb)
0309 {
0310     ntfs_inode *ni;
0311 
0312     ntfs_debug("Entering.");
0313     ni = alloc_inode_sb(sb, ntfs_big_inode_cache, GFP_NOFS);
0314     if (likely(ni != NULL)) {
0315         ni->state = 0;
0316         return VFS_I(ni);
0317     }
0318     ntfs_error(sb, "Allocation of NTFS big inode structure failed.");
0319     return NULL;
0320 }
0321 
0322 void ntfs_free_big_inode(struct inode *inode)
0323 {
0324     kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode));
0325 }
0326 
0327 static inline ntfs_inode *ntfs_alloc_extent_inode(void)
0328 {
0329     ntfs_inode *ni;
0330 
0331     ntfs_debug("Entering.");
0332     ni = kmem_cache_alloc(ntfs_inode_cache, GFP_NOFS);
0333     if (likely(ni != NULL)) {
0334         ni->state = 0;
0335         return ni;
0336     }
0337     ntfs_error(NULL, "Allocation of NTFS inode structure failed.");
0338     return NULL;
0339 }
0340 
0341 static void ntfs_destroy_extent_inode(ntfs_inode *ni)
0342 {
0343     ntfs_debug("Entering.");
0344     BUG_ON(ni->page);
0345     if (!atomic_dec_and_test(&ni->count))
0346         BUG();
0347     kmem_cache_free(ntfs_inode_cache, ni);
0348 }
0349 
0350 /*
0351  * The attribute runlist lock has separate locking rules from the
0352  * normal runlist lock, so split the two lock-classes:
0353  */
0354 static struct lock_class_key attr_list_rl_lock_class;
0355 
0356 /**
0357  * __ntfs_init_inode - initialize ntfs specific part of an inode
0358  * @sb:     super block of mounted volume
0359  * @ni:     freshly allocated ntfs inode which to initialize
0360  *
0361  * Initialize an ntfs inode to defaults.
0362  *
0363  * NOTE: ni->mft_no, ni->state, ni->type, ni->name, and ni->name_len are left
0364  * untouched. Make sure to initialize them elsewhere.
0365  *
0366  * Return zero on success and -ENOMEM on error.
0367  */
0368 void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni)
0369 {
0370     ntfs_debug("Entering.");
0371     rwlock_init(&ni->size_lock);
0372     ni->initialized_size = ni->allocated_size = 0;
0373     ni->seq_no = 0;
0374     atomic_set(&ni->count, 1);
0375     ni->vol = NTFS_SB(sb);
0376     ntfs_init_runlist(&ni->runlist);
0377     mutex_init(&ni->mrec_lock);
0378     ni->page = NULL;
0379     ni->page_ofs = 0;
0380     ni->attr_list_size = 0;
0381     ni->attr_list = NULL;
0382     ntfs_init_runlist(&ni->attr_list_rl);
0383     lockdep_set_class(&ni->attr_list_rl.lock,
0384                 &attr_list_rl_lock_class);
0385     ni->itype.index.block_size = 0;
0386     ni->itype.index.vcn_size = 0;
0387     ni->itype.index.collation_rule = 0;
0388     ni->itype.index.block_size_bits = 0;
0389     ni->itype.index.vcn_size_bits = 0;
0390     mutex_init(&ni->extent_lock);
0391     ni->nr_extents = 0;
0392     ni->ext.base_ntfs_ino = NULL;
0393 }
0394 
0395 /*
0396  * Extent inodes get MFT-mapped in a nested way, while the base inode
0397  * is still mapped. Teach this nesting to the lock validator by creating
0398  * a separate class for nested inode's mrec_lock's:
0399  */
0400 static struct lock_class_key extent_inode_mrec_lock_key;
0401 
0402 inline ntfs_inode *ntfs_new_extent_inode(struct super_block *sb,
0403         unsigned long mft_no)
0404 {
0405     ntfs_inode *ni = ntfs_alloc_extent_inode();
0406 
0407     ntfs_debug("Entering.");
0408     if (likely(ni != NULL)) {
0409         __ntfs_init_inode(sb, ni);
0410         lockdep_set_class(&ni->mrec_lock, &extent_inode_mrec_lock_key);
0411         ni->mft_no = mft_no;
0412         ni->type = AT_UNUSED;
0413         ni->name = NULL;
0414         ni->name_len = 0;
0415     }
0416     return ni;
0417 }
0418 
0419 /**
0420  * ntfs_is_extended_system_file - check if a file is in the $Extend directory
0421  * @ctx:    initialized attribute search context
0422  *
0423  * Search all file name attributes in the inode described by the attribute
0424  * search context @ctx and check if any of the names are in the $Extend system
0425  * directory.
0426  *
0427  * Return values:
0428  *     1: file is in $Extend directory
0429  *     0: file is not in $Extend directory
0430  *    -errno: failed to determine if the file is in the $Extend directory
0431  */
0432 static int ntfs_is_extended_system_file(ntfs_attr_search_ctx *ctx)
0433 {
0434     int nr_links, err;
0435 
0436     /* Restart search. */
0437     ntfs_attr_reinit_search_ctx(ctx);
0438 
0439     /* Get number of hard links. */
0440     nr_links = le16_to_cpu(ctx->mrec->link_count);
0441 
0442     /* Loop through all hard links. */
0443     while (!(err = ntfs_attr_lookup(AT_FILE_NAME, NULL, 0, 0, 0, NULL, 0,
0444             ctx))) {
0445         FILE_NAME_ATTR *file_name_attr;
0446         ATTR_RECORD *attr = ctx->attr;
0447         u8 *p, *p2;
0448 
0449         nr_links--;
0450         /*
0451          * Maximum sanity checking as we are called on an inode that
0452          * we suspect might be corrupt.
0453          */
0454         p = (u8*)attr + le32_to_cpu(attr->length);
0455         if (p < (u8*)ctx->mrec || (u8*)p > (u8*)ctx->mrec +
0456                 le32_to_cpu(ctx->mrec->bytes_in_use)) {
0457 err_corrupt_attr:
0458             ntfs_error(ctx->ntfs_ino->vol->sb, "Corrupt file name "
0459                     "attribute. You should run chkdsk.");
0460             return -EIO;
0461         }
0462         if (attr->non_resident) {
0463             ntfs_error(ctx->ntfs_ino->vol->sb, "Non-resident file "
0464                     "name. You should run chkdsk.");
0465             return -EIO;
0466         }
0467         if (attr->flags) {
0468             ntfs_error(ctx->ntfs_ino->vol->sb, "File name with "
0469                     "invalid flags. You should run "
0470                     "chkdsk.");
0471             return -EIO;
0472         }
0473         if (!(attr->data.resident.flags & RESIDENT_ATTR_IS_INDEXED)) {
0474             ntfs_error(ctx->ntfs_ino->vol->sb, "Unindexed file "
0475                     "name. You should run chkdsk.");
0476             return -EIO;
0477         }
0478         file_name_attr = (FILE_NAME_ATTR*)((u8*)attr +
0479                 le16_to_cpu(attr->data.resident.value_offset));
0480         p2 = (u8 *)file_name_attr + le32_to_cpu(attr->data.resident.value_length);
0481         if (p2 < (u8*)attr || p2 > p)
0482             goto err_corrupt_attr;
0483         /* This attribute is ok, but is it in the $Extend directory? */
0484         if (MREF_LE(file_name_attr->parent_directory) == FILE_Extend)
0485             return 1;   /* YES, it's an extended system file. */
0486     }
0487     if (unlikely(err != -ENOENT))
0488         return err;
0489     if (unlikely(nr_links)) {
0490         ntfs_error(ctx->ntfs_ino->vol->sb, "Inode hard link count "
0491                 "doesn't match number of name attributes. You "
0492                 "should run chkdsk.");
0493         return -EIO;
0494     }
0495     return 0;   /* NO, it is not an extended system file. */
0496 }
0497 
0498 /**
0499  * ntfs_read_locked_inode - read an inode from its device
0500  * @vi:     inode to read
0501  *
0502  * ntfs_read_locked_inode() is called from ntfs_iget() to read the inode
0503  * described by @vi into memory from the device.
0504  *
0505  * The only fields in @vi that we need to/can look at when the function is
0506  * called are i_sb, pointing to the mounted device's super block, and i_ino,
0507  * the number of the inode to load.
0508  *
0509  * ntfs_read_locked_inode() maps, pins and locks the mft record number i_ino
0510  * for reading and sets up the necessary @vi fields as well as initializing
0511  * the ntfs inode.
0512  *
0513  * Q: What locks are held when the function is called?
0514  * A: i_state has I_NEW set, hence the inode is locked, also
0515  *    i_count is set to 1, so it is not going to go away
0516  *    i_flags is set to 0 and we have no business touching it.  Only an ioctl()
0517  *    is allowed to write to them. We should of course be honouring them but
0518  *    we need to do that using the IS_* macros defined in include/linux/fs.h.
0519  *    In any case ntfs_read_locked_inode() has nothing to do with i_flags.
0520  *
0521  * Return 0 on success and -errno on error.  In the error case, the inode will
0522  * have had make_bad_inode() executed on it.
0523  */
0524 static int ntfs_read_locked_inode(struct inode *vi)
0525 {
0526     ntfs_volume *vol = NTFS_SB(vi->i_sb);
0527     ntfs_inode *ni;
0528     struct inode *bvi;
0529     MFT_RECORD *m;
0530     ATTR_RECORD *a;
0531     STANDARD_INFORMATION *si;
0532     ntfs_attr_search_ctx *ctx;
0533     int err = 0;
0534 
0535     ntfs_debug("Entering for i_ino 0x%lx.", vi->i_ino);
0536 
0537     /* Setup the generic vfs inode parts now. */
0538     vi->i_uid = vol->uid;
0539     vi->i_gid = vol->gid;
0540     vi->i_mode = 0;
0541 
0542     /*
0543      * Initialize the ntfs specific part of @vi special casing
0544      * FILE_MFT which we need to do at mount time.
0545      */
0546     if (vi->i_ino != FILE_MFT)
0547         ntfs_init_big_inode(vi);
0548     ni = NTFS_I(vi);
0549 
0550     m = map_mft_record(ni);
0551     if (IS_ERR(m)) {
0552         err = PTR_ERR(m);
0553         goto err_out;
0554     }
0555     ctx = ntfs_attr_get_search_ctx(ni, m);
0556     if (!ctx) {
0557         err = -ENOMEM;
0558         goto unm_err_out;
0559     }
0560 
0561     if (!(m->flags & MFT_RECORD_IN_USE)) {
0562         ntfs_error(vi->i_sb, "Inode is not in use!");
0563         goto unm_err_out;
0564     }
0565     if (m->base_mft_record) {
0566         ntfs_error(vi->i_sb, "Inode is an extent inode!");
0567         goto unm_err_out;
0568     }
0569 
0570     /* Transfer information from mft record into vfs and ntfs inodes. */
0571     vi->i_generation = ni->seq_no = le16_to_cpu(m->sequence_number);
0572 
0573     /*
0574      * FIXME: Keep in mind that link_count is two for files which have both
0575      * a long file name and a short file name as separate entries, so if
0576      * we are hiding short file names this will be too high. Either we need
0577      * to account for the short file names by subtracting them or we need
0578      * to make sure we delete files even though i_nlink is not zero which
0579      * might be tricky due to vfs interactions. Need to think about this
0580      * some more when implementing the unlink command.
0581      */
0582     set_nlink(vi, le16_to_cpu(m->link_count));
0583     /*
0584      * FIXME: Reparse points can have the directory bit set even though
0585      * they would be S_IFLNK. Need to deal with this further below when we
0586      * implement reparse points / symbolic links but it will do for now.
0587      * Also if not a directory, it could be something else, rather than
0588      * a regular file. But again, will do for now.
0589      */
0590     /* Everyone gets all permissions. */
0591     vi->i_mode |= S_IRWXUGO;
0592     /* If read-only, no one gets write permissions. */
0593     if (IS_RDONLY(vi))
0594         vi->i_mode &= ~S_IWUGO;
0595     if (m->flags & MFT_RECORD_IS_DIRECTORY) {
0596         vi->i_mode |= S_IFDIR;
0597         /*
0598          * Apply the directory permissions mask set in the mount
0599          * options.
0600          */
0601         vi->i_mode &= ~vol->dmask;
0602         /* Things break without this kludge! */
0603         if (vi->i_nlink > 1)
0604             set_nlink(vi, 1);
0605     } else {
0606         vi->i_mode |= S_IFREG;
0607         /* Apply the file permissions mask set in the mount options. */
0608         vi->i_mode &= ~vol->fmask;
0609     }
0610     /*
0611      * Find the standard information attribute in the mft record. At this
0612      * stage we haven't setup the attribute list stuff yet, so this could
0613      * in fact fail if the standard information is in an extent record, but
0614      * I don't think this actually ever happens.
0615      */
0616     err = ntfs_attr_lookup(AT_STANDARD_INFORMATION, NULL, 0, 0, 0, NULL, 0,
0617             ctx);
0618     if (unlikely(err)) {
0619         if (err == -ENOENT) {
0620             /*
0621              * TODO: We should be performing a hot fix here (if the
0622              * recover mount option is set) by creating a new
0623              * attribute.
0624              */
0625             ntfs_error(vi->i_sb, "$STANDARD_INFORMATION attribute "
0626                     "is missing.");
0627         }
0628         goto unm_err_out;
0629     }
0630     a = ctx->attr;
0631     /* Get the standard information attribute value. */
0632     if ((u8 *)a + le16_to_cpu(a->data.resident.value_offset)
0633             + le32_to_cpu(a->data.resident.value_length) >
0634             (u8 *)ctx->mrec + vol->mft_record_size) {
0635         ntfs_error(vi->i_sb, "Corrupt standard information attribute in inode.");
0636         goto unm_err_out;
0637     }
0638     si = (STANDARD_INFORMATION*)((u8*)a +
0639             le16_to_cpu(a->data.resident.value_offset));
0640 
0641     /* Transfer information from the standard information into vi. */
0642     /*
0643      * Note: The i_?times do not quite map perfectly onto the NTFS times,
0644      * but they are close enough, and in the end it doesn't really matter
0645      * that much...
0646      */
0647     /*
0648      * mtime is the last change of the data within the file. Not changed
0649      * when only metadata is changed, e.g. a rename doesn't affect mtime.
0650      */
0651     vi->i_mtime = ntfs2utc(si->last_data_change_time);
0652     /*
0653      * ctime is the last change of the metadata of the file. This obviously
0654      * always changes, when mtime is changed. ctime can be changed on its
0655      * own, mtime is then not changed, e.g. when a file is renamed.
0656      */
0657     vi->i_ctime = ntfs2utc(si->last_mft_change_time);
0658     /*
0659      * Last access to the data within the file. Not changed during a rename
0660      * for example but changed whenever the file is written to.
0661      */
0662     vi->i_atime = ntfs2utc(si->last_access_time);
0663 
0664     /* Find the attribute list attribute if present. */
0665     ntfs_attr_reinit_search_ctx(ctx);
0666     err = ntfs_attr_lookup(AT_ATTRIBUTE_LIST, NULL, 0, 0, 0, NULL, 0, ctx);
0667     if (err) {
0668         if (unlikely(err != -ENOENT)) {
0669             ntfs_error(vi->i_sb, "Failed to lookup attribute list "
0670                     "attribute.");
0671             goto unm_err_out;
0672         }
0673     } else /* if (!err) */ {
0674         if (vi->i_ino == FILE_MFT)
0675             goto skip_attr_list_load;
0676         ntfs_debug("Attribute list found in inode 0x%lx.", vi->i_ino);
0677         NInoSetAttrList(ni);
0678         a = ctx->attr;
0679         if (a->flags & ATTR_COMPRESSION_MASK) {
0680             ntfs_error(vi->i_sb, "Attribute list attribute is "
0681                     "compressed.");
0682             goto unm_err_out;
0683         }
0684         if (a->flags & ATTR_IS_ENCRYPTED ||
0685                 a->flags & ATTR_IS_SPARSE) {
0686             if (a->non_resident) {
0687                 ntfs_error(vi->i_sb, "Non-resident attribute "
0688                         "list attribute is encrypted/"
0689                         "sparse.");
0690                 goto unm_err_out;
0691             }
0692             ntfs_warning(vi->i_sb, "Resident attribute list "
0693                     "attribute in inode 0x%lx is marked "
0694                     "encrypted/sparse which is not true.  "
0695                     "However, Windows allows this and "
0696                     "chkdsk does not detect or correct it "
0697                     "so we will just ignore the invalid "
0698                     "flags and pretend they are not set.",
0699                     vi->i_ino);
0700         }
0701         /* Now allocate memory for the attribute list. */
0702         ni->attr_list_size = (u32)ntfs_attr_size(a);
0703         ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size);
0704         if (!ni->attr_list) {
0705             ntfs_error(vi->i_sb, "Not enough memory to allocate "
0706                     "buffer for attribute list.");
0707             err = -ENOMEM;
0708             goto unm_err_out;
0709         }
0710         if (a->non_resident) {
0711             NInoSetAttrListNonResident(ni);
0712             if (a->data.non_resident.lowest_vcn) {
0713                 ntfs_error(vi->i_sb, "Attribute list has non "
0714                         "zero lowest_vcn.");
0715                 goto unm_err_out;
0716             }
0717             /*
0718              * Setup the runlist. No need for locking as we have
0719              * exclusive access to the inode at this time.
0720              */
0721             ni->attr_list_rl.rl = ntfs_mapping_pairs_decompress(vol,
0722                     a, NULL);
0723             if (IS_ERR(ni->attr_list_rl.rl)) {
0724                 err = PTR_ERR(ni->attr_list_rl.rl);
0725                 ni->attr_list_rl.rl = NULL;
0726                 ntfs_error(vi->i_sb, "Mapping pairs "
0727                         "decompression failed.");
0728                 goto unm_err_out;
0729             }
0730             /* Now load the attribute list. */
0731             if ((err = load_attribute_list(vol, &ni->attr_list_rl,
0732                     ni->attr_list, ni->attr_list_size,
0733                     sle64_to_cpu(a->data.non_resident.
0734                     initialized_size)))) {
0735                 ntfs_error(vi->i_sb, "Failed to load "
0736                         "attribute list attribute.");
0737                 goto unm_err_out;
0738             }
0739         } else /* if (!a->non_resident) */ {
0740             if ((u8*)a + le16_to_cpu(a->data.resident.value_offset)
0741                     + le32_to_cpu(
0742                     a->data.resident.value_length) >
0743                     (u8*)ctx->mrec + vol->mft_record_size) {
0744                 ntfs_error(vi->i_sb, "Corrupt attribute list "
0745                         "in inode.");
0746                 goto unm_err_out;
0747             }
0748             /* Now copy the attribute list. */
0749             memcpy(ni->attr_list, (u8*)a + le16_to_cpu(
0750                     a->data.resident.value_offset),
0751                     le32_to_cpu(
0752                     a->data.resident.value_length));
0753         }
0754     }
0755 skip_attr_list_load:
0756     /*
0757      * If an attribute list is present we now have the attribute list value
0758      * in ntfs_ino->attr_list and it is ntfs_ino->attr_list_size bytes.
0759      */
0760     if (S_ISDIR(vi->i_mode)) {
0761         loff_t bvi_size;
0762         ntfs_inode *bni;
0763         INDEX_ROOT *ir;
0764         u8 *ir_end, *index_end;
0765 
0766         /* It is a directory, find index root attribute. */
0767         ntfs_attr_reinit_search_ctx(ctx);
0768         err = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE,
0769                 0, NULL, 0, ctx);
0770         if (unlikely(err)) {
0771             if (err == -ENOENT) {
0772                 // FIXME: File is corrupt! Hot-fix with empty
0773                 // index root attribute if recovery option is
0774                 // set.
0775                 ntfs_error(vi->i_sb, "$INDEX_ROOT attribute "
0776                         "is missing.");
0777             }
0778             goto unm_err_out;
0779         }
0780         a = ctx->attr;
0781         /* Set up the state. */
0782         if (unlikely(a->non_resident)) {
0783             ntfs_error(vol->sb, "$INDEX_ROOT attribute is not "
0784                     "resident.");
0785             goto unm_err_out;
0786         }
0787         /* Ensure the attribute name is placed before the value. */
0788         if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
0789                 le16_to_cpu(a->data.resident.value_offset)))) {
0790             ntfs_error(vol->sb, "$INDEX_ROOT attribute name is "
0791                     "placed after the attribute value.");
0792             goto unm_err_out;
0793         }
0794         /*
0795          * Compressed/encrypted index root just means that the newly
0796          * created files in that directory should be created compressed/
0797          * encrypted. However index root cannot be both compressed and
0798          * encrypted.
0799          */
0800         if (a->flags & ATTR_COMPRESSION_MASK)
0801             NInoSetCompressed(ni);
0802         if (a->flags & ATTR_IS_ENCRYPTED) {
0803             if (a->flags & ATTR_COMPRESSION_MASK) {
0804                 ntfs_error(vi->i_sb, "Found encrypted and "
0805                         "compressed attribute.");
0806                 goto unm_err_out;
0807             }
0808             NInoSetEncrypted(ni);
0809         }
0810         if (a->flags & ATTR_IS_SPARSE)
0811             NInoSetSparse(ni);
0812         ir = (INDEX_ROOT*)((u8*)a +
0813                 le16_to_cpu(a->data.resident.value_offset));
0814         ir_end = (u8*)ir + le32_to_cpu(a->data.resident.value_length);
0815         if (ir_end > (u8*)ctx->mrec + vol->mft_record_size) {
0816             ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is "
0817                     "corrupt.");
0818             goto unm_err_out;
0819         }
0820         index_end = (u8*)&ir->index +
0821                 le32_to_cpu(ir->index.index_length);
0822         if (index_end > ir_end) {
0823             ntfs_error(vi->i_sb, "Directory index is corrupt.");
0824             goto unm_err_out;
0825         }
0826         if (ir->type != AT_FILE_NAME) {
0827             ntfs_error(vi->i_sb, "Indexed attribute is not "
0828                     "$FILE_NAME.");
0829             goto unm_err_out;
0830         }
0831         if (ir->collation_rule != COLLATION_FILE_NAME) {
0832             ntfs_error(vi->i_sb, "Index collation rule is not "
0833                     "COLLATION_FILE_NAME.");
0834             goto unm_err_out;
0835         }
0836         ni->itype.index.collation_rule = ir->collation_rule;
0837         ni->itype.index.block_size = le32_to_cpu(ir->index_block_size);
0838         if (ni->itype.index.block_size &
0839                 (ni->itype.index.block_size - 1)) {
0840             ntfs_error(vi->i_sb, "Index block size (%u) is not a "
0841                     "power of two.",
0842                     ni->itype.index.block_size);
0843             goto unm_err_out;
0844         }
0845         if (ni->itype.index.block_size > PAGE_SIZE) {
0846             ntfs_error(vi->i_sb, "Index block size (%u) > "
0847                     "PAGE_SIZE (%ld) is not "
0848                     "supported.  Sorry.",
0849                     ni->itype.index.block_size,
0850                     PAGE_SIZE);
0851             err = -EOPNOTSUPP;
0852             goto unm_err_out;
0853         }
0854         if (ni->itype.index.block_size < NTFS_BLOCK_SIZE) {
0855             ntfs_error(vi->i_sb, "Index block size (%u) < "
0856                     "NTFS_BLOCK_SIZE (%i) is not "
0857                     "supported.  Sorry.",
0858                     ni->itype.index.block_size,
0859                     NTFS_BLOCK_SIZE);
0860             err = -EOPNOTSUPP;
0861             goto unm_err_out;
0862         }
0863         ni->itype.index.block_size_bits =
0864                 ffs(ni->itype.index.block_size) - 1;
0865         /* Determine the size of a vcn in the directory index. */
0866         if (vol->cluster_size <= ni->itype.index.block_size) {
0867             ni->itype.index.vcn_size = vol->cluster_size;
0868             ni->itype.index.vcn_size_bits = vol->cluster_size_bits;
0869         } else {
0870             ni->itype.index.vcn_size = vol->sector_size;
0871             ni->itype.index.vcn_size_bits = vol->sector_size_bits;
0872         }
0873 
0874         /* Setup the index allocation attribute, even if not present. */
0875         NInoSetMstProtected(ni);
0876         ni->type = AT_INDEX_ALLOCATION;
0877         ni->name = I30;
0878         ni->name_len = 4;
0879 
0880         if (!(ir->index.flags & LARGE_INDEX)) {
0881             /* No index allocation. */
0882             vi->i_size = ni->initialized_size =
0883                     ni->allocated_size = 0;
0884             /* We are done with the mft record, so we release it. */
0885             ntfs_attr_put_search_ctx(ctx);
0886             unmap_mft_record(ni);
0887             m = NULL;
0888             ctx = NULL;
0889             goto skip_large_dir_stuff;
0890         } /* LARGE_INDEX: Index allocation present. Setup state. */
0891         NInoSetIndexAllocPresent(ni);
0892         /* Find index allocation attribute. */
0893         ntfs_attr_reinit_search_ctx(ctx);
0894         err = ntfs_attr_lookup(AT_INDEX_ALLOCATION, I30, 4,
0895                 CASE_SENSITIVE, 0, NULL, 0, ctx);
0896         if (unlikely(err)) {
0897             if (err == -ENOENT)
0898                 ntfs_error(vi->i_sb, "$INDEX_ALLOCATION "
0899                         "attribute is not present but "
0900                         "$INDEX_ROOT indicated it is.");
0901             else
0902                 ntfs_error(vi->i_sb, "Failed to lookup "
0903                         "$INDEX_ALLOCATION "
0904                         "attribute.");
0905             goto unm_err_out;
0906         }
0907         a = ctx->attr;
0908         if (!a->non_resident) {
0909             ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute "
0910                     "is resident.");
0911             goto unm_err_out;
0912         }
0913         /*
0914          * Ensure the attribute name is placed before the mapping pairs
0915          * array.
0916          */
0917         if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
0918                 le16_to_cpu(
0919                 a->data.non_resident.mapping_pairs_offset)))) {
0920             ntfs_error(vol->sb, "$INDEX_ALLOCATION attribute name "
0921                     "is placed after the mapping pairs "
0922                     "array.");
0923             goto unm_err_out;
0924         }
0925         if (a->flags & ATTR_IS_ENCRYPTED) {
0926             ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute "
0927                     "is encrypted.");
0928             goto unm_err_out;
0929         }
0930         if (a->flags & ATTR_IS_SPARSE) {
0931             ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute "
0932                     "is sparse.");
0933             goto unm_err_out;
0934         }
0935         if (a->flags & ATTR_COMPRESSION_MASK) {
0936             ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute "
0937                     "is compressed.");
0938             goto unm_err_out;
0939         }
0940         if (a->data.non_resident.lowest_vcn) {
0941             ntfs_error(vi->i_sb, "First extent of "
0942                     "$INDEX_ALLOCATION attribute has non "
0943                     "zero lowest_vcn.");
0944             goto unm_err_out;
0945         }
0946         vi->i_size = sle64_to_cpu(a->data.non_resident.data_size);
0947         ni->initialized_size = sle64_to_cpu(
0948                 a->data.non_resident.initialized_size);
0949         ni->allocated_size = sle64_to_cpu(
0950                 a->data.non_resident.allocated_size);
0951         /*
0952          * We are done with the mft record, so we release it. Otherwise
0953          * we would deadlock in ntfs_attr_iget().
0954          */
0955         ntfs_attr_put_search_ctx(ctx);
0956         unmap_mft_record(ni);
0957         m = NULL;
0958         ctx = NULL;
0959         /* Get the index bitmap attribute inode. */
0960         bvi = ntfs_attr_iget(vi, AT_BITMAP, I30, 4);
0961         if (IS_ERR(bvi)) {
0962             ntfs_error(vi->i_sb, "Failed to get bitmap attribute.");
0963             err = PTR_ERR(bvi);
0964             goto unm_err_out;
0965         }
0966         bni = NTFS_I(bvi);
0967         if (NInoCompressed(bni) || NInoEncrypted(bni) ||
0968                 NInoSparse(bni)) {
0969             ntfs_error(vi->i_sb, "$BITMAP attribute is compressed "
0970                     "and/or encrypted and/or sparse.");
0971             goto iput_unm_err_out;
0972         }
0973         /* Consistency check bitmap size vs. index allocation size. */
0974         bvi_size = i_size_read(bvi);
0975         if ((bvi_size << 3) < (vi->i_size >>
0976                 ni->itype.index.block_size_bits)) {
0977             ntfs_error(vi->i_sb, "Index bitmap too small (0x%llx) "
0978                     "for index allocation (0x%llx).",
0979                     bvi_size << 3, vi->i_size);
0980             goto iput_unm_err_out;
0981         }
0982         /* No longer need the bitmap attribute inode. */
0983         iput(bvi);
0984 skip_large_dir_stuff:
0985         /* Setup the operations for this inode. */
0986         vi->i_op = &ntfs_dir_inode_ops;
0987         vi->i_fop = &ntfs_dir_ops;
0988         vi->i_mapping->a_ops = &ntfs_mst_aops;
0989     } else {
0990         /* It is a file. */
0991         ntfs_attr_reinit_search_ctx(ctx);
0992 
0993         /* Setup the data attribute, even if not present. */
0994         ni->type = AT_DATA;
0995         ni->name = NULL;
0996         ni->name_len = 0;
0997 
0998         /* Find first extent of the unnamed data attribute. */
0999         err = ntfs_attr_lookup(AT_DATA, NULL, 0, 0, 0, NULL, 0, ctx);
1000         if (unlikely(err)) {
1001             vi->i_size = ni->initialized_size =
1002                     ni->allocated_size = 0;
1003             if (err != -ENOENT) {
1004                 ntfs_error(vi->i_sb, "Failed to lookup $DATA "
1005                         "attribute.");
1006                 goto unm_err_out;
1007             }
1008             /*
1009              * FILE_Secure does not have an unnamed $DATA
1010              * attribute, so we special case it here.
1011              */
1012             if (vi->i_ino == FILE_Secure)
1013                 goto no_data_attr_special_case;
1014             /*
1015              * Most if not all the system files in the $Extend
1016              * system directory do not have unnamed data
1017              * attributes so we need to check if the parent
1018              * directory of the file is FILE_Extend and if it is
1019              * ignore this error. To do this we need to get the
1020              * name of this inode from the mft record as the name
1021              * contains the back reference to the parent directory.
1022              */
1023             if (ntfs_is_extended_system_file(ctx) > 0)
1024                 goto no_data_attr_special_case;
1025             // FIXME: File is corrupt! Hot-fix with empty data
1026             // attribute if recovery option is set.
1027             ntfs_error(vi->i_sb, "$DATA attribute is missing.");
1028             goto unm_err_out;
1029         }
1030         a = ctx->attr;
1031         /* Setup the state. */
1032         if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) {
1033             if (a->flags & ATTR_COMPRESSION_MASK) {
1034                 NInoSetCompressed(ni);
1035                 if (vol->cluster_size > 4096) {
1036                     ntfs_error(vi->i_sb, "Found "
1037                             "compressed data but "
1038                             "compression is "
1039                             "disabled due to "
1040                             "cluster size (%i) > "
1041                             "4kiB.",
1042                             vol->cluster_size);
1043                     goto unm_err_out;
1044                 }
1045                 if ((a->flags & ATTR_COMPRESSION_MASK)
1046                         != ATTR_IS_COMPRESSED) {
1047                     ntfs_error(vi->i_sb, "Found unknown "
1048                             "compression method "
1049                             "or corrupt file.");
1050                     goto unm_err_out;
1051                 }
1052             }
1053             if (a->flags & ATTR_IS_SPARSE)
1054                 NInoSetSparse(ni);
1055         }
1056         if (a->flags & ATTR_IS_ENCRYPTED) {
1057             if (NInoCompressed(ni)) {
1058                 ntfs_error(vi->i_sb, "Found encrypted and "
1059                         "compressed data.");
1060                 goto unm_err_out;
1061             }
1062             NInoSetEncrypted(ni);
1063         }
1064         if (a->non_resident) {
1065             NInoSetNonResident(ni);
1066             if (NInoCompressed(ni) || NInoSparse(ni)) {
1067                 if (NInoCompressed(ni) && a->data.non_resident.
1068                         compression_unit != 4) {
1069                     ntfs_error(vi->i_sb, "Found "
1070                             "non-standard "
1071                             "compression unit (%u "
1072                             "instead of 4).  "
1073                             "Cannot handle this.",
1074                             a->data.non_resident.
1075                             compression_unit);
1076                     err = -EOPNOTSUPP;
1077                     goto unm_err_out;
1078                 }
1079                 if (a->data.non_resident.compression_unit) {
1080                     ni->itype.compressed.block_size = 1U <<
1081                             (a->data.non_resident.
1082                             compression_unit +
1083                             vol->cluster_size_bits);
1084                     ni->itype.compressed.block_size_bits =
1085                             ffs(ni->itype.
1086                             compressed.
1087                             block_size) - 1;
1088                     ni->itype.compressed.block_clusters =
1089                             1U << a->data.
1090                             non_resident.
1091                             compression_unit;
1092                 } else {
1093                     ni->itype.compressed.block_size = 0;
1094                     ni->itype.compressed.block_size_bits =
1095                             0;
1096                     ni->itype.compressed.block_clusters =
1097                             0;
1098                 }
1099                 ni->itype.compressed.size = sle64_to_cpu(
1100                         a->data.non_resident.
1101                         compressed_size);
1102             }
1103             if (a->data.non_resident.lowest_vcn) {
1104                 ntfs_error(vi->i_sb, "First extent of $DATA "
1105                         "attribute has non zero "
1106                         "lowest_vcn.");
1107                 goto unm_err_out;
1108             }
1109             vi->i_size = sle64_to_cpu(
1110                     a->data.non_resident.data_size);
1111             ni->initialized_size = sle64_to_cpu(
1112                     a->data.non_resident.initialized_size);
1113             ni->allocated_size = sle64_to_cpu(
1114                     a->data.non_resident.allocated_size);
1115         } else { /* Resident attribute. */
1116             vi->i_size = ni->initialized_size = le32_to_cpu(
1117                     a->data.resident.value_length);
1118             ni->allocated_size = le32_to_cpu(a->length) -
1119                     le16_to_cpu(
1120                     a->data.resident.value_offset);
1121             if (vi->i_size > ni->allocated_size) {
1122                 ntfs_error(vi->i_sb, "Resident data attribute "
1123                         "is corrupt (size exceeds "
1124                         "allocation).");
1125                 goto unm_err_out;
1126             }
1127         }
1128 no_data_attr_special_case:
1129         /* We are done with the mft record, so we release it. */
1130         ntfs_attr_put_search_ctx(ctx);
1131         unmap_mft_record(ni);
1132         m = NULL;
1133         ctx = NULL;
1134         /* Setup the operations for this inode. */
1135         vi->i_op = &ntfs_file_inode_ops;
1136         vi->i_fop = &ntfs_file_ops;
1137         vi->i_mapping->a_ops = &ntfs_normal_aops;
1138         if (NInoMstProtected(ni))
1139             vi->i_mapping->a_ops = &ntfs_mst_aops;
1140         else if (NInoCompressed(ni))
1141             vi->i_mapping->a_ops = &ntfs_compressed_aops;
1142     }
1143     /*
1144      * The number of 512-byte blocks used on disk (for stat). This is in so
1145      * far inaccurate as it doesn't account for any named streams or other
1146      * special non-resident attributes, but that is how Windows works, too,
1147      * so we are at least consistent with Windows, if not entirely
1148      * consistent with the Linux Way. Doing it the Linux Way would cause a
1149      * significant slowdown as it would involve iterating over all
1150      * attributes in the mft record and adding the allocated/compressed
1151      * sizes of all non-resident attributes present to give us the Linux
1152      * correct size that should go into i_blocks (after division by 512).
1153      */
1154     if (S_ISREG(vi->i_mode) && (NInoCompressed(ni) || NInoSparse(ni)))
1155         vi->i_blocks = ni->itype.compressed.size >> 9;
1156     else
1157         vi->i_blocks = ni->allocated_size >> 9;
1158     ntfs_debug("Done.");
1159     return 0;
1160 iput_unm_err_out:
1161     iput(bvi);
1162 unm_err_out:
1163     if (!err)
1164         err = -EIO;
1165     if (ctx)
1166         ntfs_attr_put_search_ctx(ctx);
1167     if (m)
1168         unmap_mft_record(ni);
1169 err_out:
1170     ntfs_error(vol->sb, "Failed with error code %i.  Marking corrupt "
1171             "inode 0x%lx as bad.  Run chkdsk.", err, vi->i_ino);
1172     make_bad_inode(vi);
1173     if (err != -EOPNOTSUPP && err != -ENOMEM)
1174         NVolSetErrors(vol);
1175     return err;
1176 }
1177 
1178 /**
1179  * ntfs_read_locked_attr_inode - read an attribute inode from its base inode
1180  * @base_vi:    base inode
1181  * @vi:     attribute inode to read
1182  *
1183  * ntfs_read_locked_attr_inode() is called from ntfs_attr_iget() to read the
1184  * attribute inode described by @vi into memory from the mft record of the
1185  * base inode @base_vi.
1186  *
1187  * ntfs_read_locked_attr_inode() maps, pins and locks the base inode for
1188  * reading and looks up the attribute described by @vi before setting up the
1189  * necessary fields in @vi as well as initializing the ntfs inode.
      *
      * The steps performed are, in order:
      *  - copy uid, gid, link count, timestamps and sequence number from the
      *    base inode and set i_mode to the base permissions with the file
      *    type bits (S_IFMT) cleared;
      *  - map the base mft record and look up the attribute identified by the
      *    type, name and name_len already stored in the ntfs inode of @vi
      *    (case sensitive);
      *  - derive the compressed/sparse/encrypted/non-resident state from the
      *    attribute record and reject the combinations checked for below;
      *  - set i_size, initialized_size and allocated_size from the attribute
      *    record, select the address space operations and set i_blocks;
      *  - take a reference on @base_vi with igrab() and attach the base ntfs
      *    inode to the attribute inode (nr_extents is set to -1).
1190  *
1191  * Q: What locks are held when the function is called?
1192  * A: i_state has I_NEW set, hence the inode is locked, also
1193  *    i_count is set to 1, so it is not going to go away
1194  *
1195  * Return 0 on success and -errno on error.  In the error case, the inode will
1196  * have had make_bad_inode() executed on it.  Failure paths that do not set a
      * specific error code return -EIO.  The volume is flagged with
      * NVolSetErrors() for every error except -ENOMEM.
1197  *
1198  * Note this cannot be called for AT_INDEX_ALLOCATION.
1199  */
1200 static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
1201 {
1202     ntfs_volume *vol = NTFS_SB(vi->i_sb);
1203     ntfs_inode *ni, *base_ni;
1204     MFT_RECORD *m;
1205     ATTR_RECORD *a;
1206     ntfs_attr_search_ctx *ctx;
1207     int err = 0;
1208 
1209     ntfs_debug("Entering for i_ino 0x%lx.", vi->i_ino);
1210 
1211     ntfs_init_big_inode(vi);
1212 
1213     ni  = NTFS_I(vi);
1214     base_ni = NTFS_I(base_vi);
1215 
1216     /* Just mirror the values from the base inode. */
1217     vi->i_uid   = base_vi->i_uid;
1218     vi->i_gid   = base_vi->i_gid;
1219     set_nlink(vi, base_vi->i_nlink);
1220     vi->i_mtime = base_vi->i_mtime;
1221     vi->i_ctime = base_vi->i_ctime;
1222     vi->i_atime = base_vi->i_atime;
1223     vi->i_generation = ni->seq_no = base_ni->seq_no;
1224 
1225     /* Set inode type to zero but preserve permissions. */
1226     vi->i_mode  = base_vi->i_mode & ~S_IFMT;
1227 
         /* Map the mft record for the base inode. */
1228     m = map_mft_record(base_ni);
1229     if (IS_ERR(m)) {
1230         err = PTR_ERR(m);
             /* Nothing is mapped yet, so skip the unmap in unm_err_out. */
1231         goto err_out;
1232     }
1233     ctx = ntfs_attr_get_search_ctx(base_ni, m);
1234     if (!ctx) {
1235         err = -ENOMEM;
1236         goto unm_err_out;
1237     }
1238     /* Find the attribute. */
1239     err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
1240             CASE_SENSITIVE, 0, NULL, 0, ctx);
1241     if (unlikely(err))
1242         goto unm_err_out;
         /* The attribute record the lookup left in the search context. */
1243     a = ctx->attr;
1244     if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) {
1245         if (a->flags & ATTR_COMPRESSION_MASK) {
1246             NInoSetCompressed(ni);
                 /* Only the unnamed $DATA attribute may be compressed. */
1247             if ((ni->type != AT_DATA) || (ni->type == AT_DATA &&
1248                     ni->name_len)) {
1249                 ntfs_error(vi->i_sb, "Found compressed "
1250                         "non-data or named data "
1251                         "attribute.  Please report "
1252                         "you saw this message to "
1253                         "linux-ntfs-dev@lists."
1254                         "sourceforge.net");
1255                 goto unm_err_out;
1256             }
1257             if (vol->cluster_size > 4096) {
1258                 ntfs_error(vi->i_sb, "Found compressed "
1259                         "attribute but compression is "
1260                         "disabled due to cluster size "
1261                         "(%i) > 4kiB.",
1262                         vol->cluster_size);
1263                 goto unm_err_out;
1264             }
                 /* Any compression bits other than ATTR_IS_COMPRESSED are rejected. */
1265             if ((a->flags & ATTR_COMPRESSION_MASK) !=
1266                     ATTR_IS_COMPRESSED) {
1267                 ntfs_error(vi->i_sb, "Found unknown "
1268                         "compression method.");
1269                 goto unm_err_out;
1270             }
1271         }
1272         /*
1273          * The compressed/sparse flag set in an index root just means
1274          * to compress all files.
1275          */
1276         if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) {
1277             ntfs_error(vi->i_sb, "Found mst protected attribute "
1278                     "but the attribute is %s.  Please "
1279                     "report you saw this message to "
1280                     "linux-ntfs-dev@lists.sourceforge.net",
1281                     NInoCompressed(ni) ? "compressed" :
1282                     "sparse");
1283             goto unm_err_out;
1284         }
1285         if (a->flags & ATTR_IS_SPARSE)
1286             NInoSetSparse(ni);
1287     }
1288     if (a->flags & ATTR_IS_ENCRYPTED) {
             /* Encrypted and compressed at the same time is rejected. */
1289         if (NInoCompressed(ni)) {
1290             ntfs_error(vi->i_sb, "Found encrypted and compressed "
1291                     "data.");
1292             goto unm_err_out;
1293         }
1294         /*
1295          * The encryption flag set in an index root just means to
1296          * encrypt all files.
1297          */
1298         if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) {
1299             ntfs_error(vi->i_sb, "Found mst protected attribute "
1300                     "but the attribute is encrypted.  "
1301                     "Please report you saw this message "
1302                     "to linux-ntfs-dev@lists.sourceforge."
1303                     "net");
1304             goto unm_err_out;
1305         }
1306         if (ni->type != AT_DATA) {
1307             ntfs_error(vi->i_sb, "Found encrypted non-data "
1308                     "attribute.");
1309             goto unm_err_out;
1310         }
1311         NInoSetEncrypted(ni);
1312     }
1313     if (!a->non_resident) {
1314         /* Ensure the attribute name is placed before the value. */
1315         if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
1316                 le16_to_cpu(a->data.resident.value_offset)))) {
1317             ntfs_error(vol->sb, "Attribute name is placed after "
1318                     "the attribute value.");
1319             goto unm_err_out;
1320         }
1321         if (NInoMstProtected(ni)) {
1322             ntfs_error(vi->i_sb, "Found mst protected attribute "
1323                     "but the attribute is resident.  "
1324                     "Please report you saw this message to "
1325                     "linux-ntfs-dev@lists.sourceforge.net");
1326             goto unm_err_out;
1327         }
1328         vi->i_size = ni->initialized_size = le32_to_cpu(
1329                 a->data.resident.value_length);
             /*
              * Space available to the value: the attribute record length
              * minus the offset at which the value starts.
              */
1330         ni->allocated_size = le32_to_cpu(a->length) -
1331                 le16_to_cpu(a->data.resident.value_offset);
1332         if (vi->i_size > ni->allocated_size) {
1333             ntfs_error(vi->i_sb, "Resident attribute is corrupt "
1334                     "(size exceeds allocation).");
1335             goto unm_err_out;
1336         }
1337     } else {
1338         NInoSetNonResident(ni);
1339         /*
1340          * Ensure the attribute name is placed before the mapping pairs
1341          * array.
1342          */
1343         if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
1344                 le16_to_cpu(
1345                 a->data.non_resident.mapping_pairs_offset)))) {
1346             ntfs_error(vol->sb, "Attribute name is placed after "
1347                     "the mapping pairs array.");
1348             goto unm_err_out;
1349         }
1350         if (NInoCompressed(ni) || NInoSparse(ni)) {
1351             if (NInoCompressed(ni) && a->data.non_resident.
1352                     compression_unit != 4) {
1353                 ntfs_error(vi->i_sb, "Found non-standard "
1354                         "compression unit (%u instead "
1355                         "of 4).  Cannot handle this.",
1356                         a->data.non_resident.
1357                         compression_unit);
1358                 err = -EOPNOTSUPP;
1359                 goto unm_err_out;
1360             }
                 /*
                  * NOTE(review): compression_unit is only compared against 4
                  * above when the attribute is compressed.  For a sparse-only
                  * attribute the value from the attribute record is used as
                  * a shift count below without a range check -- confirm it is
                  * validated elsewhere.
                  */
1361             if (a->data.non_resident.compression_unit) {
1362                 ni->itype.compressed.block_size = 1U <<
1363                         (a->data.non_resident.
1364                         compression_unit +
1365                         vol->cluster_size_bits);
1366                 ni->itype.compressed.block_size_bits =
1367                         ffs(ni->itype.compressed.
1368                         block_size) - 1;
1369                 ni->itype.compressed.block_clusters = 1U <<
1370                         a->data.non_resident.
1371                         compression_unit;
1372             } else {
1373                 ni->itype.compressed.block_size = 0;
1374                 ni->itype.compressed.block_size_bits = 0;
1375                 ni->itype.compressed.block_clusters = 0;
1376             }
1377             ni->itype.compressed.size = sle64_to_cpu(
1378                     a->data.non_resident.compressed_size);
1379         }
1380         if (a->data.non_resident.lowest_vcn) {
1381             ntfs_error(vi->i_sb, "First extent of attribute has "
1382                     "non-zero lowest_vcn.");
1383             goto unm_err_out;
1384         }
1385         vi->i_size = sle64_to_cpu(a->data.non_resident.data_size);
1386         ni->initialized_size = sle64_to_cpu(
1387                 a->data.non_resident.initialized_size);
1388         ni->allocated_size = sle64_to_cpu(
1389                 a->data.non_resident.allocated_size);
1390     }
         /*
          * Default to the normal address space operations; mst protection
          * takes precedence over compression when both flags are set.
          */
1391     vi->i_mapping->a_ops = &ntfs_normal_aops;
1392     if (NInoMstProtected(ni))
1393         vi->i_mapping->a_ops = &ntfs_mst_aops;
1394     else if (NInoCompressed(ni))
1395         vi->i_mapping->a_ops = &ntfs_compressed_aops;
         /*
          * i_blocks is a byte count shifted right by 9, i.e. in units of 512
          * bytes.  Compressed/sparse attributes other than an index root use
          * the compressed size, everything else uses the allocated size.
          */
1396     if ((NInoCompressed(ni) || NInoSparse(ni)) && ni->type != AT_INDEX_ROOT)
1397         vi->i_blocks = ni->itype.compressed.size >> 9;
1398     else
1399         vi->i_blocks = ni->allocated_size >> 9;
1400     /*
1401      * Make sure the base inode does not go away and attach it to the
1402      * attribute inode.
1403      */
1404     igrab(base_vi);
1405     ni->ext.base_ntfs_ino = base_ni;
1406     ni->nr_extents = -1;
1407 
         /* Success: release the search context and the mapped mft record. */
1408     ntfs_attr_put_search_ctx(ctx);
1409     unmap_mft_record(base_ni);
1410 
1411     ntfs_debug("Done.");
1412     return 0;
1413 
1414 unm_err_out:
         /* Failure paths that did not set a specific code report -EIO. */
1415     if (!err)
1416         err = -EIO;
         /* ctx is NULL here when ntfs_attr_get_search_ctx() failed. */
1417     if (ctx)
1418         ntfs_attr_put_search_ctx(ctx);
1419     unmap_mft_record(base_ni);
1420 err_out:
         /*
          * NOTE(review): the message says the base inode is marked bad as
          * well, but only @vi is passed to make_bad_inode() below.
          */
1421     ntfs_error(vol->sb, "Failed with error code %i while reading attribute "
1422             "inode (mft_no 0x%lx, type 0x%x, name_len %i).  "
1423             "Marking corrupt inode and base inode 0x%lx as bad.  "
1424             "Run chkdsk.", err, vi->i_ino, ni->type, ni->name_len,
1425             base_vi->i_ino);
1426     make_bad_inode(vi);
         /*
          * Do not flag the volume as having errors for -ENOMEM.
          * NOTE(review): ntfs_read_locked_inode() also excludes -EOPNOTSUPP
          * at this point, whereas this function does not even though it can
          * fail with -EOPNOTSUPP above -- confirm whether that is intended.
          */
1427     if (err != -ENOMEM)
1428         NVolSetErrors(vol);
1429     return err;
1430 }
1431 
1432 /**
1433  * ntfs_read_locked_index_inode - read an index inode from its base inode
1434  * @base_vi:    base inode
1435  * @vi:     index inode to read
1436  *
1437  * ntfs_read_locked_index_inode() is called from ntfs_index_iget() to read the
1438  * index inode described by @vi into memory from the base mft record described
1439  * by @base_vi.
1440  *
1441  * ntfs_read_locked_index_inode() maps, pins and locks the base inode for
1442  * reading and looks up the attributes relating to the index described by @vi
1443  * before setting up the necessary fields in @vi as well as initializing the
1444  * ntfs inode.
1445  *
1446  * Note, index inodes are essentially attribute inodes (NInoAttr() is true)
1447  * with the attribute type set to AT_INDEX_ALLOCATION.  Apart from that, they
1448  * are setup like directory inodes since directories are a special case of
1449  * indices so they need to be treated in much the same way.  Most importantly,
1450  * for small indices the index allocation attribute might not actually exist.
1451  * However, the index root attribute always exists but this does not need to
1452  * have an inode associated with it and this is why we define a new inode type
1453  * index.  Also, like for directories, we need to have an attribute inode for
1454  * the bitmap attribute corresponding to the index allocation attribute and we
1455  * can store this in the appropriate field of the inode, just like we do for
1456  * normal directory inodes.
1457  *
1458  * Q: What locks are held when the function is called?
1459  * A: i_state has I_NEW set, hence the inode is locked, also
1460  *    i_count is set to 1, so it is not going to go away
1461  *
1462  * Return 0 on success and -errno on error.  In the error case, the inode will
1463  * have had make_bad_inode() executed on it.
1464  */
1465 static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
1466 {
1467     loff_t bvi_size;
1468     ntfs_volume *vol = NTFS_SB(vi->i_sb);
1469     ntfs_inode *ni, *base_ni, *bni;
1470     struct inode *bvi;
1471     MFT_RECORD *m;
1472     ATTR_RECORD *a;
1473     ntfs_attr_search_ctx *ctx;
1474     INDEX_ROOT *ir;
1475     u8 *ir_end, *index_end;
1476     int err = 0;
1477 
1478     ntfs_debug("Entering for i_ino 0x%lx.", vi->i_ino);
1479     ntfs_init_big_inode(vi);
1480     ni  = NTFS_I(vi);
1481     base_ni = NTFS_I(base_vi);
1482     /* Just mirror the values from the base inode. */
1483     vi->i_uid   = base_vi->i_uid;
1484     vi->i_gid   = base_vi->i_gid;
1485     set_nlink(vi, base_vi->i_nlink);
1486     vi->i_mtime = base_vi->i_mtime;
1487     vi->i_ctime = base_vi->i_ctime;
1488     vi->i_atime = base_vi->i_atime;
1489     vi->i_generation = ni->seq_no = base_ni->seq_no;
1490     /* Set inode type to zero but preserve permissions. */
1491     vi->i_mode  = base_vi->i_mode & ~S_IFMT;
1492     /* Map the mft record for the base inode. */
1493     m = map_mft_record(base_ni);
1494     if (IS_ERR(m)) {
1495         err = PTR_ERR(m);
1496         goto err_out;
1497     }
1498     ctx = ntfs_attr_get_search_ctx(base_ni, m);
1499     if (!ctx) {
1500         err = -ENOMEM;
1501         goto unm_err_out;
1502     }
1503     /* Find the index root attribute. */
1504     err = ntfs_attr_lookup(AT_INDEX_ROOT, ni->name, ni->name_len,
1505             CASE_SENSITIVE, 0, NULL, 0, ctx);
1506     if (unlikely(err)) {
1507         if (err == -ENOENT)
1508             ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is "
1509                     "missing.");
1510         goto unm_err_out;
1511     }
1512     a = ctx->attr;
1513     /* Set up the state. */
1514     if (unlikely(a->non_resident)) {
1515         ntfs_error(vol->sb, "$INDEX_ROOT attribute is not resident.");
1516         goto unm_err_out;
1517     }
1518     /* Ensure the attribute name is placed before the value. */
1519     if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
1520             le16_to_cpu(a->data.resident.value_offset)))) {
1521         ntfs_error(vol->sb, "$INDEX_ROOT attribute name is placed "
1522                 "after the attribute value.");
1523         goto unm_err_out;
1524     }
1525     /*
1526      * Compressed/encrypted/sparse index root is not allowed, except for
1527      * directories of course but those are not dealt with here.
1528      */
1529     if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_ENCRYPTED |
1530             ATTR_IS_SPARSE)) {
1531         ntfs_error(vi->i_sb, "Found compressed/encrypted/sparse index "
1532                 "root attribute.");
1533         goto unm_err_out;
1534     }
1535     ir = (INDEX_ROOT*)((u8*)a + le16_to_cpu(a->data.resident.value_offset));
1536     ir_end = (u8*)ir + le32_to_cpu(a->data.resident.value_length);
1537     if (ir_end > (u8*)ctx->mrec + vol->mft_record_size) {
1538         ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is corrupt.");
1539         goto unm_err_out;
1540     }
1541     index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length);
1542     if (index_end > ir_end) {
1543         ntfs_error(vi->i_sb, "Index is corrupt.");
1544         goto unm_err_out;
1545     }
1546     if (ir->type) {
1547         ntfs_error(vi->i_sb, "Index type is not 0 (type is 0x%x).",
1548                 le32_to_cpu(ir->type));
1549         goto unm_err_out;
1550     }
1551     ni->itype.index.collation_rule = ir->collation_rule;
1552     ntfs_debug("Index collation rule is 0x%x.",
1553             le32_to_cpu(ir->collation_rule));
1554     ni->itype.index.block_size = le32_to_cpu(ir->index_block_size);
1555     if (!is_power_of_2(ni->itype.index.block_size)) {
1556         ntfs_error(vi->i_sb, "Index block size (%u) is not a power of "
1557                 "two.", ni->itype.index.block_size);
1558         goto unm_err_out;
1559     }
1560     if (ni->itype.index.block_size > PAGE_SIZE) {
1561         ntfs_error(vi->i_sb, "Index block size (%u) > PAGE_SIZE "
1562                 "(%ld) is not supported.  Sorry.",
1563                 ni->itype.index.block_size, PAGE_SIZE);
1564         err = -EOPNOTSUPP;
1565         goto unm_err_out;
1566     }
1567     if (ni->itype.index.block_size < NTFS_BLOCK_SIZE) {
1568         ntfs_error(vi->i_sb, "Index block size (%u) < NTFS_BLOCK_SIZE "
1569                 "(%i) is not supported.  Sorry.",
1570                 ni->itype.index.block_size, NTFS_BLOCK_SIZE);
1571         err = -EOPNOTSUPP;
1572         goto unm_err_out;
1573     }
1574     ni->itype.index.block_size_bits = ffs(ni->itype.index.block_size) - 1;
1575     /* Determine the size of a vcn in the index. */
1576     if (vol->cluster_size <= ni->itype.index.block_size) {
1577         ni->itype.index.vcn_size = vol->cluster_size;
1578         ni->itype.index.vcn_size_bits = vol->cluster_size_bits;
1579     } else {
1580         ni->itype.index.vcn_size = vol->sector_size;
1581         ni->itype.index.vcn_size_bits = vol->sector_size_bits;
1582     }
1583     /* Check for presence of index allocation attribute. */
1584     if (!(ir->index.flags & LARGE_INDEX)) {
1585         /* No index allocation. */
1586         vi->i_size = ni->initialized_size = ni->allocated_size = 0;
1587         /* We are done with the mft record, so we release it. */
1588         ntfs_attr_put_search_ctx(ctx);
1589         unmap_mft_record(base_ni);
1590         m = NULL;
1591         ctx = NULL;
1592         goto skip_large_index_stuff;
1593     } /* LARGE_INDEX:  Index allocation present.  Setup state. */
1594     NInoSetIndexAllocPresent(ni);
1595     /* Find index allocation attribute. */
1596     ntfs_attr_reinit_search_ctx(ctx);
1597     err = ntfs_attr_lookup(AT_INDEX_ALLOCATION, ni->name, ni->name_len,
1598             CASE_SENSITIVE, 0, NULL, 0, ctx);
1599     if (unlikely(err)) {
1600         if (err == -ENOENT)
1601             ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is "
1602                     "not present but $INDEX_ROOT "
1603                     "indicated it is.");
1604         else
1605             ntfs_error(vi->i_sb, "Failed to lookup "
1606                     "$INDEX_ALLOCATION attribute.");
1607         goto unm_err_out;
1608     }
1609     a = ctx->attr;
1610     if (!a->non_resident) {
1611         ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is "
1612                 "resident.");
1613         goto unm_err_out;
1614     }
1615     /*
1616      * Ensure the attribute name is placed before the mapping pairs array.
1617      */
1618     if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
1619             le16_to_cpu(
1620             a->data.non_resident.mapping_pairs_offset)))) {
1621         ntfs_error(vol->sb, "$INDEX_ALLOCATION attribute name is "
1622                 "placed after the mapping pairs array.");
1623         goto unm_err_out;
1624     }
1625     if (a->flags & ATTR_IS_ENCRYPTED) {
1626         ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is "
1627                 "encrypted.");
1628         goto unm_err_out;
1629     }
1630     if (a->flags & ATTR_IS_SPARSE) {
1631         ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is sparse.");
1632         goto unm_err_out;
1633     }
1634     if (a->flags & ATTR_COMPRESSION_MASK) {
1635         ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is "
1636                 "compressed.");
1637         goto unm_err_out;
1638     }
1639     if (a->data.non_resident.lowest_vcn) {
1640         ntfs_error(vi->i_sb, "First extent of $INDEX_ALLOCATION "
1641                 "attribute has non zero lowest_vcn.");
1642         goto unm_err_out;
1643     }
1644     vi->i_size = sle64_to_cpu(a->data.non_resident.data_size);
1645     ni->initialized_size = sle64_to_cpu(
1646             a->data.non_resident.initialized_size);
1647     ni->allocated_size = sle64_to_cpu(a->data.non_resident.allocated_size);
1648     /*
1649      * We are done with the mft record, so we release it.  Otherwise
1650      * we would deadlock in ntfs_attr_iget().
1651      */
1652     ntfs_attr_put_search_ctx(ctx);
1653     unmap_mft_record(base_ni);
1654     m = NULL;
1655     ctx = NULL;
1656     /* Get the index bitmap attribute inode. */
1657     bvi = ntfs_attr_iget(base_vi, AT_BITMAP, ni->name, ni->name_len);
1658     if (IS_ERR(bvi)) {
1659         ntfs_error(vi->i_sb, "Failed to get bitmap attribute.");
1660         err = PTR_ERR(bvi);
1661         goto unm_err_out;
1662     }
1663     bni = NTFS_I(bvi);
1664     if (NInoCompressed(bni) || NInoEncrypted(bni) ||
1665             NInoSparse(bni)) {
1666         ntfs_error(vi->i_sb, "$BITMAP attribute is compressed and/or "
1667                 "encrypted and/or sparse.");
1668         goto iput_unm_err_out;
1669     }
1670     /* Consistency check bitmap size vs. index allocation size. */
1671     bvi_size = i_size_read(bvi);
1672     if ((bvi_size << 3) < (vi->i_size >> ni->itype.index.block_size_bits)) {
1673         ntfs_error(vi->i_sb, "Index bitmap too small (0x%llx) for "
1674                 "index allocation (0x%llx).", bvi_size << 3,
1675                 vi->i_size);
1676         goto iput_unm_err_out;
1677     }
1678     iput(bvi);
1679 skip_large_index_stuff:
1680     /* Setup the operations for this index inode. */
1681     vi->i_mapping->a_ops = &ntfs_mst_aops;
1682     vi->i_blocks = ni->allocated_size >> 9;
1683     /*
1684      * Make sure the base inode doesn't go away and attach it to the
1685      * index inode.
1686      */
1687     igrab(base_vi);
1688     ni->ext.base_ntfs_ino = base_ni;
1689     ni->nr_extents = -1;
1690 
1691     ntfs_debug("Done.");
1692     return 0;
1693 iput_unm_err_out:
1694     iput(bvi);
1695 unm_err_out:
1696     if (!err)
1697         err = -EIO;
1698     if (ctx)
1699         ntfs_attr_put_search_ctx(ctx);
1700     if (m)
1701         unmap_mft_record(base_ni);
1702 err_out:
1703     ntfs_error(vi->i_sb, "Failed with error code %i while reading index "
1704             "inode (mft_no 0x%lx, name_len %i.", err, vi->i_ino,
1705             ni->name_len);
1706     make_bad_inode(vi);
1707     if (err != -EOPNOTSUPP && err != -ENOMEM)
1708         NVolSetErrors(vol);
1709     return err;
1710 }
1711 
1712 /*
1713  * The MFT inode has special locking, so teach the lock validator
1714  * about this by splitting off the locking rules of the MFT from
1715  * the locking rules of other inodes. The MFT inode can never be
1716  * accessed from the VFS side (or even internally), only by the
1717  * map_mft functions.
1718  */
1719 static struct lock_class_key mft_ni_runlist_lock_key, mft_ni_mrec_lock_key;
1720 
1721 /**
1722  * ntfs_read_inode_mount - special read_inode for mount time use only
1723  * @vi:     inode to read
1724  *
1725  * Read inode FILE_MFT at mount time, only called with super_block lock
1726  * held from within the read_super() code path.
1727  *
1728  * This function exists because when it is called the page cache for $MFT/$DATA
1729  * is not initialized and hence we cannot get at the contents of mft records
1730  * by calling map_mft_record*().
1731  *
1732  * Further it needs to cope with the circular references problem, i.e. cannot
1733  * load any attributes other than $ATTRIBUTE_LIST until $DATA is loaded, because
1734  * we do not know where the other extent mft records are yet and again, because
1735  * we cannot call map_mft_record*() yet.  Obviously this applies only when an
1736  * attribute list is actually present in $MFT inode.
1737  *
1738  * We solve these problems by starting with the $DATA attribute before anything
1739  * else and iterating using ntfs_attr_lookup($DATA) over all extents.  As each
1740  * extent is found, we ntfs_mapping_pairs_decompress() including the implied
1741  * ntfs_runlists_merge().  Each step of the iteration necessarily provides
1742  * sufficient information for the next step to complete.
1743  *
1744  * This should work but there are two possible pit falls (see inline comments
1745  * below), but only time will tell if they are real pits or just smoke...
1746  */
1747 int ntfs_read_inode_mount(struct inode *vi)
1748 {
1749     VCN next_vcn, last_vcn, highest_vcn;
1750     s64 block;
1751     struct super_block *sb = vi->i_sb;
1752     ntfs_volume *vol = NTFS_SB(sb);
1753     struct buffer_head *bh;
1754     ntfs_inode *ni;
1755     MFT_RECORD *m = NULL;
1756     ATTR_RECORD *a;
1757     ntfs_attr_search_ctx *ctx;
1758     unsigned int i, nr_blocks;
1759     int err;
1760 
1761     ntfs_debug("Entering.");
1762 
1763     /* Initialize the ntfs specific part of @vi. */
1764     ntfs_init_big_inode(vi);
1765 
1766     ni = NTFS_I(vi);
1767 
1768     /* Setup the data attribute. It is special as it is mst protected. */
1769     NInoSetNonResident(ni);
1770     NInoSetMstProtected(ni);
1771     NInoSetSparseDisabled(ni);
1772     ni->type = AT_DATA;
1773     ni->name = NULL;
1774     ni->name_len = 0;
1775     /*
1776      * This sets up our little cheat allowing us to reuse the async read io
1777      * completion handler for directories.
1778      */
1779     ni->itype.index.block_size = vol->mft_record_size;
1780     ni->itype.index.block_size_bits = vol->mft_record_size_bits;
1781 
1782     /* Very important! Needed to be able to call map_mft_record*(). */
1783     vol->mft_ino = vi;
1784 
1785     /* Allocate enough memory to read the first mft record. */
1786     if (vol->mft_record_size > 64 * 1024) {
1787         ntfs_error(sb, "Unsupported mft record size %i (max 64kiB).",
1788                 vol->mft_record_size);
1789         goto err_out;
1790     }
1791     i = vol->mft_record_size;
1792     if (i < sb->s_blocksize)
1793         i = sb->s_blocksize;
1794     m = (MFT_RECORD*)ntfs_malloc_nofs(i);
1795     if (!m) {
1796         ntfs_error(sb, "Failed to allocate buffer for $MFT record 0.");
1797         goto err_out;
1798     }
1799 
1800     /* Determine the first block of the $MFT/$DATA attribute. */
1801     block = vol->mft_lcn << vol->cluster_size_bits >>
1802             sb->s_blocksize_bits;
1803     nr_blocks = vol->mft_record_size >> sb->s_blocksize_bits;
1804     if (!nr_blocks)
1805         nr_blocks = 1;
1806 
1807     /* Load $MFT/$DATA's first mft record. */
1808     for (i = 0; i < nr_blocks; i++) {
1809         bh = sb_bread(sb, block++);
1810         if (!bh) {
1811             ntfs_error(sb, "Device read failed.");
1812             goto err_out;
1813         }
1814         memcpy((char*)m + (i << sb->s_blocksize_bits), bh->b_data,
1815                 sb->s_blocksize);
1816         brelse(bh);
1817     }
1818 
1819     if (le32_to_cpu(m->bytes_allocated) != vol->mft_record_size) {
1820         ntfs_error(sb, "Incorrect mft record size %u in superblock, should be %u.",
1821                 le32_to_cpu(m->bytes_allocated), vol->mft_record_size);
1822         goto err_out;
1823     }
1824 
1825     /* Apply the mst fixups. */
1826     if (post_read_mst_fixup((NTFS_RECORD*)m, vol->mft_record_size)) {
1827         /* FIXME: Try to use the $MFTMirr now. */
1828         ntfs_error(sb, "MST fixup failed. $MFT is corrupt.");
1829         goto err_out;
1830     }
1831 
1832     /* Need this to sanity check attribute list references to $MFT. */
1833     vi->i_generation = ni->seq_no = le16_to_cpu(m->sequence_number);
1834 
1835     /* Provides read_folio() for map_mft_record(). */
1836     vi->i_mapping->a_ops = &ntfs_mst_aops;
1837 
1838     ctx = ntfs_attr_get_search_ctx(ni, m);
1839     if (!ctx) {
1840         err = -ENOMEM;
1841         goto err_out;
1842     }
1843 
1844     /* Find the attribute list attribute if present. */
1845     err = ntfs_attr_lookup(AT_ATTRIBUTE_LIST, NULL, 0, 0, 0, NULL, 0, ctx);
1846     if (err) {
1847         if (unlikely(err != -ENOENT)) {
1848             ntfs_error(sb, "Failed to lookup attribute list "
1849                     "attribute. You should run chkdsk.");
1850             goto put_err_out;
1851         }
1852     } else /* if (!err) */ {
1853         ATTR_LIST_ENTRY *al_entry, *next_al_entry;
1854         u8 *al_end;
1855         static const char *es = "  Not allowed.  $MFT is corrupt.  "
1856                 "You should run chkdsk.";
1857 
1858         ntfs_debug("Attribute list attribute found in $MFT.");
1859         NInoSetAttrList(ni);
1860         a = ctx->attr;
1861         if (a->flags & ATTR_COMPRESSION_MASK) {
1862             ntfs_error(sb, "Attribute list attribute is "
1863                     "compressed.%s", es);
1864             goto put_err_out;
1865         }
1866         if (a->flags & ATTR_IS_ENCRYPTED ||
1867                 a->flags & ATTR_IS_SPARSE) {
1868             if (a->non_resident) {
1869                 ntfs_error(sb, "Non-resident attribute list "
1870                         "attribute is encrypted/"
1871                         "sparse.%s", es);
1872                 goto put_err_out;
1873             }
1874             ntfs_warning(sb, "Resident attribute list attribute "
1875                     "in $MFT system file is marked "
1876                     "encrypted/sparse which is not true.  "
1877                     "However, Windows allows this and "
1878                     "chkdsk does not detect or correct it "
1879                     "so we will just ignore the invalid "
1880                     "flags and pretend they are not set.");
1881         }
1882         /* Now allocate memory for the attribute list. */
1883         ni->attr_list_size = (u32)ntfs_attr_size(a);
1884         if (!ni->attr_list_size) {
1885             ntfs_error(sb, "Attr_list_size is zero");
1886             goto put_err_out;
1887         }
1888         ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size);
1889         if (!ni->attr_list) {
1890             ntfs_error(sb, "Not enough memory to allocate buffer "
1891                     "for attribute list.");
1892             goto put_err_out;
1893         }
1894         if (a->non_resident) {
1895             NInoSetAttrListNonResident(ni);
1896             if (a->data.non_resident.lowest_vcn) {
1897                 ntfs_error(sb, "Attribute list has non zero "
1898                         "lowest_vcn. $MFT is corrupt. "
1899                         "You should run chkdsk.");
1900                 goto put_err_out;
1901             }
1902             /* Setup the runlist. */
1903             ni->attr_list_rl.rl = ntfs_mapping_pairs_decompress(vol,
1904                     a, NULL);
1905             if (IS_ERR(ni->attr_list_rl.rl)) {
1906                 err = PTR_ERR(ni->attr_list_rl.rl);
1907                 ni->attr_list_rl.rl = NULL;
1908                 ntfs_error(sb, "Mapping pairs decompression "
1909                         "failed with error code %i.",
1910                         -err);
1911                 goto put_err_out;
1912             }
1913             /* Now load the attribute list. */
1914             if ((err = load_attribute_list(vol, &ni->attr_list_rl,
1915                     ni->attr_list, ni->attr_list_size,
1916                     sle64_to_cpu(a->data.
1917                     non_resident.initialized_size)))) {
1918                 ntfs_error(sb, "Failed to load attribute list "
1919                         "attribute with error code %i.",
1920                         -err);
1921                 goto put_err_out;
1922             }
1923         } else /* if (!ctx.attr->non_resident) */ {
1924             if ((u8*)a + le16_to_cpu(
1925                     a->data.resident.value_offset) +
1926                     le32_to_cpu(
1927                     a->data.resident.value_length) >
1928                     (u8*)ctx->mrec + vol->mft_record_size) {
1929                 ntfs_error(sb, "Corrupt attribute list "
1930                         "attribute.");
1931                 goto put_err_out;
1932             }
1933             /* Now copy the attribute list. */
1934             memcpy(ni->attr_list, (u8*)a + le16_to_cpu(
1935                     a->data.resident.value_offset),
1936                     le32_to_cpu(
1937                     a->data.resident.value_length));
1938         }
1939         /* The attribute list is now setup in memory. */
1940         /*
1941          * FIXME: I don't know if this case is actually possible.
1942          * According to logic it is not possible but I have seen too
1943          * many weird things in MS software to rely on logic... Thus we
1944          * perform a manual search and make sure the first $MFT/$DATA
1945          * extent is in the base inode. If it is not we abort with an
1946          * error and if we ever see a report of this error we will need
1947          * to do some magic in order to have the necessary mft record
1948          * loaded and in the right place in the page cache. But
1949          * hopefully logic will prevail and this never happens...
1950          */
1951         al_entry = (ATTR_LIST_ENTRY*)ni->attr_list;
1952         al_end = (u8*)al_entry + ni->attr_list_size;
1953         for (;; al_entry = next_al_entry) {
1954             /* Out of bounds check. */
1955             if ((u8*)al_entry < ni->attr_list ||
1956                     (u8*)al_entry > al_end)
1957                 goto em_put_err_out;
1958             /* Catch the end of the attribute list. */
1959             if ((u8*)al_entry == al_end)
1960                 goto em_put_err_out;
1961             if (!al_entry->length)
1962                 goto em_put_err_out;
1963             if ((u8*)al_entry + 6 > al_end || (u8*)al_entry +
1964                     le16_to_cpu(al_entry->length) > al_end)
1965                 goto em_put_err_out;
1966             next_al_entry = (ATTR_LIST_ENTRY*)((u8*)al_entry +
1967                     le16_to_cpu(al_entry->length));
1968             if (le32_to_cpu(al_entry->type) > le32_to_cpu(AT_DATA))
1969                 goto em_put_err_out;
1970             if (AT_DATA != al_entry->type)
1971                 continue;
1972             /* We want an unnamed attribute. */
1973             if (al_entry->name_length)
1974                 goto em_put_err_out;
1975             /* Want the first entry, i.e. lowest_vcn == 0. */
1976             if (al_entry->lowest_vcn)
1977                 goto em_put_err_out;
1978             /* First entry has to be in the base mft record. */
1979             if (MREF_LE(al_entry->mft_reference) != vi->i_ino) {
1980                 /* MFT references do not match, logic fails. */
1981                 ntfs_error(sb, "BUG: The first $DATA extent "
1982                         "of $MFT is not in the base "
1983                         "mft record. Please report "
1984                         "you saw this message to "
1985                         "linux-ntfs-dev@lists."
1986                         "sourceforge.net");
1987                 goto put_err_out;
1988             } else {
1989                 /* Sequence numbers must match. */
1990                 if (MSEQNO_LE(al_entry->mft_reference) !=
1991                         ni->seq_no)
1992                     goto em_put_err_out;
1993                 /* Got it. All is ok. We can stop now. */
1994                 break;
1995             }
1996         }
1997     }
1998 
1999     ntfs_attr_reinit_search_ctx(ctx);
2000 
2001     /* Now load all attribute extents. */
2002     a = NULL;
2003     next_vcn = last_vcn = highest_vcn = 0;
2004     while (!(err = ntfs_attr_lookup(AT_DATA, NULL, 0, 0, next_vcn, NULL, 0,
2005             ctx))) {
2006         runlist_element *nrl;
2007 
2008         /* Cache the current attribute. */
2009         a = ctx->attr;
2010         /* $MFT must be non-resident. */
2011         if (!a->non_resident) {
2012             ntfs_error(sb, "$MFT must be non-resident but a "
2013                     "resident extent was found. $MFT is "
2014                     "corrupt. Run chkdsk.");
2015             goto put_err_out;
2016         }
2017         /* $MFT must be uncompressed and unencrypted. */
2018         if (a->flags & ATTR_COMPRESSION_MASK ||
2019                 a->flags & ATTR_IS_ENCRYPTED ||
2020                 a->flags & ATTR_IS_SPARSE) {
2021             ntfs_error(sb, "$MFT must be uncompressed, "
2022                     "non-sparse, and unencrypted but a "
2023                     "compressed/sparse/encrypted extent "
2024                     "was found. $MFT is corrupt. Run "
2025                     "chkdsk.");
2026             goto put_err_out;
2027         }
2028         /*
2029          * Decompress the mapping pairs array of this extent and merge
2030          * the result into the existing runlist. No need for locking
2031          * as we have exclusive access to the inode at this time and we
2032          * are a mount in progress task, too.
2033          */
2034         nrl = ntfs_mapping_pairs_decompress(vol, a, ni->runlist.rl);
2035         if (IS_ERR(nrl)) {
2036             ntfs_error(sb, "ntfs_mapping_pairs_decompress() "
2037                     "failed with error code %ld.  $MFT is "
2038                     "corrupt.", PTR_ERR(nrl));
2039             goto put_err_out;
2040         }
2041         ni->runlist.rl = nrl;
2042 
2043         /* Are we in the first extent? */
2044         if (!next_vcn) {
2045             if (a->data.non_resident.lowest_vcn) {
2046                 ntfs_error(sb, "First extent of $DATA "
2047                         "attribute has non zero "
2048                         "lowest_vcn. $MFT is corrupt. "
2049                         "You should run chkdsk.");
2050                 goto put_err_out;
2051             }
2052             /* Get the last vcn in the $DATA attribute. */
2053             last_vcn = sle64_to_cpu(
2054                     a->data.non_resident.allocated_size)
2055                     >> vol->cluster_size_bits;
2056             /* Fill in the inode size. */
2057             vi->i_size = sle64_to_cpu(
2058                     a->data.non_resident.data_size);
2059             ni->initialized_size = sle64_to_cpu(
2060                     a->data.non_resident.initialized_size);
2061             ni->allocated_size = sle64_to_cpu(
2062                     a->data.non_resident.allocated_size);
2063             /*
2064              * Verify the number of mft records does not exceed
2065              * 2^32 - 1.
2066              */
2067             if ((vi->i_size >> vol->mft_record_size_bits) >=
2068                     (1ULL << 32)) {
2069                 ntfs_error(sb, "$MFT is too big! Aborting.");
2070                 goto put_err_out;
2071             }
2072             /*
2073              * We have got the first extent of the runlist for
2074              * $MFT which means it is now relatively safe to call
2075              * the normal ntfs_read_inode() function.
2076              * Complete reading the inode, this will actually
2077              * re-read the mft record for $MFT, this time entering
2078              * it into the page cache with which we complete the
2079              * kick start of the volume. It should be safe to do
2080              * this now as the first extent of $MFT/$DATA is
2081              * already known and we would hope that we don't need
2082              * further extents in order to find the other
2083              * attributes belonging to $MFT. Only time will tell if
2084              * this is really the case. If not we will have to play
2085              * magic at this point, possibly duplicating a lot of
2086              * ntfs_read_inode() at this point. We will need to
2087              * ensure we do enough of its work to be able to call
2088              * ntfs_read_inode() on extents of $MFT/$DATA. But lets
2089              * hope this never happens...
2090              */
2091             ntfs_read_locked_inode(vi);
2092             if (is_bad_inode(vi)) {
2093                 ntfs_error(sb, "ntfs_read_inode() of $MFT "
2094                         "failed. BUG or corrupt $MFT. "
2095                         "Run chkdsk and if no errors "
2096                         "are found, please report you "
2097                         "saw this message to "
2098                         "linux-ntfs-dev@lists."
2099                         "sourceforge.net");
2100                 ntfs_attr_put_search_ctx(ctx);
2101                 /* Revert to the safe super operations. */
2102                 ntfs_free(m);
2103                 return -1;
2104             }
2105             /*
2106              * Re-initialize some specifics about $MFT's inode as
2107              * ntfs_read_inode() will have set up the default ones.
2108              */
2109             /* Set uid and gid to root. */
2110             vi->i_uid = GLOBAL_ROOT_UID;
2111             vi->i_gid = GLOBAL_ROOT_GID;
2112             /* Regular file. No access for anyone. */
2113             vi->i_mode = S_IFREG;
2114             /* No VFS initiated operations allowed for $MFT. */
2115             vi->i_op = &ntfs_empty_inode_ops;
2116             vi->i_fop = &ntfs_empty_file_ops;
2117         }
2118 
2119         /* Get the lowest vcn for the next extent. */
2120         highest_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn);
2121         next_vcn = highest_vcn + 1;
2122 
2123         /* Only one extent or error, which we catch below. */
2124         if (next_vcn <= 0)
2125             break;
2126 
2127         /* Avoid endless loops due to corruption. */
2128         if (next_vcn < sle64_to_cpu(
2129                 a->data.non_resident.lowest_vcn)) {
2130             ntfs_error(sb, "$MFT has corrupt attribute list "
2131                     "attribute. Run chkdsk.");
2132             goto put_err_out;
2133         }
2134     }
2135     if (err != -ENOENT) {
2136         ntfs_error(sb, "Failed to lookup $MFT/$DATA attribute extent. "
2137                 "$MFT is corrupt. Run chkdsk.");
2138         goto put_err_out;
2139     }
2140     if (!a) {
2141         ntfs_error(sb, "$MFT/$DATA attribute not found. $MFT is "
2142                 "corrupt. Run chkdsk.");
2143         goto put_err_out;
2144     }
2145     if (highest_vcn && highest_vcn != last_vcn - 1) {
2146         ntfs_error(sb, "Failed to load the complete runlist for "
2147                 "$MFT/$DATA. Driver bug or corrupt $MFT. "
2148                 "Run chkdsk.");
2149         ntfs_debug("highest_vcn = 0x%llx, last_vcn - 1 = 0x%llx",
2150                 (unsigned long long)highest_vcn,
2151                 (unsigned long long)last_vcn - 1);
2152         goto put_err_out;
2153     }
2154     ntfs_attr_put_search_ctx(ctx);
2155     ntfs_debug("Done.");
2156     ntfs_free(m);
2157 
2158     /*
2159      * Split the locking rules of the MFT inode from the
2160      * locking rules of other inodes:
2161      */
2162     lockdep_set_class(&ni->runlist.lock, &mft_ni_runlist_lock_key);
2163     lockdep_set_class(&ni->mrec_lock, &mft_ni_mrec_lock_key);
2164 
2165     return 0;
2166 
2167 em_put_err_out:
2168     ntfs_error(sb, "Couldn't find first extent of $DATA attribute in "
2169             "attribute list. $MFT is corrupt. Run chkdsk.");
2170 put_err_out:
2171     ntfs_attr_put_search_ctx(ctx);
2172 err_out:
2173     ntfs_error(sb, "Failed. Marking inode as bad.");
2174     make_bad_inode(vi);
2175     ntfs_free(m);
2176     return -1;
2177 }
2178 
2179 static void __ntfs_clear_inode(ntfs_inode *ni)
2180 {
2181     /* Free all alocated memory. */
2182     down_write(&ni->runlist.lock);
2183     if (ni->runlist.rl) {
2184         ntfs_free(ni->runlist.rl);
2185         ni->runlist.rl = NULL;
2186     }
2187     up_write(&ni->runlist.lock);
2188 
2189     if (ni->attr_list) {
2190         ntfs_free(ni->attr_list);
2191         ni->attr_list = NULL;
2192     }
2193 
2194     down_write(&ni->attr_list_rl.lock);
2195     if (ni->attr_list_rl.rl) {
2196         ntfs_free(ni->attr_list_rl.rl);
2197         ni->attr_list_rl.rl = NULL;
2198     }
2199     up_write(&ni->attr_list_rl.lock);
2200 
2201     if (ni->name_len && ni->name != I30) {
2202         /* Catch bugs... */
2203         BUG_ON(!ni->name);
2204         kfree(ni->name);
2205     }
2206 }
2207 
2208 void ntfs_clear_extent_inode(ntfs_inode *ni)
2209 {
2210     ntfs_debug("Entering for inode 0x%lx.", ni->mft_no);
2211 
2212     BUG_ON(NInoAttr(ni));
2213     BUG_ON(ni->nr_extents != -1);
2214 
2215 #ifdef NTFS_RW
2216     if (NInoDirty(ni)) {
2217         if (!is_bad_inode(VFS_I(ni->ext.base_ntfs_ino)))
2218             ntfs_error(ni->vol->sb, "Clearing dirty extent inode!  "
2219                     "Losing data!  This is a BUG!!!");
2220         // FIXME:  Do something!!!
2221     }
2222 #endif /* NTFS_RW */
2223 
2224     __ntfs_clear_inode(ni);
2225 
2226     /* Bye, bye... */
2227     ntfs_destroy_extent_inode(ni);
2228 }
2229 
2230 /**
2231  * ntfs_evict_big_inode - clean up the ntfs specific part of an inode
2232  * @vi:     vfs inode pending annihilation
2233  *
2234  * The VFS calls ntfs_evict_big_inode() when it is going to remove the inode
2235  * @vi from memory.  All memory belonging to the NTFS specific part of the
2236  * inode is deallocated before returning.
2237  *
2238  * With NTFS_RW defined, a dirty inode is committed before that teardown.
2239  */
2240 void ntfs_evict_big_inode(struct inode *vi)
2241 {
2242     ntfs_inode *ni = NTFS_I(vi);
2243 
2244     truncate_inode_pages_final(&vi->i_data);
2245     clear_inode(vi);
2246 
2247 #ifdef NTFS_RW
2248     if (NInoDirty(ni)) {
2249         const bool bad_before = is_bad_inode(vi);
2250 
2251         /* Committing the inode also commits all extent inodes. */
2252         ntfs_commit_inode(vi);
2253 
2254         /* A pre-existing bad inode is not reported as a commit failure. */
2255         if (!bad_before && (is_bad_inode(vi) || NInoDirty(ni))) {
2256             ntfs_error(vi->i_sb, "Failed to commit dirty inode "
2257                     "0x%lx.  Losing data!", vi->i_ino);
2258             // FIXME:  Do something!!!
2259         }
2260     }
2261 #endif /* NTFS_RW */
2262 
2263     /* No need to lock at this stage as no one else has a reference. */
2264     if (ni->nr_extents > 0) {
2265         int idx = 0;
2266 
2267         while (idx < ni->nr_extents)
2268             ntfs_clear_extent_inode(ni->ext.extent_ntfs_inos[idx++]);
2269         kfree(ni->ext.extent_ntfs_inos);
2270     }
2271 
2272     __ntfs_clear_inode(ni);
2273 
2274     /* An attribute inode releases the base inode if it is holding it. */
2275     if (NInoAttr(ni) && ni->nr_extents == -1) {
2276         iput(VFS_I(ni->ext.base_ntfs_ino));
2277         ni->nr_extents = 0;
2278         ni->ext.base_ntfs_ino = NULL;
2279     }
2280 
2281     /* No page may remain attached and ni->count must now drop to zero. */
2282     BUG_ON(ni->page);
2283     if (!atomic_dec_and_test(&ni->count))
2284         BUG();
2285 }
2286 
2287 /**
2288  * ntfs_show_options - show mount options in /proc/mounts
2289  * @sf:     seq_file in which to write our mount options
2290  * @root:   root of the mounted tree whose mount options to display
2291  *
2292  * Called by the VFS once for each mounted ntfs volume when someone reads
2293  * /proc/mounts.  The NTFS specific mount options of the volume @root belongs
2294  * to are written to the seq file @sf.  Always returns 0.
2295  */
2296 int ntfs_show_options(struct seq_file *sf, struct dentry *root)
2297 {
2298     ntfs_volume *vol = NTFS_SB(root->d_sb);
2299     int i;
2300 
2301     /* uid_t/gid_t are unsigned: %i would show large ids as negative. */
2302     seq_printf(sf, ",uid=%u", from_kuid_munged(&init_user_ns, vol->uid));
2303     seq_printf(sf, ",gid=%u", from_kgid_munged(&init_user_ns, vol->gid));
2304     if (vol->fmask == vol->dmask)
2305         seq_printf(sf, ",umask=0%o", vol->fmask);
2306     else {
2307         seq_printf(sf, ",fmask=0%o", vol->fmask);
2308         seq_printf(sf, ",dmask=0%o", vol->dmask);
2309     }
2310     seq_printf(sf, ",nls=%s", vol->nls_map->charset);
2311     if (NVolCaseSensitive(vol))
2312         seq_puts(sf, ",case_sensitive");
2313     if (NVolShowSystemFiles(vol))
2314         seq_puts(sf, ",show_sys_files");
2315     if (!NVolSparseEnabled(vol))
2316         seq_puts(sf, ",disable_sparse");
2317     for (i = 0; on_errors_arr[i].val; i++) {
2318         if (on_errors_arr[i].val & vol->on_errors)
2319             seq_printf(sf, ",errors=%s", on_errors_arr[i].str);
2320     }
2321     seq_printf(sf, ",mft_zone_multiplier=%i", vol->mft_zone_multiplier);
2322     return 0;
2323 }
2324 
2325 #ifdef NTFS_RW
2326 
2327 static const char *es = "  Leaving inconsistent metadata.  Unmount and run "
2328         "chkdsk."; /* Tail appended to ntfs_error() messages. */
2329 
2330 /**
2331  * ntfs_truncate - called when the i_size of an ntfs inode is changed
2332  * @vi:     inode for which the i_size was changed
2333  *
2334  * We only support i_size changes for normal files at present, i.e. not
2335  * compressed and not encrypted.  This is enforced in ntfs_setattr(), see
2336  * below, and compressed/encrypted inodes are also rejected again here.
2337  *
2338  * The kernel guarantees that @vi is a regular file (S_ISREG() is true) and
2339  * that the change is allowed.
2340  *
2341  * This implies for us that @vi is a file inode rather than a directory, index,
2342  * or attribute inode as well as that @vi is a base inode.
2343  *
2344  * Returns 0 on success or -errno on error.
2345  *
2346  * Called with ->i_mutex held.
2347  */
2348 int ntfs_truncate(struct inode *vi)
2349 {
2350     s64 new_size, old_size, nr_freed, new_alloc_size, old_alloc_size;
2351     VCN highest_vcn;
2352     unsigned long flags;
2353     ntfs_inode *base_ni, *ni = NTFS_I(vi);
2354     ntfs_volume *vol = ni->vol;
2355     ntfs_attr_search_ctx *ctx;
2356     MFT_RECORD *m;
2357     ATTR_RECORD *a;
2358     const char *te = "  Leaving file length out of sync with i_size.";
2359     int err, mp_size, size_change, alloc_change;
2360 
2361     ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
2362     BUG_ON(NInoAttr(ni));
2363     BUG_ON(S_ISDIR(vi->i_mode));
2364     BUG_ON(NInoMstProtected(ni));
2365     BUG_ON(ni->nr_extents < 0);
2366 retry_truncate:
2367     /*
2368      * Lock the runlist for writing and map the mft record to ensure it is
2369      * safe to mess with the attribute runlist and sizes.
2370      */
2371     down_write(&ni->runlist.lock);
2372     if (!NInoAttr(ni))
2373         base_ni = ni;
2374     else
2375         base_ni = ni->ext.base_ntfs_ino;
2376     m = map_mft_record(base_ni);
2377     if (IS_ERR(m)) {
2378         err = PTR_ERR(m);
2379         ntfs_error(vi->i_sb, "Failed to map mft record for inode 0x%lx "
2380                 "(error code %d).%s", vi->i_ino, err, te);
2381         ctx = NULL;
2382         m = NULL;
2383         goto old_bad_out;
2384     }
2385     ctx = ntfs_attr_get_search_ctx(base_ni, m);
2386     if (unlikely(!ctx)) {
2387         ntfs_error(vi->i_sb, "Failed to allocate a search context for "
2388                 "inode 0x%lx (not enough memory).%s",
2389                 vi->i_ino, te);
2390         err = -ENOMEM;
2391         goto old_bad_out;
2392     }
2393     err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
2394             CASE_SENSITIVE, 0, NULL, 0, ctx);
2395     if (unlikely(err)) {
2396         if (err == -ENOENT) {
2397             ntfs_error(vi->i_sb, "Open attribute is missing from "
2398                     "mft record.  Inode 0x%lx is corrupt.  "
2399                     "Run chkdsk.%s", vi->i_ino, te);
2400             err = -EIO;
2401         } else
2402             ntfs_error(vi->i_sb, "Failed to lookup attribute in "
2403                     "inode 0x%lx (error code %d).%s",
2404                     vi->i_ino, err, te);
2405         goto old_bad_out;
2406     }
2407     m = ctx->mrec;
2408     a = ctx->attr;
2409     /*
2410      * The i_size of the vfs inode is the new size for the attribute value.
2411      */
2412     new_size = i_size_read(vi);
2413     /* The current size of the attribute value is the old size. */
2414     old_size = ntfs_attr_size(a);
2415     /* Calculate the new allocated size. */
2416     if (NInoNonResident(ni))
2417         new_alloc_size = (new_size + vol->cluster_size - 1) &
2418                 ~(s64)vol->cluster_size_mask;
2419     else
2420         new_alloc_size = (new_size + 7) & ~7;
2421     /* The current allocated size is the old allocated size. */
2422     read_lock_irqsave(&ni->size_lock, flags);
2423     old_alloc_size = ni->allocated_size;
2424     read_unlock_irqrestore(&ni->size_lock, flags);
2425     /*
2426      * The change in the file size.  This will be 0 if no change, >0 if the
2427      * size is growing, and <0 if the size is shrinking.
2428      */
2429     size_change = -1;
2430     if (new_size - old_size >= 0) {
2431         size_change = 1;
2432         if (new_size == old_size)
2433             size_change = 0;
2434     }
2435     /* As above for the allocated size. */
2436     alloc_change = -1;
2437     if (new_alloc_size - old_alloc_size >= 0) {
2438         alloc_change = 1;
2439         if (new_alloc_size == old_alloc_size)
2440             alloc_change = 0;
2441     }
2442     /*
2443      * If neither the size nor the allocation are being changed there is
2444      * nothing to do.
2445      */
2446     if (!size_change && !alloc_change)
2447         goto unm_done;
2448     /* If the size is changing, check if new size is allowed in $AttrDef. */
2449     if (size_change) {
2450         err = ntfs_attr_size_bounds_check(vol, ni->type, new_size);
2451         if (unlikely(err)) {
2452             if (err == -ERANGE) {
2453                 ntfs_error(vol->sb, "Truncate would cause the "
2454                         "inode 0x%lx to %simum size "
2455                         "for its attribute type "
2456                         "(0x%x).  Aborting truncate.",
2457                         vi->i_ino,
2458                         new_size > old_size ? "exceed "
2459                         "the max" : "go under the min",
2460                         le32_to_cpu(ni->type));
2461                 err = -EFBIG;
2462             } else {
2463                 ntfs_error(vol->sb, "Inode 0x%lx has unknown "
2464                         "attribute type 0x%x.  "
2465                         "Aborting truncate.",
2466                         vi->i_ino,
2467                         le32_to_cpu(ni->type));
2468                 err = -EIO;
2469             }
2470             /* Reset the vfs inode size to the old size. */
2471             i_size_write(vi, old_size);
2472             goto err_out;
2473         }
2474     }
2475     if (NInoCompressed(ni) || NInoEncrypted(ni)) {
2476         ntfs_warning(vi->i_sb, "Changes in inode size are not "
2477                 "supported yet for %s files, ignoring.",
2478                 NInoCompressed(ni) ? "compressed" :
2479                 "encrypted");
2480         err = -EOPNOTSUPP;
2481         goto bad_out;
2482     }
2483     if (a->non_resident)
2484         goto do_non_resident_truncate;
2485     BUG_ON(NInoNonResident(ni));
2486     /* Resize the attribute record to best fit the new attribute size. */
2487     if (new_size < vol->mft_record_size &&
2488             !ntfs_resident_attr_value_resize(m, a, new_size)) {
2489         /* The resize succeeded! */
2490         flush_dcache_mft_record_page(ctx->ntfs_ino);
2491         mark_mft_record_dirty(ctx->ntfs_ino);
2492         write_lock_irqsave(&ni->size_lock, flags);
2493         /* Update the sizes in the ntfs inode and all is done. */
2494         ni->allocated_size = le32_to_cpu(a->length) -
2495                 le16_to_cpu(a->data.resident.value_offset);
2496         /*
2497          * Note ntfs_resident_attr_value_resize() has already done any
2498          * necessary data clearing in the attribute record.  When the
2499          * file is being shrunk truncate_setsize() in ntfs_setattr() has
2500          * cleared the top part of the last partial page, i.e. since this is
2501          * the resident case this is the page with index 0.  However,
2502          * when the file is being expanded, the page cache page data
2503          * between the old data_size, i.e. old_size, and the new_size
2504          * has not been zeroed.  Fortunately, we do not need to zero it
2505          * either since on one hand it will either already be zero due
2506          * to both read_folio and writepage clearing partial page data
2507          * beyond i_size in which case there is nothing to do or in the
2508          * case of the file being mmap()ped at the same time, POSIX
2509          * specifies that the behaviour is unspecified thus we do not
2510          * have to do anything.  This means that in our implementation
2511          * in the rare case that the file is mmap()ped and a write
2512          * occurred into the mmap()ped region just beyond the file size
2513          * and writepage has not yet been called to write out the page
2514          * (which would clear the area beyond the file size) and we now
2515          * extend the file size to incorporate this dirty region
2516          * outside the file size, a write of the page would result in
2517          * this data being written to disk instead of being cleared.
2518          * Given both POSIX and the Linux mmap(2) man page specify that
2519          * this corner case is undefined, we choose to leave it like
2520          * that as this is much simpler for us as we cannot lock the
2521          * relevant page now since we are holding too many ntfs locks
2522          * which would result in a lock reversal deadlock.
2523          */
2524         ni->initialized_size = new_size;
2525         write_unlock_irqrestore(&ni->size_lock, flags);
2526         goto unm_done;
2527     }
2528     /* If the above resize failed, this must be an attribute extension. */
2529     BUG_ON(size_change < 0);
2530     /*
2531      * We have to drop all the locks so we can call
2532      * ntfs_attr_make_non_resident().  This could be optimised by try-
2533      * locking the first page cache page and only if that fails dropping
2534      * the locks, locking the page, and redoing all the locking and
2535      * lookups.  While this would be a huge optimisation, it is not worth
2536      * it as this is definitely a slow code path as it only ever can happen
2537      * once for any given file.
2538      */
2539     ntfs_attr_put_search_ctx(ctx);
2540     unmap_mft_record(base_ni);
2541     up_write(&ni->runlist.lock);
2542     /*
2543      * Not enough space in the mft record, try to make the attribute
2544      * non-resident and if successful restart the truncation process.
2545      */
2546     err = ntfs_attr_make_non_resident(ni, old_size);
2547     if (likely(!err))
2548         goto retry_truncate;
2549     /*
2550      * Could not make non-resident.  If this is due to this not being
2551      * permitted for this attribute type or there not being enough space,
2552      * try to make other attributes non-resident.  Otherwise fail.
2553      */
2554     if (unlikely(err != -EPERM && err != -ENOSPC)) {
2555         ntfs_error(vol->sb, "Cannot truncate inode 0x%lx, attribute "
2556                 "type 0x%x, because the conversion from "
2557                 "resident to non-resident attribute failed "
2558                 "with error code %i.", vi->i_ino,
2559                 (unsigned)le32_to_cpu(ni->type), err);
2560         if (err != -ENOMEM)
2561             err = -EIO;
2562         goto conv_err_out;
2563     }
2564     /* TODO: Not implemented from here, abort. */
2565     if (err == -ENOSPC)
2566         ntfs_error(vol->sb, "Not enough space in the mft record/on "
2567                 "disk for the non-resident attribute value.  "
2568                 "This case is not implemented yet.");
2569     else /* if (err == -EPERM) */
2570         ntfs_error(vol->sb, "This attribute type may not be "
2571                 "non-resident.  This case is not implemented "
2572                 "yet.");
2573     err = -EOPNOTSUPP;
2574     goto conv_err_out;
2575 #if 0
2576     // TODO: Attempt to make other attributes non-resident.
2577     if (!err)
2578         goto do_resident_extend;
2579     /*
2580      * Both the attribute list attribute and the standard information
2581      * attribute must remain in the base inode.  Thus, if this is one of
2582      * these attributes, we have to try to move other attributes out into
2583      * extent mft records instead.
2584      */
2585     if (ni->type == AT_ATTRIBUTE_LIST ||
2586             ni->type == AT_STANDARD_INFORMATION) {
2587         // TODO: Attempt to move other attributes into extent mft
2588         // records.
2589         err = -EOPNOTSUPP;
2590         if (!err)
2591             goto do_resident_extend;
2592         goto err_out;
2593     }
2594     // TODO: Attempt to move this attribute to an extent mft record, but
2595     // only if it is not already the only attribute in an mft record in
2596     // which case there would be nothing to gain.
2597     err = -EOPNOTSUPP;
2598     if (!err)
2599         goto do_resident_extend;
2600     /* There is nothing we can do to make enough space. )-: */
2601     goto err_out;
2602 #endif
2603 do_non_resident_truncate:
2604     BUG_ON(!NInoNonResident(ni));
2605     if (alloc_change < 0) {
2606         highest_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn);
2607         if (highest_vcn > 0 &&
2608                 old_alloc_size >> vol->cluster_size_bits >
2609                 highest_vcn + 1) {
2610             /*
2611              * This attribute has multiple extents.  Not yet
2612              * supported.
2613              */
2614             ntfs_error(vol->sb, "Cannot truncate inode 0x%lx, "
2615                     "attribute type 0x%x, because the "
2616                     "attribute is highly fragmented (it "
2617                     "consists of multiple extents) and "
2618                     "this case is not implemented yet.",
2619                     vi->i_ino,
2620                     (unsigned)le32_to_cpu(ni->type));
2621             err = -EOPNOTSUPP;
2622             goto bad_out;
2623         }
2624     }
2625     /*
2626      * If the size is shrinking, need to reduce the initialized_size and
2627      * the data_size before reducing the allocation.
2628      */
2629     if (size_change < 0) {
2630         /*
2631          * Make the valid size smaller (i_size is already up-to-date).
2632          */
2633         write_lock_irqsave(&ni->size_lock, flags);
2634         if (new_size < ni->initialized_size) {
2635             ni->initialized_size = new_size;
2636             a->data.non_resident.initialized_size =
2637                     cpu_to_sle64(new_size);
2638         }
2639         a->data.non_resident.data_size = cpu_to_sle64(new_size);
2640         write_unlock_irqrestore(&ni->size_lock, flags);
2641         flush_dcache_mft_record_page(ctx->ntfs_ino);
2642         mark_mft_record_dirty(ctx->ntfs_ino);
2643         /* If the allocated size is not changing, we are done. */
2644         if (!alloc_change)
2645             goto unm_done;
2646         /*
2647          * If the size is shrinking it makes no sense for the
2648          * allocation to be growing.
2649          */
2650         BUG_ON(alloc_change > 0);
2651     } else /* if (size_change >= 0) */ {
2652         /*
2653          * The file size is growing or staying the same but the
2654          * allocation can be shrinking, growing or staying the same.
2655          */
2656         if (alloc_change > 0) {
2657             /*
2658              * We need to extend the allocation and possibly update
2659              * the data size.  If we are updating the data size,
2660              * since we are not touching the initialized_size we do
2661              * not need to worry about the actual data on disk.
2662              * And as far as the page cache is concerned, there
2663              * will be no pages beyond the old data size and any
2664              * partial region in the last page between the old and
2665              * new data size (or the end of the page if the new
2666              * data size is outside the page) does not need to be
2667              * modified as explained above for the resident
2668              * attribute truncate case.  To do this, we simply drop
2669              * the locks we hold and leave all the work to our
2670              * friendly helper ntfs_attr_extend_allocation().
2671              */
2672             ntfs_attr_put_search_ctx(ctx);
2673             unmap_mft_record(base_ni);
2674             up_write(&ni->runlist.lock);
2675             err = ntfs_attr_extend_allocation(ni, new_size,
2676                     size_change > 0 ? new_size : -1, -1);
2677             /*
2678              * ntfs_attr_extend_allocation() will have done error
2679              * output already.
2680              */
2681             goto done;
2682         }
2683         if (!alloc_change)
2684             goto alloc_done;
2685     }
2686     /* alloc_change < 0 */
2687     /* Free the clusters. */
2688     nr_freed = ntfs_cluster_free(ni, new_alloc_size >>
2689             vol->cluster_size_bits, -1, ctx);
2690     m = ctx->mrec;
2691     a = ctx->attr;
2692     if (unlikely(nr_freed < 0)) {
2693         ntfs_error(vol->sb, "Failed to release cluster(s) (error code "
2694                 "%lli).  Unmount and run chkdsk to recover "
2695                 "the lost cluster(s).", (long long)nr_freed);
2696         NVolSetErrors(vol);
2697         nr_freed = 0;
2698     }
2699     /* Truncate the runlist. */
2700     err = ntfs_rl_truncate_nolock(vol, &ni->runlist,
2701             new_alloc_size >> vol->cluster_size_bits);
2702     /*
2703      * If the runlist truncation failed and/or the search context is no
2704      * longer valid, we cannot resize the attribute record or build the
2705      * mapping pairs array thus we mark the inode bad so that no access to
2706      * the freed clusters can happen.
2707      */
2708     if (unlikely(err || IS_ERR(m))) {
2709         ntfs_error(vol->sb, "Failed to %s (error code %li).%s",
2710                 IS_ERR(m) ?
2711                 "restore attribute search context" :
2712                 "truncate attribute runlist",
2713                 IS_ERR(m) ? PTR_ERR(m) : err, es);
2714         err = -EIO;
2715         goto bad_out;
2716     }
2717     /* Get the size for the shrunk mapping pairs array for the runlist. */
2718     mp_size = ntfs_get_size_for_mapping_pairs(vol, ni->runlist.rl, 0, -1);
2719     if (unlikely(mp_size <= 0)) {
2720         ntfs_error(vol->sb, "Cannot shrink allocation of inode 0x%lx, "
2721                 "attribute type 0x%x, because determining the "
2722                 "size for the mapping pairs failed with error "
2723                 "code %i.%s", vi->i_ino,
2724                 (unsigned)le32_to_cpu(ni->type), mp_size, es);
2725         err = -EIO;
2726         goto bad_out;
2727     }
2728     /*
2729      * Shrink the attribute record for the new mapping pairs array.  Note,
2730      * this cannot fail since we are making the attribute smaller thus by
2731      * definition there is enough space to do so.
2732      */
2733     err = ntfs_attr_record_resize(m, a, mp_size +
2734             le16_to_cpu(a->data.non_resident.mapping_pairs_offset));
2735     BUG_ON(err);
2736     /*
2737      * Generate the mapping pairs array directly into the attribute record.
2738      */
2739     err = ntfs_mapping_pairs_build(vol, (u8*)a +
2740             le16_to_cpu(a->data.non_resident.mapping_pairs_offset),
2741             mp_size, ni->runlist.rl, 0, -1, NULL);
2742     if (unlikely(err)) {
2743         ntfs_error(vol->sb, "Cannot shrink allocation of inode 0x%lx, "
2744                 "attribute type 0x%x, because building the "
2745                 "mapping pairs failed with error code %i.%s",
2746                 vi->i_ino, (unsigned)le32_to_cpu(ni->type),
2747                 err, es);
2748         err = -EIO;
2749         goto bad_out;
2750     }
2751     /* Update the allocated/compressed size as well as the highest vcn. */
2752     a->data.non_resident.highest_vcn = cpu_to_sle64((new_alloc_size >>
2753             vol->cluster_size_bits) - 1);
2754     write_lock_irqsave(&ni->size_lock, flags);
2755     ni->allocated_size = new_alloc_size;
2756     a->data.non_resident.allocated_size = cpu_to_sle64(new_alloc_size);
2757     if (NInoSparse(ni) || NInoCompressed(ni)) {
2758         if (nr_freed) {
2759             ni->itype.compressed.size -= nr_freed <<
2760                     vol->cluster_size_bits;
2761             BUG_ON(ni->itype.compressed.size < 0);
2762             a->data.non_resident.compressed_size = cpu_to_sle64(
2763                     ni->itype.compressed.size);
2764             vi->i_blocks = ni->itype.compressed.size >> 9;
2765         }
2766     } else
2767         vi->i_blocks = new_alloc_size >> 9;
2768     write_unlock_irqrestore(&ni->size_lock, flags);
2769     /*
2770      * We have shrunk the allocation.  If this is a shrinking truncate we
2771      * have already dealt with the initialized_size and the data_size above
2772      * and we are done.  If the truncate is only changing the allocation
2773      * and not the data_size, we are also done.  If this is an extending
2774      * truncate, need to extend the data_size now which is ensured by the
2775      * fact that @size_change is positive.
2776      */
2777 alloc_done:
2778     /*
2779      * If the size is growing, need to update it now.  If it is shrinking,
2780      * we have already updated it above (before the allocation change).
2781      */
2782     if (size_change > 0)
2783         a->data.non_resident.data_size = cpu_to_sle64(new_size);
2784     /* Ensure the modified mft record is written out. */
2785     flush_dcache_mft_record_page(ctx->ntfs_ino);
2786     mark_mft_record_dirty(ctx->ntfs_ino);
2787 unm_done:
2788     ntfs_attr_put_search_ctx(ctx);
2789     unmap_mft_record(base_ni);
2790     up_write(&ni->runlist.lock);
2791 done:
2792     /* Update the mtime and ctime on the base inode. */
2793     /* Normally ->truncate should not update ctime or mtime,
2794      * but ntfs did so before, hence it got a copy & paste
2795      * version of file_update_time().  One day someone should
2796      * fix this for real.
2797      */
2798     if (!IS_NOCMTIME(VFS_I(base_ni)) && !IS_RDONLY(VFS_I(base_ni))) {
2799         struct timespec64 now = current_time(VFS_I(base_ni));
2800         int sync_it = 0;
2801 
2802         if (!timespec64_equal(&VFS_I(base_ni)->i_mtime, &now) ||
2803             !timespec64_equal(&VFS_I(base_ni)->i_ctime, &now))
2804             sync_it = 1;
2805         VFS_I(base_ni)->i_mtime = now;
2806         VFS_I(base_ni)->i_ctime = now;
2807 
2808         if (sync_it)
2809             mark_inode_dirty_sync(VFS_I(base_ni));
2810     }
2811 
2812     if (likely(!err)) {
2813         NInoClearTruncateFailed(ni);
2814         ntfs_debug("Done.");
2815     }
2816     return err;
2817 old_bad_out:
2818     old_size = -1;
2819 bad_out:
2820     if (err != -ENOMEM && err != -EOPNOTSUPP)
2821         NVolSetErrors(vol);
2822     if (err != -EOPNOTSUPP)
2823         NInoSetTruncateFailed(ni);
2824     else if (old_size >= 0)
2825         i_size_write(vi, old_size);
2826 err_out:
2827     if (ctx)
2828         ntfs_attr_put_search_ctx(ctx);
2829     if (m)
2830         unmap_mft_record(base_ni);
2831     up_write(&ni->runlist.lock);
2832 out:
2833     ntfs_debug("Failed.  Returning error code %i.", err);
2834     return err;
2835 conv_err_out:
2836     if (err != -ENOMEM && err != -EOPNOTSUPP)
2837         NVolSetErrors(vol);
2838     if (err != -EOPNOTSUPP)
2839         NInoSetTruncateFailed(ni);
2840     else
2841         i_size_write(vi, old_size);
2842     goto out;
2843 }
2844 
2845 /**
2846  * ntfs_truncate_vfs - void flavour of ntfs_truncate()
2847  * @vi:     inode for which the i_size was changed
2848  *
2849  * Invokes ntfs_truncate() on @vi and throws away the error code it returns.
2850  * See the ntfs_truncate() description above for details.
2851  */
2852 #ifdef NTFS_RW
2853 void ntfs_truncate_vfs(struct inode *vi)
2854 {
2855     ntfs_truncate(vi);
2856 }
2857 #endif
2858 
2859 /**
2860  * ntfs_setattr - called from notify_change() when an attribute is being changed
2861  * @mnt_userns: user namespace of the mount the inode was found from
2862  * @dentry: dentry whose attributes to change
2863  * @attr:   structure describing the attributes and the changes
2864  *
2865  * Changes of user, group, and mode are refused with -EOPNOTSUPP as we do not
2866  * implement the NTFS ACLs yet.  Changes of i_size are refused the same way for
2867  * compressed and encrypted files; for all other files they are carried out by
2868  * ntfs_truncate() and any error it returns is passed on to the caller.
2869  *
2870  * Return 0 on success and -errno on error.  Called with ->i_mutex held.
2871  */
2872 int ntfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
2873          struct iattr *attr)
2874 {
2875     struct inode *vi = d_inode(dentry);
2876     int err;
2877     unsigned int ia_valid = attr->ia_valid;
2878 
2879     err = setattr_prepare(&init_user_ns, dentry, attr);
2880     if (err)
2881         goto out;
2882     /* We do not support NTFS ACLs yet. */
2883     if (ia_valid & (ATTR_UID | ATTR_GID | ATTR_MODE)) {
2884         ntfs_warning(vi->i_sb, "Changes in user/group/mode are not "
2885                 "supported yet, ignoring.");
2886         err = -EOPNOTSUPP;
2887         goto out;
2888     }
2889     if (ia_valid & ATTR_SIZE) {
2890         if (attr->ia_size != i_size_read(vi)) {
2891             ntfs_inode *ni = NTFS_I(vi);
2892 
2893             /*
2894              * FIXME: For now we do not support resizing of
2895              * compressed or encrypted files yet.
2896              */
2897             if (NInoCompressed(ni) || NInoEncrypted(ni)) {
2898                 ntfs_warning(vi->i_sb, "Changes in inode size "
2899                         "are not supported yet for "
2900                         "%s files, ignoring.",
2901                         NInoCompressed(ni) ?
2902                         "compressed" : "encrypted");
2903                 err = -EOPNOTSUPP;
2904             } else {
2905                 truncate_setsize(vi, attr->ia_size);
2906                 /* Report a failed truncate instead of hiding it. */
2907                 err = ntfs_truncate(vi);
2908             }
2909             if (err || ia_valid == ATTR_SIZE)
2910                 goto out;
2911         } else {
2912             /*
2913              * We skipped the truncate but must still update
2914              * timestamps.
2915              */
2916             ia_valid |= ATTR_MTIME | ATTR_CTIME;
2917         }
2918     }
2919     if (ia_valid & ATTR_ATIME)
2920         vi->i_atime = attr->ia_atime;
2921     if (ia_valid & ATTR_MTIME)
2922         vi->i_mtime = attr->ia_mtime;
2923     if (ia_valid & ATTR_CTIME)
2924         vi->i_ctime = attr->ia_ctime;
2925     mark_inode_dirty(vi);
2926 out:
2927     return err;
2928 }
2929 
2930 /**
2931  * __ntfs_write_inode - write out a dirty inode
2932  * @vi:     inode to write out
2933  * @sync:   if true, write out synchronously
2934  *
2935  * Write out a dirty inode to disk including any extent inodes if present.
2936  *
2937  * If @sync is true, commit the inode to disk and wait for io completion.  This
2938  * is done using write_mft_record().
2939  *
2940  * If @sync is false, just schedule the write to happen but do not wait for i/o
2941  * completion.  In 2.6 kernels, scheduling usually happens just by virtue of
2942  * marking the page (and in this case mft record) dirty but we do not implement
2943  * this yet as write_mft_record() largely ignores the @sync parameter and
2944  * always performs synchronous writes.
2945  *
2946  * Return 0 on success and -errno on error.
2947  */
2948 int __ntfs_write_inode(struct inode *vi, int sync)
2949 {
2950     sle64 nt;
2951     ntfs_inode *ni = NTFS_I(vi);
2952     ntfs_attr_search_ctx *ctx;
2953     MFT_RECORD *m;
2954     STANDARD_INFORMATION *si;
2955     int err = 0;
2956     bool modified = false;
2957 
2958     ntfs_debug("Entering for %sinode 0x%lx.", NInoAttr(ni) ? "attr " : "",
2959             vi->i_ino);
2960     /*
2961      * Dirty attribute inodes are written via their real inodes so just
2962      * clean them here.  Access time updates are taken care of when the
2963      * real inode is written.
2964      */
2965     if (NInoAttr(ni)) {
2966         NInoClearDirty(ni);
2967         ntfs_debug("Done.");
2968         return 0;
2969     }
2970     /* Map, pin, and lock the mft record belonging to the inode. */
2971     m = map_mft_record(ni);
2972     if (IS_ERR(m)) {
2973         err = PTR_ERR(m);
2974         goto err_out;
2975     }
2976     /* Update the access times in the standard information attribute. */
2977     ctx = ntfs_attr_get_search_ctx(ni, m);
2978     if (unlikely(!ctx)) {
2979         err = -ENOMEM;
2980         goto unm_err_out;
2981     }
2982     err = ntfs_attr_lookup(AT_STANDARD_INFORMATION, NULL, 0,
2983             CASE_SENSITIVE, 0, NULL, 0, ctx);
2984     if (unlikely(err)) {
2985         ntfs_attr_put_search_ctx(ctx);
2986         goto unm_err_out;
2987     }
2988     si = (STANDARD_INFORMATION*)((u8*)ctx->attr +
2989             le16_to_cpu(ctx->attr->data.resident.value_offset));
2990     /* Update the access times if they have changed. */
2991     nt = utc2ntfs(vi->i_mtime);
2992     if (si->last_data_change_time != nt) {
2993         ntfs_debug("Updating mtime for inode 0x%lx: old = 0x%llx, "
2994                 "new = 0x%llx", vi->i_ino, (long long)
2995                 sle64_to_cpu(si->last_data_change_time),
2996                 (long long)sle64_to_cpu(nt));
2997         si->last_data_change_time = nt;
2998         modified = true;
2999     }
3000     nt = utc2ntfs(vi->i_ctime);
3001     if (si->last_mft_change_time != nt) {
3002         ntfs_debug("Updating ctime for inode 0x%lx: old = 0x%llx, "
3003                 "new = 0x%llx", vi->i_ino, (long long)
3004                 sle64_to_cpu(si->last_mft_change_time),
3005                 (long long)sle64_to_cpu(nt));
3006         si->last_mft_change_time = nt;
3007         modified = true;
3008     }
3009     nt = utc2ntfs(vi->i_atime);
3010     if (si->last_access_time != nt) {
3011         ntfs_debug("Updating atime for inode 0x%lx: old = 0x%llx, "
3012                 "new = 0x%llx", vi->i_ino,
3013                 (long long)sle64_to_cpu(si->last_access_time),
3014                 (long long)sle64_to_cpu(nt));
3015         si->last_access_time = nt;
3016         modified = true;
3017     }
3018     /*
3019      * If we just modified the standard information attribute we need to
3020      * mark the mft record it is in dirty.  We do this manually so that
3021      * mark_inode_dirty() is not called which would redirty the inode and
3022      * hence result in an infinite loop of trying to write the inode.
3023      * There is no need to mark the base inode nor the base mft record
3024      * dirty, since we are going to write this mft record below in any case
3025      * and the base mft record may actually not have been modified so it
3026      * might not need to be written out.
3027      * NOTE: It is not a problem when the inode for $MFT itself is being
3028      * written out as mark_ntfs_record_dirty() will only set I_DIRTY_PAGES
3029      * on the $MFT inode and hence ntfs_write_inode() will not be
3030      * re-invoked because of it which in turn is ok since the dirtied mft
3031      * record will be cleaned and written out to disk below, i.e. before
3032      * this function returns.
3033      */
3034     if (modified) {
3035         flush_dcache_mft_record_page(ctx->ntfs_ino);
3036         if (!NInoTestSetDirty(ctx->ntfs_ino))
3037             mark_ntfs_record_dirty(ctx->ntfs_ino->page,
3038                     ctx->ntfs_ino->page_ofs);
3039     }
3040     ntfs_attr_put_search_ctx(ctx);
3041     /* Now the access times are updated, write the base mft record. */
3042     if (NInoDirty(ni))
3043         err = write_mft_record(ni, m, sync);
3044     /* Write all attached extent mft records. */
3045     mutex_lock(&ni->extent_lock);
3046     if (ni->nr_extents > 0) {
3047         ntfs_inode **extent_nis = ni->ext.extent_ntfs_inos;
3048         int i;
3049 
3050         ntfs_debug("Writing %i extent inodes.", ni->nr_extents);
3051         for (i = 0; i < ni->nr_extents; i++) {
3052             ntfs_inode *tni = extent_nis[i];
3053 
3054             if (NInoDirty(tni)) {
3055                 MFT_RECORD *tm = map_mft_record(tni);
3056                 int ret;
3057 
3058                 if (IS_ERR(tm)) {
3059                     if (!err || err == -ENOMEM)
3060                         err = PTR_ERR(tm);
3061                     continue;
3062                 }
3063                 ret = write_mft_record(tni, tm, sync);
3064                 unmap_mft_record(tni);
3065                 if (unlikely(ret)) {
3066                     if (!err || err == -ENOMEM)
3067                         err = ret;
3068                 }
3069             }
3070         }
3071     }
3072     mutex_unlock(&ni->extent_lock);
3073     unmap_mft_record(ni);
3074     if (unlikely(err))
3075         goto err_out;
3076     ntfs_debug("Done.");
3077     return 0;
3078 unm_err_out:
3079     unmap_mft_record(ni);
3080 err_out:
3081     if (err == -ENOMEM) {
3082         ntfs_warning(vi->i_sb, "Not enough memory to write inode.  "
3083                 "Marking the inode dirty again, so the VFS "
3084                 "retries later.");
3085         mark_inode_dirty(vi);
3086     } else {
3087         ntfs_error(vi->i_sb, "Failed (error %i):  Run chkdsk.", -err);
3088         NVolSetErrors(ni->vol);
3089     }
3090     return err;
3091 }
3092 
3093 #endif /* NTFS_RW */