Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /**
0003  * dir.c - NTFS kernel directory operations. Part of the Linux-NTFS project.
0004  *
0005  * Copyright (c) 2001-2007 Anton Altaparmakov
0006  * Copyright (c) 2002 Richard Russon
0007  */
0008 
0009 #include <linux/buffer_head.h>
0010 #include <linux/slab.h>
0011 #include <linux/blkdev.h>
0012 
0013 #include "dir.h"
0014 #include "aops.h"
0015 #include "attrib.h"
0016 #include "mft.h"
0017 #include "debug.h"
0018 #include "ntfs.h"
0019 
0020 /**
0021  * The little endian Unicode string $I30 as a global constant.
0022  */
0023 ntfschar I30[5] = { cpu_to_le16('$'), cpu_to_le16('I'),
0024         cpu_to_le16('3'),   cpu_to_le16('0'), 0 };
0025 
0026 /**
0027  * ntfs_lookup_inode_by_name - find an inode in a directory given its name
0028  * @dir_ni: ntfs inode of the directory in which to search for the name
0029  * @uname:  Unicode name for which to search in the directory
0030  * @uname_len:  length of the name @uname in Unicode characters
0031  * @res:    return the found file name if necessary (see below)
0032  *
0033  * Look for an inode with name @uname in the directory with inode @dir_ni.
0034  * ntfs_lookup_inode_by_name() walks the contents of the directory looking for
0035  * the Unicode name. If the name is found in the directory, the corresponding
0036  * inode number (>= 0) is returned as a mft reference in cpu format, i.e. it
0037  * is a 64-bit number containing the sequence number.
0038  *
0039  * On error, a negative value is returned corresponding to the error code. In
0040  * particular if the inode is not found -ENOENT is returned. Note that you
0041  * can't just check the return value for being negative, you have to check the
0042  * inode number for being negative which you can extract using MREC(return
0043  * value).
0044  *
0045  * Note, @uname_len does not include the (optional) terminating NULL character.
0046  *
0047  * Note, we look for a case sensitive match first but we also look for a case
0048  * insensitive match at the same time. If we find a case insensitive match, we
0049  * save that for the case that we don't find an exact match, where we return
0050  * the case insensitive match and setup @res (which we allocate!) with the mft
0051  * reference, the file name type, length and with a copy of the little endian
0052  * Unicode file name itself. If we match a file name which is in the DOS name
0053  * space, we only return the mft reference and file name type in @res.
0054  * ntfs_lookup() then uses this to find the long file name in the inode itself.
0055  * This is to avoid polluting the dcache with short file names. We want them to
0056  * work but we don't care for how quickly one can access them. This also fixes
0057  * the dcache aliasing issues.
0058  *
0059  * Locking:  - Caller must hold i_mutex on the directory.
0060  *       - Each page cache page in the index allocation mapping must be
0061  *         locked whilst being accessed otherwise we may find a corrupt
0062  *         page due to it being under ->writepage at the moment which
0063  *         applies the mst protection fixups before writing out and then
0064  *         removes them again after the write is complete after which it 
0065  *         unlocks the page.
0066  */
0067 MFT_REF ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const ntfschar *uname,
0068         const int uname_len, ntfs_name **res)
0069 {
0070     ntfs_volume *vol = dir_ni->vol;
0071     struct super_block *sb = vol->sb;
0072     MFT_RECORD *m;
0073     INDEX_ROOT *ir;
0074     INDEX_ENTRY *ie;
0075     INDEX_ALLOCATION *ia;
0076     u8 *index_end;
0077     u64 mref;
0078     ntfs_attr_search_ctx *ctx;
0079     int err, rc;
0080     VCN vcn, old_vcn;
0081     struct address_space *ia_mapping;
0082     struct page *page;
0083     u8 *kaddr;
0084     ntfs_name *name = NULL;
0085 
0086     BUG_ON(!S_ISDIR(VFS_I(dir_ni)->i_mode));
0087     BUG_ON(NInoAttr(dir_ni));
0088     /* Get hold of the mft record for the directory. */
0089     m = map_mft_record(dir_ni);
0090     if (IS_ERR(m)) {
0091         ntfs_error(sb, "map_mft_record() failed with error code %ld.",
0092                 -PTR_ERR(m));
0093         return ERR_MREF(PTR_ERR(m));
0094     }
0095     ctx = ntfs_attr_get_search_ctx(dir_ni, m);
0096     if (unlikely(!ctx)) {
0097         err = -ENOMEM;
0098         goto err_out;
0099     }
0100     /* Find the index root attribute in the mft record. */
0101     err = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL,
0102             0, ctx);
0103     if (unlikely(err)) {
0104         if (err == -ENOENT) {
0105             ntfs_error(sb, "Index root attribute missing in "
0106                     "directory inode 0x%lx.",
0107                     dir_ni->mft_no);
0108             err = -EIO;
0109         }
0110         goto err_out;
0111     }
0112     /* Get to the index root value (it's been verified in read_inode). */
0113     ir = (INDEX_ROOT*)((u8*)ctx->attr +
0114             le16_to_cpu(ctx->attr->data.resident.value_offset));
0115     index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length);
0116     /* The first index entry. */
0117     ie = (INDEX_ENTRY*)((u8*)&ir->index +
0118             le32_to_cpu(ir->index.entries_offset));
0119     /*
0120      * Loop until we exceed valid memory (corruption case) or until we
0121      * reach the last entry.
0122      */
0123     for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
0124         /* Bounds checks. */
0125         if ((u8*)ie < (u8*)ctx->mrec || (u8*)ie +
0126                 sizeof(INDEX_ENTRY_HEADER) > index_end ||
0127                 (u8*)ie + le16_to_cpu(ie->key_length) >
0128                 index_end)
0129             goto dir_err_out;
0130         /*
0131          * The last entry cannot contain a name. It can however contain
0132          * a pointer to a child node in the B+tree so we just break out.
0133          */
0134         if (ie->flags & INDEX_ENTRY_END)
0135             break;
0136         /*
0137          * We perform a case sensitive comparison and if that matches
0138          * we are done and return the mft reference of the inode (i.e.
0139          * the inode number together with the sequence number for
0140          * consistency checking). We convert it to cpu format before
0141          * returning.
0142          */
0143         if (ntfs_are_names_equal(uname, uname_len,
0144                 (ntfschar*)&ie->key.file_name.file_name,
0145                 ie->key.file_name.file_name_length,
0146                 CASE_SENSITIVE, vol->upcase, vol->upcase_len)) {
0147 found_it:
0148             /*
0149              * We have a perfect match, so we don't need to care
0150              * about having matched imperfectly before, so we can
0151              * free name and set *res to NULL.
0152              * However, if the perfect match is a short file name,
0153              * we need to signal this through *res, so that
0154              * ntfs_lookup() can fix dcache aliasing issues.
0155              * As an optimization we just reuse an existing
0156              * allocation of *res.
0157              */
0158             if (ie->key.file_name.file_name_type == FILE_NAME_DOS) {
0159                 if (!name) {
0160                     name = kmalloc(sizeof(ntfs_name),
0161                             GFP_NOFS);
0162                     if (!name) {
0163                         err = -ENOMEM;
0164                         goto err_out;
0165                     }
0166                 }
0167                 name->mref = le64_to_cpu(
0168                         ie->data.dir.indexed_file);
0169                 name->type = FILE_NAME_DOS;
0170                 name->len = 0;
0171                 *res = name;
0172             } else {
0173                 kfree(name);
0174                 *res = NULL;
0175             }
0176             mref = le64_to_cpu(ie->data.dir.indexed_file);
0177             ntfs_attr_put_search_ctx(ctx);
0178             unmap_mft_record(dir_ni);
0179             return mref;
0180         }
0181         /*
0182          * For a case insensitive mount, we also perform a case
0183          * insensitive comparison (provided the file name is not in the
0184          * POSIX namespace). If the comparison matches, and the name is
0185          * in the WIN32 namespace, we cache the filename in *res so
0186          * that the caller, ntfs_lookup(), can work on it. If the
0187          * comparison matches, and the name is in the DOS namespace, we
0188          * only cache the mft reference and the file name type (we set
0189          * the name length to zero for simplicity).
0190          */
0191         if (!NVolCaseSensitive(vol) &&
0192                 ie->key.file_name.file_name_type &&
0193                 ntfs_are_names_equal(uname, uname_len,
0194                 (ntfschar*)&ie->key.file_name.file_name,
0195                 ie->key.file_name.file_name_length,
0196                 IGNORE_CASE, vol->upcase, vol->upcase_len)) {
0197             int name_size = sizeof(ntfs_name);
0198             u8 type = ie->key.file_name.file_name_type;
0199             u8 len = ie->key.file_name.file_name_length;
0200 
0201             /* Only one case insensitive matching name allowed. */
0202             if (name) {
0203                 ntfs_error(sb, "Found already allocated name "
0204                         "in phase 1. Please run chkdsk "
0205                         "and if that doesn't find any "
0206                         "errors please report you saw "
0207                         "this message to "
0208                         "linux-ntfs-dev@lists."
0209                         "sourceforge.net.");
0210                 goto dir_err_out;
0211             }
0212 
0213             if (type != FILE_NAME_DOS)
0214                 name_size += len * sizeof(ntfschar);
0215             name = kmalloc(name_size, GFP_NOFS);
0216             if (!name) {
0217                 err = -ENOMEM;
0218                 goto err_out;
0219             }
0220             name->mref = le64_to_cpu(ie->data.dir.indexed_file);
0221             name->type = type;
0222             if (type != FILE_NAME_DOS) {
0223                 name->len = len;
0224                 memcpy(name->name, ie->key.file_name.file_name,
0225                         len * sizeof(ntfschar));
0226             } else
0227                 name->len = 0;
0228             *res = name;
0229         }
0230         /*
0231          * Not a perfect match, need to do full blown collation so we
0232          * know which way in the B+tree we have to go.
0233          */
0234         rc = ntfs_collate_names(uname, uname_len,
0235                 (ntfschar*)&ie->key.file_name.file_name,
0236                 ie->key.file_name.file_name_length, 1,
0237                 IGNORE_CASE, vol->upcase, vol->upcase_len);
0238         /*
0239          * If uname collates before the name of the current entry, there
0240          * is definitely no such name in this index but we might need to
0241          * descend into the B+tree so we just break out of the loop.
0242          */
0243         if (rc == -1)
0244             break;
0245         /* The names are not equal, continue the search. */
0246         if (rc)
0247             continue;
0248         /*
0249          * Names match with case insensitive comparison, now try the
0250          * case sensitive comparison, which is required for proper
0251          * collation.
0252          */
0253         rc = ntfs_collate_names(uname, uname_len,
0254                 (ntfschar*)&ie->key.file_name.file_name,
0255                 ie->key.file_name.file_name_length, 1,
0256                 CASE_SENSITIVE, vol->upcase, vol->upcase_len);
0257         if (rc == -1)
0258             break;
0259         if (rc)
0260             continue;
0261         /*
0262          * Perfect match, this will never happen as the
0263          * ntfs_are_names_equal() call will have gotten a match but we
0264          * still treat it correctly.
0265          */
0266         goto found_it;
0267     }
0268     /*
0269      * We have finished with this index without success. Check for the
0270      * presence of a child node and if not present return -ENOENT, unless
0271      * we have got a matching name cached in name in which case return the
0272      * mft reference associated with it.
0273      */
0274     if (!(ie->flags & INDEX_ENTRY_NODE)) {
0275         if (name) {
0276             ntfs_attr_put_search_ctx(ctx);
0277             unmap_mft_record(dir_ni);
0278             return name->mref;
0279         }
0280         ntfs_debug("Entry not found.");
0281         err = -ENOENT;
0282         goto err_out;
0283     } /* Child node present, descend into it. */
0284     /* Consistency check: Verify that an index allocation exists. */
0285     if (!NInoIndexAllocPresent(dir_ni)) {
0286         ntfs_error(sb, "No index allocation attribute but index entry "
0287                 "requires one. Directory inode 0x%lx is "
0288                 "corrupt or driver bug.", dir_ni->mft_no);
0289         goto err_out;
0290     }
0291     /* Get the starting vcn of the index_block holding the child node. */
0292     vcn = sle64_to_cpup((sle64*)((u8*)ie + le16_to_cpu(ie->length) - 8));
0293     ia_mapping = VFS_I(dir_ni)->i_mapping;
0294     /*
0295      * We are done with the index root and the mft record. Release them,
0296      * otherwise we deadlock with ntfs_map_page().
0297      */
0298     ntfs_attr_put_search_ctx(ctx);
0299     unmap_mft_record(dir_ni);
0300     m = NULL;
0301     ctx = NULL;
0302 descend_into_child_node:
0303     /*
0304      * Convert vcn to index into the index allocation attribute in units
0305      * of PAGE_SIZE and map the page cache page, reading it from
0306      * disk if necessary.
0307      */
0308     page = ntfs_map_page(ia_mapping, vcn <<
0309             dir_ni->itype.index.vcn_size_bits >> PAGE_SHIFT);
0310     if (IS_ERR(page)) {
0311         ntfs_error(sb, "Failed to map directory index page, error %ld.",
0312                 -PTR_ERR(page));
0313         err = PTR_ERR(page);
0314         goto err_out;
0315     }
0316     lock_page(page);
0317     kaddr = (u8*)page_address(page);
0318 fast_descend_into_child_node:
0319     /* Get to the index allocation block. */
0320     ia = (INDEX_ALLOCATION*)(kaddr + ((vcn <<
0321             dir_ni->itype.index.vcn_size_bits) & ~PAGE_MASK));
0322     /* Bounds checks. */
0323     if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_SIZE) {
0324         ntfs_error(sb, "Out of bounds check failed. Corrupt directory "
0325                 "inode 0x%lx or driver bug.", dir_ni->mft_no);
0326         goto unm_err_out;
0327     }
0328     /* Catch multi sector transfer fixup errors. */
0329     if (unlikely(!ntfs_is_indx_record(ia->magic))) {
0330         ntfs_error(sb, "Directory index record with vcn 0x%llx is "
0331                 "corrupt.  Corrupt inode 0x%lx.  Run chkdsk.",
0332                 (unsigned long long)vcn, dir_ni->mft_no);
0333         goto unm_err_out;
0334     }
0335     if (sle64_to_cpu(ia->index_block_vcn) != vcn) {
0336         ntfs_error(sb, "Actual VCN (0x%llx) of index buffer is "
0337                 "different from expected VCN (0x%llx). "
0338                 "Directory inode 0x%lx is corrupt or driver "
0339                 "bug.", (unsigned long long)
0340                 sle64_to_cpu(ia->index_block_vcn),
0341                 (unsigned long long)vcn, dir_ni->mft_no);
0342         goto unm_err_out;
0343     }
0344     if (le32_to_cpu(ia->index.allocated_size) + 0x18 !=
0345             dir_ni->itype.index.block_size) {
0346         ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
0347                 "0x%lx has a size (%u) differing from the "
0348                 "directory specified size (%u). Directory "
0349                 "inode is corrupt or driver bug.",
0350                 (unsigned long long)vcn, dir_ni->mft_no,
0351                 le32_to_cpu(ia->index.allocated_size) + 0x18,
0352                 dir_ni->itype.index.block_size);
0353         goto unm_err_out;
0354     }
0355     index_end = (u8*)ia + dir_ni->itype.index.block_size;
0356     if (index_end > kaddr + PAGE_SIZE) {
0357         ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
0358                 "0x%lx crosses page boundary. Impossible! "
0359                 "Cannot access! This is probably a bug in the "
0360                 "driver.", (unsigned long long)vcn,
0361                 dir_ni->mft_no);
0362         goto unm_err_out;
0363     }
0364     index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length);
0365     if (index_end > (u8*)ia + dir_ni->itype.index.block_size) {
0366         ntfs_error(sb, "Size of index buffer (VCN 0x%llx) of directory "
0367                 "inode 0x%lx exceeds maximum size.",
0368                 (unsigned long long)vcn, dir_ni->mft_no);
0369         goto unm_err_out;
0370     }
0371     /* The first index entry. */
0372     ie = (INDEX_ENTRY*)((u8*)&ia->index +
0373             le32_to_cpu(ia->index.entries_offset));
0374     /*
0375      * Iterate similar to above big loop but applied to index buffer, thus
0376      * loop until we exceed valid memory (corruption case) or until we
0377      * reach the last entry.
0378      */
0379     for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
0380         /* Bounds check. */
0381         if ((u8*)ie < (u8*)ia || (u8*)ie +
0382                 sizeof(INDEX_ENTRY_HEADER) > index_end ||
0383                 (u8*)ie + le16_to_cpu(ie->key_length) >
0384                 index_end) {
0385             ntfs_error(sb, "Index entry out of bounds in "
0386                     "directory inode 0x%lx.",
0387                     dir_ni->mft_no);
0388             goto unm_err_out;
0389         }
0390         /*
0391          * The last entry cannot contain a name. It can however contain
0392          * a pointer to a child node in the B+tree so we just break out.
0393          */
0394         if (ie->flags & INDEX_ENTRY_END)
0395             break;
0396         /*
0397          * We perform a case sensitive comparison and if that matches
0398          * we are done and return the mft reference of the inode (i.e.
0399          * the inode number together with the sequence number for
0400          * consistency checking). We convert it to cpu format before
0401          * returning.
0402          */
0403         if (ntfs_are_names_equal(uname, uname_len,
0404                 (ntfschar*)&ie->key.file_name.file_name,
0405                 ie->key.file_name.file_name_length,
0406                 CASE_SENSITIVE, vol->upcase, vol->upcase_len)) {
0407 found_it2:
0408             /*
0409              * We have a perfect match, so we don't need to care
0410              * about having matched imperfectly before, so we can
0411              * free name and set *res to NULL.
0412              * However, if the perfect match is a short file name,
0413              * we need to signal this through *res, so that
0414              * ntfs_lookup() can fix dcache aliasing issues.
0415              * As an optimization we just reuse an existing
0416              * allocation of *res.
0417              */
0418             if (ie->key.file_name.file_name_type == FILE_NAME_DOS) {
0419                 if (!name) {
0420                     name = kmalloc(sizeof(ntfs_name),
0421                             GFP_NOFS);
0422                     if (!name) {
0423                         err = -ENOMEM;
0424                         goto unm_err_out;
0425                     }
0426                 }
0427                 name->mref = le64_to_cpu(
0428                         ie->data.dir.indexed_file);
0429                 name->type = FILE_NAME_DOS;
0430                 name->len = 0;
0431                 *res = name;
0432             } else {
0433                 kfree(name);
0434                 *res = NULL;
0435             }
0436             mref = le64_to_cpu(ie->data.dir.indexed_file);
0437             unlock_page(page);
0438             ntfs_unmap_page(page);
0439             return mref;
0440         }
0441         /*
0442          * For a case insensitive mount, we also perform a case
0443          * insensitive comparison (provided the file name is not in the
0444          * POSIX namespace). If the comparison matches, and the name is
0445          * in the WIN32 namespace, we cache the filename in *res so
0446          * that the caller, ntfs_lookup(), can work on it. If the
0447          * comparison matches, and the name is in the DOS namespace, we
0448          * only cache the mft reference and the file name type (we set
0449          * the name length to zero for simplicity).
0450          */
0451         if (!NVolCaseSensitive(vol) &&
0452                 ie->key.file_name.file_name_type &&
0453                 ntfs_are_names_equal(uname, uname_len,
0454                 (ntfschar*)&ie->key.file_name.file_name,
0455                 ie->key.file_name.file_name_length,
0456                 IGNORE_CASE, vol->upcase, vol->upcase_len)) {
0457             int name_size = sizeof(ntfs_name);
0458             u8 type = ie->key.file_name.file_name_type;
0459             u8 len = ie->key.file_name.file_name_length;
0460 
0461             /* Only one case insensitive matching name allowed. */
0462             if (name) {
0463                 ntfs_error(sb, "Found already allocated name "
0464                         "in phase 2. Please run chkdsk "
0465                         "and if that doesn't find any "
0466                         "errors please report you saw "
0467                         "this message to "
0468                         "linux-ntfs-dev@lists."
0469                         "sourceforge.net.");
0470                 unlock_page(page);
0471                 ntfs_unmap_page(page);
0472                 goto dir_err_out;
0473             }
0474 
0475             if (type != FILE_NAME_DOS)
0476                 name_size += len * sizeof(ntfschar);
0477             name = kmalloc(name_size, GFP_NOFS);
0478             if (!name) {
0479                 err = -ENOMEM;
0480                 goto unm_err_out;
0481             }
0482             name->mref = le64_to_cpu(ie->data.dir.indexed_file);
0483             name->type = type;
0484             if (type != FILE_NAME_DOS) {
0485                 name->len = len;
0486                 memcpy(name->name, ie->key.file_name.file_name,
0487                         len * sizeof(ntfschar));
0488             } else
0489                 name->len = 0;
0490             *res = name;
0491         }
0492         /*
0493          * Not a perfect match, need to do full blown collation so we
0494          * know which way in the B+tree we have to go.
0495          */
0496         rc = ntfs_collate_names(uname, uname_len,
0497                 (ntfschar*)&ie->key.file_name.file_name,
0498                 ie->key.file_name.file_name_length, 1,
0499                 IGNORE_CASE, vol->upcase, vol->upcase_len);
0500         /*
0501          * If uname collates before the name of the current entry, there
0502          * is definitely no such name in this index but we might need to
0503          * descend into the B+tree so we just break out of the loop.
0504          */
0505         if (rc == -1)
0506             break;
0507         /* The names are not equal, continue the search. */
0508         if (rc)
0509             continue;
0510         /*
0511          * Names match with case insensitive comparison, now try the
0512          * case sensitive comparison, which is required for proper
0513          * collation.
0514          */
0515         rc = ntfs_collate_names(uname, uname_len,
0516                 (ntfschar*)&ie->key.file_name.file_name,
0517                 ie->key.file_name.file_name_length, 1,
0518                 CASE_SENSITIVE, vol->upcase, vol->upcase_len);
0519         if (rc == -1)
0520             break;
0521         if (rc)
0522             continue;
0523         /*
0524          * Perfect match, this will never happen as the
0525          * ntfs_are_names_equal() call will have gotten a match but we
0526          * still treat it correctly.
0527          */
0528         goto found_it2;
0529     }
0530     /*
0531      * We have finished with this index buffer without success. Check for
0532      * the presence of a child node.
0533      */
0534     if (ie->flags & INDEX_ENTRY_NODE) {
0535         if ((ia->index.flags & NODE_MASK) == LEAF_NODE) {
0536             ntfs_error(sb, "Index entry with child node found in "
0537                     "a leaf node in directory inode 0x%lx.",
0538                     dir_ni->mft_no);
0539             goto unm_err_out;
0540         }
0541         /* Child node present, descend into it. */
0542         old_vcn = vcn;
0543         vcn = sle64_to_cpup((sle64*)((u8*)ie +
0544                 le16_to_cpu(ie->length) - 8));
0545         if (vcn >= 0) {
0546             /* If vcn is in the same page cache page as old_vcn we
0547              * recycle the mapped page. */
0548             if (old_vcn << vol->cluster_size_bits >>
0549                     PAGE_SHIFT == vcn <<
0550                     vol->cluster_size_bits >>
0551                     PAGE_SHIFT)
0552                 goto fast_descend_into_child_node;
0553             unlock_page(page);
0554             ntfs_unmap_page(page);
0555             goto descend_into_child_node;
0556         }
0557         ntfs_error(sb, "Negative child node vcn in directory inode "
0558                 "0x%lx.", dir_ni->mft_no);
0559         goto unm_err_out;
0560     }
0561     /*
0562      * No child node present, return -ENOENT, unless we have got a matching
0563      * name cached in name in which case return the mft reference
0564      * associated with it.
0565      */
0566     if (name) {
0567         unlock_page(page);
0568         ntfs_unmap_page(page);
0569         return name->mref;
0570     }
0571     ntfs_debug("Entry not found.");
0572     err = -ENOENT;
0573 unm_err_out:
0574     unlock_page(page);
0575     ntfs_unmap_page(page);
0576 err_out:
0577     if (!err)
0578         err = -EIO;
0579     if (ctx)
0580         ntfs_attr_put_search_ctx(ctx);
0581     if (m)
0582         unmap_mft_record(dir_ni);
0583     if (name) {
0584         kfree(name);
0585         *res = NULL;
0586     }
0587     return ERR_MREF(err);
0588 dir_err_out:
0589     ntfs_error(sb, "Corrupt directory.  Aborting lookup.");
0590     goto err_out;
0591 }
0592 
0593 #if 0
0594 
0595 // TODO: (AIA)
0596 // The algorithm embedded in this code will be required for the time when we
0597 // want to support adding of entries to directories, where we require correct
0598 // collation of file names in order not to cause corruption of the filesystem.
0599 
0600 /**
0601  * ntfs_lookup_inode_by_name - find an inode in a directory given its name
0602  * @dir_ni: ntfs inode of the directory in which to search for the name
0603  * @uname:  Unicode name for which to search in the directory
0604  * @uname_len:  length of the name @uname in Unicode characters
0605  *
0606  * Look for an inode with name @uname in the directory with inode @dir_ni.
0607  * ntfs_lookup_inode_by_name() walks the contents of the directory looking for
0608  * the Unicode name. If the name is found in the directory, the corresponding
0609  * inode number (>= 0) is returned as a mft reference in cpu format, i.e. it
0610  * is a 64-bit number containing the sequence number.
0611  *
0612  * On error, a negative value is returned corresponding to the error code. In
0613  * particular if the inode is not found -ENOENT is returned. Note that you
0614  * can't just check the return value for being negative, you have to check the
0615  * inode number for being negative which you can extract using MREC(return
0616  * value).
0617  *
0618  * Note, @uname_len does not include the (optional) terminating NULL character.
0619  */
0620 u64 ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const ntfschar *uname,
0621         const int uname_len)
0622 {
0623     ntfs_volume *vol = dir_ni->vol;
0624     struct super_block *sb = vol->sb;
0625     MFT_RECORD *m;
0626     INDEX_ROOT *ir;
0627     INDEX_ENTRY *ie;
0628     INDEX_ALLOCATION *ia;
0629     u8 *index_end;
0630     u64 mref;
0631     ntfs_attr_search_ctx *ctx;
0632     int err, rc;
0633     IGNORE_CASE_BOOL ic;
0634     VCN vcn, old_vcn;
0635     struct address_space *ia_mapping;
0636     struct page *page;
0637     u8 *kaddr;
0638 
0639     /* Get hold of the mft record for the directory. */
0640     m = map_mft_record(dir_ni);
0641     if (IS_ERR(m)) {
0642         ntfs_error(sb, "map_mft_record() failed with error code %ld.",
0643                 -PTR_ERR(m));
0644         return ERR_MREF(PTR_ERR(m));
0645     }
0646     ctx = ntfs_attr_get_search_ctx(dir_ni, m);
0647     if (!ctx) {
0648         err = -ENOMEM;
0649         goto err_out;
0650     }
0651     /* Find the index root attribute in the mft record. */
0652     err = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL,
0653             0, ctx);
0654     if (unlikely(err)) {
0655         if (err == -ENOENT) {
0656             ntfs_error(sb, "Index root attribute missing in "
0657                     "directory inode 0x%lx.",
0658                     dir_ni->mft_no);
0659             err = -EIO;
0660         }
0661         goto err_out;
0662     }
0663     /* Get to the index root value (it's been verified in read_inode). */
0664     ir = (INDEX_ROOT*)((u8*)ctx->attr +
0665             le16_to_cpu(ctx->attr->data.resident.value_offset));
0666     index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length);
0667     /* The first index entry. */
0668     ie = (INDEX_ENTRY*)((u8*)&ir->index +
0669             le32_to_cpu(ir->index.entries_offset));
0670     /*
0671      * Loop until we exceed valid memory (corruption case) or until we
0672      * reach the last entry.
0673      */
0674     for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
0675         /* Bounds checks. */
0676         if ((u8*)ie < (u8*)ctx->mrec || (u8*)ie +
0677                 sizeof(INDEX_ENTRY_HEADER) > index_end ||
0678                 (u8*)ie + le16_to_cpu(ie->key_length) >
0679                 index_end)
0680             goto dir_err_out;
0681         /*
0682          * The last entry cannot contain a name. It can however contain
0683          * a pointer to a child node in the B+tree so we just break out.
0684          */
0685         if (ie->flags & INDEX_ENTRY_END)
0686             break;
0687         /*
0688          * If the current entry has a name type of POSIX, the name is
0689          * case sensitive and not otherwise. This has the effect of us
0690          * not being able to access any POSIX file names which collate
0691          * after the non-POSIX one when they only differ in case, but
0692          * anyone doing screwy stuff like that deserves to burn in
0693          * hell... Doing that kind of stuff on NT4 actually causes
0694          * corruption on the partition even when using SP6a and Linux
0695          * is not involved at all.
0696          */
0697         ic = ie->key.file_name.file_name_type ? IGNORE_CASE :
0698                 CASE_SENSITIVE;
0699         /*
0700          * If the names match perfectly, we are done and return the
0701          * mft reference of the inode (i.e. the inode number together
0702          * with the sequence number for consistency checking. We
0703          * convert it to cpu format before returning.
0704          */
0705         if (ntfs_are_names_equal(uname, uname_len,
0706                 (ntfschar*)&ie->key.file_name.file_name,
0707                 ie->key.file_name.file_name_length, ic,
0708                 vol->upcase, vol->upcase_len)) {
0709 found_it:
0710             mref = le64_to_cpu(ie->data.dir.indexed_file);
0711             ntfs_attr_put_search_ctx(ctx);
0712             unmap_mft_record(dir_ni);
0713             return mref;
0714         }
0715         /*
0716          * Not a perfect match, need to do full blown collation so we
0717          * know which way in the B+tree we have to go.
0718          */
0719         rc = ntfs_collate_names(uname, uname_len,
0720                 (ntfschar*)&ie->key.file_name.file_name,
0721                 ie->key.file_name.file_name_length, 1,
0722                 IGNORE_CASE, vol->upcase, vol->upcase_len);
0723         /*
0724          * If uname collates before the name of the current entry, there
0725          * is definitely no such name in this index but we might need to
0726          * descend into the B+tree so we just break out of the loop.
0727          */
0728         if (rc == -1)
0729             break;
0730         /* The names are not equal, continue the search. */
0731         if (rc)
0732             continue;
0733         /*
0734          * Names match with case insensitive comparison, now try the
0735          * case sensitive comparison, which is required for proper
0736          * collation.
0737          */
0738         rc = ntfs_collate_names(uname, uname_len,
0739                 (ntfschar*)&ie->key.file_name.file_name,
0740                 ie->key.file_name.file_name_length, 1,
0741                 CASE_SENSITIVE, vol->upcase, vol->upcase_len);
0742         if (rc == -1)
0743             break;
0744         if (rc)
0745             continue;
0746         /*
0747          * Perfect match, this will never happen as the
0748          * ntfs_are_names_equal() call will have gotten a match but we
0749          * still treat it correctly.
0750          */
0751         goto found_it;
0752     }
0753     /*
0754      * We have finished with this index without success. Check for the
0755      * presence of a child node.
0756      */
0757     if (!(ie->flags & INDEX_ENTRY_NODE)) {
0758         /* No child node, return -ENOENT. */
0759         err = -ENOENT;
0760         goto err_out;
0761     } /* Child node present, descend into it. */
0762     /* Consistency check: Verify that an index allocation exists. */
0763     if (!NInoIndexAllocPresent(dir_ni)) {
0764         ntfs_error(sb, "No index allocation attribute but index entry "
0765                 "requires one. Directory inode 0x%lx is "
0766                 "corrupt or driver bug.", dir_ni->mft_no);
0767         goto err_out;
0768     }
0769     /* Get the starting vcn of the index_block holding the child node. */
0770     vcn = sle64_to_cpup((u8*)ie + le16_to_cpu(ie->length) - 8);
0771     ia_mapping = VFS_I(dir_ni)->i_mapping;
0772     /*
0773      * We are done with the index root and the mft record. Release them,
0774      * otherwise we deadlock with ntfs_map_page().
0775      */
0776     ntfs_attr_put_search_ctx(ctx);
0777     unmap_mft_record(dir_ni);
0778     m = NULL;
0779     ctx = NULL;
0780 descend_into_child_node:
0781     /*
0782      * Convert vcn to index into the index allocation attribute in units
0783      * of PAGE_SIZE and map the page cache page, reading it from
0784      * disk if necessary.
0785      */
0786     page = ntfs_map_page(ia_mapping, vcn <<
0787             dir_ni->itype.index.vcn_size_bits >> PAGE_SHIFT);
0788     if (IS_ERR(page)) {
0789         ntfs_error(sb, "Failed to map directory index page, error %ld.",
0790                 -PTR_ERR(page));
0791         err = PTR_ERR(page);
0792         goto err_out;
0793     }
0794     lock_page(page);
0795     kaddr = (u8*)page_address(page);
0796 fast_descend_into_child_node:
0797     /* Get to the index allocation block. */
0798     ia = (INDEX_ALLOCATION*)(kaddr + ((vcn <<
0799             dir_ni->itype.index.vcn_size_bits) & ~PAGE_MASK));
0800     /* Bounds checks. */
0801     if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_SIZE) {
0802         ntfs_error(sb, "Out of bounds check failed. Corrupt directory "
0803                 "inode 0x%lx or driver bug.", dir_ni->mft_no);
0804         goto unm_err_out;
0805     }
0806     /* Catch multi sector transfer fixup errors. */
0807     if (unlikely(!ntfs_is_indx_record(ia->magic))) {
0808         ntfs_error(sb, "Directory index record with vcn 0x%llx is "
0809                 "corrupt.  Corrupt inode 0x%lx.  Run chkdsk.",
0810                 (unsigned long long)vcn, dir_ni->mft_no);
0811         goto unm_err_out;
0812     }
0813     if (sle64_to_cpu(ia->index_block_vcn) != vcn) {
0814         ntfs_error(sb, "Actual VCN (0x%llx) of index buffer is "
0815                 "different from expected VCN (0x%llx). "
0816                 "Directory inode 0x%lx is corrupt or driver "
0817                 "bug.", (unsigned long long)
0818                 sle64_to_cpu(ia->index_block_vcn),
0819                 (unsigned long long)vcn, dir_ni->mft_no);
0820         goto unm_err_out;
0821     }
0822     if (le32_to_cpu(ia->index.allocated_size) + 0x18 !=
0823             dir_ni->itype.index.block_size) {
0824         ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
0825                 "0x%lx has a size (%u) differing from the "
0826                 "directory specified size (%u). Directory "
0827                 "inode is corrupt or driver bug.",
0828                 (unsigned long long)vcn, dir_ni->mft_no,
0829                 le32_to_cpu(ia->index.allocated_size) + 0x18,
0830                 dir_ni->itype.index.block_size);
0831         goto unm_err_out;
0832     }
0833     index_end = (u8*)ia + dir_ni->itype.index.block_size;
0834     if (index_end > kaddr + PAGE_SIZE) {
0835         ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
0836                 "0x%lx crosses page boundary. Impossible! "
0837                 "Cannot access! This is probably a bug in the "
0838                 "driver.", (unsigned long long)vcn,
0839                 dir_ni->mft_no);
0840         goto unm_err_out;
0841     }
0842     index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length);
0843     if (index_end > (u8*)ia + dir_ni->itype.index.block_size) {
0844         ntfs_error(sb, "Size of index buffer (VCN 0x%llx) of directory "
0845                 "inode 0x%lx exceeds maximum size.",
0846                 (unsigned long long)vcn, dir_ni->mft_no);
0847         goto unm_err_out;
0848     }
0849     /* The first index entry. */
0850     ie = (INDEX_ENTRY*)((u8*)&ia->index +
0851             le32_to_cpu(ia->index.entries_offset));
0852     /*
0853      * Iterate similar to above big loop but applied to index buffer, thus
0854      * loop until we exceed valid memory (corruption case) or until we
0855      * reach the last entry.
0856      */
0857     for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
0858         /* Bounds check. */
0859         if ((u8*)ie < (u8*)ia || (u8*)ie +
0860                 sizeof(INDEX_ENTRY_HEADER) > index_end ||
0861                 (u8*)ie + le16_to_cpu(ie->key_length) >
0862                 index_end) {
0863             ntfs_error(sb, "Index entry out of bounds in "
0864                     "directory inode 0x%lx.",
0865                     dir_ni->mft_no);
0866             goto unm_err_out;
0867         }
0868         /*
0869          * The last entry cannot contain a name. It can however contain
0870          * a pointer to a child node in the B+tree so we just break out.
0871          */
0872         if (ie->flags & INDEX_ENTRY_END)
0873             break;
0874         /*
0875          * If the current entry has a name type of POSIX, the name is
0876          * case sensitive and not otherwise. This has the effect of us
0877          * not being able to access any POSIX file names which collate
0878          * after the non-POSIX one when they only differ in case, but
0879          * anyone doing screwy stuff like that deserves to burn in
0880          * hell... Doing that kind of stuff on NT4 actually causes
0881          * corruption on the partition even when using SP6a and Linux
0882          * is not involved at all.
0883          */
0884         ic = ie->key.file_name.file_name_type ? IGNORE_CASE :
0885                 CASE_SENSITIVE;
0886         /*
0887          * If the names match perfectly, we are done and return the
0888          * mft reference of the inode (i.e. the inode number together
0889          * with the sequence number for consistency checking. We
0890          * convert it to cpu format before returning.
0891          */
0892         if (ntfs_are_names_equal(uname, uname_len,
0893                 (ntfschar*)&ie->key.file_name.file_name,
0894                 ie->key.file_name.file_name_length, ic,
0895                 vol->upcase, vol->upcase_len)) {
0896 found_it2:
0897             mref = le64_to_cpu(ie->data.dir.indexed_file);
0898             unlock_page(page);
0899             ntfs_unmap_page(page);
0900             return mref;
0901         }
0902         /*
0903          * Not a perfect match, need to do full blown collation so we
0904          * know which way in the B+tree we have to go.
0905          */
0906         rc = ntfs_collate_names(uname, uname_len,
0907                 (ntfschar*)&ie->key.file_name.file_name,
0908                 ie->key.file_name.file_name_length, 1,
0909                 IGNORE_CASE, vol->upcase, vol->upcase_len);
0910         /*
0911          * If uname collates before the name of the current entry, there
0912          * is definitely no such name in this index but we might need to
0913          * descend into the B+tree so we just break out of the loop.
0914          */
0915         if (rc == -1)
0916             break;
0917         /* The names are not equal, continue the search. */
0918         if (rc)
0919             continue;
0920         /*
0921          * Names match with case insensitive comparison, now try the
0922          * case sensitive comparison, which is required for proper
0923          * collation.
0924          */
0925         rc = ntfs_collate_names(uname, uname_len,
0926                 (ntfschar*)&ie->key.file_name.file_name,
0927                 ie->key.file_name.file_name_length, 1,
0928                 CASE_SENSITIVE, vol->upcase, vol->upcase_len);
0929         if (rc == -1)
0930             break;
0931         if (rc)
0932             continue;
0933         /*
0934          * Perfect match, this will never happen as the
0935          * ntfs_are_names_equal() call will have gotten a match but we
0936          * still treat it correctly.
0937          */
0938         goto found_it2;
0939     }
0940     /*
0941      * We have finished with this index buffer without success. Check for
0942      * the presence of a child node.
0943      */
0944     if (ie->flags & INDEX_ENTRY_NODE) {
0945         if ((ia->index.flags & NODE_MASK) == LEAF_NODE) {
0946             ntfs_error(sb, "Index entry with child node found in "
0947                     "a leaf node in directory inode 0x%lx.",
0948                     dir_ni->mft_no);
0949             goto unm_err_out;
0950         }
0951         /* Child node present, descend into it. */
0952         old_vcn = vcn;
0953         vcn = sle64_to_cpup((u8*)ie + le16_to_cpu(ie->length) - 8);
0954         if (vcn >= 0) {
0955             /* If vcn is in the same page cache page as old_vcn we
0956              * recycle the mapped page. */
0957             if (old_vcn << vol->cluster_size_bits >>
0958                     PAGE_SHIFT == vcn <<
0959                     vol->cluster_size_bits >>
0960                     PAGE_SHIFT)
0961                 goto fast_descend_into_child_node;
0962             unlock_page(page);
0963             ntfs_unmap_page(page);
0964             goto descend_into_child_node;
0965         }
0966         ntfs_error(sb, "Negative child node vcn in directory inode "
0967                 "0x%lx.", dir_ni->mft_no);
0968         goto unm_err_out;
0969     }
0970     /* No child node, return -ENOENT. */
0971     ntfs_debug("Entry not found.");
0972     err = -ENOENT;
0973 unm_err_out:
0974     unlock_page(page);
0975     ntfs_unmap_page(page);
0976 err_out:
0977     if (!err)
0978         err = -EIO;
0979     if (ctx)
0980         ntfs_attr_put_search_ctx(ctx);
0981     if (m)
0982         unmap_mft_record(dir_ni);
0983     return ERR_MREF(err);
0984 dir_err_out:
0985     ntfs_error(sb, "Corrupt directory. Aborting lookup.");
0986     goto err_out;
0987 }
0988 
0989 #endif
0990 
0991 /**
0992  * ntfs_filldir - ntfs specific filldir method
0993  * @vol:    current ntfs volume
0994  * @ndir:   ntfs inode of current directory
0995  * @ia_page:    page in which the index allocation buffer @ie is in resides
0996  * @ie:     current index entry
0997  * @name:   buffer to use for the converted name
0998  * @actor:  what to feed the entries to
0999  *
1000  * Convert the Unicode @name to the loaded NLS and pass it to the @filldir
1001  * callback.
1002  *
1003  * If @ia_page is not NULL it is the locked page containing the index
1004  * allocation block containing the index entry @ie.
1005  *
1006  * Note, we drop (and then reacquire) the page lock on @ia_page across the
1007  * @filldir() call otherwise we would deadlock with NFSd when it calls ->lookup
1008  * since ntfs_lookup() will lock the same page.  As an optimization, we do not
1009  * retake the lock if we are returning a non-zero value as ntfs_readdir()
1010  * would need to drop the lock immediately anyway.
1011  */
1012 static inline int ntfs_filldir(ntfs_volume *vol,
1013         ntfs_inode *ndir, struct page *ia_page, INDEX_ENTRY *ie,
1014         u8 *name, struct dir_context *actor)
1015 {
1016     unsigned long mref;
1017     int name_len;
1018     unsigned dt_type;
1019     FILE_NAME_TYPE_FLAGS name_type;
1020 
1021     name_type = ie->key.file_name.file_name_type;
1022     if (name_type == FILE_NAME_DOS) {
1023         ntfs_debug("Skipping DOS name space entry.");
1024         return 0;
1025     }
1026     if (MREF_LE(ie->data.dir.indexed_file) == FILE_root) {
1027         ntfs_debug("Skipping root directory self reference entry.");
1028         return 0;
1029     }
1030     if (MREF_LE(ie->data.dir.indexed_file) < FILE_first_user &&
1031             !NVolShowSystemFiles(vol)) {
1032         ntfs_debug("Skipping system file.");
1033         return 0;
1034     }
1035     name_len = ntfs_ucstonls(vol, (ntfschar*)&ie->key.file_name.file_name,
1036             ie->key.file_name.file_name_length, &name,
1037             NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1);
1038     if (name_len <= 0) {
1039         ntfs_warning(vol->sb, "Skipping unrepresentable inode 0x%llx.",
1040                 (long long)MREF_LE(ie->data.dir.indexed_file));
1041         return 0;
1042     }
1043     if (ie->key.file_name.file_attributes &
1044             FILE_ATTR_DUP_FILE_NAME_INDEX_PRESENT)
1045         dt_type = DT_DIR;
1046     else
1047         dt_type = DT_REG;
1048     mref = MREF_LE(ie->data.dir.indexed_file);
1049     /*
1050      * Drop the page lock otherwise we deadlock with NFS when it calls
1051      * ->lookup since ntfs_lookup() will lock the same page.
1052      */
1053     if (ia_page)
1054         unlock_page(ia_page);
1055     ntfs_debug("Calling filldir for %s with len %i, fpos 0x%llx, inode "
1056             "0x%lx, DT_%s.", name, name_len, actor->pos, mref,
1057             dt_type == DT_DIR ? "DIR" : "REG");
1058     if (!dir_emit(actor, name, name_len, mref, dt_type))
1059         return 1;
1060     /* Relock the page but not if we are aborting ->readdir. */
1061     if (ia_page)
1062         lock_page(ia_page);
1063     return 0;
1064 }
1065 
1066 /*
1067  * We use the same basic approach as the old NTFS driver, i.e. we parse the
1068  * index root entries and then the index allocation entries that are marked
1069  * as in use in the index bitmap.
1070  *
1071  * While this will return the names in random order this doesn't matter for
1072  * ->readdir but OTOH results in a faster ->readdir.
1073  *
1074  * VFS calls ->readdir without BKL but with i_mutex held. This protects the VFS
1075  * parts (e.g. ->f_pos and ->i_size, and it also protects against directory
1076  * modifications).
1077  *
1078  * Locking:  - Caller must hold i_mutex on the directory.
1079  *       - Each page cache page in the index allocation mapping must be
1080  *         locked whilst being accessed otherwise we may find a corrupt
1081  *         page due to it being under ->writepage at the moment which
1082  *         applies the mst protection fixups before writing out and then
1083  *         removes them again after the write is complete after which it 
1084  *         unlocks the page.
1085  */
1086 static int ntfs_readdir(struct file *file, struct dir_context *actor)
1087 {
1088     s64 ia_pos, ia_start, prev_ia_pos, bmp_pos;
1089     loff_t i_size;
1090     struct inode *bmp_vi, *vdir = file_inode(file);
1091     struct super_block *sb = vdir->i_sb;
1092     ntfs_inode *ndir = NTFS_I(vdir);
1093     ntfs_volume *vol = NTFS_SB(sb);
1094     MFT_RECORD *m;
1095     INDEX_ROOT *ir = NULL;
1096     INDEX_ENTRY *ie;
1097     INDEX_ALLOCATION *ia;
1098     u8 *name = NULL;
1099     int rc, err, ir_pos, cur_bmp_pos;
1100     struct address_space *ia_mapping, *bmp_mapping;
1101     struct page *bmp_page = NULL, *ia_page = NULL;
1102     u8 *kaddr, *bmp, *index_end;
1103     ntfs_attr_search_ctx *ctx;
1104 
1105     ntfs_debug("Entering for inode 0x%lx, fpos 0x%llx.",
1106             vdir->i_ino, actor->pos);
1107     rc = err = 0;
1108     /* Are we at end of dir yet? */
1109     i_size = i_size_read(vdir);
1110     if (actor->pos >= i_size + vol->mft_record_size)
1111         return 0;
1112     /* Emulate . and .. for all directories. */
1113     if (!dir_emit_dots(file, actor))
1114         return 0;
1115     m = NULL;
1116     ctx = NULL;
1117     /*
1118      * Allocate a buffer to store the current name being processed
1119      * converted to format determined by current NLS.
1120      */
1121     name = kmalloc(NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1, GFP_NOFS);
1122     if (unlikely(!name)) {
1123         err = -ENOMEM;
1124         goto err_out;
1125     }
1126     /* Are we jumping straight into the index allocation attribute? */
1127     if (actor->pos >= vol->mft_record_size)
1128         goto skip_index_root;
1129     /* Get hold of the mft record for the directory. */
1130     m = map_mft_record(ndir);
1131     if (IS_ERR(m)) {
1132         err = PTR_ERR(m);
1133         m = NULL;
1134         goto err_out;
1135     }
1136     ctx = ntfs_attr_get_search_ctx(ndir, m);
1137     if (unlikely(!ctx)) {
1138         err = -ENOMEM;
1139         goto err_out;
1140     }
1141     /* Get the offset into the index root attribute. */
1142     ir_pos = (s64)actor->pos;
1143     /* Find the index root attribute in the mft record. */
1144     err = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL,
1145             0, ctx);
1146     if (unlikely(err)) {
1147         ntfs_error(sb, "Index root attribute missing in directory "
1148                 "inode 0x%lx.", vdir->i_ino);
1149         goto err_out;
1150     }
1151     /*
1152      * Copy the index root attribute value to a buffer so that we can put
1153      * the search context and unmap the mft record before calling the
1154      * filldir() callback.  We need to do this because of NFSd which calls
1155      * ->lookup() from its filldir callback() and this causes NTFS to
1156      * deadlock as ntfs_lookup() maps the mft record of the directory and
1157      * we have got it mapped here already.  The only solution is for us to
1158      * unmap the mft record here so that a call to ntfs_lookup() is able to
1159      * map the mft record without deadlocking.
1160      */
1161     rc = le32_to_cpu(ctx->attr->data.resident.value_length);
1162     ir = kmalloc(rc, GFP_NOFS);
1163     if (unlikely(!ir)) {
1164         err = -ENOMEM;
1165         goto err_out;
1166     }
1167     /* Copy the index root value (it has been verified in read_inode). */
1168     memcpy(ir, (u8*)ctx->attr +
1169             le16_to_cpu(ctx->attr->data.resident.value_offset), rc);
1170     ntfs_attr_put_search_ctx(ctx);
1171     unmap_mft_record(ndir);
1172     ctx = NULL;
1173     m = NULL;
1174     index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length);
1175     /* The first index entry. */
1176     ie = (INDEX_ENTRY*)((u8*)&ir->index +
1177             le32_to_cpu(ir->index.entries_offset));
1178     /*
1179      * Loop until we exceed valid memory (corruption case) or until we
1180      * reach the last entry or until filldir tells us it has had enough
1181      * or signals an error (both covered by the rc test).
1182      */
1183     for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
1184         ntfs_debug("In index root, offset 0x%zx.", (u8*)ie - (u8*)ir);
1185         /* Bounds checks. */
1186         if (unlikely((u8*)ie < (u8*)ir || (u8*)ie +
1187                 sizeof(INDEX_ENTRY_HEADER) > index_end ||
1188                 (u8*)ie + le16_to_cpu(ie->key_length) >
1189                 index_end))
1190             goto err_out;
1191         /* The last entry cannot contain a name. */
1192         if (ie->flags & INDEX_ENTRY_END)
1193             break;
1194         /* Skip index root entry if continuing previous readdir. */
1195         if (ir_pos > (u8*)ie - (u8*)ir)
1196             continue;
1197         /* Advance the position even if going to skip the entry. */
1198         actor->pos = (u8*)ie - (u8*)ir;
1199         /* Submit the name to the filldir callback. */
1200         rc = ntfs_filldir(vol, ndir, NULL, ie, name, actor);
1201         if (rc) {
1202             kfree(ir);
1203             goto abort;
1204         }
1205     }
1206     /* We are done with the index root and can free the buffer. */
1207     kfree(ir);
1208     ir = NULL;
1209     /* If there is no index allocation attribute we are finished. */
1210     if (!NInoIndexAllocPresent(ndir))
1211         goto EOD;
1212     /* Advance fpos to the beginning of the index allocation. */
1213     actor->pos = vol->mft_record_size;
1214 skip_index_root:
1215     kaddr = NULL;
1216     prev_ia_pos = -1LL;
1217     /* Get the offset into the index allocation attribute. */
1218     ia_pos = (s64)actor->pos - vol->mft_record_size;
1219     ia_mapping = vdir->i_mapping;
1220     ntfs_debug("Inode 0x%lx, getting index bitmap.", vdir->i_ino);
1221     bmp_vi = ntfs_attr_iget(vdir, AT_BITMAP, I30, 4);
1222     if (IS_ERR(bmp_vi)) {
1223         ntfs_error(sb, "Failed to get bitmap attribute.");
1224         err = PTR_ERR(bmp_vi);
1225         goto err_out;
1226     }
1227     bmp_mapping = bmp_vi->i_mapping;
1228     /* Get the starting bitmap bit position and sanity check it. */
1229     bmp_pos = ia_pos >> ndir->itype.index.block_size_bits;
1230     if (unlikely(bmp_pos >> 3 >= i_size_read(bmp_vi))) {
1231         ntfs_error(sb, "Current index allocation position exceeds "
1232                 "index bitmap size.");
1233         goto iput_err_out;
1234     }
1235     /* Get the starting bit position in the current bitmap page. */
1236     cur_bmp_pos = bmp_pos & ((PAGE_SIZE * 8) - 1);
1237     bmp_pos &= ~(u64)((PAGE_SIZE * 8) - 1);
1238 get_next_bmp_page:
1239     ntfs_debug("Reading bitmap with page index 0x%llx, bit ofs 0x%llx",
1240             (unsigned long long)bmp_pos >> (3 + PAGE_SHIFT),
1241             (unsigned long long)bmp_pos &
1242             (unsigned long long)((PAGE_SIZE * 8) - 1));
1243     bmp_page = ntfs_map_page(bmp_mapping,
1244             bmp_pos >> (3 + PAGE_SHIFT));
1245     if (IS_ERR(bmp_page)) {
1246         ntfs_error(sb, "Reading index bitmap failed.");
1247         err = PTR_ERR(bmp_page);
1248         bmp_page = NULL;
1249         goto iput_err_out;
1250     }
1251     bmp = (u8*)page_address(bmp_page);
1252     /* Find next index block in use. */
1253     while (!(bmp[cur_bmp_pos >> 3] & (1 << (cur_bmp_pos & 7)))) {
1254 find_next_index_buffer:
1255         cur_bmp_pos++;
1256         /*
1257          * If we have reached the end of the bitmap page, get the next
1258          * page, and put away the old one.
1259          */
1260         if (unlikely((cur_bmp_pos >> 3) >= PAGE_SIZE)) {
1261             ntfs_unmap_page(bmp_page);
1262             bmp_pos += PAGE_SIZE * 8;
1263             cur_bmp_pos = 0;
1264             goto get_next_bmp_page;
1265         }
1266         /* If we have reached the end of the bitmap, we are done. */
1267         if (unlikely(((bmp_pos + cur_bmp_pos) >> 3) >= i_size))
1268             goto unm_EOD;
1269         ia_pos = (bmp_pos + cur_bmp_pos) <<
1270                 ndir->itype.index.block_size_bits;
1271     }
1272     ntfs_debug("Handling index buffer 0x%llx.",
1273             (unsigned long long)bmp_pos + cur_bmp_pos);
1274     /* If the current index buffer is in the same page we reuse the page. */
1275     if ((prev_ia_pos & (s64)PAGE_MASK) !=
1276             (ia_pos & (s64)PAGE_MASK)) {
1277         prev_ia_pos = ia_pos;
1278         if (likely(ia_page != NULL)) {
1279             unlock_page(ia_page);
1280             ntfs_unmap_page(ia_page);
1281         }
1282         /*
1283          * Map the page cache page containing the current ia_pos,
1284          * reading it from disk if necessary.
1285          */
1286         ia_page = ntfs_map_page(ia_mapping, ia_pos >> PAGE_SHIFT);
1287         if (IS_ERR(ia_page)) {
1288             ntfs_error(sb, "Reading index allocation data failed.");
1289             err = PTR_ERR(ia_page);
1290             ia_page = NULL;
1291             goto err_out;
1292         }
1293         lock_page(ia_page);
1294         kaddr = (u8*)page_address(ia_page);
1295     }
1296     /* Get the current index buffer. */
1297     ia = (INDEX_ALLOCATION*)(kaddr + (ia_pos & ~PAGE_MASK &
1298                       ~(s64)(ndir->itype.index.block_size - 1)));
1299     /* Bounds checks. */
1300     if (unlikely((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_SIZE)) {
1301         ntfs_error(sb, "Out of bounds check failed. Corrupt directory "
1302                 "inode 0x%lx or driver bug.", vdir->i_ino);
1303         goto err_out;
1304     }
1305     /* Catch multi sector transfer fixup errors. */
1306     if (unlikely(!ntfs_is_indx_record(ia->magic))) {
1307         ntfs_error(sb, "Directory index record with vcn 0x%llx is "
1308                 "corrupt.  Corrupt inode 0x%lx.  Run chkdsk.",
1309                 (unsigned long long)ia_pos >>
1310                 ndir->itype.index.vcn_size_bits, vdir->i_ino);
1311         goto err_out;
1312     }
1313     if (unlikely(sle64_to_cpu(ia->index_block_vcn) != (ia_pos &
1314             ~(s64)(ndir->itype.index.block_size - 1)) >>
1315             ndir->itype.index.vcn_size_bits)) {
1316         ntfs_error(sb, "Actual VCN (0x%llx) of index buffer is "
1317                 "different from expected VCN (0x%llx). "
1318                 "Directory inode 0x%lx is corrupt or driver "
1319                 "bug. ", (unsigned long long)
1320                 sle64_to_cpu(ia->index_block_vcn),
1321                 (unsigned long long)ia_pos >>
1322                 ndir->itype.index.vcn_size_bits, vdir->i_ino);
1323         goto err_out;
1324     }
1325     if (unlikely(le32_to_cpu(ia->index.allocated_size) + 0x18 !=
1326             ndir->itype.index.block_size)) {
1327         ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
1328                 "0x%lx has a size (%u) differing from the "
1329                 "directory specified size (%u). Directory "
1330                 "inode is corrupt or driver bug.",
1331                 (unsigned long long)ia_pos >>
1332                 ndir->itype.index.vcn_size_bits, vdir->i_ino,
1333                 le32_to_cpu(ia->index.allocated_size) + 0x18,
1334                 ndir->itype.index.block_size);
1335         goto err_out;
1336     }
1337     index_end = (u8*)ia + ndir->itype.index.block_size;
1338     if (unlikely(index_end > kaddr + PAGE_SIZE)) {
1339         ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
1340                 "0x%lx crosses page boundary. Impossible! "
1341                 "Cannot access! This is probably a bug in the "
1342                 "driver.", (unsigned long long)ia_pos >>
1343                 ndir->itype.index.vcn_size_bits, vdir->i_ino);
1344         goto err_out;
1345     }
1346     ia_start = ia_pos & ~(s64)(ndir->itype.index.block_size - 1);
1347     index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length);
1348     if (unlikely(index_end > (u8*)ia + ndir->itype.index.block_size)) {
1349         ntfs_error(sb, "Size of index buffer (VCN 0x%llx) of directory "
1350                 "inode 0x%lx exceeds maximum size.",
1351                 (unsigned long long)ia_pos >>
1352                 ndir->itype.index.vcn_size_bits, vdir->i_ino);
1353         goto err_out;
1354     }
1355     /* The first index entry in this index buffer. */
1356     ie = (INDEX_ENTRY*)((u8*)&ia->index +
1357             le32_to_cpu(ia->index.entries_offset));
1358     /*
1359      * Loop until we exceed valid memory (corruption case) or until we
1360      * reach the last entry or until filldir tells us it has had enough
1361      * or signals an error (both covered by the rc test).
1362      */
1363     for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
1364         ntfs_debug("In index allocation, offset 0x%llx.",
1365                 (unsigned long long)ia_start +
1366                 (unsigned long long)((u8*)ie - (u8*)ia));
1367         /* Bounds checks. */
1368         if (unlikely((u8*)ie < (u8*)ia || (u8*)ie +
1369                 sizeof(INDEX_ENTRY_HEADER) > index_end ||
1370                 (u8*)ie + le16_to_cpu(ie->key_length) >
1371                 index_end))
1372             goto err_out;
1373         /* The last entry cannot contain a name. */
1374         if (ie->flags & INDEX_ENTRY_END)
1375             break;
1376         /* Skip index block entry if continuing previous readdir. */
1377         if (ia_pos - ia_start > (u8*)ie - (u8*)ia)
1378             continue;
1379         /* Advance the position even if going to skip the entry. */
1380         actor->pos = (u8*)ie - (u8*)ia +
1381                 (sle64_to_cpu(ia->index_block_vcn) <<
1382                 ndir->itype.index.vcn_size_bits) +
1383                 vol->mft_record_size;
1384         /*
1385          * Submit the name to the @filldir callback.  Note,
1386          * ntfs_filldir() drops the lock on @ia_page but it retakes it
1387          * before returning, unless a non-zero value is returned in
1388          * which case the page is left unlocked.
1389          */
1390         rc = ntfs_filldir(vol, ndir, ia_page, ie, name, actor);
1391         if (rc) {
1392             /* @ia_page is already unlocked in this case. */
1393             ntfs_unmap_page(ia_page);
1394             ntfs_unmap_page(bmp_page);
1395             iput(bmp_vi);
1396             goto abort;
1397         }
1398     }
1399     goto find_next_index_buffer;
1400 unm_EOD:
1401     if (ia_page) {
1402         unlock_page(ia_page);
1403         ntfs_unmap_page(ia_page);
1404     }
1405     ntfs_unmap_page(bmp_page);
1406     iput(bmp_vi);
1407 EOD:
1408     /* We are finished, set fpos to EOD. */
1409     actor->pos = i_size + vol->mft_record_size;
1410 abort:
1411     kfree(name);
1412     return 0;
1413 err_out:
1414     if (bmp_page) {
1415         ntfs_unmap_page(bmp_page);
1416 iput_err_out:
1417         iput(bmp_vi);
1418     }
1419     if (ia_page) {
1420         unlock_page(ia_page);
1421         ntfs_unmap_page(ia_page);
1422     }
1423     kfree(ir);
1424     kfree(name);
1425     if (ctx)
1426         ntfs_attr_put_search_ctx(ctx);
1427     if (m)
1428         unmap_mft_record(ndir);
1429     if (!err)
1430         err = -EIO;
1431     ntfs_debug("Failed. Returning error code %i.", -err);
1432     return err;
1433 }
1434 
1435 /**
1436  * ntfs_dir_open - called when an inode is about to be opened
1437  * @vi:     inode to be opened
1438  * @filp:   file structure describing the inode
1439  *
1440  * Limit directory size to the page cache limit on architectures where unsigned
1441  * long is 32-bits. This is the most we can do for now without overflowing the
1442  * page cache page index. Doing it this way means we don't run into problems
1443  * because of existing too large directories. It would be better to allow the
1444  * user to read the accessible part of the directory but I doubt very much
1445  * anyone is going to hit this check on a 32-bit architecture, so there is no
1446  * point in adding the extra complexity required to support this.
1447  *
1448  * On 64-bit architectures, the check is hopefully optimized away by the
1449  * compiler.
1450  */
1451 static int ntfs_dir_open(struct inode *vi, struct file *filp)
1452 {
1453     if (sizeof(unsigned long) < 8) {
1454         if (i_size_read(vi) > MAX_LFS_FILESIZE)
1455             return -EFBIG;
1456     }
1457     return 0;
1458 }
1459 
1460 #ifdef NTFS_RW
1461 
1462 /**
1463  * ntfs_dir_fsync - sync a directory to disk
1464  * @filp:   directory to be synced
1465  * @dentry: dentry describing the directory to sync
1466  * @datasync:   if non-zero only flush user data and not metadata
1467  *
1468  * Data integrity sync of a directory to disk.  Used for fsync, fdatasync, and
1469  * msync system calls.  This function is based on file.c::ntfs_file_fsync().
1470  *
1471  * Write the mft record and all associated extent mft records as well as the
1472  * $INDEX_ALLOCATION and $BITMAP attributes and then sync the block device.
1473  *
1474  * If @datasync is true, we do not wait on the inode(s) to be written out
1475  * but we always wait on the page cache pages to be written out.
1476  *
1477  * Note: In the past @filp could be NULL so we ignore it as we don't need it
1478  * anyway.
1479  *
1480  * Locking: Caller must hold i_mutex on the inode.
1481  *
1482  * TODO: We should probably also write all attribute/index inodes associated
1483  * with this inode but since we have no simple way of getting to them we ignore
1484  * this problem for now.  We do write the $BITMAP attribute if it is present
1485  * which is the important one for a directory so things are not too bad.
1486  */
1487 static int ntfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
1488               int datasync)
1489 {
1490     struct inode *bmp_vi, *vi = filp->f_mapping->host;
1491     int err, ret;
1492     ntfs_attr na;
1493 
1494     ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
1495 
1496     err = file_write_and_wait_range(filp, start, end);
1497     if (err)
1498         return err;
1499     inode_lock(vi);
1500 
1501     BUG_ON(!S_ISDIR(vi->i_mode));
1502     /* If the bitmap attribute inode is in memory sync it, too. */
1503     na.mft_no = vi->i_ino;
1504     na.type = AT_BITMAP;
1505     na.name = I30;
1506     na.name_len = 4;
1507     bmp_vi = ilookup5(vi->i_sb, vi->i_ino, ntfs_test_inode, &na);
1508     if (bmp_vi) {
1509         write_inode_now(bmp_vi, !datasync);
1510         iput(bmp_vi);
1511     }
1512     ret = __ntfs_write_inode(vi, 1);
1513     write_inode_now(vi, !datasync);
1514     err = sync_blockdev(vi->i_sb->s_bdev);
1515     if (unlikely(err && !ret))
1516         ret = err;
1517     if (likely(!ret))
1518         ntfs_debug("Done.");
1519     else
1520         ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx.  Error "
1521                 "%u.", datasync ? "data" : "", vi->i_ino, -ret);
1522     inode_unlock(vi);
1523     return ret;
1524 }
1525 
1526 #endif /* NTFS_RW */
1527 
1528 const struct file_operations ntfs_dir_ops = {
1529     .llseek     = generic_file_llseek,  /* Seek inside directory. */
1530     .read       = generic_read_dir, /* Return -EISDIR. */
1531     .iterate    = ntfs_readdir,     /* Read directory contents. */
1532 #ifdef NTFS_RW
1533     .fsync      = ntfs_dir_fsync,   /* Sync a directory to disk. */
1534 #endif /* NTFS_RW */
1535     /*.ioctl    = ,*/           /* Perform function on the
1536                            mounted filesystem. */
1537     .open       = ntfs_dir_open,    /* Open directory. */
1538 };