Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  *  linux/fs/ext4/namei.c
0004  *
0005  * Copyright (C) 1992, 1993, 1994, 1995
0006  * Remy Card (card@masi.ibp.fr)
0007  * Laboratoire MASI - Institut Blaise Pascal
0008  * Universite Pierre et Marie Curie (Paris VI)
0009  *
0010  *  from
0011  *
0012  *  linux/fs/minix/namei.c
0013  *
0014  *  Copyright (C) 1991, 1992  Linus Torvalds
0015  *
0016  *  Big-endian to little-endian byte-swapping/bitmaps by
0017  *        David S. Miller (davem@caip.rutgers.edu), 1995
0018  *  Directory entry file type support and forward compatibility hooks
0019  *  for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
0020  *  Hash Tree Directory indexing (c)
0021  *  Daniel Phillips, 2001
0022  *  Hash Tree Directory indexing porting
0023  *  Christopher Li, 2002
0024  *  Hash Tree Directory indexing cleanup
0025  *  Theodore Ts'o, 2002
0026  */
0027 
0028 #include <linux/fs.h>
0029 #include <linux/pagemap.h>
0030 #include <linux/time.h>
0031 #include <linux/fcntl.h>
0032 #include <linux/stat.h>
0033 #include <linux/string.h>
0034 #include <linux/quotaops.h>
0035 #include <linux/buffer_head.h>
0036 #include <linux/bio.h>
0037 #include <linux/iversion.h>
0038 #include <linux/unicode.h>
0039 #include "ext4.h"
0040 #include "ext4_jbd2.h"
0041 
0042 #include "xattr.h"
0043 #include "acl.h"
0044 
0045 #include <trace/events/ext4.h>
/*
 * Read-ahead tuning: define how far ahead to read directories while
 * searching them.
 */
#define NAMEI_RA_CHUNKS  2
#define NAMEI_RA_BLOCKS  4
/* Product of the two constants above. */
#define NAMEI_RA_SIZE        (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
0052 
0053 static struct buffer_head *ext4_append(handle_t *handle,
0054                     struct inode *inode,
0055                     ext4_lblk_t *block)
0056 {
0057     struct ext4_map_blocks map;
0058     struct buffer_head *bh;
0059     int err;
0060 
0061     if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb &&
0062              ((inode->i_size >> 10) >=
0063               EXT4_SB(inode->i_sb)->s_max_dir_size_kb)))
0064         return ERR_PTR(-ENOSPC);
0065 
0066     *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
0067     map.m_lblk = *block;
0068     map.m_len = 1;
0069 
0070     /*
0071      * We're appending new directory block. Make sure the block is not
0072      * allocated yet, otherwise we will end up corrupting the
0073      * directory.
0074      */
0075     err = ext4_map_blocks(NULL, inode, &map, 0);
0076     if (err < 0)
0077         return ERR_PTR(err);
0078     if (err) {
0079         EXT4_ERROR_INODE(inode, "Logical block already allocated");
0080         return ERR_PTR(-EFSCORRUPTED);
0081     }
0082 
0083     bh = ext4_bread(handle, inode, *block, EXT4_GET_BLOCKS_CREATE);
0084     if (IS_ERR(bh))
0085         return bh;
0086     inode->i_size += inode->i_sb->s_blocksize;
0087     EXT4_I(inode)->i_disksize = inode->i_size;
0088     BUFFER_TRACE(bh, "get_write_access");
0089     err = ext4_journal_get_write_access(handle, inode->i_sb, bh,
0090                         EXT4_JTR_NONE);
0091     if (err) {
0092         brelse(bh);
0093         ext4_std_error(inode->i_sb, err);
0094         return ERR_PTR(err);
0095     }
0096     return bh;
0097 }
0098 
/* Defined further down; needed by __ext4_read_dirblock(). */
static int ext4_dx_csum_verify(struct inode *inode,
                   struct ext4_dir_entry *dirent);

/*
 * Hint passed to ext4_read_dirblock() telling it what kind of directory
 * block the caller expects.  It controls which sanity checks
 * ext4_read_dirblock() applies to the block read from the storage device.
 */
typedef enum {
    /* Caller does not know the block type: no specific verification. */
    EITHER,
    /* Caller expects an htree index block. */
    INDEX,
    /* Caller expects a block containing directory entries. */
    DIRENT,
    /*
     * As DIRENT, but the block was found via a logical block number
     * stored in an htree index block.
     */
    DIRENT_HTREE
} dirblock_type_t;
0115 
0116 #define ext4_read_dirblock(inode, block, type) \
0117     __ext4_read_dirblock((inode), (block), (type), __func__, __LINE__)
0118 
0119 static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
0120                         ext4_lblk_t block,
0121                         dirblock_type_t type,
0122                         const char *func,
0123                         unsigned int line)
0124 {
0125     struct buffer_head *bh;
0126     struct ext4_dir_entry *dirent;
0127     int is_dx_block = 0;
0128 
0129     if (block >= inode->i_size) {
0130         ext4_error_inode(inode, func, line, block,
0131                "Attempting to read directory block (%u) that is past i_size (%llu)",
0132                block, inode->i_size);
0133         return ERR_PTR(-EFSCORRUPTED);
0134     }
0135 
0136     if (ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_EIO))
0137         bh = ERR_PTR(-EIO);
0138     else
0139         bh = ext4_bread(NULL, inode, block, 0);
0140     if (IS_ERR(bh)) {
0141         __ext4_warning(inode->i_sb, func, line,
0142                    "inode #%lu: lblock %lu: comm %s: "
0143                    "error %ld reading directory block",
0144                    inode->i_ino, (unsigned long)block,
0145                    current->comm, PTR_ERR(bh));
0146 
0147         return bh;
0148     }
0149     if (!bh && (type == INDEX || type == DIRENT_HTREE)) {
0150         ext4_error_inode(inode, func, line, block,
0151                  "Directory hole found for htree %s block",
0152                  (type == INDEX) ? "index" : "leaf");
0153         return ERR_PTR(-EFSCORRUPTED);
0154     }
0155     if (!bh)
0156         return NULL;
0157     dirent = (struct ext4_dir_entry *) bh->b_data;
0158     /* Determine whether or not we have an index block */
0159     if (is_dx(inode)) {
0160         if (block == 0)
0161             is_dx_block = 1;
0162         else if (ext4_rec_len_from_disk(dirent->rec_len,
0163                         inode->i_sb->s_blocksize) ==
0164              inode->i_sb->s_blocksize)
0165             is_dx_block = 1;
0166     }
0167     if (!is_dx_block && type == INDEX) {
0168         ext4_error_inode(inode, func, line, block,
0169                "directory leaf block found instead of index block");
0170         brelse(bh);
0171         return ERR_PTR(-EFSCORRUPTED);
0172     }
0173     if (!ext4_has_metadata_csum(inode->i_sb) ||
0174         buffer_verified(bh))
0175         return bh;
0176 
0177     /*
0178      * An empty leaf block can get mistaken for a index block; for
0179      * this reason, we can only check the index checksum when the
0180      * caller is sure it should be an index block.
0181      */
0182     if (is_dx_block && type == INDEX) {
0183         if (ext4_dx_csum_verify(inode, dirent) &&
0184             !ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_CRC))
0185             set_buffer_verified(bh);
0186         else {
0187             ext4_error_inode_err(inode, func, line, block,
0188                          EFSBADCRC,
0189                          "Directory index failed checksum");
0190             brelse(bh);
0191             return ERR_PTR(-EFSBADCRC);
0192         }
0193     }
0194     if (!is_dx_block) {
0195         if (ext4_dirblock_csum_verify(inode, bh) &&
0196             !ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_CRC))
0197             set_buffer_verified(bh);
0198         else {
0199             ext4_error_inode_err(inode, func, line, block,
0200                          EFSBADCRC,
0201                          "Directory block failed checksum");
0202             brelse(bh);
0203             return ERR_PTR(-EFSBADCRC);
0204         }
0205     }
0206     return bh;
0207 }
0208 
/*
 * dxtrace(command): expands to @command when DX_DEBUG is defined and to
 * nothing otherwise.
 */
#ifndef DX_DEBUG
#define dxtrace(command)
#else
#define dxtrace(command) command
#endif
0214 
0215 struct fake_dirent
0216 {
0217     __le32 inode;
0218     __le16 rec_len;
0219     u8 name_len;
0220     u8 file_type;
0221 };
0222 
0223 struct dx_countlimit
0224 {
0225     __le16 limit;
0226     __le16 count;
0227 };
0228 
0229 struct dx_entry
0230 {
0231     __le32 hash;
0232     __le32 block;
0233 };
0234 
0235 /*
0236  * dx_root_info is laid out so that if it should somehow get overlaid by a
0237  * dirent the two low bits of the hash version will be zero.  Therefore, the
0238  * hash version mod 4 should never be 0.  Sincerely, the paranoia department.
0239  */
0240 
0241 struct dx_root
0242 {
0243     struct fake_dirent dot;
0244     char dot_name[4];
0245     struct fake_dirent dotdot;
0246     char dotdot_name[4];
0247     struct dx_root_info
0248     {
0249         __le32 reserved_zero;
0250         u8 hash_version;
0251         u8 info_length; /* 8 */
0252         u8 indirect_levels;
0253         u8 unused_flags;
0254     }
0255     info;
0256     struct dx_entry entries[];
0257 };
0258 
0259 struct dx_node
0260 {
0261     struct fake_dirent fake;
0262     struct dx_entry entries[];
0263 };
0264 
0265 
/*
 * One level of an htree lookup path as filled in by dx_probe(): the
 * buffer holding the index block, its entry array, and the entry that
 * was selected at this level.
 */
struct dx_frame {
    struct buffer_head *bh;
    struct dx_entry *entries;
    struct dx_entry *at;
};
0272 
0273 struct dx_map_entry
0274 {
0275     u32 hash;
0276     u16 offs;
0277     u16 size;
0278 };
0279 
0280 /*
0281  * This goes at the end of each htree block.
0282  */
0283 struct dx_tail {
0284     u32 dt_reserved;
0285     __le32 dt_checksum; /* crc32c(uuid+inum+dirblock) */
0286 };
0287 
0288 static inline ext4_lblk_t dx_get_block(struct dx_entry *entry);
0289 static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value);
0290 static inline unsigned dx_get_hash(struct dx_entry *entry);
0291 static void dx_set_hash(struct dx_entry *entry, unsigned value);
0292 static unsigned dx_get_count(struct dx_entry *entries);
0293 static unsigned dx_get_limit(struct dx_entry *entries);
0294 static void dx_set_count(struct dx_entry *entries, unsigned value);
0295 static void dx_set_limit(struct dx_entry *entries, unsigned value);
0296 static unsigned dx_root_limit(struct inode *dir, unsigned infosize);
0297 static unsigned dx_node_limit(struct inode *dir);
0298 static struct dx_frame *dx_probe(struct ext4_filename *fname,
0299                  struct inode *dir,
0300                  struct dx_hash_info *hinfo,
0301                  struct dx_frame *frame);
0302 static void dx_release(struct dx_frame *frames);
0303 static int dx_make_map(struct inode *dir, struct buffer_head *bh,
0304                struct dx_hash_info *hinfo,
0305                struct dx_map_entry *map_tail);
0306 static void dx_sort_map(struct dx_map_entry *map, unsigned count);
0307 static struct ext4_dir_entry_2 *dx_move_dirents(struct inode *dir, char *from,
0308                     char *to, struct dx_map_entry *offsets,
0309                     int count, unsigned int blocksize);
0310 static struct ext4_dir_entry_2 *dx_pack_dirents(struct inode *dir, char *base,
0311                         unsigned int blocksize);
0312 static void dx_insert_block(struct dx_frame *frame,
0313                     u32 hash, ext4_lblk_t block);
0314 static int ext4_htree_next_block(struct inode *dir, __u32 hash,
0315                  struct dx_frame *frame,
0316                  struct dx_frame *frames,
0317                  __u32 *start_hash);
0318 static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
0319         struct ext4_filename *fname,
0320         struct ext4_dir_entry_2 **res_dir);
0321 static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
0322                  struct inode *dir, struct inode *inode);
0323 
0324 /* checksumming functions */
0325 void ext4_initialize_dirent_tail(struct buffer_head *bh,
0326                  unsigned int blocksize)
0327 {
0328     struct ext4_dir_entry_tail *t = EXT4_DIRENT_TAIL(bh->b_data, blocksize);
0329 
0330     memset(t, 0, sizeof(struct ext4_dir_entry_tail));
0331     t->det_rec_len = ext4_rec_len_to_disk(
0332             sizeof(struct ext4_dir_entry_tail), blocksize);
0333     t->det_reserved_ft = EXT4_FT_DIR_CSUM;
0334 }
0335 
0336 /* Walk through a dirent block to find a checksum "dirent" at the tail */
0337 static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode,
0338                            struct buffer_head *bh)
0339 {
0340     struct ext4_dir_entry_tail *t;
0341 
0342 #ifdef PARANOID
0343     struct ext4_dir_entry *d, *top;
0344 
0345     d = (struct ext4_dir_entry *)bh->b_data;
0346     top = (struct ext4_dir_entry *)(bh->b_data +
0347         (EXT4_BLOCK_SIZE(inode->i_sb) -
0348          sizeof(struct ext4_dir_entry_tail)));
0349     while (d < top && d->rec_len)
0350         d = (struct ext4_dir_entry *)(((void *)d) +
0351             le16_to_cpu(d->rec_len));
0352 
0353     if (d != top)
0354         return NULL;
0355 
0356     t = (struct ext4_dir_entry_tail *)d;
0357 #else
0358     t = EXT4_DIRENT_TAIL(bh->b_data, EXT4_BLOCK_SIZE(inode->i_sb));
0359 #endif
0360 
0361     if (t->det_reserved_zero1 ||
0362         le16_to_cpu(t->det_rec_len) != sizeof(struct ext4_dir_entry_tail) ||
0363         t->det_reserved_zero2 ||
0364         t->det_reserved_ft != EXT4_FT_DIR_CSUM)
0365         return NULL;
0366 
0367     return t;
0368 }
0369 
0370 static __le32 ext4_dirblock_csum(struct inode *inode, void *dirent, int size)
0371 {
0372     struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
0373     struct ext4_inode_info *ei = EXT4_I(inode);
0374     __u32 csum;
0375 
0376     csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size);
0377     return cpu_to_le32(csum);
0378 }
0379 
/*
 * Emit an inode warning saying the block has no room for a checksum.
 * @func and @line identify the reporting call site.
 */
static void __warn_no_space_for_csum(struct inode *inode, const char *func,
                     unsigned int line)
{
    __ext4_warning_inode(inode, func, line,
        "No space for directory leaf checksum. Please run e2fsck -D.");
}

/* Convenience wrapper that supplies the caller's function name and line. */
#define warn_no_space_for_csum(inode)                   \
    __warn_no_space_for_csum((inode), __func__, __LINE__)
0389 
0390 int ext4_dirblock_csum_verify(struct inode *inode, struct buffer_head *bh)
0391 {
0392     struct ext4_dir_entry_tail *t;
0393 
0394     if (!ext4_has_metadata_csum(inode->i_sb))
0395         return 1;
0396 
0397     t = get_dirent_tail(inode, bh);
0398     if (!t) {
0399         warn_no_space_for_csum(inode);
0400         return 0;
0401     }
0402 
0403     if (t->det_checksum != ext4_dirblock_csum(inode, bh->b_data,
0404                           (char *)t - bh->b_data))
0405         return 0;
0406 
0407     return 1;
0408 }
0409 
0410 static void ext4_dirblock_csum_set(struct inode *inode,
0411                  struct buffer_head *bh)
0412 {
0413     struct ext4_dir_entry_tail *t;
0414 
0415     if (!ext4_has_metadata_csum(inode->i_sb))
0416         return;
0417 
0418     t = get_dirent_tail(inode, bh);
0419     if (!t) {
0420         warn_no_space_for_csum(inode);
0421         return;
0422     }
0423 
0424     t->det_checksum = ext4_dirblock_csum(inode, bh->b_data,
0425                          (char *)t - bh->b_data);
0426 }
0427 
0428 int ext4_handle_dirty_dirblock(handle_t *handle,
0429                    struct inode *inode,
0430                    struct buffer_head *bh)
0431 {
0432     ext4_dirblock_csum_set(inode, bh);
0433     return ext4_handle_dirty_metadata(handle, inode, bh);
0434 }
0435 
0436 static struct dx_countlimit *get_dx_countlimit(struct inode *inode,
0437                            struct ext4_dir_entry *dirent,
0438                            int *offset)
0439 {
0440     struct ext4_dir_entry *dp;
0441     struct dx_root_info *root;
0442     int count_offset;
0443 
0444     if (le16_to_cpu(dirent->rec_len) == EXT4_BLOCK_SIZE(inode->i_sb))
0445         count_offset = 8;
0446     else if (le16_to_cpu(dirent->rec_len) == 12) {
0447         dp = (struct ext4_dir_entry *)(((void *)dirent) + 12);
0448         if (le16_to_cpu(dp->rec_len) !=
0449             EXT4_BLOCK_SIZE(inode->i_sb) - 12)
0450             return NULL;
0451         root = (struct dx_root_info *)(((void *)dp + 12));
0452         if (root->reserved_zero ||
0453             root->info_length != sizeof(struct dx_root_info))
0454             return NULL;
0455         count_offset = 32;
0456     } else
0457         return NULL;
0458 
0459     if (offset)
0460         *offset = count_offset;
0461     return (struct dx_countlimit *)(((void *)dirent) + count_offset);
0462 }
0463 
0464 static __le32 ext4_dx_csum(struct inode *inode, struct ext4_dir_entry *dirent,
0465                int count_offset, int count, struct dx_tail *t)
0466 {
0467     struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
0468     struct ext4_inode_info *ei = EXT4_I(inode);
0469     __u32 csum;
0470     int size;
0471     __u32 dummy_csum = 0;
0472     int offset = offsetof(struct dx_tail, dt_checksum);
0473 
0474     size = count_offset + (count * sizeof(struct dx_entry));
0475     csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size);
0476     csum = ext4_chksum(sbi, csum, (__u8 *)t, offset);
0477     csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum, sizeof(dummy_csum));
0478 
0479     return cpu_to_le32(csum);
0480 }
0481 
0482 static int ext4_dx_csum_verify(struct inode *inode,
0483                    struct ext4_dir_entry *dirent)
0484 {
0485     struct dx_countlimit *c;
0486     struct dx_tail *t;
0487     int count_offset, limit, count;
0488 
0489     if (!ext4_has_metadata_csum(inode->i_sb))
0490         return 1;
0491 
0492     c = get_dx_countlimit(inode, dirent, &count_offset);
0493     if (!c) {
0494         EXT4_ERROR_INODE(inode, "dir seems corrupt?  Run e2fsck -D.");
0495         return 0;
0496     }
0497     limit = le16_to_cpu(c->limit);
0498     count = le16_to_cpu(c->count);
0499     if (count_offset + (limit * sizeof(struct dx_entry)) >
0500         EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) {
0501         warn_no_space_for_csum(inode);
0502         return 0;
0503     }
0504     t = (struct dx_tail *)(((struct dx_entry *)c) + limit);
0505 
0506     if (t->dt_checksum != ext4_dx_csum(inode, dirent, count_offset,
0507                         count, t))
0508         return 0;
0509     return 1;
0510 }
0511 
0512 static void ext4_dx_csum_set(struct inode *inode, struct ext4_dir_entry *dirent)
0513 {
0514     struct dx_countlimit *c;
0515     struct dx_tail *t;
0516     int count_offset, limit, count;
0517 
0518     if (!ext4_has_metadata_csum(inode->i_sb))
0519         return;
0520 
0521     c = get_dx_countlimit(inode, dirent, &count_offset);
0522     if (!c) {
0523         EXT4_ERROR_INODE(inode, "dir seems corrupt?  Run e2fsck -D.");
0524         return;
0525     }
0526     limit = le16_to_cpu(c->limit);
0527     count = le16_to_cpu(c->count);
0528     if (count_offset + (limit * sizeof(struct dx_entry)) >
0529         EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) {
0530         warn_no_space_for_csum(inode);
0531         return;
0532     }
0533     t = (struct dx_tail *)(((struct dx_entry *)c) + limit);
0534 
0535     t->dt_checksum = ext4_dx_csum(inode, dirent, count_offset, count, t);
0536 }
0537 
0538 static inline int ext4_handle_dirty_dx_node(handle_t *handle,
0539                         struct inode *inode,
0540                         struct buffer_head *bh)
0541 {
0542     ext4_dx_csum_set(inode, (struct ext4_dir_entry *)bh->b_data);
0543     return ext4_handle_dirty_metadata(handle, inode, bh);
0544 }
0545 
0546 /*
0547  * p is at least 6 bytes before the end of page
0548  */
0549 static inline struct ext4_dir_entry_2 *
0550 ext4_next_entry(struct ext4_dir_entry_2 *p, unsigned long blocksize)
0551 {
0552     return (struct ext4_dir_entry_2 *)((char *)p +
0553         ext4_rec_len_from_disk(p->rec_len, blocksize));
0554 }
0555 
0556 /*
0557  * Future: use high four bits of block for coalesce-on-delete flags
0558  * Mask them off for now.
0559  */
0560 
0561 static inline ext4_lblk_t dx_get_block(struct dx_entry *entry)
0562 {
0563     return le32_to_cpu(entry->block) & 0x0fffffff;
0564 }
0565 
0566 static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value)
0567 {
0568     entry->block = cpu_to_le32(value);
0569 }
0570 
0571 static inline unsigned dx_get_hash(struct dx_entry *entry)
0572 {
0573     return le32_to_cpu(entry->hash);
0574 }
0575 
0576 static inline void dx_set_hash(struct dx_entry *entry, unsigned value)
0577 {
0578     entry->hash = cpu_to_le32(value);
0579 }
0580 
0581 static inline unsigned dx_get_count(struct dx_entry *entries)
0582 {
0583     return le16_to_cpu(((struct dx_countlimit *) entries)->count);
0584 }
0585 
0586 static inline unsigned dx_get_limit(struct dx_entry *entries)
0587 {
0588     return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
0589 }
0590 
0591 static inline void dx_set_count(struct dx_entry *entries, unsigned value)
0592 {
0593     ((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
0594 }
0595 
0596 static inline void dx_set_limit(struct dx_entry *entries, unsigned value)
0597 {
0598     ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
0599 }
0600 
0601 static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
0602 {
0603     unsigned int entry_space = dir->i_sb->s_blocksize -
0604             ext4_dir_rec_len(1, NULL) -
0605             ext4_dir_rec_len(2, NULL) - infosize;
0606 
0607     if (ext4_has_metadata_csum(dir->i_sb))
0608         entry_space -= sizeof(struct dx_tail);
0609     return entry_space / sizeof(struct dx_entry);
0610 }
0611 
0612 static inline unsigned dx_node_limit(struct inode *dir)
0613 {
0614     unsigned int entry_space = dir->i_sb->s_blocksize -
0615             ext4_dir_rec_len(0, dir);
0616 
0617     if (ext4_has_metadata_csum(dir->i_sb))
0618         entry_space -= sizeof(struct dx_tail);
0619     return entry_space / sizeof(struct dx_entry);
0620 }
0621 
0622 /*
0623  * Debug
0624  */
0625 #ifdef DX_DEBUG
0626 static void dx_show_index(char * label, struct dx_entry *entries)
0627 {
0628     int i, n = dx_get_count (entries);
0629     printk(KERN_DEBUG "%s index", label);
0630     for (i = 0; i < n; i++) {
0631         printk(KERN_CONT " %x->%lu",
0632                i ? dx_get_hash(entries + i) : 0,
0633                (unsigned long)dx_get_block(entries + i));
0634     }
0635     printk(KERN_CONT "\n");
0636 }
0637 
/* Totals returned by the debug dump helpers dx_show_leaf/dx_show_entries. */
struct stats {
    unsigned names;     /* live entries seen */
    unsigned space;     /* sum of ext4_dir_rec_len() over those entries */
    unsigned bcount;    /* leaf blocks counted */
};
0644 
0645 static struct stats dx_show_leaf(struct inode *dir,
0646                 struct dx_hash_info *hinfo,
0647                 struct ext4_dir_entry_2 *de,
0648                 int size, int show_names)
0649 {
0650     unsigned names = 0, space = 0;
0651     char *base = (char *) de;
0652     struct dx_hash_info h = *hinfo;
0653 
0654     printk("names: ");
0655     while ((char *) de < base + size)
0656     {
0657         if (de->inode)
0658         {
0659             if (show_names)
0660             {
0661 #ifdef CONFIG_FS_ENCRYPTION
0662                 int len;
0663                 char *name;
0664                 struct fscrypt_str fname_crypto_str =
0665                     FSTR_INIT(NULL, 0);
0666                 int res = 0;
0667 
0668                 name  = de->name;
0669                 len = de->name_len;
0670                 if (!IS_ENCRYPTED(dir)) {
0671                     /* Directory is not encrypted */
0672                     ext4fs_dirhash(dir, de->name,
0673                         de->name_len, &h);
0674                     printk("%*.s:(U)%x.%u ", len,
0675                            name, h.hash,
0676                            (unsigned) ((char *) de
0677                                - base));
0678                 } else {
0679                     struct fscrypt_str de_name =
0680                         FSTR_INIT(name, len);
0681 
0682                     /* Directory is encrypted */
0683                     res = fscrypt_fname_alloc_buffer(
0684                         len, &fname_crypto_str);
0685                     if (res)
0686                         printk(KERN_WARNING "Error "
0687                             "allocating crypto "
0688                             "buffer--skipping "
0689                             "crypto\n");
0690                     res = fscrypt_fname_disk_to_usr(dir,
0691                         0, 0, &de_name,
0692                         &fname_crypto_str);
0693                     if (res) {
0694                         printk(KERN_WARNING "Error "
0695                             "converting filename "
0696                             "from disk to usr"
0697                             "\n");
0698                         name = "??";
0699                         len = 2;
0700                     } else {
0701                         name = fname_crypto_str.name;
0702                         len = fname_crypto_str.len;
0703                     }
0704                     if (IS_CASEFOLDED(dir))
0705                         h.hash = EXT4_DIRENT_HASH(de);
0706                     else
0707                         ext4fs_dirhash(dir, de->name,
0708                                de->name_len, &h);
0709                     printk("%*.s:(E)%x.%u ", len, name,
0710                            h.hash, (unsigned) ((char *) de
0711                                    - base));
0712                     fscrypt_fname_free_buffer(
0713                             &fname_crypto_str);
0714                 }
0715 #else
0716                 int len = de->name_len;
0717                 char *name = de->name;
0718                 ext4fs_dirhash(dir, de->name, de->name_len, &h);
0719                 printk("%*.s:%x.%u ", len, name, h.hash,
0720                        (unsigned) ((char *) de - base));
0721 #endif
0722             }
0723             space += ext4_dir_rec_len(de->name_len, dir);
0724             names++;
0725         }
0726         de = ext4_next_entry(de, size);
0727     }
0728     printk(KERN_CONT "(%i)\n", names);
0729     return (struct stats) { names, space, 1 };
0730 }
0731 
0732 struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
0733                  struct dx_entry *entries, int levels)
0734 {
0735     unsigned blocksize = dir->i_sb->s_blocksize;
0736     unsigned count = dx_get_count(entries), names = 0, space = 0, i;
0737     unsigned bcount = 0;
0738     struct buffer_head *bh;
0739     printk("%i indexed blocks...\n", count);
0740     for (i = 0; i < count; i++, entries++)
0741     {
0742         ext4_lblk_t block = dx_get_block(entries);
0743         ext4_lblk_t hash  = i ? dx_get_hash(entries): 0;
0744         u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
0745         struct stats stats;
0746         printk("%s%3u:%03u hash %8x/%8x ",levels?"":"   ", i, block, hash, range);
0747         bh = ext4_bread(NULL,dir, block, 0);
0748         if (!bh || IS_ERR(bh))
0749             continue;
0750         stats = levels?
0751            dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
0752            dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *)
0753             bh->b_data, blocksize, 0);
0754         names += stats.names;
0755         space += stats.space;
0756         bcount += stats.bcount;
0757         brelse(bh);
0758     }
0759     if (bcount)
0760         printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n",
0761                levels ? "" : "   ", names, space/bcount,
0762                (space/bcount)*100/blocksize);
0763     return (struct stats) { names, space, bcount};
0764 }
0765 
0766 /*
0767  * Linear search cross check
0768  */
0769 static inline void htree_rep_invariant_check(struct dx_entry *at,
0770                          struct dx_entry *target,
0771                          u32 hash, unsigned int n)
0772 {
0773     while (n--) {
0774         dxtrace(printk(KERN_CONT ","));
0775         if (dx_get_hash(++at) > hash) {
0776             at--;
0777             break;
0778         }
0779     }
0780     ASSERT(at == target - 1);
0781 }
0782 #else /* DX_DEBUG */
/* Without DX_DEBUG the linear cross check is compiled out: empty stub. */
static inline void htree_rep_invariant_check(struct dx_entry *at,
                         struct dx_entry *target,
                         u32 hash, unsigned int n)
{
}
0788 #endif /* DX_DEBUG */
0789 
0790 /*
0791  * Probe for a directory leaf block to search.
0792  *
0793  * dx_probe can return ERR_BAD_DX_DIR, which means there was a format
0794  * error in the directory index, and the caller should fall back to
0795  * searching the directory normally.  The callers of dx_probe **MUST**
0796  * check for this error code, and make sure it never gets reflected
0797  * back to userspace.
0798  */
0799 static struct dx_frame *
0800 dx_probe(struct ext4_filename *fname, struct inode *dir,
0801      struct dx_hash_info *hinfo, struct dx_frame *frame_in)
0802 {
0803     unsigned count, indirect, level, i;
0804     struct dx_entry *at, *entries, *p, *q, *m;
0805     struct dx_root *root;
0806     struct dx_frame *frame = frame_in;
0807     struct dx_frame *ret_err = ERR_PTR(ERR_BAD_DX_DIR);
0808     u32 hash;
0809     ext4_lblk_t block;
0810     ext4_lblk_t blocks[EXT4_HTREE_LEVEL];
0811 
0812     memset(frame_in, 0, EXT4_HTREE_LEVEL * sizeof(frame_in[0]));
0813     frame->bh = ext4_read_dirblock(dir, 0, INDEX);
0814     if (IS_ERR(frame->bh))
0815         return (struct dx_frame *) frame->bh;
0816 
0817     root = (struct dx_root *) frame->bh->b_data;
0818     if (root->info.hash_version != DX_HASH_TEA &&
0819         root->info.hash_version != DX_HASH_HALF_MD4 &&
0820         root->info.hash_version != DX_HASH_LEGACY &&
0821         root->info.hash_version != DX_HASH_SIPHASH) {
0822         ext4_warning_inode(dir, "Unrecognised inode hash code %u",
0823                    root->info.hash_version);
0824         goto fail;
0825     }
0826     if (ext4_hash_in_dirent(dir)) {
0827         if (root->info.hash_version != DX_HASH_SIPHASH) {
0828             ext4_warning_inode(dir,
0829                 "Hash in dirent, but hash is not SIPHASH");
0830             goto fail;
0831         }
0832     } else {
0833         if (root->info.hash_version == DX_HASH_SIPHASH) {
0834             ext4_warning_inode(dir,
0835                 "Hash code is SIPHASH, but hash not in dirent");
0836             goto fail;
0837         }
0838     }
0839     if (fname)
0840         hinfo = &fname->hinfo;
0841     hinfo->hash_version = root->info.hash_version;
0842     if (hinfo->hash_version <= DX_HASH_TEA)
0843         hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
0844     hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
0845     /* hash is already computed for encrypted casefolded directory */
0846     if (fname && fname_name(fname) &&
0847                 !(IS_ENCRYPTED(dir) && IS_CASEFOLDED(dir)))
0848         ext4fs_dirhash(dir, fname_name(fname), fname_len(fname), hinfo);
0849     hash = hinfo->hash;
0850 
0851     if (root->info.unused_flags & 1) {
0852         ext4_warning_inode(dir, "Unimplemented hash flags: %#06x",
0853                    root->info.unused_flags);
0854         goto fail;
0855     }
0856 
0857     indirect = root->info.indirect_levels;
0858     if (indirect >= ext4_dir_htree_level(dir->i_sb)) {
0859         ext4_warning(dir->i_sb,
0860                  "Directory (ino: %lu) htree depth %#06x exceed"
0861                  "supported value", dir->i_ino,
0862                  ext4_dir_htree_level(dir->i_sb));
0863         if (ext4_dir_htree_level(dir->i_sb) < EXT4_HTREE_LEVEL) {
0864             ext4_warning(dir->i_sb, "Enable large directory "
0865                         "feature to access it");
0866         }
0867         goto fail;
0868     }
0869 
0870     entries = (struct dx_entry *)(((char *)&root->info) +
0871                       root->info.info_length);
0872 
0873     if (dx_get_limit(entries) != dx_root_limit(dir,
0874                            root->info.info_length)) {
0875         ext4_warning_inode(dir, "dx entry: limit %u != root limit %u",
0876                    dx_get_limit(entries),
0877                    dx_root_limit(dir, root->info.info_length));
0878         goto fail;
0879     }
0880 
0881     dxtrace(printk("Look up %x", hash));
0882     level = 0;
0883     blocks[0] = 0;
0884     while (1) {
0885         count = dx_get_count(entries);
0886         if (!count || count > dx_get_limit(entries)) {
0887             ext4_warning_inode(dir,
0888                        "dx entry: count %u beyond limit %u",
0889                        count, dx_get_limit(entries));
0890             goto fail;
0891         }
0892 
0893         p = entries + 1;
0894         q = entries + count - 1;
0895         while (p <= q) {
0896             m = p + (q - p) / 2;
0897             dxtrace(printk(KERN_CONT "."));
0898             if (dx_get_hash(m) > hash)
0899                 q = m - 1;
0900             else
0901                 p = m + 1;
0902         }
0903 
0904         htree_rep_invariant_check(entries, p, hash, count - 1);
0905 
0906         at = p - 1;
0907         dxtrace(printk(KERN_CONT " %x->%u\n",
0908                    at == entries ? 0 : dx_get_hash(at),
0909                    dx_get_block(at)));
0910         frame->entries = entries;
0911         frame->at = at;
0912 
0913         block = dx_get_block(at);
0914         for (i = 0; i <= level; i++) {
0915             if (blocks[i] == block) {
0916                 ext4_warning_inode(dir,
0917                     "dx entry: tree cycle block %u points back to block %u",
0918                     blocks[level], block);
0919                 goto fail;
0920             }
0921         }
0922         if (++level > indirect)
0923             return frame;
0924         blocks[level] = block;
0925         frame++;
0926         frame->bh = ext4_read_dirblock(dir, block, INDEX);
0927         if (IS_ERR(frame->bh)) {
0928             ret_err = (struct dx_frame *) frame->bh;
0929             frame->bh = NULL;
0930             goto fail;
0931         }
0932 
0933         entries = ((struct dx_node *) frame->bh->b_data)->entries;
0934 
0935         if (dx_get_limit(entries) != dx_node_limit(dir)) {
0936             ext4_warning_inode(dir,
0937                 "dx entry: limit %u != node limit %u",
0938                 dx_get_limit(entries), dx_node_limit(dir));
0939             goto fail;
0940         }
0941     }
0942 fail:
0943     while (frame >= frame_in) {
0944         brelse(frame->bh);
0945         frame--;
0946     }
0947 
0948     if (ret_err == ERR_PTR(ERR_BAD_DX_DIR))
0949         ext4_warning_inode(dir,
0950             "Corrupt directory, running e2fsck is recommended");
0951     return ret_err;
0952 }
0953 
0954 static void dx_release(struct dx_frame *frames)
0955 {
0956     struct dx_root_info *info;
0957     int i;
0958     unsigned int indirect_levels;
0959 
0960     if (frames[0].bh == NULL)
0961         return;
0962 
0963     info = &((struct dx_root *)frames[0].bh->b_data)->info;
0964     /* save local copy, "info" may be freed after brelse() */
0965     indirect_levels = info->indirect_levels;
0966     for (i = 0; i <= indirect_levels; i++) {
0967         if (frames[i].bh == NULL)
0968             break;
0969         brelse(frames[i].bh);
0970         frames[i].bh = NULL;
0971     }
0972 }
0973 
0974 /*
0975  * This function increments the frame pointer to search the next leaf
0976  * block, and reads in the necessary intervening nodes if the search
0977  * should be necessary.  Whether or not the search is necessary is
0978  * controlled by the hash parameter.  If the hash value is even, then
0979  * the search is only continued if the next block starts with that
0980  * hash value.  This is used if we are searching for a specific file.
0981  *
0982  * If the hash value is HASH_NB_ALWAYS, then always go to the next block.
0983  *
0984  * This function returns 1 if the caller should continue to search,
0985  * or 0 if it should not.  If there is an error reading one of the
0986  * index blocks, it will a negative error code.
0987  *
0988  * If start_hash is non-null, it will be filled in with the starting
0989  * hash of the next page.
0990  */
0991 static int ext4_htree_next_block(struct inode *dir, __u32 hash,
0992                  struct dx_frame *frame,
0993                  struct dx_frame *frames,
0994                  __u32 *start_hash)
0995 {
0996     struct dx_frame *p;
0997     struct buffer_head *bh;
0998     int num_frames = 0;
0999     __u32 bhash;
1000 
1001     p = frame;
1002     /*
1003      * Find the next leaf page by incrementing the frame pointer.
1004      * If we run out of entries in the interior node, loop around and
1005      * increment pointer in the parent node.  When we break out of
1006      * this loop, num_frames indicates the number of interior
1007      * nodes need to be read.
1008      */
1009     while (1) {
1010         if (++(p->at) < p->entries + dx_get_count(p->entries))
1011             break;
1012         if (p == frames)
1013             return 0;
1014         num_frames++;
1015         p--;
1016     }
1017 
1018     /*
1019      * If the hash is 1, then continue only if the next page has a
1020      * continuation hash of any value.  This is used for readdir
1021      * handling.  Otherwise, check to see if the hash matches the
1022      * desired continuation hash.  If it doesn't, return since
1023      * there's no point to read in the successive index pages.
1024      */
1025     bhash = dx_get_hash(p->at);
1026     if (start_hash)
1027         *start_hash = bhash;
1028     if ((hash & 1) == 0) {
1029         if ((bhash & ~1) != hash)
1030             return 0;
1031     }
1032     /*
1033      * If the hash is HASH_NB_ALWAYS, we always go to the next
1034      * block so no check is necessary
1035      */
1036     while (num_frames--) {
1037         bh = ext4_read_dirblock(dir, dx_get_block(p->at), INDEX);
1038         if (IS_ERR(bh))
1039             return PTR_ERR(bh);
1040         p++;
1041         brelse(p->bh);
1042         p->bh = bh;
1043         p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
1044     }
1045     return 1;
1046 }
1047 
1048 
1049 /*
1050  * This function fills a red-black tree with information from a
1051  * directory block.  It returns the number directory entries loaded
1052  * into the tree.  If there is an error it is returned in err.
1053  */
1054 static int htree_dirblock_to_tree(struct file *dir_file,
1055                   struct inode *dir, ext4_lblk_t block,
1056                   struct dx_hash_info *hinfo,
1057                   __u32 start_hash, __u32 start_minor_hash)
1058 {
1059     struct buffer_head *bh;
1060     struct ext4_dir_entry_2 *de, *top;
1061     int err = 0, count = 0;
1062     struct fscrypt_str fname_crypto_str = FSTR_INIT(NULL, 0), tmp_str;
1063     int csum = ext4_has_metadata_csum(dir->i_sb);
1064 
1065     dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n",
1066                             (unsigned long)block));
1067     bh = ext4_read_dirblock(dir, block, DIRENT_HTREE);
1068     if (IS_ERR(bh))
1069         return PTR_ERR(bh);
1070 
1071     de = (struct ext4_dir_entry_2 *) bh->b_data;
1072     /* csum entries are not larger in the casefolded encrypted case */
1073     top = (struct ext4_dir_entry_2 *) ((char *) de +
1074                        dir->i_sb->s_blocksize -
1075                        ext4_dir_rec_len(0,
1076                                csum ? NULL : dir));
1077     /* Check if the directory is encrypted */
1078     if (IS_ENCRYPTED(dir)) {
1079         err = fscrypt_prepare_readdir(dir);
1080         if (err < 0) {
1081             brelse(bh);
1082             return err;
1083         }
1084         err = fscrypt_fname_alloc_buffer(EXT4_NAME_LEN,
1085                          &fname_crypto_str);
1086         if (err < 0) {
1087             brelse(bh);
1088             return err;
1089         }
1090     }
1091 
1092     for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
1093         if (ext4_check_dir_entry(dir, NULL, de, bh,
1094                 bh->b_data, bh->b_size,
1095                 (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
1096                      + ((char *)de - bh->b_data))) {
1097             /* silently ignore the rest of the block */
1098             break;
1099         }
1100         if (ext4_hash_in_dirent(dir)) {
1101             if (de->name_len && de->inode) {
1102                 hinfo->hash = EXT4_DIRENT_HASH(de);
1103                 hinfo->minor_hash = EXT4_DIRENT_MINOR_HASH(de);
1104             } else {
1105                 hinfo->hash = 0;
1106                 hinfo->minor_hash = 0;
1107             }
1108         } else {
1109             ext4fs_dirhash(dir, de->name, de->name_len, hinfo);
1110         }
1111         if ((hinfo->hash < start_hash) ||
1112             ((hinfo->hash == start_hash) &&
1113              (hinfo->minor_hash < start_minor_hash)))
1114             continue;
1115         if (de->inode == 0)
1116             continue;
1117         if (!IS_ENCRYPTED(dir)) {
1118             tmp_str.name = de->name;
1119             tmp_str.len = de->name_len;
1120             err = ext4_htree_store_dirent(dir_file,
1121                    hinfo->hash, hinfo->minor_hash, de,
1122                    &tmp_str);
1123         } else {
1124             int save_len = fname_crypto_str.len;
1125             struct fscrypt_str de_name = FSTR_INIT(de->name,
1126                                 de->name_len);
1127 
1128             /* Directory is encrypted */
1129             err = fscrypt_fname_disk_to_usr(dir, hinfo->hash,
1130                     hinfo->minor_hash, &de_name,
1131                     &fname_crypto_str);
1132             if (err) {
1133                 count = err;
1134                 goto errout;
1135             }
1136             err = ext4_htree_store_dirent(dir_file,
1137                    hinfo->hash, hinfo->minor_hash, de,
1138                     &fname_crypto_str);
1139             fname_crypto_str.len = save_len;
1140         }
1141         if (err != 0) {
1142             count = err;
1143             goto errout;
1144         }
1145         count++;
1146     }
1147 errout:
1148     brelse(bh);
1149     fscrypt_fname_free_buffer(&fname_crypto_str);
1150     return count;
1151 }
1152 
1153 
1154 /*
1155  * This function fills a red-black tree with information from a
1156  * directory.  We start scanning the directory in hash order, starting
1157  * at start_hash and start_minor_hash.
1158  *
1159  * This function returns the number of entries inserted into the tree,
1160  * or a negative error code.
1161  */
1162 int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
1163              __u32 start_minor_hash, __u32 *next_hash)
1164 {
1165     struct dx_hash_info hinfo;
1166     struct ext4_dir_entry_2 *de;
1167     struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
1168     struct inode *dir;
1169     ext4_lblk_t block;
1170     int count = 0;
1171     int ret, err;
1172     __u32 hashval;
1173     struct fscrypt_str tmp_str;
1174 
1175     dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n",
1176                start_hash, start_minor_hash));
1177     dir = file_inode(dir_file);
1178     if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) {
1179         if (ext4_hash_in_dirent(dir))
1180             hinfo.hash_version = DX_HASH_SIPHASH;
1181         else
1182             hinfo.hash_version =
1183                     EXT4_SB(dir->i_sb)->s_def_hash_version;
1184         if (hinfo.hash_version <= DX_HASH_TEA)
1185             hinfo.hash_version +=
1186                 EXT4_SB(dir->i_sb)->s_hash_unsigned;
1187         hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
1188         if (ext4_has_inline_data(dir)) {
1189             int has_inline_data = 1;
1190             count = ext4_inlinedir_to_tree(dir_file, dir, 0,
1191                                &hinfo, start_hash,
1192                                start_minor_hash,
1193                                &has_inline_data);
1194             if (has_inline_data) {
1195                 *next_hash = ~0;
1196                 return count;
1197             }
1198         }
1199         count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo,
1200                            start_hash, start_minor_hash);
1201         *next_hash = ~0;
1202         return count;
1203     }
1204     hinfo.hash = start_hash;
1205     hinfo.minor_hash = 0;
1206     frame = dx_probe(NULL, dir, &hinfo, frames);
1207     if (IS_ERR(frame))
1208         return PTR_ERR(frame);
1209 
1210     /* Add '.' and '..' from the htree header */
1211     if (!start_hash && !start_minor_hash) {
1212         de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
1213         tmp_str.name = de->name;
1214         tmp_str.len = de->name_len;
1215         err = ext4_htree_store_dirent(dir_file, 0, 0,
1216                           de, &tmp_str);
1217         if (err != 0)
1218             goto errout;
1219         count++;
1220     }
1221     if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
1222         de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
1223         de = ext4_next_entry(de, dir->i_sb->s_blocksize);
1224         tmp_str.name = de->name;
1225         tmp_str.len = de->name_len;
1226         err = ext4_htree_store_dirent(dir_file, 2, 0,
1227                           de, &tmp_str);
1228         if (err != 0)
1229             goto errout;
1230         count++;
1231     }
1232 
1233     while (1) {
1234         if (fatal_signal_pending(current)) {
1235             err = -ERESTARTSYS;
1236             goto errout;
1237         }
1238         cond_resched();
1239         block = dx_get_block(frame->at);
1240         ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo,
1241                          start_hash, start_minor_hash);
1242         if (ret < 0) {
1243             err = ret;
1244             goto errout;
1245         }
1246         count += ret;
1247         hashval = ~0;
1248         ret = ext4_htree_next_block(dir, HASH_NB_ALWAYS,
1249                         frame, frames, &hashval);
1250         *next_hash = hashval;
1251         if (ret < 0) {
1252             err = ret;
1253             goto errout;
1254         }
1255         /*
1256          * Stop if:  (a) there are no more entries, or
1257          * (b) we have inserted at least one entry and the
1258          * next hash value is not a continuation
1259          */
1260         if ((ret == 0) ||
1261             (count && ((hashval & 1) == 0)))
1262             break;
1263     }
1264     dx_release(frames);
1265     dxtrace(printk(KERN_DEBUG "Fill tree: returned %d entries, "
1266                "next hash: %x\n", count, *next_hash));
1267     return count;
1268 errout:
1269     dx_release(frames);
1270     return (err);
1271 }
1272 
1273 static inline int search_dirblock(struct buffer_head *bh,
1274                   struct inode *dir,
1275                   struct ext4_filename *fname,
1276                   unsigned int offset,
1277                   struct ext4_dir_entry_2 **res_dir)
1278 {
1279     return ext4_search_dir(bh, bh->b_data, dir->i_sb->s_blocksize, dir,
1280                    fname, offset, res_dir);
1281 }
1282 
1283 /*
1284  * Directory block splitting, compacting
1285  */
1286 
1287 /*
1288  * Create map of hash values, offsets, and sizes, stored at end of block.
1289  * Returns number of entries mapped.
1290  */
1291 static int dx_make_map(struct inode *dir, struct buffer_head *bh,
1292                struct dx_hash_info *hinfo,
1293                struct dx_map_entry *map_tail)
1294 {
1295     int count = 0;
1296     struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)bh->b_data;
1297     unsigned int buflen = bh->b_size;
1298     char *base = bh->b_data;
1299     struct dx_hash_info h = *hinfo;
1300 
1301     if (ext4_has_metadata_csum(dir->i_sb))
1302         buflen -= sizeof(struct ext4_dir_entry_tail);
1303 
1304     while ((char *) de < base + buflen) {
1305         if (ext4_check_dir_entry(dir, NULL, de, bh, base, buflen,
1306                      ((char *)de) - base))
1307             return -EFSCORRUPTED;
1308         if (de->name_len && de->inode) {
1309             if (ext4_hash_in_dirent(dir))
1310                 h.hash = EXT4_DIRENT_HASH(de);
1311             else
1312                 ext4fs_dirhash(dir, de->name, de->name_len, &h);
1313             map_tail--;
1314             map_tail->hash = h.hash;
1315             map_tail->offs = ((char *) de - base)>>2;
1316             map_tail->size = le16_to_cpu(de->rec_len);
1317             count++;
1318             cond_resched();
1319         }
1320         de = ext4_next_entry(de, dir->i_sb->s_blocksize);
1321     }
1322     return count;
1323 }
1324 
1325 /* Sort map by hash value */
1326 static void dx_sort_map (struct dx_map_entry *map, unsigned count)
1327 {
1328     struct dx_map_entry *p, *q, *top = map + count - 1;
1329     int more;
1330     /* Combsort until bubble sort doesn't suck */
1331     while (count > 2) {
1332         count = count*10/13;
1333         if (count - 9 < 2) /* 9, 10 -> 11 */
1334             count = 11;
1335         for (p = top, q = p - count; q >= map; p--, q--)
1336             if (p->hash < q->hash)
1337                 swap(*p, *q);
1338     }
1339     /* Garden variety bubble sort */
1340     do {
1341         more = 0;
1342         q = top;
1343         while (q-- > map) {
1344             if (q[1].hash >= q[0].hash)
1345                 continue;
1346             swap(*(q+1), *q);
1347             more = 1;
1348         }
1349     } while(more);
1350 }
1351 
1352 static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
1353 {
1354     struct dx_entry *entries = frame->entries;
1355     struct dx_entry *old = frame->at, *new = old + 1;
1356     int count = dx_get_count(entries);
1357 
1358     ASSERT(count < dx_get_limit(entries));
1359     ASSERT(old < entries + count);
1360     memmove(new + 1, new, (char *)(entries + count) - (char *)(new));
1361     dx_set_hash(new, hash);
1362     dx_set_block(new, block);
1363     dx_set_count(entries, count + 1);
1364 }
1365 
1366 #if IS_ENABLED(CONFIG_UNICODE)
1367 /*
1368  * Test whether a case-insensitive directory entry matches the filename
1369  * being searched for.  If quick is set, assume the name being looked up
1370  * is already in the casefolded form.
1371  *
1372  * Returns: 0 if the directory entry matches, more than 0 if it
1373  * doesn't match or less than zero on error.
1374  */
1375 static int ext4_ci_compare(const struct inode *parent, const struct qstr *name,
1376                u8 *de_name, size_t de_name_len, bool quick)
1377 {
1378     const struct super_block *sb = parent->i_sb;
1379     const struct unicode_map *um = sb->s_encoding;
1380     struct fscrypt_str decrypted_name = FSTR_INIT(NULL, de_name_len);
1381     struct qstr entry = QSTR_INIT(de_name, de_name_len);
1382     int ret;
1383 
1384     if (IS_ENCRYPTED(parent)) {
1385         const struct fscrypt_str encrypted_name =
1386                 FSTR_INIT(de_name, de_name_len);
1387 
1388         decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL);
1389         if (!decrypted_name.name)
1390             return -ENOMEM;
1391         ret = fscrypt_fname_disk_to_usr(parent, 0, 0, &encrypted_name,
1392                         &decrypted_name);
1393         if (ret < 0)
1394             goto out;
1395         entry.name = decrypted_name.name;
1396         entry.len = decrypted_name.len;
1397     }
1398 
1399     if (quick)
1400         ret = utf8_strncasecmp_folded(um, name, &entry);
1401     else
1402         ret = utf8_strncasecmp(um, name, &entry);
1403     if (ret < 0) {
1404         /* Handle invalid character sequence as either an error
1405          * or as an opaque byte sequence.
1406          */
1407         if (sb_has_strict_encoding(sb))
1408             ret = -EINVAL;
1409         else if (name->len != entry.len)
1410             ret = 1;
1411         else
1412             ret = !!memcmp(name->name, entry.name, entry.len);
1413     }
1414 out:
1415     kfree(decrypted_name.name);
1416     return ret;
1417 }
1418 
1419 int ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
1420                   struct ext4_filename *name)
1421 {
1422     struct fscrypt_str *cf_name = &name->cf_name;
1423     struct dx_hash_info *hinfo = &name->hinfo;
1424     int len;
1425 
1426     if (!IS_CASEFOLDED(dir) || !dir->i_sb->s_encoding ||
1427         (IS_ENCRYPTED(dir) && !fscrypt_has_encryption_key(dir))) {
1428         cf_name->name = NULL;
1429         return 0;
1430     }
1431 
1432     cf_name->name = kmalloc(EXT4_NAME_LEN, GFP_NOFS);
1433     if (!cf_name->name)
1434         return -ENOMEM;
1435 
1436     len = utf8_casefold(dir->i_sb->s_encoding,
1437                 iname, cf_name->name,
1438                 EXT4_NAME_LEN);
1439     if (len <= 0) {
1440         kfree(cf_name->name);
1441         cf_name->name = NULL;
1442     }
1443     cf_name->len = (unsigned) len;
1444     if (!IS_ENCRYPTED(dir))
1445         return 0;
1446 
1447     hinfo->hash_version = DX_HASH_SIPHASH;
1448     hinfo->seed = NULL;
1449     if (cf_name->name)
1450         ext4fs_dirhash(dir, cf_name->name, cf_name->len, hinfo);
1451     else
1452         ext4fs_dirhash(dir, iname->name, iname->len, hinfo);
1453     return 0;
1454 }
1455 #endif
1456 
1457 /*
1458  * Test whether a directory entry matches the filename being searched for.
1459  *
1460  * Return: %true if the directory entry matches, otherwise %false.
1461  */
1462 static bool ext4_match(struct inode *parent,
1463                   const struct ext4_filename *fname,
1464                   struct ext4_dir_entry_2 *de)
1465 {
1466     struct fscrypt_name f;
1467 
1468     if (!de->inode)
1469         return false;
1470 
1471     f.usr_fname = fname->usr_fname;
1472     f.disk_name = fname->disk_name;
1473 #ifdef CONFIG_FS_ENCRYPTION
1474     f.crypto_buf = fname->crypto_buf;
1475 #endif
1476 
1477 #if IS_ENABLED(CONFIG_UNICODE)
1478     if (parent->i_sb->s_encoding && IS_CASEFOLDED(parent) &&
1479         (!IS_ENCRYPTED(parent) || fscrypt_has_encryption_key(parent))) {
1480         if (fname->cf_name.name) {
1481             struct qstr cf = {.name = fname->cf_name.name,
1482                       .len = fname->cf_name.len};
1483             if (IS_ENCRYPTED(parent)) {
1484                 if (fname->hinfo.hash != EXT4_DIRENT_HASH(de) ||
1485                     fname->hinfo.minor_hash !=
1486                         EXT4_DIRENT_MINOR_HASH(de)) {
1487 
1488                     return false;
1489                 }
1490             }
1491             return !ext4_ci_compare(parent, &cf, de->name,
1492                             de->name_len, true);
1493         }
1494         return !ext4_ci_compare(parent, fname->usr_fname, de->name,
1495                         de->name_len, false);
1496     }
1497 #endif
1498 
1499     return fscrypt_match_name(&f, de->name, de->name_len);
1500 }
1501 
1502 /*
1503  * Returns 0 if not found, -1 on failure, and 1 on success
1504  */
1505 int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
1506             struct inode *dir, struct ext4_filename *fname,
1507             unsigned int offset, struct ext4_dir_entry_2 **res_dir)
1508 {
1509     struct ext4_dir_entry_2 * de;
1510     char * dlimit;
1511     int de_len;
1512 
1513     de = (struct ext4_dir_entry_2 *)search_buf;
1514     dlimit = search_buf + buf_size;
1515     while ((char *) de < dlimit - EXT4_BASE_DIR_LEN) {
1516         /* this code is executed quadratically often */
1517         /* do minimal checking `by hand' */
1518         if (de->name + de->name_len <= dlimit &&
1519             ext4_match(dir, fname, de)) {
1520             /* found a match - just to be sure, do
1521              * a full check */
1522             if (ext4_check_dir_entry(dir, NULL, de, bh, search_buf,
1523                          buf_size, offset))
1524                 return -1;
1525             *res_dir = de;
1526             return 1;
1527         }
1528         /* prevent looping on a bad block */
1529         de_len = ext4_rec_len_from_disk(de->rec_len,
1530                         dir->i_sb->s_blocksize);
1531         if (de_len <= 0)
1532             return -1;
1533         offset += de_len;
1534         de = (struct ext4_dir_entry_2 *) ((char *) de + de_len);
1535     }
1536     return 0;
1537 }
1538 
1539 static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block,
1540                    struct ext4_dir_entry *de)
1541 {
1542     struct super_block *sb = dir->i_sb;
1543 
1544     if (!is_dx(dir))
1545         return 0;
1546     if (block == 0)
1547         return 1;
1548     if (de->inode == 0 &&
1549         ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize) ==
1550             sb->s_blocksize)
1551         return 1;
1552     return 0;
1553 }
1554 
/*
 *  __ext4_find_entry()
 *
 * finds an entry in the specified directory with the wanted name. It
 * returns the cache buffer in which the entry was found, and the entry
 * itself (as a parameter - res_dir). It does NOT read the inode of the
 * entry - you'll have to do that yourself if you want to.
 *
 * The lookup is tried in this order: inline data (if the directory has
 * any), the hash tree index (if the directory is indexed and the name is
 * not "." or ".."), and finally a linear scan of the directory blocks
 * with read-ahead, starting at the block where the previous successful
 * lookup ended.
 *
 * If inlined is non-NULL it is set to 1 when the directory was handled
 * as inline data.
 *
 * Returns NULL if no entry was found (also when the name is longer than
 * EXT4_NAME_LEN, or when the linear scan hits a block that
 * search_dirblock() rejects), or an ERR_PTR() for read and checksum
 * failures.
 *
 * The returned buffer_head has ->b_count elevated.  The caller is expected
 * to brelse() it when appropriate.
 */
static struct buffer_head *__ext4_find_entry(struct inode *dir,
                         struct ext4_filename *fname,
                         struct ext4_dir_entry_2 **res_dir,
                         int *inlined)
{
    struct super_block *sb;
    struct buffer_head *bh_use[NAMEI_RA_SIZE];
    struct buffer_head *bh, *ret = NULL;
    ext4_lblk_t start, block;
    const u8 *name = fname->usr_fname->name;
    size_t ra_max = 0;  /* Number of bh's in the readahead
                   buffer, bh_use[] */
    size_t ra_ptr = 0;  /* Current index into readahead
                   buffer */
    ext4_lblk_t  nblocks;
    int i, namelen, retval;

    *res_dir = NULL;
    sb = dir->i_sb;
    namelen = fname->usr_fname->len;
    if (namelen > EXT4_NAME_LEN)
        return NULL;

    if (ext4_has_inline_data(dir)) {
        int has_inline_data = 1;
        ret = ext4_find_inline_entry(dir, fname, res_dir,
                         &has_inline_data);
        /*
         * has_inline_data still set: the inline lookup result is
         * final.  Otherwise continue with the block-based search.
         */
        if (has_inline_data) {
            if (inlined)
                *inlined = 1;
            goto cleanup_and_exit;
        }
    }

    if ((namelen <= 2) && (name[0] == '.') &&
        (name[1] == '.' || name[1] == '\0')) {
        /*
         * "." or ".." will only be in the first block
         * NFS may look up ".."; "." should be handled by the VFS
         */
        block = start = 0;
        nblocks = 1;
        goto restart;
    }
    if (is_dx(dir)) {
        ret = ext4_dx_find_entry(dir, fname, res_dir);
        /*
         * On success, or if the error was file not found,
         * return.  Otherwise, fall back to doing a search the
         * old fashioned way.
         */
        if (!IS_ERR(ret) || PTR_ERR(ret) != ERR_BAD_DX_DIR)
            goto cleanup_and_exit;
        dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
                   "falling back\n"));
        ret = NULL;
    }
    nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
    if (!nblocks) {
        ret = NULL;
        goto cleanup_and_exit;
    }
    /* Begin where the last successful lookup in this directory ended. */
    start = EXT4_I(dir)->i_dir_start_lookup;
    if (start >= nblocks)
        start = 0;
    block = start;
restart:
    do {
        /*
         * We deal with the read-ahead logic here.
         */
        cond_resched();
        if (ra_ptr >= ra_max) {
            /* Refill the readahead buffer */
            ra_ptr = 0;
            /*
             * Do not read past the end of the directory, nor past
             * "start" once the scan has wrapped around to block 0.
             */
            if (block < start)
                ra_max = start - block;
            else
                ra_max = nblocks - block;
            ra_max = min(ra_max, ARRAY_SIZE(bh_use));
            retval = ext4_bread_batch(dir, block, ra_max,
                          false /* wait */, bh_use);
            if (retval) {
                ret = ERR_PTR(retval);
                /* Nothing in bh_use[] to release on the way out. */
                ra_max = 0;
                goto cleanup_and_exit;
            }
        }
        /* A NULL slot in the batch is skipped without error. */
        if ((bh = bh_use[ra_ptr++]) == NULL)
            goto next;
        wait_on_buffer(bh);
        if (!buffer_uptodate(bh)) {
            EXT4_ERROR_INODE_ERR(dir, EIO,
                         "reading directory lblock %lu",
                         (unsigned long) block);
            brelse(bh);
            ret = ERR_PTR(-EIO);
            goto cleanup_and_exit;
        }
        /*
         * Verify the checksum of blocks not yet marked verified;
         * blocks recognised as htree index nodes are exempt.
         */
        if (!buffer_verified(bh) &&
            !is_dx_internal_node(dir, block,
                     (struct ext4_dir_entry *)bh->b_data) &&
            !ext4_dirblock_csum_verify(dir, bh)) {
            EXT4_ERROR_INODE_ERR(dir, EFSBADCRC,
                         "checksumming directory "
                         "block %lu", (unsigned long)block);
            brelse(bh);
            ret = ERR_PTR(-EFSBADCRC);
            goto cleanup_and_exit;
        }
        set_buffer_verified(bh);
        i = search_dirblock(bh, dir, fname,
                block << EXT4_BLOCK_SIZE_BITS(sb), res_dir);
        if (i == 1) {
            /* Found: remember this block for the next lookup. */
            EXT4_I(dir)->i_dir_start_lookup = block;
            ret = bh;
            goto cleanup_and_exit;
        } else {
            brelse(bh);
            /*
             * ret is still NULL here, so a block rejected by
             * search_dirblock() ends the search as "not found".
             */
            if (i < 0)
                goto cleanup_and_exit;
        }
    next:
        /* Wrap around to block 0 after the last block. */
        if (++block >= nblocks)
            block = 0;
    } while (block != start);

    /*
     * If the directory has grown while we were searching, then
     * search the last part of the directory before giving up.
     */
    block = nblocks;
    nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
    if (block < nblocks) {
        start = 0;
        goto restart;
    }

cleanup_and_exit:
    /* Clean up the read-ahead blocks */
    for (; ra_ptr < ra_max; ra_ptr++)
        brelse(bh_use[ra_ptr]);
    return ret;
}
1710 
1711 static struct buffer_head *ext4_find_entry(struct inode *dir,
1712                        const struct qstr *d_name,
1713                        struct ext4_dir_entry_2 **res_dir,
1714                        int *inlined)
1715 {
1716     int err;
1717     struct ext4_filename fname;
1718     struct buffer_head *bh;
1719 
1720     err = ext4_fname_setup_filename(dir, d_name, 1, &fname);
1721     if (err == -ENOENT)
1722         return NULL;
1723     if (err)
1724         return ERR_PTR(err);
1725 
1726     bh = __ext4_find_entry(dir, &fname, res_dir, inlined);
1727 
1728     ext4_fname_free_filename(&fname);
1729     return bh;
1730 }
1731 
1732 static struct buffer_head *ext4_lookup_entry(struct inode *dir,
1733                          struct dentry *dentry,
1734                          struct ext4_dir_entry_2 **res_dir)
1735 {
1736     int err;
1737     struct ext4_filename fname;
1738     struct buffer_head *bh;
1739 
1740     err = ext4_fname_prepare_lookup(dir, dentry, &fname);
1741     generic_set_encrypted_ci_d_ops(dentry);
1742     if (err == -ENOENT)
1743         return NULL;
1744     if (err)
1745         return ERR_PTR(err);
1746 
1747     bh = __ext4_find_entry(dir, &fname, res_dir, NULL);
1748 
1749     ext4_fname_free_filename(&fname);
1750     return bh;
1751 }
1752 
1753 static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
1754             struct ext4_filename *fname,
1755             struct ext4_dir_entry_2 **res_dir)
1756 {
1757     struct super_block * sb = dir->i_sb;
1758     struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
1759     struct buffer_head *bh;
1760     ext4_lblk_t block;
1761     int retval;
1762 
1763 #ifdef CONFIG_FS_ENCRYPTION
1764     *res_dir = NULL;
1765 #endif
1766     frame = dx_probe(fname, dir, NULL, frames);
1767     if (IS_ERR(frame))
1768         return (struct buffer_head *) frame;
1769     do {
1770         block = dx_get_block(frame->at);
1771         bh = ext4_read_dirblock(dir, block, DIRENT_HTREE);
1772         if (IS_ERR(bh))
1773             goto errout;
1774 
1775         retval = search_dirblock(bh, dir, fname,
1776                      block << EXT4_BLOCK_SIZE_BITS(sb),
1777                      res_dir);
1778         if (retval == 1)
1779             goto success;
1780         brelse(bh);
1781         if (retval == -1) {
1782             bh = ERR_PTR(ERR_BAD_DX_DIR);
1783             goto errout;
1784         }
1785 
1786         /* Check to see if we should continue to search */
1787         retval = ext4_htree_next_block(dir, fname->hinfo.hash, frame,
1788                            frames, NULL);
1789         if (retval < 0) {
1790             ext4_warning_inode(dir,
1791                 "error %d reading directory index block",
1792                 retval);
1793             bh = ERR_PTR(retval);
1794             goto errout;
1795         }
1796     } while (retval == 1);
1797 
1798     bh = NULL;
1799 errout:
1800     dxtrace(printk(KERN_DEBUG "%s not found\n", fname->usr_fname->name));
1801 success:
1802     dx_release(frames);
1803     return bh;
1804 }
1805 
1806 static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
1807 {
1808     struct inode *inode;
1809     struct ext4_dir_entry_2 *de;
1810     struct buffer_head *bh;
1811 
1812     if (dentry->d_name.len > EXT4_NAME_LEN)
1813         return ERR_PTR(-ENAMETOOLONG);
1814 
1815     bh = ext4_lookup_entry(dir, dentry, &de);
1816     if (IS_ERR(bh))
1817         return ERR_CAST(bh);
1818     inode = NULL;
1819     if (bh) {
1820         __u32 ino = le32_to_cpu(de->inode);
1821         brelse(bh);
1822         if (!ext4_valid_inum(dir->i_sb, ino)) {
1823             EXT4_ERROR_INODE(dir, "bad inode number: %u", ino);
1824             return ERR_PTR(-EFSCORRUPTED);
1825         }
1826         if (unlikely(ino == dir->i_ino)) {
1827             EXT4_ERROR_INODE(dir, "'%pd' linked to parent dir",
1828                      dentry);
1829             return ERR_PTR(-EFSCORRUPTED);
1830         }
1831         inode = ext4_iget(dir->i_sb, ino, EXT4_IGET_NORMAL);
1832         if (inode == ERR_PTR(-ESTALE)) {
1833             EXT4_ERROR_INODE(dir,
1834                      "deleted inode referenced: %u",
1835                      ino);
1836             return ERR_PTR(-EFSCORRUPTED);
1837         }
1838         if (!IS_ERR(inode) && IS_ENCRYPTED(dir) &&
1839             (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
1840             !fscrypt_has_permitted_context(dir, inode)) {
1841             ext4_warning(inode->i_sb,
1842                      "Inconsistent encryption contexts: %lu/%lu",
1843                      dir->i_ino, inode->i_ino);
1844             iput(inode);
1845             return ERR_PTR(-EPERM);
1846         }
1847     }
1848 
1849 #if IS_ENABLED(CONFIG_UNICODE)
1850     if (!inode && IS_CASEFOLDED(dir)) {
1851         /* Eventually we want to call d_add_ci(dentry, NULL)
1852          * for negative dentries in the encoding case as
1853          * well.  For now, prevent the negative dentry
1854          * from being cached.
1855          */
1856         return NULL;
1857     }
1858 #endif
1859     return d_splice_alias(inode, dentry);
1860 }
1861 
1862 
1863 struct dentry *ext4_get_parent(struct dentry *child)
1864 {
1865     __u32 ino;
1866     struct ext4_dir_entry_2 * de;
1867     struct buffer_head *bh;
1868 
1869     bh = ext4_find_entry(d_inode(child), &dotdot_name, &de, NULL);
1870     if (IS_ERR(bh))
1871         return ERR_CAST(bh);
1872     if (!bh)
1873         return ERR_PTR(-ENOENT);
1874     ino = le32_to_cpu(de->inode);
1875     brelse(bh);
1876 
1877     if (!ext4_valid_inum(child->d_sb, ino)) {
1878         EXT4_ERROR_INODE(d_inode(child),
1879                  "bad parent inode number: %u", ino);
1880         return ERR_PTR(-EFSCORRUPTED);
1881     }
1882 
1883     return d_obtain_alias(ext4_iget(child->d_sb, ino, EXT4_IGET_NORMAL));
1884 }
1885 
1886 /*
1887  * Move count entries from end of map between two memory locations.
1888  * Returns pointer to last entry moved.
1889  */
1890 static struct ext4_dir_entry_2 *
1891 dx_move_dirents(struct inode *dir, char *from, char *to,
1892         struct dx_map_entry *map, int count,
1893         unsigned blocksize)
1894 {
1895     unsigned rec_len = 0;
1896 
1897     while (count--) {
1898         struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)
1899                         (from + (map->offs<<2));
1900         rec_len = ext4_dir_rec_len(de->name_len, dir);
1901 
1902         memcpy (to, de, rec_len);
1903         ((struct ext4_dir_entry_2 *) to)->rec_len =
1904                 ext4_rec_len_to_disk(rec_len, blocksize);
1905 
1906         /* wipe dir_entry excluding the rec_len field */
1907         de->inode = 0;
1908         memset(&de->name_len, 0, ext4_rec_len_from_disk(de->rec_len,
1909                                 blocksize) -
1910                      offsetof(struct ext4_dir_entry_2,
1911                                 name_len));
1912 
1913         map++;
1914         to += rec_len;
1915     }
1916     return (struct ext4_dir_entry_2 *) (to - rec_len);
1917 }
1918 
1919 /*
1920  * Compact each dir entry in the range to the minimal rec_len.
1921  * Returns pointer to last entry in range.
1922  */
1923 static struct ext4_dir_entry_2 *dx_pack_dirents(struct inode *dir, char *base,
1924                             unsigned int blocksize)
1925 {
1926     struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base;
1927     unsigned rec_len = 0;
1928 
1929     prev = to = de;
1930     while ((char*)de < base + blocksize) {
1931         next = ext4_next_entry(de, blocksize);
1932         if (de->inode && de->name_len) {
1933             rec_len = ext4_dir_rec_len(de->name_len, dir);
1934             if (de > to)
1935                 memmove(to, de, rec_len);
1936             to->rec_len = ext4_rec_len_to_disk(rec_len, blocksize);
1937             prev = to;
1938             to = (struct ext4_dir_entry_2 *) (((char *) to) + rec_len);
1939         }
1940         de = next;
1941     }
1942     return prev;
1943 }
1944 
1945 /*
1946  * Split a full leaf block to make room for a new dir entry.
1947  * Allocate a new block, and move entries so that they are approx. equally full.
1948  * Returns pointer to de in block into which the new entry will be inserted.
1949  */
1950 static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1951             struct buffer_head **bh,struct dx_frame *frame,
1952             struct dx_hash_info *hinfo)
1953 {
1954     unsigned blocksize = dir->i_sb->s_blocksize;
1955     unsigned continued;
1956     int count;
1957     struct buffer_head *bh2;
1958     ext4_lblk_t newblock;
1959     u32 hash2;
1960     struct dx_map_entry *map;
1961     char *data1 = (*bh)->b_data, *data2;
1962     unsigned split, move, size;
1963     struct ext4_dir_entry_2 *de = NULL, *de2;
1964     int csum_size = 0;
1965     int err = 0, i;
1966 
1967     if (ext4_has_metadata_csum(dir->i_sb))
1968         csum_size = sizeof(struct ext4_dir_entry_tail);
1969 
1970     bh2 = ext4_append(handle, dir, &newblock);
1971     if (IS_ERR(bh2)) {
1972         brelse(*bh);
1973         *bh = NULL;
1974         return (struct ext4_dir_entry_2 *) bh2;
1975     }
1976 
1977     BUFFER_TRACE(*bh, "get_write_access");
1978     err = ext4_journal_get_write_access(handle, dir->i_sb, *bh,
1979                         EXT4_JTR_NONE);
1980     if (err)
1981         goto journal_error;
1982 
1983     BUFFER_TRACE(frame->bh, "get_write_access");
1984     err = ext4_journal_get_write_access(handle, dir->i_sb, frame->bh,
1985                         EXT4_JTR_NONE);
1986     if (err)
1987         goto journal_error;
1988 
1989     data2 = bh2->b_data;
1990 
1991     /* create map in the end of data2 block */
1992     map = (struct dx_map_entry *) (data2 + blocksize);
1993     count = dx_make_map(dir, *bh, hinfo, map);
1994     if (count < 0) {
1995         err = count;
1996         goto journal_error;
1997     }
1998     map -= count;
1999     dx_sort_map(map, count);
2000     /* Ensure that neither split block is over half full */
2001     size = 0;
2002     move = 0;
2003     for (i = count-1; i >= 0; i--) {
2004         /* is more than half of this entry in 2nd half of the block? */
2005         if (size + map[i].size/2 > blocksize/2)
2006             break;
2007         size += map[i].size;
2008         move++;
2009     }
2010     /*
2011      * map index at which we will split
2012      *
2013      * If the sum of active entries didn't exceed half the block size, just
2014      * split it in half by count; each resulting block will have at least
2015      * half the space free.
2016      */
2017     if (i > 0)
2018         split = count - move;
2019     else
2020         split = count/2;
2021 
2022     hash2 = map[split].hash;
2023     continued = hash2 == map[split - 1].hash;
2024     dxtrace(printk(KERN_INFO "Split block %lu at %x, %i/%i\n",
2025             (unsigned long)dx_get_block(frame->at),
2026                     hash2, split, count-split));
2027 
2028     /* Fancy dance to stay within two buffers */
2029     de2 = dx_move_dirents(dir, data1, data2, map + split, count - split,
2030                   blocksize);
2031     de = dx_pack_dirents(dir, data1, blocksize);
2032     de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) -
2033                        (char *) de,
2034                        blocksize);
2035     de2->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) -
2036                         (char *) de2,
2037                         blocksize);
2038     if (csum_size) {
2039         ext4_initialize_dirent_tail(*bh, blocksize);
2040         ext4_initialize_dirent_tail(bh2, blocksize);
2041     }
2042 
2043     dxtrace(dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *) data1,
2044             blocksize, 1));
2045     dxtrace(dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *) data2,
2046             blocksize, 1));
2047 
2048     /* Which block gets the new entry? */
2049     if (hinfo->hash >= hash2) {
2050         swap(*bh, bh2);
2051         de = de2;
2052     }
2053     dx_insert_block(frame, hash2 + continued, newblock);
2054     err = ext4_handle_dirty_dirblock(handle, dir, bh2);
2055     if (err)
2056         goto journal_error;
2057     err = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
2058     if (err)
2059         goto journal_error;
2060     brelse(bh2);
2061     dxtrace(dx_show_index("frame", frame->entries));
2062     return de;
2063 
2064 journal_error:
2065     brelse(*bh);
2066     brelse(bh2);
2067     *bh = NULL;
2068     ext4_std_error(dir->i_sb, err);
2069     return ERR_PTR(err);
2070 }
2071 
2072 int ext4_find_dest_de(struct inode *dir, struct inode *inode,
2073               struct buffer_head *bh,
2074               void *buf, int buf_size,
2075               struct ext4_filename *fname,
2076               struct ext4_dir_entry_2 **dest_de)
2077 {
2078     struct ext4_dir_entry_2 *de;
2079     unsigned short reclen = ext4_dir_rec_len(fname_len(fname), dir);
2080     int nlen, rlen;
2081     unsigned int offset = 0;
2082     char *top;
2083 
2084     de = buf;
2085     top = buf + buf_size - reclen;
2086     while ((char *) de <= top) {
2087         if (ext4_check_dir_entry(dir, NULL, de, bh,
2088                      buf, buf_size, offset))
2089             return -EFSCORRUPTED;
2090         if (ext4_match(dir, fname, de))
2091             return -EEXIST;
2092         nlen = ext4_dir_rec_len(de->name_len, dir);
2093         rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
2094         if ((de->inode ? rlen - nlen : rlen) >= reclen)
2095             break;
2096         de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
2097         offset += rlen;
2098     }
2099     if ((char *) de > top)
2100         return -ENOSPC;
2101 
2102     *dest_de = de;
2103     return 0;
2104 }
2105 
2106 void ext4_insert_dentry(struct inode *dir,
2107             struct inode *inode,
2108             struct ext4_dir_entry_2 *de,
2109             int buf_size,
2110             struct ext4_filename *fname)
2111 {
2112 
2113     int nlen, rlen;
2114 
2115     nlen = ext4_dir_rec_len(de->name_len, dir);
2116     rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
2117     if (de->inode) {
2118         struct ext4_dir_entry_2 *de1 =
2119             (struct ext4_dir_entry_2 *)((char *)de + nlen);
2120         de1->rec_len = ext4_rec_len_to_disk(rlen - nlen, buf_size);
2121         de->rec_len = ext4_rec_len_to_disk(nlen, buf_size);
2122         de = de1;
2123     }
2124     de->file_type = EXT4_FT_UNKNOWN;
2125     de->inode = cpu_to_le32(inode->i_ino);
2126     ext4_set_de_type(inode->i_sb, de, inode->i_mode);
2127     de->name_len = fname_len(fname);
2128     memcpy(de->name, fname_name(fname), fname_len(fname));
2129     if (ext4_hash_in_dirent(dir)) {
2130         struct dx_hash_info *hinfo = &fname->hinfo;
2131 
2132         EXT4_DIRENT_HASHES(de)->hash = cpu_to_le32(hinfo->hash);
2133         EXT4_DIRENT_HASHES(de)->minor_hash =
2134                         cpu_to_le32(hinfo->minor_hash);
2135     }
2136 }
2137 
2138 /*
2139  * Add a new entry into a directory (leaf) block.  If de is non-NULL,
2140  * it points to a directory entry which is guaranteed to be large
2141  * enough for new directory entry.  If de is NULL, then
2142  * add_dirent_to_buf will attempt search the directory block for
2143  * space.  It will return -ENOSPC if no space is available, and -EIO
2144  * and -EEXIST if directory entry already exists.
2145  */
2146 static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
2147                  struct inode *dir,
2148                  struct inode *inode, struct ext4_dir_entry_2 *de,
2149                  struct buffer_head *bh)
2150 {
2151     unsigned int    blocksize = dir->i_sb->s_blocksize;
2152     int     csum_size = 0;
2153     int     err, err2;
2154 
2155     if (ext4_has_metadata_csum(inode->i_sb))
2156         csum_size = sizeof(struct ext4_dir_entry_tail);
2157 
2158     if (!de) {
2159         err = ext4_find_dest_de(dir, inode, bh, bh->b_data,
2160                     blocksize - csum_size, fname, &de);
2161         if (err)
2162             return err;
2163     }
2164     BUFFER_TRACE(bh, "get_write_access");
2165     err = ext4_journal_get_write_access(handle, dir->i_sb, bh,
2166                         EXT4_JTR_NONE);
2167     if (err) {
2168         ext4_std_error(dir->i_sb, err);
2169         return err;
2170     }
2171 
2172     /* By now the buffer is marked for journaling */
2173     ext4_insert_dentry(dir, inode, de, blocksize, fname);
2174 
2175     /*
2176      * XXX shouldn't update any times until successful
2177      * completion of syscall, but too many callers depend
2178      * on this.
2179      *
2180      * XXX similarly, too many callers depend on
2181      * ext4_new_inode() setting the times, but error
2182      * recovery deletes the inode, so the worst that can
2183      * happen is that the times are slightly out of date
2184      * and/or different from the directory change time.
2185      */
2186     dir->i_mtime = dir->i_ctime = current_time(dir);
2187     ext4_update_dx_flag(dir);
2188     inode_inc_iversion(dir);
2189     err2 = ext4_mark_inode_dirty(handle, dir);
2190     BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
2191     err = ext4_handle_dirty_dirblock(handle, dir, bh);
2192     if (err)
2193         ext4_std_error(dir->i_sb, err);
2194     return err ? err : err2;
2195 }
2196 
2197 /*
2198  * This converts a one block unindexed directory to a 3 block indexed
2199  * directory, and adds the dentry to the indexed directory.
2200  */
2201 static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
2202                 struct inode *dir,
2203                 struct inode *inode, struct buffer_head *bh)
2204 {
2205     struct buffer_head *bh2;
2206     struct dx_root  *root;
2207     struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
2208     struct dx_entry *entries;
2209     struct ext4_dir_entry_2 *de, *de2;
2210     char        *data2, *top;
2211     unsigned    len;
2212     int     retval;
2213     unsigned    blocksize;
2214     ext4_lblk_t  block;
2215     struct fake_dirent *fde;
2216     int csum_size = 0;
2217 
2218     if (ext4_has_metadata_csum(inode->i_sb))
2219         csum_size = sizeof(struct ext4_dir_entry_tail);
2220 
2221     blocksize =  dir->i_sb->s_blocksize;
2222     dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino));
2223     BUFFER_TRACE(bh, "get_write_access");
2224     retval = ext4_journal_get_write_access(handle, dir->i_sb, bh,
2225                            EXT4_JTR_NONE);
2226     if (retval) {
2227         ext4_std_error(dir->i_sb, retval);
2228         brelse(bh);
2229         return retval;
2230     }
2231     root = (struct dx_root *) bh->b_data;
2232 
2233     /* The 0th block becomes the root, move the dirents out */
2234     fde = &root->dotdot;
2235     de = (struct ext4_dir_entry_2 *)((char *)fde +
2236         ext4_rec_len_from_disk(fde->rec_len, blocksize));
2237     if ((char *) de >= (((char *) root) + blocksize)) {
2238         EXT4_ERROR_INODE(dir, "invalid rec_len for '..'");
2239         brelse(bh);
2240         return -EFSCORRUPTED;
2241     }
2242     len = ((char *) root) + (blocksize - csum_size) - (char *) de;
2243 
2244     /* Allocate new block for the 0th block's dirents */
2245     bh2 = ext4_append(handle, dir, &block);
2246     if (IS_ERR(bh2)) {
2247         brelse(bh);
2248         return PTR_ERR(bh2);
2249     }
2250     ext4_set_inode_flag(dir, EXT4_INODE_INDEX);
2251     data2 = bh2->b_data;
2252 
2253     memcpy(data2, de, len);
2254     memset(de, 0, len); /* wipe old data */
2255     de = (struct ext4_dir_entry_2 *) data2;
2256     top = data2 + len;
2257     while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top)
2258         de = de2;
2259     de->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) -
2260                        (char *) de, blocksize);
2261 
2262     if (csum_size)
2263         ext4_initialize_dirent_tail(bh2, blocksize);
2264 
2265     /* Initialize the root; the dot dirents already exist */
2266     de = (struct ext4_dir_entry_2 *) (&root->dotdot);
2267     de->rec_len = ext4_rec_len_to_disk(
2268             blocksize - ext4_dir_rec_len(2, NULL), blocksize);
2269     memset (&root->info, 0, sizeof(root->info));
2270     root->info.info_length = sizeof(root->info);
2271     if (ext4_hash_in_dirent(dir))
2272         root->info.hash_version = DX_HASH_SIPHASH;
2273     else
2274         root->info.hash_version =
2275                 EXT4_SB(dir->i_sb)->s_def_hash_version;
2276 
2277     entries = root->entries;
2278     dx_set_block(entries, 1);
2279     dx_set_count(entries, 1);
2280     dx_set_limit(entries, dx_root_limit(dir, sizeof(root->info)));
2281 
2282     /* Initialize as for dx_probe */
2283     fname->hinfo.hash_version = root->info.hash_version;
2284     if (fname->hinfo.hash_version <= DX_HASH_TEA)
2285         fname->hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
2286     fname->hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
2287 
2288     /* casefolded encrypted hashes are computed on fname setup */
2289     if (!ext4_hash_in_dirent(dir))
2290         ext4fs_dirhash(dir, fname_name(fname),
2291                 fname_len(fname), &fname->hinfo);
2292 
2293     memset(frames, 0, sizeof(frames));
2294     frame = frames;
2295     frame->entries = entries;
2296     frame->at = entries;
2297     frame->bh = bh;
2298 
2299     retval = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
2300     if (retval)
2301         goto out_frames;
2302     retval = ext4_handle_dirty_dirblock(handle, dir, bh2);
2303     if (retval)
2304         goto out_frames;
2305 
2306     de = do_split(handle,dir, &bh2, frame, &fname->hinfo);
2307     if (IS_ERR(de)) {
2308         retval = PTR_ERR(de);
2309         goto out_frames;
2310     }
2311 
2312     retval = add_dirent_to_buf(handle, fname, dir, inode, de, bh2);
2313 out_frames:
2314     /*
2315      * Even if the block split failed, we have to properly write
2316      * out all the changes we did so far. Otherwise we can end up
2317      * with corrupted filesystem.
2318      */
2319     if (retval)
2320         ext4_mark_inode_dirty(handle, dir);
2321     dx_release(frames);
2322     brelse(bh2);
2323     return retval;
2324 }
2325 
2326 /*
2327  *  ext4_add_entry()
2328  *
2329  * adds a file entry to the specified directory, using the same
2330  * semantics as ext4_find_entry(). It returns NULL if it failed.
2331  *
2332  * NOTE!! The inode part of 'de' is left at 0 - which means you
2333  * may not sleep between calling this and putting something into
2334  * the entry, as someone else might have used it while you slept.
2335  */
2336 static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
2337               struct inode *inode)
2338 {
2339     struct inode *dir = d_inode(dentry->d_parent);
2340     struct buffer_head *bh = NULL;
2341     struct ext4_dir_entry_2 *de;
2342     struct super_block *sb;
2343     struct ext4_filename fname;
2344     int retval;
2345     int dx_fallback=0;
2346     unsigned blocksize;
2347     ext4_lblk_t block, blocks;
2348     int csum_size = 0;
2349 
2350     if (ext4_has_metadata_csum(inode->i_sb))
2351         csum_size = sizeof(struct ext4_dir_entry_tail);
2352 
2353     sb = dir->i_sb;
2354     blocksize = sb->s_blocksize;
2355     if (!dentry->d_name.len)
2356         return -EINVAL;
2357 
2358     if (fscrypt_is_nokey_name(dentry))
2359         return -ENOKEY;
2360 
2361 #if IS_ENABLED(CONFIG_UNICODE)
2362     if (sb_has_strict_encoding(sb) && IS_CASEFOLDED(dir) &&
2363         sb->s_encoding && utf8_validate(sb->s_encoding, &dentry->d_name))
2364         return -EINVAL;
2365 #endif
2366 
2367     retval = ext4_fname_setup_filename(dir, &dentry->d_name, 0, &fname);
2368     if (retval)
2369         return retval;
2370 
2371     if (ext4_has_inline_data(dir)) {
2372         retval = ext4_try_add_inline_entry(handle, &fname, dir, inode);
2373         if (retval < 0)
2374             goto out;
2375         if (retval == 1) {
2376             retval = 0;
2377             goto out;
2378         }
2379     }
2380 
2381     if (is_dx(dir)) {
2382         retval = ext4_dx_add_entry(handle, &fname, dir, inode);
2383         if (!retval || (retval != ERR_BAD_DX_DIR))
2384             goto out;
2385         /* Can we just ignore htree data? */
2386         if (ext4_has_metadata_csum(sb)) {
2387             EXT4_ERROR_INODE(dir,
2388                 "Directory has corrupted htree index.");
2389             retval = -EFSCORRUPTED;
2390             goto out;
2391         }
2392         ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
2393         dx_fallback++;
2394         retval = ext4_mark_inode_dirty(handle, dir);
2395         if (unlikely(retval))
2396             goto out;
2397     }
2398     blocks = dir->i_size >> sb->s_blocksize_bits;
2399     for (block = 0; block < blocks; block++) {
2400         bh = ext4_read_dirblock(dir, block, DIRENT);
2401         if (bh == NULL) {
2402             bh = ext4_bread(handle, dir, block,
2403                     EXT4_GET_BLOCKS_CREATE);
2404             goto add_to_new_block;
2405         }
2406         if (IS_ERR(bh)) {
2407             retval = PTR_ERR(bh);
2408             bh = NULL;
2409             goto out;
2410         }
2411         retval = add_dirent_to_buf(handle, &fname, dir, inode,
2412                        NULL, bh);
2413         if (retval != -ENOSPC)
2414             goto out;
2415 
2416         if (blocks == 1 && !dx_fallback &&
2417             ext4_has_feature_dir_index(sb)) {
2418             retval = make_indexed_dir(handle, &fname, dir,
2419                           inode, bh);
2420             bh = NULL; /* make_indexed_dir releases bh */
2421             goto out;
2422         }
2423         brelse(bh);
2424     }
2425     bh = ext4_append(handle, dir, &block);
2426 add_to_new_block:
2427     if (IS_ERR(bh)) {
2428         retval = PTR_ERR(bh);
2429         bh = NULL;
2430         goto out;
2431     }
2432     de = (struct ext4_dir_entry_2 *) bh->b_data;
2433     de->inode = 0;
2434     de->rec_len = ext4_rec_len_to_disk(blocksize - csum_size, blocksize);
2435 
2436     if (csum_size)
2437         ext4_initialize_dirent_tail(bh, blocksize);
2438 
2439     retval = add_dirent_to_buf(handle, &fname, dir, inode, de, bh);
2440 out:
2441     ext4_fname_free_filename(&fname);
2442     brelse(bh);
2443     if (retval == 0)
2444         ext4_set_inode_state(inode, EXT4_STATE_NEWENTRY);
2445     return retval;
2446 }
2447 
2448 /*
2449  * Returns 0 for success, or a negative error value
2450  */
2451 static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
2452                  struct inode *dir, struct inode *inode)
2453 {
2454     struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
2455     struct dx_entry *entries, *at;
2456     struct buffer_head *bh;
2457     struct super_block *sb = dir->i_sb;
2458     struct ext4_dir_entry_2 *de;
2459     int restart;
2460     int err;
2461 
2462 again:
2463     restart = 0;
2464     frame = dx_probe(fname, dir, NULL, frames);
2465     if (IS_ERR(frame))
2466         return PTR_ERR(frame);
2467     entries = frame->entries;
2468     at = frame->at;
2469     bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT_HTREE);
2470     if (IS_ERR(bh)) {
2471         err = PTR_ERR(bh);
2472         bh = NULL;
2473         goto cleanup;
2474     }
2475 
2476     BUFFER_TRACE(bh, "get_write_access");
2477     err = ext4_journal_get_write_access(handle, sb, bh, EXT4_JTR_NONE);
2478     if (err)
2479         goto journal_error;
2480 
2481     err = add_dirent_to_buf(handle, fname, dir, inode, NULL, bh);
2482     if (err != -ENOSPC)
2483         goto cleanup;
2484 
2485     err = 0;
2486     /* Block full, should compress but for now just split */
2487     dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
2488                dx_get_count(entries), dx_get_limit(entries)));
2489     /* Need to split index? */
2490     if (dx_get_count(entries) == dx_get_limit(entries)) {
2491         ext4_lblk_t newblock;
2492         int levels = frame - frames + 1;
2493         unsigned int icount;
2494         int add_level = 1;
2495         struct dx_entry *entries2;
2496         struct dx_node *node2;
2497         struct buffer_head *bh2;
2498 
2499         while (frame > frames) {
2500             if (dx_get_count((frame - 1)->entries) <
2501                 dx_get_limit((frame - 1)->entries)) {
2502                 add_level = 0;
2503                 break;
2504             }
2505             frame--; /* split higher index block */
2506             at = frame->at;
2507             entries = frame->entries;
2508             restart = 1;
2509         }
2510         if (add_level && levels == ext4_dir_htree_level(sb)) {
2511             ext4_warning(sb, "Directory (ino: %lu) index full, "
2512                      "reach max htree level :%d",
2513                      dir->i_ino, levels);
2514             if (ext4_dir_htree_level(sb) < EXT4_HTREE_LEVEL) {
2515                 ext4_warning(sb, "Large directory feature is "
2516                          "not enabled on this "
2517                          "filesystem");
2518             }
2519             err = -ENOSPC;
2520             goto cleanup;
2521         }
2522         icount = dx_get_count(entries);
2523         bh2 = ext4_append(handle, dir, &newblock);
2524         if (IS_ERR(bh2)) {
2525             err = PTR_ERR(bh2);
2526             goto cleanup;
2527         }
2528         node2 = (struct dx_node *)(bh2->b_data);
2529         entries2 = node2->entries;
2530         memset(&node2->fake, 0, sizeof(struct fake_dirent));
2531         node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize,
2532                                sb->s_blocksize);
2533         BUFFER_TRACE(frame->bh, "get_write_access");
2534         err = ext4_journal_get_write_access(handle, sb, frame->bh,
2535                             EXT4_JTR_NONE);
2536         if (err)
2537             goto journal_error;
2538         if (!add_level) {
2539             unsigned icount1 = icount/2, icount2 = icount - icount1;
2540             unsigned hash2 = dx_get_hash(entries + icount1);
2541             dxtrace(printk(KERN_DEBUG "Split index %i/%i\n",
2542                        icount1, icount2));
2543 
2544             BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
2545             err = ext4_journal_get_write_access(handle, sb,
2546                                 (frame - 1)->bh,
2547                                 EXT4_JTR_NONE);
2548             if (err)
2549                 goto journal_error;
2550 
2551             memcpy((char *) entries2, (char *) (entries + icount1),
2552                    icount2 * sizeof(struct dx_entry));
2553             dx_set_count(entries, icount1);
2554             dx_set_count(entries2, icount2);
2555             dx_set_limit(entries2, dx_node_limit(dir));
2556 
2557             /* Which index block gets the new entry? */
2558             if (at - entries >= icount1) {
2559                 frame->at = at - entries - icount1 + entries2;
2560                 frame->entries = entries = entries2;
2561                 swap(frame->bh, bh2);
2562             }
2563             dx_insert_block((frame - 1), hash2, newblock);
2564             dxtrace(dx_show_index("node", frame->entries));
2565             dxtrace(dx_show_index("node",
2566                    ((struct dx_node *) bh2->b_data)->entries));
2567             err = ext4_handle_dirty_dx_node(handle, dir, bh2);
2568             if (err)
2569                 goto journal_error;
2570             brelse (bh2);
2571             err = ext4_handle_dirty_dx_node(handle, dir,
2572                            (frame - 1)->bh);
2573             if (err)
2574                 goto journal_error;
2575             err = ext4_handle_dirty_dx_node(handle, dir,
2576                             frame->bh);
2577             if (restart || err)
2578                 goto journal_error;
2579         } else {
2580             struct dx_root *dxroot;
2581             memcpy((char *) entries2, (char *) entries,
2582                    icount * sizeof(struct dx_entry));
2583             dx_set_limit(entries2, dx_node_limit(dir));
2584 
2585             /* Set up root */
2586             dx_set_count(entries, 1);
2587             dx_set_block(entries + 0, newblock);
2588             dxroot = (struct dx_root *)frames[0].bh->b_data;
2589             dxroot->info.indirect_levels += 1;
2590             dxtrace(printk(KERN_DEBUG
2591                        "Creating %d level index...\n",
2592                        dxroot->info.indirect_levels));
2593             err = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
2594             if (err)
2595                 goto journal_error;
2596             err = ext4_handle_dirty_dx_node(handle, dir, bh2);
2597             brelse(bh2);
2598             restart = 1;
2599             goto journal_error;
2600         }
2601     }
2602     de = do_split(handle, dir, &bh, frame, &fname->hinfo);
2603     if (IS_ERR(de)) {
2604         err = PTR_ERR(de);
2605         goto cleanup;
2606     }
2607     err = add_dirent_to_buf(handle, fname, dir, inode, de, bh);
2608     goto cleanup;
2609 
2610 journal_error:
2611     ext4_std_error(dir->i_sb, err); /* this is a no-op if err == 0 */
2612 cleanup:
2613     brelse(bh);
2614     dx_release(frames);
2615     /* @restart is true means htree-path has been changed, we need to
2616      * repeat dx_probe() to find out valid htree-path
2617      */
2618     if (restart && err == 0)
2619         goto again;
2620     return err;
2621 }
2622 
2623 /*
2624  * ext4_generic_delete_entry deletes a directory entry by merging it
2625  * with the previous entry
2626  */
2627 int ext4_generic_delete_entry(struct inode *dir,
2628                   struct ext4_dir_entry_2 *de_del,
2629                   struct buffer_head *bh,
2630                   void *entry_buf,
2631                   int buf_size,
2632                   int csum_size)
2633 {
2634     struct ext4_dir_entry_2 *de, *pde;
2635     unsigned int blocksize = dir->i_sb->s_blocksize;
2636     int i;
2637 
2638     i = 0;
2639     pde = NULL;
2640     de = entry_buf;
2641     while (i < buf_size - csum_size) {
2642         if (ext4_check_dir_entry(dir, NULL, de, bh,
2643                      entry_buf, buf_size, i))
2644             return -EFSCORRUPTED;
2645         if (de == de_del)  {
2646             if (pde) {
2647                 pde->rec_len = ext4_rec_len_to_disk(
2648                     ext4_rec_len_from_disk(pde->rec_len,
2649                                    blocksize) +
2650                     ext4_rec_len_from_disk(de->rec_len,
2651                                    blocksize),
2652                     blocksize);
2653 
2654                 /* wipe entire dir_entry */
2655                 memset(de, 0, ext4_rec_len_from_disk(de->rec_len,
2656                                 blocksize));
2657             } else {
2658                 /* wipe dir_entry excluding the rec_len field */
2659                 de->inode = 0;
2660                 memset(&de->name_len, 0,
2661                     ext4_rec_len_from_disk(de->rec_len,
2662                                 blocksize) -
2663                     offsetof(struct ext4_dir_entry_2,
2664                                 name_len));
2665             }
2666 
2667             inode_inc_iversion(dir);
2668             return 0;
2669         }
2670         i += ext4_rec_len_from_disk(de->rec_len, blocksize);
2671         pde = de;
2672         de = ext4_next_entry(de, blocksize);
2673     }
2674     return -ENOENT;
2675 }
2676 
2677 static int ext4_delete_entry(handle_t *handle,
2678                  struct inode *dir,
2679                  struct ext4_dir_entry_2 *de_del,
2680                  struct buffer_head *bh)
2681 {
2682     int err, csum_size = 0;
2683 
2684     if (ext4_has_inline_data(dir)) {
2685         int has_inline_data = 1;
2686         err = ext4_delete_inline_entry(handle, dir, de_del, bh,
2687                            &has_inline_data);
2688         if (has_inline_data)
2689             return err;
2690     }
2691 
2692     if (ext4_has_metadata_csum(dir->i_sb))
2693         csum_size = sizeof(struct ext4_dir_entry_tail);
2694 
2695     BUFFER_TRACE(bh, "get_write_access");
2696     err = ext4_journal_get_write_access(handle, dir->i_sb, bh,
2697                         EXT4_JTR_NONE);
2698     if (unlikely(err))
2699         goto out;
2700 
2701     err = ext4_generic_delete_entry(dir, de_del, bh, bh->b_data,
2702                     dir->i_sb->s_blocksize, csum_size);
2703     if (err)
2704         goto out;
2705 
2706     BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
2707     err = ext4_handle_dirty_dirblock(handle, dir, bh);
2708     if (unlikely(err))
2709         goto out;
2710 
2711     return 0;
2712 out:
2713     if (err != -ENOENT)
2714         ext4_std_error(dir->i_sb, err);
2715     return err;
2716 }
2717 
2718 /*
2719  * Set directory link count to 1 if nlinks > EXT4_LINK_MAX, or if nlinks == 2
2720  * since this indicates that nlinks count was previously 1 to avoid overflowing
2721  * the 16-bit i_links_count field on disk.  Directories with i_nlink == 1 mean
2722  * that subdirectory link counts are not being maintained accurately.
2723  *
2724  * The caller has already checked for i_nlink overflow in case the DIR_LINK
2725  * feature is not enabled and returned -EMLINK.  The is_dx() check is a proxy
2726  * for checking S_ISDIR(inode) (since the INODE_INDEX feature will not be set
2727  * on regular files) and to avoid creating huge/slow non-HTREE directories.
2728  */
2729 static void ext4_inc_count(struct inode *inode)
2730 {
2731     inc_nlink(inode);
2732     if (is_dx(inode) &&
2733         (inode->i_nlink > EXT4_LINK_MAX || inode->i_nlink == 2))
2734         set_nlink(inode, 1);
2735 }
2736 
2737 /*
2738  * If a directory had nlink == 1, then we should let it be 1. This indicates
2739  * directory has >EXT4_LINK_MAX subdirs.
2740  */
2741 static void ext4_dec_count(struct inode *inode)
2742 {
2743     if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2)
2744         drop_nlink(inode);
2745 }
2746 
2747 
2748 /*
2749  * Add non-directory inode to a directory. On success, the inode reference is
2750  * consumed by dentry is instantiation. This is also indicated by clearing of
2751  * *inodep pointer. On failure, the caller is responsible for dropping the
2752  * inode reference in the safe context.
2753  */
2754 static int ext4_add_nondir(handle_t *handle,
2755         struct dentry *dentry, struct inode **inodep)
2756 {
2757     struct inode *dir = d_inode(dentry->d_parent);
2758     struct inode *inode = *inodep;
2759     int err = ext4_add_entry(handle, dentry, inode);
2760     if (!err) {
2761         err = ext4_mark_inode_dirty(handle, inode);
2762         if (IS_DIRSYNC(dir))
2763             ext4_handle_sync(handle);
2764         d_instantiate_new(dentry, inode);
2765         *inodep = NULL;
2766         return err;
2767     }
2768     drop_nlink(inode);
2769     ext4_orphan_add(handle, inode);
2770     unlock_new_inode(inode);
2771     return err;
2772 }
2773 
2774 /*
2775  * By the time this is called, we already have created
2776  * the directory cache entry for the new file, but it
2777  * is so far negative - it has no inode.
2778  *
2779  * If the create succeeds, we fill in the inode information
2780  * with d_instantiate().
2781  */
2782 static int ext4_create(struct user_namespace *mnt_userns, struct inode *dir,
2783                struct dentry *dentry, umode_t mode, bool excl)
2784 {
2785     handle_t *handle;
2786     struct inode *inode;
2787     int err, credits, retries = 0;
2788 
2789     err = dquot_initialize(dir);
2790     if (err)
2791         return err;
2792 
2793     credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2794            EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
2795 retry:
2796     inode = ext4_new_inode_start_handle(mnt_userns, dir, mode, &dentry->d_name,
2797                         0, NULL, EXT4_HT_DIR, credits);
2798     handle = ext4_journal_current_handle();
2799     err = PTR_ERR(inode);
2800     if (!IS_ERR(inode)) {
2801         inode->i_op = &ext4_file_inode_operations;
2802         inode->i_fop = &ext4_file_operations;
2803         ext4_set_aops(inode);
2804         err = ext4_add_nondir(handle, dentry, &inode);
2805         if (!err)
2806             ext4_fc_track_create(handle, dentry);
2807     }
2808     if (handle)
2809         ext4_journal_stop(handle);
2810     if (!IS_ERR_OR_NULL(inode))
2811         iput(inode);
2812     if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2813         goto retry;
2814     return err;
2815 }
2816 
2817 static int ext4_mknod(struct user_namespace *mnt_userns, struct inode *dir,
2818               struct dentry *dentry, umode_t mode, dev_t rdev)
2819 {
2820     handle_t *handle;
2821     struct inode *inode;
2822     int err, credits, retries = 0;
2823 
2824     err = dquot_initialize(dir);
2825     if (err)
2826         return err;
2827 
2828     credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2829            EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
2830 retry:
2831     inode = ext4_new_inode_start_handle(mnt_userns, dir, mode, &dentry->d_name,
2832                         0, NULL, EXT4_HT_DIR, credits);
2833     handle = ext4_journal_current_handle();
2834     err = PTR_ERR(inode);
2835     if (!IS_ERR(inode)) {
2836         init_special_inode(inode, inode->i_mode, rdev);
2837         inode->i_op = &ext4_special_inode_operations;
2838         err = ext4_add_nondir(handle, dentry, &inode);
2839         if (!err)
2840             ext4_fc_track_create(handle, dentry);
2841     }
2842     if (handle)
2843         ext4_journal_stop(handle);
2844     if (!IS_ERR_OR_NULL(inode))
2845         iput(inode);
2846     if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2847         goto retry;
2848     return err;
2849 }
2850 
2851 static int ext4_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
2852             struct dentry *dentry, umode_t mode)
2853 {
2854     handle_t *handle;
2855     struct inode *inode;
2856     int err, retries = 0;
2857 
2858     err = dquot_initialize(dir);
2859     if (err)
2860         return err;
2861 
2862 retry:
2863     inode = ext4_new_inode_start_handle(mnt_userns, dir, mode,
2864                         NULL, 0, NULL,
2865                         EXT4_HT_DIR,
2866             EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) +
2867               4 + EXT4_XATTR_TRANS_BLOCKS);
2868     handle = ext4_journal_current_handle();
2869     err = PTR_ERR(inode);
2870     if (!IS_ERR(inode)) {
2871         inode->i_op = &ext4_file_inode_operations;
2872         inode->i_fop = &ext4_file_operations;
2873         ext4_set_aops(inode);
2874         d_tmpfile(dentry, inode);
2875         err = ext4_orphan_add(handle, inode);
2876         if (err)
2877             goto err_unlock_inode;
2878         mark_inode_dirty(inode);
2879         unlock_new_inode(inode);
2880     }
2881     if (handle)
2882         ext4_journal_stop(handle);
2883     if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2884         goto retry;
2885     return err;
2886 err_unlock_inode:
2887     ext4_journal_stop(handle);
2888     unlock_new_inode(inode);
2889     return err;
2890 }
2891 
2892 struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode,
2893               struct ext4_dir_entry_2 *de,
2894               int blocksize, int csum_size,
2895               unsigned int parent_ino, int dotdot_real_len)
2896 {
2897     de->inode = cpu_to_le32(inode->i_ino);
2898     de->name_len = 1;
2899     de->rec_len = ext4_rec_len_to_disk(ext4_dir_rec_len(de->name_len, NULL),
2900                        blocksize);
2901     strcpy(de->name, ".");
2902     ext4_set_de_type(inode->i_sb, de, S_IFDIR);
2903 
2904     de = ext4_next_entry(de, blocksize);
2905     de->inode = cpu_to_le32(parent_ino);
2906     de->name_len = 2;
2907     if (!dotdot_real_len)
2908         de->rec_len = ext4_rec_len_to_disk(blocksize -
2909                     (csum_size + ext4_dir_rec_len(1, NULL)),
2910                     blocksize);
2911     else
2912         de->rec_len = ext4_rec_len_to_disk(
2913                     ext4_dir_rec_len(de->name_len, NULL),
2914                     blocksize);
2915     strcpy(de->name, "..");
2916     ext4_set_de_type(inode->i_sb, de, S_IFDIR);
2917 
2918     return ext4_next_entry(de, blocksize);
2919 }
2920 
2921 int ext4_init_new_dir(handle_t *handle, struct inode *dir,
2922                  struct inode *inode)
2923 {
2924     struct buffer_head *dir_block = NULL;
2925     struct ext4_dir_entry_2 *de;
2926     ext4_lblk_t block = 0;
2927     unsigned int blocksize = dir->i_sb->s_blocksize;
2928     int csum_size = 0;
2929     int err;
2930 
2931     if (ext4_has_metadata_csum(dir->i_sb))
2932         csum_size = sizeof(struct ext4_dir_entry_tail);
2933 
2934     if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
2935         err = ext4_try_create_inline_dir(handle, dir, inode);
2936         if (err < 0 && err != -ENOSPC)
2937             goto out;
2938         if (!err)
2939             goto out;
2940     }
2941 
2942     inode->i_size = 0;
2943     dir_block = ext4_append(handle, inode, &block);
2944     if (IS_ERR(dir_block))
2945         return PTR_ERR(dir_block);
2946     de = (struct ext4_dir_entry_2 *)dir_block->b_data;
2947     ext4_init_dot_dotdot(inode, de, blocksize, csum_size, dir->i_ino, 0);
2948     set_nlink(inode, 2);
2949     if (csum_size)
2950         ext4_initialize_dirent_tail(dir_block, blocksize);
2951 
2952     BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
2953     err = ext4_handle_dirty_dirblock(handle, inode, dir_block);
2954     if (err)
2955         goto out;
2956     set_buffer_verified(dir_block);
2957 out:
2958     brelse(dir_block);
2959     return err;
2960 }
2961 
/*
 * ext4_mkdir - create a new directory for @dentry inside @dir
 *
 * Fails early with -EMLINK when EXT4_DIR_LINK_MAX(dir) is true.  Otherwise
 * a directory inode is allocated under a new journal handle, its first
 * block is set up by ext4_init_new_dir(), it is linked into @dir with
 * ext4_add_entry(), and the parent's link count is bumped through
 * ext4_inc_count().  An attempt that ends in -ENOSPC is repeated for as
 * long as ext4_should_retry_alloc() allows.
 *
 * Returns 0 or a negative errno.
 */
2962 static int ext4_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
2963               struct dentry *dentry, umode_t mode)
2964 {
2965     handle_t *handle;
2966     struct inode *inode;
2967     int err, err2 = 0, credits, retries = 0;
2968 
2969     if (EXT4_DIR_LINK_MAX(dir))
2970         return -EMLINK;
2971 
2972     err = dquot_initialize(dir);
2973     if (err)
2974         return err;
2975 
2976     credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2977            EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
2978 retry:
2979     inode = ext4_new_inode_start_handle(mnt_userns, dir, S_IFDIR | mode,
2980                         &dentry->d_name,
2981                         0, NULL, EXT4_HT_DIR, credits);
2982     handle = ext4_journal_current_handle();
2983     err = PTR_ERR(inode);
2984     if (IS_ERR(inode))
2985         goto out_stop;
2986 
2987     inode->i_op = &ext4_dir_inode_operations;
2988     inode->i_fop = &ext4_dir_operations;
2989     err = ext4_init_new_dir(handle, dir, inode);
2990     if (err)
2991         goto out_clear_inode;
2992     err = ext4_mark_inode_dirty(handle, inode);
2993     if (!err)
2994         err = ext4_add_entry(handle, dentry, inode);
2995     if (err) {
        /*
         * Shared failure path.  The label lives inside this if-body and
         * is also entered by goto from above (ext4_init_new_dir() failed)
         * and from below (dirtying the parent failed).  The new inode's
         * link count is zeroed, it is put on the orphan list and our
         * reference is dropped.  The handle is stopped here, which is why
         * the exit jumps to out_retry rather than out_stop.  A failure of
         * the final ext4_mark_inode_dirty() replaces the original error.
         */
2996 out_clear_inode:
2997         clear_nlink(inode);
2998         ext4_orphan_add(handle, inode);
2999         unlock_new_inode(inode);
3000         err2 = ext4_mark_inode_dirty(handle, inode);
3001         if (unlikely(err2))
3002             err = err2;
3003         ext4_journal_stop(handle);
3004         iput(inode);
3005         goto out_retry;
3006     }
3007     ext4_inc_count(dir);
3008 
3009     ext4_update_dx_flag(dir);
3010     err = ext4_mark_inode_dirty(handle, dir);
    /*
     * NOTE(review): when this fails, the entry added by ext4_add_entry()
     * and the ext4_inc_count() on the parent are not undone anywhere in
     * this function - confirm that is intended.
     */
3011     if (err)
3012         goto out_clear_inode;
3013     d_instantiate_new(dentry, inode);
3014     ext4_fc_track_create(handle, dentry);
3015     if (IS_DIRSYNC(dir))
3016         ext4_handle_sync(handle);
3017 
3018 out_stop:
3019     if (handle)
3020         ext4_journal_stop(handle);
3021 out_retry:
3022     if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
3023         goto retry;
3024     return err;
3025 }
3026 
3027 /*
3028  * routine to check that the specified directory is empty (for rmdir)
3029  */
/*
 * ext4_empty_dir - report whether directory @inode holds only "." and ".."
 *
 * Inline-data directories are answered by empty_inline_dir() as long as
 * the inode still has inline data.  Otherwise block 0 is read, its first
 * two entries are validated as "." (pointing at @inode itself) and ".."
 * (with a non-zero inode number), and every following entry up to i_size
 * must pass ext4_check_dir_entry() and have an inode number of zero.
 *
 * Returns true when the directory is empty.  Returns false when an in-use
 * entry is found, and also on every validation or read failure, so a
 * false result does not by itself prove that the directory has entries.
 */
3030 bool ext4_empty_dir(struct inode *inode)
3031 {
3032     unsigned int offset;
3033     struct buffer_head *bh;
3034     struct ext4_dir_entry_2 *de;
3035     struct super_block *sb;
3036 
3037     if (ext4_has_inline_data(inode)) {
3038         int has_inline_data = 1;
3039         int ret;
3040 
3041         ret = empty_inline_dir(inode, &has_inline_data);
3042         if (has_inline_data)
3043             return ret;
3044     }
3045 
3046     sb = inode->i_sb;
    /* the directory must at least have room for the "." and ".." records */
3047     if (inode->i_size < ext4_dir_rec_len(1, NULL) +
3048                     ext4_dir_rec_len(2, NULL)) {
3049         EXT4_ERROR_INODE(inode, "invalid size");
3050         return false;
3051     }
3052     /* The first directory block must not be a hole,
3053      * so treat it as DIRENT_HTREE
3054      */
3055     bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE);
3056     if (IS_ERR(bh))
3057         return false;
3058 
3059     de = (struct ext4_dir_entry_2 *) bh->b_data;
3060     if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size,
3061                  0) ||
3062         le32_to_cpu(de->inode) != inode->i_ino || strcmp(".", de->name)) {
3063         ext4_warning_inode(inode, "directory missing '.'");
3064         brelse(bh);
3065         return false;
3066     }
3067     offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
3068     de = ext4_next_entry(de, sb->s_blocksize);
3069     if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size,
3070                  offset) ||
3071         le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) {
3072         ext4_warning_inode(inode, "directory missing '..'");
3073         brelse(bh);
3074         return false;
3075     }
3076     offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
    /* offset is a byte position within the directory from here on */
3077     while (offset < inode->i_size) {
        /* crossed into a new block: swap the buffer for the next one */
3078         if (!(offset & (sb->s_blocksize - 1))) {
3079             unsigned int lblock;
3080             brelse(bh);
3081             lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb);
3082             bh = ext4_read_dirblock(inode, lblock, EITHER);
            /*
             * A NULL buffer (presumably a hole - confirm against
             * ext4_read_dirblock()) is skipped as a whole block.  bh
             * stays NULL, so the brelse() calls that may follow are
             * handed NULL.
             */
3083             if (bh == NULL) {
3084                 offset += sb->s_blocksize;
3085                 continue;
3086             }
            /* the previous buffer was already released above */
3087             if (IS_ERR(bh))
3088                 return false;
3089         }
3090         de = (struct ext4_dir_entry_2 *) (bh->b_data +
3091                     (offset & (sb->s_blocksize - 1)));
        /* a corrupt entry or one with a non-zero inode ends the scan */
3092         if (ext4_check_dir_entry(inode, NULL, de, bh,
3093                      bh->b_data, bh->b_size, offset) ||
3094             le32_to_cpu(de->inode)) {
3095             brelse(bh);
3096             return false;
3097         }
3098         offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
3099     }
3100     brelse(bh);
3101     return true;
3102 }
3103 
/*
 * ext4_rmdir - remove the empty directory @dentry from @dir
 *
 * Looks up the entry, checks that it refers to the dentry's inode and that
 * the directory is empty, and only then starts a journal handle.  Under the
 * handle the entry is deleted, the victim's link count and size are zeroed,
 * it is placed on the orphan list, and the parent's link count and
 * timestamps are updated.
 *
 * Returns 0 or a negative errno: -EIO after a forced shutdown, -ENOENT when
 * no entry is found, -EFSCORRUPTED when the entry's inode number does not
 * match, -ENOTEMPTY when ext4_empty_dir() returns false, or whatever the
 * quota, journal, delete or mark-dirty steps report.
 */
3104 static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
3105 {
3106     int retval;
3107     struct inode *inode;
3108     struct buffer_head *bh;
3109     struct ext4_dir_entry_2 *de;
3110     handle_t *handle = NULL;
3111 
3112     if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
3113         return -EIO;
3114 
3115     /* Initialize quotas before so that eventual writes go in
3116      * separate transaction */
3117     retval = dquot_initialize(dir);
3118     if (retval)
3119         return retval;
3120     retval = dquot_initialize(d_inode(dentry));
3121     if (retval)
3122         return retval;
3123 
    /* retval is preset before each check so a bare goto carries the code */
3124     retval = -ENOENT;
3125     bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
3126     if (IS_ERR(bh))
3127         return PTR_ERR(bh);
3128     if (!bh)
3129         goto end_rmdir;
3130 
3131     inode = d_inode(dentry);
3132 
3133     retval = -EFSCORRUPTED;
3134     if (le32_to_cpu(de->inode) != inode->i_ino)
3135         goto end_rmdir;
3136 
3137     retval = -ENOTEMPTY;
3138     if (!ext4_empty_dir(inode))
3139         goto end_rmdir;
3140 
3141     handle = ext4_journal_start(dir, EXT4_HT_DIR,
3142                     EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
3143     if (IS_ERR(handle)) {
3144         retval = PTR_ERR(handle);
        /* cleared so that end_rmdir does not try to stop an error pointer */
3145         handle = NULL;
3146         goto end_rmdir;
3147     }
3148 
3149     if (IS_DIRSYNC(dir))
3150         ext4_handle_sync(handle);
3151 
3152     retval = ext4_delete_entry(handle, dir, de, bh);
3153     if (retval)
3154         goto end_rmdir;
3155     if (!EXT4_DIR_LINK_EMPTY(inode))
3156         ext4_warning_inode(inode,
3157                  "empty directory '%.*s' has too many links (%u)",
3158                  dentry->d_name.len, dentry->d_name.name,
3159                  inode->i_nlink);
3160     inode_inc_iversion(inode);
3161     clear_nlink(inode);
3162     /* There's no need to set i_disksize: the fact that i_nlink is
3163      * zero will ensure that the right thing happens during any
3164      * recovery. */
3165     inode->i_size = 0;
    /* NOTE(review): the return value of ext4_orphan_add() is not checked */
3166     ext4_orphan_add(handle, inode);
3167     inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
3168     retval = ext4_mark_inode_dirty(handle, inode);
3169     if (retval)
3170         goto end_rmdir;
3171     ext4_dec_count(dir);
3172     ext4_update_dx_flag(dir);
3173     ext4_fc_track_unlink(handle, dentry);
    /* this result is what the function returns; nothing below changes it */
3174     retval = ext4_mark_inode_dirty(handle, dir);
3175 
3176 #if IS_ENABLED(CONFIG_UNICODE)
3177     /* VFS negative dentries are incompatible with Encoding and
3178      * Case-insensitiveness. Eventually we'll want to avoid
3179      * invalidating the dentries here, alongside with returning the
3180      * negative dentries at ext4_lookup(), when it is better
3181      * supported by the VFS for the CI case.
3182      */
3183     if (IS_CASEFOLDED(dir))
3184         d_invalidate(dentry);
3185 #endif
3186 
3187 end_rmdir:
3188     brelse(bh);
3189     if (handle)
3190         ext4_journal_stop(handle);
3191     return retval;
3192 }
3193 
3194 int __ext4_unlink(handle_t *handle, struct inode *dir, const struct qstr *d_name,
3195           struct inode *inode)
3196 {
3197     int retval = -ENOENT;
3198     struct buffer_head *bh;
3199     struct ext4_dir_entry_2 *de;
3200     int skip_remove_dentry = 0;
3201 
3202     bh = ext4_find_entry(dir, d_name, &de, NULL);
3203     if (IS_ERR(bh))
3204         return PTR_ERR(bh);
3205 
3206     if (!bh)
3207         return -ENOENT;
3208 
3209     if (le32_to_cpu(de->inode) != inode->i_ino) {
3210         /*
3211          * It's okay if we find dont find dentry which matches
3212          * the inode. That's because it might have gotten
3213          * renamed to a different inode number
3214          */
3215         if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
3216             skip_remove_dentry = 1;
3217         else
3218             goto out;
3219     }
3220 
3221     if (IS_DIRSYNC(dir))
3222         ext4_handle_sync(handle);
3223 
3224     if (!skip_remove_dentry) {
3225         retval = ext4_delete_entry(handle, dir, de, bh);
3226         if (retval)
3227             goto out;
3228         dir->i_ctime = dir->i_mtime = current_time(dir);
3229         ext4_update_dx_flag(dir);
3230         retval = ext4_mark_inode_dirty(handle, dir);
3231         if (retval)
3232             goto out;
3233     } else {
3234         retval = 0;
3235     }
3236     if (inode->i_nlink == 0)
3237         ext4_warning_inode(inode, "Deleting file '%.*s' with no links",
3238                    d_name->len, d_name->name);
3239     else
3240         drop_nlink(inode);
3241     if (!inode->i_nlink)
3242         ext4_orphan_add(handle, inode);
3243     inode->i_ctime = current_time(inode);
3244     retval = ext4_mark_inode_dirty(handle, inode);
3245 
3246 out:
3247     brelse(bh);
3248     return retval;
3249 }
3250 
3251 static int ext4_unlink(struct inode *dir, struct dentry *dentry)
3252 {
3253     handle_t *handle;
3254     int retval;
3255 
3256     if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
3257         return -EIO;
3258 
3259     trace_ext4_unlink_enter(dir, dentry);
3260     /*
3261      * Initialize quotas before so that eventual writes go
3262      * in separate transaction
3263      */
3264     retval = dquot_initialize(dir);
3265     if (retval)
3266         goto out_trace;
3267     retval = dquot_initialize(d_inode(dentry));
3268     if (retval)
3269         goto out_trace;
3270 
3271     handle = ext4_journal_start(dir, EXT4_HT_DIR,
3272                     EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
3273     if (IS_ERR(handle)) {
3274         retval = PTR_ERR(handle);
3275         goto out_trace;
3276     }
3277 
3278     retval = __ext4_unlink(handle, dir, &dentry->d_name, d_inode(dentry));
3279     if (!retval)
3280         ext4_fc_track_unlink(handle, dentry);
3281 #if IS_ENABLED(CONFIG_UNICODE)
3282     /* VFS negative dentries are incompatible with Encoding and
3283      * Case-insensitiveness. Eventually we'll want avoid
3284      * invalidating the dentries here, alongside with returning the
3285      * negative dentries at ext4_lookup(), when it is  better
3286      * supported by the VFS for the CI case.
3287      */
3288     if (IS_CASEFOLDED(dir))
3289         d_invalidate(dentry);
3290 #endif
3291     if (handle)
3292         ext4_journal_stop(handle);
3293 
3294 out_trace:
3295     trace_ext4_unlink_exit(dentry, retval);
3296     return retval;
3297 }
3298 
3299 static int ext4_init_symlink_block(handle_t *handle, struct inode *inode,
3300                    struct fscrypt_str *disk_link)
3301 {
3302     struct buffer_head *bh;
3303     char *kaddr;
3304     int err = 0;
3305 
3306     bh = ext4_bread(handle, inode, 0, EXT4_GET_BLOCKS_CREATE);
3307     if (IS_ERR(bh))
3308         return PTR_ERR(bh);
3309 
3310     BUFFER_TRACE(bh, "get_write_access");
3311     err = ext4_journal_get_write_access(handle, inode->i_sb, bh, EXT4_JTR_NONE);
3312     if (err)
3313         goto out;
3314 
3315     kaddr = (char *)bh->b_data;
3316     memcpy(kaddr, disk_link->name, disk_link->len);
3317     inode->i_size = disk_link->len - 1;
3318     EXT4_I(inode)->i_disksize = inode->i_size;
3319     err = ext4_handle_dirty_metadata(handle, inode, bh);
3320 out:
3321     brelse(bh);
3322     return err;
3323 }
3324 
/*
 * ext4_symlink - create a symbolic link @dentry in @dir pointing at @symname
 *
 * The on-disk form of the target is prepared in disk_link (encrypted via
 * fscrypt for an encrypted inode).  A target whose disk_link.len is at
 * most EXT4_N_BLOCKS * 4 is copied into the inode's i_data area; a longer
 * one is written to a block by ext4_init_symlink_block().  The inode is
 * then linked into @dir with ext4_add_nondir().  An attempt that ends in
 * -ENOSPC is repeated for as long as ext4_should_retry_alloc() allows.
 *
 * Returns 0 or a negative errno (-EIO after a forced shutdown).
 */
3325 static int ext4_symlink(struct user_namespace *mnt_userns, struct inode *dir,
3326             struct dentry *dentry, const char *symname)
3327 {
3328     handle_t *handle;
3329     struct inode *inode;
3330     int err, len = strlen(symname);
3331     int credits;
3332     struct fscrypt_str disk_link;
3333     int retries = 0;
3334 
3335     if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
3336         return -EIO;
3337 
3338     err = fscrypt_prepare_symlink(dir, symname, len, dir->i_sb->s_blocksize,
3339                       &disk_link);
3340     if (err)
3341         return err;
3342 
3343     err = dquot_initialize(dir);
3344     if (err)
3345         return err;
3346 
3347     /*
3348      * EXT4_INDEX_EXTRA_TRANS_BLOCKS for addition of entry into the
3349      * directory. +3 for inode, inode bitmap, group descriptor allocation.
3350      * EXT4_DATA_TRANS_BLOCKS for the data block allocation and
3351      * modification.
3352      */
3353     credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
3354           EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3;
3355 retry:
3356     inode = ext4_new_inode_start_handle(mnt_userns, dir, S_IFLNK|S_IRWXUGO,
3357                         &dentry->d_name, 0, NULL,
3358                         EXT4_HT_DIR, credits);
3359     handle = ext4_journal_current_handle();
3360     if (IS_ERR(inode)) {
3361         if (handle)
3362             ext4_journal_stop(handle);
3363         err = PTR_ERR(inode);
3364         goto out_retry;
3365     }
3366 
    /* choose the inode operations: encrypted, block-backed or fast */
3367     if (IS_ENCRYPTED(inode)) {
3368         err = fscrypt_encrypt_symlink(inode, symname, len, &disk_link);
3369         if (err)
3370             goto err_drop_inode;
3371         inode->i_op = &ext4_encrypted_symlink_inode_operations;
3372     } else {
3373         if ((disk_link.len > EXT4_N_BLOCKS * 4)) {
3374             inode->i_op = &ext4_symlink_inode_operations;
3375         } else {
3376             inode->i_op = &ext4_fast_symlink_inode_operations;
3377             inode->i_link = (char *)&EXT4_I(inode)->i_data;
3378         }
3379     }
3380 
    /* same size test as above; it decides where the target bytes are stored */
3381     if ((disk_link.len > EXT4_N_BLOCKS * 4)) {
3382         /* alloc symlink block and fill it */
3383         err = ext4_init_symlink_block(handle, inode, &disk_link);
3384         if (err)
3385             goto err_drop_inode;
3386     } else {
3387         /* clear the extent format for fast symlink */
3388         ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
3389         memcpy((char *)&EXT4_I(inode)->i_data, disk_link.name,
3390                disk_link.len);
        /* presumably disk_link.len counts a trailing NUL - TODO confirm */
3391         inode->i_size = disk_link.len - 1;
3392         EXT4_I(inode)->i_disksize = inode->i_size;
3393     }
3394     err = ext4_add_nondir(handle, dentry, &inode);
3395     if (handle)
3396         ext4_journal_stop(handle);
    /*
     * ext4_add_nondir() sets inode to NULL on success, so this iput() only
     * has an inode to drop on failure.  NOTE(review): relies on iput()
     * accepting NULL - confirm.
     */
3397     iput(inode);
3398     goto out_retry;
3399 
    /* failure after allocation: zero the link count, orphan and drop the inode */
3400 err_drop_inode:
3401     clear_nlink(inode);
3402     ext4_orphan_add(handle, inode);
3403     unlock_new_inode(inode);
3404     if (handle)
3405         ext4_journal_stop(handle);
3406     iput(inode);
3407 out_retry:
3408     if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
3409         goto retry;
    /* free the name only when it is not the caller's symname buffer */
3410     if (disk_link.name != (unsigned char *)symname)
3411         kfree(disk_link.name);
3412     return err;
3413 }
3414 
3415 int __ext4_link(struct inode *dir, struct inode *inode, struct dentry *dentry)
3416 {
3417     handle_t *handle;
3418     int err, retries = 0;
3419 retry:
3420     handle = ext4_journal_start(dir, EXT4_HT_DIR,
3421         (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
3422          EXT4_INDEX_EXTRA_TRANS_BLOCKS) + 1);
3423     if (IS_ERR(handle))
3424         return PTR_ERR(handle);
3425 
3426     if (IS_DIRSYNC(dir))
3427         ext4_handle_sync(handle);
3428 
3429     inode->i_ctime = current_time(inode);
3430     ext4_inc_count(inode);
3431     ihold(inode);
3432 
3433     err = ext4_add_entry(handle, dentry, inode);
3434     if (!err) {
3435         err = ext4_mark_inode_dirty(handle, inode);
3436         /* this can happen only for tmpfile being
3437          * linked the first time
3438          */
3439         if (inode->i_nlink == 1)
3440             ext4_orphan_del(handle, inode);
3441         d_instantiate(dentry, inode);
3442         ext4_fc_track_link(handle, dentry);
3443     } else {
3444         drop_nlink(inode);
3445         iput(inode);
3446     }
3447     ext4_journal_stop(handle);
3448     if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
3449         goto retry;
3450     return err;
3451 }
3452 
3453 static int ext4_link(struct dentry *old_dentry,
3454              struct inode *dir, struct dentry *dentry)
3455 {
3456     struct inode *inode = d_inode(old_dentry);
3457     int err;
3458 
3459     if (inode->i_nlink >= EXT4_LINK_MAX)
3460         return -EMLINK;
3461 
3462     err = fscrypt_prepare_link(old_dentry, dir, dentry);
3463     if (err)
3464         return err;
3465 
3466     if ((ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) &&
3467         (!projid_eq(EXT4_I(dir)->i_projid,
3468             EXT4_I(old_dentry->d_inode)->i_projid)))
3469         return -EXDEV;
3470 
3471     err = dquot_initialize(dir);
3472     if (err)
3473         return err;
3474     return __ext4_link(dir, inode, dentry);
3475 }
3476 
3477 /*
3478  * Try to find buffer head where contains the parent block.
3479  * It should be the inode block if it is inlined or the 1st block
3480  * if it is a normal dir.
3481  */
3482 static struct buffer_head *ext4_get_first_dir_block(handle_t *handle,
3483                     struct inode *inode,
3484                     int *retval,
3485                     struct ext4_dir_entry_2 **parent_de,
3486                     int *inlined)
3487 {
3488     struct buffer_head *bh;
3489 
3490     if (!ext4_has_inline_data(inode)) {
3491         struct ext4_dir_entry_2 *de;
3492         unsigned int offset;
3493 
3494         /* The first directory block must not be a hole, so
3495          * treat it as DIRENT_HTREE
3496          */
3497         bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE);
3498         if (IS_ERR(bh)) {
3499             *retval = PTR_ERR(bh);
3500             return NULL;
3501         }
3502 
3503         de = (struct ext4_dir_entry_2 *) bh->b_data;
3504         if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data,
3505                      bh->b_size, 0) ||
3506             le32_to_cpu(de->inode) != inode->i_ino ||
3507             strcmp(".", de->name)) {
3508             EXT4_ERROR_INODE(inode, "directory missing '.'");
3509             brelse(bh);
3510             *retval = -EFSCORRUPTED;
3511             return NULL;
3512         }
3513         offset = ext4_rec_len_from_disk(de->rec_len,
3514                         inode->i_sb->s_blocksize);
3515         de = ext4_next_entry(de, inode->i_sb->s_blocksize);
3516         if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data,
3517                      bh->b_size, offset) ||
3518             le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) {
3519             EXT4_ERROR_INODE(inode, "directory missing '..'");
3520             brelse(bh);
3521             *retval = -EFSCORRUPTED;
3522             return NULL;
3523         }
3524         *parent_de = de;
3525 
3526         return bh;
3527     }
3528 
3529     *inlined = 1;
3530     return ext4_get_first_inline_block(inode, parent_de, retval);
3531 }
3532 
3533 struct ext4_renament {	/* state for one side (source or destination) of a rename */
3534     struct inode *dir;	/* directory that holds the entry */
3535     struct dentry *dentry;	/* dentry naming the entry inside dir */
3536     struct inode *inode;	/* inode behind dentry; callers test it for NULL on the destination side */
3537     bool is_dir;	/* set by ext4_cross_rename() when inode is a directory */
3538     int dir_nlink_delta;	/* link-count change for dir: -1 decrements, other non-zero increments, 0 means none */
3539 
3540     /* entry for "dentry" */
3541     struct buffer_head *bh;	/* buffer returned by ext4_find_entry() for the entry */
3542     struct ext4_dir_entry_2 *de;	/* the directory entry itself, inside bh */
3543     int inlined;	/* non-zero makes ext4_setent() skip ext4_handle_dirty_dirblock() on bh */
3544 
3545     /* entry for ".." in inode if it's a directory */
3546     struct buffer_head *dir_bh;	/* buffer returned by ext4_get_first_dir_block() */
3547     struct ext4_dir_entry_2 *parent_de;	/* the ".." entry inside dir_bh */
3548     int dir_inlined;	/* set to 1 by ext4_get_first_dir_block() when inode has inline data */
3549 };
3550 
3551 static int ext4_rename_dir_prepare(handle_t *handle, struct ext4_renament *ent)
3552 {
3553     int retval;
3554 
3555     ent->dir_bh = ext4_get_first_dir_block(handle, ent->inode,
3556                           &retval, &ent->parent_de,
3557                           &ent->dir_inlined);
3558     if (!ent->dir_bh)
3559         return retval;
3560     if (le32_to_cpu(ent->parent_de->inode) != ent->dir->i_ino)
3561         return -EFSCORRUPTED;
3562     BUFFER_TRACE(ent->dir_bh, "get_write_access");
3563     return ext4_journal_get_write_access(handle, ent->dir->i_sb,
3564                          ent->dir_bh, EXT4_JTR_NONE);
3565 }
3566 
3567 static int ext4_rename_dir_finish(handle_t *handle, struct ext4_renament *ent,
3568                   unsigned dir_ino)
3569 {
3570     int retval;
3571 
3572     ent->parent_de->inode = cpu_to_le32(dir_ino);
3573     BUFFER_TRACE(ent->dir_bh, "call ext4_handle_dirty_metadata");
3574     if (!ent->dir_inlined) {
3575         if (is_dx(ent->inode)) {
3576             retval = ext4_handle_dirty_dx_node(handle,
3577                                ent->inode,
3578                                ent->dir_bh);
3579         } else {
3580             retval = ext4_handle_dirty_dirblock(handle, ent->inode,
3581                                 ent->dir_bh);
3582         }
3583     } else {
3584         retval = ext4_mark_inode_dirty(handle, ent->inode);
3585     }
3586     if (retval) {
3587         ext4_std_error(ent->dir->i_sb, retval);
3588         return retval;
3589     }
3590     return 0;
3591 }
3592 
3593 static int ext4_setent(handle_t *handle, struct ext4_renament *ent,
3594                unsigned ino, unsigned file_type)
3595 {
3596     int retval, retval2;
3597 
3598     BUFFER_TRACE(ent->bh, "get write access");
3599     retval = ext4_journal_get_write_access(handle, ent->dir->i_sb, ent->bh,
3600                            EXT4_JTR_NONE);
3601     if (retval)
3602         return retval;
3603     ent->de->inode = cpu_to_le32(ino);
3604     if (ext4_has_feature_filetype(ent->dir->i_sb))
3605         ent->de->file_type = file_type;
3606     inode_inc_iversion(ent->dir);
3607     ent->dir->i_ctime = ent->dir->i_mtime =
3608         current_time(ent->dir);
3609     retval = ext4_mark_inode_dirty(handle, ent->dir);
3610     BUFFER_TRACE(ent->bh, "call ext4_handle_dirty_metadata");
3611     if (!ent->inlined) {
3612         retval2 = ext4_handle_dirty_dirblock(handle, ent->dir, ent->bh);
3613         if (unlikely(retval2)) {
3614             ext4_std_error(ent->dir->i_sb, retval2);
3615             return retval2;
3616         }
3617     }
3618     return retval;
3619 }
3620 
3621 static void ext4_resetent(handle_t *handle, struct ext4_renament *ent,
3622               unsigned ino, unsigned file_type)
3623 {
3624     struct ext4_renament old = *ent;
3625     int retval = 0;
3626 
3627     /*
3628      * old->de could have moved from under us during make indexed dir,
3629      * so the old->de may no longer valid and need to find it again
3630      * before reset old inode info.
3631      */
3632     old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
3633     if (IS_ERR(old.bh))
3634         retval = PTR_ERR(old.bh);
3635     if (!old.bh)
3636         retval = -ENOENT;
3637     if (retval) {
3638         ext4_std_error(old.dir->i_sb, retval);
3639         return;
3640     }
3641 
3642     ext4_setent(handle, &old, ino, file_type);
3643     brelse(old.bh);
3644 }
3645 
3646 static int ext4_find_delete_entry(handle_t *handle, struct inode *dir,
3647                   const struct qstr *d_name)
3648 {
3649     int retval = -ENOENT;
3650     struct buffer_head *bh;
3651     struct ext4_dir_entry_2 *de;
3652 
3653     bh = ext4_find_entry(dir, d_name, &de, NULL);
3654     if (IS_ERR(bh))
3655         return PTR_ERR(bh);
3656     if (bh) {
3657         retval = ext4_delete_entry(handle, dir, de, bh);
3658         brelse(bh);
3659     }
3660     return retval;
3661 }
3662 
3663 static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent,
3664                    int force_reread)
3665 {
3666     int retval;
3667     /*
3668      * ent->de could have moved from under us during htree split, so make
3669      * sure that we are deleting the right entry.  We might also be pointing
3670      * to a stale entry in the unused part of ent->bh so just checking inum
3671      * and the name isn't enough.
3672      */
3673     if (le32_to_cpu(ent->de->inode) != ent->inode->i_ino ||
3674         ent->de->name_len != ent->dentry->d_name.len ||
3675         strncmp(ent->de->name, ent->dentry->d_name.name,
3676             ent->de->name_len) ||
3677         force_reread) {
3678         retval = ext4_find_delete_entry(handle, ent->dir,
3679                         &ent->dentry->d_name);
3680     } else {
3681         retval = ext4_delete_entry(handle, ent->dir, ent->de, ent->bh);
3682         if (retval == -ENOENT) {
3683             retval = ext4_find_delete_entry(handle, ent->dir,
3684                             &ent->dentry->d_name);
3685         }
3686     }
3687 
3688     if (retval) {
3689         ext4_warning_inode(ent->dir,
3690                    "Deleting old file: nlink %d, error=%d",
3691                    ent->dir->i_nlink, retval);
3692     }
3693 }
3694 
3695 static void ext4_update_dir_count(handle_t *handle, struct ext4_renament *ent)
3696 {
3697     if (ent->dir_nlink_delta) {
3698         if (ent->dir_nlink_delta == -1)
3699             ext4_dec_count(ent->dir);
3700         else
3701             ext4_inc_count(ent->dir);
3702         ext4_mark_inode_dirty(handle, ent->dir);
3703     }
3704 }
3705 
3706 static struct inode *ext4_whiteout_for_rename(struct user_namespace *mnt_userns,
3707                           struct ext4_renament *ent,
3708                           int credits, handle_t **h)
3709 {
3710     struct inode *wh;
3711     handle_t *handle;
3712     int retries = 0;
3713 
3714     /*
3715      * for inode block, sb block, group summaries,
3716      * and inode bitmap
3717      */
3718     credits += (EXT4_MAXQUOTAS_TRANS_BLOCKS(ent->dir->i_sb) +
3719             EXT4_XATTR_TRANS_BLOCKS + 4);
3720 retry:
3721     wh = ext4_new_inode_start_handle(mnt_userns, ent->dir,
3722                      S_IFCHR | WHITEOUT_MODE,
3723                      &ent->dentry->d_name, 0, NULL,
3724                      EXT4_HT_DIR, credits);
3725 
3726     handle = ext4_journal_current_handle();
3727     if (IS_ERR(wh)) {
3728         if (handle)
3729             ext4_journal_stop(handle);
3730         if (PTR_ERR(wh) == -ENOSPC &&
3731             ext4_should_retry_alloc(ent->dir->i_sb, &retries))
3732             goto retry;
3733     } else {
3734         *h = handle;
3735         init_special_inode(wh, wh->i_mode, WHITEOUT_DEV);
3736         wh->i_op = &ext4_special_inode_operations;
3737     }
3738     return wh;
3739 }
3740 
3741 /*
3742  * Anybody can rename anything with this: the permission checks are left to the
3743  * higher-level routines.
3744  *
3745  * n.b.  old_{dentry,inode) refers to the source dentry/inode
3746  * while new_{dentry,inode) refers to the destination dentry/inode
3747  * This comes from rename(const char *oldpath, const char *newpath)
3748  */
3749 static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
3750                struct dentry *old_dentry, struct inode *new_dir,
3751                struct dentry *new_dentry, unsigned int flags)
3752 {
3753     handle_t *handle = NULL;
3754     struct ext4_renament old = {
3755         .dir = old_dir,
3756         .dentry = old_dentry,
3757         .inode = d_inode(old_dentry),
3758     };
3759     struct ext4_renament new = {
3760         .dir = new_dir,
3761         .dentry = new_dentry,
3762         .inode = d_inode(new_dentry),
3763     };
3764     int force_reread;
3765     int retval;
3766     struct inode *whiteout = NULL;
3767     int credits;
3768     u8 old_file_type;
3769 
3770     if (new.inode && new.inode->i_nlink == 0) {
3771         EXT4_ERROR_INODE(new.inode,
3772                  "target of rename is already freed");
3773         return -EFSCORRUPTED;
3774     }
3775 
3776     if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT)) &&
3777         (!projid_eq(EXT4_I(new_dir)->i_projid,
3778             EXT4_I(old_dentry->d_inode)->i_projid)))
3779         return -EXDEV;
3780 
3781     retval = dquot_initialize(old.dir);
3782     if (retval)
3783         return retval;
3784     retval = dquot_initialize(new.dir);
3785     if (retval)
3786         return retval;
3787 
3788     /* Initialize quotas before so that eventual writes go
3789      * in separate transaction */
3790     if (new.inode) {
3791         retval = dquot_initialize(new.inode);
3792         if (retval)
3793             return retval;
3794     }
3795 
3796     old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
3797     if (IS_ERR(old.bh))
3798         return PTR_ERR(old.bh);
3799     /*
3800      *  Check for inode number is _not_ due to possible IO errors.
3801      *  We might rmdir the source, keep it as pwd of some process
3802      *  and merrily kill the link to whatever was created under the
3803      *  same name. Goodbye sticky bit ;-<
3804      */
3805     retval = -ENOENT;
3806     if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino)
3807         goto release_bh;
3808 
3809     new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
3810                  &new.de, &new.inlined);
3811     if (IS_ERR(new.bh)) {
3812         retval = PTR_ERR(new.bh);
3813         new.bh = NULL;
3814         goto release_bh;
3815     }
3816     if (new.bh) {
3817         if (!new.inode) {
3818             brelse(new.bh);
3819             new.bh = NULL;
3820         }
3821     }
3822     if (new.inode && !test_opt(new.dir->i_sb, NO_AUTO_DA_ALLOC))
3823         ext4_alloc_da_blocks(old.inode);
3824 
3825     credits = (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) +
3826            EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2);
3827     if (!(flags & RENAME_WHITEOUT)) {
3828         handle = ext4_journal_start(old.dir, EXT4_HT_DIR, credits);
3829         if (IS_ERR(handle)) {
3830             retval = PTR_ERR(handle);
3831             goto release_bh;
3832         }
3833     } else {
3834         whiteout = ext4_whiteout_for_rename(mnt_userns, &old, credits, &handle);
3835         if (IS_ERR(whiteout)) {
3836             retval = PTR_ERR(whiteout);
3837             goto release_bh;
3838         }
3839     }
3840 
3841     old_file_type = old.de->file_type;
3842     if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir))
3843         ext4_handle_sync(handle);
3844 
3845     if (S_ISDIR(old.inode->i_mode)) {
3846         if (new.inode) {
3847             retval = -ENOTEMPTY;
3848             if (!ext4_empty_dir(new.inode))
3849                 goto end_rename;
3850         } else {
3851             retval = -EMLINK;
3852             if (new.dir != old.dir && EXT4_DIR_LINK_MAX(new.dir))
3853                 goto end_rename;
3854         }
3855         retval = ext4_rename_dir_prepare(handle, &old);
3856         if (retval)
3857             goto end_rename;
3858     }
3859     /*
3860      * If we're renaming a file within an inline_data dir and adding or
3861      * setting the new dirent causes a conversion from inline_data to
3862      * extents/blockmap, we need to force the dirent delete code to
3863      * re-read the directory, or else we end up trying to delete a dirent
3864      * from what is now the extent tree root (or a block map).
3865      */
3866     force_reread = (new.dir->i_ino == old.dir->i_ino &&
3867             ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA));
3868 
3869     if (whiteout) {
3870         /*
3871          * Do this before adding a new entry, so the old entry is sure
3872          * to be still pointing to the valid old entry.
3873          */
3874         retval = ext4_setent(handle, &old, whiteout->i_ino,
3875                      EXT4_FT_CHRDEV);
3876         if (retval)
3877             goto end_rename;
3878         retval = ext4_mark_inode_dirty(handle, whiteout);
3879         if (unlikely(retval))
3880             goto end_rename;
3881 
3882     }
3883     if (!new.bh) {
3884         retval = ext4_add_entry(handle, new.dentry, old.inode);
3885         if (retval)
3886             goto end_rename;
3887     } else {
3888         retval = ext4_setent(handle, &new,
3889                      old.inode->i_ino, old_file_type);
3890         if (retval)
3891             goto end_rename;
3892     }
3893     if (force_reread)
3894         force_reread = !ext4_test_inode_flag(new.dir,
3895                              EXT4_INODE_INLINE_DATA);
3896 
3897     /*
3898      * Like most other Unix systems, set the ctime for inodes on a
3899      * rename.
3900      */
3901     old.inode->i_ctime = current_time(old.inode);
3902     retval = ext4_mark_inode_dirty(handle, old.inode);
3903     if (unlikely(retval))
3904         goto end_rename;
3905 
3906     if (!whiteout) {
3907         /*
3908          * ok, that's it
3909          */
3910         ext4_rename_delete(handle, &old, force_reread);
3911     }
3912 
3913     if (new.inode) {
3914         ext4_dec_count(new.inode);
3915         new.inode->i_ctime = current_time(new.inode);
3916     }
3917     old.dir->i_ctime = old.dir->i_mtime = current_time(old.dir);
3918     ext4_update_dx_flag(old.dir);
3919     if (old.dir_bh) {
3920         retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino);
3921         if (retval)
3922             goto end_rename;
3923 
3924         ext4_dec_count(old.dir);
3925         if (new.inode) {
3926             /* checked ext4_empty_dir above, can't have another
3927              * parent, ext4_dec_count() won't work for many-linked
3928              * dirs */
3929             clear_nlink(new.inode);
3930         } else {
3931             ext4_inc_count(new.dir);
3932             ext4_update_dx_flag(new.dir);
3933             retval = ext4_mark_inode_dirty(handle, new.dir);
3934             if (unlikely(retval))
3935                 goto end_rename;
3936         }
3937     }
3938     retval = ext4_mark_inode_dirty(handle, old.dir);
3939     if (unlikely(retval))
3940         goto end_rename;
3941 
3942     if (S_ISDIR(old.inode->i_mode)) {
3943         /*
3944          * We disable fast commits here that's because the
3945          * replay code is not yet capable of changing dot dot
3946          * dirents in directories.
3947          */
3948         ext4_fc_mark_ineligible(old.inode->i_sb,
3949             EXT4_FC_REASON_RENAME_DIR, handle);
3950     } else {
3951         struct super_block *sb = old.inode->i_sb;
3952 
3953         if (new.inode)
3954             ext4_fc_track_unlink(handle, new.dentry);
3955         if (test_opt2(sb, JOURNAL_FAST_COMMIT) &&
3956             !(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY) &&
3957             !(ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE))) {
3958             __ext4_fc_track_link(handle, old.inode, new.dentry);
3959             __ext4_fc_track_unlink(handle, old.inode, old.dentry);
3960             if (whiteout)
3961                 __ext4_fc_track_create(handle, whiteout,
3962                                old.dentry);
3963         }
3964     }
3965 
3966     if (new.inode) {
3967         retval = ext4_mark_inode_dirty(handle, new.inode);
3968         if (unlikely(retval))
3969             goto end_rename;
3970         if (!new.inode->i_nlink)
3971             ext4_orphan_add(handle, new.inode);
3972     }
3973     retval = 0;
3974 
3975 end_rename:
3976     if (whiteout) {
3977         if (retval) {
3978             ext4_resetent(handle, &old,
3979                       old.inode->i_ino, old_file_type);
3980             drop_nlink(whiteout);
3981             ext4_orphan_add(handle, whiteout);
3982         }
3983         unlock_new_inode(whiteout);
3984         ext4_journal_stop(handle);
3985         iput(whiteout);
3986     } else {
3987         ext4_journal_stop(handle);
3988     }
3989 release_bh:
3990     brelse(old.dir_bh);
3991     brelse(old.bh);
3992     brelse(new.bh);
3993     return retval;
3994 }
3995 
3996 static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
3997                  struct inode *new_dir, struct dentry *new_dentry)
3998 {
3999     handle_t *handle = NULL;
4000     struct ext4_renament old = {
4001         .dir = old_dir,
4002         .dentry = old_dentry,
4003         .inode = d_inode(old_dentry),
4004     };
4005     struct ext4_renament new = {
4006         .dir = new_dir,
4007         .dentry = new_dentry,
4008         .inode = d_inode(new_dentry),
4009     };
4010     u8 new_file_type;
4011     int retval;
4012     struct timespec64 ctime;
4013 
4014     if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT) &&
4015          !projid_eq(EXT4_I(new_dir)->i_projid,
4016             EXT4_I(old_dentry->d_inode)->i_projid)) ||
4017         (ext4_test_inode_flag(old_dir, EXT4_INODE_PROJINHERIT) &&
4018          !projid_eq(EXT4_I(old_dir)->i_projid,
4019             EXT4_I(new_dentry->d_inode)->i_projid)))
4020         return -EXDEV;
4021 
4022     retval = dquot_initialize(old.dir);
4023     if (retval)
4024         return retval;
4025     retval = dquot_initialize(new.dir);
4026     if (retval)
4027         return retval;
4028 
4029     old.bh = ext4_find_entry(old.dir, &old.dentry->d_name,
4030                  &old.de, &old.inlined);
4031     if (IS_ERR(old.bh))
4032         return PTR_ERR(old.bh);
4033     /*
4034      *  Check for inode number is _not_ due to possible IO errors.
4035      *  We might rmdir the source, keep it as pwd of some process
4036      *  and merrily kill the link to whatever was created under the
4037      *  same name. Goodbye sticky bit ;-<
4038      */
4039     retval = -ENOENT;
4040     if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino)
4041         goto end_rename;
4042 
4043     new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
4044                  &new.de, &new.inlined);
4045     if (IS_ERR(new.bh)) {
4046         retval = PTR_ERR(new.bh);
4047         new.bh = NULL;
4048         goto end_rename;
4049     }
4050 
4051     /* RENAME_EXCHANGE case: old *and* new must both exist */
4052     if (!new.bh || le32_to_cpu(new.de->inode) != new.inode->i_ino)
4053         goto end_rename;
4054 
4055     handle = ext4_journal_start(old.dir, EXT4_HT_DIR,
4056         (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) +
4057          2 * EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2));
4058     if (IS_ERR(handle)) {
4059         retval = PTR_ERR(handle);
4060         handle = NULL;
4061         goto end_rename;
4062     }
4063 
4064     if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir))
4065         ext4_handle_sync(handle);
4066 
4067     if (S_ISDIR(old.inode->i_mode)) {
4068         old.is_dir = true;
4069         retval = ext4_rename_dir_prepare(handle, &old);
4070         if (retval)
4071             goto end_rename;
4072     }
4073     if (S_ISDIR(new.inode->i_mode)) {
4074         new.is_dir = true;
4075         retval = ext4_rename_dir_prepare(handle, &new);
4076         if (retval)
4077             goto end_rename;
4078     }
4079 
4080     /*
4081      * Other than the special case of overwriting a directory, parents'
4082      * nlink only needs to be modified if this is a cross directory rename.
4083      */
4084     if (old.dir != new.dir && old.is_dir != new.is_dir) {
4085         old.dir_nlink_delta = old.is_dir ? -1 : 1;
4086         new.dir_nlink_delta = -old.dir_nlink_delta;
4087         retval = -EMLINK;
4088         if ((old.dir_nlink_delta > 0 && EXT4_DIR_LINK_MAX(old.dir)) ||
4089             (new.dir_nlink_delta > 0 && EXT4_DIR_LINK_MAX(new.dir)))
4090             goto end_rename;
4091     }
4092 
4093     new_file_type = new.de->file_type;
4094     retval = ext4_setent(handle, &new, old.inode->i_ino, old.de->file_type);
4095     if (retval)
4096         goto end_rename;
4097 
4098     retval = ext4_setent(handle, &old, new.inode->i_ino, new_file_type);
4099     if (retval)
4100         goto end_rename;
4101 
4102     /*
4103      * Like most other Unix systems, set the ctime for inodes on a
4104      * rename.
4105      */
4106     ctime = current_time(old.inode);
4107     old.inode->i_ctime = ctime;
4108     new.inode->i_ctime = ctime;
4109     retval = ext4_mark_inode_dirty(handle, old.inode);
4110     if (unlikely(retval))
4111         goto end_rename;
4112     retval = ext4_mark_inode_dirty(handle, new.inode);
4113     if (unlikely(retval))
4114         goto end_rename;
4115     ext4_fc_mark_ineligible(new.inode->i_sb,
4116                 EXT4_FC_REASON_CROSS_RENAME, handle);
4117     if (old.dir_bh) {
4118         retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino);
4119         if (retval)
4120             goto end_rename;
4121     }
4122     if (new.dir_bh) {
4123         retval = ext4_rename_dir_finish(handle, &new, old.dir->i_ino);
4124         if (retval)
4125             goto end_rename;
4126     }
4127     ext4_update_dir_count(handle, &old);
4128     ext4_update_dir_count(handle, &new);
4129     retval = 0;
4130 
4131 end_rename:
4132     brelse(old.dir_bh);
4133     brelse(new.dir_bh);
4134     brelse(old.bh);
4135     brelse(new.bh);
4136     if (handle)
4137         ext4_journal_stop(handle);
4138     return retval;
4139 }
4140 
4141 static int ext4_rename2(struct user_namespace *mnt_userns,
4142             struct inode *old_dir, struct dentry *old_dentry,
4143             struct inode *new_dir, struct dentry *new_dentry,
4144             unsigned int flags)
4145 {
4146     int err;
4147 
4148     if (unlikely(ext4_forced_shutdown(EXT4_SB(old_dir->i_sb))))
4149         return -EIO;
4150 
4151     if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
4152         return -EINVAL;
4153 
4154     err = fscrypt_prepare_rename(old_dir, old_dentry, new_dir, new_dentry,
4155                      flags);
4156     if (err)
4157         return err;
4158 
4159     if (flags & RENAME_EXCHANGE) {
4160         return ext4_cross_rename(old_dir, old_dentry,
4161                      new_dir, new_dentry);
4162     }
4163 
4164     return ext4_rename(mnt_userns, old_dir, old_dentry, new_dir, new_dentry, flags);
4165 }
4166 
4167 /*
4168  * directories can handle most operations...
4169  */
4170 const struct inode_operations ext4_dir_inode_operations = {
4171     .create     = ext4_create,
4172     .lookup     = ext4_lookup,
4173     .link       = ext4_link,
4174     .unlink     = ext4_unlink,
4175     .symlink    = ext4_symlink,
4176     .mkdir      = ext4_mkdir,
4177     .rmdir      = ext4_rmdir,
4178     .mknod      = ext4_mknod,
4179     .tmpfile    = ext4_tmpfile,
4180     .rename     = ext4_rename2,
4181     .setattr    = ext4_setattr,
4182     .getattr    = ext4_getattr,
4183     .listxattr  = ext4_listxattr,
4184     .get_acl    = ext4_get_acl,
4185     .set_acl    = ext4_set_acl,
4186     .fiemap         = ext4_fiemap,
4187     .fileattr_get   = ext4_fileattr_get,
4188     .fileattr_set   = ext4_fileattr_set,
4189 };
4190 
4191 const struct inode_operations ext4_special_inode_operations = {
4192     .setattr    = ext4_setattr,
4193     .getattr    = ext4_getattr,
4194     .listxattr  = ext4_listxattr,
4195     .get_acl    = ext4_get_acl,
4196     .set_acl    = ext4_set_acl,
4197 };