Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  * inode.c
0004  *
0005  * vfs' aops, fops, dops and iops
0006  *
0007  * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
0008  */
0009 
0010 #include <linux/fs.h>
0011 #include <linux/types.h>
0012 #include <linux/highmem.h>
0013 #include <linux/pagemap.h>
0014 #include <linux/quotaops.h>
0015 #include <linux/iversion.h>
0016 
0017 #include <asm/byteorder.h>
0018 
0019 #include <cluster/masklog.h>
0020 
0021 #include "ocfs2.h"
0022 
0023 #include "alloc.h"
0024 #include "dir.h"
0025 #include "blockcheck.h"
0026 #include "dlmglue.h"
0027 #include "extent_map.h"
0028 #include "file.h"
0029 #include "heartbeat.h"
0030 #include "inode.h"
0031 #include "journal.h"
0032 #include "namei.h"
0033 #include "suballoc.h"
0034 #include "super.h"
0035 #include "symlink.h"
0036 #include "sysfile.h"
0037 #include "uptodate.h"
0038 #include "xattr.h"
0039 #include "refcounttree.h"
0040 #include "ocfs2_trace.h"
0041 #include "filecheck.h"
0042 
0043 #include "buffer_head_io.h"
0044 
0045 struct ocfs2_find_inode_args
0046 {
0047     u64     fi_blkno;
0048     unsigned long   fi_ino;
0049     unsigned int    fi_flags;
0050     unsigned int    fi_sysfile_type;
0051 };
0052 
0053 static struct lock_class_key ocfs2_sysfile_lock_key[NUM_SYSTEM_INODES];
0054 
0055 static int ocfs2_read_locked_inode(struct inode *inode,
0056                    struct ocfs2_find_inode_args *args);
0057 static int ocfs2_init_locked_inode(struct inode *inode, void *opaque);
0058 static int ocfs2_find_actor(struct inode *inode, void *opaque);
0059 static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
0060                     struct inode *inode,
0061                     struct buffer_head *fe_bh);
0062 
0063 static int ocfs2_filecheck_read_inode_block_full(struct inode *inode,
0064                          struct buffer_head **bh,
0065                          int flags, int type);
0066 static int ocfs2_filecheck_validate_inode_block(struct super_block *sb,
0067                         struct buffer_head *bh);
0068 static int ocfs2_filecheck_repair_inode_block(struct super_block *sb,
0069                           struct buffer_head *bh);
0070 
0071 void ocfs2_set_inode_flags(struct inode *inode)
0072 {
0073     unsigned int flags = OCFS2_I(inode)->ip_attr;
0074 
0075     inode->i_flags &= ~(S_IMMUTABLE |
0076         S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
0077 
0078     if (flags & OCFS2_IMMUTABLE_FL)
0079         inode->i_flags |= S_IMMUTABLE;
0080 
0081     if (flags & OCFS2_SYNC_FL)
0082         inode->i_flags |= S_SYNC;
0083     if (flags & OCFS2_APPEND_FL)
0084         inode->i_flags |= S_APPEND;
0085     if (flags & OCFS2_NOATIME_FL)
0086         inode->i_flags |= S_NOATIME;
0087     if (flags & OCFS2_DIRSYNC_FL)
0088         inode->i_flags |= S_DIRSYNC;
0089 }
0090 
0091 /* Propagate flags from i_flags to OCFS2_I(inode)->ip_attr */
0092 void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi)
0093 {
0094     unsigned int flags = oi->vfs_inode.i_flags;
0095 
0096     oi->ip_attr &= ~(OCFS2_SYNC_FL|OCFS2_APPEND_FL|
0097             OCFS2_IMMUTABLE_FL|OCFS2_NOATIME_FL|OCFS2_DIRSYNC_FL);
0098     if (flags & S_SYNC)
0099         oi->ip_attr |= OCFS2_SYNC_FL;
0100     if (flags & S_APPEND)
0101         oi->ip_attr |= OCFS2_APPEND_FL;
0102     if (flags & S_IMMUTABLE)
0103         oi->ip_attr |= OCFS2_IMMUTABLE_FL;
0104     if (flags & S_NOATIME)
0105         oi->ip_attr |= OCFS2_NOATIME_FL;
0106     if (flags & S_DIRSYNC)
0107         oi->ip_attr |= OCFS2_DIRSYNC_FL;
0108 }
0109 
0110 struct inode *ocfs2_ilookup(struct super_block *sb, u64 blkno)
0111 {
0112     struct ocfs2_find_inode_args args;
0113 
0114     args.fi_blkno = blkno;
0115     args.fi_flags = 0;
0116     args.fi_ino = ino_from_blkno(sb, blkno);
0117     args.fi_sysfile_type = 0;
0118 
0119     return ilookup5(sb, blkno, ocfs2_find_actor, &args);
0120 }
0121 struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
0122              int sysfile_type)
0123 {
0124     int rc = -ESTALE;
0125     struct inode *inode = NULL;
0126     struct super_block *sb = osb->sb;
0127     struct ocfs2_find_inode_args args;
0128     journal_t *journal = osb->journal->j_journal;
0129 
0130     trace_ocfs2_iget_begin((unsigned long long)blkno, flags,
0131                    sysfile_type);
0132 
0133     /* Ok. By now we've either got the offsets passed to us by the
0134      * caller, or we just pulled them off the bh. Lets do some
0135      * sanity checks to make sure they're OK. */
0136     if (blkno == 0) {
0137         inode = ERR_PTR(-EINVAL);
0138         mlog_errno(PTR_ERR(inode));
0139         goto bail;
0140     }
0141 
0142     args.fi_blkno = blkno;
0143     args.fi_flags = flags;
0144     args.fi_ino = ino_from_blkno(sb, blkno);
0145     args.fi_sysfile_type = sysfile_type;
0146 
0147     inode = iget5_locked(sb, args.fi_ino, ocfs2_find_actor,
0148                  ocfs2_init_locked_inode, &args);
0149     /* inode was *not* in the inode cache. 2.6.x requires
0150      * us to do our own read_inode call and unlock it
0151      * afterwards. */
0152     if (inode == NULL) {
0153         inode = ERR_PTR(-ENOMEM);
0154         mlog_errno(PTR_ERR(inode));
0155         goto bail;
0156     }
0157     trace_ocfs2_iget5_locked(inode->i_state);
0158     if (inode->i_state & I_NEW) {
0159         rc = ocfs2_read_locked_inode(inode, &args);
0160         unlock_new_inode(inode);
0161     }
0162     if (is_bad_inode(inode)) {
0163         iput(inode);
0164         inode = ERR_PTR(rc);
0165         goto bail;
0166     }
0167 
0168     /*
0169      * Set transaction id's of transactions that have to be committed
0170      * to finish f[data]sync. We set them to currently running transaction
0171      * as we cannot be sure that the inode or some of its metadata isn't
0172      * part of the transaction - the inode could have been reclaimed and
0173      * now it is reread from disk.
0174      */
0175     if (journal) {
0176         transaction_t *transaction;
0177         tid_t tid;
0178         struct ocfs2_inode_info *oi = OCFS2_I(inode);
0179 
0180         read_lock(&journal->j_state_lock);
0181         if (journal->j_running_transaction)
0182             transaction = journal->j_running_transaction;
0183         else
0184             transaction = journal->j_committing_transaction;
0185         if (transaction)
0186             tid = transaction->t_tid;
0187         else
0188             tid = journal->j_commit_sequence;
0189         read_unlock(&journal->j_state_lock);
0190         oi->i_sync_tid = tid;
0191         oi->i_datasync_tid = tid;
0192     }
0193 
0194 bail:
0195     if (!IS_ERR(inode)) {
0196         trace_ocfs2_iget_end(inode, 
0197             (unsigned long long)OCFS2_I(inode)->ip_blkno);
0198     }
0199 
0200     return inode;
0201 }
0202 
0203 
0204 /*
0205  * here's how inodes get read from disk:
0206  * iget5_locked -> find_actor -> OCFS2_FIND_ACTOR
0207  * found? : return the in-memory inode
0208  * not found? : get_new_inode -> OCFS2_INIT_LOCKED_INODE
0209  */
0210 
0211 static int ocfs2_find_actor(struct inode *inode, void *opaque)
0212 {
0213     struct ocfs2_find_inode_args *args = NULL;
0214     struct ocfs2_inode_info *oi = OCFS2_I(inode);
0215     int ret = 0;
0216 
0217     args = opaque;
0218 
0219     mlog_bug_on_msg(!inode, "No inode in find actor!\n");
0220 
0221     trace_ocfs2_find_actor(inode, inode->i_ino, opaque, args->fi_blkno);
0222 
0223     if (oi->ip_blkno != args->fi_blkno)
0224         goto bail;
0225 
0226     ret = 1;
0227 bail:
0228     return ret;
0229 }
0230 
0231 /*
0232  * initialize the new inode, but don't do anything that would cause
0233  * us to sleep.
0234  * return 0 on success, 1 on failure
0235  */
0236 static int ocfs2_init_locked_inode(struct inode *inode, void *opaque)
0237 {
0238     struct ocfs2_find_inode_args *args = opaque;
0239     static struct lock_class_key ocfs2_quota_ip_alloc_sem_key,
0240                      ocfs2_file_ip_alloc_sem_key;
0241 
0242     inode->i_ino = args->fi_ino;
0243     OCFS2_I(inode)->ip_blkno = args->fi_blkno;
0244     if (args->fi_sysfile_type != 0)
0245         lockdep_set_class(&inode->i_rwsem,
0246             &ocfs2_sysfile_lock_key[args->fi_sysfile_type]);
0247     if (args->fi_sysfile_type == USER_QUOTA_SYSTEM_INODE ||
0248         args->fi_sysfile_type == GROUP_QUOTA_SYSTEM_INODE ||
0249         args->fi_sysfile_type == LOCAL_USER_QUOTA_SYSTEM_INODE ||
0250         args->fi_sysfile_type == LOCAL_GROUP_QUOTA_SYSTEM_INODE)
0251         lockdep_set_class(&OCFS2_I(inode)->ip_alloc_sem,
0252                   &ocfs2_quota_ip_alloc_sem_key);
0253     else
0254         lockdep_set_class(&OCFS2_I(inode)->ip_alloc_sem,
0255                   &ocfs2_file_ip_alloc_sem_key);
0256 
0257     return 0;
0258 }
0259 
0260 void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
0261               int create_ino)
0262 {
0263     struct super_block *sb;
0264     struct ocfs2_super *osb;
0265     int use_plocks = 1;
0266 
0267     sb = inode->i_sb;
0268     osb = OCFS2_SB(sb);
0269 
0270     if ((osb->s_mount_opt & OCFS2_MOUNT_LOCALFLOCKS) ||
0271         ocfs2_mount_local(osb) || !ocfs2_stack_supports_plocks())
0272         use_plocks = 0;
0273 
0274     /*
0275      * These have all been checked by ocfs2_read_inode_block() or set
0276      * by ocfs2_mknod_locked(), so a failure is a code bug.
0277      */
0278     BUG_ON(!OCFS2_IS_VALID_DINODE(fe));  /* This means that read_inode
0279                         cannot create a superblock
0280                         inode today.  change if
0281                         that is needed. */
0282     BUG_ON(!(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)));
0283     BUG_ON(le32_to_cpu(fe->i_fs_generation) != osb->fs_generation);
0284 
0285 
0286     OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
0287     OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
0288     OCFS2_I(inode)->ip_dyn_features = le16_to_cpu(fe->i_dyn_features);
0289 
0290     inode_set_iversion(inode, 1);
0291     inode->i_generation = le32_to_cpu(fe->i_generation);
0292     inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
0293     inode->i_mode = le16_to_cpu(fe->i_mode);
0294     i_uid_write(inode, le32_to_cpu(fe->i_uid));
0295     i_gid_write(inode, le32_to_cpu(fe->i_gid));
0296 
0297     /* Fast symlinks will have i_size but no allocated clusters. */
0298     if (S_ISLNK(inode->i_mode) && !fe->i_clusters) {
0299         inode->i_blocks = 0;
0300         inode->i_mapping->a_ops = &ocfs2_fast_symlink_aops;
0301     } else {
0302         inode->i_blocks = ocfs2_inode_sector_count(inode);
0303         inode->i_mapping->a_ops = &ocfs2_aops;
0304     }
0305     inode->i_atime.tv_sec = le64_to_cpu(fe->i_atime);
0306     inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec);
0307     inode->i_mtime.tv_sec = le64_to_cpu(fe->i_mtime);
0308     inode->i_mtime.tv_nsec = le32_to_cpu(fe->i_mtime_nsec);
0309     inode->i_ctime.tv_sec = le64_to_cpu(fe->i_ctime);
0310     inode->i_ctime.tv_nsec = le32_to_cpu(fe->i_ctime_nsec);
0311 
0312     if (OCFS2_I(inode)->ip_blkno != le64_to_cpu(fe->i_blkno))
0313         mlog(ML_ERROR,
0314              "ip_blkno %llu != i_blkno %llu!\n",
0315              (unsigned long long)OCFS2_I(inode)->ip_blkno,
0316              (unsigned long long)le64_to_cpu(fe->i_blkno));
0317 
0318     set_nlink(inode, ocfs2_read_links_count(fe));
0319 
0320     trace_ocfs2_populate_inode(OCFS2_I(inode)->ip_blkno,
0321                    le32_to_cpu(fe->i_flags));
0322     if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) {
0323         OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE;
0324         inode->i_flags |= S_NOQUOTA;
0325     }
0326   
0327     if (fe->i_flags & cpu_to_le32(OCFS2_LOCAL_ALLOC_FL)) {
0328         OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP;
0329     } else if (fe->i_flags & cpu_to_le32(OCFS2_BITMAP_FL)) {
0330         OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP;
0331     } else if (fe->i_flags & cpu_to_le32(OCFS2_QUOTA_FL)) {
0332         inode->i_flags |= S_NOQUOTA;
0333     } else if (fe->i_flags & cpu_to_le32(OCFS2_SUPER_BLOCK_FL)) {
0334         /* we can't actually hit this as read_inode can't
0335          * handle superblocks today ;-) */
0336         BUG();
0337     }
0338 
0339     switch (inode->i_mode & S_IFMT) {
0340         case S_IFREG:
0341             if (use_plocks)
0342                 inode->i_fop = &ocfs2_fops;
0343             else
0344                 inode->i_fop = &ocfs2_fops_no_plocks;
0345             inode->i_op = &ocfs2_file_iops;
0346             i_size_write(inode, le64_to_cpu(fe->i_size));
0347             break;
0348         case S_IFDIR:
0349             inode->i_op = &ocfs2_dir_iops;
0350             if (use_plocks)
0351                 inode->i_fop = &ocfs2_dops;
0352             else
0353                 inode->i_fop = &ocfs2_dops_no_plocks;
0354             i_size_write(inode, le64_to_cpu(fe->i_size));
0355             OCFS2_I(inode)->ip_dir_lock_gen = 1;
0356             break;
0357         case S_IFLNK:
0358             inode->i_op = &ocfs2_symlink_inode_operations;
0359             inode_nohighmem(inode);
0360             i_size_write(inode, le64_to_cpu(fe->i_size));
0361             break;
0362         default:
0363             inode->i_op = &ocfs2_special_file_iops;
0364             init_special_inode(inode, inode->i_mode,
0365                        inode->i_rdev);
0366             break;
0367     }
0368 
0369     if (create_ino) {
0370         inode->i_ino = ino_from_blkno(inode->i_sb,
0371                    le64_to_cpu(fe->i_blkno));
0372 
0373         /*
0374          * If we ever want to create system files from kernel,
0375          * the generation argument to
0376          * ocfs2_inode_lock_res_init() will have to change.
0377          */
0378         BUG_ON(le32_to_cpu(fe->i_flags) & OCFS2_SYSTEM_FL);
0379 
0380         ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_inode_lockres,
0381                       OCFS2_LOCK_TYPE_META, 0, inode);
0382 
0383         ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_open_lockres,
0384                       OCFS2_LOCK_TYPE_OPEN, 0, inode);
0385     }
0386 
0387     ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_rw_lockres,
0388                   OCFS2_LOCK_TYPE_RW, inode->i_generation,
0389                   inode);
0390 
0391     ocfs2_set_inode_flags(inode);
0392 
0393     OCFS2_I(inode)->ip_last_used_slot = 0;
0394     OCFS2_I(inode)->ip_last_used_group = 0;
0395 
0396     if (S_ISDIR(inode->i_mode))
0397         ocfs2_resv_set_type(&OCFS2_I(inode)->ip_la_data_resv,
0398                     OCFS2_RESV_FLAG_DIR);
0399 }
0400 
0401 static int ocfs2_read_locked_inode(struct inode *inode,
0402                    struct ocfs2_find_inode_args *args)
0403 {
0404     struct super_block *sb;
0405     struct ocfs2_super *osb;
0406     struct ocfs2_dinode *fe;
0407     struct buffer_head *bh = NULL;
0408     int status, can_lock, lock_level = 0;
0409     u32 generation = 0;
0410 
0411     status = -EINVAL;
0412     sb = inode->i_sb;
0413     osb = OCFS2_SB(sb);
0414 
0415     /*
0416      * To improve performance of cold-cache inode stats, we take
0417      * the cluster lock here if possible.
0418      *
0419      * Generally, OCFS2 never trusts the contents of an inode
0420      * unless it's holding a cluster lock, so taking it here isn't
0421      * a correctness issue as much as it is a performance
0422      * improvement.
0423      *
0424      * There are three times when taking the lock is not a good idea:
0425      *
0426      * 1) During startup, before we have initialized the DLM.
0427      *
0428      * 2) If we are reading certain system files which never get
0429      *    cluster locks (local alloc, truncate log).
0430      *
0431      * 3) If the process doing the iget() is responsible for
0432      *    orphan dir recovery. We're holding the orphan dir lock and
0433      *    can get into a deadlock with another process on another
0434      *    node in ->delete_inode().
0435      *
0436      * #1 and #2 can be simply solved by never taking the lock
0437      * here for system files (which are the only type we read
0438      * during mount). It's a heavier approach, but our main
0439      * concern is user-accessible files anyway.
0440      *
0441      * #3 works itself out because we'll eventually take the
0442      * cluster lock before trusting anything anyway.
0443      */
0444     can_lock = !(args->fi_flags & OCFS2_FI_FLAG_SYSFILE)
0445         && !(args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY)
0446         && !ocfs2_mount_local(osb);
0447 
0448     trace_ocfs2_read_locked_inode(
0449         (unsigned long long)OCFS2_I(inode)->ip_blkno, can_lock);
0450 
0451     /*
0452      * To maintain backwards compatibility with older versions of
0453      * ocfs2-tools, we still store the generation value for system
0454      * files. The only ones that actually matter to userspace are
0455      * the journals, but it's easier and inexpensive to just flag
0456      * all system files similarly.
0457      */
0458     if (args->fi_flags & OCFS2_FI_FLAG_SYSFILE)
0459         generation = osb->fs_generation;
0460 
0461     ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_inode_lockres,
0462                   OCFS2_LOCK_TYPE_META,
0463                   generation, inode);
0464 
0465     ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_open_lockres,
0466                   OCFS2_LOCK_TYPE_OPEN,
0467                   0, inode);
0468 
0469     if (can_lock) {
0470         status = ocfs2_open_lock(inode);
0471         if (status) {
0472             make_bad_inode(inode);
0473             mlog_errno(status);
0474             return status;
0475         }
0476         status = ocfs2_inode_lock(inode, NULL, lock_level);
0477         if (status) {
0478             make_bad_inode(inode);
0479             mlog_errno(status);
0480             return status;
0481         }
0482     }
0483 
0484     if (args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY) {
0485         status = ocfs2_try_open_lock(inode, 0);
0486         if (status) {
0487             make_bad_inode(inode);
0488             return status;
0489         }
0490     }
0491 
0492     if (can_lock) {
0493         if (args->fi_flags & OCFS2_FI_FLAG_FILECHECK_CHK)
0494             status = ocfs2_filecheck_read_inode_block_full(inode,
0495                         &bh, OCFS2_BH_IGNORE_CACHE, 0);
0496         else if (args->fi_flags & OCFS2_FI_FLAG_FILECHECK_FIX)
0497             status = ocfs2_filecheck_read_inode_block_full(inode,
0498                         &bh, OCFS2_BH_IGNORE_CACHE, 1);
0499         else
0500             status = ocfs2_read_inode_block_full(inode,
0501                         &bh, OCFS2_BH_IGNORE_CACHE);
0502     } else {
0503         status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh);
0504         /*
0505          * If buffer is in jbd, then its checksum may not have been
0506          * computed as yet.
0507          */
0508         if (!status && !buffer_jbd(bh)) {
0509             if (args->fi_flags & OCFS2_FI_FLAG_FILECHECK_CHK)
0510                 status = ocfs2_filecheck_validate_inode_block(
0511                                 osb->sb, bh);
0512             else if (args->fi_flags & OCFS2_FI_FLAG_FILECHECK_FIX)
0513                 status = ocfs2_filecheck_repair_inode_block(
0514                                 osb->sb, bh);
0515             else
0516                 status = ocfs2_validate_inode_block(
0517                                 osb->sb, bh);
0518         }
0519     }
0520     if (status < 0) {
0521         mlog_errno(status);
0522         goto bail;
0523     }
0524 
0525     status = -EINVAL;
0526     fe = (struct ocfs2_dinode *) bh->b_data;
0527 
0528     /*
0529      * This is a code bug. Right now the caller needs to
0530      * understand whether it is asking for a system file inode or
0531      * not so the proper lock names can be built.
0532      */
0533     mlog_bug_on_msg(!!(fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) !=
0534             !!(args->fi_flags & OCFS2_FI_FLAG_SYSFILE),
0535             "Inode %llu: system file state is ambiguous\n",
0536             (unsigned long long)args->fi_blkno);
0537 
0538     if (S_ISCHR(le16_to_cpu(fe->i_mode)) ||
0539         S_ISBLK(le16_to_cpu(fe->i_mode)))
0540         inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
0541 
0542     ocfs2_populate_inode(inode, fe, 0);
0543 
0544     BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno));
0545 
0546     if (buffer_dirty(bh) && !buffer_jbd(bh)) {
0547         if (can_lock) {
0548             ocfs2_inode_unlock(inode, lock_level);
0549             lock_level = 1;
0550             ocfs2_inode_lock(inode, NULL, lock_level);
0551         }
0552         status = ocfs2_write_block(osb, bh, INODE_CACHE(inode));
0553         if (status < 0) {
0554             mlog_errno(status);
0555             goto bail;
0556         }
0557     }
0558 
0559     status = 0;
0560 
0561 bail:
0562     if (can_lock)
0563         ocfs2_inode_unlock(inode, lock_level);
0564 
0565     if (status < 0)
0566         make_bad_inode(inode);
0567 
0568     brelse(bh);
0569 
0570     return status;
0571 }
0572 
0573 void ocfs2_sync_blockdev(struct super_block *sb)
0574 {
0575     sync_blockdev(sb->s_bdev);
0576 }
0577 
0578 static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
0579                      struct inode *inode,
0580                      struct buffer_head *fe_bh)
0581 {
0582     int status = 0;
0583     struct ocfs2_dinode *fe;
0584     handle_t *handle = NULL;
0585 
0586     fe = (struct ocfs2_dinode *) fe_bh->b_data;
0587 
0588     /*
0589      * This check will also skip truncate of inodes with inline
0590      * data and fast symlinks.
0591      */
0592     if (fe->i_clusters) {
0593         if (ocfs2_should_order_data(inode))
0594             ocfs2_begin_ordered_truncate(inode, 0);
0595 
0596         handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
0597         if (IS_ERR(handle)) {
0598             status = PTR_ERR(handle);
0599             handle = NULL;
0600             mlog_errno(status);
0601             goto out;
0602         }
0603 
0604         status = ocfs2_journal_access_di(handle, INODE_CACHE(inode),
0605                          fe_bh,
0606                          OCFS2_JOURNAL_ACCESS_WRITE);
0607         if (status < 0) {
0608             mlog_errno(status);
0609             goto out;
0610         }
0611 
0612         i_size_write(inode, 0);
0613 
0614         status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
0615         if (status < 0) {
0616             mlog_errno(status);
0617             goto out;
0618         }
0619 
0620         ocfs2_commit_trans(osb, handle);
0621         handle = NULL;
0622 
0623         status = ocfs2_commit_truncate(osb, inode, fe_bh);
0624         if (status < 0)
0625             mlog_errno(status);
0626     }
0627 
0628 out:
0629     if (handle)
0630         ocfs2_commit_trans(osb, handle);
0631     return status;
0632 }
0633 
0634 static int ocfs2_remove_inode(struct inode *inode,
0635                   struct buffer_head *di_bh,
0636                   struct inode *orphan_dir_inode,
0637                   struct buffer_head *orphan_dir_bh)
0638 {
0639     int status;
0640     struct inode *inode_alloc_inode = NULL;
0641     struct buffer_head *inode_alloc_bh = NULL;
0642     handle_t *handle;
0643     struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
0644     struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
0645 
0646     inode_alloc_inode =
0647         ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE,
0648                         le16_to_cpu(di->i_suballoc_slot));
0649     if (!inode_alloc_inode) {
0650         status = -ENOENT;
0651         mlog_errno(status);
0652         goto bail;
0653     }
0654 
0655     inode_lock(inode_alloc_inode);
0656     status = ocfs2_inode_lock(inode_alloc_inode, &inode_alloc_bh, 1);
0657     if (status < 0) {
0658         inode_unlock(inode_alloc_inode);
0659 
0660         mlog_errno(status);
0661         goto bail;
0662     }
0663 
0664     handle = ocfs2_start_trans(osb, OCFS2_DELETE_INODE_CREDITS +
0665                    ocfs2_quota_trans_credits(inode->i_sb));
0666     if (IS_ERR(handle)) {
0667         status = PTR_ERR(handle);
0668         mlog_errno(status);
0669         goto bail_unlock;
0670     }
0671 
0672     if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) {
0673         status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode,
0674                       orphan_dir_bh, false);
0675         if (status < 0) {
0676             mlog_errno(status);
0677             goto bail_commit;
0678         }
0679     }
0680 
0681     /* set the inodes dtime */
0682     status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
0683                      OCFS2_JOURNAL_ACCESS_WRITE);
0684     if (status < 0) {
0685         mlog_errno(status);
0686         goto bail_commit;
0687     }
0688 
0689     di->i_dtime = cpu_to_le64(ktime_get_real_seconds());
0690     di->i_flags &= cpu_to_le32(~(OCFS2_VALID_FL | OCFS2_ORPHANED_FL));
0691     ocfs2_journal_dirty(handle, di_bh);
0692 
0693     ocfs2_remove_from_cache(INODE_CACHE(inode), di_bh);
0694     dquot_free_inode(inode);
0695 
0696     status = ocfs2_free_dinode(handle, inode_alloc_inode,
0697                    inode_alloc_bh, di);
0698     if (status < 0)
0699         mlog_errno(status);
0700 
0701 bail_commit:
0702     ocfs2_commit_trans(osb, handle);
0703 bail_unlock:
0704     ocfs2_inode_unlock(inode_alloc_inode, 1);
0705     inode_unlock(inode_alloc_inode);
0706     brelse(inode_alloc_bh);
0707 bail:
0708     iput(inode_alloc_inode);
0709 
0710     return status;
0711 }
0712 
0713 /*
0714  * Serialize with orphan dir recovery. If the process doing
0715  * recovery on this orphan dir does an iget() with the dir
0716  * i_rwsem held, we'll deadlock here. Instead we detect this
0717  * and exit early - recovery will wipe this inode for us.
0718  */
0719 static int ocfs2_check_orphan_recovery_state(struct ocfs2_super *osb,
0720                          int slot)
0721 {
0722     int ret = 0;
0723 
0724     spin_lock(&osb->osb_lock);
0725     if (ocfs2_node_map_test_bit(osb, &osb->osb_recovering_orphan_dirs, slot)) {
0726         ret = -EDEADLK;
0727         goto out;
0728     }
0729     /* This signals to the orphan recovery process that it should
0730      * wait for us to handle the wipe. */
0731     osb->osb_orphan_wipes[slot]++;
0732 out:
0733     spin_unlock(&osb->osb_lock);
0734     trace_ocfs2_check_orphan_recovery_state(slot, ret);
0735     return ret;
0736 }
0737 
0738 static void ocfs2_signal_wipe_completion(struct ocfs2_super *osb,
0739                      int slot)
0740 {
0741     spin_lock(&osb->osb_lock);
0742     osb->osb_orphan_wipes[slot]--;
0743     spin_unlock(&osb->osb_lock);
0744 
0745     wake_up(&osb->osb_wipe_event);
0746 }
0747 
0748 static int ocfs2_wipe_inode(struct inode *inode,
0749                 struct buffer_head *di_bh)
0750 {
0751     int status, orphaned_slot = -1;
0752     struct inode *orphan_dir_inode = NULL;
0753     struct buffer_head *orphan_dir_bh = NULL;
0754     struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
0755     struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
0756 
0757     if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) {
0758         orphaned_slot = le16_to_cpu(di->i_orphaned_slot);
0759 
0760         status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot);
0761         if (status)
0762             return status;
0763 
0764         orphan_dir_inode = ocfs2_get_system_file_inode(osb,
0765                                    ORPHAN_DIR_SYSTEM_INODE,
0766                                    orphaned_slot);
0767         if (!orphan_dir_inode) {
0768             status = -ENOENT;
0769             mlog_errno(status);
0770             goto bail;
0771         }
0772 
0773         /* Lock the orphan dir. The lock will be held for the entire
0774          * delete_inode operation. We do this now to avoid races with
0775          * recovery completion on other nodes. */
0776         inode_lock(orphan_dir_inode);
0777         status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
0778         if (status < 0) {
0779             inode_unlock(orphan_dir_inode);
0780 
0781             mlog_errno(status);
0782             goto bail;
0783         }
0784     }
0785 
0786     /* we do this while holding the orphan dir lock because we
0787      * don't want recovery being run from another node to try an
0788      * inode delete underneath us -- this will result in two nodes
0789      * truncating the same file! */
0790     status = ocfs2_truncate_for_delete(osb, inode, di_bh);
0791     if (status < 0) {
0792         mlog_errno(status);
0793         goto bail_unlock_dir;
0794     }
0795 
0796     /* Remove any dir index tree */
0797     if (S_ISDIR(inode->i_mode)) {
0798         status = ocfs2_dx_dir_truncate(inode, di_bh);
0799         if (status) {
0800             mlog_errno(status);
0801             goto bail_unlock_dir;
0802         }
0803     }
0804 
0805     /*Free extended attribute resources associated with this inode.*/
0806     status = ocfs2_xattr_remove(inode, di_bh);
0807     if (status < 0) {
0808         mlog_errno(status);
0809         goto bail_unlock_dir;
0810     }
0811 
0812     status = ocfs2_remove_refcount_tree(inode, di_bh);
0813     if (status < 0) {
0814         mlog_errno(status);
0815         goto bail_unlock_dir;
0816     }
0817 
0818     status = ocfs2_remove_inode(inode, di_bh, orphan_dir_inode,
0819                     orphan_dir_bh);
0820     if (status < 0)
0821         mlog_errno(status);
0822 
0823 bail_unlock_dir:
0824     if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)
0825         return status;
0826 
0827     ocfs2_inode_unlock(orphan_dir_inode, 1);
0828     inode_unlock(orphan_dir_inode);
0829     brelse(orphan_dir_bh);
0830 bail:
0831     iput(orphan_dir_inode);
0832     ocfs2_signal_wipe_completion(osb, orphaned_slot);
0833 
0834     return status;
0835 }
0836 
0837 /* There is a series of simple checks that should be done before a
0838  * trylock is even considered. Encapsulate those in this function. */
0839 static int ocfs2_inode_is_valid_to_delete(struct inode *inode)
0840 {
0841     int ret = 0;
0842     struct ocfs2_inode_info *oi = OCFS2_I(inode);
0843     struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
0844 
0845     trace_ocfs2_inode_is_valid_to_delete(current, osb->dc_task,
0846                          (unsigned long long)oi->ip_blkno,
0847                          oi->ip_flags);
0848 
0849     /* We shouldn't be getting here for the root directory
0850      * inode.. */
0851     if (inode == osb->root_inode) {
0852         mlog(ML_ERROR, "Skipping delete of root inode.\n");
0853         goto bail;
0854     }
0855 
0856     /*
0857      * If we're coming from downconvert_thread we can't go into our own
0858      * voting [hello, deadlock city!] so we cannot delete the inode. But
0859      * since we dropped last inode ref when downconverting dentry lock,
0860      * we cannot have the file open and thus the node doing unlink will
0861      * take care of deleting the inode.
0862      */
0863     if (current == osb->dc_task)
0864         goto bail;
0865 
0866     spin_lock(&oi->ip_lock);
0867     /* OCFS2 *never* deletes system files. This should technically
0868      * never get here as system file inodes should always have a
0869      * positive link count. */
0870     if (oi->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
0871         mlog(ML_ERROR, "Skipping delete of system file %llu\n",
0872              (unsigned long long)oi->ip_blkno);
0873         goto bail_unlock;
0874     }
0875 
0876     ret = 1;
0877 bail_unlock:
0878     spin_unlock(&oi->ip_lock);
0879 bail:
0880     return ret;
0881 }
0882 
0883 /* Query the cluster to determine whether we should wipe an inode from
0884  * disk or not.
0885  *
0886  * Requires the inode to have the cluster lock. */
0887 static int ocfs2_query_inode_wipe(struct inode *inode,
0888                   struct buffer_head *di_bh,
0889                   int *wipe)
0890 {
0891     int status = 0, reason = 0;
0892     struct ocfs2_inode_info *oi = OCFS2_I(inode);
0893     struct ocfs2_dinode *di;
0894 
0895     *wipe = 0;
0896 
0897     trace_ocfs2_query_inode_wipe_begin((unsigned long long)oi->ip_blkno,
0898                        inode->i_nlink);
0899 
0900     /* While we were waiting for the cluster lock in
0901      * ocfs2_delete_inode, another node might have asked to delete
0902      * the inode. Recheck our flags to catch this. */
0903     if (!ocfs2_inode_is_valid_to_delete(inode)) {
0904         reason = 1;
0905         goto bail;
0906     }
0907 
0908     /* Now that we have an up to date inode, we can double check
0909      * the link count. */
0910     if (inode->i_nlink)
0911         goto bail;
0912 
0913     /* Do some basic inode verification... */
0914     di = (struct ocfs2_dinode *) di_bh->b_data;
0915     if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL)) &&
0916         !(oi->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) {
0917         /*
0918          * Inodes in the orphan dir must have ORPHANED_FL.  The only
0919          * inodes that come back out of the orphan dir are reflink
0920          * targets. A reflink target may be moved out of the orphan
0921          * dir between the time we scan the directory and the time we
0922          * process it. This would lead to HAS_REFCOUNT_FL being set but
0923          * ORPHANED_FL not.
0924          */
0925         if (di->i_dyn_features & cpu_to_le16(OCFS2_HAS_REFCOUNT_FL)) {
0926             reason = 2;
0927             goto bail;
0928         }
0929 
0930         /* for lack of a better error? */
0931         status = -EEXIST;
0932         mlog(ML_ERROR,
0933              "Inode %llu (on-disk %llu) not orphaned! "
0934              "Disk flags  0x%x, inode flags 0x%x\n",
0935              (unsigned long long)oi->ip_blkno,
0936              (unsigned long long)le64_to_cpu(di->i_blkno),
0937              le32_to_cpu(di->i_flags), oi->ip_flags);
0938         goto bail;
0939     }
0940 
0941     /* has someone already deleted us?! baaad... */
0942     if (di->i_dtime) {
0943         status = -EEXIST;
0944         mlog_errno(status);
0945         goto bail;
0946     }
0947 
0948     /*
0949      * This is how ocfs2 determines whether an inode is still live
0950      * within the cluster. Every node takes a shared read lock on
0951      * the inode open lock in ocfs2_read_locked_inode(). When we
0952      * get to ->delete_inode(), each node tries to convert it's
0953      * lock to an exclusive. Trylocks are serialized by the inode
0954      * meta data lock. If the upconvert succeeds, we know the inode
0955      * is no longer live and can be deleted.
0956      *
0957      * Though we call this with the meta data lock held, the
0958      * trylock keeps us from ABBA deadlock.
0959      */
0960     status = ocfs2_try_open_lock(inode, 1);
0961     if (status == -EAGAIN) {
0962         status = 0;
0963         reason = 3;
0964         goto bail;
0965     }
0966     if (status < 0) {
0967         mlog_errno(status);
0968         goto bail;
0969     }
0970 
0971     *wipe = 1;
0972     trace_ocfs2_query_inode_wipe_succ(le16_to_cpu(di->i_orphaned_slot));
0973 
0974 bail:
0975     trace_ocfs2_query_inode_wipe_end(status, reason);
0976     return status;
0977 }
0978 
0979 /* Support function for ocfs2_delete_inode. Will help us keep the
0980  * inode data in a consistent state for clear_inode. Always truncates
0981  * pages, optionally sync's them first. */
0982 static void ocfs2_cleanup_delete_inode(struct inode *inode,
0983                        int sync_data)
0984 {
0985     trace_ocfs2_cleanup_delete_inode(
0986         (unsigned long long)OCFS2_I(inode)->ip_blkno, sync_data);
0987     if (sync_data)
0988         filemap_write_and_wait(inode->i_mapping);
0989     truncate_inode_pages_final(&inode->i_data);
0990 }
0991 
0992 static void ocfs2_delete_inode(struct inode *inode)
0993 {
0994     int wipe, status;
0995     sigset_t oldset;
0996     struct buffer_head *di_bh = NULL;
0997     struct ocfs2_dinode *di = NULL;
0998 
0999     trace_ocfs2_delete_inode(inode->i_ino,
1000                  (unsigned long long)OCFS2_I(inode)->ip_blkno,
1001                  is_bad_inode(inode));
1002 
1003     /* When we fail in read_inode() we mark inode as bad. The second test
1004      * catches the case when inode allocation fails before allocating
1005      * a block for inode. */
1006     if (is_bad_inode(inode) || !OCFS2_I(inode)->ip_blkno)
1007         goto bail;
1008 
1009     if (!ocfs2_inode_is_valid_to_delete(inode)) {
1010         /* It's probably not necessary to truncate_inode_pages
1011          * here but we do it for safety anyway (it will most
1012          * likely be a no-op anyway) */
1013         ocfs2_cleanup_delete_inode(inode, 0);
1014         goto bail;
1015     }
1016 
1017     dquot_initialize(inode);
1018 
1019     /* We want to block signals in delete_inode as the lock and
1020      * messaging paths may return us -ERESTARTSYS. Which would
1021      * cause us to exit early, resulting in inodes being orphaned
1022      * forever. */
1023     ocfs2_block_signals(&oldset);
1024 
1025     /*
1026      * Synchronize us against ocfs2_get_dentry. We take this in
1027      * shared mode so that all nodes can still concurrently
1028      * process deletes.
1029      */
1030     status = ocfs2_nfs_sync_lock(OCFS2_SB(inode->i_sb), 0);
1031     if (status < 0) {
1032         mlog(ML_ERROR, "getting nfs sync lock(PR) failed %d\n", status);
1033         ocfs2_cleanup_delete_inode(inode, 0);
1034         goto bail_unblock;
1035     }
1036     /* Lock down the inode. This gives us an up to date view of
1037      * it's metadata (for verification), and allows us to
1038      * serialize delete_inode on multiple nodes.
1039      *
1040      * Even though we might be doing a truncate, we don't take the
1041      * allocation lock here as it won't be needed - nobody will
1042      * have the file open.
1043      */
1044     status = ocfs2_inode_lock(inode, &di_bh, 1);
1045     if (status < 0) {
1046         if (status != -ENOENT)
1047             mlog_errno(status);
1048         ocfs2_cleanup_delete_inode(inode, 0);
1049         goto bail_unlock_nfs_sync;
1050     }
1051 
1052     di = (struct ocfs2_dinode *)di_bh->b_data;
1053     /* Skip inode deletion and wait for dio orphan entry recovered
1054      * first */
1055     if (unlikely(di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL))) {
1056         ocfs2_cleanup_delete_inode(inode, 0);
1057         goto bail_unlock_inode;
1058     }
1059 
1060     /* Query the cluster. This will be the final decision made
1061      * before we go ahead and wipe the inode. */
1062     status = ocfs2_query_inode_wipe(inode, di_bh, &wipe);
1063     if (!wipe || status < 0) {
1064         /* Error and remote inode busy both mean we won't be
1065          * removing the inode, so they take almost the same
1066          * path. */
1067         if (status < 0)
1068             mlog_errno(status);
1069 
1070         /* Someone in the cluster has disallowed a wipe of
1071          * this inode, or it was never completely
1072          * orphaned. Write out the pages and exit now. */
1073         ocfs2_cleanup_delete_inode(inode, 1);
1074         goto bail_unlock_inode;
1075     }
1076 
1077     ocfs2_cleanup_delete_inode(inode, 0);
1078 
1079     status = ocfs2_wipe_inode(inode, di_bh);
1080     if (status < 0) {
1081         if (status != -EDEADLK)
1082             mlog_errno(status);
1083         goto bail_unlock_inode;
1084     }
1085 
1086     /*
1087      * Mark the inode as successfully deleted.
1088      *
1089      * This is important for ocfs2_clear_inode() as it will check
1090      * this flag and skip any checkpointing work
1091      *
1092      * ocfs2_stuff_meta_lvb() also uses this flag to invalidate
1093      * the LVB for other nodes.
1094      */
1095     OCFS2_I(inode)->ip_flags |= OCFS2_INODE_DELETED;
1096 
1097 bail_unlock_inode:
1098     ocfs2_inode_unlock(inode, 1);
1099     brelse(di_bh);
1100 
1101 bail_unlock_nfs_sync:
1102     ocfs2_nfs_sync_unlock(OCFS2_SB(inode->i_sb), 0);
1103 
1104 bail_unblock:
1105     ocfs2_unblock_signals(&oldset);
1106 bail:
1107     return;
1108 }
1109 
1110 static void ocfs2_clear_inode(struct inode *inode)
1111 {
1112     int status;
1113     struct ocfs2_inode_info *oi = OCFS2_I(inode);
1114     struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1115 
1116     clear_inode(inode);
1117     trace_ocfs2_clear_inode((unsigned long long)oi->ip_blkno,
1118                 inode->i_nlink);
1119 
1120     mlog_bug_on_msg(osb == NULL,
1121             "Inode=%lu\n", inode->i_ino);
1122 
1123     dquot_drop(inode);
1124 
1125     /* To preven remote deletes we hold open lock before, now it
1126      * is time to unlock PR and EX open locks. */
1127     ocfs2_open_unlock(inode);
1128 
1129     /* Do these before all the other work so that we don't bounce
1130      * the downconvert thread while waiting to destroy the locks. */
1131     ocfs2_mark_lockres_freeing(osb, &oi->ip_rw_lockres);
1132     ocfs2_mark_lockres_freeing(osb, &oi->ip_inode_lockres);
1133     ocfs2_mark_lockres_freeing(osb, &oi->ip_open_lockres);
1134 
1135     ocfs2_resv_discard(&osb->osb_la_resmap,
1136                &oi->ip_la_data_resv);
1137     ocfs2_resv_init_once(&oi->ip_la_data_resv);
1138 
1139     /* We very well may get a clear_inode before all an inodes
1140      * metadata has hit disk. Of course, we can't drop any cluster
1141      * locks until the journal has finished with it. The only
1142      * exception here are successfully wiped inodes - their
1143      * metadata can now be considered to be part of the system
1144      * inodes from which it came. */
1145     if (!(oi->ip_flags & OCFS2_INODE_DELETED))
1146         ocfs2_checkpoint_inode(inode);
1147 
1148     mlog_bug_on_msg(!list_empty(&oi->ip_io_markers),
1149             "Clear inode of %llu, inode has io markers\n",
1150             (unsigned long long)oi->ip_blkno);
1151     mlog_bug_on_msg(!list_empty(&oi->ip_unwritten_list),
1152             "Clear inode of %llu, inode has unwritten extents\n",
1153             (unsigned long long)oi->ip_blkno);
1154 
1155     ocfs2_extent_map_trunc(inode, 0);
1156 
1157     status = ocfs2_drop_inode_locks(inode);
1158     if (status < 0)
1159         mlog_errno(status);
1160 
1161     ocfs2_lock_res_free(&oi->ip_rw_lockres);
1162     ocfs2_lock_res_free(&oi->ip_inode_lockres);
1163     ocfs2_lock_res_free(&oi->ip_open_lockres);
1164 
1165     ocfs2_metadata_cache_exit(INODE_CACHE(inode));
1166 
1167     mlog_bug_on_msg(INODE_CACHE(inode)->ci_num_cached,
1168             "Clear inode of %llu, inode has %u cache items\n",
1169             (unsigned long long)oi->ip_blkno,
1170             INODE_CACHE(inode)->ci_num_cached);
1171 
1172     mlog_bug_on_msg(!(INODE_CACHE(inode)->ci_flags & OCFS2_CACHE_FL_INLINE),
1173             "Clear inode of %llu, inode has a bad flag\n",
1174             (unsigned long long)oi->ip_blkno);
1175 
1176     mlog_bug_on_msg(spin_is_locked(&oi->ip_lock),
1177             "Clear inode of %llu, inode is locked\n",
1178             (unsigned long long)oi->ip_blkno);
1179 
1180     mlog_bug_on_msg(!mutex_trylock(&oi->ip_io_mutex),
1181             "Clear inode of %llu, io_mutex is locked\n",
1182             (unsigned long long)oi->ip_blkno);
1183     mutex_unlock(&oi->ip_io_mutex);
1184 
1185     /*
1186      * down_trylock() returns 0, down_write_trylock() returns 1
1187      * kernel 1, world 0
1188      */
1189     mlog_bug_on_msg(!down_write_trylock(&oi->ip_alloc_sem),
1190             "Clear inode of %llu, alloc_sem is locked\n",
1191             (unsigned long long)oi->ip_blkno);
1192     up_write(&oi->ip_alloc_sem);
1193 
1194     mlog_bug_on_msg(oi->ip_open_count,
1195             "Clear inode of %llu has open count %d\n",
1196             (unsigned long long)oi->ip_blkno, oi->ip_open_count);
1197 
1198     /* Clear all other flags. */
1199     oi->ip_flags = 0;
1200     oi->ip_dir_start_lookup = 0;
1201     oi->ip_blkno = 0ULL;
1202 
1203     /*
1204      * ip_jinode is used to track txns against this inode. We ensure that
1205      * the journal is flushed before journal shutdown. Thus it is safe to
1206      * have inodes get cleaned up after journal shutdown.
1207      */
1208     jbd2_journal_release_jbd_inode(osb->journal->j_journal,
1209                        &oi->ip_jinode);
1210 }
1211 
1212 void ocfs2_evict_inode(struct inode *inode)
1213 {
1214     if (!inode->i_nlink ||
1215         (OCFS2_I(inode)->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)) {
1216         ocfs2_delete_inode(inode);
1217     } else {
1218         truncate_inode_pages_final(&inode->i_data);
1219     }
1220     ocfs2_clear_inode(inode);
1221 }
1222 
1223 /* Called under inode_lock, with no more references on the
1224  * struct inode, so it's safe here to check the flags field
1225  * and to manipulate i_nlink without any other locks. */
1226 int ocfs2_drop_inode(struct inode *inode)
1227 {
1228     struct ocfs2_inode_info *oi = OCFS2_I(inode);
1229 
1230     trace_ocfs2_drop_inode((unsigned long long)oi->ip_blkno,
1231                 inode->i_nlink, oi->ip_flags);
1232 
1233     assert_spin_locked(&inode->i_lock);
1234     inode->i_state |= I_WILL_FREE;
1235     spin_unlock(&inode->i_lock);
1236     write_inode_now(inode, 1);
1237     spin_lock(&inode->i_lock);
1238     WARN_ON(inode->i_state & I_NEW);
1239     inode->i_state &= ~I_WILL_FREE;
1240 
1241     return 1;
1242 }
1243 
1244 /*
1245  * This is called from our getattr.
1246  */
1247 int ocfs2_inode_revalidate(struct dentry *dentry)
1248 {
1249     struct inode *inode = d_inode(dentry);
1250     int status = 0;
1251 
1252     trace_ocfs2_inode_revalidate(inode,
1253         inode ? (unsigned long long)OCFS2_I(inode)->ip_blkno : 0ULL,
1254         inode ? (unsigned long long)OCFS2_I(inode)->ip_flags : 0);
1255 
1256     if (!inode) {
1257         status = -ENOENT;
1258         goto bail;
1259     }
1260 
1261     spin_lock(&OCFS2_I(inode)->ip_lock);
1262     if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
1263         spin_unlock(&OCFS2_I(inode)->ip_lock);
1264         status = -ENOENT;
1265         goto bail;
1266     }
1267     spin_unlock(&OCFS2_I(inode)->ip_lock);
1268 
1269     /* Let ocfs2_inode_lock do the work of updating our struct
1270      * inode for us. */
1271     status = ocfs2_inode_lock(inode, NULL, 0);
1272     if (status < 0) {
1273         if (status != -ENOENT)
1274             mlog_errno(status);
1275         goto bail;
1276     }
1277     ocfs2_inode_unlock(inode, 0);
1278 bail:
1279     return status;
1280 }
1281 
1282 /*
1283  * Updates a disk inode from a
1284  * struct inode.
1285  * Only takes ip_lock.
1286  */
1287 int ocfs2_mark_inode_dirty(handle_t *handle,
1288                struct inode *inode,
1289                struct buffer_head *bh)
1290 {
1291     int status;
1292     struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data;
1293 
1294     trace_ocfs2_mark_inode_dirty((unsigned long long)OCFS2_I(inode)->ip_blkno);
1295 
1296     status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
1297                      OCFS2_JOURNAL_ACCESS_WRITE);
1298     if (status < 0) {
1299         mlog_errno(status);
1300         goto leave;
1301     }
1302 
1303     spin_lock(&OCFS2_I(inode)->ip_lock);
1304     fe->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters);
1305     ocfs2_get_inode_flags(OCFS2_I(inode));
1306     fe->i_attr = cpu_to_le32(OCFS2_I(inode)->ip_attr);
1307     fe->i_dyn_features = cpu_to_le16(OCFS2_I(inode)->ip_dyn_features);
1308     spin_unlock(&OCFS2_I(inode)->ip_lock);
1309 
1310     fe->i_size = cpu_to_le64(i_size_read(inode));
1311     ocfs2_set_links_count(fe, inode->i_nlink);
1312     fe->i_uid = cpu_to_le32(i_uid_read(inode));
1313     fe->i_gid = cpu_to_le32(i_gid_read(inode));
1314     fe->i_mode = cpu_to_le16(inode->i_mode);
1315     fe->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
1316     fe->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
1317     fe->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
1318     fe->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
1319     fe->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec);
1320     fe->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
1321 
1322     ocfs2_journal_dirty(handle, bh);
1323     ocfs2_update_inode_fsync_trans(handle, inode, 1);
1324 leave:
1325     return status;
1326 }
1327 
1328 /*
1329  *
1330  * Updates a struct inode from a disk inode.
1331  * does no i/o, only takes ip_lock.
1332  */
1333 void ocfs2_refresh_inode(struct inode *inode,
1334              struct ocfs2_dinode *fe)
1335 {
1336     spin_lock(&OCFS2_I(inode)->ip_lock);
1337 
1338     OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
1339     OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
1340     OCFS2_I(inode)->ip_dyn_features = le16_to_cpu(fe->i_dyn_features);
1341     ocfs2_set_inode_flags(inode);
1342     i_size_write(inode, le64_to_cpu(fe->i_size));
1343     set_nlink(inode, ocfs2_read_links_count(fe));
1344     i_uid_write(inode, le32_to_cpu(fe->i_uid));
1345     i_gid_write(inode, le32_to_cpu(fe->i_gid));
1346     inode->i_mode = le16_to_cpu(fe->i_mode);
1347     if (S_ISLNK(inode->i_mode) && le32_to_cpu(fe->i_clusters) == 0)
1348         inode->i_blocks = 0;
1349     else
1350         inode->i_blocks = ocfs2_inode_sector_count(inode);
1351     inode->i_atime.tv_sec = le64_to_cpu(fe->i_atime);
1352     inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec);
1353     inode->i_mtime.tv_sec = le64_to_cpu(fe->i_mtime);
1354     inode->i_mtime.tv_nsec = le32_to_cpu(fe->i_mtime_nsec);
1355     inode->i_ctime.tv_sec = le64_to_cpu(fe->i_ctime);
1356     inode->i_ctime.tv_nsec = le32_to_cpu(fe->i_ctime_nsec);
1357 
1358     spin_unlock(&OCFS2_I(inode)->ip_lock);
1359 }
1360 
1361 int ocfs2_validate_inode_block(struct super_block *sb,
1362                    struct buffer_head *bh)
1363 {
1364     int rc;
1365     struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
1366 
1367     trace_ocfs2_validate_inode_block((unsigned long long)bh->b_blocknr);
1368 
1369     BUG_ON(!buffer_uptodate(bh));
1370 
1371     /*
1372      * If the ecc fails, we return the error but otherwise
1373      * leave the filesystem running.  We know any error is
1374      * local to this block.
1375      */
1376     rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &di->i_check);
1377     if (rc) {
1378         mlog(ML_ERROR, "Checksum failed for dinode %llu\n",
1379              (unsigned long long)bh->b_blocknr);
1380         goto bail;
1381     }
1382 
1383     /*
1384      * Errors after here are fatal.
1385      */
1386 
1387     rc = -EINVAL;
1388 
1389     if (!OCFS2_IS_VALID_DINODE(di)) {
1390         rc = ocfs2_error(sb, "Invalid dinode #%llu: signature = %.*s\n",
1391                  (unsigned long long)bh->b_blocknr, 7,
1392                  di->i_signature);
1393         goto bail;
1394     }
1395 
1396     if (le64_to_cpu(di->i_blkno) != bh->b_blocknr) {
1397         rc = ocfs2_error(sb, "Invalid dinode #%llu: i_blkno is %llu\n",
1398                  (unsigned long long)bh->b_blocknr,
1399                  (unsigned long long)le64_to_cpu(di->i_blkno));
1400         goto bail;
1401     }
1402 
1403     if (!(di->i_flags & cpu_to_le32(OCFS2_VALID_FL))) {
1404         rc = ocfs2_error(sb,
1405                  "Invalid dinode #%llu: OCFS2_VALID_FL not set\n",
1406                  (unsigned long long)bh->b_blocknr);
1407         goto bail;
1408     }
1409 
1410     if (le32_to_cpu(di->i_fs_generation) !=
1411         OCFS2_SB(sb)->fs_generation) {
1412         rc = ocfs2_error(sb,
1413                  "Invalid dinode #%llu: fs_generation is %u\n",
1414                  (unsigned long long)bh->b_blocknr,
1415                  le32_to_cpu(di->i_fs_generation));
1416         goto bail;
1417     }
1418 
1419     rc = 0;
1420 
1421 bail:
1422     return rc;
1423 }
1424 
1425 static int ocfs2_filecheck_validate_inode_block(struct super_block *sb,
1426                         struct buffer_head *bh)
1427 {
1428     int rc = 0;
1429     struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
1430 
1431     trace_ocfs2_filecheck_validate_inode_block(
1432         (unsigned long long)bh->b_blocknr);
1433 
1434     BUG_ON(!buffer_uptodate(bh));
1435 
1436     /*
1437      * Call ocfs2_validate_meta_ecc() first since it has ecc repair
1438      * function, but we should not return error immediately when ecc
1439      * validation fails, because the reason is quite likely the invalid
1440      * inode number inputed.
1441      */
1442     rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &di->i_check);
1443     if (rc) {
1444         mlog(ML_ERROR,
1445              "Filecheck: checksum failed for dinode %llu\n",
1446              (unsigned long long)bh->b_blocknr);
1447         rc = -OCFS2_FILECHECK_ERR_BLOCKECC;
1448     }
1449 
1450     if (!OCFS2_IS_VALID_DINODE(di)) {
1451         mlog(ML_ERROR,
1452              "Filecheck: invalid dinode #%llu: signature = %.*s\n",
1453              (unsigned long long)bh->b_blocknr, 7, di->i_signature);
1454         rc = -OCFS2_FILECHECK_ERR_INVALIDINO;
1455         goto bail;
1456     } else if (rc)
1457         goto bail;
1458 
1459     if (le64_to_cpu(di->i_blkno) != bh->b_blocknr) {
1460         mlog(ML_ERROR,
1461              "Filecheck: invalid dinode #%llu: i_blkno is %llu\n",
1462              (unsigned long long)bh->b_blocknr,
1463              (unsigned long long)le64_to_cpu(di->i_blkno));
1464         rc = -OCFS2_FILECHECK_ERR_BLOCKNO;
1465         goto bail;
1466     }
1467 
1468     if (!(di->i_flags & cpu_to_le32(OCFS2_VALID_FL))) {
1469         mlog(ML_ERROR,
1470              "Filecheck: invalid dinode #%llu: OCFS2_VALID_FL "
1471              "not set\n",
1472              (unsigned long long)bh->b_blocknr);
1473         rc = -OCFS2_FILECHECK_ERR_VALIDFLAG;
1474         goto bail;
1475     }
1476 
1477     if (le32_to_cpu(di->i_fs_generation) !=
1478         OCFS2_SB(sb)->fs_generation) {
1479         mlog(ML_ERROR,
1480              "Filecheck: invalid dinode #%llu: fs_generation is %u\n",
1481              (unsigned long long)bh->b_blocknr,
1482              le32_to_cpu(di->i_fs_generation));
1483         rc = -OCFS2_FILECHECK_ERR_GENERATION;
1484     }
1485 
1486 bail:
1487     return rc;
1488 }
1489 
1490 static int ocfs2_filecheck_repair_inode_block(struct super_block *sb,
1491                           struct buffer_head *bh)
1492 {
1493     int changed = 0;
1494     struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
1495 
1496     if (!ocfs2_filecheck_validate_inode_block(sb, bh))
1497         return 0;
1498 
1499     trace_ocfs2_filecheck_repair_inode_block(
1500         (unsigned long long)bh->b_blocknr);
1501 
1502     if (ocfs2_is_hard_readonly(OCFS2_SB(sb)) ||
1503         ocfs2_is_soft_readonly(OCFS2_SB(sb))) {
1504         mlog(ML_ERROR,
1505              "Filecheck: cannot repair dinode #%llu "
1506              "on readonly filesystem\n",
1507              (unsigned long long)bh->b_blocknr);
1508         return -OCFS2_FILECHECK_ERR_READONLY;
1509     }
1510 
1511     if (buffer_jbd(bh)) {
1512         mlog(ML_ERROR,
1513              "Filecheck: cannot repair dinode #%llu, "
1514              "its buffer is in jbd\n",
1515              (unsigned long long)bh->b_blocknr);
1516         return -OCFS2_FILECHECK_ERR_INJBD;
1517     }
1518 
1519     if (!OCFS2_IS_VALID_DINODE(di)) {
1520         /* Cannot fix invalid inode block */
1521         return -OCFS2_FILECHECK_ERR_INVALIDINO;
1522     }
1523 
1524     if (!(di->i_flags & cpu_to_le32(OCFS2_VALID_FL))) {
1525         /* Cannot just add VALID_FL flag back as a fix,
1526          * need more things to check here.
1527          */
1528         return -OCFS2_FILECHECK_ERR_VALIDFLAG;
1529     }
1530 
1531     if (le64_to_cpu(di->i_blkno) != bh->b_blocknr) {
1532         di->i_blkno = cpu_to_le64(bh->b_blocknr);
1533         changed = 1;
1534         mlog(ML_ERROR,
1535              "Filecheck: reset dinode #%llu: i_blkno to %llu\n",
1536              (unsigned long long)bh->b_blocknr,
1537              (unsigned long long)le64_to_cpu(di->i_blkno));
1538     }
1539 
1540     if (le32_to_cpu(di->i_fs_generation) !=
1541         OCFS2_SB(sb)->fs_generation) {
1542         di->i_fs_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation);
1543         changed = 1;
1544         mlog(ML_ERROR,
1545              "Filecheck: reset dinode #%llu: fs_generation to %u\n",
1546              (unsigned long long)bh->b_blocknr,
1547              le32_to_cpu(di->i_fs_generation));
1548     }
1549 
1550     if (changed || ocfs2_validate_meta_ecc(sb, bh->b_data, &di->i_check)) {
1551         ocfs2_compute_meta_ecc(sb, bh->b_data, &di->i_check);
1552         mark_buffer_dirty(bh);
1553         mlog(ML_ERROR,
1554              "Filecheck: reset dinode #%llu: compute meta ecc\n",
1555              (unsigned long long)bh->b_blocknr);
1556     }
1557 
1558     return 0;
1559 }
1560 
1561 static int
1562 ocfs2_filecheck_read_inode_block_full(struct inode *inode,
1563                       struct buffer_head **bh,
1564                       int flags, int type)
1565 {
1566     int rc;
1567     struct buffer_head *tmp = *bh;
1568 
1569     if (!type) /* Check inode block */
1570         rc = ocfs2_read_blocks(INODE_CACHE(inode),
1571                 OCFS2_I(inode)->ip_blkno,
1572                 1, &tmp, flags,
1573                 ocfs2_filecheck_validate_inode_block);
1574     else /* Repair inode block */
1575         rc = ocfs2_read_blocks(INODE_CACHE(inode),
1576                 OCFS2_I(inode)->ip_blkno,
1577                 1, &tmp, flags,
1578                 ocfs2_filecheck_repair_inode_block);
1579 
1580     /* If ocfs2_read_blocks() got us a new bh, pass it up. */
1581     if (!rc && !*bh)
1582         *bh = tmp;
1583 
1584     return rc;
1585 }
1586 
1587 int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh,
1588                 int flags)
1589 {
1590     int rc;
1591     struct buffer_head *tmp = *bh;
1592 
1593     rc = ocfs2_read_blocks(INODE_CACHE(inode), OCFS2_I(inode)->ip_blkno,
1594                    1, &tmp, flags, ocfs2_validate_inode_block);
1595 
1596     /* If ocfs2_read_blocks() got us a new bh, pass it up. */
1597     if (!rc && !*bh)
1598         *bh = tmp;
1599 
1600     return rc;
1601 }
1602 
/* Read the inode's block with no extra read flags (flags == 0). */
int ocfs2_read_inode_block(struct inode *inode, struct buffer_head **bh)
{
    int rc = ocfs2_read_inode_block_full(inode, bh, 0);

    return rc;
}
1607 
1608 
1609 static u64 ocfs2_inode_cache_owner(struct ocfs2_caching_info *ci)
1610 {
1611     struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
1612 
1613     return oi->ip_blkno;
1614 }
1615 
1616 static struct super_block *ocfs2_inode_cache_get_super(struct ocfs2_caching_info *ci)
1617 {
1618     struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
1619 
1620     return oi->vfs_inode.i_sb;
1621 }
1622 
1623 static void ocfs2_inode_cache_lock(struct ocfs2_caching_info *ci)
1624 {
1625     struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
1626 
1627     spin_lock(&oi->ip_lock);
1628 }
1629 
1630 static void ocfs2_inode_cache_unlock(struct ocfs2_caching_info *ci)
1631 {
1632     struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
1633 
1634     spin_unlock(&oi->ip_lock);
1635 }
1636 
1637 static void ocfs2_inode_cache_io_lock(struct ocfs2_caching_info *ci)
1638 {
1639     struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
1640 
1641     mutex_lock(&oi->ip_io_mutex);
1642 }
1643 
1644 static void ocfs2_inode_cache_io_unlock(struct ocfs2_caching_info *ci)
1645 {
1646     struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
1647 
1648     mutex_unlock(&oi->ip_io_mutex);
1649 }
1650 
1651 const struct ocfs2_caching_operations ocfs2_inode_caching_ops = {
1652     .co_owner       = ocfs2_inode_cache_owner,
1653     .co_get_super       = ocfs2_inode_cache_get_super,
1654     .co_cache_lock      = ocfs2_inode_cache_lock,
1655     .co_cache_unlock    = ocfs2_inode_cache_unlock,
1656     .co_io_lock     = ocfs2_inode_cache_io_lock,
1657     .co_io_unlock       = ocfs2_inode_cache_io_unlock,
1658 };
1659