Back to home page

LXR

 
 

    


0001 /*
0002  * (C) 1997 Linus Torvalds
0003  * (C) 1999 Andrea Arcangeli <andrea@suse.de> (dynamic inode allocation)
0004  */
0005 #include <linux/export.h>
0006 #include <linux/fs.h>
0007 #include <linux/mm.h>
0008 #include <linux/backing-dev.h>
0009 #include <linux/hash.h>
0010 #include <linux/swap.h>
0011 #include <linux/security.h>
0012 #include <linux/cdev.h>
0013 #include <linux/bootmem.h>
0014 #include <linux/fsnotify.h>
0015 #include <linux/mount.h>
0016 #include <linux/posix_acl.h>
0017 #include <linux/prefetch.h>
0018 #include <linux/buffer_head.h> /* for inode_has_buffers */
0019 #include <linux/ratelimit.h>
0020 #include <linux/list_lru.h>
0021 #include <trace/events/writeback.h>
0022 #include "internal.h"
0023 
0024 /*
0025  * Inode locking rules:
0026  *
0027  * inode->i_lock protects:
0028  *   inode->i_state, inode->i_hash, __iget()
0029  * Inode LRU list locks protect:
0030  *   inode->i_sb->s_inode_lru, inode->i_lru
0031  * inode->i_sb->s_inode_list_lock protects:
0032  *   inode->i_sb->s_inodes, inode->i_sb_list
0033  * bdi->wb.list_lock protects:
0034  *   bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_io_list
0035  * inode_hash_lock protects:
0036  *   inode_hashtable, inode->i_hash
0037  *
0038  * Lock ordering:
0039  *
0040  * inode->i_sb->s_inode_list_lock
0041  *   inode->i_lock
0042  *     Inode LRU list locks
0043  *
0044  * bdi->wb.list_lock
0045  *   inode->i_lock
0046  *
0047  * inode_hash_lock
0048  *   inode->i_sb->s_inode_list_lock
0049  *   inode->i_lock
0050  *
0051  * iunique_lock
0052  *   inode_hash_lock
0053  */
0054 
0055 static unsigned int i_hash_mask __read_mostly;
0056 static unsigned int i_hash_shift __read_mostly;
0057 static struct hlist_head *inode_hashtable __read_mostly;
0058 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);
0059 
0060 /*
0061  * Empty aops. Can be used for the cases where the user does not
0062  * define any of the address_space operations.
0063  */
0064 const struct address_space_operations empty_aops = {
0065 };
0066 EXPORT_SYMBOL(empty_aops);
0067 
0068 /*
0069  * Statistics gathering..
0070  */
0071 struct inodes_stat_t inodes_stat;
0072 
0073 static DEFINE_PER_CPU(unsigned long, nr_inodes);
0074 static DEFINE_PER_CPU(unsigned long, nr_unused);
0075 
0076 static struct kmem_cache *inode_cachep __read_mostly;
0077 
0078 static long get_nr_inodes(void)
0079 {
0080     int i;
0081     long sum = 0;
0082     for_each_possible_cpu(i)
0083         sum += per_cpu(nr_inodes, i);
0084     return sum < 0 ? 0 : sum;
0085 }
0086 
0087 static inline long get_nr_inodes_unused(void)
0088 {
0089     int i;
0090     long sum = 0;
0091     for_each_possible_cpu(i)
0092         sum += per_cpu(nr_unused, i);
0093     return sum < 0 ? 0 : sum;
0094 }
0095 
/*
 * Rough estimate only: this is the number of inodes that are not counted
 * as unused, not a true count of dirty inodes.  Never returns a negative
 * value.
 */
long get_nr_dirty_inodes(void)
{
    long in_use = get_nr_inodes() - get_nr_inodes_unused();

    if (in_use <= 0)
        return 0;
    return in_use;
}
0102 
/*
 * sysctl handler for the inode counters: refresh the nr_inodes and
 * nr_unused fields of inodes_stat from the per-CPU counters, then let
 * proc_doulongvec_minmax() service the request.
 */
#ifdef CONFIG_SYSCTL
int proc_nr_inodes(struct ctl_table *table, int write,
           void __user *buffer, size_t *lenp, loff_t *ppos)
{
    struct inodes_stat_t *stat = &inodes_stat;

    stat->nr_inodes = get_nr_inodes();
    stat->nr_unused = get_nr_inodes_unused();

    return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}
#endif
0115 
0116 static int no_open(struct inode *inode, struct file *file)
0117 {
0118     return -ENXIO;
0119 }
0120 
0121 /**
0122  * inode_init_always - perform inode structure intialisation
0123  * @sb: superblock inode belongs to
0124  * @inode: inode to initialise
0125  *
0126  * These are initializations that need to be done on every inode
0127  * allocation as the fields are not initialised by slab allocation.
0128  */
0129 int inode_init_always(struct super_block *sb, struct inode *inode)
0130 {
0131     static const struct inode_operations empty_iops;
0132     static const struct file_operations no_open_fops = {.open = no_open};
0133     struct address_space *const mapping = &inode->i_data;
0134 
0135     inode->i_sb = sb;
0136     inode->i_blkbits = sb->s_blocksize_bits;
0137     inode->i_flags = 0;
0138     atomic_set(&inode->i_count, 1);
0139     inode->i_op = &empty_iops;
0140     inode->i_fop = &no_open_fops;
0141     inode->__i_nlink = 1;
0142     inode->i_opflags = 0;
0143     if (sb->s_xattr)
0144         inode->i_opflags |= IOP_XATTR;
0145     i_uid_write(inode, 0);
0146     i_gid_write(inode, 0);
0147     atomic_set(&inode->i_writecount, 0);
0148     inode->i_size = 0;
0149     inode->i_blocks = 0;
0150     inode->i_bytes = 0;
0151     inode->i_generation = 0;
0152     inode->i_pipe = NULL;
0153     inode->i_bdev = NULL;
0154     inode->i_cdev = NULL;
0155     inode->i_link = NULL;
0156     inode->i_dir_seq = 0;
0157     inode->i_rdev = 0;
0158     inode->dirtied_when = 0;
0159 
0160 #ifdef CONFIG_CGROUP_WRITEBACK
0161     inode->i_wb_frn_winner = 0;
0162     inode->i_wb_frn_avg_time = 0;
0163     inode->i_wb_frn_history = 0;
0164 #endif
0165 
0166     if (security_inode_alloc(inode))
0167         goto out;
0168     spin_lock_init(&inode->i_lock);
0169     lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);
0170 
0171     init_rwsem(&inode->i_rwsem);
0172     lockdep_set_class(&inode->i_rwsem, &sb->s_type->i_mutex_key);
0173 
0174     atomic_set(&inode->i_dio_count, 0);
0175 
0176     mapping->a_ops = &empty_aops;
0177     mapping->host = inode;
0178     mapping->flags = 0;
0179     atomic_set(&mapping->i_mmap_writable, 0);
0180     mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
0181     mapping->private_data = NULL;
0182     mapping->writeback_index = 0;
0183     inode->i_private = NULL;
0184     inode->i_mapping = mapping;
0185     INIT_HLIST_HEAD(&inode->i_dentry);  /* buggered by rcu freeing */
0186 #ifdef CONFIG_FS_POSIX_ACL
0187     inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
0188 #endif
0189 
0190 #ifdef CONFIG_FSNOTIFY
0191     inode->i_fsnotify_mask = 0;
0192 #endif
0193     inode->i_flctx = NULL;
0194     this_cpu_inc(nr_inodes);
0195 
0196     return 0;
0197 out:
0198     return -ENOMEM;
0199 }
0200 EXPORT_SYMBOL(inode_init_always);
0201 
0202 static struct inode *alloc_inode(struct super_block *sb)
0203 {
0204     struct inode *inode;
0205 
0206     if (sb->s_op->alloc_inode)
0207         inode = sb->s_op->alloc_inode(sb);
0208     else
0209         inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL);
0210 
0211     if (!inode)
0212         return NULL;
0213 
0214     if (unlikely(inode_init_always(sb, inode))) {
0215         if (inode->i_sb->s_op->destroy_inode)
0216             inode->i_sb->s_op->destroy_inode(inode);
0217         else
0218             kmem_cache_free(inode_cachep, inode);
0219         return NULL;
0220     }
0221 
0222     return inode;
0223 }
0224 
0225 void free_inode_nonrcu(struct inode *inode)
0226 {
0227     kmem_cache_free(inode_cachep, inode);
0228 }
0229 EXPORT_SYMBOL(free_inode_nonrcu);
0230 
0231 void __destroy_inode(struct inode *inode)
0232 {
0233     BUG_ON(inode_has_buffers(inode));
0234     inode_detach_wb(inode);
0235     security_inode_free(inode);
0236     fsnotify_inode_delete(inode);
0237     locks_free_lock_context(inode);
0238     if (!inode->i_nlink) {
0239         WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0);
0240         atomic_long_dec(&inode->i_sb->s_remove_count);
0241     }
0242 
0243 #ifdef CONFIG_FS_POSIX_ACL
0244     if (inode->i_acl && !is_uncached_acl(inode->i_acl))
0245         posix_acl_release(inode->i_acl);
0246     if (inode->i_default_acl && !is_uncached_acl(inode->i_default_acl))
0247         posix_acl_release(inode->i_default_acl);
0248 #endif
0249     this_cpu_dec(nr_inodes);
0250 }
0251 EXPORT_SYMBOL(__destroy_inode);
0252 
0253 static void i_callback(struct rcu_head *head)
0254 {
0255     struct inode *inode = container_of(head, struct inode, i_rcu);
0256     kmem_cache_free(inode_cachep, inode);
0257 }
0258 
0259 static void destroy_inode(struct inode *inode)
0260 {
0261     BUG_ON(!list_empty(&inode->i_lru));
0262     __destroy_inode(inode);
0263     if (inode->i_sb->s_op->destroy_inode)
0264         inode->i_sb->s_op->destroy_inode(inode);
0265     else
0266         call_rcu(&inode->i_rcu, i_callback);
0267 }
0268 
0269 /**
0270  * drop_nlink - directly drop an inode's link count
0271  * @inode: inode
0272  *
0273  * This is a low-level filesystem helper to replace any
0274  * direct filesystem manipulation of i_nlink.  In cases
0275  * where we are attempting to track writes to the
0276  * filesystem, a decrement to zero means an imminent
0277  * write when the file is truncated and actually unlinked
0278  * on the filesystem.
0279  */
0280 void drop_nlink(struct inode *inode)
0281 {
0282     WARN_ON(inode->i_nlink == 0);
0283     inode->__i_nlink--;
0284     if (!inode->i_nlink)
0285         atomic_long_inc(&inode->i_sb->s_remove_count);
0286 }
0287 EXPORT_SYMBOL(drop_nlink);
0288 
0289 /**
0290  * clear_nlink - directly zero an inode's link count
0291  * @inode: inode
0292  *
0293  * This is a low-level filesystem helper to replace any
0294  * direct filesystem manipulation of i_nlink.  See
0295  * drop_nlink() for why we care about i_nlink hitting zero.
0296  */
0297 void clear_nlink(struct inode *inode)
0298 {
0299     if (inode->i_nlink) {
0300         inode->__i_nlink = 0;
0301         atomic_long_inc(&inode->i_sb->s_remove_count);
0302     }
0303 }
0304 EXPORT_SYMBOL(clear_nlink);
0305 
0306 /**
0307  * set_nlink - directly set an inode's link count
0308  * @inode: inode
0309  * @nlink: new nlink (should be non-zero)
0310  *
0311  * This is a low-level filesystem helper to replace any
0312  * direct filesystem manipulation of i_nlink.
0313  */
0314 void set_nlink(struct inode *inode, unsigned int nlink)
0315 {
0316     if (!nlink) {
0317         clear_nlink(inode);
0318     } else {
0319         /* Yes, some filesystems do change nlink from zero to one */
0320         if (inode->i_nlink == 0)
0321             atomic_long_dec(&inode->i_sb->s_remove_count);
0322 
0323         inode->__i_nlink = nlink;
0324     }
0325 }
0326 EXPORT_SYMBOL(set_nlink);
0327 
0328 /**
0329  * inc_nlink - directly increment an inode's link count
0330  * @inode: inode
0331  *
0332  * This is a low-level filesystem helper to replace any
0333  * direct filesystem manipulation of i_nlink.  Currently,
0334  * it is only here for parity with dec_nlink().
0335  */
0336 void inc_nlink(struct inode *inode)
0337 {
0338     if (unlikely(inode->i_nlink == 0)) {
0339         WARN_ON(!(inode->i_state & I_LINKABLE));
0340         atomic_long_dec(&inode->i_sb->s_remove_count);
0341     }
0342 
0343     inode->__i_nlink++;
0344 }
0345 EXPORT_SYMBOL(inc_nlink);
0346 
0347 void address_space_init_once(struct address_space *mapping)
0348 {
0349     memset(mapping, 0, sizeof(*mapping));
0350     INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC | __GFP_ACCOUNT);
0351     spin_lock_init(&mapping->tree_lock);
0352     init_rwsem(&mapping->i_mmap_rwsem);
0353     INIT_LIST_HEAD(&mapping->private_list);
0354     spin_lock_init(&mapping->private_lock);
0355     mapping->i_mmap = RB_ROOT;
0356 }
0357 EXPORT_SYMBOL(address_space_init_once);
0358 
0359 /*
0360  * These are initializations that only need to be done
0361  * once, because the fields are idempotent across use
0362  * of the inode, so let the slab aware of that.
0363  */
0364 void inode_init_once(struct inode *inode)
0365 {
0366     memset(inode, 0, sizeof(*inode));
0367     INIT_HLIST_NODE(&inode->i_hash);
0368     INIT_LIST_HEAD(&inode->i_devices);
0369     INIT_LIST_HEAD(&inode->i_io_list);
0370     INIT_LIST_HEAD(&inode->i_wb_list);
0371     INIT_LIST_HEAD(&inode->i_lru);
0372     address_space_init_once(&inode->i_data);
0373     i_size_ordered_init(inode);
0374 #ifdef CONFIG_FSNOTIFY
0375     INIT_HLIST_HEAD(&inode->i_fsnotify_marks);
0376 #endif
0377 }
0378 EXPORT_SYMBOL(inode_init_once);
0379 
/* void-pointer adapter around inode_init_once(). */
static void init_once(void *foo)
{
    inode_init_once(foo);
}
0386 
0387 /*
0388  * inode->i_lock must be held
0389  */
0390 void __iget(struct inode *inode)
0391 {
0392     atomic_inc(&inode->i_count);
0393 }
0394 
0395 /*
0396  * get additional reference to inode; caller must already hold one.
0397  */
0398 void ihold(struct inode *inode)
0399 {
0400     WARN_ON(atomic_inc_return(&inode->i_count) < 2);
0401 }
0402 EXPORT_SYMBOL(ihold);
0403 
0404 static void inode_lru_list_add(struct inode *inode)
0405 {
0406     if (list_lru_add(&inode->i_sb->s_inode_lru, &inode->i_lru))
0407         this_cpu_inc(nr_unused);
0408 }
0409 
0410 /*
0411  * Add inode to LRU if needed (inode is unused and clean).
0412  *
0413  * Needs inode->i_lock held.
0414  */
0415 void inode_add_lru(struct inode *inode)
0416 {
0417     if (!(inode->i_state & (I_DIRTY_ALL | I_SYNC |
0418                 I_FREEING | I_WILL_FREE)) &&
0419         !atomic_read(&inode->i_count) && inode->i_sb->s_flags & MS_ACTIVE)
0420         inode_lru_list_add(inode);
0421 }
0422 
0423 
0424 static void inode_lru_list_del(struct inode *inode)
0425 {
0426 
0427     if (list_lru_del(&inode->i_sb->s_inode_lru, &inode->i_lru))
0428         this_cpu_dec(nr_unused);
0429 }
0430 
0431 /**
0432  * inode_sb_list_add - add inode to the superblock list of inodes
0433  * @inode: inode to add
0434  */
0435 void inode_sb_list_add(struct inode *inode)
0436 {
0437     spin_lock(&inode->i_sb->s_inode_list_lock);
0438     list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
0439     spin_unlock(&inode->i_sb->s_inode_list_lock);
0440 }
0441 EXPORT_SYMBOL_GPL(inode_sb_list_add);
0442 
0443 static inline void inode_sb_list_del(struct inode *inode)
0444 {
0445     if (!list_empty(&inode->i_sb_list)) {
0446         spin_lock(&inode->i_sb->s_inode_list_lock);
0447         list_del_init(&inode->i_sb_list);
0448         spin_unlock(&inode->i_sb->s_inode_list_lock);
0449     }
0450 }
0451 
0452 static unsigned long hash(struct super_block *sb, unsigned long hashval)
0453 {
0454     unsigned long tmp;
0455 
0456     tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
0457             L1_CACHE_BYTES;
0458     tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> i_hash_shift);
0459     return tmp & i_hash_mask;
0460 }
0461 
0462 /**
0463  *  __insert_inode_hash - hash an inode
0464  *  @inode: unhashed inode
0465  *  @hashval: unsigned long value used to locate this object in the
0466  *      inode_hashtable.
0467  *
0468  *  Add an inode to the inode hash for this superblock.
0469  */
0470 void __insert_inode_hash(struct inode *inode, unsigned long hashval)
0471 {
0472     struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval);
0473 
0474     spin_lock(&inode_hash_lock);
0475     spin_lock(&inode->i_lock);
0476     hlist_add_head(&inode->i_hash, b);
0477     spin_unlock(&inode->i_lock);
0478     spin_unlock(&inode_hash_lock);
0479 }
0480 EXPORT_SYMBOL(__insert_inode_hash);
0481 
0482 /**
0483  *  __remove_inode_hash - remove an inode from the hash
0484  *  @inode: inode to unhash
0485  *
0486  *  Remove an inode from the superblock.
0487  */
0488 void __remove_inode_hash(struct inode *inode)
0489 {
0490     spin_lock(&inode_hash_lock);
0491     spin_lock(&inode->i_lock);
0492     hlist_del_init(&inode->i_hash);
0493     spin_unlock(&inode->i_lock);
0494     spin_unlock(&inode_hash_lock);
0495 }
0496 EXPORT_SYMBOL(__remove_inode_hash);
0497 
0498 void clear_inode(struct inode *inode)
0499 {
0500     might_sleep();
0501     /*
0502      * We have to cycle tree_lock here because reclaim can be still in the
0503      * process of removing the last page (in __delete_from_page_cache())
0504      * and we must not free mapping under it.
0505      */
0506     spin_lock_irq(&inode->i_data.tree_lock);
0507     BUG_ON(inode->i_data.nrpages);
0508     BUG_ON(inode->i_data.nrexceptional);
0509     spin_unlock_irq(&inode->i_data.tree_lock);
0510     BUG_ON(!list_empty(&inode->i_data.private_list));
0511     BUG_ON(!(inode->i_state & I_FREEING));
0512     BUG_ON(inode->i_state & I_CLEAR);
0513     BUG_ON(!list_empty(&inode->i_wb_list));
0514     /* don't need i_lock here, no concurrent mods to i_state */
0515     inode->i_state = I_FREEING | I_CLEAR;
0516 }
0517 EXPORT_SYMBOL(clear_inode);
0518 
0519 /*
0520  * Free the inode passed in, removing it from the lists it is still connected
0521  * to. We remove any pages still attached to the inode and wait for any IO that
0522  * is still in progress before finally destroying the inode.
0523  *
0524  * An inode must already be marked I_FREEING so that we avoid the inode being
0525  * moved back onto lists if we race with other code that manipulates the lists
0526  * (e.g. writeback_single_inode). The caller is responsible for setting this.
0527  *
0528  * An inode must already be removed from the LRU list before being evicted from
0529  * the cache. This should occur atomically with setting the I_FREEING state
0530  * flag, so no inodes here should ever be on the LRU when being evicted.
0531  */
0532 static void evict(struct inode *inode)
0533 {
0534     const struct super_operations *op = inode->i_sb->s_op;
0535 
0536     BUG_ON(!(inode->i_state & I_FREEING));
0537     BUG_ON(!list_empty(&inode->i_lru));
0538 
0539     if (!list_empty(&inode->i_io_list))
0540         inode_io_list_del(inode);
0541 
0542     inode_sb_list_del(inode);
0543 
0544     /*
0545      * Wait for flusher thread to be done with the inode so that filesystem
0546      * does not start destroying it while writeback is still running. Since
0547      * the inode has I_FREEING set, flusher thread won't start new work on
0548      * the inode.  We just have to wait for running writeback to finish.
0549      */
0550     inode_wait_for_writeback(inode);
0551 
0552     if (op->evict_inode) {
0553         op->evict_inode(inode);
0554     } else {
0555         truncate_inode_pages_final(&inode->i_data);
0556         clear_inode(inode);
0557     }
0558     if (S_ISBLK(inode->i_mode) && inode->i_bdev)
0559         bd_forget(inode);
0560     if (S_ISCHR(inode->i_mode) && inode->i_cdev)
0561         cd_forget(inode);
0562 
0563     remove_inode_hash(inode);
0564 
0565     spin_lock(&inode->i_lock);
0566     wake_up_bit(&inode->i_state, __I_NEW);
0567     BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
0568     spin_unlock(&inode->i_lock);
0569 
0570     destroy_inode(inode);
0571 }
0572 
0573 /*
0574  * dispose_list - dispose of the contents of a local list
0575  * @head: the head of the list to free
0576  *
0577  * Dispose-list gets a local list with local inodes in it, so it doesn't
0578  * need to worry about list corruption and SMP locks.
0579  */
0580 static void dispose_list(struct list_head *head)
0581 {
0582     while (!list_empty(head)) {
0583         struct inode *inode;
0584 
0585         inode = list_first_entry(head, struct inode, i_lru);
0586         list_del_init(&inode->i_lru);
0587 
0588         evict(inode);
0589         cond_resched();
0590     }
0591 }
0592 
0593 /**
0594  * evict_inodes - evict all evictable inodes for a superblock
0595  * @sb:     superblock to operate on
0596  *
0597  * Make sure that no inodes with zero refcount are retained.  This is
0598  * called by superblock shutdown after having MS_ACTIVE flag removed,
0599  * so any inode reaching zero refcount during or after that call will
0600  * be immediately evicted.
0601  */
0602 void evict_inodes(struct super_block *sb)
0603 {
0604     struct inode *inode, *next;
0605     LIST_HEAD(dispose);
0606 
0607 again:
0608     spin_lock(&sb->s_inode_list_lock);
0609     list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
0610         if (atomic_read(&inode->i_count))
0611             continue;
0612 
0613         spin_lock(&inode->i_lock);
0614         if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
0615             spin_unlock(&inode->i_lock);
0616             continue;
0617         }
0618 
0619         inode->i_state |= I_FREEING;
0620         inode_lru_list_del(inode);
0621         spin_unlock(&inode->i_lock);
0622         list_add(&inode->i_lru, &dispose);
0623 
0624         /*
0625          * We can have a ton of inodes to evict at unmount time given
0626          * enough memory, check to see if we need to go to sleep for a
0627          * bit so we don't livelock.
0628          */
0629         if (need_resched()) {
0630             spin_unlock(&sb->s_inode_list_lock);
0631             cond_resched();
0632             dispose_list(&dispose);
0633             goto again;
0634         }
0635     }
0636     spin_unlock(&sb->s_inode_list_lock);
0637 
0638     dispose_list(&dispose);
0639 }
0640 
0641 /**
0642  * invalidate_inodes    - attempt to free all inodes on a superblock
0643  * @sb:     superblock to operate on
0644  * @kill_dirty: flag to guide handling of dirty inodes
0645  *
0646  * Attempts to free all inodes for a given superblock.  If there were any
0647  * busy inodes return a non-zero value, else zero.
0648  * If @kill_dirty is set, discard dirty inodes too, otherwise treat
0649  * them as busy.
0650  */
0651 int invalidate_inodes(struct super_block *sb, bool kill_dirty)
0652 {
0653     int busy = 0;
0654     struct inode *inode, *next;
0655     LIST_HEAD(dispose);
0656 
0657     spin_lock(&sb->s_inode_list_lock);
0658     list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
0659         spin_lock(&inode->i_lock);
0660         if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
0661             spin_unlock(&inode->i_lock);
0662             continue;
0663         }
0664         if (inode->i_state & I_DIRTY_ALL && !kill_dirty) {
0665             spin_unlock(&inode->i_lock);
0666             busy = 1;
0667             continue;
0668         }
0669         if (atomic_read(&inode->i_count)) {
0670             spin_unlock(&inode->i_lock);
0671             busy = 1;
0672             continue;
0673         }
0674 
0675         inode->i_state |= I_FREEING;
0676         inode_lru_list_del(inode);
0677         spin_unlock(&inode->i_lock);
0678         list_add(&inode->i_lru, &dispose);
0679     }
0680     spin_unlock(&sb->s_inode_list_lock);
0681 
0682     dispose_list(&dispose);
0683 
0684     return busy;
0685 }
0686 
0687 /*
0688  * Isolate the inode from the LRU in preparation for freeing it.
0689  *
0690  * Any inodes which are pinned purely because of attached pagecache have their
0691  * pagecache removed.  If the inode has metadata buffers attached to
0692  * mapping->private_list then try to remove them.
0693  *
0694  * If the inode has the I_REFERENCED flag set, then it means that it has been
0695  * used recently - the flag is set in iput_final(). When we encounter such an
0696  * inode, clear the flag and move it to the back of the LRU so it gets another
0697  * pass through the LRU before it gets reclaimed. This is necessary because of
0698  * the fact we are doing lazy LRU updates to minimise lock contention so the
0699  * LRU does not have strict ordering. Hence we don't want to reclaim inodes
0700  * with this flag set because they are the inodes that are out of order.
0701  */
0702 static enum lru_status inode_lru_isolate(struct list_head *item,
0703         struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
0704 {
0705     struct list_head *freeable = arg;
0706     struct inode    *inode = container_of(item, struct inode, i_lru);
0707 
0708     /*
0709      * we are inverting the lru lock/inode->i_lock here, so use a trylock.
0710      * If we fail to get the lock, just skip it.
0711      */
0712     if (!spin_trylock(&inode->i_lock))
0713         return LRU_SKIP;
0714 
0715     /*
0716      * Referenced or dirty inodes are still in use. Give them another pass
0717      * through the LRU as we canot reclaim them now.
0718      */
0719     if (atomic_read(&inode->i_count) ||
0720         (inode->i_state & ~I_REFERENCED)) {
0721         list_lru_isolate(lru, &inode->i_lru);
0722         spin_unlock(&inode->i_lock);
0723         this_cpu_dec(nr_unused);
0724         return LRU_REMOVED;
0725     }
0726 
0727     /* recently referenced inodes get one more pass */
0728     if (inode->i_state & I_REFERENCED) {
0729         inode->i_state &= ~I_REFERENCED;
0730         spin_unlock(&inode->i_lock);
0731         return LRU_ROTATE;
0732     }
0733 
0734     if (inode_has_buffers(inode) || inode->i_data.nrpages) {
0735         __iget(inode);
0736         spin_unlock(&inode->i_lock);
0737         spin_unlock(lru_lock);
0738         if (remove_inode_buffers(inode)) {
0739             unsigned long reap;
0740             reap = invalidate_mapping_pages(&inode->i_data, 0, -1);
0741             if (current_is_kswapd())
0742                 __count_vm_events(KSWAPD_INODESTEAL, reap);
0743             else
0744                 __count_vm_events(PGINODESTEAL, reap);
0745             if (current->reclaim_state)
0746                 current->reclaim_state->reclaimed_slab += reap;
0747         }
0748         iput(inode);
0749         spin_lock(lru_lock);
0750         return LRU_RETRY;
0751     }
0752 
0753     WARN_ON(inode->i_state & I_NEW);
0754     inode->i_state |= I_FREEING;
0755     list_lru_isolate_move(lru, &inode->i_lru, freeable);
0756     spin_unlock(&inode->i_lock);
0757 
0758     this_cpu_dec(nr_unused);
0759     return LRU_REMOVED;
0760 }
0761 
0762 /*
0763  * Walk the superblock inode LRU for freeable inodes and attempt to free them.
0764  * This is called from the superblock shrinker function with a number of inodes
0765  * to trim from the LRU. Inodes to be freed are moved to a temporary list and
0766  * then are freed outside inode_lock by dispose_list().
0767  */
0768 long prune_icache_sb(struct super_block *sb, struct shrink_control *sc)
0769 {
0770     LIST_HEAD(freeable);
0771     long freed;
0772 
0773     freed = list_lru_shrink_walk(&sb->s_inode_lru, sc,
0774                      inode_lru_isolate, &freeable);
0775     dispose_list(&freeable);
0776     return freed;
0777 }
0778 
0779 static void __wait_on_freeing_inode(struct inode *inode);
0780 /*
0781  * Called with the inode lock held.
0782  */
0783 static struct inode *find_inode(struct super_block *sb,
0784                 struct hlist_head *head,
0785                 int (*test)(struct inode *, void *),
0786                 void *data)
0787 {
0788     struct inode *inode = NULL;
0789 
0790 repeat:
0791     hlist_for_each_entry(inode, head, i_hash) {
0792         if (inode->i_sb != sb)
0793             continue;
0794         if (!test(inode, data))
0795             continue;
0796         spin_lock(&inode->i_lock);
0797         if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
0798             __wait_on_freeing_inode(inode);
0799             goto repeat;
0800         }
0801         __iget(inode);
0802         spin_unlock(&inode->i_lock);
0803         return inode;
0804     }
0805     return NULL;
0806 }
0807 
0808 /*
0809  * find_inode_fast is the fast path version of find_inode, see the comment at
0810  * iget_locked for details.
0811  */
0812 static struct inode *find_inode_fast(struct super_block *sb,
0813                 struct hlist_head *head, unsigned long ino)
0814 {
0815     struct inode *inode = NULL;
0816 
0817 repeat:
0818     hlist_for_each_entry(inode, head, i_hash) {
0819         if (inode->i_ino != ino)
0820             continue;
0821         if (inode->i_sb != sb)
0822             continue;
0823         spin_lock(&inode->i_lock);
0824         if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
0825             __wait_on_freeing_inode(inode);
0826             goto repeat;
0827         }
0828         __iget(inode);
0829         spin_unlock(&inode->i_lock);
0830         return inode;
0831     }
0832     return NULL;
0833 }
0834 
0835 /*
0836  * Each cpu owns a range of LAST_INO_BATCH numbers.
0837  * 'shared_last_ino' is dirtied only once out of LAST_INO_BATCH allocations,
0838  * to renew the exhausted range.
0839  *
0840  * This does not significantly increase overflow rate because every CPU can
0841  * consume at most LAST_INO_BATCH-1 unused inode numbers. So there is
0842  * NR_CPUS*(LAST_INO_BATCH-1) wastage. At 4096 and 1024, this is ~0.1% of the
0843  * 2^32 range, and is a worst-case. Even a 50% wastage would only increase
0844  * overflow rate by 2x, which does not seem too significant.
0845  *
0846  * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
0847  * error if st_ino won't fit in target struct field. Use 32bit counter
0848  * here to attempt to avoid that.
0849  */
0850 #define LAST_INO_BATCH 1024
0851 static DEFINE_PER_CPU(unsigned int, last_ino);
0852 
0853 unsigned int get_next_ino(void)
0854 {
0855     unsigned int *p = &get_cpu_var(last_ino);
0856     unsigned int res = *p;
0857 
0858 #ifdef CONFIG_SMP
0859     if (unlikely((res & (LAST_INO_BATCH-1)) == 0)) {
0860         static atomic_t shared_last_ino;
0861         int next = atomic_add_return(LAST_INO_BATCH, &shared_last_ino);
0862 
0863         res = next - LAST_INO_BATCH;
0864     }
0865 #endif
0866 
0867     res++;
0868     /* get_next_ino should not provide a 0 inode number */
0869     if (unlikely(!res))
0870         res++;
0871     *p = res;
0872     put_cpu_var(last_ino);
0873     return res;
0874 }
0875 EXPORT_SYMBOL(get_next_ino);
0876 
0877 /**
0878  *  new_inode_pseudo    - obtain an inode
0879  *  @sb: superblock
0880  *
0881  *  Allocates a new inode for given superblock.
0882  *  Inode wont be chained in superblock s_inodes list
0883  *  This means :
0884  *  - fs can't be unmount
0885  *  - quotas, fsnotify, writeback can't work
0886  */
0887 struct inode *new_inode_pseudo(struct super_block *sb)
0888 {
0889     struct inode *inode = alloc_inode(sb);
0890 
0891     if (inode) {
0892         spin_lock(&inode->i_lock);
0893         inode->i_state = 0;
0894         spin_unlock(&inode->i_lock);
0895         INIT_LIST_HEAD(&inode->i_sb_list);
0896     }
0897     return inode;
0898 }
0899 
0900 /**
0901  *  new_inode   - obtain an inode
0902  *  @sb: superblock
0903  *
0904  *  Allocates a new inode for given superblock. The default gfp_mask
0905  *  for allocations related to inode->i_mapping is GFP_HIGHUSER_MOVABLE.
0906  *  If HIGHMEM pages are unsuitable or it is known that pages allocated
0907  *  for the page cache are not reclaimable or migratable,
0908  *  mapping_set_gfp_mask() must be called with suitable flags on the
0909  *  newly created inode's mapping
0910  *
0911  */
0912 struct inode *new_inode(struct super_block *sb)
0913 {
0914     struct inode *inode;
0915 
0916     spin_lock_prefetch(&sb->s_inode_list_lock);
0917 
0918     inode = new_inode_pseudo(sb);
0919     if (inode)
0920         inode_sb_list_add(inode);
0921     return inode;
0922 }
0923 EXPORT_SYMBOL(new_inode);
0924 
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
 * For a directory inode whose i_rwsem still carries the filesystem type's
 * default lockdep class (i_mutex_key), re-initialise the semaphore and move
 * it to the directory class (i_mutex_dir_key).  Non-directories, and
 * directories whose class the filesystem has already changed, are left
 * untouched.
 */
void lockdep_annotate_inode_mutex_key(struct inode *inode)
{
    struct file_system_type *fstype;

    if (!S_ISDIR(inode->i_mode))
        return;

    fstype = inode->i_sb->s_type;

    /* Set new key only if filesystem hasn't already changed it */
    if (!lockdep_match_class(&inode->i_rwsem, &fstype->i_mutex_key))
        return;

    /*
     * i_rwsem is re-initialised here, so nobody may actually be
     * holding it at this point.
     */
    init_rwsem(&inode->i_rwsem);
    lockdep_set_class(&inode->i_rwsem, &fstype->i_mutex_dir_key);
}
EXPORT_SYMBOL(lockdep_annotate_inode_mutex_key);
#endif
0945 
/**
 * unlock_new_inode - clear the I_NEW state and wake up any waiters
 * @inode:  new inode to unlock
 *
 * Called when the inode is fully initialised to clear the new state of the
 * inode and wake up anyone waiting for the inode to finish initialisation.
 *
 * Emits a WARN_ON() if I_NEW is not set on entry.  Both the state change
 * and the wake-up are performed while holding inode->i_lock.
 */
void unlock_new_inode(struct inode *inode)
{
    lockdep_annotate_inode_mutex_key(inode);
    spin_lock(&inode->i_lock);
    WARN_ON(!(inode->i_state & I_NEW));
    inode->i_state &= ~I_NEW;
    /* Full barrier between clearing I_NEW and waking the bit waiters. */
    smp_mb();
    wake_up_bit(&inode->i_state, __I_NEW);
    spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(unlock_new_inode);
0964 
0965 /**
0966  * lock_two_nondirectories - take two i_mutexes on non-directory objects
0967  *
0968  * Lock any non-NULL argument that is not a directory.
0969  * Zero, one or two objects may be locked by this function.
0970  *
0971  * @inode1: first inode to lock
0972  * @inode2: second inode to lock
0973  */
0974 void lock_two_nondirectories(struct inode *inode1, struct inode *inode2)
0975 {
0976     if (inode1 > inode2)
0977         swap(inode1, inode2);
0978 
0979     if (inode1 && !S_ISDIR(inode1->i_mode))
0980         inode_lock(inode1);
0981     if (inode2 && !S_ISDIR(inode2->i_mode) && inode2 != inode1)
0982         inode_lock_nested(inode2, I_MUTEX_NONDIR2);
0983 }
0984 EXPORT_SYMBOL(lock_two_nondirectories);
0985 
0986 /**
0987  * unlock_two_nondirectories - release locks from lock_two_nondirectories()
0988  * @inode1: first inode to unlock
0989  * @inode2: second inode to unlock
0990  */
0991 void unlock_two_nondirectories(struct inode *inode1, struct inode *inode2)
0992 {
0993     if (inode1 && !S_ISDIR(inode1->i_mode))
0994         inode_unlock(inode1);
0995     if (inode2 && !S_ISDIR(inode2->i_mode) && inode2 != inode1)
0996         inode_unlock(inode2);
0997 }
0998 EXPORT_SYMBOL(unlock_two_nondirectories);
0999 
/**
 * iget5_locked - obtain an inode from a mounted file system
 * @sb:     super block of file system
 * @hashval:    hash value (usually inode number) to get
 * @test:   callback used for comparisons between inodes
 * @set:    callback used to initialize a new struct inode
 * @data:   opaque data pointer to pass to @test and @set
 *
 * Search for the inode specified by @hashval and @data in the inode cache,
 * and if present return it with an increased reference count. This is
 * a generalized version of iget_locked() for file systems where the inode
 * number is not sufficient for unique identification of an inode.
 *
 * If the inode is not in cache, allocate a new inode and return it locked,
 * hashed, and with the I_NEW flag set. The file system gets to fill it in
 * before unlocking it via unlock_new_inode().
 *
 * Note both @test and @set are called with the inode_hash_lock held, so can't
 * sleep.
 *
 * Return: the cached or newly inserted inode; NULL if alloc_inode() fails
 * or if @set returns non-zero.
 */
struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
        int (*test)(struct inode *, void *),
        int (*set)(struct inode *, void *), void *data)
{
    struct hlist_head *head = inode_hashtable + hash(sb, hashval);
    struct inode *inode;
again:
    /* First pass: look the inode up without allocating anything. */
    spin_lock(&inode_hash_lock);
    inode = find_inode(sb, head, test, data);
    spin_unlock(&inode_hash_lock);

    if (inode) {
        wait_on_inode(inode);
        /* Unhashed while we waited: drop our reference and retry. */
        if (unlikely(inode_unhashed(inode))) {
            iput(inode);
            goto again;
        }
        return inode;
    }

    /* Allocation happens outside inode_hash_lock. */
    inode = alloc_inode(sb);
    if (inode) {
        struct inode *old;

        spin_lock(&inode_hash_lock);
        /* We released the lock, so.. */
        old = find_inode(sb, head, test, data);
        if (!old) {
            if (set(inode, data))
                goto set_failed;

            /* Publish the inode in the hash with I_NEW set. */
            spin_lock(&inode->i_lock);
            inode->i_state = I_NEW;
            hlist_add_head(&inode->i_hash, head);
            spin_unlock(&inode->i_lock);
            inode_sb_list_add(inode);
            spin_unlock(&inode_hash_lock);

            /* Return the locked inode with I_NEW set, the
             * caller is responsible for filling in the contents
             */
            return inode;
        }

        /*
         * Uhhuh, somebody else created the same inode under
         * us. Use the old inode instead of the one we just
         * allocated.
         */
        spin_unlock(&inode_hash_lock);
        destroy_inode(inode);
        inode = old;
        wait_on_inode(inode);
        /* Same retry rule as in the first lookup above. */
        if (unlikely(inode_unhashed(inode))) {
            iput(inode);
            goto again;
        }
    }
    /* NULL here means alloc_inode() failed. */
    return inode;

set_failed:
    /* @set rejected the new inode: undo the allocation. */
    spin_unlock(&inode_hash_lock);
    destroy_inode(inode);
    return NULL;
}
EXPORT_SYMBOL(iget5_locked);
1086 
/**
 * iget_locked - obtain an inode from a mounted file system
 * @sb:     super block of file system
 * @ino:    inode number to get
 *
 * Search for the inode specified by @ino in the inode cache and if present
 * return it with an increased reference count. This is for file systems
 * where the inode number is sufficient for unique identification of an inode.
 *
 * If the inode is not in cache, allocate a new inode and return it locked,
 * hashed, and with the I_NEW flag set.  The file system gets to fill it in
 * before unlocking it via unlock_new_inode().
 *
 * Return: the cached or newly inserted inode, or NULL if alloc_inode() fails.
 */
struct inode *iget_locked(struct super_block *sb, unsigned long ino)
{
    struct hlist_head *head = inode_hashtable + hash(sb, ino);
    struct inode *inode;
again:
    /* First pass: look the inode up without allocating anything. */
    spin_lock(&inode_hash_lock);
    inode = find_inode_fast(sb, head, ino);
    spin_unlock(&inode_hash_lock);
    if (inode) {
        wait_on_inode(inode);
        /* Unhashed while we waited: drop our reference and retry. */
        if (unlikely(inode_unhashed(inode))) {
            iput(inode);
            goto again;
        }
        return inode;
    }

    /* Allocation happens outside inode_hash_lock. */
    inode = alloc_inode(sb);
    if (inode) {
        struct inode *old;

        spin_lock(&inode_hash_lock);
        /* We released the lock, so.. */
        old = find_inode_fast(sb, head, ino);
        if (!old) {
            /* Publish the inode in the hash with I_NEW set. */
            inode->i_ino = ino;
            spin_lock(&inode->i_lock);
            inode->i_state = I_NEW;
            hlist_add_head(&inode->i_hash, head);
            spin_unlock(&inode->i_lock);
            inode_sb_list_add(inode);
            spin_unlock(&inode_hash_lock);

            /* Return the locked inode with I_NEW set, the
             * caller is responsible for filling in the contents
             */
            return inode;
        }

        /*
         * Uhhuh, somebody else created the same inode under
         * us. Use the old inode instead of the one we just
         * allocated.
         */
        spin_unlock(&inode_hash_lock);
        destroy_inode(inode);
        inode = old;
        wait_on_inode(inode);
        /* Same retry rule as in the first lookup above. */
        if (unlikely(inode_unhashed(inode))) {
            iput(inode);
            goto again;
        }
    }
    /* NULL here means alloc_inode() failed. */
    return inode;
}
EXPORT_SYMBOL(iget_locked);
1156 
1157 /*
1158  * search the inode cache for a matching inode number.
1159  * If we find one, then the inode number we are trying to
1160  * allocate is not unique and so we should not use it.
1161  *
1162  * Returns 1 if the inode number is unique, 0 if it is not.
1163  */
1164 static int test_inode_iunique(struct super_block *sb, unsigned long ino)
1165 {
1166     struct hlist_head *b = inode_hashtable + hash(sb, ino);
1167     struct inode *inode;
1168 
1169     spin_lock(&inode_hash_lock);
1170     hlist_for_each_entry(inode, b, i_hash) {
1171         if (inode->i_ino == ino && inode->i_sb == sb) {
1172             spin_unlock(&inode_hash_lock);
1173             return 0;
1174         }
1175     }
1176     spin_unlock(&inode_hash_lock);
1177 
1178     return 1;
1179 }
1180 
1181 /**
1182  *  iunique - get a unique inode number
1183  *  @sb: superblock
1184  *  @max_reserved: highest reserved inode number
1185  *
1186  *  Obtain an inode number that is unique on the system for a given
1187  *  superblock. This is used by file systems that have no natural
1188  *  permanent inode numbering system. An inode number is returned that
1189  *  is higher than the reserved limit but unique.
1190  *
1191  *  BUGS:
1192  *  With a large number of inodes live on the file system this function
1193  *  currently becomes quite slow.
1194  */
1195 ino_t iunique(struct super_block *sb, ino_t max_reserved)
1196 {
1197     /*
1198      * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
1199      * error if st_ino won't fit in target struct field. Use 32bit counter
1200      * here to attempt to avoid that.
1201      */
1202     static DEFINE_SPINLOCK(iunique_lock);
1203     static unsigned int counter;
1204     ino_t res;
1205 
1206     spin_lock(&iunique_lock);
1207     do {
1208         if (counter <= max_reserved)
1209             counter = max_reserved + 1;
1210         res = counter++;
1211     } while (!test_inode_iunique(sb, res));
1212     spin_unlock(&iunique_lock);
1213 
1214     return res;
1215 }
1216 EXPORT_SYMBOL(iunique);
1217 
1218 struct inode *igrab(struct inode *inode)
1219 {
1220     spin_lock(&inode->i_lock);
1221     if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) {
1222         __iget(inode);
1223         spin_unlock(&inode->i_lock);
1224     } else {
1225         spin_unlock(&inode->i_lock);
1226         /*
1227          * Handle the case where s_op->clear_inode is not been
1228          * called yet, and somebody is calling igrab
1229          * while the inode is getting freed.
1230          */
1231         inode = NULL;
1232     }
1233     return inode;
1234 }
1235 EXPORT_SYMBOL(igrab);
1236 
1237 /**
1238  * ilookup5_nowait - search for an inode in the inode cache
1239  * @sb:     super block of file system to search
1240  * @hashval:    hash value (usually inode number) to search for
1241  * @test:   callback used for comparisons between inodes
1242  * @data:   opaque data pointer to pass to @test
1243  *
1244  * Search for the inode specified by @hashval and @data in the inode cache.
1245  * If the inode is in the cache, the inode is returned with an incremented
1246  * reference count.
1247  *
1248  * Note: I_NEW is not waited upon so you have to be very careful what you do
1249  * with the returned inode.  You probably should be using ilookup5() instead.
1250  *
1251  * Note2: @test is called with the inode_hash_lock held, so can't sleep.
1252  */
1253 struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
1254         int (*test)(struct inode *, void *), void *data)
1255 {
1256     struct hlist_head *head = inode_hashtable + hash(sb, hashval);
1257     struct inode *inode;
1258 
1259     spin_lock(&inode_hash_lock);
1260     inode = find_inode(sb, head, test, data);
1261     spin_unlock(&inode_hash_lock);
1262 
1263     return inode;
1264 }
1265 EXPORT_SYMBOL(ilookup5_nowait);
1266 
/**
 * ilookup5 - search for an inode in the inode cache
 * @sb:     super block of file system to search
 * @hashval:    hash value (usually inode number) to search for
 * @test:   callback used for comparisons between inodes
 * @data:   opaque data pointer to pass to @test
 *
 * Search for the inode specified by @hashval and @data in the inode cache,
 * and if the inode is in the cache, return the inode with an incremented
 * reference count.  Waits on I_NEW before returning the inode.  If the
 * inode turns out to be unhashed after the wait, the reference is dropped
 * and the lookup is repeated.
 *
 * This is a generalized version of ilookup() for file systems where the
 * inode number is not sufficient for unique identification of an inode.
 *
 * Note: @test is called with the inode_hash_lock held, so can't sleep.
 */
struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
        int (*test)(struct inode *, void *), void *data)
{
    struct inode *inode;

    for (;;) {
        inode = ilookup5_nowait(sb, hashval, test, data);
        if (!inode)
            break;

        wait_on_inode(inode);
        if (likely(!inode_unhashed(inode)))
            break;

        /* Unhashed while we waited: drop it and look again. */
        iput(inode);
    }
    return inode;
}
EXPORT_SYMBOL(ilookup5);
1300 
1301 /**
1302  * ilookup - search for an inode in the inode cache
1303  * @sb:     super block of file system to search
1304  * @ino:    inode number to search for
1305  *
1306  * Search for the inode @ino in the inode cache, and if the inode is in the
1307  * cache, the inode is returned with an incremented reference count.
1308  */
1309 struct inode *ilookup(struct super_block *sb, unsigned long ino)
1310 {
1311     struct hlist_head *head = inode_hashtable + hash(sb, ino);
1312     struct inode *inode;
1313 again:
1314     spin_lock(&inode_hash_lock);
1315     inode = find_inode_fast(sb, head, ino);
1316     spin_unlock(&inode_hash_lock);
1317 
1318     if (inode) {
1319         wait_on_inode(inode);
1320         if (unlikely(inode_unhashed(inode))) {
1321             iput(inode);
1322             goto again;
1323         }
1324     }
1325     return inode;
1326 }
1327 EXPORT_SYMBOL(ilookup);
1328 
1329 /**
1330  * find_inode_nowait - find an inode in the inode cache
1331  * @sb:     super block of file system to search
1332  * @hashval:    hash value (usually inode number) to search for
1333  * @match:  callback used for comparisons between inodes
1334  * @data:   opaque data pointer to pass to @match
1335  *
1336  * Search for the inode specified by @hashval and @data in the inode
1337  * cache, where the helper function @match will return 0 if the inode
1338  * does not match, 1 if the inode does match, and -1 if the search
1339  * should be stopped.  The @match function must be responsible for
1340  * taking the i_lock spin_lock and checking i_state for an inode being
1341  * freed or being initialized, and incrementing the reference count
1342  * before returning 1.  It also must not sleep, since it is called with
1343  * the inode_hash_lock spinlock held.
1344  *
1345  * This is a even more generalized version of ilookup5() when the
1346  * function must never block --- find_inode() can block in
1347  * __wait_on_freeing_inode() --- or when the caller can not increment
1348  * the reference count because the resulting iput() might cause an
1349  * inode eviction.  The tradeoff is that the @match funtion must be
1350  * very carefully implemented.
1351  */
1352 struct inode *find_inode_nowait(struct super_block *sb,
1353                 unsigned long hashval,
1354                 int (*match)(struct inode *, unsigned long,
1355                          void *),
1356                 void *data)
1357 {
1358     struct hlist_head *head = inode_hashtable + hash(sb, hashval);
1359     struct inode *inode, *ret_inode = NULL;
1360     int mval;
1361 
1362     spin_lock(&inode_hash_lock);
1363     hlist_for_each_entry(inode, head, i_hash) {
1364         if (inode->i_sb != sb)
1365             continue;
1366         mval = match(inode, hashval, data);
1367         if (mval == 0)
1368             continue;
1369         if (mval == 1)
1370             ret_inode = inode;
1371         goto out;
1372     }
1373 out:
1374     spin_unlock(&inode_hash_lock);
1375     return ret_inode;
1376 }
1377 EXPORT_SYMBOL(find_inode_nowait);
1378 
/*
 * insert_inode_locked - hash a caller-provided inode with I_NEW set
 *
 * The hash chain is selected from inode->i_sb and inode->i_ino.  The chain
 * is scanned for another inode with the same superblock and inode number
 * that is not marked I_FREEING or I_WILL_FREE.
 *
 * If no such inode exists, I_NEW is set on @inode, it is added to the chain
 * and 0 is returned.  Otherwise the conflicting inode is pinned and waited
 * on; if it is still hashed afterwards -EBUSY is returned, and if it has
 * been unhashed the whole scan is repeated.
 */
int insert_inode_locked(struct inode *inode)
{
    struct super_block *sb = inode->i_sb;
    ino_t ino = inode->i_ino;
    struct hlist_head *head = inode_hashtable + hash(sb, ino);

    while (1) {
        struct inode *old = NULL;
        spin_lock(&inode_hash_lock);
        hlist_for_each_entry(old, head, i_hash) {
            if (old->i_ino != ino)
                continue;
            if (old->i_sb != sb)
                continue;
            spin_lock(&old->i_lock);
            /* Inodes that are on their way out do not count. */
            if (old->i_state & (I_FREEING|I_WILL_FREE)) {
                spin_unlock(&old->i_lock);
                continue;
            }
            /* Live conflict: leave the loop with old->i_lock held. */
            break;
        }
        /*
         * The check below relies on hlist_for_each_entry() leaving
         * old NULL when the chain is exhausted without a break.
         */
        if (likely(!old)) {
            spin_lock(&inode->i_lock);
            inode->i_state |= I_NEW;
            hlist_add_head(&inode->i_hash, head);
            spin_unlock(&inode->i_lock);
            spin_unlock(&inode_hash_lock);
            return 0;
        }
        /* Pin the conflicting inode before dropping the locks. */
        __iget(old);
        spin_unlock(&old->i_lock);
        spin_unlock(&inode_hash_lock);
        wait_on_inode(old);
        /* Still hashed after the wait: the slot is genuinely taken. */
        if (unlikely(!inode_unhashed(old))) {
            iput(old);
            return -EBUSY;
        }
        /* It was unhashed meanwhile: drop it and rescan the chain. */
        iput(old);
    }
}
EXPORT_SYMBOL(insert_inode_locked);
1420 
/*
 * insert_inode_locked4 - hash a caller-provided inode with I_NEW set,
 * using a caller-supplied hash value and comparison callback
 *
 * The hash chain is selected from inode->i_sb and @hashval.  The chain is
 * scanned for another inode on the same superblock for which
 * @test(old, @data) returns non-zero and that is not marked I_FREEING or
 * I_WILL_FREE.  @test is called with inode_hash_lock held.
 *
 * If no such inode exists, I_NEW is set on @inode, it is added to the chain
 * and 0 is returned.  Otherwise the conflicting inode is pinned and waited
 * on; if it is still hashed afterwards -EBUSY is returned, and if it has
 * been unhashed the whole scan is repeated.
 */
int insert_inode_locked4(struct inode *inode, unsigned long hashval,
        int (*test)(struct inode *, void *), void *data)
{
    struct super_block *sb = inode->i_sb;
    struct hlist_head *head = inode_hashtable + hash(sb, hashval);

    while (1) {
        struct inode *old = NULL;

        spin_lock(&inode_hash_lock);
        hlist_for_each_entry(old, head, i_hash) {
            if (old->i_sb != sb)
                continue;
            if (!test(old, data))
                continue;
            spin_lock(&old->i_lock);
            /* Inodes that are on their way out do not count. */
            if (old->i_state & (I_FREEING|I_WILL_FREE)) {
                spin_unlock(&old->i_lock);
                continue;
            }
            /* Live conflict: leave the loop with old->i_lock held. */
            break;
        }
        /*
         * The check below relies on hlist_for_each_entry() leaving
         * old NULL when the chain is exhausted without a break.
         */
        if (likely(!old)) {
            spin_lock(&inode->i_lock);
            inode->i_state |= I_NEW;
            hlist_add_head(&inode->i_hash, head);
            spin_unlock(&inode->i_lock);
            spin_unlock(&inode_hash_lock);
            return 0;
        }
        /* Pin the conflicting inode before dropping the locks. */
        __iget(old);
        spin_unlock(&old->i_lock);
        spin_unlock(&inode_hash_lock);
        wait_on_inode(old);
        /* Still hashed after the wait: the slot is genuinely taken. */
        if (unlikely(!inode_unhashed(old))) {
            iput(old);
            return -EBUSY;
        }
        /* It was unhashed meanwhile: drop it and rescan the chain. */
        iput(old);
    }
}
EXPORT_SYMBOL(insert_inode_locked4);
1463 
1464 
/*
 * Unconditionally returns 1; @inode is not examined.
 *
 * NOTE(review): presumably meant to be used as a ->drop_inode
 * implementation, where iput_final() treats a non-zero result as "evict
 * this inode" -- confirm against the callers.
 */
int generic_delete_inode(struct inode *inode)
{
    return 1;
}
EXPORT_SYMBOL(generic_delete_inode);
1470 
/*
 * Called when we're dropping the last reference
 * to an inode.
 *
 * Call the FS "drop_inode()" function, defaulting to
 * the legacy UNIX filesystem behaviour.  If it tells
 * us to evict inode, do so.  Otherwise, retain inode
 * in cache if fs is alive, sync and evict if fs is
 * shutting down.
 *
 * Expects inode->i_lock to be held on entry (iput() calls this right after
 * atomic_dec_and_lock() succeeded); the lock is released on every path
 * before returning.
 */
static void iput_final(struct inode *inode)
{
    struct super_block *sb = inode->i_sb;
    const struct super_operations *op = inode->i_sb->s_op;
    int drop;

    WARN_ON(inode->i_state & I_NEW);

    if (op->drop_inode)
        drop = op->drop_inode(inode);
    else
        drop = generic_drop_inode(inode);

    /* Keep the inode cached: mark it referenced and put it on the LRU. */
    if (!drop && (sb->s_flags & MS_ACTIVE)) {
        inode->i_state |= I_REFERENCED;
        inode_add_lru(inode);
        spin_unlock(&inode->i_lock);
        return;
    }

    /*
     * Superblock is not active but the fs did not ask for a drop: write
     * the inode out first.  I_WILL_FREE is set for the duration of the
     * writeback, which runs with i_lock released.
     */
    if (!drop) {
        inode->i_state |= I_WILL_FREE;
        spin_unlock(&inode->i_lock);
        write_inode_now(inode, 1);
        spin_lock(&inode->i_lock);
        WARN_ON(inode->i_state & I_NEW);
        inode->i_state &= ~I_WILL_FREE;
    }

    /* Mark the inode as being freed and take it off the LRU if present. */
    inode->i_state |= I_FREEING;
    if (!list_empty(&inode->i_lru))
        inode_lru_list_del(inode);
    spin_unlock(&inode->i_lock);

    evict(inode);
}
1517 
1518 /**
1519  *  iput    - put an inode
1520  *  @inode: inode to put
1521  *
1522  *  Puts an inode, dropping its usage count. If the inode use count hits
1523  *  zero, the inode is then freed and may also be destroyed.
1524  *
1525  *  Consequently, iput() can sleep.
1526  */
1527 void iput(struct inode *inode)
1528 {
1529     if (!inode)
1530         return;
1531     BUG_ON(inode->i_state & I_CLEAR);
1532 retry:
1533     if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) {
1534         if (inode->i_nlink && (inode->i_state & I_DIRTY_TIME)) {
1535             atomic_inc(&inode->i_count);
1536             inode->i_state &= ~I_DIRTY_TIME;
1537             spin_unlock(&inode->i_lock);
1538             trace_writeback_lazytime_iput(inode);
1539             mark_inode_dirty_sync(inode);
1540             goto retry;
1541         }
1542         iput_final(inode);
1543     }
1544 }
1545 EXPORT_SYMBOL(iput);
1546 
1547 /**
1548  *  bmap    - find a block number in a file
1549  *  @inode: inode of file
1550  *  @block: block to find
1551  *
1552  *  Returns the block number on the device holding the inode that
1553  *  is the disk block number for the block of the file requested.
1554  *  That is, asked for block 4 of inode 1 the function will return the
1555  *  disk block relative to the disk start that holds that block of the
1556  *  file.
1557  */
1558 sector_t bmap(struct inode *inode, sector_t block)
1559 {
1560     sector_t res = 0;
1561     if (inode->i_mapping->a_ops->bmap)
1562         res = inode->i_mapping->a_ops->bmap(inode->i_mapping, block);
1563     return res;
1564 }
1565 EXPORT_SYMBOL(bmap);
1566 
1567 /*
1568  * Update times in overlayed inode from underlying real inode
1569  */
1570 static void update_ovl_inode_times(struct dentry *dentry, struct inode *inode,
1571                    bool rcu)
1572 {
1573     if (!rcu) {
1574         struct inode *realinode = d_real_inode(dentry);
1575 
1576         if (unlikely(inode != realinode) &&
1577             (!timespec_equal(&inode->i_mtime, &realinode->i_mtime) ||
1578              !timespec_equal(&inode->i_ctime, &realinode->i_ctime))) {
1579             inode->i_mtime = realinode->i_mtime;
1580             inode->i_ctime = realinode->i_ctime;
1581         }
1582     }
1583 }
1584 
1585 /*
1586  * With relative atime, only update atime if the previous atime is
1587  * earlier than either the ctime or mtime or if at least a day has
1588  * passed since the last atime update.
1589  */
1590 static int relatime_need_update(const struct path *path, struct inode *inode,
1591                 struct timespec now, bool rcu)
1592 {
1593 
1594     if (!(path->mnt->mnt_flags & MNT_RELATIME))
1595         return 1;
1596 
1597     update_ovl_inode_times(path->dentry, inode, rcu);
1598     /*
1599      * Is mtime younger than atime? If yes, update atime:
1600      */
1601     if (timespec_compare(&inode->i_mtime, &inode->i_atime) >= 0)
1602         return 1;
1603     /*
1604      * Is ctime younger than atime? If yes, update atime:
1605      */
1606     if (timespec_compare(&inode->i_ctime, &inode->i_atime) >= 0)
1607         return 1;
1608 
1609     /*
1610      * Is the previous atime value older than a day? If yes,
1611      * update atime:
1612      */
1613     if ((long)(now.tv_sec - inode->i_atime.tv_sec) >= 24*60*60)
1614         return 1;
1615     /*
1616      * Good, we can skip the atime update:
1617      */
1618     return 0;
1619 }
1620 
1621 int generic_update_time(struct inode *inode, struct timespec *time, int flags)
1622 {
1623     int iflags = I_DIRTY_TIME;
1624 
1625     if (flags & S_ATIME)
1626         inode->i_atime = *time;
1627     if (flags & S_VERSION)
1628         inode_inc_iversion(inode);
1629     if (flags & S_CTIME)
1630         inode->i_ctime = *time;
1631     if (flags & S_MTIME)
1632         inode->i_mtime = *time;
1633 
1634     if (!(inode->i_sb->s_flags & MS_LAZYTIME) || (flags & S_VERSION))
1635         iflags |= I_DIRTY_SYNC;
1636     __mark_inode_dirty(inode, iflags);
1637     return 0;
1638 }
1639 EXPORT_SYMBOL(generic_update_time);
1640 
1641 /*
1642  * This does the actual work of updating an inodes time or version.  Must have
1643  * had called mnt_want_write() before calling this.
1644  */
1645 static int update_time(struct inode *inode, struct timespec *time, int flags)
1646 {
1647     int (*update_time)(struct inode *, struct timespec *, int);
1648 
1649     update_time = inode->i_op->update_time ? inode->i_op->update_time :
1650         generic_update_time;
1651 
1652     return update_time(inode, time, flags);
1653 }
1654 
1655 /**
1656  *  touch_atime -   update the access time
1657  *  @path: the &struct path to update
1658  *  @inode: inode to update
1659  *
1660  *  Update the accessed time on an inode and mark it for writeback.
1661  *  This function automatically handles read only file systems and media,
1662  *  as well as the "noatime" flag and inode specific "noatime" markers.
1663  */
1664 bool __atime_needs_update(const struct path *path, struct inode *inode,
1665               bool rcu)
1666 {
1667     struct vfsmount *mnt = path->mnt;
1668     struct timespec now;
1669 
1670     if (inode->i_flags & S_NOATIME)
1671         return false;
1672 
1673     /* Atime updates will likely cause i_uid and i_gid to be written
1674      * back improprely if their true value is unknown to the vfs.
1675      */
1676     if (HAS_UNMAPPED_ID(inode))
1677         return false;
1678 
1679     if (IS_NOATIME(inode))
1680         return false;
1681     if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
1682         return false;
1683 
1684     if (mnt->mnt_flags & MNT_NOATIME)
1685         return false;
1686     if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
1687         return false;
1688 
1689     now = current_time(inode);
1690 
1691     if (!relatime_need_update(path, inode, now, rcu))
1692         return false;
1693 
1694     if (timespec_equal(&inode->i_atime, &now))
1695         return false;
1696 
1697     return true;
1698 }
1699 
1700 void touch_atime(const struct path *path)
1701 {
1702     struct vfsmount *mnt = path->mnt;
1703     struct inode *inode = d_inode(path->dentry);
1704     struct timespec now;
1705 
1706     if (!__atime_needs_update(path, inode, false))
1707         return;
1708 
1709     if (!sb_start_write_trylock(inode->i_sb))
1710         return;
1711 
1712     if (__mnt_want_write(mnt) != 0)
1713         goto skip_update;
1714     /*
1715      * File systems can error out when updating inodes if they need to
1716      * allocate new space to modify an inode (such is the case for
1717      * Btrfs), but since we touch atime while walking down the path we
1718      * really don't care if we failed to update the atime of the file,
1719      * so just ignore the return value.
1720      * We may also fail on filesystems that have the ability to make parts
1721      * of the fs read only, e.g. subvolumes in Btrfs.
1722      */
1723     now = current_time(inode);
1724     update_time(inode, &now, S_ATIME);
1725     __mnt_drop_write(mnt);
1726 skip_update:
1727     sb_end_write(inode->i_sb);
1728 }
1729 EXPORT_SYMBOL(touch_atime);
1730 
1731 /*
1732  * The logic we want is
1733  *
1734  *  if suid or (sgid and xgrp)
1735  *      remove privs
1736  */
1737 int should_remove_suid(struct dentry *dentry)
1738 {
1739     umode_t mode = d_inode(dentry)->i_mode;
1740     int kill = 0;
1741 
1742     /* suid always must be killed */
1743     if (unlikely(mode & S_ISUID))
1744         kill = ATTR_KILL_SUID;
1745 
1746     /*
1747      * sgid without any exec bits is just a mandatory locking mark; leave
1748      * it alone.  If some exec bits are set, it's a real sgid; kill it.
1749      */
1750     if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
1751         kill |= ATTR_KILL_SGID;
1752 
1753     if (unlikely(kill && !capable(CAP_FSETID) && S_ISREG(mode)))
1754         return kill;
1755 
1756     return 0;
1757 }
1758 EXPORT_SYMBOL(should_remove_suid);
1759 
1760 /*
1761  * Return mask of changes for notify_change() that need to be done as a
1762  * response to write or truncate. Return 0 if nothing has to be changed.
1763  * Negative value on error (change should be denied).
1764  */
1765 int dentry_needs_remove_privs(struct dentry *dentry)
1766 {
1767     struct inode *inode = d_inode(dentry);
1768     int mask = 0;
1769     int ret;
1770 
1771     if (IS_NOSEC(inode))
1772         return 0;
1773 
1774     mask = should_remove_suid(dentry);
1775     ret = security_inode_need_killpriv(dentry);
1776     if (ret < 0)
1777         return ret;
1778     if (ret)
1779         mask |= ATTR_KILL_PRIV;
1780     return mask;
1781 }
1782 
1783 static int __remove_privs(struct dentry *dentry, int kill)
1784 {
1785     struct iattr newattrs;
1786 
1787     newattrs.ia_valid = ATTR_FORCE | kill;
1788     /*
1789      * Note we call this on write, so notify_change will not
1790      * encounter any conflicting delegations:
1791      */
1792     return notify_change(dentry, &newattrs, NULL);
1793 }
1794 
/*
 * Remove special file privileges (suid, capabilities) when file is written
 * to or truncated.
 *
 * Returns 0 on success or when there is nothing to do, otherwise the
 * negative value reported by dentry_needs_remove_privs() or
 * __remove_privs().  On success inode_has_no_xattr() is called for the
 * inode.
 */
int file_remove_privs(struct file *file)
{
    struct dentry *dentry = file_dentry(file);
    struct inode *inode = file_inode(file);
    int kill;
    int error;

    /* Fast path for nothing security related */
    if (IS_NOSEC(inode))
        return 0;

    kill = dentry_needs_remove_privs(dentry);
    if (kill < 0)
        return kill;

    error = kill ? __remove_privs(dentry, kill) : 0;
    if (error)
        return error;

    inode_has_no_xattr(inode);
    return 0;
}
EXPORT_SYMBOL(file_remove_privs);
1821 
1822 /**
1823  *  file_update_time    -   update mtime and ctime time
1824  *  @file: file accessed
1825  *
1826  *  Update the mtime and ctime members of an inode and mark the inode
1827  *  for writeback.  Note that this function is meant exclusively for
1828  *  usage in the file write path of filesystems, and filesystems may
1829  *  choose to explicitly ignore update via this function with the
1830  *  S_NOCMTIME inode flag, e.g. for network filesystem where these
1831  *  timestamps are handled by the server.  This can return an error for
1832  *  file systems who need to allocate space in order to update an inode.
1833  */
1834 
1835 int file_update_time(struct file *file)
1836 {
1837     struct inode *inode = file_inode(file);
1838     struct timespec now;
1839     int sync_it = 0;
1840     int ret;
1841 
1842     /* First try to exhaust all avenues to not sync */
1843     if (IS_NOCMTIME(inode))
1844         return 0;
1845 
1846     now = current_time(inode);
1847     if (!timespec_equal(&inode->i_mtime, &now))
1848         sync_it = S_MTIME;
1849 
1850     if (!timespec_equal(&inode->i_ctime, &now))
1851         sync_it |= S_CTIME;
1852 
1853     if (IS_I_VERSION(inode))
1854         sync_it |= S_VERSION;
1855 
1856     if (!sync_it)
1857         return 0;
1858 
1859     /* Finally allowed to write? Takes lock. */
1860     if (__mnt_want_write_file(file))
1861         return 0;
1862 
1863     ret = update_time(inode, &now, sync_it);
1864     __mnt_drop_write_file(file);
1865 
1866     return ret;
1867 }
1868 EXPORT_SYMBOL(file_update_time);
1869 
1870 int inode_needs_sync(struct inode *inode)
1871 {
1872     if (IS_SYNC(inode))
1873         return 1;
1874     if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode))
1875         return 1;
1876     return 0;
1877 }
1878 EXPORT_SYMBOL(inode_needs_sync);
1879 
/*
 * If we try to find an inode in the inode hash while it is being
 * deleted, we have to wait until the filesystem completes its
 * deletion before reporting that it isn't found.  This function waits
 * until the deletion _might_ have completed.  Callers are responsible
 * to recheck inode state.
 *
 * It doesn't matter if I_NEW is not set initially, a call to
 * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list
 * will DTRT.
 *
 * Locking: releases inode->i_lock and inode_hash_lock before sleeping, so
 * both must be held on entry.  Only inode_hash_lock is re-taken before
 * returning.
 */
static void __wait_on_freeing_inode(struct inode *inode)
{
    wait_queue_head_t *wq;
    DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
    wq = bit_waitqueue(&inode->i_state, __I_NEW);
    /* Queue ourselves before dropping the locks. */
    prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
    spin_unlock(&inode->i_lock);
    spin_unlock(&inode_hash_lock);
    schedule();
    finish_wait(wq, &wait.wait);
    spin_lock(&inode_hash_lock);
}
1903 
1904 static __initdata unsigned long ihash_entries;
1905 static int __init set_ihash_entries(char *str)
1906 {
1907     if (!str)
1908         return 0;
1909     ihash_entries = simple_strtoul(str, &str, 0);
1910     return 1;
1911 }
1912 __setup("ihash_entries=", set_ihash_entries);
1913 
1914 /*
1915  * Initialize the waitqueues and inode hash table.
1916  */
1917 void __init inode_init_early(void)
1918 {
1919     unsigned int loop;
1920 
1921     /* If hashes are distributed across NUMA nodes, defer
1922      * hash allocation until vmalloc space is available.
1923      */
1924     if (hashdist)
1925         return;
1926 
1927     inode_hashtable =
1928         alloc_large_system_hash("Inode-cache",
1929                     sizeof(struct hlist_head),
1930                     ihash_entries,
1931                     14,
1932                     HASH_EARLY,
1933                     &i_hash_shift,
1934                     &i_hash_mask,
1935                     0,
1936                     0);
1937 
1938     for (loop = 0; loop < (1U << i_hash_shift); loop++)
1939         INIT_HLIST_HEAD(&inode_hashtable[loop]);
1940 }
1941 
1942 void __init inode_init(void)
1943 {
1944     unsigned int loop;
1945 
1946     /* inode slab cache */
1947     inode_cachep = kmem_cache_create("inode_cache",
1948                      sizeof(struct inode),
1949                      0,
1950                      (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
1951                      SLAB_MEM_SPREAD|SLAB_ACCOUNT),
1952                      init_once);
1953 
1954     /* Hash may have been set up in inode_init_early */
1955     if (!hashdist)
1956         return;
1957 
1958     inode_hashtable =
1959         alloc_large_system_hash("Inode-cache",
1960                     sizeof(struct hlist_head),
1961                     ihash_entries,
1962                     14,
1963                     0,
1964                     &i_hash_shift,
1965                     &i_hash_mask,
1966                     0,
1967                     0);
1968 
1969     for (loop = 0; loop < (1U << i_hash_shift); loop++)
1970         INIT_HLIST_HEAD(&inode_hashtable[loop]);
1971 }
1972 
1973 void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
1974 {
1975     inode->i_mode = mode;
1976     if (S_ISCHR(mode)) {
1977         inode->i_fop = &def_chr_fops;
1978         inode->i_rdev = rdev;
1979     } else if (S_ISBLK(mode)) {
1980         inode->i_fop = &def_blk_fops;
1981         inode->i_rdev = rdev;
1982     } else if (S_ISFIFO(mode))
1983         inode->i_fop = &pipefifo_fops;
1984     else if (S_ISSOCK(mode))
1985         ;   /* leave it no_open_fops */
1986     else
1987         printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
1988                   " inode %s:%lu\n", mode, inode->i_sb->s_id,
1989                   inode->i_ino);
1990 }
1991 EXPORT_SYMBOL(init_special_inode);
1992 
1993 /**
1994  * inode_init_owner - Init uid,gid,mode for new inode according to posix standards
1995  * @inode: New inode
1996  * @dir: Directory inode
1997  * @mode: mode of the new inode
1998  */
1999 void inode_init_owner(struct inode *inode, const struct inode *dir,
2000             umode_t mode)
2001 {
2002     inode->i_uid = current_fsuid();
2003     if (dir && dir->i_mode & S_ISGID) {
2004         inode->i_gid = dir->i_gid;
2005         if (S_ISDIR(mode))
2006             mode |= S_ISGID;
2007     } else
2008         inode->i_gid = current_fsgid();
2009     inode->i_mode = mode;
2010 }
2011 EXPORT_SYMBOL(inode_init_owner);
2012 
2013 /**
2014  * inode_owner_or_capable - check current task permissions to inode
2015  * @inode: inode being checked
2016  *
2017  * Return true if current either has CAP_FOWNER in a namespace with the
2018  * inode owner uid mapped, or owns the file.
2019  */
2020 bool inode_owner_or_capable(const struct inode *inode)
2021 {
2022     struct user_namespace *ns;
2023 
2024     if (uid_eq(current_fsuid(), inode->i_uid))
2025         return true;
2026 
2027     ns = current_user_ns();
2028     if (ns_capable(ns, CAP_FOWNER) && kuid_has_mapping(ns, inode->i_uid))
2029         return true;
2030     return false;
2031 }
2032 EXPORT_SYMBOL(inode_owner_or_capable);
2033 
2034 /*
2035  * Direct i/o helper functions
2036  */
2037 static void __inode_dio_wait(struct inode *inode)
2038 {
2039     wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
2040     DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
2041 
2042     do {
2043         prepare_to_wait(wq, &q.wait, TASK_UNINTERRUPTIBLE);
2044         if (atomic_read(&inode->i_dio_count))
2045             schedule();
2046     } while (atomic_read(&inode->i_dio_count));
2047     finish_wait(wq, &q.wait);
2048 }
2049 
2050 /**
2051  * inode_dio_wait - wait for outstanding DIO requests to finish
2052  * @inode: inode to wait for
2053  *
2054  * Waits for all pending direct I/O requests to finish so that we can
2055  * proceed with a truncate or equivalent operation.
2056  *
2057  * Must be called under a lock that serializes taking new references
2058  * to i_dio_count, usually by inode->i_mutex.
2059  */
2060 void inode_dio_wait(struct inode *inode)
2061 {
2062     if (atomic_read(&inode->i_dio_count))
2063         __inode_dio_wait(inode);
2064 }
2065 EXPORT_SYMBOL(inode_dio_wait);
2066 
2067 /*
2068  * inode_set_flags - atomically set some inode flags
2069  *
2070  * Note: the caller should be holding i_mutex, or else be sure that
2071  * they have exclusive access to the inode structure (i.e., while the
2072  * inode is being instantiated).  The reason for the cmpxchg() loop
2073  * --- which wouldn't be necessary if all code paths which modify
2074  * i_flags actually followed this rule, is that there is at least one
2075  * code path which doesn't today so we use cmpxchg() out of an abundance
2076  * of caution.
2077  *
2078  * In the long run, i_mutex is overkill, and we should probably look
2079  * at using the i_lock spinlock to protect i_flags, and then make sure
2080  * it is so documented in include/linux/fs.h and that all code follows
2081  * the locking convention!!
2082  */
2083 void inode_set_flags(struct inode *inode, unsigned int flags,
2084              unsigned int mask)
2085 {
2086     unsigned int old_flags, new_flags;
2087 
2088     WARN_ON_ONCE(flags & ~mask);
2089     do {
2090         old_flags = ACCESS_ONCE(inode->i_flags);
2091         new_flags = (old_flags & ~mask) | flags;
2092     } while (unlikely(cmpxchg(&inode->i_flags, old_flags,
2093                   new_flags) != old_flags));
2094 }
2095 EXPORT_SYMBOL(inode_set_flags);
2096 
2097 void inode_nohighmem(struct inode *inode)
2098 {
2099     mapping_set_gfp_mask(inode->i_mapping, GFP_USER);
2100 }
2101 EXPORT_SYMBOL(inode_nohighmem);
2102 
2103 /**
2104  * current_time - Return FS time
2105  * @inode: inode.
2106  *
2107  * Return the current time truncated to the time granularity supported by
2108  * the fs.
2109  *
2110  * Note that inode and inode->sb cannot be NULL.
2111  * Otherwise, the function warns and returns time without truncation.
2112  */
2113 struct timespec current_time(struct inode *inode)
2114 {
2115     struct timespec now = current_kernel_time();
2116 
2117     if (unlikely(!inode->i_sb)) {
2118         WARN(1, "current_time() called with uninitialized super_block in the inode");
2119         return now;
2120     }
2121 
2122     return timespec_trunc(now, inode->i_sb->s_time_gran);
2123 }
2124 EXPORT_SYMBOL(current_time);