0001 /*
0002  * fs/dcache.c
0003  *
0004  * Complete reimplementation
0005  * (C) 1997 Thomas Schoebel-Theuer,
0006  * with heavy changes by Linus Torvalds
0007  */
0008 
0009 /*
0010  * Notes on the allocation strategy:
0011  *
0012  * The dcache is a master of the icache - whenever a dcache entry
0013  * exists, the inode will always exist. "iput()" is done either when
0014  * the dcache entry is deleted or garbage collected.
0015  */
0016 
0017 #include <linux/syscalls.h>
0018 #include <linux/string.h>
0019 #include <linux/mm.h>
0020 #include <linux/fs.h>
0021 #include <linux/fsnotify.h>
0022 #include <linux/slab.h>
0023 #include <linux/init.h>
0024 #include <linux/hash.h>
0025 #include <linux/cache.h>
0026 #include <linux/export.h>
0027 #include <linux/mount.h>
0028 #include <linux/file.h>
0029 #include <linux/uaccess.h>
0030 #include <linux/security.h>
0031 #include <linux/seqlock.h>
0032 #include <linux/swap.h>
0033 #include <linux/bootmem.h>
0034 #include <linux/fs_struct.h>
0035 #include <linux/hardirq.h>
0036 #include <linux/bit_spinlock.h>
0037 #include <linux/rculist_bl.h>
0038 #include <linux/prefetch.h>
0039 #include <linux/ratelimit.h>
0040 #include <linux/list_lru.h>
0041 #include <linux/kasan.h>
0042 
0043 #include "internal.h"
0044 #include "mount.h"
0045 
0046 /*
0047  * Usage:
0048  * dentry->d_inode->i_lock protects:
0049  *   - i_dentry, d_u.d_alias, d_inode of aliases
0050  * dcache_hash_bucket lock protects:
0051  *   - the dcache hash table
0052  * s_anon bl list spinlock protects:
0053  *   - the s_anon list (see __d_drop)
0054  * dentry->d_sb->s_dentry_lru_lock protects:
0055  *   - the dcache lru lists and counters
0056  * d_lock protects:
0057  *   - d_flags
0058  *   - d_name
0059  *   - d_lru
0060  *   - d_count
0061  *   - d_unhashed()
0062  *   - d_parent and d_subdirs
0063  *   - children's d_child and d_parent
0064  *   - d_u.d_alias, d_inode
0065  *
0066  * Ordering:
0067  * dentry->d_inode->i_lock
0068  *   dentry->d_lock
0069  *     dentry->d_sb->s_dentry_lru_lock
0070  *     dcache_hash_bucket lock
0071  *     s_anon lock
0072  *
0073  * If there is an ancestor relationship:
0074  * dentry->d_parent->...->d_parent->d_lock
0075  *   ...
0076  *     dentry->d_parent->d_lock
0077  *       dentry->d_lock
0078  *
0079  * If no ancestor relationship:
0080  * if (dentry1 < dentry2)
0081  *   dentry1->d_lock
0082  *     dentry2->d_lock
0083  */
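/*
 * A minimal sketch of the "no ancestor relationship" rule above (the
 * helper name is illustrative; nothing in this file uses it): locking
 * two unrelated dentries in ascending address order means two CPUs
 * taking the same pair can never deadlock ABBA-style.
 *
 *	static void lock_dentry_pair(struct dentry *d1, struct dentry *d2)
 *	{
 *		if (d1 > d2)
 *			swap(d1, d2);
 *		spin_lock(&d1->d_lock);
 *		spin_lock_nested(&d2->d_lock, DENTRY_D_LOCK_NESTED);
 *	}
 */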
0084 int sysctl_vfs_cache_pressure __read_mostly = 100;
0085 EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
0086 
0087 __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
0088 
0089 EXPORT_SYMBOL(rename_lock);
0090 
0091 static struct kmem_cache *dentry_cache __read_mostly;
0092 
0093 /*
0094  * This is the single most critical data structure when it comes
0095  * to the dcache: the hashtable for lookups. Somebody should try
0096  * to make this good - I've just made it work.
0097  *
0098  * This hash-function tries to avoid losing too many bits of hash
0099  * information, yet avoid using a prime hash-size or similar.
0100  */
0101 
0102 static unsigned int d_hash_mask __read_mostly;
0103 static unsigned int d_hash_shift __read_mostly;
0104 
0105 static struct hlist_bl_head *dentry_hashtable __read_mostly;
0106 
0107 static inline struct hlist_bl_head *d_hash(unsigned int hash)
0108 {
0109     return dentry_hashtable + (hash >> (32 - d_hash_shift));
0110 }
0111 
0112 #define IN_LOOKUP_SHIFT 10
0113 static struct hlist_bl_head in_lookup_hashtable[1 << IN_LOOKUP_SHIFT];
0114 
0115 static inline struct hlist_bl_head *in_lookup_hash(const struct dentry *parent,
0116                     unsigned int hash)
0117 {
0118     hash += (unsigned long) parent / L1_CACHE_BYTES;
0119     return in_lookup_hashtable + hash_32(hash, IN_LOOKUP_SHIFT);
0120 }
0121 
0122 
0123 /* Statistics gathering. */
0124 struct dentry_stat_t dentry_stat = {
0125     .age_limit = 45,
0126 };
0127 
0128 static DEFINE_PER_CPU(long, nr_dentry);
0129 static DEFINE_PER_CPU(long, nr_dentry_unused);
0130 
0131 #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
0132 
0133 /*
0134  * Here we resort to our own counters instead of using generic per-cpu counters
0135  * for consistency with what the vfs inode code does. We expect to get better
0136  * code and performance by having our own specialized counters.
0137  *
0138  * Please note that the loop is done over all possible CPUs, not over all online
0139  * CPUs. The reason for this is that we don't want to play games with CPUs going
0140  * on and off. If one of them goes off, we just keep its counters.
0141  *
0142  * glommer: See cffbc8a for details, and if you ever intend to change this,
0143  * please update all vfs counters to match.
0144  */
0145 static long get_nr_dentry(void)
0146 {
0147     int i;
0148     long sum = 0;
0149     for_each_possible_cpu(i)
0150         sum += per_cpu(nr_dentry, i);
0151     return sum < 0 ? 0 : sum;
0152 }
0153 
0154 static long get_nr_dentry_unused(void)
0155 {
0156     int i;
0157     long sum = 0;
0158     for_each_possible_cpu(i)
0159         sum += per_cpu(nr_dentry_unused, i);
0160     return sum < 0 ? 0 : sum;
0161 }
0162 
0163 int proc_nr_dentry(struct ctl_table *table, int write, void __user *buffer,
0164            size_t *lenp, loff_t *ppos)
0165 {
0166     dentry_stat.nr_dentry = get_nr_dentry();
0167     dentry_stat.nr_unused = get_nr_dentry_unused();
0168     return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
0169 }
0170 #endif
0171 
0172 /*
0173  * Compare 2 name strings, return 0 if they match, otherwise non-zero.
0174  * The strings are both count bytes long, and count is non-zero.
0175  */
0176 #ifdef CONFIG_DCACHE_WORD_ACCESS
0177 
0178 #include <asm/word-at-a-time.h>
0179 /*
0180  * NOTE! 'cs' comes from a dentry, so the name has an
0181  * aligned allocation for this particular component. We don't
0182  * strictly need the load_unaligned_zeropad() safety, but it
0183  * doesn't hurt either.
0184  *
0185  * In contrast, 'ct' and 'tcount' can be from a pathname, and do
0186  * need the careful unaligned handling.
0187  */
0188 static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
0189 {
0190     unsigned long a, b, mask;
0191 
0192     for (;;) {
0193         a = *(unsigned long *)cs;
0194         b = load_unaligned_zeropad(ct);
0195         if (tcount < sizeof(unsigned long))
0196             break;
0197         if (unlikely(a != b))
0198             return 1;
0199         cs += sizeof(unsigned long);
0200         ct += sizeof(unsigned long);
0201         tcount -= sizeof(unsigned long);
0202         if (!tcount)
0203             return 0;
0204     }
0205     mask = bytemask_from_count(tcount);
0206     return unlikely(!!((a ^ b) & mask));
0207 }
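/*
 * Worked example of the final masked compare above, assuming a 64-bit
 * little-endian machine: with tcount == 2 the loop breaks on the first
 * iteration and bytemask_from_count(2) yields 0x000000000000ffff, so
 * only the two low-order bytes of (a ^ b), i.e. the first two name
 * characters, can make the result non-zero; any garbage read past the
 * end of the shorter buffer is masked away.
 */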
0208 
0209 #else
0210 
0211 static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
0212 {
0213     do {
0214         if (*cs != *ct)
0215             return 1;
0216         cs++;
0217         ct++;
0218         tcount--;
0219     } while (tcount);
0220     return 0;
0221 }
0222 
0223 #endif
0224 
0225 static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount)
0226 {
0227     /*
0228      * Be careful about RCU walk racing with rename:
0229      * use 'lockless_dereference' to fetch the name pointer.
0230      *
0231      * NOTE! Even if a rename will mean that the length
0232      * was not loaded atomically, we don't care. The
0233      * RCU walk will check the sequence count eventually,
0234      * and catch it. And we won't overrun the buffer,
0235      * because we're reading the name pointer atomically,
0236      * and a dentry name is guaranteed to be properly
0237      * terminated with a NUL byte.
0238      *
0239      * End result: even if 'len' is wrong, we'll exit
0240      * early because the data cannot match (there can
0241      * be no NUL in the ct/tcount data)
0242      */
0243     const unsigned char *cs = lockless_dereference(dentry->d_name.name);
0244 
0245     return dentry_string_cmp(cs, ct, tcount);
0246 }
0247 
0248 struct external_name {
0249     union {
0250         atomic_t count;
0251         struct rcu_head head;
0252     } u;
0253     unsigned char name[];
0254 };
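/*
 * Layout sketch: when a name is too long for the inline d_iname array,
 * dentry->d_name.name points at p->name inside a single kmalloc()ed
 * struct external_name, so external_name() below recovers the header
 * (the refcount / RCU head union) from the name pointer alone:
 *
 *	[ union u (count/head) ][ 'l','o','n','g', ... ,'\0' ]
 *	^ struct external_name   ^ name[] == dentry->d_name.name
 */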
0255 
0256 static inline struct external_name *external_name(struct dentry *dentry)
0257 {
0258     return container_of(dentry->d_name.name, struct external_name, name[0]);
0259 }
0260 
0261 static void __d_free(struct rcu_head *head)
0262 {
0263     struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
0264 
0265     kmem_cache_free(dentry_cache, dentry); 
0266 }
0267 
0268 static void __d_free_external(struct rcu_head *head)
0269 {
0270     struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
0271     kfree(external_name(dentry));
0272     kmem_cache_free(dentry_cache, dentry); 
0273 }
0274 
0275 static inline int dname_external(const struct dentry *dentry)
0276 {
0277     return dentry->d_name.name != dentry->d_iname;
0278 }
0279 
0280 static inline void __d_set_inode_and_type(struct dentry *dentry,
0281                       struct inode *inode,
0282                       unsigned type_flags)
0283 {
0284     unsigned flags;
0285 
0286     dentry->d_inode = inode;
0287     flags = READ_ONCE(dentry->d_flags);
0288     flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
0289     flags |= type_flags;
0290     WRITE_ONCE(dentry->d_flags, flags);
0291 }
0292 
0293 static inline void __d_clear_type_and_inode(struct dentry *dentry)
0294 {
0295     unsigned flags = READ_ONCE(dentry->d_flags);
0296 
0297     flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
0298     WRITE_ONCE(dentry->d_flags, flags);
0299     dentry->d_inode = NULL;
0300 }
0301 
0302 static void dentry_free(struct dentry *dentry)
0303 {
0304     WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias));
0305     if (unlikely(dname_external(dentry))) {
0306         struct external_name *p = external_name(dentry);
0307         if (likely(atomic_dec_and_test(&p->u.count))) {
0308             call_rcu(&dentry->d_u.d_rcu, __d_free_external);
0309             return;
0310         }
0311     }
0312     /* if dentry was never visible to RCU, immediate free is OK */
0313     if (!(dentry->d_flags & DCACHE_RCUACCESS))
0314         __d_free(&dentry->d_u.d_rcu);
0315     else
0316         call_rcu(&dentry->d_u.d_rcu, __d_free);
0317 }
0318 
0319 /*
0320  * Release the dentry's inode, using the filesystem
0321  * d_iput() operation if defined.
0322  */
0323 static void dentry_unlink_inode(struct dentry *dentry)
0324     __releases(dentry->d_lock)
0325     __releases(dentry->d_inode->i_lock)
0326 {
0327     struct inode *inode = dentry->d_inode;
0328     bool hashed = !d_unhashed(dentry);
0329 
0330     if (hashed)
0331         raw_write_seqcount_begin(&dentry->d_seq);
0332     __d_clear_type_and_inode(dentry);
0333     hlist_del_init(&dentry->d_u.d_alias);
0334     if (hashed)
0335         raw_write_seqcount_end(&dentry->d_seq);
0336     spin_unlock(&dentry->d_lock);
0337     spin_unlock(&inode->i_lock);
0338     if (!inode->i_nlink)
0339         fsnotify_inoderemove(inode);
0340     if (dentry->d_op && dentry->d_op->d_iput)
0341         dentry->d_op->d_iput(dentry, inode);
0342     else
0343         iput(inode);
0344 }
0345 
0346 /*
0347  * The DCACHE_LRU_LIST bit is set whenever the 'd_lru' entry
0348  * is in use - which includes both the "real" per-superblock
0349  * LRU list _and_ the DCACHE_SHRINK_LIST use.
0350  *
0351  * The DCACHE_SHRINK_LIST bit is set whenever the dentry is
0352  * on the shrink list (ie not on the superblock LRU list).
0353  *
0354  * The per-cpu "nr_dentry_unused" counters are updated with
0355  * the DCACHE_LRU_LIST bit.
0356  *
0357  * These helper functions make sure we always follow the
0358  * rules. d_lock must be held by the caller.
0359  */
0360 #define D_FLAG_VERIFY(dentry,x) WARN_ON_ONCE(((dentry)->d_flags & (DCACHE_LRU_LIST | DCACHE_SHRINK_LIST)) != (x))
0361 static void d_lru_add(struct dentry *dentry)
0362 {
0363     D_FLAG_VERIFY(dentry, 0);
0364     dentry->d_flags |= DCACHE_LRU_LIST;
0365     this_cpu_inc(nr_dentry_unused);
0366     WARN_ON_ONCE(!list_lru_add(&dentry->d_sb->s_dentry_lru, &dentry->d_lru));
0367 }
0368 
0369 static void d_lru_del(struct dentry *dentry)
0370 {
0371     D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
0372     dentry->d_flags &= ~DCACHE_LRU_LIST;
0373     this_cpu_dec(nr_dentry_unused);
0374     WARN_ON_ONCE(!list_lru_del(&dentry->d_sb->s_dentry_lru, &dentry->d_lru));
0375 }
0376 
0377 static void d_shrink_del(struct dentry *dentry)
0378 {
0379     D_FLAG_VERIFY(dentry, DCACHE_SHRINK_LIST | DCACHE_LRU_LIST);
0380     list_del_init(&dentry->d_lru);
0381     dentry->d_flags &= ~(DCACHE_SHRINK_LIST | DCACHE_LRU_LIST);
0382     this_cpu_dec(nr_dentry_unused);
0383 }
0384 
0385 static void d_shrink_add(struct dentry *dentry, struct list_head *list)
0386 {
0387     D_FLAG_VERIFY(dentry, 0);
0388     list_add(&dentry->d_lru, list);
0389     dentry->d_flags |= DCACHE_SHRINK_LIST | DCACHE_LRU_LIST;
0390     this_cpu_inc(nr_dentry_unused);
0391 }
0392 
0393 /*
0394  * These can only be called under the global LRU lock, i.e. during the
0395  * callback for freeing the LRU list. "isolate" removes a dentry from the
0396  * LRU lists entirely, while "shrink_move" moves it to the indicated
0397  * private list.
0398  */
0399 static void d_lru_isolate(struct list_lru_one *lru, struct dentry *dentry)
0400 {
0401     D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
0402     dentry->d_flags &= ~DCACHE_LRU_LIST;
0403     this_cpu_dec(nr_dentry_unused);
0404     list_lru_isolate(lru, &dentry->d_lru);
0405 }
0406 
0407 static void d_lru_shrink_move(struct list_lru_one *lru, struct dentry *dentry,
0408                   struct list_head *list)
0409 {
0410     D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
0411     dentry->d_flags |= DCACHE_SHRINK_LIST;
0412     list_lru_isolate_move(lru, &dentry->d_lru, list);
0413 }
0414 
0415 /*
0416  * dentry_lru_(add|del)_list must be called with d_lock held.
0417  */
0418 static void dentry_lru_add(struct dentry *dentry)
0419 {
0420     if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST)))
0421         d_lru_add(dentry);
0422 }
0423 
0424 /**
0425  * d_drop - drop a dentry
0426  * @dentry: dentry to drop
0427  *
0428  * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
0429  * be found through a VFS lookup any more. Note that this is different from
0430  * deleting the dentry - d_delete will try to mark the dentry negative if
0431  * possible, giving a successful _negative_ lookup, while d_drop will
0432  * just make the cache lookup fail.
0433  *
0434  * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
0435  * reason (NFS timeouts or autofs deletes).
0436  *
0437  * __d_drop requires dentry->d_lock.
0438  */
0439 void __d_drop(struct dentry *dentry)
0440 {
0441     if (!d_unhashed(dentry)) {
0442         struct hlist_bl_head *b;
0443         /*
0444          * Hashed dentries are normally on the dentry hashtable,
0445          * with the exception of those newly allocated by
0446          * d_obtain_alias, which are always IS_ROOT:
0447          */
0448         if (unlikely(IS_ROOT(dentry)))
0449             b = &dentry->d_sb->s_anon;
0450         else
0451             b = d_hash(dentry->d_name.hash);
0452 
0453         hlist_bl_lock(b);
0454         __hlist_bl_del(&dentry->d_hash);
0455         dentry->d_hash.pprev = NULL;
0456         hlist_bl_unlock(b);
0457         /* After this call, in-progress rcu-walk path lookup will fail. */
0458         write_seqcount_invalidate(&dentry->d_seq);
0459     }
0460 }
0461 EXPORT_SYMBOL(__d_drop);
0462 
0463 void d_drop(struct dentry *dentry)
0464 {
0465     spin_lock(&dentry->d_lock);
0466     __d_drop(dentry);
0467     spin_unlock(&dentry->d_lock);
0468 }
0469 EXPORT_SYMBOL(d_drop);
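/*
 * A minimal usage sketch (the staleness check is a hypothetical,
 * fs-specific helper): a network filesystem that notices a cached
 * name no longer exists on the server can take the dentry out of
 * lookup with d_drop(); the next lookup then misses the dcache and
 * asks the filesystem again instead of returning stale data.
 *
 *	if (fs_name_is_stale(dentry))	// hypothetical fs-specific check
 *		d_drop(dentry);
 */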
0470 
0471 static inline void dentry_unlist(struct dentry *dentry, struct dentry *parent)
0472 {
0473     struct dentry *next;
0474     /*
0475      * Inform d_walk() and shrink_dentry_list() that we are no longer
0476      * attached to the dentry tree
0477      */
0478     dentry->d_flags |= DCACHE_DENTRY_KILLED;
0479     if (unlikely(list_empty(&dentry->d_child)))
0480         return;
0481     __list_del_entry(&dentry->d_child);
0482     /*
0483      * Cursors can move around the list of children.  While we'd been
0484      * a normal list member, it didn't matter - ->d_child.next would've
0485      * been updated.  However, from now on it won't be and for the
0486      * things like d_walk() it might end up with a nasty surprise.
0487      * Normally d_walk() doesn't care about cursors moving around -
0488      * ->d_lock on parent prevents that and since a cursor has no children
0489      * of its own, we get through it without ever unlocking the parent.
0490      * There is one exception, though - if we ascend from a child that
0491      * gets killed as soon as we unlock it, the next sibling is found
0492      * using the value left in its ->d_child.next.  And if _that_
0493      * pointed to a cursor, and cursor got moved (e.g. by lseek())
0494      * before d_walk() regains parent->d_lock, we'll end up skipping
0495      * everything the cursor had been moved past.
0496      *
0497      * Solution: make sure that the pointer left behind in ->d_child.next
0498      * points to something that won't be moving around.  I.e. skip the
0499      * cursors.
0500      */
0501     while (dentry->d_child.next != &parent->d_subdirs) {
0502         next = list_entry(dentry->d_child.next, struct dentry, d_child);
0503         if (likely(!(next->d_flags & DCACHE_DENTRY_CURSOR)))
0504             break;
0505         dentry->d_child.next = next->d_child.next;
0506     }
0507 }
0508 
0509 static void __dentry_kill(struct dentry *dentry)
0510 {
0511     struct dentry *parent = NULL;
0512     bool can_free = true;
0513     if (!IS_ROOT(dentry))
0514         parent = dentry->d_parent;
0515 
0516     /*
0517      * The dentry is now unrecoverably dead to the world.
0518      */
0519     lockref_mark_dead(&dentry->d_lockref);
0520 
0521     /*
0522      * inform the fs via d_prune that this dentry is about to be
0523      * unhashed and destroyed.
0524      */
0525     if (dentry->d_flags & DCACHE_OP_PRUNE)
0526         dentry->d_op->d_prune(dentry);
0527 
0528     if (dentry->d_flags & DCACHE_LRU_LIST) {
0529         if (!(dentry->d_flags & DCACHE_SHRINK_LIST))
0530             d_lru_del(dentry);
0531     }
0532     /* if it was on the hash then remove it */
0533     __d_drop(dentry);
0534     dentry_unlist(dentry, parent);
0535     if (parent)
0536         spin_unlock(&parent->d_lock);
0537     if (dentry->d_inode)
0538         dentry_unlink_inode(dentry);
0539     else
0540         spin_unlock(&dentry->d_lock);
0541     this_cpu_dec(nr_dentry);
0542     if (dentry->d_op && dentry->d_op->d_release)
0543         dentry->d_op->d_release(dentry);
0544 
0545     spin_lock(&dentry->d_lock);
0546     if (dentry->d_flags & DCACHE_SHRINK_LIST) {
0547         dentry->d_flags |= DCACHE_MAY_FREE;
0548         can_free = false;
0549     }
0550     spin_unlock(&dentry->d_lock);
0551     if (likely(can_free))
0552         dentry_free(dentry);
0553 }
0554 
0555 /*
0556  * Finish off a dentry we've decided to kill.
0557  * dentry->d_lock must be held, returns with it unlocked.
0558  * Returns a dentry requiring a refcount drop, or NULL if
0559  * we're done.
0560  */
0561 static struct dentry *dentry_kill(struct dentry *dentry)
0562     __releases(dentry->d_lock)
0563 {
0564     struct inode *inode = dentry->d_inode;
0565     struct dentry *parent = NULL;
0566 
0567     if (inode && unlikely(!spin_trylock(&inode->i_lock)))
0568         goto failed;
0569 
0570     if (!IS_ROOT(dentry)) {
0571         parent = dentry->d_parent;
0572         if (unlikely(!spin_trylock(&parent->d_lock))) {
0573             if (inode)
0574                 spin_unlock(&inode->i_lock);
0575             goto failed;
0576         }
0577     }
0578 
0579     __dentry_kill(dentry);
0580     return parent;
0581 
0582 failed:
0583     spin_unlock(&dentry->d_lock);
0584     return dentry; /* try again with same dentry */
0585 }
0586 
0587 static inline struct dentry *lock_parent(struct dentry *dentry)
0588 {
0589     struct dentry *parent = dentry->d_parent;
0590     if (IS_ROOT(dentry))
0591         return NULL;
0592     if (unlikely(dentry->d_lockref.count < 0))
0593         return NULL;
0594     if (likely(spin_trylock(&parent->d_lock)))
0595         return parent;
0596     rcu_read_lock();
0597     spin_unlock(&dentry->d_lock);
0598 again:
0599     parent = ACCESS_ONCE(dentry->d_parent);
0600     spin_lock(&parent->d_lock);
0601     /*
0602      * We can't blindly lock dentry until we are sure
0603      * that we won't violate the locking order.
0604      * Any changes of dentry->d_parent must have
0605      * been done with parent->d_lock held, so
0606      * spin_lock() above is enough of a barrier
0607      * for checking if it's still our child.
0608      */
0609     if (unlikely(parent != dentry->d_parent)) {
0610         spin_unlock(&parent->d_lock);
0611         goto again;
0612     }
0613     rcu_read_unlock();
0614     if (parent != dentry)
0615         spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
0616     else
0617         parent = NULL;
0618     return parent;
0619 }
0620 
0621 /*
0622  * Try to do a lockless dput(), and return whether that was successful.
0623  *
0624  * If unsuccessful, we return false, having already taken the dentry lock.
0625  *
0626  * The caller needs to hold the RCU read lock, so that the dentry is
0627  * guaranteed to stay around even if the refcount goes down to zero!
0628  */
0629 static inline bool fast_dput(struct dentry *dentry)
0630 {
0631     int ret;
0632     unsigned int d_flags;
0633 
0634     /*
0635      * If we have a d_op->d_delete() operation, we should not
0636      * let the dentry count go to zero, so use "put_or_lock".
0637      */
0638     if (unlikely(dentry->d_flags & DCACHE_OP_DELETE))
0639         return lockref_put_or_lock(&dentry->d_lockref);
0640 
0641     /*
0642      * .. otherwise, we can try to just decrement the
0643      * lockref optimistically.
0644      */
0645     ret = lockref_put_return(&dentry->d_lockref);
0646 
0647     /*
0648      * If the lockref_put_return() failed due to the lock being held
0649      * by somebody else, the fast path has failed. We will need to
0650      * get the lock, and then check the count again.
0651      */
0652     if (unlikely(ret < 0)) {
0653         spin_lock(&dentry->d_lock);
0654         if (dentry->d_lockref.count > 1) {
0655             dentry->d_lockref.count--;
0656             spin_unlock(&dentry->d_lock);
0657             return 1;
0658         }
0659         return 0;
0660     }
0661 
0662     /*
0663      * If we weren't the last ref, we're done.
0664      */
0665     if (ret)
0666         return 1;
0667 
0668     /*
0669      * Careful, careful. The reference count went down
0670      * to zero, but we don't hold the dentry lock, so
0671      * somebody else could get it again, and do another
0672      * dput(), and we need to not race with that.
0673      *
0674      * However, there is a very special and common case
0675      * where we don't care, because there is nothing to
0676      * do: the dentry is still hashed, it does not have
0677      * a 'delete' op, and it's referenced and already on
0678      * the LRU list.
0679      *
0680      * NOTE! Since we aren't locked, these values are
0681      * not "stable". However, it is sufficient that at
0682      * some point after we dropped the reference the
0683      * dentry was hashed and the flags had the proper
0684      * value. Other dentry users may have re-gotten
0685      * a reference to the dentry and change that, but
0686      * our work is done - we can leave the dentry
0687      * around with a zero refcount.
0688      */
0689     smp_rmb();
0690     d_flags = ACCESS_ONCE(dentry->d_flags);
0691     d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST | DCACHE_DISCONNECTED;
0692 
0693     /* Nothing to do? Dropping the reference was all we needed? */
0694     if (d_flags == (DCACHE_REFERENCED | DCACHE_LRU_LIST) && !d_unhashed(dentry))
0695         return 1;
0696 
0697     /*
0698      * Not the fast normal case? Get the lock. We've already decremented
0699      * the refcount, but we'll need to re-check the situation after
0700      * getting the lock.
0701      */
0702     spin_lock(&dentry->d_lock);
0703 
0704     /*
0705      * Did somebody else grab a reference to it in the meantime, and
0706      * we're no longer the last user after all? Alternatively, somebody
0707      * else could have killed it and marked it dead. Either way, we
0708      * don't need to do anything else.
0709      */
0710     if (dentry->d_lockref.count) {
0711         spin_unlock(&dentry->d_lock);
0712         return 1;
0713     }
0714 
0715     /*
0716      * Re-get the reference we optimistically dropped. We hold the
0717      * lock, and we just tested that it was zero, so we can just
0718      * set it to 1.
0719      */
0720     dentry->d_lockref.count = 1;
0721     return 0;
0722 }
0723 
0724 
0725 /* 
0726  * This is dput
0727  *
0728  * This is complicated by the fact that we do not want to put
0729  * dentries that are no longer on any hash chain on the unused
0730  * list: we'd much rather just get rid of them immediately.
0731  *
0732  * However, that implies that we have to traverse the dentry
0733  * tree upwards to the parents which might _also_ now be
0734  * scheduled for deletion (it may have been only waiting for
0735  * its last child to go away).
0736  *
0737  * This tail recursion is done by hand as we don't want to depend
0738  * on the compiler to always get this right (gcc generally doesn't).
0739  * Real recursion would eat up our stack space.
0740  */
0741 
0742 /*
0743  * dput - release a dentry
0744  * @dentry: dentry to release 
0745  *
0746  * Release a dentry. This will drop the usage count and if appropriate
0747  * call the dentry unlink method as well as removing it from the queues and
0748  * releasing its resources. If the parent dentries were scheduled for release
0749  * they too may now get deleted.
0750  */
0751 void dput(struct dentry *dentry)
0752 {
0753     if (unlikely(!dentry))
0754         return;
0755 
0756 repeat:
0757     might_sleep();
0758 
0759     rcu_read_lock();
0760     if (likely(fast_dput(dentry))) {
0761         rcu_read_unlock();
0762         return;
0763     }
0764 
0765     /* Slow case: now with the dentry lock held */
0766     rcu_read_unlock();
0767 
0768     WARN_ON(d_in_lookup(dentry));
0769 
0770     /* Unreachable? Get rid of it */
0771     if (unlikely(d_unhashed(dentry)))
0772         goto kill_it;
0773 
0774     if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
0775         goto kill_it;
0776 
0777     if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) {
0778         if (dentry->d_op->d_delete(dentry))
0779             goto kill_it;
0780     }
0781 
0782     if (!(dentry->d_flags & DCACHE_REFERENCED))
0783         dentry->d_flags |= DCACHE_REFERENCED;
0784     dentry_lru_add(dentry);
0785 
0786     dentry->d_lockref.count--;
0787     spin_unlock(&dentry->d_lock);
0788     return;
0789 
0790 kill_it:
0791     dentry = dentry_kill(dentry);
0792     if (dentry) {
0793         cond_resched();
0794         goto repeat;
0795     }
0796 }
0797 EXPORT_SYMBOL(dput);
0798 
0799 
0800 /* This must be called with d_lock held */
0801 static inline void __dget_dlock(struct dentry *dentry)
0802 {
0803     dentry->d_lockref.count++;
0804 }
0805 
0806 static inline void __dget(struct dentry *dentry)
0807 {
0808     lockref_get(&dentry->d_lockref);
0809 }
0810 
0811 struct dentry *dget_parent(struct dentry *dentry)
0812 {
0813     int gotref;
0814     struct dentry *ret;
0815 
0816     /*
0817      * Do optimistic parent lookup without any
0818      * locking.
0819      */
0820     rcu_read_lock();
0821     ret = ACCESS_ONCE(dentry->d_parent);
0822     gotref = lockref_get_not_zero(&ret->d_lockref);
0823     rcu_read_unlock();
0824     if (likely(gotref)) {
0825         if (likely(ret == ACCESS_ONCE(dentry->d_parent)))
0826             return ret;
0827         dput(ret);
0828     }
0829 
0830 repeat:
0831     /*
0832      * Don't need rcu_dereference because we re-check it was correct under
0833      * the lock.
0834      */
0835     rcu_read_lock();
0836     ret = dentry->d_parent;
0837     spin_lock(&ret->d_lock);
0838     if (unlikely(ret != dentry->d_parent)) {
0839         spin_unlock(&ret->d_lock);
0840         rcu_read_unlock();
0841         goto repeat;
0842     }
0843     rcu_read_unlock();
0844     BUG_ON(!ret->d_lockref.count);
0845     ret->d_lockref.count++;
0846     spin_unlock(&ret->d_lock);
0847     return ret;
0848 }
0849 EXPORT_SYMBOL(dget_parent);
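/*
 * A minimal usage sketch: dget_parent() returns a parent that is
 * pinned in memory (a concurrent rename may still re-parent the
 * child afterwards), and every successful call must be paired with
 * a dput():
 *
 *	struct dentry *parent = dget_parent(dentry);
 *	// ... parent cannot be freed here ...
 *	dput(parent);
 */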
0850 
0851 /**
0852  * d_find_alias - grab a hashed alias of inode
0853  * @inode: inode in question
0854  *
0855  * If inode has a hashed alias, or is a directory and has any alias,
0856  * acquire the reference to alias and return it. Otherwise return NULL.
0857  * Notice that if inode is a directory there can be only one alias and
0858  * it can be unhashed only if it has no children, or if it is the root
0859  * of a filesystem, or if the directory was renamed and d_revalidate
0860  * was the first vfs operation to notice.
0861  *
0862  * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
0863  * any other hashed alias over that one.
0864  */
0865 static struct dentry *__d_find_alias(struct inode *inode)
0866 {
0867     struct dentry *alias, *discon_alias;
0868 
0869 again:
0870     discon_alias = NULL;
0871     hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
0872         spin_lock(&alias->d_lock);
0873         if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
0874             if (IS_ROOT(alias) &&
0875                 (alias->d_flags & DCACHE_DISCONNECTED)) {
0876                 discon_alias = alias;
0877             } else {
0878                 __dget_dlock(alias);
0879                 spin_unlock(&alias->d_lock);
0880                 return alias;
0881             }
0882         }
0883         spin_unlock(&alias->d_lock);
0884     }
0885     if (discon_alias) {
0886         alias = discon_alias;
0887         spin_lock(&alias->d_lock);
0888         if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
0889             __dget_dlock(alias);
0890             spin_unlock(&alias->d_lock);
0891             return alias;
0892         }
0893         spin_unlock(&alias->d_lock);
0894         goto again;
0895     }
0896     return NULL;
0897 }
0898 
0899 struct dentry *d_find_alias(struct inode *inode)
0900 {
0901     struct dentry *de = NULL;
0902 
0903     if (!hlist_empty(&inode->i_dentry)) {
0904         spin_lock(&inode->i_lock);
0905         de = __d_find_alias(inode);
0906         spin_unlock(&inode->i_lock);
0907     }
0908     return de;
0909 }
0910 EXPORT_SYMBOL(d_find_alias);
0911 
0912 /*
0913  * Try to kill dentries associated with this inode.
0914  * WARNING: you must own a reference to inode.
0915  */
0916 void d_prune_aliases(struct inode *inode)
0917 {
0918     struct dentry *dentry;
0919 restart:
0920     spin_lock(&inode->i_lock);
0921     hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
0922         spin_lock(&dentry->d_lock);
0923         if (!dentry->d_lockref.count) {
0924             struct dentry *parent = lock_parent(dentry);
0925             if (likely(!dentry->d_lockref.count)) {
0926                 __dentry_kill(dentry);
0927                 dput(parent);
0928                 goto restart;
0929             }
0930             if (parent)
0931                 spin_unlock(&parent->d_lock);
0932         }
0933         spin_unlock(&dentry->d_lock);
0934     }
0935     spin_unlock(&inode->i_lock);
0936 }
0937 EXPORT_SYMBOL(d_prune_aliases);
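/*
 * A minimal usage sketch: a filesystem that wants unused cached
 * aliases of an inode gone, e.g. before trying to evict it, can call
 * this while holding its own inode reference:
 *
 *	ihold(inode);
 *	d_prune_aliases(inode);
 *	iput(inode);
 */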
0938 
0939 static void shrink_dentry_list(struct list_head *list)
0940 {
0941     struct dentry *dentry, *parent;
0942 
0943     while (!list_empty(list)) {
0944         struct inode *inode;
0945         dentry = list_entry(list->prev, struct dentry, d_lru);
0946         spin_lock(&dentry->d_lock);
0947         parent = lock_parent(dentry);
0948 
0949         /*
0950          * The dispose list is isolated and dentries are not accounted
0951          * to the LRU here, so we can simply remove them from the list
0952          * regardless of whether they are referenced or not.
0953          */
0954         d_shrink_del(dentry);
0955 
0956         /*
0957          * We found an in-use dentry which was not removed from
0958          * the LRU because of laziness during lookup. Do not free it.
0959          */
0960         if (dentry->d_lockref.count > 0) {
0961             spin_unlock(&dentry->d_lock);
0962             if (parent)
0963                 spin_unlock(&parent->d_lock);
0964             continue;
0965         }
0966 
0967 
0968         if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) {
0969             bool can_free = dentry->d_flags & DCACHE_MAY_FREE;
0970             spin_unlock(&dentry->d_lock);
0971             if (parent)
0972                 spin_unlock(&parent->d_lock);
0973             if (can_free)
0974                 dentry_free(dentry);
0975             continue;
0976         }
0977 
0978         inode = dentry->d_inode;
0979         if (inode && unlikely(!spin_trylock(&inode->i_lock))) {
0980             d_shrink_add(dentry, list);
0981             spin_unlock(&dentry->d_lock);
0982             if (parent)
0983                 spin_unlock(&parent->d_lock);
0984             continue;
0985         }
0986 
0987         __dentry_kill(dentry);
0988 
0989         /*
0990          * We need to prune ancestors too. This is necessary to prevent
0991          * quadratic behavior of shrink_dcache_parent(), but is also
0992          * expected to be beneficial in reducing dentry cache
0993          * fragmentation.
0994          */
0995         dentry = parent;
0996         while (dentry && !lockref_put_or_lock(&dentry->d_lockref)) {
0997             parent = lock_parent(dentry);
0998             if (dentry->d_lockref.count != 1) {
0999                 dentry->d_lockref.count--;
1000                 spin_unlock(&dentry->d_lock);
1001                 if (parent)
1002                     spin_unlock(&parent->d_lock);
1003                 break;
1004             }
1005             inode = dentry->d_inode;    /* can't be NULL */
1006             if (unlikely(!spin_trylock(&inode->i_lock))) {
1007                 spin_unlock(&dentry->d_lock);
1008                 if (parent)
1009                     spin_unlock(&parent->d_lock);
1010                 cpu_relax();
1011                 continue;
1012             }
1013             __dentry_kill(dentry);
1014             dentry = parent;
1015         }
1016     }
1017 }
1018 
1019 static enum lru_status dentry_lru_isolate(struct list_head *item,
1020         struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
1021 {
1022     struct list_head *freeable = arg;
1023     struct dentry   *dentry = container_of(item, struct dentry, d_lru);
1024 
1025 
1026     /*
1027      * we are inverting the lru lock/dentry->d_lock here,
1028      * so use a trylock. If we fail to get the lock, just skip
1029      * it
1030      */
1031     if (!spin_trylock(&dentry->d_lock))
1032         return LRU_SKIP;
1033 
1034     /*
1035      * Referenced dentries are still in use. If they have active
1036      * counts, just remove them from the LRU. Otherwise give them
1037      * another pass through the LRU.
1038      */
1039     if (dentry->d_lockref.count) {
1040         d_lru_isolate(lru, dentry);
1041         spin_unlock(&dentry->d_lock);
1042         return LRU_REMOVED;
1043     }
1044 
1045     if (dentry->d_flags & DCACHE_REFERENCED) {
1046         dentry->d_flags &= ~DCACHE_REFERENCED;
1047         spin_unlock(&dentry->d_lock);
1048 
1049         /*
1050          * The list move itself will be made by the common LRU code. At
1051          * this point, we've dropped the dentry->d_lock but keep the
1052          * lru lock. This is safe to do, since every list movement is
1053          * protected by the lru lock even if both locks are held.
1054          *
1055          * This is guaranteed by the fact that all LRU management
1056          * functions are intermediated by the LRU API calls like
1057          * list_lru_add and list_lru_del. List movement in this file
1058          * only ever occurs through these functions or through callbacks
1059          * like this one, which are called from the LRU API.
1060          *
1061          * The only exceptions to this are functions like
1062          * shrink_dentry_list, and code that first checks for the
1063          * DCACHE_SHRINK_LIST flag.  Those are guaranteed to be
1064          * operating only with stack-provided lists after they are
1065          * properly isolated from the main list.  It is thus always a
1066          * local access.
1067          */
1068         return LRU_ROTATE;
1069     }
1070 
1071     d_lru_shrink_move(lru, dentry, freeable);
1072     spin_unlock(&dentry->d_lock);
1073 
1074     return LRU_REMOVED;
1075 }
1076 
1077 /**
1078  * prune_dcache_sb - shrink the dcache
1079  * @sb: superblock
1080  * @sc: shrink control, passed to list_lru_shrink_walk()
1081  *
1082  * Attempt to shrink the superblock dcache LRU by @sc->nr_to_scan entries. This
1083  * is done when we need more memory and called from the superblock shrinker
1084  * function.
1085  *
1086  * This function may fail to free any resources if all the dentries are in
1087  * use.
1088  */
1089 long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc)
1090 {
1091     LIST_HEAD(dispose);
1092     long freed;
1093 
1094     freed = list_lru_shrink_walk(&sb->s_dentry_lru, sc,
1095                      dentry_lru_isolate, &dispose);
1096     shrink_dentry_list(&dispose);
1097     return freed;
1098 }
1099 
1100 static enum lru_status dentry_lru_isolate_shrink(struct list_head *item,
1101         struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
1102 {
1103     struct list_head *freeable = arg;
1104     struct dentry   *dentry = container_of(item, struct dentry, d_lru);
1105 
1106     /*
1107      * we are inverting the lru lock/dentry->d_lock here,
1108      * so use a trylock. If we fail to get the lock, just skip
1109      * it
1110      */
1111     if (!spin_trylock(&dentry->d_lock))
1112         return LRU_SKIP;
1113 
1114     d_lru_shrink_move(lru, dentry, freeable);
1115     spin_unlock(&dentry->d_lock);
1116 
1117     return LRU_REMOVED;
1118 }
1119 
1120 
1121 /**
1122  * shrink_dcache_sb - shrink dcache for a superblock
1123  * @sb: superblock
1124  *
1125  * Shrink the dcache for the specified super block. This is used to free
1126  * the dcache before unmounting a file system.
1127  */
1128 void shrink_dcache_sb(struct super_block *sb)
1129 {
1130     long freed;
1131 
1132     do {
1133         LIST_HEAD(dispose);
1134 
1135         freed = list_lru_walk(&sb->s_dentry_lru,
1136             dentry_lru_isolate_shrink, &dispose, UINT_MAX);
1137 
1138         this_cpu_sub(nr_dentry_unused, freed);
1139         shrink_dentry_list(&dispose);
1140     } while (freed > 0);
1141 }
1142 EXPORT_SYMBOL(shrink_dcache_sb);
1143 
1144 /**
1145  * enum d_walk_ret - action to take during tree walk
1146  * @D_WALK_CONTINUE:    continue walk
1147  * @D_WALK_QUIT:    quit walk
1148  * @D_WALK_NORETRY: quit when retry is needed
1149  * @D_WALK_SKIP:    skip this dentry and its children
1150  */
1151 enum d_walk_ret {
1152     D_WALK_CONTINUE,
1153     D_WALK_QUIT,
1154     D_WALK_NORETRY,
1155     D_WALK_SKIP,
1156 };
1157 
1158 /**
1159  * d_walk - walk the dentry tree
1160  * @parent: start of walk
1161  * @data:   data passed to @enter() and @finish()
1162  * @enter:  callback when first entering the dentry
1163  * @finish: callback when successfully finished the walk
1164  *
1165  * The @enter() and @finish() callbacks are called with d_lock held.
1166  */
1167 static void d_walk(struct dentry *parent, void *data,
1168            enum d_walk_ret (*enter)(void *, struct dentry *),
1169            void (*finish)(void *))
1170 {
1171     struct dentry *this_parent;
1172     struct list_head *next;
1173     unsigned seq = 0;
1174     enum d_walk_ret ret;
1175     bool retry = true;
1176 
1177 again:
1178     read_seqbegin_or_lock(&rename_lock, &seq);
1179     this_parent = parent;
1180     spin_lock(&this_parent->d_lock);
1181 
1182     ret = enter(data, this_parent);
1183     switch (ret) {
1184     case D_WALK_CONTINUE:
1185         break;
1186     case D_WALK_QUIT:
1187     case D_WALK_SKIP:
1188         goto out_unlock;
1189     case D_WALK_NORETRY:
1190         retry = false;
1191         break;
1192     }
1193 repeat:
1194     next = this_parent->d_subdirs.next;
1195 resume:
1196     while (next != &this_parent->d_subdirs) {
1197         struct list_head *tmp = next;
1198         struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
1199         next = tmp->next;
1200 
1201         if (unlikely(dentry->d_flags & DCACHE_DENTRY_CURSOR))
1202             continue;
1203 
1204         spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1205 
1206         ret = enter(data, dentry);
1207         switch (ret) {
1208         case D_WALK_CONTINUE:
1209             break;
1210         case D_WALK_QUIT:
1211             spin_unlock(&dentry->d_lock);
1212             goto out_unlock;
1213         case D_WALK_NORETRY:
1214             retry = false;
1215             break;
1216         case D_WALK_SKIP:
1217             spin_unlock(&dentry->d_lock);
1218             continue;
1219         }
1220 
1221         if (!list_empty(&dentry->d_subdirs)) {
1222             spin_unlock(&this_parent->d_lock);
1223             spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
1224             this_parent = dentry;
1225             spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
1226             goto repeat;
1227         }
1228         spin_unlock(&dentry->d_lock);
1229     }
1230     /*
1231      * All done at this level ... ascend and resume the search.
1232      */
1233     rcu_read_lock();
1234 ascend:
1235     if (this_parent != parent) {
1236         struct dentry *child = this_parent;
1237         this_parent = child->d_parent;
1238 
1239         spin_unlock(&child->d_lock);
1240         spin_lock(&this_parent->d_lock);
1241 
1242         /* might go back up the wrong parent if we have had a rename. */
1243         if (need_seqretry(&rename_lock, seq))
1244             goto rename_retry;
1245         /* go into the first sibling still alive */
1246         do {
1247             next = child->d_child.next;
1248             if (next == &this_parent->d_subdirs)
1249                 goto ascend;
1250             child = list_entry(next, struct dentry, d_child);
1251         } while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED));
1252         rcu_read_unlock();
1253         goto resume;
1254     }
1255     if (need_seqretry(&rename_lock, seq))
1256         goto rename_retry;
1257     rcu_read_unlock();
1258     if (finish)
1259         finish(data);
1260 
1261 out_unlock:
1262     spin_unlock(&this_parent->d_lock);
1263     done_seqretry(&rename_lock, seq);
1264     return;
1265 
1266 rename_retry:
1267     spin_unlock(&this_parent->d_lock);
1268     rcu_read_unlock();
1269     BUG_ON(seq & 1);
1270     if (!retry)
1271         return;
1272     seq = 1;
1273     goto again;
1274 }
1275 
1276 struct check_mount {
1277     struct vfsmount *mnt;
1278     unsigned int mounted;
1279 };
1280 
1281 static enum d_walk_ret path_check_mount(void *data, struct dentry *dentry)
1282 {
1283     struct check_mount *info = data;
1284     struct path path = { .mnt = info->mnt, .dentry = dentry };
1285 
1286     if (likely(!d_mountpoint(dentry)))
1287         return D_WALK_CONTINUE;
1288     if (__path_is_mountpoint(&path)) {
1289         info->mounted = 1;
1290         return D_WALK_QUIT;
1291     }
1292     return D_WALK_CONTINUE;
1293 }
1294 
1295 /**
1296  * path_has_submounts - check for mounts over a dentry in the
1297  *                      current namespace.
1298  * @parent: path to check.
1299  *
1300  * Return true if the parent or its subdirectories contain
1301  * a mount point in the current namespace.
1302  */
1303 int path_has_submounts(const struct path *parent)
1304 {
1305     struct check_mount data = { .mnt = parent->mnt, .mounted = 0 };
1306 
1307     read_seqlock_excl(&mount_lock);
1308     d_walk(parent->dentry, &data, path_check_mount, NULL);
1309     read_sequnlock_excl(&mount_lock);
1310 
1311     return data.mounted;
1312 }
1313 EXPORT_SYMBOL(path_has_submounts);
1314 
1315 /*
1316  * Called by mount code to set a mountpoint and check if the mountpoint is
1317  * reachable (e.g. NFS can unhash a directory dentry and then the complete
1318  * subtree can become unreachable).
1319  *
1320  * Only one of d_invalidate() and d_set_mounted() must succeed.  For
1321  * this reason take rename_lock and d_lock on dentry and ancestors.
1322  */
1323 int d_set_mounted(struct dentry *dentry)
1324 {
1325     struct dentry *p;
1326     int ret = -ENOENT;
1327     write_seqlock(&rename_lock);
1328     for (p = dentry->d_parent; !IS_ROOT(p); p = p->d_parent) {
1329         /* Need exclusion wrt. d_invalidate() */
1330         spin_lock(&p->d_lock);
1331         if (unlikely(d_unhashed(p))) {
1332             spin_unlock(&p->d_lock);
1333             goto out;
1334         }
1335         spin_unlock(&p->d_lock);
1336     }
1337     spin_lock(&dentry->d_lock);
1338     if (!d_unlinked(dentry)) {
1339         ret = -EBUSY;
1340         if (!d_mountpoint(dentry)) {
1341             dentry->d_flags |= DCACHE_MOUNTED;
1342             ret = 0;
1343         }
1344     }
1345     spin_unlock(&dentry->d_lock);
1346 out:
1347     write_sequnlock(&rename_lock);
1348     return ret;
1349 }
1350 
1351 /*
1352  * Search the dentry child list of the specified parent,
1353  * and move any unused dentries to the end of the unused
1354  * list for later pruning. We descend to the next level
1355  * whenever the d_subdirs list is non-empty and continue
1356  * searching.
1357  *
1358  * It returns zero iff there are no unused children,
1359  * otherwise it returns the number of children moved to
1360  * the end of the unused list. This may not be the total
1361  * number of unused children, because select_collect() can
1362  * drop the lock and return early due to latency
1363  * constraints.
1364  */
1365 
1366 struct select_data {
1367     struct dentry *start;
1368     struct list_head dispose;
1369     int found;
1370 };
1371 
1372 static enum d_walk_ret select_collect(void *_data, struct dentry *dentry)
1373 {
1374     struct select_data *data = _data;
1375     enum d_walk_ret ret = D_WALK_CONTINUE;
1376 
1377     if (data->start == dentry)
1378         goto out;
1379 
1380     if (dentry->d_flags & DCACHE_SHRINK_LIST) {
1381         data->found++;
1382     } else {
1383         if (dentry->d_flags & DCACHE_LRU_LIST)
1384             d_lru_del(dentry);
1385         if (!dentry->d_lockref.count) {
1386             d_shrink_add(dentry, &data->dispose);
1387             data->found++;
1388         }
1389     }
1390     /*
1391      * We can return to the caller if we have found some (this
1392      * ensures forward progress). We'll be coming back to find
1393      * the rest.
1394      */
1395     if (!list_empty(&data->dispose))
1396         ret = need_resched() ? D_WALK_QUIT : D_WALK_NORETRY;
1397 out:
1398     return ret;
1399 }
1400 
1401 /**
1402  * shrink_dcache_parent - prune dcache
1403  * @parent: parent of entries to prune
1404  *
1405  * Prune the dcache to remove unused children of the parent dentry.
1406  */
1407 void shrink_dcache_parent(struct dentry *parent)
1408 {
1409     for (;;) {
1410         struct select_data data;
1411 
1412         INIT_LIST_HEAD(&data.dispose);
1413         data.start = parent;
1414         data.found = 0;
1415 
1416         d_walk(parent, &data, select_collect, NULL);
1417         if (!data.found)
1418             break;
1419 
1420         shrink_dentry_list(&data.dispose);
1421         cond_resched();
1422     }
1423 }
1424 EXPORT_SYMBOL(shrink_dcache_parent);
1425 
1426 static enum d_walk_ret umount_check(void *_data, struct dentry *dentry)
1427 {
1428     /* it has busy descendants; complain about those instead */
1429     if (!list_empty(&dentry->d_subdirs))
1430         return D_WALK_CONTINUE;
1431 
1432     /* root with refcount 1 is fine */
1433     if (dentry == _data && dentry->d_lockref.count == 1)
1434         return D_WALK_CONTINUE;
1435 
1436     printk(KERN_ERR "BUG: Dentry %p{i=%lx,n=%pd}"
1437             " still in use (%d) [unmount of %s %s]\n",
1438                dentry,
1439                dentry->d_inode ?
1440                dentry->d_inode->i_ino : 0UL,
1441                dentry,
1442                dentry->d_lockref.count,
1443                dentry->d_sb->s_type->name,
1444                dentry->d_sb->s_id);
1445     WARN_ON(1);
1446     return D_WALK_CONTINUE;
1447 }
1448 
1449 static void do_one_tree(struct dentry *dentry)
1450 {
1451     shrink_dcache_parent(dentry);
1452     d_walk(dentry, dentry, umount_check, NULL);
1453     d_drop(dentry);
1454     dput(dentry);
1455 }
1456 
1457 /*
1458  * destroy the dentries attached to a superblock on unmounting
1459  */
1460 void shrink_dcache_for_umount(struct super_block *sb)
1461 {
1462     struct dentry *dentry;
1463 
1464     WARN(down_read_trylock(&sb->s_umount), "s_umount should've been locked");
1465 
1466     dentry = sb->s_root;
1467     sb->s_root = NULL;
1468     do_one_tree(dentry);
1469 
1470     while (!hlist_bl_empty(&sb->s_anon)) {
1471         dentry = dget(hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash));
1472         do_one_tree(dentry);
1473     }
1474 }
1475 
1476 struct detach_data {
1477     struct select_data select;
1478     struct dentry *mountpoint;
1479 };
1480 static enum d_walk_ret detach_and_collect(void *_data, struct dentry *dentry)
1481 {
1482     struct detach_data *data = _data;
1483 
1484     if (d_mountpoint(dentry)) {
1485         __dget_dlock(dentry);
1486         data->mountpoint = dentry;
1487         return D_WALK_QUIT;
1488     }
1489 
1490     return select_collect(&data->select, dentry);
1491 }
1492 
1493 static void check_and_drop(void *_data)
1494 {
1495     struct detach_data *data = _data;
1496 
1497     if (!data->mountpoint && !data->select.found)
1498         __d_drop(data->select.start);
1499 }
1500 
1501 /**
1502  * d_invalidate - detach submounts, prune dcache, and drop
1503  * @dentry: dentry to invalidate (aka detach, prune and drop)
1504  *
1505  * no dcache lock.
1506  *
1507  * The final d_drop is done as an atomic operation relative to
1508  * rename_lock ensuring there are no races with d_set_mounted.  This
1509  * ensures there are no unhashed dentries on the path to a mountpoint.
1510  */
1511 void d_invalidate(struct dentry *dentry)
1512 {
1513     /*
1514      * If it's already been dropped, return OK.
1515      */
1516     spin_lock(&dentry->d_lock);
1517     if (d_unhashed(dentry)) {
1518         spin_unlock(&dentry->d_lock);
1519         return;
1520     }
1521     spin_unlock(&dentry->d_lock);
1522 
1523     /* Negative dentries can be dropped without further checks */
1524     if (!dentry->d_inode) {
1525         d_drop(dentry);
1526         return;
1527     }
1528 
1529     for (;;) {
1530         struct detach_data data;
1531 
1532         data.mountpoint = NULL;
1533         INIT_LIST_HEAD(&data.select.dispose);
1534         data.select.start = dentry;
1535         data.select.found = 0;
1536 
1537         d_walk(dentry, &data, detach_and_collect, check_and_drop);
1538 
1539         if (data.select.found)
1540             shrink_dentry_list(&data.select.dispose);
1541 
1542         if (data.mountpoint) {
1543             detach_mounts(data.mountpoint);
1544             dput(data.mountpoint);
1545         }
1546 
1547         if (!data.mountpoint && !data.select.found)
1548             break;
1549 
1550         cond_resched();
1551     }
1552 }
1553 EXPORT_SYMBOL(d_invalidate);
1554 
1555 /**
1556  * __d_alloc    -   allocate a dcache entry
1557  * @sb: filesystem it will belong to
1558  * @name: qstr of the name
1559  *
1560  * Allocates a dentry. It returns %NULL if there is insufficient memory
1561  * available. On a success the dentry is returned. The name passed in is
1562  * copied and the copy passed in may be reused after this call.
1563  */
1564  
1565 struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
1566 {
1567     struct dentry *dentry;
1568     char *dname;
1569     int err;
1570 
1571     dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
1572     if (!dentry)
1573         return NULL;
1574 
1575     /*
1576      * We guarantee that the inline name is always NUL-terminated.
1577      * This way the memcpy() done by the name switching in rename
1578      * will still always have a NUL at the end, even if we might
1579      * be overwriting an internal NUL character
1580      */
1581     dentry->d_iname[DNAME_INLINE_LEN-1] = 0;
1582     if (unlikely(!name)) {
1583         static const struct qstr anon = QSTR_INIT("/", 1);
1584         name = &anon;
1585         dname = dentry->d_iname;
1586     } else if (name->len > DNAME_INLINE_LEN-1) {
1587         size_t size = offsetof(struct external_name, name[1]);
1588         struct external_name *p = kmalloc(size + name->len,
1589                           GFP_KERNEL_ACCOUNT);
1590         if (!p) {
1591             kmem_cache_free(dentry_cache, dentry); 
1592             return NULL;
1593         }
1594         atomic_set(&p->u.count, 1);
1595         dname = p->name;
1596         if (IS_ENABLED(CONFIG_DCACHE_WORD_ACCESS))
1597             kasan_unpoison_shadow(dname,
1598                 round_up(name->len + 1, sizeof(unsigned long)));
1599     } else {
1600         dname = dentry->d_iname;
1601     }
1602 
1603     dentry->d_name.len = name->len;
1604     dentry->d_name.hash = name->hash;
1605     memcpy(dname, name->name, name->len);
1606     dname[name->len] = 0;
1607 
1608     /* Make sure we always see the terminating NUL character */
1609     smp_wmb();
1610     dentry->d_name.name = dname;
1611 
1612     dentry->d_lockref.count = 1;
1613     dentry->d_flags = 0;
1614     spin_lock_init(&dentry->d_lock);
1615     seqcount_init(&dentry->d_seq);
1616     dentry->d_inode = NULL;
1617     dentry->d_parent = dentry;
1618     dentry->d_sb = sb;
1619     dentry->d_op = NULL;
1620     dentry->d_fsdata = NULL;
1621     INIT_HLIST_BL_NODE(&dentry->d_hash);
1622     INIT_LIST_HEAD(&dentry->d_lru);
1623     INIT_LIST_HEAD(&dentry->d_subdirs);
1624     INIT_HLIST_NODE(&dentry->d_u.d_alias);
1625     INIT_LIST_HEAD(&dentry->d_child);
1626     d_set_d_op(dentry, dentry->d_sb->s_d_op);
1627 
1628     if (dentry->d_op && dentry->d_op->d_init) {
1629         err = dentry->d_op->d_init(dentry);
1630         if (err) {
1631             if (dname_external(dentry))
1632                 kfree(external_name(dentry));
1633             kmem_cache_free(dentry_cache, dentry);
1634             return NULL;
1635         }
1636     }
1637 
1638     this_cpu_inc(nr_dentry);
1639 
1640     return dentry;
1641 }
1642 
1643 /**
1644  * d_alloc  -   allocate a dcache entry
1645  * @parent: parent of entry to allocate
1646  * @name: qstr of the name
1647  *
1648  * Allocates a dentry. It returns %NULL if there is insufficient memory
1649  * available. On a success the dentry is returned. The name passed in is
1650  * copied and the copy passed in may be reused after this call.
1651  */
1652 struct dentry *d_alloc(struct dentry *parent, const struct qstr *name)
1653 {
1654     struct dentry *dentry = __d_alloc(parent->d_sb, name);
1655     if (!dentry)
1656         return NULL;
1657     dentry->d_flags |= DCACHE_RCUACCESS;
1658     spin_lock(&parent->d_lock);
1659     /*
1660      * don't need child lock because it is not subject
1661      * to concurrency here
1662      */
1663     __dget_dlock(parent);
1664     dentry->d_parent = parent;
1665     list_add(&dentry->d_child, &parent->d_subdirs);
1666     spin_unlock(&parent->d_lock);
1667 
1668     return dentry;
1669 }
1670 EXPORT_SYMBOL(d_alloc);
1671 
1672 struct dentry *d_alloc_cursor(struct dentry *parent)
1673 {
1674     struct dentry *dentry = __d_alloc(parent->d_sb, NULL);
1675     if (dentry) {
1676         dentry->d_flags |= DCACHE_RCUACCESS | DCACHE_DENTRY_CURSOR;
1677         dentry->d_parent = dget(parent);
1678     }
1679     return dentry;
1680 }
1681 
1682 /**
1683  * d_alloc_pseudo - allocate a dentry (for lookup-less filesystems)
1684  * @sb: the superblock
1685  * @name: qstr of the name
1686  *
1687  * For a filesystem that just pins its dentries in memory and never
1688  * performs lookups at all, return an unhashed IS_ROOT dentry.
1689  */
1690 struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
1691 {
1692     return __d_alloc(sb, name);
1693 }
1694 EXPORT_SYMBOL(d_alloc_pseudo);
1695 
1696 struct dentry *d_alloc_name(struct dentry *parent, const char *name)
1697 {
1698     struct qstr q;
1699 
1700     q.name = name;
1701     q.hash_len = hashlen_string(parent, name);
1702     return d_alloc(parent, &q);
1703 }
1704 EXPORT_SYMBOL(d_alloc_name);
1705 
1706 void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
1707 {
1708     WARN_ON_ONCE(dentry->d_op);
1709     WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH  |
1710                 DCACHE_OP_COMPARE   |
1711                 DCACHE_OP_REVALIDATE    |
1712                 DCACHE_OP_WEAK_REVALIDATE   |
1713                 DCACHE_OP_DELETE    |
1714                 DCACHE_OP_REAL));
1715     dentry->d_op = op;
1716     if (!op)
1717         return;
1718     if (op->d_hash)
1719         dentry->d_flags |= DCACHE_OP_HASH;
1720     if (op->d_compare)
1721         dentry->d_flags |= DCACHE_OP_COMPARE;
1722     if (op->d_revalidate)
1723         dentry->d_flags |= DCACHE_OP_REVALIDATE;
1724     if (op->d_weak_revalidate)
1725         dentry->d_flags |= DCACHE_OP_WEAK_REVALIDATE;
1726     if (op->d_delete)
1727         dentry->d_flags |= DCACHE_OP_DELETE;
1728     if (op->d_prune)
1729         dentry->d_flags |= DCACHE_OP_PRUNE;
1730     if (op->d_real)
1731         dentry->d_flags |= DCACHE_OP_REAL;
1732 
1733 }
1734 EXPORT_SYMBOL(d_set_d_op);
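
/*
 * Example (editor's sketch, not part of the original source): d_set_d_op()
 * is normally applied for the filesystem by __d_alloc() via sb->s_d_op, so
 * most filesystems just set s_d_op once at mount time.
 * always_delete_dentry() is the libfs helper; the "myfs" names are
 * hypothetical.
 *
 *	static const struct dentry_operations myfs_dentry_ops = {
 *		.d_delete	= always_delete_dentry,
 *	};
 *
 *	static int myfs_fill_super(struct super_block *sb, void *data,
 *				   int silent)
 *	{
 *		...
 *		sb->s_d_op = &myfs_dentry_ops;
 *		...
 *	}
 */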
1735 
1736 
1737 /*
1738  * d_set_fallthru - Mark a dentry as falling through to a lower layer
1739  * @dentry - The dentry to mark
1740  *
1741  * Mark a dentry as falling through to the lower layer (as set with
1742  * d_pin_lower()).  This flag may be recorded on the medium.
1743  */
1744 void d_set_fallthru(struct dentry *dentry)
1745 {
1746     spin_lock(&dentry->d_lock);
1747     dentry->d_flags |= DCACHE_FALLTHRU;
1748     spin_unlock(&dentry->d_lock);
1749 }
1750 EXPORT_SYMBOL(d_set_fallthru);
1751 
1752 static unsigned d_flags_for_inode(struct inode *inode)
1753 {
1754     unsigned add_flags = DCACHE_REGULAR_TYPE;
1755 
1756     if (!inode)
1757         return DCACHE_MISS_TYPE;
1758 
1759     if (S_ISDIR(inode->i_mode)) {
1760         add_flags = DCACHE_DIRECTORY_TYPE;
1761         if (unlikely(!(inode->i_opflags & IOP_LOOKUP))) {
1762             if (unlikely(!inode->i_op->lookup))
1763                 add_flags = DCACHE_AUTODIR_TYPE;
1764             else
1765                 inode->i_opflags |= IOP_LOOKUP;
1766         }
1767         goto type_determined;
1768     }
1769 
1770     if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) {
1771         if (unlikely(inode->i_op->get_link)) {
1772             add_flags = DCACHE_SYMLINK_TYPE;
1773             goto type_determined;
1774         }
1775         inode->i_opflags |= IOP_NOFOLLOW;
1776     }
1777 
1778     if (unlikely(!S_ISREG(inode->i_mode)))
1779         add_flags = DCACHE_SPECIAL_TYPE;
1780 
1781 type_determined:
1782     if (unlikely(IS_AUTOMOUNT(inode)))
1783         add_flags |= DCACHE_NEED_AUTOMOUNT;
1784     return add_flags;
1785 }
1786 
1787 static void __d_instantiate(struct dentry *dentry, struct inode *inode)
1788 {
1789     unsigned add_flags = d_flags_for_inode(inode);
1790     WARN_ON(d_in_lookup(dentry));
1791 
1792     spin_lock(&dentry->d_lock);
1793     hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
1794     raw_write_seqcount_begin(&dentry->d_seq);
1795     __d_set_inode_and_type(dentry, inode, add_flags);
1796     raw_write_seqcount_end(&dentry->d_seq);
1797     fsnotify_update_flags(dentry);
1798     spin_unlock(&dentry->d_lock);
1799 }
1800 
1801 /**
1802  * d_instantiate - fill in inode information for a dentry
1803  * @entry: dentry to complete
1804  * @inode: inode to attach to this dentry
1805  *
1806  * Fill in inode information in the entry.
1807  *
1808  * This turns negative dentries into productive full members
1809  * of society.
1810  *
1811  * NOTE! This assumes that the inode count has been incremented
1812  * (or otherwise set) by the caller to indicate that it is now
1813  * in use by the dcache.
1814  */
1815  
1816 void d_instantiate(struct dentry *entry, struct inode * inode)
1817 {
1818     BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
1819     if (inode) {
1820         security_d_instantiate(entry, inode);
1821         spin_lock(&inode->i_lock);
1822         __d_instantiate(entry, inode);
1823         spin_unlock(&inode->i_lock);
1824     }
1825 }
1826 EXPORT_SYMBOL(d_instantiate);
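
/*
 * Example (editor's sketch, not part of the original source): the usual
 * ->create() shape in a ramfs-like filesystem - allocate the inode, then
 * hand the reference to the dcache.  "myfs_new_inode" is hypothetical.
 *
 *	static int myfs_create(struct inode *dir, struct dentry *dentry,
 *			       umode_t mode, bool excl)
 *	{
 *		struct inode *inode = myfs_new_inode(dir->i_sb, mode);
 *
 *		if (!inode)
 *			return -ENOSPC;
 *		d_instantiate(dentry, inode);	- consumes the inode reference
 *		dget(dentry);			- optional extra pin, as ramfs does
 *		return 0;
 *	}
 */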
1827 
1828 /**
1829  * d_instantiate_no_diralias - instantiate a non-aliased dentry
1830  * @entry: dentry to complete
1831  * @inode: inode to attach to this dentry
1832  *
1833  * Fill in inode information in the entry.  If a directory alias is found, then
1834  * return an error (and drop inode).  Together with d_materialise_unique() this
1835  * guarantees that a directory inode may never have more than one alias.
1836  */
1837 int d_instantiate_no_diralias(struct dentry *entry, struct inode *inode)
1838 {
1839     BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
1840 
1841     security_d_instantiate(entry, inode);
1842     spin_lock(&inode->i_lock);
1843     if (S_ISDIR(inode->i_mode) && !hlist_empty(&inode->i_dentry)) {
1844         spin_unlock(&inode->i_lock);
1845         iput(inode);
1846         return -EBUSY;
1847     }
1848     __d_instantiate(entry, inode);
1849     spin_unlock(&inode->i_lock);
1850 
1851     return 0;
1852 }
1853 EXPORT_SYMBOL(d_instantiate_no_diralias);
1854 
1855 struct dentry *d_make_root(struct inode *root_inode)
1856 {
1857     struct dentry *res = NULL;
1858 
1859     if (root_inode) {
1860         res = __d_alloc(root_inode->i_sb, NULL);
1861         if (res)
1862             d_instantiate(res, root_inode);
1863         else
1864             iput(root_inode);
1865     }
1866     return res;
1867 }
1868 EXPORT_SYMBOL(d_make_root);
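
/*
 * Example (editor's sketch, not part of the original source): the
 * canonical fill_super() tail.  d_make_root() consumes the inode
 * reference even on failure, so no iput() is needed in the error path.
 * "myfs_make_root_inode" is a hypothetical helper.
 *
 *	inode = myfs_make_root_inode(sb);
 *	sb->s_root = d_make_root(inode);
 *	if (!sb->s_root)
 *		return -ENOMEM;
 *	return 0;
 */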
1869 
1870 static struct dentry * __d_find_any_alias(struct inode *inode)
1871 {
1872     struct dentry *alias;
1873 
1874     if (hlist_empty(&inode->i_dentry))
1875         return NULL;
1876     alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias);
1877     __dget(alias);
1878     return alias;
1879 }
1880 
1881 /**
1882  * d_find_any_alias - find any alias for a given inode
1883  * @inode: inode to find an alias for
1884  *
1885  * If any aliases exist for the given inode, take and return a
1886  * reference for one of them.  If no aliases exist, return %NULL.
1887  */
1888 struct dentry *d_find_any_alias(struct inode *inode)
1889 {
1890     struct dentry *de;
1891 
1892     spin_lock(&inode->i_lock);
1893     de = __d_find_any_alias(inode);
1894     spin_unlock(&inode->i_lock);
1895     return de;
1896 }
1897 EXPORT_SYMBOL(d_find_any_alias);
1898 
1899 static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected)
1900 {
1901     struct dentry *tmp;
1902     struct dentry *res;
1903     unsigned add_flags;
1904 
1905     if (!inode)
1906         return ERR_PTR(-ESTALE);
1907     if (IS_ERR(inode))
1908         return ERR_CAST(inode);
1909 
1910     res = d_find_any_alias(inode);
1911     if (res)
1912         goto out_iput;
1913 
1914     tmp = __d_alloc(inode->i_sb, NULL);
1915     if (!tmp) {
1916         res = ERR_PTR(-ENOMEM);
1917         goto out_iput;
1918     }
1919 
1920     security_d_instantiate(tmp, inode);
1921     spin_lock(&inode->i_lock);
1922     res = __d_find_any_alias(inode);
1923     if (res) {
1924         spin_unlock(&inode->i_lock);
1925         dput(tmp);
1926         goto out_iput;
1927     }
1928 
1929     /* attach a disconnected dentry */
1930     add_flags = d_flags_for_inode(inode);
1931 
1932     if (disconnected)
1933         add_flags |= DCACHE_DISCONNECTED;
1934 
1935     spin_lock(&tmp->d_lock);
1936     __d_set_inode_and_type(tmp, inode, add_flags);
1937     hlist_add_head(&tmp->d_u.d_alias, &inode->i_dentry);
1938     hlist_bl_lock(&tmp->d_sb->s_anon);
1939     hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
1940     hlist_bl_unlock(&tmp->d_sb->s_anon);
1941     spin_unlock(&tmp->d_lock);
1942     spin_unlock(&inode->i_lock);
1943 
1944     return tmp;
1945 
1946  out_iput:
1947     iput(inode);
1948     return res;
1949 }
1950 
1951 /**
1952  * d_obtain_alias - find or allocate a DISCONNECTED dentry for a given inode
1953  * @inode: inode to allocate the dentry for
1954  *
1955  * Obtain a dentry for an inode resulting from NFS filehandle conversion or
1956  * similar open by handle operations.  The returned dentry may be anonymous,
1957  * or may have a full name (if the inode was already in the cache).
1958  *
1959  * When called on a directory inode, we must ensure that the inode only ever
1960  * has one dentry.  If a dentry is found, that is returned instead of
1961  * allocating a new one.
1962  *
1963  * On successful return, the reference to the inode has been transferred
1964  * to the dentry.  In case of an error the reference on the inode is released.
1965  * To make it easier to use in export operations a %NULL or IS_ERR inode may
1966  * be passed in and the error will be propagated to the return value,
1967  * with a %NULL @inode replaced by ERR_PTR(-ESTALE).
1968  */
1969 struct dentry *d_obtain_alias(struct inode *inode)
1970 {
1971     return __d_obtain_alias(inode, 1);
1972 }
1973 EXPORT_SYMBOL(d_obtain_alias);
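
/*
 * Example (editor's sketch, not part of the original source): a minimal
 * export_operations ->fh_to_dentry(), in the style of
 * generic_fh_to_dentry().  "myfs_iget" is a hypothetical inode lookup
 * that may return an ERR_PTR; d_obtain_alias() copes with NULL and
 * ERR_PTR inodes for us.
 *
 *	static struct dentry *myfs_fh_to_dentry(struct super_block *sb,
 *				struct fid *fid, int fh_len, int fh_type)
 *	{
 *		struct inode *inode = myfs_iget(sb, fid->i32.ino);
 *
 *		return d_obtain_alias(inode);
 *	}
 */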
1974 
1975 /**
1976  * d_obtain_root - find or allocate a dentry for a given inode
1977  * @inode: inode to allocate the dentry for
1978  *
1979  * Obtain an IS_ROOT dentry for the root of a filesystem.
1980  *
1981  * We must ensure that directory inodes only ever have one dentry.  If a
1982  * dentry is found, that is returned instead of allocating a new one.
1983  *
1984  * On successful return, the reference to the inode has been transferred
1985  * to the dentry.  In case of an error the reference on the inode is
1986  * released.  A %NULL or IS_ERR inode may be passed in and the error
1987  * will be propagated to the return value, with a %NULL @inode
1988  * replaced by ERR_PTR(-ESTALE).
1989  */
1990 struct dentry *d_obtain_root(struct inode *inode)
1991 {
1992     return __d_obtain_alias(inode, 0);
1993 }
1994 EXPORT_SYMBOL(d_obtain_root);
1995 
1996 /**
1997  * d_add_ci - lookup or allocate new dentry with case-exact name
1998  * @inode:  the inode case-insensitive lookup has found
1999  * @dentry: the negative dentry that was passed to the parent's lookup func
2000  * @name:   the case-exact name to be associated with the returned dentry
2001  *
2002  * This is to avoid filling the dcache with case-insensitive names for the
2003  * same inode; only the actual correct case is stored in the dcache for
2004  * case-insensitive filesystems.
2005  *
2006  * For a case-insensitive lookup match, if the case-exact dentry
2007  * already exists in the dcache, use it and return it.
2008  *
2009  * If no entry exists with the exact-case name, allocate a new dentry with
2010  * the exact case, and return the spliced entry.
2011  */
2012 struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
2013             struct qstr *name)
2014 {
2015     struct dentry *found, *res;
2016 
2017     /*
2018      * First check if a dentry matching the name already exists,
2019      * if not go ahead and create it now.
2020      */
2021     found = d_hash_and_lookup(dentry->d_parent, name);
2022     if (found) {
2023         iput(inode);
2024         return found;
2025     }
2026     if (d_in_lookup(dentry)) {
2027         found = d_alloc_parallel(dentry->d_parent, name,
2028                     dentry->d_wait);
2029         if (IS_ERR(found) || !d_in_lookup(found)) {
2030             iput(inode);
2031             return found;
2032         }
2033     } else {
2034         found = d_alloc(dentry->d_parent, name);
2035         if (!found) {
2036             iput(inode);
2037             return ERR_PTR(-ENOMEM);
2038         } 
2039     }
2040     res = d_splice_alias(inode, found);
2041     if (res) {
2042         dput(found);
2043         return res;
2044     }
2045     return found;
2046 }
2047 EXPORT_SYMBOL(d_add_ci);
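
/*
 * Example (editor's sketch, not part of the original source): the tail of
 * a case-insensitive ->lookup() once the directory scan has found the
 * on-disk, case-exact name and grabbed its inode.  "exact_name" is a
 * qstr built from the on-disk spelling.
 *
 *	return d_add_ci(dentry, inode, &exact_name);
 */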
2048 
2049 
2050 static inline bool d_same_name(const struct dentry *dentry,
2051                 const struct dentry *parent,
2052                 const struct qstr *name)
2053 {
2054     if (likely(!(parent->d_flags & DCACHE_OP_COMPARE))) {
2055         if (dentry->d_name.len != name->len)
2056             return false;
2057         return dentry_cmp(dentry, name->name, name->len) == 0;
2058     }
2059     return parent->d_op->d_compare(dentry,
2060                        dentry->d_name.len, dentry->d_name.name,
2061                        name) == 0;
2062 }
2063 
2064 /**
2065  * __d_lookup_rcu - search for a dentry (racy, store-free)
2066  * @parent: parent dentry
2067  * @name: qstr of name we wish to find
2068  * @seqp: returns d_seq value at the point where the dentry was found
2069  * Returns: dentry, or NULL
2070  *
2071  * __d_lookup_rcu is the dcache lookup function for rcu-walk name
2072  * resolution (store-free path walking) design described in
2073  * Documentation/filesystems/path-lookup.txt.
2074  *
2075  * This is not to be used outside core vfs.
2076  *
2077  * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock
2078  * held, and rcu_read_lock held. The returned dentry must not be stored
2079  * into a longer-lived pointer without taking d_lock and checking the d_seq
2080  * sequence count against @seq returned here.
2081  *
2082  * A refcount may be taken on the found dentry with the d_rcu_to_refcount
2083  * function.
2084  *
2085  * Alternatively, __d_lookup_rcu may be called again to look up the child of
2086  * the returned dentry, so long as its parent's seqlock is checked after the
2087  * child is looked up. Thus, an interlocking stepping of sequence lock checks
2088  * is formed, giving integrity down the path walk.
2089  *
2090  * NOTE! The caller *has* to check the resulting dentry against the sequence
2091  * number we've returned before using any of the resulting dentry state!
2092  */
2093 struct dentry *__d_lookup_rcu(const struct dentry *parent,
2094                 const struct qstr *name,
2095                 unsigned *seqp)
2096 {
2097     u64 hashlen = name->hash_len;
2098     const unsigned char *str = name->name;
2099     struct hlist_bl_head *b = d_hash(hashlen_hash(hashlen));
2100     struct hlist_bl_node *node;
2101     struct dentry *dentry;
2102 
2103     /*
2104      * Note: There is significant duplication with __d_lookup which is
2105      * required to prevent single threaded performance regressions
2106      * especially on architectures where smp_rmb (in seqcounts) are costly.
2107      * Keep the two functions in sync.
2108      */
2109 
2110     /*
2111      * The hash list is protected using RCU.
2112      *
2113      * Carefully use d_seq when comparing a candidate dentry, to avoid
2114      * races with d_move().
2115      *
2116      * It is possible that concurrent renames can mess up our list
2117      * walk here and result in missing our dentry, resulting in the
2118      * false-negative result. d_lookup() protects against concurrent
2119      * renames using rename_lock seqlock.
2120      *
2121      * See Documentation/filesystems/path-lookup.txt for more details.
2122      */
2123     hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
2124         unsigned seq;
2125 
2126 seqretry:
2127         /*
2128          * The dentry sequence count protects us from concurrent
2129          * renames, and thus protects parent and name fields.
2130          *
2131          * The caller must perform a seqcount check in order
2132          * to do anything useful with the returned dentry.
2133          *
2134          * NOTE! We do a "raw" seqcount_begin here. That means that
2135          * we don't wait for the sequence count to stabilize if it
2136          * is in the middle of a sequence change. If we do the slow
2137          * dentry compare, we will do seqretries until it is stable,
2138          * and if we end up with a successful lookup, we actually
2139          * want to exit RCU lookup anyway.
2140          *
2141          * Note that raw_seqcount_begin still *does* smp_rmb(), so
2142          * we are still guaranteed NUL-termination of ->d_name.name.
2143          */
2144         seq = raw_seqcount_begin(&dentry->d_seq);
2145         if (dentry->d_parent != parent)
2146             continue;
2147         if (d_unhashed(dentry))
2148             continue;
2149 
2150         if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
2151             int tlen;
2152             const char *tname;
2153             if (dentry->d_name.hash != hashlen_hash(hashlen))
2154                 continue;
2155             tlen = dentry->d_name.len;
2156             tname = dentry->d_name.name;
2157             /* we want a consistent (name,len) pair */
2158             if (read_seqcount_retry(&dentry->d_seq, seq)) {
2159                 cpu_relax();
2160                 goto seqretry;
2161             }
2162             if (parent->d_op->d_compare(dentry,
2163                             tlen, tname, name) != 0)
2164                 continue;
2165         } else {
2166             if (dentry->d_name.hash_len != hashlen)
2167                 continue;
2168             if (dentry_cmp(dentry, str, hashlen_len(hashlen)) != 0)
2169                 continue;
2170         }
2171         *seqp = seq;
2172         return dentry;
2173     }
2174     return NULL;
2175 }
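
/*
 * Example (editor's sketch, not part of the original source): the
 * caller-side validation pattern used by the rcu-walk machinery.
 * Nothing returned by __d_lookup_rcu() may be trusted until d_seq has
 * been re-checked.
 *
 *	unsigned seq;
 *	struct dentry *dentry = __d_lookup_rcu(parent, &name, &seq);
 *
 *	if (!dentry)
 *		... fall back to __d_lookup() under rename_lock ...
 *	else if (read_seqcount_retry(&dentry->d_seq, seq))
 *		... lost a race with d_move(); retry or drop to ref-walk ...
 */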
2176 
2177 /**
2178  * d_lookup - search for a dentry
2179  * @parent: parent dentry
2180  * @name: qstr of name we wish to find
2181  * Returns: dentry, or NULL
2182  *
2183  * d_lookup searches the children of the parent dentry for the name in
2184  * question. If the dentry is found its reference count is incremented and the
2185  * dentry is returned. The caller must use dput to free the entry when it has
2186  * finished using it. %NULL is returned if the dentry does not exist.
2187  */
2188 struct dentry *d_lookup(const struct dentry *parent, const struct qstr *name)
2189 {
2190     struct dentry *dentry;
2191     unsigned seq;
2192 
2193     do {
2194         seq = read_seqbegin(&rename_lock);
2195         dentry = __d_lookup(parent, name);
2196         if (dentry)
2197             break;
2198     } while (read_seqretry(&rename_lock, seq));
2199     return dentry;
2200 }
2201 EXPORT_SYMBOL(d_lookup);
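
/*
 * Example (editor's sketch, not part of the original source): d_lookup()
 * returns a referenced dentry, so every successful call must be paired
 * with dput().
 *
 *	struct dentry *child = d_lookup(parent, &name);
 *
 *	if (child) {
 *		... use child ...
 *		dput(child);
 *	}
 */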
2202 
2203 /**
2204  * __d_lookup - search for a dentry (racy)
2205  * @parent: parent dentry
2206  * @name: qstr of name we wish to find
2207  * Returns: dentry, or NULL
2208  *
2209  * __d_lookup is like d_lookup, however it may (rarely) return a
2210  * false-negative result due to unrelated rename activity.
2211  *
2212  * __d_lookup is slightly faster by avoiding rename_lock read seqlock,
2213  * however it must be used carefully, eg. with a following d_lookup in
2214  * the case of failure.
2215  *
2216  * __d_lookup callers must be commented.
2217  */
2218 struct dentry *__d_lookup(const struct dentry *parent, const struct qstr *name)
2219 {
2220     unsigned int hash = name->hash;
2221     struct hlist_bl_head *b = d_hash(hash);
2222     struct hlist_bl_node *node;
2223     struct dentry *found = NULL;
2224     struct dentry *dentry;
2225 
2226     /*
2227      * Note: There is significant duplication with __d_lookup_rcu which is
2228      * required to prevent single threaded performance regressions
2229      * especially on architectures where smp_rmb (in seqcounts) are costly.
2230      * Keep the two functions in sync.
2231      */
2232 
2233     /*
2234      * The hash list is protected using RCU.
2235      *
2236      * Take d_lock when comparing a candidate dentry, to avoid races
2237      * with d_move().
2238      *
2239      * It is possible that concurrent renames can mess up our list
2240      * walk here and result in missing our dentry, resulting in the
2241      * false-negative result. d_lookup() protects against concurrent
2242      * renames using rename_lock seqlock.
2243      *
2244      * See Documentation/filesystems/path-lookup.txt for more details.
2245      */
2246     rcu_read_lock();
2247     
2248     hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
2249 
2250         if (dentry->d_name.hash != hash)
2251             continue;
2252 
2253         spin_lock(&dentry->d_lock);
2254         if (dentry->d_parent != parent)
2255             goto next;
2256         if (d_unhashed(dentry))
2257             goto next;
2258 
2259         if (!d_same_name(dentry, parent, name))
2260             goto next;
2261 
2262         dentry->d_lockref.count++;
2263         found = dentry;
2264         spin_unlock(&dentry->d_lock);
2265         break;
2266 next:
2267         spin_unlock(&dentry->d_lock);
2268     }
2269     rcu_read_unlock();
2270 
2271     return found;
2272 }
2273 
2274 /**
2275  * d_hash_and_lookup - hash the qstr then search for a dentry
2276  * @dir: Directory to search in
2277  * @name: qstr of name we wish to find
2278  *
2279  * On lookup failure NULL is returned; on bad name - ERR_PTR(-error)
2280  */
2281 struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
2282 {
2283     /*
2284      * Check for a fs-specific hash function. Note that we must
2285      * calculate the standard hash first, as the d_op->d_hash()
2286      * routine may choose to leave the hash value unchanged.
2287      */
2288     name->hash = full_name_hash(dir, name->name, name->len);
2289     if (dir->d_flags & DCACHE_OP_HASH) {
2290         int err = dir->d_op->d_hash(dir, name);
2291         if (unlikely(err < 0))
2292             return ERR_PTR(err);
2293     }
2294     return d_lookup(dir, name);
2295 }
2296 EXPORT_SYMBOL(d_hash_and_lookup);
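
/*
 * Example (editor's sketch, not part of the original source): looking up
 * a child by a raw C string, as procfs-style code does.  The qstr must be
 * writable because ->hash is filled in here.
 *
 *	struct qstr q = QSTR_INIT(name, strlen(name));
 *	struct dentry *res = d_hash_and_lookup(dir, &q);
 *
 *	if (IS_ERR(res))
 *		... the fs-specific ->d_hash() rejected the name ...
 *	else if (res)
 *		... use res, then dput(res) ...
 */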
2297 
2298 /*
2299  * When a file is deleted, we have two options:
2300  * - turn this dentry into a negative dentry
2301  * - unhash this dentry and free it.
2302  *
2303  * Usually, we want to just turn this into
2304  * a negative dentry, but if anybody else is
2305  * currently using the dentry or the inode
2306  * we can't do that and we fall back on removing
2307  * it from the hash queues and waiting for
2308  * it to be deleted later when it has no users
2309  */
2310  
2311 /**
2312  * d_delete - delete a dentry
2313  * @dentry: The dentry to delete
2314  *
2315  * Turn the dentry into a negative dentry if possible, otherwise
2316  * remove it from the hash queues so it can be deleted later
2317  */
2318  
2319 void d_delete(struct dentry * dentry)
2320 {
2321     struct inode *inode;
2322     int isdir = 0;
2323     /*
2324      * Are we the only user?
2325      */
2326 again:
2327     spin_lock(&dentry->d_lock);
2328     inode = dentry->d_inode;
2329     isdir = S_ISDIR(inode->i_mode);
2330     if (dentry->d_lockref.count == 1) {
2331         if (!spin_trylock(&inode->i_lock)) {
2332             spin_unlock(&dentry->d_lock);
2333             cpu_relax();
2334             goto again;
2335         }
2336         dentry->d_flags &= ~DCACHE_CANT_MOUNT;
2337         dentry_unlink_inode(dentry);
2338         fsnotify_nameremove(dentry, isdir);
2339         return;
2340     }
2341 
2342     if (!d_unhashed(dentry))
2343         __d_drop(dentry);
2344 
2345     spin_unlock(&dentry->d_lock);
2346 
2347     fsnotify_nameremove(dentry, isdir);
2348 }
2349 EXPORT_SYMBOL(d_delete);
2350 
2351 static void __d_rehash(struct dentry *entry)
2352 {
2353     struct hlist_bl_head *b = d_hash(entry->d_name.hash);
2354     BUG_ON(!d_unhashed(entry));
2355     hlist_bl_lock(b);
2356     hlist_bl_add_head_rcu(&entry->d_hash, b);
2357     hlist_bl_unlock(b);
2358 }
2359 
2360 /**
2361  * d_rehash - add an entry back to the hash
2362  * @entry: dentry to add to the hash
2363  *
2364  * Adds a dentry to the hash according to its name.
2365  */
2366  
2367 void d_rehash(struct dentry * entry)
2368 {
2369     spin_lock(&entry->d_lock);
2370     __d_rehash(entry);
2371     spin_unlock(&entry->d_lock);
2372 }
2373 EXPORT_SYMBOL(d_rehash);
2374 
2375 static inline unsigned start_dir_add(struct inode *dir)
2376 {
2377 
2378     for (;;) {
2379         unsigned n = dir->i_dir_seq;
2380         if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n)
2381             return n;
2382         cpu_relax();
2383     }
2384 }
2385 
2386 static inline void end_dir_add(struct inode *dir, unsigned n)
2387 {
2388     smp_store_release(&dir->i_dir_seq, n + 2);
2389 }
2390 
2391 static void d_wait_lookup(struct dentry *dentry)
2392 {
2393     if (d_in_lookup(dentry)) {
2394         DECLARE_WAITQUEUE(wait, current);
2395         add_wait_queue(dentry->d_wait, &wait);
2396         do {
2397             set_current_state(TASK_UNINTERRUPTIBLE);
2398             spin_unlock(&dentry->d_lock);
2399             schedule();
2400             spin_lock(&dentry->d_lock);
2401         } while (d_in_lookup(dentry));
2402     }
2403 }
2404 
2405 struct dentry *d_alloc_parallel(struct dentry *parent,
2406                 const struct qstr *name,
2407                 wait_queue_head_t *wq)
2408 {
2409     unsigned int hash = name->hash;
2410     struct hlist_bl_head *b = in_lookup_hash(parent, hash);
2411     struct hlist_bl_node *node;
2412     struct dentry *new = d_alloc(parent, name);
2413     struct dentry *dentry;
2414     unsigned seq, r_seq, d_seq;
2415 
2416     if (unlikely(!new))
2417         return ERR_PTR(-ENOMEM);
2418 
2419 retry:
2420     rcu_read_lock();
2421     seq = smp_load_acquire(&parent->d_inode->i_dir_seq) & ~1;
2422     r_seq = read_seqbegin(&rename_lock);
2423     dentry = __d_lookup_rcu(parent, name, &d_seq);
2424     if (unlikely(dentry)) {
2425         if (!lockref_get_not_dead(&dentry->d_lockref)) {
2426             rcu_read_unlock();
2427             goto retry;
2428         }
2429         if (read_seqcount_retry(&dentry->d_seq, d_seq)) {
2430             rcu_read_unlock();
2431             dput(dentry);
2432             goto retry;
2433         }
2434         rcu_read_unlock();
2435         dput(new);
2436         return dentry;
2437     }
2438     if (unlikely(read_seqretry(&rename_lock, r_seq))) {
2439         rcu_read_unlock();
2440         goto retry;
2441     }
2442     hlist_bl_lock(b);
2443     if (unlikely(parent->d_inode->i_dir_seq != seq)) {
2444         hlist_bl_unlock(b);
2445         rcu_read_unlock();
2446         goto retry;
2447     }
2448     /*
2449      * No changes for the parent since the beginning of d_lookup().
2450      * Since all removals from the chain happen with hlist_bl_lock(),
2451      * any potential in-lookup matches are going to stay here until
2452      * we unlock the chain.  All fields are stable in everything
2453      * we encounter.
2454      */
2455     hlist_bl_for_each_entry(dentry, node, b, d_u.d_in_lookup_hash) {
2456         if (dentry->d_name.hash != hash)
2457             continue;
2458         if (dentry->d_parent != parent)
2459             continue;
2460         if (!d_same_name(dentry, parent, name))
2461             continue;
2462         hlist_bl_unlock(b);
2463         /* now we can try to grab a reference */
2464         if (!lockref_get_not_dead(&dentry->d_lockref)) {
2465             rcu_read_unlock();
2466             goto retry;
2467         }
2468 
2469         rcu_read_unlock();
2470         /*
2471          * somebody is likely to be still doing lookup for it;
2472          * wait for them to finish
2473          */
2474         spin_lock(&dentry->d_lock);
2475         d_wait_lookup(dentry);
2476         /*
2477          * it's not in-lookup anymore; in principle we should repeat
2478          * everything from dcache lookup, but it's likely to be what
2479          * d_lookup() would've found anyway.  If it is, just return it;
2480          * otherwise we really have to repeat the whole thing.
2481          */
2482         if (unlikely(dentry->d_name.hash != hash))
2483             goto mismatch;
2484         if (unlikely(dentry->d_parent != parent))
2485             goto mismatch;
2486         if (unlikely(d_unhashed(dentry)))
2487             goto mismatch;
2488         if (unlikely(!d_same_name(dentry, parent, name)))
2489             goto mismatch;
2490         /* OK, it *is* a hashed match; return it */
2491         spin_unlock(&dentry->d_lock);
2492         dput(new);
2493         return dentry;
2494     }
2495     rcu_read_unlock();
2496     /* we can't take ->d_lock here; it's OK, though. */
2497     new->d_flags |= DCACHE_PAR_LOOKUP;
2498     new->d_wait = wq;
2499     hlist_bl_add_head_rcu(&new->d_u.d_in_lookup_hash, b);
2500     hlist_bl_unlock(b);
2501     return new;
2502 mismatch:
2503     spin_unlock(&dentry->d_lock);
2504     dput(dentry);
2505     goto retry;
2506 }
2507 EXPORT_SYMBOL(d_alloc_parallel);
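
/*
 * Example (editor's sketch, not part of the original source): the shape
 * of a parallel-lookup user such as a readdir-time dentry instantiator.
 * d_lookup_done() (include/linux/dcache.h) ends the in-lookup state.
 *
 *	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
 *	struct dentry *d = d_alloc_parallel(parent, &name, &wq);
 *
 *	if (IS_ERR(d))
 *		return PTR_ERR(d);
 *	if (!d_in_lookup(d)) {
 *		... someone else won the race; d is the final dentry ...
 *	} else {
 *		... we own the lookup: find the inode, d_splice_alias(), etc. ...
 *		d_lookup_done(d);	- wakes anyone blocked in d_wait_lookup()
 *	}
 */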
2508 
2509 void __d_lookup_done(struct dentry *dentry)
2510 {
2511     struct hlist_bl_head *b = in_lookup_hash(dentry->d_parent,
2512                          dentry->d_name.hash);
2513     hlist_bl_lock(b);
2514     dentry->d_flags &= ~DCACHE_PAR_LOOKUP;
2515     __hlist_bl_del(&dentry->d_u.d_in_lookup_hash);
2516     wake_up_all(dentry->d_wait);
2517     dentry->d_wait = NULL;
2518     hlist_bl_unlock(b);
2519     INIT_HLIST_NODE(&dentry->d_u.d_alias);
2520     INIT_LIST_HEAD(&dentry->d_lru);
2521 }
2522 EXPORT_SYMBOL(__d_lookup_done);
2523 
2524 /* inode->i_lock held if inode is non-NULL */
2525 
2526 static inline void __d_add(struct dentry *dentry, struct inode *inode)
2527 {
2528     struct inode *dir = NULL;
2529     unsigned n;
2530     spin_lock(&dentry->d_lock);
2531     if (unlikely(d_in_lookup(dentry))) {
2532         dir = dentry->d_parent->d_inode;
2533         n = start_dir_add(dir);
2534         __d_lookup_done(dentry);
2535     }
2536     if (inode) {
2537         unsigned add_flags = d_flags_for_inode(inode);
2538         hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
2539         raw_write_seqcount_begin(&dentry->d_seq);
2540         __d_set_inode_and_type(dentry, inode, add_flags);
2541         raw_write_seqcount_end(&dentry->d_seq);
2542         fsnotify_update_flags(dentry);
2543     }
2544     __d_rehash(dentry);
2545     if (dir)
2546         end_dir_add(dir, n);
2547     spin_unlock(&dentry->d_lock);
2548     if (inode)
2549         spin_unlock(&inode->i_lock);
2550 }
2551 
2552 /**
2553  * d_add - add dentry to hash queues
2554  * @entry: dentry to add
2555  * @inode: The inode to attach to this dentry
2556  *
2557  * This adds the entry to the hash queues and attaches @inode to it.
2558  * The entry was actually filled in earlier during d_alloc().
2559  */
2560 
2561 void d_add(struct dentry *entry, struct inode *inode)
2562 {
2563     if (inode) {
2564         security_d_instantiate(entry, inode);
2565         spin_lock(&inode->i_lock);
2566     }
2567     __d_add(entry, inode);
2568 }
2569 EXPORT_SYMBOL(d_add);
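
/*
 * Example (editor's sketch, not part of the original source): the
 * simplest possible ->lookup(), as in simple_lookup() - hash the dentry
 * whether or not an inode was found; a NULL inode creates a negative
 * dentry.  "myfs_find_inode" is hypothetical.
 *
 *	static struct dentry *myfs_lookup(struct inode *dir,
 *			struct dentry *dentry, unsigned int flags)
 *	{
 *		struct inode *inode = myfs_find_inode(dir, &dentry->d_name);
 *
 *		d_add(dentry, inode);
 *		return NULL;
 *	}
 */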
2570 
2571 /**
2572  * d_exact_alias - find and hash an exact unhashed alias
2573  * @entry: dentry to add
2574  * @inode: The inode to go with this dentry
2575  *
2576  * If an unhashed dentry with the same name/parent and desired
2577  * inode already exists, hash and return it.  Otherwise, return
2578  * NULL.
2579  *
2580  * Parent directory should be locked.
2581  */
2582 struct dentry *d_exact_alias(struct dentry *entry, struct inode *inode)
2583 {
2584     struct dentry *alias;
2585     unsigned int hash = entry->d_name.hash;
2586 
2587     spin_lock(&inode->i_lock);
2588     hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
2589         /*
2590          * Don't need alias->d_lock here, because aliases with
2591          * d_parent == entry->d_parent are not subject to name or
2592          * parent changes, because the parent inode i_mutex is held.
2593          */
2594         if (alias->d_name.hash != hash)
2595             continue;
2596         if (alias->d_parent != entry->d_parent)
2597             continue;
2598         if (!d_same_name(alias, entry->d_parent, &entry->d_name))
2599             continue;
2600         spin_lock(&alias->d_lock);
2601         if (!d_unhashed(alias)) {
2602             spin_unlock(&alias->d_lock);
2603             alias = NULL;
2604         } else {
2605             __dget_dlock(alias);
2606             __d_rehash(alias);
2607             spin_unlock(&alias->d_lock);
2608         }
2609         spin_unlock(&inode->i_lock);
2610         return alias;
2611     }
2612     spin_unlock(&inode->i_lock);
2613     return NULL;
2614 }
2615 EXPORT_SYMBOL(d_exact_alias);
2616 
2617 /**
2618  * dentry_update_name_case - update case insensitive dentry with a new name
2619  * @dentry: dentry to be updated
2620  * @name: new name
2621  *
2622  * Update a case insensitive dentry with the new case of the name.
2623  *
2624  * dentry must have been returned by d_lookup with name @name. Old and new
2625  * name lengths must match (ie. no d_compare which allows mismatched name
2626  * lengths).
2627  *
2628  * Parent inode i_mutex must be held over d_lookup and into this call (to
2629  * keep renames and concurrent inserts, and readdir(2) away).
2630  */
2631 void dentry_update_name_case(struct dentry *dentry, const struct qstr *name)
2632 {
2633     BUG_ON(!inode_is_locked(dentry->d_parent->d_inode));
2634     BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
2635 
2636     spin_lock(&dentry->d_lock);
2637     write_seqcount_begin(&dentry->d_seq);
2638     memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
2639     write_seqcount_end(&dentry->d_seq);
2640     spin_unlock(&dentry->d_lock);
2641 }
2642 EXPORT_SYMBOL(dentry_update_name_case);
2643 
2644 static void swap_names(struct dentry *dentry, struct dentry *target)
2645 {
2646     if (unlikely(dname_external(target))) {
2647         if (unlikely(dname_external(dentry))) {
2648             /*
2649              * Both external: swap the pointers
2650              */
2651             swap(target->d_name.name, dentry->d_name.name);
2652         } else {
2653             /*
2654              * dentry:internal, target:external.  Steal target's
2655              * storage and make target internal.
2656              */
2657             memcpy(target->d_iname, dentry->d_name.name,
2658                     dentry->d_name.len + 1);
2659             dentry->d_name.name = target->d_name.name;
2660             target->d_name.name = target->d_iname;
2661         }
2662     } else {
2663         if (unlikely(dname_external(dentry))) {
2664             /*
2665              * dentry:external, target:internal.  Give dentry's
2666              * storage to target and make dentry internal
2667              */
2668             memcpy(dentry->d_iname, target->d_name.name,
2669                     target->d_name.len + 1);
2670             target->d_name.name = dentry->d_name.name;
2671             dentry->d_name.name = dentry->d_iname;
2672         } else {
2673             /*
2674              * Both are internal.
2675              */
2676             unsigned int i;
2677             BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long)));
2678             kmemcheck_mark_initialized(dentry->d_iname, DNAME_INLINE_LEN);
2679             kmemcheck_mark_initialized(target->d_iname, DNAME_INLINE_LEN);
2680             for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) {
2681                 swap(((long *) &dentry->d_iname)[i],
2682                      ((long *) &target->d_iname)[i]);
2683             }
2684         }
2685     }
2686     swap(dentry->d_name.hash_len, target->d_name.hash_len);
2687 }
2688 
2689 static void copy_name(struct dentry *dentry, struct dentry *target)
2690 {
2691     struct external_name *old_name = NULL;
2692     if (unlikely(dname_external(dentry)))
2693         old_name = external_name(dentry);
2694     if (unlikely(dname_external(target))) {
2695         atomic_inc(&external_name(target)->u.count);
2696         dentry->d_name = target->d_name;
2697     } else {
2698         memcpy(dentry->d_iname, target->d_name.name,
2699                 target->d_name.len + 1);
2700         dentry->d_name.name = dentry->d_iname;
2701         dentry->d_name.hash_len = target->d_name.hash_len;
2702     }
2703     if (old_name && likely(atomic_dec_and_test(&old_name->u.count)))
2704         kfree_rcu(old_name, u.head);
2705 }
2706 
2707 static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
2708 {
2709     /*
2710      * XXXX: do we really need to take target->d_lock?
2711      */
2712     if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent)
2713         spin_lock(&target->d_parent->d_lock);
2714     else {
2715         if (d_ancestor(dentry->d_parent, target->d_parent)) {
2716             spin_lock(&dentry->d_parent->d_lock);
2717             spin_lock_nested(&target->d_parent->d_lock,
2718                         DENTRY_D_LOCK_NESTED);
2719         } else {
2720             spin_lock(&target->d_parent->d_lock);
2721             spin_lock_nested(&dentry->d_parent->d_lock,
2722                         DENTRY_D_LOCK_NESTED);
2723         }
2724     }
2725     if (target < dentry) {
2726         spin_lock_nested(&target->d_lock, 2);
2727         spin_lock_nested(&dentry->d_lock, 3);
2728     } else {
2729         spin_lock_nested(&dentry->d_lock, 2);
2730         spin_lock_nested(&target->d_lock, 3);
2731     }
2732 }
2733 
2734 static void dentry_unlock_for_move(struct dentry *dentry, struct dentry *target)
2735 {
2736     if (target->d_parent != dentry->d_parent)
2737         spin_unlock(&dentry->d_parent->d_lock);
2738     if (target->d_parent != target)
2739         spin_unlock(&target->d_parent->d_lock);
2740     spin_unlock(&target->d_lock);
2741     spin_unlock(&dentry->d_lock);
2742 }
2743 
2744 /*
2745  * When switching names, the actual string doesn't strictly have to
2746  * be preserved in the target - because we're dropping the target
2747  * anyway. As such, we can just do a simple memcpy() to copy over
2748  * the new name before we switch, unless we are going to rehash
2749  * it.  Note that if we *do* unhash the target, we are not allowed
2750  * to rehash it without giving it a new name/hash key - whether
2751  * we swap or overwrite the names here, the resulting name won't match
2752  * the reality in the filesystem; it's only there for d_path() purposes.
2753  * Note that all of this is happening under rename_lock, so any
2754  * hash lookup seeing it in the middle of manipulations will
2755  * be discarded anyway.  So we do not care what happens to the hash
2756  * key in that case.
2757  */
2758 /*
2759  * __d_move - move a dentry
2760  * @dentry: entry to move
2761  * @target: new dentry
2762  * @exchange: exchange the two dentries
2763  *
2764  * Update the dcache to reflect the move of a file name. Negative
2765  * dcache entries should not be moved in this way. Caller must hold
2766  * rename_lock, the i_mutex of the source and target directories,
2767  * and the sb->s_vfs_rename_mutex if they differ. See lock_rename().
2768  */
2769 static void __d_move(struct dentry *dentry, struct dentry *target,
2770              bool exchange)
2771 {
2772     struct inode *dir = NULL;
2773     unsigned n;
2774     if (!dentry->d_inode)
2775         printk(KERN_WARNING "VFS: moving negative dcache entry\n");
2776 
2777     BUG_ON(d_ancestor(dentry, target));
2778     BUG_ON(d_ancestor(target, dentry));
2779 
2780     dentry_lock_for_move(dentry, target);
2781     if (unlikely(d_in_lookup(target))) {
2782         dir = target->d_parent->d_inode;
2783         n = start_dir_add(dir);
2784         __d_lookup_done(target);
2785     }
2786 
2787     write_seqcount_begin(&dentry->d_seq);
2788     write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED);
2789 
2790     /* unhash both */
2791     /* __d_drop does write_seqcount_barrier, but they're OK to nest. */
2792     __d_drop(dentry);
2793     __d_drop(target);
2794 
2795     /* Switch the names.. */
2796     if (exchange)
2797         swap_names(dentry, target);
2798     else
2799         copy_name(dentry, target);
2800 
2801     /* rehash in new place(s) */
2802     __d_rehash(dentry);
2803     if (exchange)
2804         __d_rehash(target);
2805 
2806     /* ... and switch them in the tree */
2807     if (IS_ROOT(dentry)) {
2808         /* splicing a tree */
2809         dentry->d_flags |= DCACHE_RCUACCESS;
2810         dentry->d_parent = target->d_parent;
2811         target->d_parent = target;
2812         list_del_init(&target->d_child);
2813         list_move(&dentry->d_child, &dentry->d_parent->d_subdirs);
2814     } else {
2815         /* swapping two dentries */
2816         swap(dentry->d_parent, target->d_parent);
2817         list_move(&target->d_child, &target->d_parent->d_subdirs);
2818         list_move(&dentry->d_child, &dentry->d_parent->d_subdirs);
2819         if (exchange)
2820             fsnotify_update_flags(target);
2821         fsnotify_update_flags(dentry);
2822     }
2823 
2824     write_seqcount_end(&target->d_seq);
2825     write_seqcount_end(&dentry->d_seq);
2826 
2827     if (dir)
2828         end_dir_add(dir, n);
2829     dentry_unlock_for_move(dentry, target);
2830 }
2831 
2832 /*
2833  * d_move - move a dentry
2834  * @dentry: entry to move
2835  * @target: new dentry
2836  *
2837  * Update the dcache to reflect the move of a file name. Negative
2838  * dcache entries should not be moved in this way. See the locking
2839  * requirements for __d_move.
2840  */
2841 void d_move(struct dentry *dentry, struct dentry *target)
2842 {
2843     write_seqlock(&rename_lock);
2844     __d_move(dentry, target, false);
2845     write_sequnlock(&rename_lock);
2846 }
2847 EXPORT_SYMBOL(d_move);
2848 
2849 /*
2850  * d_exchange - exchange two dentries
2851  * @dentry1: first dentry
2852  * @dentry2: second dentry
2853  */
2854 void d_exchange(struct dentry *dentry1, struct dentry *dentry2)
2855 {
2856     write_seqlock(&rename_lock);
2857 
2858     WARN_ON(!dentry1->d_inode);
2859     WARN_ON(!dentry2->d_inode);
2860     WARN_ON(IS_ROOT(dentry1));
2861     WARN_ON(IS_ROOT(dentry2));
2862 
2863     __d_move(dentry1, dentry2, true);
2864 
2865     write_sequnlock(&rename_lock);
2866 }
2867 
2868 /**
2869  * d_ancestor - search for an ancestor
2870  * @p1: ancestor dentry
2871  * @p2: child dentry
2872  *
2873  * Returns the ancestor dentry of p2 which is a child of p1, if p1 is
2874  * an ancestor of p2, else NULL.
2875  */
2876 struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
2877 {
2878     struct dentry *p;
2879 
2880     for (p = p2; !IS_ROOT(p); p = p->d_parent) {
2881         if (p->d_parent == p1)
2882             return p;
2883     }
2884     return NULL;
2885 }
2886 
2887 /*
2888  * This helper attempts to cope with remotely renamed directories
2889  *
2890  * It assumes that the caller is already holding
2891  * dentry->d_parent->d_inode->i_mutex, and rename_lock.
2892  *
2893  * Note: If ever the locking in lock_rename() changes, then please
2894  * remember to update this too...
2895  */
2896 static int __d_unalias(struct inode *inode,
2897         struct dentry *dentry, struct dentry *alias)
2898 {
2899     struct mutex *m1 = NULL;
2900     struct rw_semaphore *m2 = NULL;
2901     int ret = -ESTALE;
2902 
2903     /* If alias and dentry share a parent, then no extra locks required */
2904     if (alias->d_parent == dentry->d_parent)
2905         goto out_unalias;
2906 
2907     /* See lock_rename() */
2908     if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
2909         goto out_err;
2910     m1 = &dentry->d_sb->s_vfs_rename_mutex;
2911     if (!inode_trylock_shared(alias->d_parent->d_inode))
2912         goto out_err;
2913     m2 = &alias->d_parent->d_inode->i_rwsem;
2914 out_unalias:
2915     __d_move(alias, dentry, false);
2916     ret = 0;
2917 out_err:
2918     if (m2)
2919         up_read(m2);
2920     if (m1)
2921         mutex_unlock(m1);
2922     return ret;
2923 }
2924 
2925 /**
2926  * d_splice_alias - splice a disconnected dentry into the tree if one exists
2927  * @inode:  the inode which may have a disconnected dentry
2928  * @dentry: a negative dentry which we want to point to the inode.
2929  *
2930  * If inode is a directory and has an IS_ROOT alias, then d_move that in
2931  * place of the given dentry and return it, else simply d_add the inode
2932  * to the dentry and return NULL.
2933  *
2934  * If a non-IS_ROOT directory is found, the filesystem is corrupt, and
2935  * we should error out: directories can't have multiple aliases.
2936  *
2937  * This is needed in the lookup routine of any filesystem that is exportable
2938  * (via knfsd) so that we can build dcache paths to directories effectively.
2939  *
2940  * If a dentry was found and moved, then it is returned.  Otherwise NULL
2941  * is returned.  This matches the expected return value of ->lookup.
2942  *
2943  * Cluster filesystems may call this function with a negative, hashed dentry.
2944  * In that case, we know that the inode will be a regular file, and also this
2945  * will only occur during atomic_open. So we need to check for the dentry
2946  * being already hashed only in the final case.
2947  */
2948 struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
2949 {
2950     if (IS_ERR(inode))
2951         return ERR_CAST(inode);
2952 
2953     BUG_ON(!d_unhashed(dentry));
2954 
2955     if (!inode)
2956         goto out;
2957 
2958     security_d_instantiate(dentry, inode);
2959     spin_lock(&inode->i_lock);
2960     if (S_ISDIR(inode->i_mode)) {
2961         struct dentry *new = __d_find_any_alias(inode);
2962         if (unlikely(new)) {
2963             /* The reference to new ensures it remains an alias */
2964             spin_unlock(&inode->i_lock);
2965             write_seqlock(&rename_lock);
2966             if (unlikely(d_ancestor(new, dentry))) {
2967                 write_sequnlock(&rename_lock);
2968                 dput(new);
2969                 new = ERR_PTR(-ELOOP);
2970                 pr_warn_ratelimited(
2971                     "VFS: Lookup of '%s' in %s %s"
2972                     " would have caused loop\n",
2973                     dentry->d_name.name,
2974                     inode->i_sb->s_type->name,
2975                     inode->i_sb->s_id);
2976             } else if (!IS_ROOT(new)) {
2977                 int err = __d_unalias(inode, dentry, new);
2978                 write_sequnlock(&rename_lock);
2979                 if (err) {
2980                     dput(new);
2981                     new = ERR_PTR(err);
2982                 }
2983             } else {
2984                 __d_move(new, dentry, false);
2985                 write_sequnlock(&rename_lock);
2986             }
2987             iput(inode);
2988             return new;
2989         }
2990     }
2991 out:
2992     __d_add(dentry, inode);
2993     return NULL;
2994 }
2995 EXPORT_SYMBOL(d_splice_alias);
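
/*
 * Example (editor's sketch, not part of the original source): the
 * canonical disk-filesystem ->lookup() tail, in the style of
 * ext2_lookup().  "myfs_inode_by_name" and "myfs_iget" are hypothetical;
 * d_splice_alias() copes with a NULL or ERR_PTR inode.
 *
 *	static struct dentry *myfs_lookup(struct inode *dir,
 *			struct dentry *dentry, unsigned int flags)
 *	{
 *		struct inode *inode = NULL;
 *		ino_t ino = myfs_inode_by_name(dir, &dentry->d_name);
 *
 *		if (ino)
 *			inode = myfs_iget(dir->i_sb, ino);
 *		return d_splice_alias(inode, dentry);
 *	}
 */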
2996 
2997 static int prepend(char **buffer, int *buflen, const char *str, int namelen)
2998 {
2999     *buflen -= namelen;
3000     if (*buflen < 0)
3001         return -ENAMETOOLONG;
3002     *buffer -= namelen;
3003     memcpy(*buffer, str, namelen);
3004     return 0;
3005 }
3006 
3007 /**
3008  * prepend_name - prepend a pathname in front of current buffer pointer
3009  * @buffer: buffer pointer
3010  * @buflen: allocated length of the buffer
3011  * @name:   name string and length qstr structure
3012  *
3013  * With RCU path tracing, it may race with d_move(). Use ACCESS_ONCE() to
3014  * make sure that either the old or the new name pointer and length are
3015  * fetched. However, there may be a mismatch between length and pointer.
3016  * The length cannot be trusted; we need to copy the name byte by byte until
3017  * the length is reached or a null byte is found. It also prepends "/" at
3018  * the beginning of the name. The sequence number check at the caller will
3019  * retry it again when a d_move() does happen. So any garbage in the buffer
3020  * due to mismatched pointer and length will be discarded.
3021  *
3022  * Data dependency barrier is needed to make sure that we see that terminating
3023  * NUL.  Alpha strikes again, film at 11...
3024  */
3025 static int prepend_name(char **buffer, int *buflen, const struct qstr *name)
3026 {
3027     const char *dname = ACCESS_ONCE(name->name);
3028     u32 dlen = ACCESS_ONCE(name->len);
3029     char *p;
3030 
3031     smp_read_barrier_depends();
3032 
3033     *buflen -= dlen + 1;
3034     if (*buflen < 0)
3035         return -ENAMETOOLONG;
3036     p = *buffer -= dlen + 1;
3037     *p++ = '/';
3038     while (dlen--) {
3039         char c = *dname++;
3040         if (!c)
3041             break;
3042         *p++ = c;
3043     }
3044     return 0;
3045 }
3046 
3047 /**
3048  * prepend_path - Prepend path string to a buffer
3049  * @path: the dentry/vfsmount to report
3050  * @root: root vfsmnt/dentry
3051  * @buffer: pointer to the end of the buffer
3052  * @buflen: pointer to buffer length
3053  *
3054  * The function will first try to write out the pathname without taking any
3055  * lock other than the RCU read lock to make sure that dentries won't go away.
3056  * It only checks the sequence number of the global rename_lock as any change
3057  * in the dentry's d_seq will be preceded by changes in the rename_lock
3058  * sequence number. If the sequence number had been changed, it will restart
3059  * the whole pathname back-tracing sequence again by taking the rename_lock.
3060  * In this case, there is no need to take the RCU read lock as the recursive
3061  * parent pointer references will keep the dentry chain alive as long as no
3062  * rename operation is performed.
3063  */
3064 static int prepend_path(const struct path *path,
3065             const struct path *root,
3066             char **buffer, int *buflen)
3067 {
3068     struct dentry *dentry;
3069     struct vfsmount *vfsmnt;
3070     struct mount *mnt;
3071     int error = 0;
3072     unsigned seq, m_seq = 0;
3073     char *bptr;
3074     int blen;
3075 
3076     rcu_read_lock();
3077 restart_mnt:
3078     read_seqbegin_or_lock(&mount_lock, &m_seq);
3079     seq = 0;
3080     rcu_read_lock();
3081 restart:
3082     bptr = *buffer;
3083     blen = *buflen;
3084     error = 0;
3085     dentry = path->dentry;
3086     vfsmnt = path->mnt;
3087     mnt = real_mount(vfsmnt);
3088     read_seqbegin_or_lock(&rename_lock, &seq);
3089     while (dentry != root->dentry || vfsmnt != root->mnt) {
3090         struct dentry * parent;
3091 
3092         if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
3093             struct mount *parent = ACCESS_ONCE(mnt->mnt_parent);
3094             /* Escaped? */
3095             if (dentry != vfsmnt->mnt_root) {
3096                 bptr = *buffer;
3097                 blen = *buflen;
3098                 error = 3;
3099                 break;
3100             }
3101             /* Global root? */
3102             if (mnt != parent) {
3103                 dentry = ACCESS_ONCE(mnt->mnt_mountpoint);
3104                 mnt = parent;
3105                 vfsmnt = &mnt->mnt;
3106                 continue;
3107             }
3108             if (!error)
3109                 error = is_mounted(vfsmnt) ? 1 : 2;
3110             break;
3111         }
3112         parent = dentry->d_parent;
3113         prefetch(parent);
3114         error = prepend_name(&bptr, &blen, &dentry->d_name);
3115         if (error)
3116             break;
3117 
3118         dentry = parent;
3119     }
3120     if (!(seq & 1))
3121         rcu_read_unlock();
3122     if (need_seqretry(&rename_lock, seq)) {
3123         seq = 1;
3124         goto restart;
3125     }
3126     done_seqretry(&rename_lock, seq);
3127 
3128     if (!(m_seq & 1))
3129         rcu_read_unlock();
3130     if (need_seqretry(&mount_lock, m_seq)) {
3131         m_seq = 1;
3132         goto restart_mnt;
3133     }
3134     done_seqretry(&mount_lock, m_seq);
3135 
3136     if (error >= 0 && bptr == *buffer) {
3137         if (--blen < 0)
3138             error = -ENAMETOOLONG;
3139         else
3140             *--bptr = '/';
3141     }
3142     *buffer = bptr;
3143     *buflen = blen;
3144     return error;
3145 }
3146 
3147 /**
3148  * __d_path - return the path of a dentry
3149  * @path: the dentry/vfsmount to report
3150  * @root: root vfsmnt/dentry
3151  * @buf: buffer to return value in
3152  * @buflen: buffer length
3153  *
3154  * Convert a dentry into an ASCII path name.
3155  *
3156  * Returns a pointer into the buffer or an error code if the
3157  * path was too long.
3158  *
3159  * "buflen" should be positive.
3160  *
3161  * If the path is not reachable from the supplied root, return %NULL.
3162  */
3163 char *__d_path(const struct path *path,
3164            const struct path *root,
3165            char *buf, int buflen)
3166 {
3167     char *res = buf + buflen;
3168     int error;
3169 
3170     prepend(&res, &buflen, "\0", 1);
3171     error = prepend_path(path, root, &res, &buflen);
3172 
3173     if (error < 0)
3174         return ERR_PTR(error);
3175     if (error > 0)
3176         return NULL;
3177     return res;
3178 }
3179 
3180 char *d_absolute_path(const struct path *path,
3181            char *buf, int buflen)
3182 {
3183     struct path root = {};
3184     char *res = buf + buflen;
3185     int error;
3186 
3187     prepend(&res, &buflen, "\0", 1);
3188     error = prepend_path(path, &root, &res, &buflen);
3189 
3190     if (error > 1)
3191         error = -EINVAL;
3192     if (error < 0)
3193         return ERR_PTR(error);
3194     return res;
3195 }
3196 
3197 /*
3198  * same as __d_path but appends "(deleted)" for unlinked files.
3199  */
3200 static int path_with_deleted(const struct path *path,
3201                  const struct path *root,
3202                  char **buf, int *buflen)
3203 {
3204     prepend(buf, buflen, "\0", 1);
3205     if (d_unlinked(path->dentry)) {
3206         int error = prepend(buf, buflen, " (deleted)", 10);
3207         if (error)
3208             return error;
3209     }
3210 
3211     return prepend_path(path, root, buf, buflen);
3212 }
3213 
3214 static int prepend_unreachable(char **buffer, int *buflen)
3215 {
3216     return prepend(buffer, buflen, "(unreachable)", 13);
3217 }
3218 
3219 static void get_fs_root_rcu(struct fs_struct *fs, struct path *root)
3220 {
3221     unsigned seq;
3222 
3223     do {
3224         seq = read_seqcount_begin(&fs->seq);
3225         *root = fs->root;
3226     } while (read_seqcount_retry(&fs->seq, seq));
3227 }
3228 
3229 /**
3230  * d_path - return the path of a dentry
3231  * @path: path to report
3232  * @buf: buffer to return value in
3233  * @buflen: buffer length
3234  *
3235  * Convert a dentry into an ASCII path name. If the entry has been deleted
3236  * the string " (deleted)" is appended. Note that this is ambiguous.
3237  *
3238  * Returns a pointer into the buffer or an error code if the path was
3239  * too long. Note: Callers should use the returned pointer, not the passed
3240  * in buffer, to use the name! The implementation often starts at an offset
3241  * into the buffer, and may leave 0 bytes at the start.
3242  *
3243  * "buflen" should be positive.
3244  */
3245 char *d_path(const struct path *path, char *buf, int buflen)
3246 {
3247     char *res = buf + buflen;
3248     struct path root;
3249     int error;
3250 
3251     /*
3252      * We have various synthetic filesystems that never get mounted.  On
3253      * these filesystems dentries are never used for lookup purposes, and
3254      * thus don't need to be hashed.  They also don't need a name until a
3255      * user wants to identify the object in /proc/pid/fd/.  The little hack
3256      * below allows us to generate a name for these objects on demand:
3257      *
3258      * Some pseudo inodes are mountable.  When they are mounted
3259      * path->dentry == path->mnt->mnt_root.  In that case don't call d_dname
3260      * and instead have d_path return the mounted path.
3261      */
3262     if (path->dentry->d_op && path->dentry->d_op->d_dname &&
3263         (!IS_ROOT(path->dentry) || path->dentry != path->mnt->mnt_root))
3264         return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
3265 
3266     rcu_read_lock();
3267     get_fs_root_rcu(current->fs, &root);
3268     error = path_with_deleted(path, &root, &res, &buflen);
3269     rcu_read_unlock();
3270 
3271     if (error < 0)
3272         res = ERR_PTR(error);
3273     return res;
3274 }
3275 EXPORT_SYMBOL(d_path);
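
/*
 * Editor's example (an illustrative sketch, not part of this file): the
 * usual calling convention for d_path().  The caller supplies a buffer,
 * typically from __getname(), and must use the returned pointer rather
 * than the buffer itself:
 *
 *  char *buf = __getname();
 *  char *name;
 *
 *  if (!buf)
 *      return -ENOMEM;
 *  name = d_path(&file->f_path, buf, PATH_MAX);
 *  if (IS_ERR(name)) {
 *      __putname(buf);
 *      return PTR_ERR(name);
 *  }
 *  pr_info("file: %s\n", name);
 *  __putname(buf);
 */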
3276 
3277 /*
3278  * Helper function for dentry_operations.d_dname() members
3279  */
3280 char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
3281             const char *fmt, ...)
3282 {
3283     va_list args;
3284     char temp[64];
3285     int sz;
3286 
3287     va_start(args, fmt);
3288     sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
3289     va_end(args);
3290 
3291     if (sz > sizeof(temp) || sz > buflen)
3292         return ERR_PTR(-ENAMETOOLONG);
3293 
3294     buffer += buflen - sz;
3295     return memcpy(buffer, temp, sz);
3296 }
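
/*
 * Editor's example: a typical d_dname implementation for a pseudo
 * filesystem, modeled on pipefs (fs/pipe.c).  dynamic_dname() does the
 * buffer arithmetic; the callback only supplies the format:
 *
 *  static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
 *  {
 *      return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]",
 *              d_inode(dentry)->i_ino);
 *  }
 *
 * hooked up through a dentry_operations with .d_dname = pipefs_dname,
 * which is what makes /proc/<pid>/fd links show names like "pipe:[12345]".
 */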
3297 
3298 char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
3299 {
3300     char *end = buffer + buflen;
3301     /* these dentries are never renamed, so d_lock is not needed */
3302     if (prepend(&end, &buflen, " (deleted)", 11) ||
3303         prepend(&end, &buflen, dentry->d_name.name, dentry->d_name.len) ||
3304         prepend(&end, &buflen, "/", 1))
3305         end = ERR_PTR(-ENAMETOOLONG);
3306     return end;
3307 }
3308 EXPORT_SYMBOL(simple_dname);
3309 
3310 /*
3311  * Write full pathname from the root of the filesystem into the buffer.
3312  */
3313 static char *__dentry_path(struct dentry *d, char *buf, int buflen)
3314 {
3315     struct dentry *dentry;
3316     char *end, *retval;
3317     int len, seq = 0;
3318     int error = 0;
3319 
3320     if (buflen < 2)
3321         goto Elong;
3322 
3323     rcu_read_lock();
3324 restart:
3325     dentry = d;
3326     end = buf + buflen;
3327     len = buflen;
3328     prepend(&end, &len, "\0", 1);
3329     /* Get '/' right: the root dentry must come out as "/", not "" */
3330     retval = end-1;
3331     *retval = '/';
3332     read_seqbegin_or_lock(&rename_lock, &seq);
3333     while (!IS_ROOT(dentry)) {
3334         struct dentry *parent = dentry->d_parent;
3335 
3336         prefetch(parent);
3337         error = prepend_name(&end, &len, &dentry->d_name);
3338         if (error)
3339             break;
3340 
3341         retval = end;
3342         dentry = parent;
3343     }
3344     if (!(seq & 1))
3345         rcu_read_unlock();
3346     if (need_seqretry(&rename_lock, seq)) {
3347         seq = 1;
3348         goto restart;
3349     }
3350     done_seqretry(&rename_lock, seq);
3351     if (error)
3352         goto Elong;
3353     return retval;
3354 Elong:
3355     return ERR_PTR(-ENAMETOOLONG);
3356 }
3357 
3358 char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
3359 {
3360     return __dentry_path(dentry, buf, buflen);
3361 }
3362 EXPORT_SYMBOL(dentry_path_raw);
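
/*
 * Editor's note (illustrative): dentry_path_raw() takes no vfsmount and
 * reports the path from the root of the dentry's own filesystem, so it
 * works even for dentries not reachable through any mount:
 *
 *  char *p = dentry_path_raw(dentry, buf, PATH_MAX);
 *  if (IS_ERR(p))
 *      return PTR_ERR(p);
 *
 * dentry_path() below additionally marks unlinked dentries by appending
 * "//deleted"; a double slash can never occur in a path built here, so
 * the marker stays unambiguous.
 */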
3363 
3364 char *dentry_path(struct dentry *dentry, char *buf, int buflen)
3365 {
3366     char *p = NULL;
3367     char *retval;
3368 
3369     if (d_unlinked(dentry)) {
3370         p = buf + buflen;
3371         if (prepend(&p, &buflen, "//deleted", 10) != 0)
3372             goto Elong;
3373         buflen++;
3374     }
3375     retval = __dentry_path(dentry, buf, buflen);
3376     if (!IS_ERR(retval) && p)
3377         *p = '/';   /* restore the '/' overridden with '\0' */
3378     return retval;
3379 Elong:
3380     return ERR_PTR(-ENAMETOOLONG);
3381 }
3382 
3383 static void get_fs_root_and_pwd_rcu(struct fs_struct *fs, struct path *root,
3384                     struct path *pwd)
3385 {
3386     unsigned seq;
3387 
3388     do {
3389         seq = read_seqcount_begin(&fs->seq);
3390         *root = fs->root;
3391         *pwd = fs->pwd;
3392     } while (read_seqcount_retry(&fs->seq, seq));
3393 }
3394 
3395 /*
3396  * NOTE! The user-level library version returns a
3397  * character pointer. The kernel system call just
3398  * returns the length of the buffer filled (which
3399  * includes the ending '\0' character), or a negative
3400  * error value. So libc would do something like
3401  *
3402  *  char *getcwd(char * buf, size_t size)
3403  *  {
3404  *      int retval;
3405  *
3406  *      retval = sys_getcwd(buf, size);
3407  *      if (retval >= 0)
3408  *          return buf;
3409  *      errno = -retval;
3410  *      return NULL;
3411  *  }
3412  */
3413 SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
3414 {
3415     int error;
3416     struct path pwd, root;
3417     char *page = __getname();
3418 
3419     if (!page)
3420         return -ENOMEM;
3421 
3422     rcu_read_lock();
3423     get_fs_root_and_pwd_rcu(current->fs, &root, &pwd);
3424 
3425     error = -ENOENT;
3426     if (!d_unlinked(pwd.dentry)) {
3427         unsigned long len;
3428         char *cwd = page + PATH_MAX;
3429         int buflen = PATH_MAX;
3430 
3431         prepend(&cwd, &buflen, "\0", 1);
3432         error = prepend_path(&pwd, &root, &cwd, &buflen);
3433         rcu_read_unlock();
3434 
3435         if (error < 0)
3436             goto out;
3437 
3438         /* Unreachable from current root */
3439         if (error > 0) {
3440             error = prepend_unreachable(&cwd, &buflen);
3441             if (error)
3442                 goto out;
3443         }
3444 
3445         error = -ERANGE;
3446         len = PATH_MAX + page - cwd;
3447         if (len <= size) {
3448             error = len;
3449             if (copy_to_user(buf, cwd, len))
3450                 error = -EFAULT;
3451         }
3452     } else {
3453         rcu_read_unlock();
3454     }
3455 
3456 out:
3457     __putname(page);
3458     return error;
3459 }
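
/*
 * Editor's note (illustrative): when the working directory lies outside
 * the caller's current root (for example after the process chroot()ed
 * away from it), the syscall does not fail; the result comes back
 * prefixed by prepend_unreachable(), e.g. "(unreachable)/home/user".
 */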
3460 
3461 /*
3462  * Test whether new_dentry is a subdirectory of old_dentry.
3463  *
3464  * Trivially implemented using the dcache structure.
3465  */
3466 
3467 /**
3468  * is_subdir - is new dentry a subdirectory of old_dentry
3469  * @new_dentry: new dentry
3470  * @old_dentry: old dentry
3471  *
3472  * Returns true if new_dentry is a subdirectory of old_dentry (at any
3473  * depth), false otherwise.
3474  * The caller must ensure that "new_dentry" is pinned before calling is_subdir().
3475  */
3476
3477 bool is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
3478 {
3479     bool result;
3480     unsigned seq;
3481 
3482     if (new_dentry == old_dentry)
3483         return true;
3484 
3485     do {
3486         /* the seq retry restarts the d_ancestor() walk after a rename */
3487         seq = read_seqbegin(&rename_lock);
3488         /*
3489          * Need rcu_read_lock() to protect against d_parent being
3490          * changed under us by a concurrent d_move()
3491          */
3492         rcu_read_lock();
3493         if (d_ancestor(old_dentry, new_dentry))
3494             result = true;
3495         else
3496             result = false;
3497         rcu_read_unlock();
3498     } while (read_seqretry(&rename_lock, seq));
3499 
3500     return result;
3501 }
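
/*
 * Editor's example (an illustrative sketch): a typical use is checking
 * that a dentry still lies beneath a directory of interest, since a
 * concurrent rename may have moved it; "root_dentry" and the error code
 * are hypothetical:
 *
 *  if (!is_subdir(dentry, root_dentry))
 *      return -EXDEV;
 */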
3502 
3503 static enum d_walk_ret d_genocide_kill(void *data, struct dentry *dentry)
3504 {
3505     struct dentry *root = data;
3506     if (dentry != root) {
3507         if (d_unhashed(dentry) || !dentry->d_inode)
3508             return D_WALK_SKIP;
3509 
3510         if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
3511             dentry->d_flags |= DCACHE_GENOCIDE;
3512             dentry->d_lockref.count--;
3513         }
3514     }
3515     return D_WALK_CONTINUE;
3516 }
3517 
3518 void d_genocide(struct dentry *parent)
3519 {
3520     d_walk(parent, parent, d_genocide_kill, NULL);
3521 }
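
/*
 * Editor's note: d_genocide() walks the tree below "parent" via d_walk()
 * and drops one reference on every hashed, positive dentry it finds.
 * The DCACHE_GENOCIDE flag set in d_genocide_kill() ensures each dentry
 * is decremented at most once, even if the walk has to restart after a
 * concurrent rename.
 */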
3522 
3523 void d_tmpfile(struct dentry *dentry, struct inode *inode)
3524 {
3525     inode_dec_link_count(inode);
3526     BUG_ON(dentry->d_name.name != dentry->d_iname ||
3527         !hlist_unhashed(&dentry->d_u.d_alias) ||
3528         !d_unlinked(dentry));
3529     spin_lock(&dentry->d_parent->d_lock);
3530     spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
3531     dentry->d_name.len = sprintf(dentry->d_iname, "#%llu",
3532                 (unsigned long long)inode->i_ino);
3533     spin_unlock(&dentry->d_lock);
3534     spin_unlock(&dentry->d_parent->d_lock);
3535     d_instantiate(dentry, inode);
3536 }
3537 EXPORT_SYMBOL(d_tmpfile);
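
/*
 * Editor's example (an illustrative sketch; the "foofs_*" names are
 * hypothetical): a filesystem's i_op->tmpfile() allocates an inode and
 * hands it to d_tmpfile(), which names the dentry "#<ino>":
 *
 *  static int foofs_tmpfile(struct inode *dir, struct dentry *dentry,
 *               umode_t mode)
 *  {
 *      struct inode *inode = foofs_new_inode(dir, mode);
 *
 *      if (IS_ERR(inode))
 *          return PTR_ERR(inode);
 *      d_tmpfile(dentry, inode);
 *      return 0;
 *  }
 *
 * The inode_dec_link_count() in d_tmpfile() balances the link count the
 * filesystem gave the fresh inode: an O_TMPFILE inode has no directory
 * entry, so its link count must end up at zero.
 */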
3538 
3539 static __initdata unsigned long dhash_entries;
3540 static int __init set_dhash_entries(char *str)
3541 {
3542     if (!str)
3543         return 0;
3544     dhash_entries = simple_strtoul(str, &str, 0);
3545     return 1;
3546 }
3547 __setup("dhash_entries=", set_dhash_entries);
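
/*
 * Editor's note: this makes the table size tunable from the kernel
 * command line, e.g. booting with
 *
 *  dhash_entries=1048576
 *
 * Otherwise alloc_large_system_hash() below sizes the table from the
 * amount of available memory.
 */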
3548 
3549 static void __init dcache_init_early(void)
3550 {
3551     unsigned int loop;
3552 
3553     /* If hashes are distributed across NUMA nodes, defer
3554      * hash allocation until vmalloc space is available.
3555      */
3556     if (hashdist)
3557         return;
3558 
3559     dentry_hashtable =
3560         alloc_large_system_hash("Dentry cache",
3561                     sizeof(struct hlist_bl_head),
3562                     dhash_entries,
3563                     13,
3564                     HASH_EARLY,
3565                     &d_hash_shift,
3566                     &d_hash_mask,
3567                     0,
3568                     0);
3569 
3570     for (loop = 0; loop < (1U << d_hash_shift); loop++)
3571         INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
3572 }
3573 
3574 static void __init dcache_init(void)
3575 {
3576     unsigned int loop;
3577 
3578     /*
3579      * A constructor could be added for stable state like the lists,
3580      * but it is probably not worth it because of the cache nature
3581      * of the dcache.
3582      */
3583     dentry_cache = KMEM_CACHE(dentry,
3584         SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD|SLAB_ACCOUNT);
3585 
3586     /* Hash may have been set up in dcache_init_early */
3587     if (!hashdist)
3588         return;
3589 
3590     dentry_hashtable =
3591         alloc_large_system_hash("Dentry cache",
3592                     sizeof(struct hlist_bl_head),
3593                     dhash_entries,
3594                     13,
3595                     0,
3596                     &d_hash_shift,
3597                     &d_hash_mask,
3598                     0,
3599                     0);
3600 
3601     for (loop = 0; loop < (1U << d_hash_shift); loop++)
3602         INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
3603 }
3604 
3605 /* SLAB cache for __getname() consumers */
3606 struct kmem_cache *names_cachep __read_mostly;
3607 EXPORT_SYMBOL(names_cachep);
3608 
3609 EXPORT_SYMBOL(d_genocide);
3610 
3611 void __init vfs_caches_init_early(void)
3612 {
3613     dcache_init_early();
3614     inode_init_early();
3615 }
3616 
3617 void __init vfs_caches_init(void)
3618 {
3619     names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
3620             SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
3621 
3622     dcache_init();
3623     inode_init();
3624     files_init();
3625     files_maxfiles_init();
3626     mnt_init();
3627     bdev_cache_init();
3628     chrdev_init();
3629 }