0001 // SPDX-License-Identifier: GPL-2.0
0002 
0003 /*
0004  * Copyright 2019, 2020 Amazon.com, Inc. or its affiliates. All rights reserved.
0005  *
0006  * User extended attribute client side cache functions.
0007  *
0008  * Author: Frank van der Linden <fllinden@amazon.com>
0009  */
0010 #include <linux/errno.h>
0011 #include <linux/nfs_fs.h>
0012 #include <linux/hashtable.h>
0013 #include <linux/refcount.h>
0014 #include <uapi/linux/xattr.h>
0015 
0016 #include "nfs4_fs.h"
0017 #include "internal.h"
0018 
0019 /*
0020  * User extended attributes client side caching is implemented by having
0021  * a cache structure attached to NFS inodes. This structure is allocated
0022  * when needed, and freed when the cache is zapped.
0023  *
0024  * The cache structure contains a hash table of entries, and a pointer
0025  * to a special-cased entry for the listxattr cache.
0026  *
0027  * Accessing and allocating / freeing the caches is done via reference
0028  * counting. The cache entries use a similar refcounting scheme.
0029  *
0030  * This makes freeing a cache, both from the shrinker and from the
0031  * zap cache path, easy. It also means that, in current use cases,
0032  * the large majority of inodes will not waste any memory, as they
0033  * will never have any user extended attributes assigned to them.
0034  *
0035  * Attribute entries are hashed into a simple hash table. They are
0036  * also part of an LRU.
0037  *
0038  * There are three shrinkers.
0039  *
0040  * Two shrinkers deal with the cache entries themselves: one for
0041  * large entries (> PAGE_SIZE), and one for smaller entries. The
0042  * shrinker for the larger entries works more aggressively than
0043  * the one for the smaller entries.
0044  *
0045  * The other shrinker frees the cache structures themselves.
0046  */
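Before the definitions that follow, a minimal sketch of the containment chain a lookup walks may help orient the reader. It is illustrative only and not part of this file; locking, reference counting and validity checks are omitted, and the real lookup path is nfs4_xattr_cache_get() further down.

/* Illustrative sketch only - not part of this file. */
static struct nfs4_xattr_entry *
example_lookup(struct inode *inode, const char *name)
{
	/* Per-inode cache pointer; NULL if no user xattrs were ever cached. */
	struct nfs4_xattr_cache *cache = NFS_I(inode)->xattr_cache;
	struct nfs4_xattr_bucket *bucket;

	if (cache == NULL)
		return NULL;

	/* Hash the attribute name into one of the buckets... */
	bucket = nfs4_xattr_hash_bucket(cache, name);

	/* ...and walk that bucket's hlist for a matching name. */
	return nfs4_xattr_get_entry(bucket, name);
}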
0047 
0048 /*
0049  * 64 buckets is a good default. There is likely no reasonable
0050  * workload that uses more than even 64 user extended attributes.
0051  * You can certainly add a lot more - but you get what you ask for
0052  * in those circumstances.
0053  */
0054 #define NFS4_XATTR_HASH_SIZE    64
0055 
0056 #define NFSDBG_FACILITY NFSDBG_XATTRCACHE
0057 
0058 struct nfs4_xattr_cache;
0059 struct nfs4_xattr_entry;
0060 
0061 struct nfs4_xattr_bucket {
0062     spinlock_t lock;
0063     struct hlist_head hlist;
0064     struct nfs4_xattr_cache *cache;
0065     bool draining;
0066 };
0067 
0068 struct nfs4_xattr_cache {
0069     struct kref ref;
0070     struct nfs4_xattr_bucket buckets[NFS4_XATTR_HASH_SIZE];
0071     struct list_head lru;
0072     struct list_head dispose;
0073     atomic_long_t nent;
0074     spinlock_t listxattr_lock;
0075     struct inode *inode;
0076     struct nfs4_xattr_entry *listxattr;
0077 };
0078 
0079 struct nfs4_xattr_entry {
0080     struct kref ref;
0081     struct hlist_node hnode;
0082     struct list_head lru;
0083     struct list_head dispose;
0084     char *xattr_name;
0085     void *xattr_value;
0086     size_t xattr_size;
0087     struct nfs4_xattr_bucket *bucket;
0088     uint32_t flags;
0089 };
0090 
0091 #define NFS4_XATTR_ENTRY_EXTVAL 0x0001
0092 
0093 /*
0094  * LRU lists: one for per-inode xattr caches, two for cache entries (normal and large).
0095  */
0096 static struct list_lru nfs4_xattr_cache_lru;
0097 static struct list_lru nfs4_xattr_entry_lru;
0098 static struct list_lru nfs4_xattr_large_entry_lru;
0099 
0100 static struct kmem_cache *nfs4_xattr_cache_cachep;
0101 
0102 /*
0103  * Hashing helper functions.
0104  */
0105 static void
0106 nfs4_xattr_hash_init(struct nfs4_xattr_cache *cache)
0107 {
0108     unsigned int i;
0109 
0110     for (i = 0; i < NFS4_XATTR_HASH_SIZE; i++) {
0111         INIT_HLIST_HEAD(&cache->buckets[i].hlist);
0112         spin_lock_init(&cache->buckets[i].lock);
0113         cache->buckets[i].cache = cache;
0114         cache->buckets[i].draining = false;
0115     }
0116 }
0117 
0118 /*
0119  * Locking order:
0120  * 1. inode i_lock or bucket lock
0121  * 2. list_lru lock (taken by list_lru_* functions)
0122  */
0123 
0124 /*
0125  * Wrapper functions to add a cache entry to the right LRU.
0126  */
0127 static bool
0128 nfs4_xattr_entry_lru_add(struct nfs4_xattr_entry *entry)
0129 {
0130     struct list_lru *lru;
0131 
0132     lru = (entry->flags & NFS4_XATTR_ENTRY_EXTVAL) ?
0133         &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;
0134 
0135     return list_lru_add(lru, &entry->lru);
0136 }
0137 
0138 static bool
0139 nfs4_xattr_entry_lru_del(struct nfs4_xattr_entry *entry)
0140 {
0141     struct list_lru *lru;
0142 
0143     lru = (entry->flags & NFS4_XATTR_ENTRY_EXTVAL) ?
0144         &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;
0145 
0146     return list_lru_del(lru, &entry->lru);
0147 }
0148 
0149 /*
0150  * This function allocates cache entries. They are the normal
0151  * extended attribute name/value pairs, but may also be a listxattr
0152  * cache. Those allocations use the same entry so that they can be
0153  * treated as one by the memory shrinker.
0154  *
0155  * xattr cache entries are allocated together with names. If the
0156  * value fits into one page with the entry structure and the name,
0157  * it will also be part of the same allocation (kmalloc). This is
0158  * expected to be the vast majority of cases. Larger allocations
0159  * have a value pointer that is allocated separately by kvmalloc.
0160  *
0161  * Parameters:
0162  *
0163  * @name:  Name of the extended attribute. NULL for listxattr cache
0164  *         entry.
0165  * @value: Value of attribute, or listxattr cache. NULL if the
0166  *         value is to be copied from pages instead.
0167  * @pages: Pages to copy the value from, if not NULL. Passed in to
0168  *     make it easier to copy the value after an RPC, even if
0169  *     the value will not be passed up to the application (e.g.
0170  *     for a 'query' getxattr with NULL buffer).
0171  * @len:   Length of the value. Can be 0 for zero-length attributes.
0172  *         @value and @pages will be NULL if @len is 0.
0173  */
0174 static struct nfs4_xattr_entry *
0175 nfs4_xattr_alloc_entry(const char *name, const void *value,
0176                struct page **pages, size_t len)
0177 {
0178     struct nfs4_xattr_entry *entry;
0179     void *valp;
0180     char *namep;
0181     size_t alloclen, slen;
0182     char *buf;
0183     uint32_t flags;
0184 
0185     BUILD_BUG_ON(sizeof(struct nfs4_xattr_entry) +
0186         XATTR_NAME_MAX + 1 > PAGE_SIZE);
0187 
0188     alloclen = sizeof(struct nfs4_xattr_entry);
0189     if (name != NULL) {
0190         slen = strlen(name) + 1;
0191         alloclen += slen;
0192     } else
0193         slen = 0;
0194 
0195     if (alloclen + len <= PAGE_SIZE) {
0196         alloclen += len;
0197         flags = 0;
0198     } else {
0199         flags = NFS4_XATTR_ENTRY_EXTVAL;
0200     }
0201 
0202     buf = kmalloc(alloclen, GFP_KERNEL);
0203     if (buf == NULL)
0204         return NULL;
0205     entry = (struct nfs4_xattr_entry *)buf;
0206 
0207     if (name != NULL) {
0208         namep = buf + sizeof(struct nfs4_xattr_entry);
0209         memcpy(namep, name, slen);
0210     } else {
0211         namep = NULL;
0212     }
0213 
0214 
0215     if (flags & NFS4_XATTR_ENTRY_EXTVAL) {
0216         valp = kvmalloc(len, GFP_KERNEL);
0217         if (valp == NULL) {
0218             kfree(buf);
0219             return NULL;
0220         }
0221     } else if (len != 0) {
0222         valp = buf + sizeof(struct nfs4_xattr_entry) + slen;
0223     } else
0224         valp = NULL;
0225 
0226     if (valp != NULL) {
0227         if (value != NULL)
0228             memcpy(valp, value, len);
0229         else
0230             _copy_from_pages(valp, pages, 0, len);
0231     }
0232 
0233     entry->flags = flags;
0234     entry->xattr_value = valp;
0235     kref_init(&entry->ref);
0236     entry->xattr_name = namep;
0237     entry->xattr_size = len;
0238     entry->bucket = NULL;
0239     INIT_LIST_HEAD(&entry->lru);
0240     INIT_LIST_HEAD(&entry->dispose);
0241     INIT_HLIST_NODE(&entry->hnode);
0242 
0243     return entry;
0244 }
0245 
0246 static void
0247 nfs4_xattr_free_entry(struct nfs4_xattr_entry *entry)
0248 {
0249     if (entry->flags & NFS4_XATTR_ENTRY_EXTVAL)
0250         kvfree(entry->xattr_value);
0251     kfree(entry);
0252 }
0253 
0254 static void
0255 nfs4_xattr_free_entry_cb(struct kref *kref)
0256 {
0257     struct nfs4_xattr_entry *entry;
0258 
0259     entry = container_of(kref, struct nfs4_xattr_entry, ref);
0260 
0261     if (WARN_ON(!list_empty(&entry->lru)))
0262         return;
0263 
0264     nfs4_xattr_free_entry(entry);
0265 }
0266 
0267 static void
0268 nfs4_xattr_free_cache_cb(struct kref *kref)
0269 {
0270     struct nfs4_xattr_cache *cache;
0271     int i;
0272 
0273     cache = container_of(kref, struct nfs4_xattr_cache, ref);
0274 
0275     for (i = 0; i < NFS4_XATTR_HASH_SIZE; i++) {
0276         if (WARN_ON(!hlist_empty(&cache->buckets[i].hlist)))
0277             return;
0278         cache->buckets[i].draining = false;
0279     }
0280 
0281     cache->listxattr = NULL;
0282 
0283     kmem_cache_free(nfs4_xattr_cache_cachep, cache);
0284 
0285 }
0286 
0287 static struct nfs4_xattr_cache *
0288 nfs4_xattr_alloc_cache(void)
0289 {
0290     struct nfs4_xattr_cache *cache;
0291 
0292     cache = kmem_cache_alloc(nfs4_xattr_cache_cachep, GFP_KERNEL);
0293     if (cache == NULL)
0294         return NULL;
0295 
0296     kref_init(&cache->ref);
0297     atomic_long_set(&cache->nent, 0);
0298 
0299     return cache;
0300 }
0301 
0302 /*
0303  * Set the listxattr cache, which is a special-cased cache entry.
0304  * The special value ERR_PTR(-ESTALE) is used to indicate that
0305  * the cache is being drained - this prevents a new listxattr
0306  * cache from being added to what is now a stale cache.
0307  */
0308 static int
0309 nfs4_xattr_set_listcache(struct nfs4_xattr_cache *cache,
0310              struct nfs4_xattr_entry *new)
0311 {
0312     struct nfs4_xattr_entry *old;
0313     int ret = 1;
0314 
0315     spin_lock(&cache->listxattr_lock);
0316 
0317     old = cache->listxattr;
0318 
0319     if (old == ERR_PTR(-ESTALE)) {
0320         ret = 0;
0321         goto out;
0322     }
0323 
0324     cache->listxattr = new;
0325     if (new != NULL && new != ERR_PTR(-ESTALE))
0326         nfs4_xattr_entry_lru_add(new);
0327 
0328     if (old != NULL) {
0329         nfs4_xattr_entry_lru_del(old);
0330         kref_put(&old->ref, nfs4_xattr_free_entry_cb);
0331     }
0332 out:
0333     spin_unlock(&cache->listxattr_lock);
0334 
0335     return ret;
0336 }
0337 
0338 /*
0339  * Unlink a cache from its parent inode, clearing out an invalid
0340  * cache. Must be called with i_lock held.
0341  */
0342 static struct nfs4_xattr_cache *
0343 nfs4_xattr_cache_unlink(struct inode *inode)
0344 {
0345     struct nfs_inode *nfsi;
0346     struct nfs4_xattr_cache *oldcache;
0347 
0348     nfsi = NFS_I(inode);
0349 
0350     oldcache = nfsi->xattr_cache;
0351     if (oldcache != NULL) {
0352         list_lru_del(&nfs4_xattr_cache_lru, &oldcache->lru);
0353         oldcache->inode = NULL;
0354     }
0355     nfsi->xattr_cache = NULL;
0356     nfsi->cache_validity &= ~NFS_INO_INVALID_XATTR;
0357 
0358     return oldcache;
0359 
0360 }
0361 
0362 /*
0363  * Discard a cache. Called by get_cache() if there was an old,
0364  * invalid cache. Can also be called from a shrinker callback.
0365  *
0366  * The cache is dead, it has already been unlinked from its inode,
0367  * and no longer appears on the cache LRU list.
0368  *
0369  * Mark all buckets as draining, so that no new entries are added. This
0370  * could still happen in the unlikely, but possible case that another
0371  * thread had grabbed a reference before it was unlinked from the inode,
0372  * and is still holding it for an add operation.
0373  *
0374  * Remove all entries from the LRU lists, so that there is no longer
0375  * any way to 'find' this cache. Then, remove the entries from the hash
0376  * table.
0377  *
0378  * At that point, the cache will remain empty and can be freed when the final
0379  * reference drops, which is very likely the kref_put at the end of
0380  * this function, or the one called immediately afterwards in the
0381  * shrinker callback.
0382  */
0383 static void
0384 nfs4_xattr_discard_cache(struct nfs4_xattr_cache *cache)
0385 {
0386     unsigned int i;
0387     struct nfs4_xattr_entry *entry;
0388     struct nfs4_xattr_bucket *bucket;
0389     struct hlist_node *n;
0390 
0391     nfs4_xattr_set_listcache(cache, ERR_PTR(-ESTALE));
0392 
0393     for (i = 0; i < NFS4_XATTR_HASH_SIZE; i++) {
0394         bucket = &cache->buckets[i];
0395 
0396         spin_lock(&bucket->lock);
0397         bucket->draining = true;
0398         hlist_for_each_entry_safe(entry, n, &bucket->hlist, hnode) {
0399             nfs4_xattr_entry_lru_del(entry);
0400             hlist_del_init(&entry->hnode);
0401             kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
0402         }
0403         spin_unlock(&bucket->lock);
0404     }
0405 
0406     atomic_long_set(&cache->nent, 0);
0407 
0408     kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
0409 }
0410 
0411 /*
0412  * Get a referenced copy of the cache structure. Avoid doing allocs
0413  * while holding i_lock, which means that we do some optimistic allocation,
0414  * and might have to free the result in rare cases.
0415  *
0416  * This function only checks the NFS_INO_INVALID_XATTR cache validity bit
0417  * and acts accordingly, replacing the cache when needed. For the read case
0418  * (!add), this means that the caller must make sure that the cache
0419  * is valid before calling this function. getxattr and listxattr call
0420  * revalidate_inode to do this. The attribute cache timeout (for the
0421  * non-delegated case) is expected to be dealt with in the revalidate
0422  * call.
0423  */
0424 
0425 static struct nfs4_xattr_cache *
0426 nfs4_xattr_get_cache(struct inode *inode, int add)
0427 {
0428     struct nfs_inode *nfsi;
0429     struct nfs4_xattr_cache *cache, *oldcache, *newcache;
0430 
0431     nfsi = NFS_I(inode);
0432 
0433     cache = oldcache = NULL;
0434 
0435     spin_lock(&inode->i_lock);
0436 
0437     if (nfsi->cache_validity & NFS_INO_INVALID_XATTR)
0438         oldcache = nfs4_xattr_cache_unlink(inode);
0439     else
0440         cache = nfsi->xattr_cache;
0441 
0442     if (cache != NULL)
0443         kref_get(&cache->ref);
0444 
0445     spin_unlock(&inode->i_lock);
0446 
0447     if (add && cache == NULL) {
0448         newcache = NULL;
0449 
0450         cache = nfs4_xattr_alloc_cache();
0451         if (cache == NULL)
0452             goto out;
0453 
0454         spin_lock(&inode->i_lock);
0455         if (nfsi->cache_validity & NFS_INO_INVALID_XATTR) {
0456             /*
0457              * The cache was invalidated again. Give up,
0458              * since what we want to enter is now likely
0459              * outdated anyway.
0460              */
0461             spin_unlock(&inode->i_lock);
0462             kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
0463             cache = NULL;
0464             goto out;
0465         }
0466 
0467         /*
0468          * Check if someone beat us to it.
0469          */
0470         if (nfsi->xattr_cache != NULL) {
0471             newcache = nfsi->xattr_cache;
0472             kref_get(&newcache->ref);
0473         } else {
0474             kref_get(&cache->ref);
0475             nfsi->xattr_cache = cache;
0476             cache->inode = inode;
0477             list_lru_add(&nfs4_xattr_cache_lru, &cache->lru);
0478         }
0479 
0480         spin_unlock(&inode->i_lock);
0481 
0482         /*
0483          * If there was a race, throw away the cache we just
0484          * allocated, and use the new one allocated by someone
0485          * else.
0486          */
0487         if (newcache != NULL) {
0488             kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
0489             cache = newcache;
0490         }
0491     }
0492 
0493 out:
0494     /*
0495      * Discard the now orphaned old cache.
0496      */
0497     if (oldcache != NULL)
0498         nfs4_xattr_discard_cache(oldcache);
0499 
0500     return cache;
0501 }
0502 
0503 static inline struct nfs4_xattr_bucket *
0504 nfs4_xattr_hash_bucket(struct nfs4_xattr_cache *cache, const char *name)
0505 {
0506     return &cache->buckets[jhash(name, strlen(name), 0) &
0507         (ARRAY_SIZE(cache->buckets) - 1)];
0508 }
0509 
0510 static struct nfs4_xattr_entry *
0511 nfs4_xattr_get_entry(struct nfs4_xattr_bucket *bucket, const char *name)
0512 {
0513     struct nfs4_xattr_entry *entry;
0514 
0515     entry = NULL;
0516 
0517     hlist_for_each_entry(entry, &bucket->hlist, hnode) {
0518         if (!strcmp(entry->xattr_name, name))
0519             break;
0520     }
0521 
0522     return entry;
0523 }
0524 
0525 static int
0526 nfs4_xattr_hash_add(struct nfs4_xattr_cache *cache,
0527             struct nfs4_xattr_entry *entry)
0528 {
0529     struct nfs4_xattr_bucket *bucket;
0530     struct nfs4_xattr_entry *oldentry = NULL;
0531     int ret = 1;
0532 
0533     bucket = nfs4_xattr_hash_bucket(cache, entry->xattr_name);
0534     entry->bucket = bucket;
0535 
0536     spin_lock(&bucket->lock);
0537 
0538     if (bucket->draining) {
0539         ret = 0;
0540         goto out;
0541     }
0542 
0543     oldentry = nfs4_xattr_get_entry(bucket, entry->xattr_name);
0544     if (oldentry != NULL) {
0545         hlist_del_init(&oldentry->hnode);
0546         nfs4_xattr_entry_lru_del(oldentry);
0547     } else {
0548         atomic_long_inc(&cache->nent);
0549     }
0550 
0551     hlist_add_head(&entry->hnode, &bucket->hlist);
0552     nfs4_xattr_entry_lru_add(entry);
0553 
0554 out:
0555     spin_unlock(&bucket->lock);
0556 
0557     if (oldentry != NULL)
0558         kref_put(&oldentry->ref, nfs4_xattr_free_entry_cb);
0559 
0560     return ret;
0561 }
0562 
0563 static void
0564 nfs4_xattr_hash_remove(struct nfs4_xattr_cache *cache, const char *name)
0565 {
0566     struct nfs4_xattr_bucket *bucket;
0567     struct nfs4_xattr_entry *entry;
0568 
0569     bucket = nfs4_xattr_hash_bucket(cache, name);
0570 
0571     spin_lock(&bucket->lock);
0572 
0573     entry = nfs4_xattr_get_entry(bucket, name);
0574     if (entry != NULL) {
0575         hlist_del_init(&entry->hnode);
0576         nfs4_xattr_entry_lru_del(entry);
0577         atomic_long_dec(&cache->nent);
0578     }
0579 
0580     spin_unlock(&bucket->lock);
0581 
0582     if (entry != NULL)
0583         kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
0584 }
0585 
0586 static struct nfs4_xattr_entry *
0587 nfs4_xattr_hash_find(struct nfs4_xattr_cache *cache, const char *name)
0588 {
0589     struct nfs4_xattr_bucket *bucket;
0590     struct nfs4_xattr_entry *entry;
0591 
0592     bucket = nfs4_xattr_hash_bucket(cache, name);
0593 
0594     spin_lock(&bucket->lock);
0595 
0596     entry = nfs4_xattr_get_entry(bucket, name);
0597     if (entry != NULL)
0598         kref_get(&entry->ref);
0599 
0600     spin_unlock(&bucket->lock);
0601 
0602     return entry;
0603 }
0604 
0605 /*
0606  * Entry point to retrieve an entry from the cache.
0607  */
0608 ssize_t nfs4_xattr_cache_get(struct inode *inode, const char *name, char *buf,
0609              ssize_t buflen)
0610 {
0611     struct nfs4_xattr_cache *cache;
0612     struct nfs4_xattr_entry *entry;
0613     ssize_t ret;
0614 
0615     cache = nfs4_xattr_get_cache(inode, 0);
0616     if (cache == NULL)
0617         return -ENOENT;
0618 
0619     ret = 0;
0620     entry = nfs4_xattr_hash_find(cache, name);
0621 
0622     if (entry != NULL) {
0623         dprintk("%s: cache hit '%s', len %lu\n", __func__,
0624             entry->xattr_name, (unsigned long)entry->xattr_size);
0625         if (buflen == 0) {
0626             /* Length probe only */
0627             ret = entry->xattr_size;
0628         } else if (buflen < entry->xattr_size)
0629             ret = -ERANGE;
0630         else {
0631             memcpy(buf, entry->xattr_value, entry->xattr_size);
0632             ret = entry->xattr_size;
0633         }
0634         kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
0635     } else {
0636         dprintk("%s: cache miss '%s'\n", __func__, name);
0637         ret = -ENOENT;
0638     }
0639 
0640     kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
0641 
0642     return ret;
0643 }
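The return values above mirror getxattr(2): the attribute size on a hit (with the value copied out when a buffer is supplied), -ERANGE when the supplied buffer is too small, and -ENOENT when the attribute is not cached or the inode has no cache. A minimal, hedged sketch of a caller using the zero-length probe; the attribute name and function name below are purely illustrative:

/* Illustrative sketch only - not part of this file. */
static ssize_t example_cached_size(struct inode *inode)
{
	/* buflen == 0 acts as a length probe on a cache hit. */
	ssize_t len = nfs4_xattr_cache_get(inode, "user.example", NULL, 0);

	if (len == -ENOENT)
		return 0;	/* not cached: the caller would have to ask the server */

	return len;		/* cached attribute size */
}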
0644 
0645 /*
0646  * Retrieve a cached list of xattrs from the cache.
0647  */
0648 ssize_t nfs4_xattr_cache_list(struct inode *inode, char *buf, ssize_t buflen)
0649 {
0650     struct nfs4_xattr_cache *cache;
0651     struct nfs4_xattr_entry *entry;
0652     ssize_t ret;
0653 
0654     cache = nfs4_xattr_get_cache(inode, 0);
0655     if (cache == NULL)
0656         return -ENOENT;
0657 
0658     spin_lock(&cache->listxattr_lock);
0659 
0660     entry = cache->listxattr;
0661 
0662     if (entry != NULL && entry != ERR_PTR(-ESTALE)) {
0663         if (buflen == 0) {
0664             /* Length probe only */
0665             ret = entry->xattr_size;
0666         } else if (entry->xattr_size > buflen)
0667             ret = -ERANGE;
0668         else {
0669             memcpy(buf, entry->xattr_value, entry->xattr_size);
0670             ret = entry->xattr_size;
0671         }
0672     } else {
0673         ret = -ENOENT;
0674     }
0675 
0676     spin_unlock(&cache->listxattr_lock);
0677 
0678     kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
0679 
0680     return ret;
0681 }
0682 
0683 /*
0684  * Add an xattr to the cache.
0685  *
0686  * This also invalidates the xattr list cache.
0687  */
0688 void nfs4_xattr_cache_add(struct inode *inode, const char *name,
0689               const char *buf, struct page **pages, ssize_t buflen)
0690 {
0691     struct nfs4_xattr_cache *cache;
0692     struct nfs4_xattr_entry *entry;
0693 
0694     dprintk("%s: add '%s' len %lu\n", __func__,
0695         name, (unsigned long)buflen);
0696 
0697     cache = nfs4_xattr_get_cache(inode, 1);
0698     if (cache == NULL)
0699         return;
0700 
0701     entry = nfs4_xattr_alloc_entry(name, buf, pages, buflen);
0702     if (entry == NULL)
0703         goto out;
0704 
0705     (void)nfs4_xattr_set_listcache(cache, NULL);
0706 
0707     if (!nfs4_xattr_hash_add(cache, entry))
0708         kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
0709 
0710 out:
0711     kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
0712 }
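Together with nfs4_xattr_cache_get(), this gives the intended read pattern: consult the cache first and, only on a miss, issue the GETXATTR RPC and then add the reply. A hedged sketch of that pattern follows; fetch_xattr_over_rpc() is a hypothetical stand-in for the real NFSv4.2 GETXATTR call in the surrounding client code, and both its name and signature are assumptions, not part of this file:

/* Illustrative sketch only - not part of this file. */

/* Hypothetical stand-in for the NFSv4.2 GETXATTR RPC. */
static ssize_t fetch_xattr_over_rpc(struct inode *inode, const char *name,
				    char *buf, ssize_t buflen);

static ssize_t example_getxattr(struct inode *inode, const char *name,
				char *buf, ssize_t buflen)
{
	ssize_t ret;

	ret = nfs4_xattr_cache_get(inode, name, buf, buflen);
	if (ret != -ENOENT)
		return ret;	/* hit (size returned) or -ERANGE */

	ret = fetch_xattr_over_rpc(inode, name, buf, buflen);
	if (ret > 0)
		/* Cache the freshly fetched value for the next lookup. */
		nfs4_xattr_cache_add(inode, name, buf, NULL, ret);

	return ret;
}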
0713 
0714 
0715 /*
0716  * Remove an xattr from the cache.
0717  *
0718  * This also invalidates the xattr list cache.
0719  */
0720 void nfs4_xattr_cache_remove(struct inode *inode, const char *name)
0721 {
0722     struct nfs4_xattr_cache *cache;
0723 
0724     dprintk("%s: remove '%s'\n", __func__, name);
0725 
0726     cache = nfs4_xattr_get_cache(inode, 0);
0727     if (cache == NULL)
0728         return;
0729 
0730     (void)nfs4_xattr_set_listcache(cache, NULL);
0731     nfs4_xattr_hash_remove(cache, name);
0732 
0733     kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
0734 }
0735 
0736 /*
0737  * Cache listxattr output, replacing any possible old one.
0738  */
0739 void nfs4_xattr_cache_set_list(struct inode *inode, const char *buf,
0740                    ssize_t buflen)
0741 {
0742     struct nfs4_xattr_cache *cache;
0743     struct nfs4_xattr_entry *entry;
0744 
0745     cache = nfs4_xattr_get_cache(inode, 1);
0746     if (cache == NULL)
0747         return;
0748 
0749     entry = nfs4_xattr_alloc_entry(NULL, buf, NULL, buflen);
0750     if (entry == NULL)
0751         goto out;
0752 
0753     /*
0754      * This is just there to be able to get to bucket->cache,
0755      * which is obviously the same for all buckets, so just
0756      * use bucket 0.
0757      */
0758     entry->bucket = &cache->buckets[0];
0759 
0760     if (!nfs4_xattr_set_listcache(cache, entry))
0761         kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
0762 
0763 out:
0764     kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
0765 }
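The listxattr path follows the same shape: nfs4_xattr_cache_list() is tried first, and once a full LISTXATTRS reply has been assembled into a flat buffer the result is cached here. A hedged sketch, with fetch_xattr_list_over_rpc() as a hypothetical stand-in for the real LISTXATTRS call (name and signature assumed, not part of this file):

/* Illustrative sketch only - not part of this file. */

/* Hypothetical stand-in for the NFSv4.2 LISTXATTRS RPC. */
static ssize_t fetch_xattr_list_over_rpc(struct inode *inode, char *buf,
					 ssize_t buflen);

static ssize_t example_listxattr(struct inode *inode, char *buf, ssize_t buflen)
{
	ssize_t ret;

	ret = nfs4_xattr_cache_list(inode, buf, buflen);
	if (ret != -ENOENT)
		return ret;	/* served from the cache */

	ret = fetch_xattr_list_over_rpc(inode, buf, buflen);
	if (ret > 0)
		/* Cache the flattened name list for the next listxattr. */
		nfs4_xattr_cache_set_list(inode, buf, ret);

	return ret;
}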
0766 
0767 /*
0768  * Zap the entire cache. Called when an inode is evicted.
0769  */
0770 void nfs4_xattr_cache_zap(struct inode *inode)
0771 {
0772     struct nfs4_xattr_cache *oldcache;
0773 
0774     spin_lock(&inode->i_lock);
0775     oldcache = nfs4_xattr_cache_unlink(inode);
0776     spin_unlock(&inode->i_lock);
0777 
0778     if (oldcache)
0779         nfs4_xattr_discard_cache(oldcache);
0780 }
0781 
0782 /*
0783  * The entry LRU is shrunk more aggressively than the cache LRU,
0784  * by setting @seeks to 1.
0785  *
0786  * Cache structures are freed only once they are (nearly) empty: the
0787  * cache shrinker skips any cache that still holds more than one entry.
0788  */
0789 
0790 static unsigned long nfs4_xattr_cache_count(struct shrinker *shrink,
0791                         struct shrink_control *sc);
0792 static unsigned long nfs4_xattr_entry_count(struct shrinker *shrink,
0793                         struct shrink_control *sc);
0794 static unsigned long nfs4_xattr_cache_scan(struct shrinker *shrink,
0795                        struct shrink_control *sc);
0796 static unsigned long nfs4_xattr_entry_scan(struct shrinker *shrink,
0797                        struct shrink_control *sc);
0798 
0799 static struct shrinker nfs4_xattr_cache_shrinker = {
0800     .count_objects  = nfs4_xattr_cache_count,
0801     .scan_objects   = nfs4_xattr_cache_scan,
0802     .seeks      = DEFAULT_SEEKS,
0803     .flags      = SHRINKER_MEMCG_AWARE,
0804 };
0805 
0806 static struct shrinker nfs4_xattr_entry_shrinker = {
0807     .count_objects  = nfs4_xattr_entry_count,
0808     .scan_objects   = nfs4_xattr_entry_scan,
0809     .seeks      = DEFAULT_SEEKS,
0810     .batch      = 512,
0811     .flags      = SHRINKER_MEMCG_AWARE,
0812 };
0813 
0814 static struct shrinker nfs4_xattr_large_entry_shrinker = {
0815     .count_objects  = nfs4_xattr_entry_count,
0816     .scan_objects   = nfs4_xattr_entry_scan,
0817     .seeks      = 1,
0818     .batch      = 512,
0819     .flags      = SHRINKER_MEMCG_AWARE,
0820 };
0821 
0822 static enum lru_status
0823 cache_lru_isolate(struct list_head *item,
0824     struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
0825 {
0826     struct list_head *dispose = arg;
0827     struct inode *inode;
0828     struct nfs4_xattr_cache *cache = container_of(item,
0829         struct nfs4_xattr_cache, lru);
0830 
0831     if (atomic_long_read(&cache->nent) > 1)
0832         return LRU_SKIP;
0833 
0834     /*
0835      * If a cache structure is on the LRU list, we know that
0836      * its inode is valid. Try to lock it to break the link.
0837      * Since we're inverting the lock order here, only try.
0838      */
0839     inode = cache->inode;
0840 
0841     if (!spin_trylock(&inode->i_lock))
0842         return LRU_SKIP;
0843 
0844     kref_get(&cache->ref);
0845 
0846     cache->inode = NULL;
0847     NFS_I(inode)->xattr_cache = NULL;
0848     NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_XATTR;
0849     list_lru_isolate(lru, &cache->lru);
0850 
0851     spin_unlock(&inode->i_lock);
0852 
0853     list_add_tail(&cache->dispose, dispose);
0854     return LRU_REMOVED;
0855 }
0856 
0857 static unsigned long
0858 nfs4_xattr_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
0859 {
0860     LIST_HEAD(dispose);
0861     unsigned long freed;
0862     struct nfs4_xattr_cache *cache;
0863 
0864     freed = list_lru_shrink_walk(&nfs4_xattr_cache_lru, sc,
0865         cache_lru_isolate, &dispose);
0866     while (!list_empty(&dispose)) {
0867         cache = list_first_entry(&dispose, struct nfs4_xattr_cache,
0868             dispose);
0869         list_del_init(&cache->dispose);
0870         nfs4_xattr_discard_cache(cache);
0871         kref_put(&cache->ref, nfs4_xattr_free_cache_cb);
0872     }
0873 
0874     return freed;
0875 }
0876 
0877 
0878 static unsigned long
0879 nfs4_xattr_cache_count(struct shrinker *shrink, struct shrink_control *sc)
0880 {
0881     unsigned long count;
0882 
0883     count = list_lru_shrink_count(&nfs4_xattr_cache_lru, sc);
0884     return vfs_pressure_ratio(count);
0885 }
0886 
0887 static enum lru_status
0888 entry_lru_isolate(struct list_head *item,
0889     struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
0890 {
0891     struct list_head *dispose = arg;
0892     struct nfs4_xattr_bucket *bucket;
0893     struct nfs4_xattr_cache *cache;
0894     struct nfs4_xattr_entry *entry = container_of(item,
0895         struct nfs4_xattr_entry, lru);
0896 
0897     bucket = entry->bucket;
0898     cache = bucket->cache;
0899 
0900     /*
0901      * Unhook the entry from its parent (either a cache bucket
0902      * or a cache structure if it's a listxattr buf), so that
0903      * it's no longer found. Then add it to the isolate list,
0904      * to be freed later.
0905      *
0906      * In both cases, we're inverting lock order, so use
0907      * trylock and skip the entry if we can't get the lock.
0908      */
0909     if (entry->xattr_name != NULL) {
0910         /* Regular cache entry */
0911         if (!spin_trylock(&bucket->lock))
0912             return LRU_SKIP;
0913 
0914         kref_get(&entry->ref);
0915 
0916         hlist_del_init(&entry->hnode);
0917         atomic_long_dec(&cache->nent);
0918         list_lru_isolate(lru, &entry->lru);
0919 
0920         spin_unlock(&bucket->lock);
0921     } else {
0922         /* Listxattr cache entry */
0923         if (!spin_trylock(&cache->listxattr_lock))
0924             return LRU_SKIP;
0925 
0926         kref_get(&entry->ref);
0927 
0928         cache->listxattr = NULL;
0929         list_lru_isolate(lru, &entry->lru);
0930 
0931         spin_unlock(&cache->listxattr_lock);
0932     }
0933 
0934     list_add_tail(&entry->dispose, dispose);
0935     return LRU_REMOVED;
0936 }
0937 
0938 static unsigned long
0939 nfs4_xattr_entry_scan(struct shrinker *shrink, struct shrink_control *sc)
0940 {
0941     LIST_HEAD(dispose);
0942     unsigned long freed;
0943     struct nfs4_xattr_entry *entry;
0944     struct list_lru *lru;
0945 
0946     lru = (shrink == &nfs4_xattr_large_entry_shrinker) ?
0947         &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;
0948 
0949     freed = list_lru_shrink_walk(lru, sc, entry_lru_isolate, &dispose);
0950 
0951     while (!list_empty(&dispose)) {
0952         entry = list_first_entry(&dispose, struct nfs4_xattr_entry,
0953             dispose);
0954         list_del_init(&entry->dispose);
0955 
0956         /*
0957          * Drop two references: the one that we just grabbed
0958          * in entry_lru_isolate, and the one that was set
0959          * when the entry was first allocated.
0960          */
0961         kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
0962         kref_put(&entry->ref, nfs4_xattr_free_entry_cb);
0963     }
0964 
0965     return freed;
0966 }
0967 
0968 static unsigned long
0969 nfs4_xattr_entry_count(struct shrinker *shrink, struct shrink_control *sc)
0970 {
0971     unsigned long count;
0972     struct list_lru *lru;
0973 
0974     lru = (shrink == &nfs4_xattr_large_entry_shrinker) ?
0975         &nfs4_xattr_large_entry_lru : &nfs4_xattr_entry_lru;
0976 
0977     count = list_lru_shrink_count(lru, sc);
0978     return vfs_pressure_ratio(count);
0979 }
0980 
0981 
0982 static void nfs4_xattr_cache_init_once(void *p)
0983 {
0984     struct nfs4_xattr_cache *cache = (struct nfs4_xattr_cache *)p;
0985 
0986     spin_lock_init(&cache->listxattr_lock);
0987     atomic_long_set(&cache->nent, 0);
0988     nfs4_xattr_hash_init(cache);
0989     cache->listxattr = NULL;
0990     INIT_LIST_HEAD(&cache->lru);
0991     INIT_LIST_HEAD(&cache->dispose);
0992 }
0993 
0994 int __init nfs4_xattr_cache_init(void)
0995 {
0996     int ret = 0;
0997 
0998     nfs4_xattr_cache_cachep = kmem_cache_create("nfs4_xattr_cache_cache",
0999         sizeof(struct nfs4_xattr_cache), 0,
1000         (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
1001         nfs4_xattr_cache_init_once);
1002     if (nfs4_xattr_cache_cachep == NULL)
1003         return -ENOMEM;
1004 
1005     ret = list_lru_init_memcg(&nfs4_xattr_large_entry_lru,
1006         &nfs4_xattr_large_entry_shrinker);
1007     if (ret)
1008         goto out4;
1009 
1010     ret = list_lru_init_memcg(&nfs4_xattr_entry_lru,
1011         &nfs4_xattr_entry_shrinker);
1012     if (ret)
1013         goto out3;
1014 
1015     ret = list_lru_init_memcg(&nfs4_xattr_cache_lru,
1016         &nfs4_xattr_cache_shrinker);
1017     if (ret)
1018         goto out2;
1019 
1020     ret = register_shrinker(&nfs4_xattr_cache_shrinker, "nfs-xattr_cache");
1021     if (ret)
1022         goto out1;
1023 
1024     ret = register_shrinker(&nfs4_xattr_entry_shrinker, "nfs-xattr_entry");
1025     if (ret)
1026         goto out;
1027 
1028     ret = register_shrinker(&nfs4_xattr_large_entry_shrinker,
1029                 "nfs-xattr_large_entry");
1030     if (!ret)
1031         return 0;
1032 
1033     unregister_shrinker(&nfs4_xattr_entry_shrinker);
1034 out:
1035     unregister_shrinker(&nfs4_xattr_cache_shrinker);
1036 out1:
1037     list_lru_destroy(&nfs4_xattr_cache_lru);
1038 out2:
1039     list_lru_destroy(&nfs4_xattr_entry_lru);
1040 out3:
1041     list_lru_destroy(&nfs4_xattr_large_entry_lru);
1042 out4:
1043     kmem_cache_destroy(nfs4_xattr_cache_cachep);
1044 
1045     return ret;
1046 }
1047 
1048 void nfs4_xattr_cache_exit(void)
1049 {
1050     unregister_shrinker(&nfs4_xattr_large_entry_shrinker);
1051     unregister_shrinker(&nfs4_xattr_entry_shrinker);
1052     unregister_shrinker(&nfs4_xattr_cache_shrinker);
1053     list_lru_destroy(&nfs4_xattr_large_entry_lru);
1054     list_lru_destroy(&nfs4_xattr_entry_lru);
1055     list_lru_destroy(&nfs4_xattr_cache_lru);
1056     kmem_cache_destroy(nfs4_xattr_cache_cachep);
1057 }