Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Open file cache.
0003  *
0004  * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
0005  */
0006 
0007 #include <linux/hash.h>
0008 #include <linux/slab.h>
0009 #include <linux/file.h>
0010 #include <linux/pagemap.h>
0011 #include <linux/sched.h>
0012 #include <linux/list_lru.h>
0013 #include <linux/fsnotify_backend.h>
0014 #include <linux/fsnotify.h>
0015 #include <linux/seq_file.h>
0016 #include <linux/rhashtable.h>
0017 
0018 #include "vfs.h"
0019 #include "nfsd.h"
0020 #include "nfsfh.h"
0021 #include "netns.h"
0022 #include "filecache.h"
0023 #include "trace.h"
0024 
/* Delay, in jiffies, used when queueing the laundrette delayed work */
0025 #define NFSD_LAUNDRETTE_DELAY            (2 * HZ)
0026 
/*
 * Bit number in nfsd_file_flags: set by nfsd_file_cache_init() and
 * cleared by nfsd_file_cache_shutdown().
 */
0027 #define NFSD_FILE_CACHE_UP           (0)
0028 
0029 /* We only care about NFSD_MAY_READ/WRITE for this cache */
0030 #define NFSD_FILE_MAY_MASK  (NFSD_MAY_READ|NFSD_MAY_WRITE)
0031 
/*
 * Per-CPU statistics counters. All of them are reset to zero in
 * nfsd_file_cache_shutdown().
 */
0032 static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);
0033 static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions);
/* Incremented once per nfsd_file_free() call */
0034 static DEFINE_PER_CPU(unsigned long, nfsd_file_releases);
/* Sum of item lifetimes, in milliseconds, accumulated in nfsd_file_free() */
0035 static DEFINE_PER_CPU(unsigned long, nfsd_file_total_age);
/* Sum of mapping->nrpages sampled just before each fsync in nfsd_file_flush() */
0036 static DEFINE_PER_CPU(unsigned long, nfsd_file_pages_flushed);
/* Incremented in nfsd_file_lru_cb() for each item moved to the dispose list */
0037 static DEFINE_PER_CPU(unsigned long, nfsd_file_evictions);
0038 
/*
 * Per-net-namespace disposal queue. Items are spliced onto @freeme by
 * nfsd_file_list_add_disposal() and drained by nfsd_file_delayed_close().
 */
0039 struct nfsd_fcache_disposal {
/* work item whose handler is nfsd_file_delayed_close() */
0040     struct work_struct work;
/* held while splicing entries onto or off @freeme */
0041     spinlock_t lock;
/* nfsd_file entries (linked via nf_lru) waiting to be released */
0042     struct list_head freeme;
0043 };
0044 
/* Workqueue on which the per-net disposal work items are queued */
0045 static struct workqueue_struct *nfsd_filecache_wq __read_mostly;
0046 
/* Slab caches for struct nfsd_file and struct nfsd_file_mark objects */
0047 static struct kmem_cache        *nfsd_file_slab;
0048 static struct kmem_cache        *nfsd_file_mark_slab;
/* LRU of cache items, walked by the laundrette and the shrinker */
0049 static struct list_lru          nfsd_file_lru;
/* Global state bits; see NFSD_FILE_CACHE_UP */
0050 static unsigned long            nfsd_file_flags;
/* fsnotify group that owns every nfsd_file_mark */
0051 static struct fsnotify_group        *nfsd_file_fsnotify_group;
/* Delayed work that runs nfsd_file_gc_worker() */
0052 static struct delayed_work      nfsd_filecache_laundrette;
/* Hash table of cache items, hashed by inode address */
0053 static struct rhashtable        nfsd_file_rhash_tbl
0054                         ____cacheline_aligned_in_smp;
0055 
/* Selects how nfsd_file_obj_cmpfn() compares a lookup key to a cache item */
0056 enum nfsd_file_lookup_type {
/* match on the inode only */
0057     NFSD_FILE_KEY_INODE,
/* match on inode, access mode, net namespace, cred and HASHED state */
0058     NFSD_FILE_KEY_FULL,
0059 };
0060 
/*
 * Search criteria handed to the rhashtable lookup functions. Only @inode
 * contributes to the hash value; the remaining fields are consulted by
 * nfsd_file_obj_cmpfn() for NFSD_FILE_KEY_FULL lookups.
 */
0061 struct nfsd_file_lookup_key {
0062     struct inode            *inode;
0063     struct net          *net;
0064     const struct cred       *cred;
/* compared against nf_may */
0065     unsigned char           need;
0066     enum nfsd_file_lookup_type  type;
0067 };
0068 
0069 /*
0070  * The returned hash value is based solely on the address of an in-code
0071  * inode, a pointer to a slab-allocated object. The entropy in such a
0072  * pointer is concentrated in its middle bits.
0073  */
0074 static u32 nfsd_file_inode_hash(const struct inode *inode, u32 seed)
0075 {
0076     unsigned long ptr = (unsigned long)inode;
0077     u32 k;
0078 
0079     k = ptr >> L1_CACHE_SHIFT;
0080     k &= 0x00ffffff;
0081     return jhash2(&k, 1, seed);
0082 }
0083 
0084 /**
0085  * nfsd_file_key_hashfn - Compute the hash value of a lookup key
0086  * @data: key on which to compute the hash value
0087  * @len: rhash table's key_len parameter (unused)
0088  * @seed: rhash table's random seed of the day
0089  *
0090  * Return value:
0091  *   Computed 32-bit hash value
0092  */
0093 static u32 nfsd_file_key_hashfn(const void *data, u32 len, u32 seed)
0094 {
0095     const struct nfsd_file_lookup_key *key = data;
0096 
0097     return nfsd_file_inode_hash(key->inode, seed);
0098 }
0099 
0100 /**
0101  * nfsd_file_obj_hashfn - Compute the hash value of an nfsd_file
0102  * @data: object on which to compute the hash value
0103  * @len: rhash table's key_len parameter (unused)
0104  * @seed: rhash table's random seed of the day
0105  *
0106  * Return value:
0107  *   Computed 32-bit hash value
0108  */
0109 static u32 nfsd_file_obj_hashfn(const void *data, u32 len, u32 seed)
0110 {
0111     const struct nfsd_file *nf = data;
0112 
0113     return nfsd_file_inode_hash(nf->nf_inode, seed);
0114 }
0115 
0116 static bool
0117 nfsd_match_cred(const struct cred *c1, const struct cred *c2)
0118 {
0119     int i;
0120 
0121     if (!uid_eq(c1->fsuid, c2->fsuid))
0122         return false;
0123     if (!gid_eq(c1->fsgid, c2->fsgid))
0124         return false;
0125     if (c1->group_info == NULL || c2->group_info == NULL)
0126         return c1->group_info == c2->group_info;
0127     if (c1->group_info->ngroups != c2->group_info->ngroups)
0128         return false;
0129     for (i = 0; i < c1->group_info->ngroups; i++) {
0130         if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i]))
0131             return false;
0132     }
0133     return true;
0134 }
0135 
0136 /**
0137  * nfsd_file_obj_cmpfn - Match a cache item against search criteria
0138  * @arg: search criteria
0139  * @ptr: cache item to check
0140  *
0141  * Return values:
0142  *   %0 - Item matches search criteria
0143  *   %1 - Item does not match search criteria
0144  */
0145 static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg,
0146                    const void *ptr)
0147 {
0148     const struct nfsd_file_lookup_key *key = arg->key;
0149     const struct nfsd_file *nf = ptr;
0150 
0151     switch (key->type) {
0152     case NFSD_FILE_KEY_INODE:
0153         if (nf->nf_inode != key->inode)
0154             return 1;
0155         break;
0156     case NFSD_FILE_KEY_FULL:
0157         if (nf->nf_inode != key->inode)
0158             return 1;
0159         if (nf->nf_may != key->need)
0160             return 1;
0161         if (nf->nf_net != key->net)
0162             return 1;
0163         if (!nfsd_match_cred(nf->nf_cred, key->cred))
0164             return 1;
0165         if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0)
0166             return 1;
0167         break;
0168     }
0169     return 0;
0170 }
0171 
/*
 * Parameters for nfsd_file_rhash_tbl. Both hash functions hash only the
 * inode address; nfsd_file_obj_cmpfn() makes the final match decision.
 */
0172 static const struct rhashtable_params nfsd_file_rhash_params = {
0173     .key_len        = sizeof_field(struct nfsd_file, nf_inode),
0174     .key_offset     = offsetof(struct nfsd_file, nf_inode),
0175     .head_offset        = offsetof(struct nfsd_file, nf_rhash),
0176     .hashfn         = nfsd_file_key_hashfn,
0177     .obj_hashfn     = nfsd_file_obj_hashfn,
0178     .obj_cmpfn      = nfsd_file_obj_cmpfn,
0179     /* Reduce resizing churn on light workloads */
0180     .min_size       = 512,      /* buckets */
0181     .automatic_shrinking    = true,
0182 };
0183 
0184 static void
0185 nfsd_file_schedule_laundrette(void)
0186 {
0187     if ((atomic_read(&nfsd_file_rhash_tbl.nelems) == 0) ||
0188         test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0)
0189         return;
0190 
0191     queue_delayed_work(system_wq, &nfsd_filecache_laundrette,
0192             NFSD_LAUNDRETTE_DELAY);
0193 }
0194 
0195 static void
0196 nfsd_file_slab_free(struct rcu_head *rcu)
0197 {
0198     struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu);
0199 
0200     put_cred(nf->nf_cred);
0201     kmem_cache_free(nfsd_file_slab, nf);
0202 }
0203 
0204 static void
0205 nfsd_file_mark_free(struct fsnotify_mark *mark)
0206 {
0207     struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark,
0208                           nfm_mark);
0209 
0210     kmem_cache_free(nfsd_file_mark_slab, nfm);
0211 }
0212 
0213 static struct nfsd_file_mark *
0214 nfsd_file_mark_get(struct nfsd_file_mark *nfm)
0215 {
0216     if (!refcount_inc_not_zero(&nfm->nfm_ref))
0217         return NULL;
0218     return nfm;
0219 }
0220 
0221 static void
0222 nfsd_file_mark_put(struct nfsd_file_mark *nfm)
0223 {
0224     if (refcount_dec_and_test(&nfm->nfm_ref)) {
0225         fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group);
0226         fsnotify_put_mark(&nfm->nfm_mark);
0227     }
0228 }
0229 
0230 static struct nfsd_file_mark *
0231 nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode)
0232 {
0233     int         err;
0234     struct fsnotify_mark    *mark;
0235     struct nfsd_file_mark   *nfm = NULL, *new;
0236 
0237     do {
0238         fsnotify_group_lock(nfsd_file_fsnotify_group);
0239         mark = fsnotify_find_mark(&inode->i_fsnotify_marks,
0240                       nfsd_file_fsnotify_group);
0241         if (mark) {
0242             nfm = nfsd_file_mark_get(container_of(mark,
0243                          struct nfsd_file_mark,
0244                          nfm_mark));
0245             fsnotify_group_unlock(nfsd_file_fsnotify_group);
0246             if (nfm) {
0247                 fsnotify_put_mark(mark);
0248                 break;
0249             }
0250             /* Avoid soft lockup race with nfsd_file_mark_put() */
0251             fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group);
0252             fsnotify_put_mark(mark);
0253         } else {
0254             fsnotify_group_unlock(nfsd_file_fsnotify_group);
0255         }
0256 
0257         /* allocate a new nfm */
0258         new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
0259         if (!new)
0260             return NULL;
0261         fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group);
0262         new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF;
0263         refcount_set(&new->nfm_ref, 1);
0264 
0265         err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0);
0266 
0267         /*
0268          * If the add was successful, then return the object.
0269          * Otherwise, we need to put the reference we hold on the
0270          * nfm_mark. The fsnotify code will take a reference and put
0271          * it on failure, so we can't just free it directly. It's also
0272          * not safe to call fsnotify_destroy_mark on it as the
0273          * mark->group will be NULL. Thus, we can't let the nfm_ref
0274          * counter drive the destruction at this point.
0275          */
0276         if (likely(!err))
0277             nfm = new;
0278         else
0279             fsnotify_put_mark(&new->nfm_mark);
0280     } while (unlikely(err == -EEXIST));
0281 
0282     return nfm;
0283 }
0284 
0285 static struct nfsd_file *
0286 nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may)
0287 {
0288     struct nfsd_file *nf;
0289 
0290     nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
0291     if (nf) {
0292         INIT_LIST_HEAD(&nf->nf_lru);
0293         nf->nf_birthtime = ktime_get();
0294         nf->nf_file = NULL;
0295         nf->nf_cred = get_current_cred();
0296         nf->nf_net = key->net;
0297         nf->nf_flags = 0;
0298         __set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
0299         __set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
0300         nf->nf_inode = key->inode;
0301         /* nf_ref is pre-incremented for hash table */
0302         refcount_set(&nf->nf_ref, 2);
0303         nf->nf_may = key->need;
0304         nf->nf_mark = NULL;
0305     }
0306     return nf;
0307 }
0308 
0309 static bool
0310 nfsd_file_free(struct nfsd_file *nf)
0311 {
0312     s64 age = ktime_to_ms(ktime_sub(ktime_get(), nf->nf_birthtime));
0313     bool flush = false;
0314 
0315     this_cpu_inc(nfsd_file_releases);
0316     this_cpu_add(nfsd_file_total_age, age);
0317 
0318     trace_nfsd_file_put_final(nf);
0319     if (nf->nf_mark)
0320         nfsd_file_mark_put(nf->nf_mark);
0321     if (nf->nf_file) {
0322         get_file(nf->nf_file);
0323         filp_close(nf->nf_file, NULL);
0324         fput(nf->nf_file);
0325         flush = true;
0326     }
0327 
0328     /*
0329      * If this item is still linked via nf_lru, that's a bug.
0330      * WARN and leak it to preserve system stability.
0331      */
0332     if (WARN_ON_ONCE(!list_empty(&nf->nf_lru)))
0333         return flush;
0334 
0335     call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
0336     return flush;
0337 }
0338 
0339 static bool
0340 nfsd_file_check_writeback(struct nfsd_file *nf)
0341 {
0342     struct file *file = nf->nf_file;
0343     struct address_space *mapping;
0344 
0345     if (!file || !(file->f_mode & FMODE_WRITE))
0346         return false;
0347     mapping = file->f_mapping;
0348     return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) ||
0349         mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
0350 }
0351 
0352 static int
0353 nfsd_file_check_write_error(struct nfsd_file *nf)
0354 {
0355     struct file *file = nf->nf_file;
0356 
0357     if (!file || !(file->f_mode & FMODE_WRITE))
0358         return 0;
0359     return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
0360 }
0361 
0362 static void
0363 nfsd_file_flush(struct nfsd_file *nf)
0364 {
0365     struct file *file = nf->nf_file;
0366 
0367     if (!file || !(file->f_mode & FMODE_WRITE))
0368         return;
0369     this_cpu_add(nfsd_file_pages_flushed, file->f_mapping->nrpages);
0370     if (vfs_fsync(file, 1) != 0)
0371         nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
0372 }
0373 
0374 static void nfsd_file_lru_add(struct nfsd_file *nf)
0375 {
0376     set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
0377     if (list_lru_add(&nfsd_file_lru, &nf->nf_lru))
0378         trace_nfsd_file_lru_add(nf);
0379 }
0380 
0381 static void nfsd_file_lru_remove(struct nfsd_file *nf)
0382 {
0383     if (list_lru_del(&nfsd_file_lru, &nf->nf_lru))
0384         trace_nfsd_file_lru_del(nf);
0385 }
0386 
0387 static void
0388 nfsd_file_hash_remove(struct nfsd_file *nf)
0389 {
0390     trace_nfsd_file_unhash(nf);
0391 
0392     if (nfsd_file_check_write_error(nf))
0393         nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
0394     rhashtable_remove_fast(&nfsd_file_rhash_tbl, &nf->nf_rhash,
0395                    nfsd_file_rhash_params);
0396 }
0397 
0398 static bool
0399 nfsd_file_unhash(struct nfsd_file *nf)
0400 {
0401     if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
0402         nfsd_file_hash_remove(nf);
0403         return true;
0404     }
0405     return false;
0406 }
0407 
0408 /*
0409  * Return true if the file was unhashed.
0410  */
0411 static bool
0412 nfsd_file_unhash_and_dispose(struct nfsd_file *nf, struct list_head *dispose)
0413 {
0414     trace_nfsd_file_unhash_and_dispose(nf);
0415     if (!nfsd_file_unhash(nf))
0416         return false;
0417     /* keep final reference for nfsd_file_lru_dispose */
0418     if (refcount_dec_not_one(&nf->nf_ref))
0419         return true;
0420 
0421     nfsd_file_lru_remove(nf);
0422     list_add(&nf->nf_lru, dispose);
0423     return true;
0424 }
0425 
0426 static void
0427 nfsd_file_put_noref(struct nfsd_file *nf)
0428 {
0429     trace_nfsd_file_put(nf);
0430 
0431     if (refcount_dec_and_test(&nf->nf_ref)) {
0432         WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
0433         nfsd_file_lru_remove(nf);
0434         nfsd_file_free(nf);
0435     }
0436 }
0437 
0438 void
0439 nfsd_file_put(struct nfsd_file *nf)
0440 {
0441     might_sleep();
0442 
0443     nfsd_file_lru_add(nf);
0444     if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) {
0445         nfsd_file_flush(nf);
0446         nfsd_file_put_noref(nf);
0447     } else if (nf->nf_file) {
0448         nfsd_file_put_noref(nf);
0449         nfsd_file_schedule_laundrette();
0450     } else
0451         nfsd_file_put_noref(nf);
0452 }
0453 
0454 /**
0455  * nfsd_file_close - Close an nfsd_file
0456  * @nf: nfsd_file to close
0457  *
0458  * If this is the final reference for @nf, free it immediately.
0459  * This reflects an on-the-wire CLOSE or DELEGRETURN into the
0460  * VFS and exported filesystem.
0461  */
0462 void nfsd_file_close(struct nfsd_file *nf)
0463 {
0464     nfsd_file_put(nf);
0465     if (refcount_dec_if_one(&nf->nf_ref)) {
0466         nfsd_file_unhash(nf);
0467         nfsd_file_lru_remove(nf);
0468         nfsd_file_free(nf);
0469     }
0470 }
0471 
0472 struct nfsd_file *
0473 nfsd_file_get(struct nfsd_file *nf)
0474 {
0475     if (likely(refcount_inc_not_zero(&nf->nf_ref)))
0476         return nf;
0477     return NULL;
0478 }
0479 
0480 static void
0481 nfsd_file_dispose_list(struct list_head *dispose)
0482 {
0483     struct nfsd_file *nf;
0484 
0485     while(!list_empty(dispose)) {
0486         nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
0487         list_del_init(&nf->nf_lru);
0488         nfsd_file_flush(nf);
0489         nfsd_file_put_noref(nf);
0490     }
0491 }
0492 
0493 static void
0494 nfsd_file_dispose_list_sync(struct list_head *dispose)
0495 {
0496     bool flush = false;
0497     struct nfsd_file *nf;
0498 
0499     while(!list_empty(dispose)) {
0500         nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
0501         list_del_init(&nf->nf_lru);
0502         nfsd_file_flush(nf);
0503         if (!refcount_dec_and_test(&nf->nf_ref))
0504             continue;
0505         if (nfsd_file_free(nf))
0506             flush = true;
0507     }
0508     if (flush)
0509         flush_delayed_fput();
0510 }
0511 
0512 static void
0513 nfsd_file_list_remove_disposal(struct list_head *dst,
0514         struct nfsd_fcache_disposal *l)
0515 {
0516     spin_lock(&l->lock);
0517     list_splice_init(&l->freeme, dst);
0518     spin_unlock(&l->lock);
0519 }
0520 
0521 static void
0522 nfsd_file_list_add_disposal(struct list_head *files, struct net *net)
0523 {
0524     struct nfsd_net *nn = net_generic(net, nfsd_net_id);
0525     struct nfsd_fcache_disposal *l = nn->fcache_disposal;
0526 
0527     spin_lock(&l->lock);
0528     list_splice_tail_init(files, &l->freeme);
0529     spin_unlock(&l->lock);
0530     queue_work(nfsd_filecache_wq, &l->work);
0531 }
0532 
0533 static void
0534 nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src,
0535         struct net *net)
0536 {
0537     struct nfsd_file *nf, *tmp;
0538 
0539     list_for_each_entry_safe(nf, tmp, src, nf_lru) {
0540         if (nf->nf_net == net)
0541             list_move_tail(&nf->nf_lru, dst);
0542     }
0543 }
0544 
0545 static void
0546 nfsd_file_dispose_list_delayed(struct list_head *dispose)
0547 {
0548     LIST_HEAD(list);
0549     struct nfsd_file *nf;
0550 
0551     while(!list_empty(dispose)) {
0552         nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
0553         nfsd_file_list_add_pernet(&list, dispose, nf->nf_net);
0554         nfsd_file_list_add_disposal(&list, nf->nf_net);
0555     }
0556 }
0557 
0558 /**
0559  * nfsd_file_lru_cb - Examine an entry on the LRU list
0560  * @item: LRU entry to examine
0561  * @lru: controlling LRU
0562  * @lock: LRU list lock (unused)
0563  * @arg: dispose list
0564  *
0565  * Note this can deadlock with nfsd_file_cache_purge.
0566  *
0567  * Return values:
0568  *   %LRU_REMOVED: @item was removed from the LRU
0569  *   %LRU_ROTATE: @item is to be moved to the LRU tail
0570  *   %LRU_SKIP: @item cannot be evicted
0571  */
0572 static enum lru_status
0573 nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
0574          spinlock_t *lock, void *arg)
0575     __releases(lock)
0576     __acquires(lock)
0577 {
0578     struct list_head *head = arg;
0579     struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);
0580 
0581     /*
0582      * Do a lockless refcount check. The hashtable holds one reference, so
0583      * we look to see if anything else has a reference, or if any have
0584      * been put since the shrinker last ran. Those don't get unhashed and
0585      * released.
0586      *
0587      * Note that in the put path, we set the flag and then decrement the
0588      * counter. Here we check the counter and then test and clear the flag.
0589      * That order is deliberate to ensure that we can do this locklessly.
0590      */
0591     if (refcount_read(&nf->nf_ref) > 1) {
0592         list_lru_isolate(lru, &nf->nf_lru);
0593         trace_nfsd_file_gc_in_use(nf);
0594         return LRU_REMOVED;
0595     }
0596 
0597     /*
0598      * Don't throw out files that are still undergoing I/O or
0599      * that have uncleared errors pending.
0600      */
0601     if (nfsd_file_check_writeback(nf)) {
0602         trace_nfsd_file_gc_writeback(nf);
0603         return LRU_SKIP;
0604     }
0605 
0606     if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) {
0607         trace_nfsd_file_gc_referenced(nf);
0608         return LRU_ROTATE;
0609     }
0610 
0611     if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
0612         trace_nfsd_file_gc_hashed(nf);
0613         return LRU_SKIP;
0614     }
0615 
0616     list_lru_isolate_move(lru, &nf->nf_lru, head);
0617     this_cpu_inc(nfsd_file_evictions);
0618     trace_nfsd_file_gc_disposed(nf);
0619     return LRU_REMOVED;
0620 }
0621 
0622 /*
0623  * Unhash items on @dispose immediately, then queue them on the
0624  * disposal workqueue to finish releasing them in the background.
0625  *
0626  * cel: Note that between the time list_lru_shrink_walk runs and
0627  * now, these items are in the hash table but marked unhashed.
0628  * Why release these outside of lru_cb ? There's no lock ordering
0629  * problem since lru_cb currently takes no lock.
0630  */
0631 static void nfsd_file_gc_dispose_list(struct list_head *dispose)
0632 {
0633     struct nfsd_file *nf;
0634 
0635     list_for_each_entry(nf, dispose, nf_lru)
0636         nfsd_file_hash_remove(nf);
0637     nfsd_file_dispose_list_delayed(dispose);
0638 }
0639 
0640 static void
0641 nfsd_file_gc(void)
0642 {
0643     LIST_HEAD(dispose);
0644     unsigned long ret;
0645 
0646     ret = list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb,
0647                 &dispose, list_lru_count(&nfsd_file_lru));
0648     trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru));
0649     nfsd_file_gc_dispose_list(&dispose);
0650 }
0651 
/*
 * Laundrette work handler: run one garbage-collection pass, then
 * schedule the next one.
 */
static void
nfsd_file_gc_worker(struct work_struct *work)
{
    nfsd_file_gc();

    /* a no-op when the hash table is empty or the cache is down */
    nfsd_file_schedule_laundrette();
}
0658 
0659 static unsigned long
0660 nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
0661 {
0662     return list_lru_count(&nfsd_file_lru);
0663 }
0664 
0665 static unsigned long
0666 nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
0667 {
0668     LIST_HEAD(dispose);
0669     unsigned long ret;
0670 
0671     ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
0672                    nfsd_file_lru_cb, &dispose);
0673     trace_nfsd_file_shrinker_removed(ret, list_lru_count(&nfsd_file_lru));
0674     nfsd_file_gc_dispose_list(&dispose);
0675     return ret;
0676 }
0677 
/*
 * Memory shrinker for the file cache LRU; registered in
 * nfsd_file_cache_init() and unregistered in nfsd_file_cache_shutdown().
 */
0678 static struct shrinker  nfsd_file_shrinker = {
0679     .scan_objects = nfsd_file_lru_scan,
0680     .count_objects = nfsd_file_lru_count,
0681     .seeks = 1,
0682 };
0683 
0684 /*
0685  * Find all cache items across all net namespaces that match @inode and
0686  * move them to @dispose. The lookup is atomic wrt nfsd_file_acquire().
0687  */
0688 static unsigned int
0689 __nfsd_file_close_inode(struct inode *inode, struct list_head *dispose)
0690 {
0691     struct nfsd_file_lookup_key key = {
0692         .type   = NFSD_FILE_KEY_INODE,
0693         .inode  = inode,
0694     };
0695     unsigned int count = 0;
0696     struct nfsd_file *nf;
0697 
0698     rcu_read_lock();
0699     do {
0700         nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key,
0701                        nfsd_file_rhash_params);
0702         if (!nf)
0703             break;
0704         nfsd_file_unhash_and_dispose(nf, dispose);
0705         count++;
0706     } while (1);
0707     rcu_read_unlock();
0708     return count;
0709 }
0710 
0711 /**
0712  * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
0713  * @inode: inode of the file to attempt to remove
0714  *
0715  * Unhash and put, then flush and fput all cache items associated with @inode.
0716  */
0717 void
0718 nfsd_file_close_inode_sync(struct inode *inode)
0719 {
0720     LIST_HEAD(dispose);
0721     unsigned int count;
0722 
0723     count = __nfsd_file_close_inode(inode, &dispose);
0724     trace_nfsd_file_close_inode_sync(inode, count);
0725     nfsd_file_dispose_list_sync(&dispose);
0726 }
0727 
0728 /**
0729  * nfsd_file_close_inode - attempt a delayed close of a nfsd_file
0730  * @inode: inode of the file to attempt to remove
0731  *
0732  * Unhash and put all cache item associated with @inode.
0733  */
0734 static void
0735 nfsd_file_close_inode(struct inode *inode)
0736 {
0737     LIST_HEAD(dispose);
0738     unsigned int count;
0739 
0740     count = __nfsd_file_close_inode(inode, &dispose);
0741     trace_nfsd_file_close_inode(inode, count);
0742     nfsd_file_dispose_list_delayed(&dispose);
0743 }
0744 
0745 /**
0746  * nfsd_file_delayed_close - close unused nfsd_files
0747  * @work: dummy
0748  *
0749  * Walk the LRU list and close any entries that have not been used since
0750  * the last scan.
0751  *
0752  * Note this can deadlock with nfsd_file_cache_purge.
0753  */
0754 static void
0755 nfsd_file_delayed_close(struct work_struct *work)
0756 {
0757     LIST_HEAD(head);
0758     struct nfsd_fcache_disposal *l = container_of(work,
0759             struct nfsd_fcache_disposal, work);
0760 
0761     nfsd_file_list_remove_disposal(&head, l);
0762     nfsd_file_dispose_list(&head);
0763 }
0764 
0765 static int
0766 nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg,
0767                 void *data)
0768 {
0769     struct file_lock *fl = data;
0770 
0771     /* Only close files for F_SETLEASE leases */
0772     if (fl->fl_flags & FL_LEASE)
0773         nfsd_file_close_inode_sync(file_inode(fl->fl_file));
0774     return 0;
0775 }
0776 
/*
 * Lease notifier block; registered in nfsd_file_cache_init() and
 * unregistered in nfsd_file_cache_shutdown().
 */
0777 static struct notifier_block nfsd_file_lease_notifier = {
0778     .notifier_call = nfsd_file_lease_notifier_call,
0779 };
0780 
0781 static int
0782 nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask,
0783                 struct inode *inode, struct inode *dir,
0784                 const struct qstr *name, u32 cookie)
0785 {
0786     if (WARN_ON_ONCE(!inode))
0787         return 0;
0788 
0789     trace_nfsd_file_fsnotify_handle_event(inode, mask);
0790 
0791     /* Should be no marks on non-regular files */
0792     if (!S_ISREG(inode->i_mode)) {
0793         WARN_ON_ONCE(1);
0794         return 0;
0795     }
0796 
0797     /* don't close files if this was not the last link */
0798     if (mask & FS_ATTRIB) {
0799         if (inode->i_nlink)
0800             return 0;
0801     }
0802 
0803     nfsd_file_close_inode(inode);
0804     return 0;
0805 }
0806 
0807 
/* Callbacks for the fsnotify group allocated in nfsd_file_cache_init() */
0808 static const struct fsnotify_ops nfsd_file_fsnotify_ops = {
0809     .handle_inode_event = nfsd_file_fsnotify_handle_event,
0810     .free_mark = nfsd_file_mark_free,
0811 };
0812 
0813 int
0814 nfsd_file_cache_init(void)
0815 {
0816     int ret;
0817 
0818     lockdep_assert_held(&nfsd_mutex);
0819     if (test_and_set_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1)
0820         return 0;
0821 
0822     ret = rhashtable_init(&nfsd_file_rhash_tbl, &nfsd_file_rhash_params);
0823     if (ret)
0824         return ret;
0825 
0826     ret = -ENOMEM;
0827     nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0);
0828     if (!nfsd_filecache_wq)
0829         goto out;
0830 
0831     nfsd_file_slab = kmem_cache_create("nfsd_file",
0832                 sizeof(struct nfsd_file), 0, 0, NULL);
0833     if (!nfsd_file_slab) {
0834         pr_err("nfsd: unable to create nfsd_file_slab\n");
0835         goto out_err;
0836     }
0837 
0838     nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark",
0839                     sizeof(struct nfsd_file_mark), 0, 0, NULL);
0840     if (!nfsd_file_mark_slab) {
0841         pr_err("nfsd: unable to create nfsd_file_mark_slab\n");
0842         goto out_err;
0843     }
0844 
0845 
0846     ret = list_lru_init(&nfsd_file_lru);
0847     if (ret) {
0848         pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret);
0849         goto out_err;
0850     }
0851 
0852     ret = register_shrinker(&nfsd_file_shrinker, "nfsd-filecache");
0853     if (ret) {
0854         pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret);
0855         goto out_lru;
0856     }
0857 
0858     ret = lease_register_notifier(&nfsd_file_lease_notifier);
0859     if (ret) {
0860         pr_err("nfsd: unable to register lease notifier: %d\n", ret);
0861         goto out_shrinker;
0862     }
0863 
0864     nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops,
0865                             FSNOTIFY_GROUP_NOFS);
0866     if (IS_ERR(nfsd_file_fsnotify_group)) {
0867         pr_err("nfsd: unable to create fsnotify group: %ld\n",
0868             PTR_ERR(nfsd_file_fsnotify_group));
0869         ret = PTR_ERR(nfsd_file_fsnotify_group);
0870         nfsd_file_fsnotify_group = NULL;
0871         goto out_notifier;
0872     }
0873 
0874     INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker);
0875 out:
0876     return ret;
0877 out_notifier:
0878     lease_unregister_notifier(&nfsd_file_lease_notifier);
0879 out_shrinker:
0880     unregister_shrinker(&nfsd_file_shrinker);
0881 out_lru:
0882     list_lru_destroy(&nfsd_file_lru);
0883 out_err:
0884     kmem_cache_destroy(nfsd_file_slab);
0885     nfsd_file_slab = NULL;
0886     kmem_cache_destroy(nfsd_file_mark_slab);
0887     nfsd_file_mark_slab = NULL;
0888     destroy_workqueue(nfsd_filecache_wq);
0889     nfsd_filecache_wq = NULL;
0890     rhashtable_destroy(&nfsd_file_rhash_tbl);
0891     goto out;
0892 }
0893 
0894 /*
0895  * Note this can deadlock with nfsd_file_lru_cb.
0896  */
0897 static void
0898 __nfsd_file_cache_purge(struct net *net)
0899 {
0900     struct rhashtable_iter iter;
0901     struct nfsd_file *nf;
0902     LIST_HEAD(dispose);
0903     bool del;
0904 
0905     rhashtable_walk_enter(&nfsd_file_rhash_tbl, &iter);
0906     do {
0907         rhashtable_walk_start(&iter);
0908 
0909         nf = rhashtable_walk_next(&iter);
0910         while (!IS_ERR_OR_NULL(nf)) {
0911             if (net && nf->nf_net != net)
0912                 continue;
0913             del = nfsd_file_unhash_and_dispose(nf, &dispose);
0914 
0915             /*
0916              * Deadlock detected! Something marked this entry as
0917              * unhased, but hasn't removed it from the hash list.
0918              */
0919             WARN_ON_ONCE(!del);
0920 
0921             nf = rhashtable_walk_next(&iter);
0922         }
0923 
0924         rhashtable_walk_stop(&iter);
0925     } while (nf == ERR_PTR(-EAGAIN));
0926     rhashtable_walk_exit(&iter);
0927 
0928     nfsd_file_dispose_list(&dispose);
0929 }
0930 
0931 static struct nfsd_fcache_disposal *
0932 nfsd_alloc_fcache_disposal(void)
0933 {
0934     struct nfsd_fcache_disposal *l;
0935 
0936     l = kmalloc(sizeof(*l), GFP_KERNEL);
0937     if (!l)
0938         return NULL;
0939     INIT_WORK(&l->work, nfsd_file_delayed_close);
0940     spin_lock_init(&l->lock);
0941     INIT_LIST_HEAD(&l->freeme);
0942     return l;
0943 }
0944 
0945 static void
0946 nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l)
0947 {
0948     cancel_work_sync(&l->work);
0949     nfsd_file_dispose_list(&l->freeme);
0950     kfree(l);
0951 }
0952 
0953 static void
0954 nfsd_free_fcache_disposal_net(struct net *net)
0955 {
0956     struct nfsd_net *nn = net_generic(net, nfsd_net_id);
0957     struct nfsd_fcache_disposal *l = nn->fcache_disposal;
0958 
0959     nfsd_free_fcache_disposal(l);
0960 }
0961 
0962 int
0963 nfsd_file_cache_start_net(struct net *net)
0964 {
0965     struct nfsd_net *nn = net_generic(net, nfsd_net_id);
0966 
0967     nn->fcache_disposal = nfsd_alloc_fcache_disposal();
0968     return nn->fcache_disposal ? 0 : -ENOMEM;
0969 }
0970 
0971 /**
0972  * nfsd_file_cache_purge - Remove all cache items associated with @net
0973  * @net: target net namespace
0974  *
0975  */
0976 void
0977 nfsd_file_cache_purge(struct net *net)
0978 {
0979     lockdep_assert_held(&nfsd_mutex);
0980     if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1)
0981         __nfsd_file_cache_purge(net);
0982 }
0983 
/*
 * Shut down the per-net part of the file cache: purge the cache items
 * that belong to @net, then tear down its disposal queue.
 */
void
nfsd_file_cache_shutdown_net(struct net *net)
{
    /* purge first so nothing new is queued once the list is drained */
    nfsd_file_cache_purge(net);
    nfsd_free_fcache_disposal_net(net);
}
0990 
0991 void
0992 nfsd_file_cache_shutdown(void)
0993 {
0994     int i;
0995 
0996     lockdep_assert_held(&nfsd_mutex);
0997     if (test_and_clear_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0)
0998         return;
0999 
1000     lease_unregister_notifier(&nfsd_file_lease_notifier);
1001     unregister_shrinker(&nfsd_file_shrinker);
1002     /*
1003      * make sure all callers of nfsd_file_lru_cb are done before
1004      * calling nfsd_file_cache_purge
1005      */
1006     cancel_delayed_work_sync(&nfsd_filecache_laundrette);
1007     __nfsd_file_cache_purge(NULL);
1008     list_lru_destroy(&nfsd_file_lru);
1009     rcu_barrier();
1010     fsnotify_put_group(nfsd_file_fsnotify_group);
1011     nfsd_file_fsnotify_group = NULL;
1012     kmem_cache_destroy(nfsd_file_slab);
1013     nfsd_file_slab = NULL;
1014     fsnotify_wait_marks_destroyed();
1015     kmem_cache_destroy(nfsd_file_mark_slab);
1016     nfsd_file_mark_slab = NULL;
1017     destroy_workqueue(nfsd_filecache_wq);
1018     nfsd_filecache_wq = NULL;
1019     rhashtable_destroy(&nfsd_file_rhash_tbl);
1020 
1021     for_each_possible_cpu(i) {
1022         per_cpu(nfsd_file_cache_hits, i) = 0;
1023         per_cpu(nfsd_file_acquisitions, i) = 0;
1024         per_cpu(nfsd_file_releases, i) = 0;
1025         per_cpu(nfsd_file_total_age, i) = 0;
1026         per_cpu(nfsd_file_pages_flushed, i) = 0;
1027         per_cpu(nfsd_file_evictions, i) = 0;
1028     }
1029 }
1030 
1031 /**
1032  * nfsd_file_is_cached - are there any cached open files for this inode?
1033  * @inode: inode to check
1034  *
1035  * The lookup matches inodes in all net namespaces and is atomic wrt
1036  * nfsd_file_acquire().
1037  *
1038  * Return values:
1039  *   %true: filecache contains at least one file matching this inode
1040  *   %false: filecache contains no files matching this inode
1041  */
1042 bool
1043 nfsd_file_is_cached(struct inode *inode)
1044 {
1045     struct nfsd_file_lookup_key key = {
1046         .type   = NFSD_FILE_KEY_INODE,
1047         .inode  = inode,
1048     };
1049     bool ret = false;
1050 
1051     if (rhashtable_lookup_fast(&nfsd_file_rhash_tbl, &key,
1052                    nfsd_file_rhash_params) != NULL)
1053         ret = true;
1054     trace_nfsd_file_is_cached(inode, (int)ret);
1055     return ret;
1056 }
1057 
1058 static __be32
1059 nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
1060              unsigned int may_flags, struct nfsd_file **pnf, bool open)
1061 {
1062     struct nfsd_file_lookup_key key = {
1063         .type   = NFSD_FILE_KEY_FULL,
1064         .need   = may_flags & NFSD_FILE_MAY_MASK,
1065         .net    = SVC_NET(rqstp),
1066     };
1067     struct nfsd_file *nf, *new;
1068     bool retry = true;
1069     __be32 status;
1070 
1071     status = fh_verify(rqstp, fhp, S_IFREG,
1072                 may_flags|NFSD_MAY_OWNER_OVERRIDE);
1073     if (status != nfs_ok)
1074         return status;
1075     key.inode = d_inode(fhp->fh_dentry);
1076     key.cred = get_current_cred();
1077 
1078 retry:
1079     /* Avoid allocation if the item is already in cache */
1080     nf = rhashtable_lookup_fast(&nfsd_file_rhash_tbl, &key,
1081                     nfsd_file_rhash_params);
1082     if (nf)
1083         nf = nfsd_file_get(nf);
1084     if (nf)
1085         goto wait_for_construction;
1086 
1087     new = nfsd_file_alloc(&key, may_flags);
1088     if (!new) {
1089         status = nfserr_jukebox;
1090         goto out_status;
1091     }
1092 
1093     nf = rhashtable_lookup_get_insert_key(&nfsd_file_rhash_tbl,
1094                           &key, &new->nf_rhash,
1095                           nfsd_file_rhash_params);
1096     if (!nf) {
1097         nf = new;
1098         goto open_file;
1099     }
1100     if (IS_ERR(nf))
1101         goto insert_err;
1102     nf = nfsd_file_get(nf);
1103     if (nf == NULL) {
1104         nf = new;
1105         goto open_file;
1106     }
1107     nfsd_file_slab_free(&new->nf_rcu);
1108 
1109 wait_for_construction:
1110     wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);
1111 
1112     /* Did construction of this file fail? */
1113     if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
1114         trace_nfsd_file_cons_err(rqstp, key.inode, may_flags, nf);
1115         if (!retry) {
1116             status = nfserr_jukebox;
1117             goto out;
1118         }
1119         retry = false;
1120         nfsd_file_put_noref(nf);
1121         goto retry;
1122     }
1123 
1124     nfsd_file_lru_remove(nf);
1125     this_cpu_inc(nfsd_file_cache_hits);
1126 
1127     status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags));
1128 out:
1129     if (status == nfs_ok) {
1130         if (open)
1131             this_cpu_inc(nfsd_file_acquisitions);
1132         *pnf = nf;
1133     } else {
1134         nfsd_file_put(nf);
1135         nf = NULL;
1136     }
1137 
1138 out_status:
1139     put_cred(key.cred);
1140     if (open)
1141         trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status);
1142     return status;
1143 
1144 open_file:
1145     trace_nfsd_file_alloc(nf);
1146     nf->nf_mark = nfsd_file_mark_find_or_create(nf, key.inode);
1147     if (nf->nf_mark) {
1148         if (open) {
1149             status = nfsd_open_verified(rqstp, fhp, may_flags,
1150                             &nf->nf_file);
1151             trace_nfsd_file_open(nf, status);
1152         } else
1153             status = nfs_ok;
1154     } else
1155         status = nfserr_jukebox;
1156     /*
1157      * If construction failed, or we raced with a call to unlink()
1158      * then unhash.
1159      */
1160     if (status != nfs_ok || key.inode->i_nlink == 0)
1161         if (nfsd_file_unhash(nf))
1162             nfsd_file_put_noref(nf);
1163     clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
1164     smp_mb__after_atomic();
1165     wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
1166     goto out;
1167 
1168 insert_err:
1169     nfsd_file_slab_free(&new->nf_rcu);
1170     trace_nfsd_file_insert_err(rqstp, key.inode, may_flags, PTR_ERR(nf));
1171     nf = NULL;
1172     status = nfserr_jukebox;
1173     goto out_status;
1174 }
1175 
1176 /**
1177  * nfsd_file_acquire - Get a struct nfsd_file with an open file
1178  * @rqstp: the RPC transaction being executed
1179  * @fhp: the NFS filehandle of the file to be opened
1180  * @may_flags: NFSD_MAY_ settings for the file
1181  * @pnf: OUT: new or found "struct nfsd_file" object
1182  *
1183  * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
1184  * network byte order is returned.
1185  */
1186 __be32
1187 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
1188           unsigned int may_flags, struct nfsd_file **pnf)
1189 {
1190     return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true);
1191 }
1192 
1193 /**
1194  * nfsd_file_create - Get a struct nfsd_file, do not open
1195  * @rqstp: the RPC transaction being executed
1196  * @fhp: the NFS filehandle of the file just created
1197  * @may_flags: NFSD_MAY_ settings for the file
1198  * @pnf: OUT: new or found "struct nfsd_file" object
1199  *
1200  * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
1201  * network byte order is returned.
1202  */
1203 __be32
1204 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1205          unsigned int may_flags, struct nfsd_file **pnf)
1206 {
1207     return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, false);
1208 }
1209 
1210 /*
1211  * Note that fields may be added, removed or reordered in the future. Programs
1212  * scraping this file for info should test the labels to ensure they're
1213  * getting the correct field.
1214  */
1215 static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
1216 {
1217     unsigned long releases = 0, pages_flushed = 0, evictions = 0;
1218     unsigned long hits = 0, acquisitions = 0;
1219     unsigned int i, count = 0, buckets = 0;
1220     unsigned long lru = 0, total_age = 0;
1221 
1222     /* Serialize with server shutdown */
1223     mutex_lock(&nfsd_mutex);
1224     if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) {
1225         struct bucket_table *tbl;
1226         struct rhashtable *ht;
1227 
1228         lru = list_lru_count(&nfsd_file_lru);
1229 
1230         rcu_read_lock();
1231         ht = &nfsd_file_rhash_tbl;
1232         count = atomic_read(&ht->nelems);
1233         tbl = rht_dereference_rcu(ht->tbl, ht);
1234         buckets = tbl->size;
1235         rcu_read_unlock();
1236     }
1237     mutex_unlock(&nfsd_mutex);
1238 
1239     for_each_possible_cpu(i) {
1240         hits += per_cpu(nfsd_file_cache_hits, i);
1241         acquisitions += per_cpu(nfsd_file_acquisitions, i);
1242         releases += per_cpu(nfsd_file_releases, i);
1243         total_age += per_cpu(nfsd_file_total_age, i);
1244         evictions += per_cpu(nfsd_file_evictions, i);
1245         pages_flushed += per_cpu(nfsd_file_pages_flushed, i);
1246     }
1247 
1248     seq_printf(m, "total entries: %u\n", count);
1249     seq_printf(m, "hash buckets:  %u\n", buckets);
1250     seq_printf(m, "lru entries:   %lu\n", lru);
1251     seq_printf(m, "cache hits:    %lu\n", hits);
1252     seq_printf(m, "acquisitions:  %lu\n", acquisitions);
1253     seq_printf(m, "releases:      %lu\n", releases);
1254     seq_printf(m, "evictions:     %lu\n", evictions);
1255     if (releases)
1256         seq_printf(m, "mean age (ms): %ld\n", total_age / releases);
1257     else
1258         seq_printf(m, "mean age (ms): -\n");
1259     seq_printf(m, "pages flushed: %lu\n", pages_flushed);
1260     return 0;
1261 }
1262 
1263 int nfsd_file_cache_stats_open(struct inode *inode, struct file *file)
1264 {
1265     return single_open(file, nfsd_file_cache_stats_show, NULL);
1266 }