// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
 */

/*
 * fsnotify inode mark locking/lifetime/and refcnting
 *
 * REFCNT:
 * The group->refcnt and mark->refcnt tell how many "things" in the kernel
 * currently are referencing the objects. Both kinds of objects typically will
 * live inside the kernel with a refcnt of 2, one for its creation and one for
 * the reference a group and a mark hold to each other.
 * If you are holding the appropriate locks, you can take a reference and the
 * object itself is guaranteed to survive until the reference is dropped.
 *
 * LOCKING:
 * There are 3 locks involved with fsnotify inode marks and they MUST be taken
 * in order as follows:
 *
 * group->mark_mutex
 * mark->lock
 * mark->connector->lock
 *
 * group->mark_mutex protects the marks_list anchored inside a given group and
 * each mark is hooked via the g_list.  It also protects the group's private
 * data (i.e. group limits).
 *
 * mark->lock protects the mark's attributes like its masks and flags.
 * Furthermore it protects the access to a reference of the group that the mark
 * is assigned to as well as the access to a reference of the inode/vfsmount
 * that is being watched by the mark.
 *
 * mark->connector->lock protects the list of marks anchored inside an
 * inode / vfsmount and each mark is hooked via the i_list.
 *
 * A list of notification marks relating to an inode / mnt is contained in
 * fsnotify_mark_connector. That structure is alive as long as there are any
 * marks in the list and is also protected by fsnotify_mark_srcu. A mark gets
 * detached from fsnotify_mark_connector when the last reference to the mark
 * is dropped.  Thus having a mark reference is enough to protect the
 * mark->connector pointer and to make sure fsnotify_mark_connector cannot
 * disappear. Also, because we remove a mark from g_list before dropping the
 * mark reference associated with that, any mark found through g_list is
 * guaranteed to have mark->connector set until we drop group->mark_mutex.
 *
 * LIFETIME:
 * Inode marks survive between when they are added to an inode and when their
 * refcnt==0. Marks are also protected by fsnotify_mark_srcu.
 *
 * The inode mark can be cleared for a number of different reasons including:
 * - The inode is unlinked for the last time.  (fsnotify_inode_remove)
 * - The inode is being evicted from cache. (fsnotify_inode_delete)
 * - The fs the inode is on is unmounted.  (fsnotify_inode_delete/fsnotify_unmount_inodes)
 * - Something explicitly requests that it be removed.  (fsnotify_destroy_mark)
 * - The fsnotify_group associated with the mark is going away and all such marks
 *   need to be cleaned up. (fsnotify_clear_marks_by_group)
 *
 * This has the very interesting property of being able to run concurrently with
 * any (or all) other directions.
 */

#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/srcu.h>
#include <linux/ratelimit.h>

#include <linux/atomic.h>

#include <linux/fsnotify_backend.h>
#include "fsnotify.h"

#define FSNOTIFY_REAPER_DELAY   (1) /* 1 jiffy */

struct srcu_struct fsnotify_mark_srcu;
struct kmem_cache *fsnotify_mark_connector_cachep;

static DEFINE_SPINLOCK(destroy_lock);
static LIST_HEAD(destroy_list);
static struct fsnotify_mark_connector *connector_destroy_list;

static void fsnotify_mark_destroy_workfn(struct work_struct *work);
static DECLARE_DELAYED_WORK(reaper_work, fsnotify_mark_destroy_workfn);

static void fsnotify_connector_destroy_workfn(struct work_struct *work);
static DECLARE_WORK(connector_reaper_work, fsnotify_connector_destroy_workfn);

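/*
 * Take a reference on a mark. The caller must already hold a reference, or
 * otherwise make sure the mark cannot go away (e.g. by holding a lock that
 * protects the list the mark sits on), hence the WARN on a zero refcnt.
 */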
void fsnotify_get_mark(struct fsnotify_mark *mark)
{
    WARN_ON_ONCE(!refcount_read(&mark->refcnt));
    refcount_inc(&mark->refcnt);
}

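/*
 * Return a pointer to the event mask of the object (inode, vfsmount or
 * superblock) the connector is attached to, or NULL for an invalid /
 * detached connector type.
 */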
static __u32 *fsnotify_conn_mask_p(struct fsnotify_mark_connector *conn)
{
    if (conn->type == FSNOTIFY_OBJ_TYPE_INODE)
        return &fsnotify_conn_inode(conn)->i_fsnotify_mask;
    else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT)
        return &fsnotify_conn_mount(conn)->mnt_fsnotify_mask;
    else if (conn->type == FSNOTIFY_OBJ_TYPE_SB)
        return &fsnotify_conn_sb(conn)->s_fsnotify_mask;
    return NULL;
}

__u32 fsnotify_conn_mask(struct fsnotify_mark_connector *conn)
{
    if (WARN_ON(!fsnotify_valid_obj_type(conn->type)))
        return 0;

    return *fsnotify_conn_mask_p(conn);
}

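/*
 * Pin the inode with an extra reference and account the connector on its
 * superblock (the per-sb connector count lets superblock teardown wait for
 * outstanding connectors).
 */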
static void fsnotify_get_inode_ref(struct inode *inode)
{
    ihold(inode);
    atomic_long_inc(&inode->i_sb->s_fsnotify_connectors);
}

/*
 * Grab or drop inode reference for the connector if needed.
 *
 * When it's time to drop the reference, we only clear the HAS_IREF flag and
 * return the inode object. fsnotify_drop_object() will be responsible for
 * doing iput() outside of spinlocks. This happens when the last mark that
 * wanted iref is detached.
 */
static struct inode *fsnotify_update_iref(struct fsnotify_mark_connector *conn,
                      bool want_iref)
{
    bool has_iref = conn->flags & FSNOTIFY_CONN_FLAG_HAS_IREF;
    struct inode *inode = NULL;

    if (conn->type != FSNOTIFY_OBJ_TYPE_INODE ||
        want_iref == has_iref)
        return NULL;

    if (want_iref) {
        /* Pin inode if any mark wants inode refcount held */
        fsnotify_get_inode_ref(fsnotify_conn_inode(conn));
        conn->flags |= FSNOTIFY_CONN_FLAG_HAS_IREF;
    } else {
        /* Unpin inode after detach of last mark that wanted iref */
        inode = fsnotify_conn_inode(conn);
        conn->flags &= ~FSNOTIFY_CONN_FLAG_HAS_IREF;
    }

    return inode;
}

static void *__fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
{
    u32 new_mask = 0;
    bool want_iref = false;
    struct fsnotify_mark *mark;

    assert_spin_locked(&conn->lock);
    /* We can get detached connector here when inode is getting unlinked. */
    if (!fsnotify_valid_obj_type(conn->type))
        return NULL;
    hlist_for_each_entry(mark, &conn->list, obj_list) {
        if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED))
            continue;
        new_mask |= fsnotify_calc_mask(mark);
        if (conn->type == FSNOTIFY_OBJ_TYPE_INODE &&
            !(mark->flags & FSNOTIFY_MARK_FLAG_NO_IREF))
            want_iref = true;
    }
    *fsnotify_conn_mask_p(conn) = new_mask;

    return fsnotify_update_iref(conn, want_iref);
}

/*
 * Calculate mask of events for a list of marks. The caller must make sure
 * connector and connector->obj cannot disappear under us.  Callers achieve
 * this by holding a mark->lock or mark->group->mark_mutex for a mark on this
 * list.
 */
void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
{
    if (!conn)
        return;

    spin_lock(&conn->lock);
    __fsnotify_recalc_mask(conn);
    spin_unlock(&conn->lock);
    if (conn->type == FSNOTIFY_OBJ_TYPE_INODE)
        __fsnotify_update_child_dentry_flags(
                    fsnotify_conn_inode(conn));
}

/* Free all connectors queued for freeing once SRCU period ends */
static void fsnotify_connector_destroy_workfn(struct work_struct *work)
{
    struct fsnotify_mark_connector *conn, *free;

    spin_lock(&destroy_lock);
    conn = connector_destroy_list;
    connector_destroy_list = NULL;
    spin_unlock(&destroy_lock);

    synchronize_srcu(&fsnotify_mark_srcu);
    while (conn) {
        free = conn;
        conn = conn->destroy_next;
        kmem_cache_free(fsnotify_mark_connector_cachep, free);
    }
}

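/*
 * Drop the inode reference taken in fsnotify_get_inode_ref() and wake up
 * anybody waiting for the superblock's connector count to reach zero.
 */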
static void fsnotify_put_inode_ref(struct inode *inode)
{
    struct super_block *sb = inode->i_sb;

    iput(inode);
    if (atomic_long_dec_and_test(&sb->s_fsnotify_connectors))
        wake_up_var(&sb->s_fsnotify_connectors);
}

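/*
 * Account / unaccount a connector against its object's superblock (if any),
 * keeping s_fsnotify_connectors in sync for waiters.
 */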
static void fsnotify_get_sb_connectors(struct fsnotify_mark_connector *conn)
{
    struct super_block *sb = fsnotify_connector_sb(conn);

    if (sb)
        atomic_long_inc(&sb->s_fsnotify_connectors);
}

static void fsnotify_put_sb_connectors(struct fsnotify_mark_connector *conn)
{
    struct super_block *sb = fsnotify_connector_sb(conn);

    if (sb && atomic_long_dec_and_test(&sb->s_fsnotify_connectors))
        wake_up_var(&sb->s_fsnotify_connectors);
}

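/*
 * Disconnect the connector from its object: zero the object's event mask,
 * clear the object's pointer to the connector, and mark the connector
 * detached. Returns the inode whose reference the connector held, if any,
 * so the caller can drop it via fsnotify_drop_object() outside of locks.
 */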
static void *fsnotify_detach_connector_from_object(
                    struct fsnotify_mark_connector *conn,
                    unsigned int *type)
{
    struct inode *inode = NULL;

    *type = conn->type;
    if (conn->type == FSNOTIFY_OBJ_TYPE_DETACHED)
        return NULL;

    if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) {
        inode = fsnotify_conn_inode(conn);
        inode->i_fsnotify_mask = 0;

        /* Unpin inode when detaching from connector */
        if (!(conn->flags & FSNOTIFY_CONN_FLAG_HAS_IREF))
            inode = NULL;
    } else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
        fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0;
    } else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) {
        fsnotify_conn_sb(conn)->s_fsnotify_mask = 0;
    }

    fsnotify_put_sb_connectors(conn);
    rcu_assign_pointer(*(conn->obj), NULL);
    conn->obj = NULL;
    conn->type = FSNOTIFY_OBJ_TYPE_DETACHED;

    return inode;
}

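/*
 * Final mark destruction: return the mark to the backend that allocated it
 * and drop the mark's reference to its group.
 */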
static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark)
{
    struct fsnotify_group *group = mark->group;

    if (WARN_ON_ONCE(!group))
        return;
    group->ops->free_mark(mark);
    fsnotify_put_group(group);
}

/* Drop object reference originally held by a connector */
static void fsnotify_drop_object(unsigned int type, void *objp)
{
    if (!objp)
        return;
    /* Currently only inode references are passed to be dropped */
    if (WARN_ON_ONCE(type != FSNOTIFY_OBJ_TYPE_INODE))
        return;
    fsnotify_put_inode_ref(objp);
}

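/*
 * Drop a reference to a mark. When the last reference is dropped, the mark
 * is removed from the object list (detaching the whole connector if the list
 * became empty) and queued for final destruction after an SRCU grace period.
 */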
void fsnotify_put_mark(struct fsnotify_mark *mark)
{
    struct fsnotify_mark_connector *conn = READ_ONCE(mark->connector);
    void *objp = NULL;
    unsigned int type = FSNOTIFY_OBJ_TYPE_DETACHED;
    bool free_conn = false;

    /* Catch marks that were actually never attached to object */
    if (!conn) {
        if (refcount_dec_and_test(&mark->refcnt))
            fsnotify_final_mark_destroy(mark);
        return;
    }

    /*
     * We have to be careful so that traversals of obj_list under lock can
     * safely grab mark reference.
     */
    if (!refcount_dec_and_lock(&mark->refcnt, &conn->lock))
        return;

    hlist_del_init_rcu(&mark->obj_list);
    if (hlist_empty(&conn->list)) {
        objp = fsnotify_detach_connector_from_object(conn, &type);
        free_conn = true;
    } else {
        objp = __fsnotify_recalc_mask(conn);
        type = conn->type;
    }
    WRITE_ONCE(mark->connector, NULL);
    spin_unlock(&conn->lock);

    fsnotify_drop_object(type, objp);

    if (free_conn) {
        spin_lock(&destroy_lock);
        conn->destroy_next = connector_destroy_list;
        connector_destroy_list = conn;
        spin_unlock(&destroy_lock);
        queue_work(system_unbound_wq, &connector_reaper_work);
    }
    /*
     * Note that we didn't update flags telling whether inode cares about
     * what's happening with children. We update these flags from
     * __fsnotify_parent() lazily when next event happens on one of our
     * children.
     */
    spin_lock(&destroy_lock);
    list_add(&mark->g_list, &destroy_list);
    spin_unlock(&destroy_lock);
    queue_delayed_work(system_unbound_wq, &reaper_work,
               FSNOTIFY_REAPER_DELAY);
}
EXPORT_SYMBOL_GPL(fsnotify_put_mark);

/*
 * Get mark reference when we found the mark via lockless traversal of object
 * list. Mark can be already removed from the list by now and on its way to be
 * destroyed once SRCU period ends.
 *
 * Also pin the group so it doesn't disappear under us.
 */
static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark)
{
    if (!mark)
        return true;

    if (refcount_inc_not_zero(&mark->refcnt)) {
        spin_lock(&mark->lock);
        if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) {
            /* mark is attached, group is still alive then */
            atomic_inc(&mark->group->user_waits);
            spin_unlock(&mark->lock);
            return true;
        }
        spin_unlock(&mark->lock);
        fsnotify_put_mark(mark);
    }
    return false;
}

/*
 * Puts marks and wakes up group destruction if necessary.
 *
 * Pairs with fsnotify_get_mark_safe()
 */
static void fsnotify_put_mark_wake(struct fsnotify_mark *mark)
{
    if (mark) {
        struct fsnotify_group *group = mark->group;

        fsnotify_put_mark(mark);
        /*
         * We abuse notification_waitq on group shutdown for waiting for
         * all marks pinned when waiting for userspace.
         */
        if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
            wake_up(&group->notification_waitq);
    }
}

bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info)
    __releases(&fsnotify_mark_srcu)
{
    int type;

    fsnotify_foreach_iter_type(type) {
        /* This can fail if mark is being removed */
        if (!fsnotify_get_mark_safe(iter_info->marks[type])) {
            __release(&fsnotify_mark_srcu);
            goto fail;
        }
    }

    /*
     * Now that both marks are pinned by refcount in the inode / vfsmount
     * lists, we can drop SRCU lock, and safely resume the list iteration
     * once userspace returns.
     */
    srcu_read_unlock(&fsnotify_mark_srcu, iter_info->srcu_idx);

    return true;

fail:
    for (type--; type >= 0; type--)
        fsnotify_put_mark_wake(iter_info->marks[type]);
    return false;
}

void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info)
    __acquires(&fsnotify_mark_srcu)
{
    int type;

    iter_info->srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
    fsnotify_foreach_iter_type(type)
        fsnotify_put_mark_wake(iter_info->marks[type]);
}

/*
 * Mark the mark as detached and remove it from the group list. The mark still
 * stays in the object list until its last reference is dropped. Note that we
 * rely on the mark being removed from the group list before the corresponding
 * reference to it is dropped. In particular we rely on mark->connector being
 * valid while we hold group->mark_mutex if we found the mark through g_list.
 *
 * Must be called with group->mark_mutex held. The caller must either hold a
 * reference to the mark or be protected by fsnotify_mark_srcu.
 */
void fsnotify_detach_mark(struct fsnotify_mark *mark)
{
    fsnotify_group_assert_locked(mark->group);
    WARN_ON_ONCE(!srcu_read_lock_held(&fsnotify_mark_srcu) &&
             refcount_read(&mark->refcnt) < 1 +
            !!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED));

    spin_lock(&mark->lock);
    /* something else already called this function on this mark */
    if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
        spin_unlock(&mark->lock);
        return;
    }
    mark->flags &= ~FSNOTIFY_MARK_FLAG_ATTACHED;
    list_del_init(&mark->g_list);
    spin_unlock(&mark->lock);

    /* Drop mark reference acquired in fsnotify_add_mark_locked() */
    fsnotify_put_mark(mark);
}

/*
 * Free fsnotify mark. The mark is actually only marked as being freed. The
 * freeing actually happens only once the last reference to the mark is
 * dropped, from a workqueue which first waits for the SRCU period to end.
 *
 * Caller must have a reference to the mark or be protected by
 * fsnotify_mark_srcu.
 */
void fsnotify_free_mark(struct fsnotify_mark *mark)
{
    struct fsnotify_group *group = mark->group;

    spin_lock(&mark->lock);
    /* something else already called this function on this mark */
    if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) {
        spin_unlock(&mark->lock);
        return;
    }
    mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
    spin_unlock(&mark->lock);

    /*
     * Some groups like to know that marks are being freed.  This is a
     * callback to the group function to let it know that this mark
     * is being freed.
     */
    if (group->ops->freeing_mark)
        group->ops->freeing_mark(mark, group);
}

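/*
 * Detach the mark from its group (under the group lock) and then free it.
 */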
void fsnotify_destroy_mark(struct fsnotify_mark *mark,
               struct fsnotify_group *group)
{
    fsnotify_group_lock(group);
    fsnotify_detach_mark(mark);
    fsnotify_group_unlock(group);
    fsnotify_free_mark(mark);
}
EXPORT_SYMBOL_GPL(fsnotify_destroy_mark);

/*
 * Sorting function for lists of fsnotify marks.
 *
 * Fanotify supports different notification classes (reflected as priority of
 * notification group). Events shall be passed to notification groups in
 * decreasing priority order. To achieve this marks in notification lists for
 * inodes and vfsmounts are sorted so that priorities of corresponding groups
 * are descending.
 *
 * Furthermore correct handling of the ignore mask requires processing inode
 * and vfsmount marks of each group together. Using the group address as
 * further sort criterion provides a unique sorting order and thus we can
 * merge inode and vfsmount lists of marks in linear time and find groups
 * present in both lists.
 *
 * A return value of 1 signifies that b has priority over a.
 * A return value of 0 signifies that the two marks have to be handled together.
 * A return value of -1 signifies that a has priority over b.
 */
int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
{
    if (a == b)
        return 0;
    if (!a)
        return 1;
    if (!b)
        return -1;
    if (a->priority < b->priority)
        return 1;
    if (a->priority > b->priority)
        return -1;
    if (a < b)
        return 1;
    return -1;
}

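/*
 * Allocate and initialize a connector for the object and publish it with
 * cmpxchg(). Losing the race against another allocator is not an error:
 * the winner's connector is kept and ours is freed.
 */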
static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
                           unsigned int obj_type,
                           __kernel_fsid_t *fsid)
{
    struct fsnotify_mark_connector *conn;

    conn = kmem_cache_alloc(fsnotify_mark_connector_cachep, GFP_KERNEL);
    if (!conn)
        return -ENOMEM;
    spin_lock_init(&conn->lock);
    INIT_HLIST_HEAD(&conn->list);
    conn->flags = 0;
    conn->type = obj_type;
    conn->obj = connp;
    /* Cache fsid of filesystem containing the object */
    if (fsid) {
        conn->fsid = *fsid;
        conn->flags = FSNOTIFY_CONN_FLAG_HAS_FSID;
    } else {
        conn->fsid.val[0] = conn->fsid.val[1] = 0;
        conn->flags = 0;
    }
    fsnotify_get_sb_connectors(conn);

    /*
     * cmpxchg() provides the barrier so that readers of *connp can see
     * only a fully initialized structure.
     */
    if (cmpxchg(connp, NULL, conn)) {
        /* Someone else created list structure for us */
        fsnotify_put_sb_connectors(conn);
        kmem_cache_free(fsnotify_mark_connector_cachep, conn);
    }

    return 0;
}

/*
 * Get mark connector, make sure it is alive and return with its lock held.
 * This is for users that get the connector pointer from an inode or mount.
 * Users that hold a reference to a mark on the list may directly lock
 * connector->lock as they are sure the list cannot go away under them.
 */
static struct fsnotify_mark_connector *fsnotify_grab_connector(
                        fsnotify_connp_t *connp)
{
    struct fsnotify_mark_connector *conn;
    int idx;

    idx = srcu_read_lock(&fsnotify_mark_srcu);
    conn = srcu_dereference(*connp, &fsnotify_mark_srcu);
    if (!conn)
        goto out;
    spin_lock(&conn->lock);
    if (conn->type == FSNOTIFY_OBJ_TYPE_DETACHED) {
        spin_unlock(&conn->lock);
        srcu_read_unlock(&fsnotify_mark_srcu, idx);
        return NULL;
    }
out:
    srcu_read_unlock(&fsnotify_mark_srcu, idx);
    return conn;
}

/*
 * Add mark into proper place in given list of marks. These marks may be used
 * for the fsnotify backend to determine which event types should be delivered
 * to which group and for which inodes. These marks are ordered according to
 * priority, highest number first, and then by the group's location in memory.
 */
static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
                  fsnotify_connp_t *connp,
                  unsigned int obj_type,
                  int add_flags, __kernel_fsid_t *fsid)
{
    struct fsnotify_mark *lmark, *last = NULL;
    struct fsnotify_mark_connector *conn;
    int cmp;
    int err = 0;

    if (WARN_ON(!fsnotify_valid_obj_type(obj_type)))
        return -EINVAL;

    /* Backend is expected to check for zero fsid (e.g. tmpfs) */
    if (fsid && WARN_ON_ONCE(!fsid->val[0] && !fsid->val[1]))
        return -ENODEV;

restart:
    spin_lock(&mark->lock);
    conn = fsnotify_grab_connector(connp);
    if (!conn) {
        spin_unlock(&mark->lock);
        err = fsnotify_attach_connector_to_object(connp, obj_type,
                              fsid);
        if (err)
            return err;
        goto restart;
    } else if (fsid && !(conn->flags & FSNOTIFY_CONN_FLAG_HAS_FSID)) {
        conn->fsid = *fsid;
        /* Pairs with smp_rmb() in fanotify_get_fsid() */
        smp_wmb();
        conn->flags |= FSNOTIFY_CONN_FLAG_HAS_FSID;
    } else if (fsid && (conn->flags & FSNOTIFY_CONN_FLAG_HAS_FSID) &&
           (fsid->val[0] != conn->fsid.val[0] ||
            fsid->val[1] != conn->fsid.val[1])) {
        /*
         * Backend is expected to check for non uniform fsid
         * (e.g. btrfs), but maybe we missed something?
         * Only allow setting conn->fsid once to non zero fsid.
         * inotify and non-fid fanotify groups do not set nor test
         * conn->fsid.
         */
        pr_warn_ratelimited("%s: fsid mismatch on object of type %u: "
                    "%x.%x != %x.%x\n", __func__, conn->type,
                    fsid->val[0], fsid->val[1],
                    conn->fsid.val[0], conn->fsid.val[1]);
        err = -EXDEV;
        goto out_err;
    }

    /* is mark the first mark? */
    if (hlist_empty(&conn->list)) {
        hlist_add_head_rcu(&mark->obj_list, &conn->list);
        goto added;
    }

    /* should mark be in the middle of the current list? */
    hlist_for_each_entry(lmark, &conn->list, obj_list) {
        last = lmark;

        if ((lmark->group == mark->group) &&
            (lmark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) &&
            !(mark->group->flags & FSNOTIFY_GROUP_DUPS)) {
            err = -EEXIST;
            goto out_err;
        }

        cmp = fsnotify_compare_groups(lmark->group, mark->group);
        if (cmp >= 0) {
            hlist_add_before_rcu(&mark->obj_list, &lmark->obj_list);
            goto added;
        }
    }

    BUG_ON(last == NULL);
    /* mark should be the last entry.  last is the current last entry */
    hlist_add_behind_rcu(&mark->obj_list, &last->obj_list);
added:
    /*
     * Since connector is attached to object using cmpxchg() we are
     * guaranteed that connector initialization is fully visible by anyone
     * seeing mark->connector set.
     */
    WRITE_ONCE(mark->connector, conn);
out_err:
    spin_unlock(&conn->lock);
    spin_unlock(&mark->lock);
    return err;
}

/*
 * Attach an initialized mark to a given group and fs object.
 * These marks may be used for the fsnotify backend to determine which
 * event types should be delivered to which group.
 */
int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
                 fsnotify_connp_t *connp, unsigned int obj_type,
                 int add_flags, __kernel_fsid_t *fsid)
{
    struct fsnotify_group *group = mark->group;
    int ret = 0;

    fsnotify_group_assert_locked(group);

    /*
     * LOCKING ORDER!!!!
     * group->mark_mutex
     * mark->lock
     * mark->connector->lock
     */
    spin_lock(&mark->lock);
    mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE | FSNOTIFY_MARK_FLAG_ATTACHED;

    list_add(&mark->g_list, &group->marks_list);
    fsnotify_get_mark(mark); /* for g_list */
    spin_unlock(&mark->lock);

    ret = fsnotify_add_mark_list(mark, connp, obj_type, add_flags, fsid);
    if (ret)
        goto err;

    fsnotify_recalc_mask(mark->connector);

    return ret;
err:
    spin_lock(&mark->lock);
    mark->flags &= ~(FSNOTIFY_MARK_FLAG_ALIVE |
             FSNOTIFY_MARK_FLAG_ATTACHED);
    list_del_init(&mark->g_list);
    spin_unlock(&mark->lock);

    fsnotify_put_mark(mark);
    return ret;
}

int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp,
              unsigned int obj_type, int add_flags,
              __kernel_fsid_t *fsid)
{
    int ret;
    struct fsnotify_group *group = mark->group;

    fsnotify_group_lock(group);
    ret = fsnotify_add_mark_locked(mark, connp, obj_type, add_flags, fsid);
    fsnotify_group_unlock(group);
    return ret;
}
EXPORT_SYMBOL_GPL(fsnotify_add_mark);
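
/*
 * For reference, typical backend usage of this API looks roughly like the
 * sketch below (illustrative only, not code from any in-tree backend;
 * "my_group" and "my_mark_cache" are hypothetical):
 *
 *	mark = kmem_cache_alloc(my_mark_cache, GFP_KERNEL);
 *	if (!mark)
 *		return -ENOMEM;
 *	fsnotify_init_mark(mark, my_group);
 *	mark->mask = FS_MODIFY;
 *	ret = fsnotify_add_mark(mark, &inode->i_fsnotify_marks,
 *				FSNOTIFY_OBJ_TYPE_INODE, 0, NULL);
 *	if (ret)
 *		fsnotify_put_mark(mark);	 (drops the initial reference)
 *	...
 *	fsnotify_destroy_mark(mark, my_group);	 (detach from group + object)
 *	fsnotify_put_mark(mark);		 (drop our reference)
 */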

/*
 * Given a list of marks, find the mark associated with given group. If found
 * take a reference to that mark and return it, else return NULL.
 */
struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp,
                     struct fsnotify_group *group)
{
    struct fsnotify_mark_connector *conn;
    struct fsnotify_mark *mark;

    conn = fsnotify_grab_connector(connp);
    if (!conn)
        return NULL;

    hlist_for_each_entry(mark, &conn->list, obj_list) {
        if (mark->group == group &&
            (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
            fsnotify_get_mark(mark);
            spin_unlock(&conn->lock);
            return mark;
        }
    }
    spin_unlock(&conn->lock);
    return NULL;
}
EXPORT_SYMBOL_GPL(fsnotify_find_mark);

/* Clear any marks in a group with given type mask */
void fsnotify_clear_marks_by_group(struct fsnotify_group *group,
                   unsigned int obj_type)
{
    struct fsnotify_mark *lmark, *mark;
    LIST_HEAD(to_free);
    struct list_head *head = &to_free;

    /* Skip selection step if we want to clear all marks. */
    if (obj_type == FSNOTIFY_OBJ_TYPE_ANY) {
        head = &group->marks_list;
        goto clear;
    }
    /*
     * We have to be really careful here. Anytime we drop mark_mutex, e.g.
     * fsnotify_clear_marks_by_inode() can come and free marks, even ones
     * already on our to_free list, so we have to hold mark_mutex even when
     * accessing that list. And freeing a mark requires us to drop
     * mark_mutex. So we can reliably free only the first mark in the list.
     * That's why we first move the marks to be freed to the to_free list
     * in one go and then free the marks in that list one by one.
     */
    fsnotify_group_lock(group);
    list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
        if (mark->connector->type == obj_type)
            list_move(&mark->g_list, &to_free);
    }
    fsnotify_group_unlock(group);

clear:
    while (1) {
        fsnotify_group_lock(group);
        if (list_empty(head)) {
            fsnotify_group_unlock(group);
            break;
        }
        mark = list_first_entry(head, struct fsnotify_mark, g_list);
        fsnotify_get_mark(mark);
        fsnotify_detach_mark(mark);
        fsnotify_group_unlock(group);
        fsnotify_free_mark(mark);
        fsnotify_put_mark(mark);
    }
}

/* Destroy all marks attached to an object via connector */
void fsnotify_destroy_marks(fsnotify_connp_t *connp)
{
    struct fsnotify_mark_connector *conn;
    struct fsnotify_mark *mark, *old_mark = NULL;
    void *objp;
    unsigned int type;

    conn = fsnotify_grab_connector(connp);
    if (!conn)
        return;
    /*
     * We have to be careful since we can race with e.g.
     * fsnotify_clear_marks_by_group() and once we drop the conn->lock, the
     * list can get modified. However we are holding mark reference and
     * thus our mark cannot be removed from obj_list so we can continue
     * iteration after regaining conn->lock.
     */
    hlist_for_each_entry(mark, &conn->list, obj_list) {
        fsnotify_get_mark(mark);
        spin_unlock(&conn->lock);
        if (old_mark)
            fsnotify_put_mark(old_mark);
        old_mark = mark;
        fsnotify_destroy_mark(mark, mark->group);
        spin_lock(&conn->lock);
    }
    /*
     * Detach list from object now so that we don't pin inode until all
     * mark references get dropped. It would lead to strange results such
     * as delaying inode deletion or blocking unmount.
     */
    objp = fsnotify_detach_connector_from_object(conn, &type);
    spin_unlock(&conn->lock);
    if (old_mark)
        fsnotify_put_mark(old_mark);
    fsnotify_drop_object(type, objp);
}

/*
 * Nothing fancy, just initialize lists and locks and counters.
 */
void fsnotify_init_mark(struct fsnotify_mark *mark,
            struct fsnotify_group *group)
{
    memset(mark, 0, sizeof(*mark));
    spin_lock_init(&mark->lock);
    refcount_set(&mark->refcnt, 1);
    fsnotify_get_group(group);
    mark->group = group;
    WRITE_ONCE(mark->connector, NULL);
}
EXPORT_SYMBOL_GPL(fsnotify_init_mark);

/*
 * Destroy all marks in destroy_list, waits for SRCU period to finish before
 * actually freeing marks.
 */
static void fsnotify_mark_destroy_workfn(struct work_struct *work)
{
    struct fsnotify_mark *mark, *next;
    struct list_head private_destroy_list;

    spin_lock(&destroy_lock);
    /* exchange the list head */
    list_replace_init(&destroy_list, &private_destroy_list);
    spin_unlock(&destroy_lock);

    synchronize_srcu(&fsnotify_mark_srcu);

    list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) {
        list_del_init(&mark->g_list);
        fsnotify_final_mark_destroy(mark);
    }
}

/* Wait for all marks queued for destruction to be actually destroyed */
void fsnotify_wait_marks_destroyed(void)
{
    flush_delayed_work(&reaper_work);
}
EXPORT_SYMBOL_GPL(fsnotify_wait_marks_destroyed);