Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
0004  */
0005 
0006 #include <linux/dcache.h>
0007 #include <linux/fs.h>
0008 #include <linux/gfp.h>
0009 #include <linux/init.h>
0010 #include <linux/module.h>
0011 #include <linux/mount.h>
0012 #include <linux/srcu.h>
0013 
0014 #include <linux/fsnotify_backend.h>
0015 #include "fsnotify.h"
0016 
/**
 * __fsnotify_inode_delete - drop every mark attached to an inode
 * @inode: inode that is being evicted from core
 *
 * Thin wrapper that hands the inode to fsnotify_clear_marks_by_inode().
 */
void __fsnotify_inode_delete(struct inode *inode)
{
    fsnotify_clear_marks_by_inode(inode);
}
EXPORT_SYMBOL_GPL(__fsnotify_inode_delete);
0025 
/**
 * __fsnotify_vfsmount_delete - drop every mark attached to a mount
 * @mnt: mount whose marks are to be cleared
 *
 * Thin wrapper that hands the mount to fsnotify_clear_marks_by_mount().
 */
void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
{
    fsnotify_clear_marks_by_mount(mnt);
}
0030 
0031 /**
0032  * fsnotify_unmount_inodes - an sb is unmounting.  handle any watched inodes.
0033  * @sb: superblock being unmounted.
0034  *
0035  * Called during unmount with no locks held, so needs to be safe against
0036  * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block.
0037  */
0038 static void fsnotify_unmount_inodes(struct super_block *sb)
0039 {
0040     struct inode *inode, *iput_inode = NULL;
0041 
0042     spin_lock(&sb->s_inode_list_lock);
0043     list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
0044         /*
0045          * We cannot __iget() an inode in state I_FREEING,
0046          * I_WILL_FREE, or I_NEW which is fine because by that point
0047          * the inode cannot have any associated watches.
0048          */
0049         spin_lock(&inode->i_lock);
0050         if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
0051             spin_unlock(&inode->i_lock);
0052             continue;
0053         }
0054 
0055         /*
0056          * If i_count is zero, the inode cannot have any watches and
0057          * doing an __iget/iput with SB_ACTIVE clear would actually
0058          * evict all inodes with zero i_count from icache which is
0059          * unnecessarily violent and may in fact be illegal to do.
0060          * However, we should have been called /after/ evict_inodes
0061          * removed all zero refcount inodes, in any case.  Test to
0062          * be sure.
0063          */
0064         if (!atomic_read(&inode->i_count)) {
0065             spin_unlock(&inode->i_lock);
0066             continue;
0067         }
0068 
0069         __iget(inode);
0070         spin_unlock(&inode->i_lock);
0071         spin_unlock(&sb->s_inode_list_lock);
0072 
0073         iput(iput_inode);
0074 
0075         /* for each watch, send FS_UNMOUNT and then remove it */
0076         fsnotify_inode(inode, FS_UNMOUNT);
0077 
0078         fsnotify_inode_delete(inode);
0079 
0080         iput_inode = inode;
0081 
0082         cond_resched();
0083         spin_lock(&sb->s_inode_list_lock);
0084     }
0085     spin_unlock(&sb->s_inode_list_lock);
0086 
0087     iput(iput_inode);
0088 }
0089 
0090 void fsnotify_sb_delete(struct super_block *sb)
0091 {
0092     fsnotify_unmount_inodes(sb);
0093     fsnotify_clear_marks_by_sb(sb);
0094     /* Wait for outstanding object references from connectors */
0095     wait_var_event(&sb->s_fsnotify_connectors,
0096                !atomic_long_read(&sb->s_fsnotify_connectors));
0097 }
0098 
0099 /*
0100  * Given an inode, first check if we care what happens to our children.  Inotify
0101  * and dnotify both tell their parents about events.  If we care about any event
0102  * on a child we run all of our children and set a dentry flag saying that the
0103  * parent cares.  Thus when an event happens on a child it can quickly tell
0104  * if there is a need to find a parent and send the event to the parent.
0105  */
0106 void __fsnotify_update_child_dentry_flags(struct inode *inode)
0107 {
0108     struct dentry *alias;
0109     int watched;
0110 
0111     if (!S_ISDIR(inode->i_mode))
0112         return;
0113 
0114     /* determine if the children should tell inode about their events */
0115     watched = fsnotify_inode_watches_children(inode);
0116 
0117     spin_lock(&inode->i_lock);
0118     /* run all of the dentries associated with this inode.  Since this is a
0119      * directory, there damn well better only be one item on this list */
0120     hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
0121         struct dentry *child;
0122 
0123         /* run all of the children of the original inode and fix their
0124          * d_flags to indicate parental interest (their parent is the
0125          * original inode) */
0126         spin_lock(&alias->d_lock);
0127         list_for_each_entry(child, &alias->d_subdirs, d_child) {
0128             if (!child->d_inode)
0129                 continue;
0130 
0131             spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
0132             if (watched)
0133                 child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED;
0134             else
0135                 child->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED;
0136             spin_unlock(&child->d_lock);
0137         }
0138         spin_unlock(&alias->d_lock);
0139     }
0140     spin_unlock(&inode->i_lock);
0141 }
0142 
0143 /* Are inode/sb/mount interested in parent and name info with this event? */
0144 static bool fsnotify_event_needs_parent(struct inode *inode, struct mount *mnt,
0145                     __u32 mask)
0146 {
0147     __u32 marks_mask = 0;
0148 
0149     /* We only send parent/name to inode/sb/mount for events on non-dir */
0150     if (mask & FS_ISDIR)
0151         return false;
0152 
0153     /*
0154      * All events that are possible on child can also may be reported with
0155      * parent/name info to inode/sb/mount.  Otherwise, a watching parent
0156      * could result in events reported with unexpected name info to sb/mount.
0157      */
0158     BUILD_BUG_ON(FS_EVENTS_POSS_ON_CHILD & ~FS_EVENTS_POSS_TO_PARENT);
0159 
0160     /* Did either inode/sb/mount subscribe for events with parent/name? */
0161     marks_mask |= fsnotify_parent_needed_mask(inode->i_fsnotify_mask);
0162     marks_mask |= fsnotify_parent_needed_mask(inode->i_sb->s_fsnotify_mask);
0163     if (mnt)
0164         marks_mask |= fsnotify_parent_needed_mask(mnt->mnt_fsnotify_mask);
0165 
0166     /* Did they subscribe for this event with parent/name info? */
0167     return mask & marks_mask;
0168 }
0169 
0170 /*
0171  * Notify this dentry's parent about a child's events with child name info
0172  * if parent is watching or if inode/sb/mount are interested in events with
0173  * parent and name info.
0174  *
0175  * Notify only the child without name info if parent is not watching and
0176  * inode/sb/mount are not interested in events with parent and name info.
0177  */
0178 int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data,
0179               int data_type)
0180 {
0181     const struct path *path = fsnotify_data_path(data, data_type);
0182     struct mount *mnt = path ? real_mount(path->mnt) : NULL;
0183     struct inode *inode = d_inode(dentry);
0184     struct dentry *parent;
0185     bool parent_watched = dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED;
0186     bool parent_needed, parent_interested;
0187     __u32 p_mask;
0188     struct inode *p_inode = NULL;
0189     struct name_snapshot name;
0190     struct qstr *file_name = NULL;
0191     int ret = 0;
0192 
0193     /*
0194      * Do inode/sb/mount care about parent and name info on non-dir?
0195      * Do they care about any event at all?
0196      */
0197     if (!inode->i_fsnotify_marks && !inode->i_sb->s_fsnotify_marks &&
0198         (!mnt || !mnt->mnt_fsnotify_marks) && !parent_watched)
0199         return 0;
0200 
0201     parent = NULL;
0202     parent_needed = fsnotify_event_needs_parent(inode, mnt, mask);
0203     if (!parent_watched && !parent_needed)
0204         goto notify;
0205 
0206     /* Does parent inode care about events on children? */
0207     parent = dget_parent(dentry);
0208     p_inode = parent->d_inode;
0209     p_mask = fsnotify_inode_watches_children(p_inode);
0210     if (unlikely(parent_watched && !p_mask))
0211         __fsnotify_update_child_dentry_flags(p_inode);
0212 
0213     /*
0214      * Include parent/name in notification either if some notification
0215      * groups require parent info or the parent is interested in this event.
0216      */
0217     parent_interested = mask & p_mask & ALL_FSNOTIFY_EVENTS;
0218     if (parent_needed || parent_interested) {
0219         /* When notifying parent, child should be passed as data */
0220         WARN_ON_ONCE(inode != fsnotify_data_inode(data, data_type));
0221 
0222         /* Notify both parent and child with child name info */
0223         take_dentry_name_snapshot(&name, dentry);
0224         file_name = &name.name;
0225         if (parent_interested)
0226             mask |= FS_EVENT_ON_CHILD;
0227     }
0228 
0229 notify:
0230     ret = fsnotify(mask, data, data_type, p_inode, file_name, inode, 0);
0231 
0232     if (file_name)
0233         release_dentry_name_snapshot(&name);
0234     dput(parent);
0235 
0236     return ret;
0237 }
0238 EXPORT_SYMBOL_GPL(__fsnotify_parent);
0239 
0240 static int fsnotify_handle_inode_event(struct fsnotify_group *group,
0241                        struct fsnotify_mark *inode_mark,
0242                        u32 mask, const void *data, int data_type,
0243                        struct inode *dir, const struct qstr *name,
0244                        u32 cookie)
0245 {
0246     const struct path *path = fsnotify_data_path(data, data_type);
0247     struct inode *inode = fsnotify_data_inode(data, data_type);
0248     const struct fsnotify_ops *ops = group->ops;
0249 
0250     if (WARN_ON_ONCE(!ops->handle_inode_event))
0251         return 0;
0252 
0253     if (WARN_ON_ONCE(!inode && !dir))
0254         return 0;
0255 
0256     if ((inode_mark->flags & FSNOTIFY_MARK_FLAG_EXCL_UNLINK) &&
0257         path && d_unlinked(path->dentry))
0258         return 0;
0259 
0260     /* Check interest of this mark in case event was sent with two marks */
0261     if (!(mask & inode_mark->mask & ALL_FSNOTIFY_EVENTS))
0262         return 0;
0263 
0264     return ops->handle_inode_event(inode_mark, mask, inode, dir, name, cookie);
0265 }
0266 
0267 static int fsnotify_handle_event(struct fsnotify_group *group, __u32 mask,
0268                  const void *data, int data_type,
0269                  struct inode *dir, const struct qstr *name,
0270                  u32 cookie, struct fsnotify_iter_info *iter_info)
0271 {
0272     struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info);
0273     struct fsnotify_mark *parent_mark = fsnotify_iter_parent_mark(iter_info);
0274     int ret;
0275 
0276     if (WARN_ON_ONCE(fsnotify_iter_sb_mark(iter_info)) ||
0277         WARN_ON_ONCE(fsnotify_iter_vfsmount_mark(iter_info)))
0278         return 0;
0279 
0280     /*
0281      * For FS_RENAME, 'dir' is old dir and 'data' is new dentry.
0282      * The only ->handle_inode_event() backend that supports FS_RENAME is
0283      * dnotify, where it means file was renamed within same parent.
0284      */
0285     if (mask & FS_RENAME) {
0286         struct dentry *moved = fsnotify_data_dentry(data, data_type);
0287 
0288         if (dir != moved->d_parent->d_inode)
0289             return 0;
0290     }
0291 
0292     if (parent_mark) {
0293         ret = fsnotify_handle_inode_event(group, parent_mark, mask,
0294                           data, data_type, dir, name, 0);
0295         if (ret)
0296             return ret;
0297     }
0298 
0299     if (!inode_mark)
0300         return 0;
0301 
0302     if (mask & FS_EVENT_ON_CHILD) {
0303         /*
0304          * Some events can be sent on both parent dir and child marks
0305          * (e.g. FS_ATTRIB).  If both parent dir and child are
0306          * watching, report the event once to parent dir with name (if
0307          * interested) and once to child without name (if interested).
0308          * The child watcher is expecting an event without a file name
0309          * and without the FS_EVENT_ON_CHILD flag.
0310          */
0311         mask &= ~FS_EVENT_ON_CHILD;
0312         dir = NULL;
0313         name = NULL;
0314     }
0315 
0316     return fsnotify_handle_inode_event(group, inode_mark, mask, data, data_type,
0317                        dir, name, cookie);
0318 }
0319 
0320 static int send_to_group(__u32 mask, const void *data, int data_type,
0321              struct inode *dir, const struct qstr *file_name,
0322              u32 cookie, struct fsnotify_iter_info *iter_info)
0323 {
0324     struct fsnotify_group *group = NULL;
0325     __u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS);
0326     __u32 marks_mask = 0;
0327     __u32 marks_ignore_mask = 0;
0328     bool is_dir = mask & FS_ISDIR;
0329     struct fsnotify_mark *mark;
0330     int type;
0331 
0332     if (!iter_info->report_mask)
0333         return 0;
0334 
0335     /* clear ignored on inode modification */
0336     if (mask & FS_MODIFY) {
0337         fsnotify_foreach_iter_mark_type(iter_info, mark, type) {
0338             if (!(mark->flags &
0339                   FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
0340                 mark->ignore_mask = 0;
0341         }
0342     }
0343 
0344     /* Are any of the group marks interested in this event? */
0345     fsnotify_foreach_iter_mark_type(iter_info, mark, type) {
0346         group = mark->group;
0347         marks_mask |= mark->mask;
0348         marks_ignore_mask |=
0349             fsnotify_effective_ignore_mask(mark, is_dir, type);
0350     }
0351 
0352     pr_debug("%s: group=%p mask=%x marks_mask=%x marks_ignore_mask=%x data=%p data_type=%d dir=%p cookie=%d\n",
0353          __func__, group, mask, marks_mask, marks_ignore_mask,
0354          data, data_type, dir, cookie);
0355 
0356     if (!(test_mask & marks_mask & ~marks_ignore_mask))
0357         return 0;
0358 
0359     if (group->ops->handle_event) {
0360         return group->ops->handle_event(group, mask, data, data_type, dir,
0361                         file_name, cookie, iter_info);
0362     }
0363 
0364     return fsnotify_handle_event(group, mask, data, data_type, dir,
0365                      file_name, cookie, iter_info);
0366 }
0367 
0368 static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector **connp)
0369 {
0370     struct fsnotify_mark_connector *conn;
0371     struct hlist_node *node = NULL;
0372 
0373     conn = srcu_dereference(*connp, &fsnotify_mark_srcu);
0374     if (conn)
0375         node = srcu_dereference(conn->list.first, &fsnotify_mark_srcu);
0376 
0377     return hlist_entry_safe(node, struct fsnotify_mark, obj_list);
0378 }
0379 
0380 static struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark)
0381 {
0382     struct hlist_node *node = NULL;
0383 
0384     if (mark)
0385         node = srcu_dereference(mark->obj_list.next,
0386                     &fsnotify_mark_srcu);
0387 
0388     return hlist_entry_safe(node, struct fsnotify_mark, obj_list);
0389 }
0390 
0391 /*
0392  * iter_info is a multi head priority queue of marks.
0393  * Pick a subset of marks from queue heads, all with the same group
0394  * and set the report_mask to a subset of the selected marks.
0395  * Returns false if there are no more groups to iterate.
0396  */
0397 static bool fsnotify_iter_select_report_types(
0398         struct fsnotify_iter_info *iter_info)
0399 {
0400     struct fsnotify_group *max_prio_group = NULL;
0401     struct fsnotify_mark *mark;
0402     int type;
0403 
0404     /* Choose max prio group among groups of all queue heads */
0405     fsnotify_foreach_iter_type(type) {
0406         mark = iter_info->marks[type];
0407         if (mark &&
0408             fsnotify_compare_groups(max_prio_group, mark->group) > 0)
0409             max_prio_group = mark->group;
0410     }
0411 
0412     if (!max_prio_group)
0413         return false;
0414 
0415     /* Set the report mask for marks from same group as max prio group */
0416     iter_info->current_group = max_prio_group;
0417     iter_info->report_mask = 0;
0418     fsnotify_foreach_iter_type(type) {
0419         mark = iter_info->marks[type];
0420         if (mark && mark->group == iter_info->current_group) {
0421             /*
0422              * FSNOTIFY_ITER_TYPE_PARENT indicates that this inode
0423              * is watching children and interested in this event,
0424              * which is an event possible on child.
0425              * But is *this mark* watching children?
0426              */
0427             if (type == FSNOTIFY_ITER_TYPE_PARENT &&
0428                 !(mark->mask & FS_EVENT_ON_CHILD) &&
0429                 !(fsnotify_ignore_mask(mark) & FS_EVENT_ON_CHILD))
0430                 continue;
0431 
0432             fsnotify_iter_set_report_type(iter_info, type);
0433         }
0434     }
0435 
0436     return true;
0437 }
0438 
0439 /*
0440  * Pop from iter_info multi head queue, the marks that belong to the group of
0441  * current iteration step.
0442  */
0443 static void fsnotify_iter_next(struct fsnotify_iter_info *iter_info)
0444 {
0445     struct fsnotify_mark *mark;
0446     int type;
0447 
0448     /*
0449      * We cannot use fsnotify_foreach_iter_mark_type() here because we
0450      * may need to advance a mark of type X that belongs to current_group
0451      * but was not selected for reporting.
0452      */
0453     fsnotify_foreach_iter_type(type) {
0454         mark = iter_info->marks[type];
0455         if (mark && mark->group == iter_info->current_group)
0456             iter_info->marks[type] =
0457                 fsnotify_next_mark(iter_info->marks[type]);
0458     }
0459 }
0460 
0461 /*
0462  * fsnotify - This is the main call to fsnotify.
0463  *
0464  * The VFS calls into hook specific functions in linux/fsnotify.h.
0465  * Those functions then in turn call here.  Here will call out to all of the
0466  * registered fsnotify_group.  Those groups can then use the notification event
0467  * in whatever means they feel necessary.
0468  *
0469  * @mask:   event type and flags
0470  * @data:   object that event happened on
0471  * @data_type:  type of object for fanotify_data_XXX() accessors
0472  * @dir:    optional directory associated with event -
0473  *      if @file_name is not NULL, this is the directory that
0474  *      @file_name is relative to
0475  * @file_name:  optional file name associated with event
0476  * @inode:  optional inode associated with event -
0477  *      If @dir and @inode are both non-NULL, event may be
0478  *      reported to both.
0479  * @cookie: inotify rename cookie
0480  */
0481 int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
0482          const struct qstr *file_name, struct inode *inode, u32 cookie)
0483 {
0484     const struct path *path = fsnotify_data_path(data, data_type);
0485     struct super_block *sb = fsnotify_data_sb(data, data_type);
0486     struct fsnotify_iter_info iter_info = {};
0487     struct mount *mnt = NULL;
0488     struct inode *inode2 = NULL;
0489     struct dentry *moved;
0490     int inode2_type;
0491     int ret = 0;
0492     __u32 test_mask, marks_mask;
0493 
0494     if (path)
0495         mnt = real_mount(path->mnt);
0496 
0497     if (!inode) {
0498         /* Dirent event - report on TYPE_INODE to dir */
0499         inode = dir;
0500         /* For FS_RENAME, inode is old_dir and inode2 is new_dir */
0501         if (mask & FS_RENAME) {
0502             moved = fsnotify_data_dentry(data, data_type);
0503             inode2 = moved->d_parent->d_inode;
0504             inode2_type = FSNOTIFY_ITER_TYPE_INODE2;
0505         }
0506     } else if (mask & FS_EVENT_ON_CHILD) {
0507         /*
0508          * Event on child - report on TYPE_PARENT to dir if it is
0509          * watching children and on TYPE_INODE to child.
0510          */
0511         inode2 = dir;
0512         inode2_type = FSNOTIFY_ITER_TYPE_PARENT;
0513     }
0514 
0515     /*
0516      * Optimization: srcu_read_lock() has a memory barrier which can
0517      * be expensive.  It protects walking the *_fsnotify_marks lists.
0518      * However, if we do not walk the lists, we do not have to do
0519      * SRCU because we have no references to any objects and do not
0520      * need SRCU to keep them "alive".
0521      */
0522     if (!sb->s_fsnotify_marks &&
0523         (!mnt || !mnt->mnt_fsnotify_marks) &&
0524         (!inode || !inode->i_fsnotify_marks) &&
0525         (!inode2 || !inode2->i_fsnotify_marks))
0526         return 0;
0527 
0528     marks_mask = sb->s_fsnotify_mask;
0529     if (mnt)
0530         marks_mask |= mnt->mnt_fsnotify_mask;
0531     if (inode)
0532         marks_mask |= inode->i_fsnotify_mask;
0533     if (inode2)
0534         marks_mask |= inode2->i_fsnotify_mask;
0535 
0536 
0537     /*
0538      * If this is a modify event we may need to clear some ignore masks.
0539      * In that case, the object with ignore masks will have the FS_MODIFY
0540      * event in its mask.
0541      * Otherwise, return if none of the marks care about this type of event.
0542      */
0543     test_mask = (mask & ALL_FSNOTIFY_EVENTS);
0544     if (!(test_mask & marks_mask))
0545         return 0;
0546 
0547     iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
0548 
0549     iter_info.marks[FSNOTIFY_ITER_TYPE_SB] =
0550         fsnotify_first_mark(&sb->s_fsnotify_marks);
0551     if (mnt) {
0552         iter_info.marks[FSNOTIFY_ITER_TYPE_VFSMOUNT] =
0553             fsnotify_first_mark(&mnt->mnt_fsnotify_marks);
0554     }
0555     if (inode) {
0556         iter_info.marks[FSNOTIFY_ITER_TYPE_INODE] =
0557             fsnotify_first_mark(&inode->i_fsnotify_marks);
0558     }
0559     if (inode2) {
0560         iter_info.marks[inode2_type] =
0561             fsnotify_first_mark(&inode2->i_fsnotify_marks);
0562     }
0563 
0564     /*
0565      * We need to merge inode/vfsmount/sb mark lists so that e.g. inode mark
0566      * ignore masks are properly reflected for mount/sb mark notifications.
0567      * That's why this traversal is so complicated...
0568      */
0569     while (fsnotify_iter_select_report_types(&iter_info)) {
0570         ret = send_to_group(mask, data, data_type, dir, file_name,
0571                     cookie, &iter_info);
0572 
0573         if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS))
0574             goto out;
0575 
0576         fsnotify_iter_next(&iter_info);
0577     }
0578     ret = 0;
0579 out:
0580     srcu_read_unlock(&fsnotify_mark_srcu, iter_info.srcu_idx);
0581 
0582     return ret;
0583 }
0584 EXPORT_SYMBOL_GPL(fsnotify);
0585 
0586 static __init int fsnotify_init(void)
0587 {
0588     int ret;
0589 
0590     BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 23);
0591 
0592     ret = init_srcu_struct(&fsnotify_mark_srcu);
0593     if (ret)
0594         panic("initializing fsnotify_mark_srcu");
0595 
0596     fsnotify_mark_connector_cachep = KMEM_CACHE(fsnotify_mark_connector,
0597                             SLAB_PANIC);
0598 
0599     return 0;
0600 }
0601 core_initcall(fsnotify_init);