Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  *  inode.c - part of tracefs, a pseudo file system for activating tracing
0004  *
0005  * Based on debugfs by: Greg Kroah-Hartman <greg@kroah.com>
0006  *
0007  *  Copyright (C) 2014 Red Hat Inc, author: Steven Rostedt <srostedt@redhat.com>
0008  *
0009  * tracefs is the file system that is used by the tracing infrastructure.
0010  */
0011 
0012 #include <linux/module.h>
0013 #include <linux/fs.h>
0014 #include <linux/mount.h>
0015 #include <linux/kobject.h>
0016 #include <linux/namei.h>
0017 #include <linux/tracefs.h>
0018 #include <linux/fsnotify.h>
0019 #include <linux/security.h>
0020 #include <linux/seq_file.h>
0021 #include <linux/parser.h>
0022 #include <linux/magic.h>
0023 #include <linux/slab.h>
0024 
/* Permission bits used for the tracefs root when no "mode=" option is given. */
#define TRACEFS_DEFAULT_MODE    0700

/* Internal mount handed to simple_pin_fs()/simple_release_fs(). */
static struct vfsmount *tracefs_mount;
/* Pin counter that accompanies tracefs_mount in the pin/release calls. */
static int tracefs_mount_count;
/* Becomes true once register_filesystem() has succeeded in tracefs_init(). */
static bool tracefs_registered;
0030 
0031 static ssize_t default_read_file(struct file *file, char __user *buf,
0032                  size_t count, loff_t *ppos)
0033 {
0034     return 0;
0035 }
0036 
0037 static ssize_t default_write_file(struct file *file, const char __user *buf,
0038                    size_t count, loff_t *ppos)
0039 {
0040     return count;
0041 }
0042 
0043 static const struct file_operations tracefs_file_operations = {
0044     .read =     default_read_file,
0045     .write =    default_write_file,
0046     .open =     simple_open,
0047     .llseek =   noop_llseek,
0048 };
0049 
/*
 * Callbacks run when user space does mkdir/rmdir inside the instances
 * directory (see tracefs_syscall_mkdir() and tracefs_syscall_rmdir()).
 * Both pointers are filled in by tracefs_create_instance_dir(); each
 * callback receives only the name of the directory being created/removed.
 */
static struct tracefs_dir_ops {
    int (*mkdir)(const char *name);
    int (*rmdir)(const char *name);
} tracefs_ops __ro_after_init;
0054 
0055 static char *get_dname(struct dentry *dentry)
0056 {
0057     const char *dname;
0058     char *name;
0059     int len = dentry->d_name.len;
0060 
0061     dname = dentry->d_name.name;
0062     name = kmalloc(len + 1, GFP_KERNEL);
0063     if (!name)
0064         return NULL;
0065     memcpy(name, dname, len);
0066     name[len] = 0;
0067     return name;
0068 }
0069 
0070 static int tracefs_syscall_mkdir(struct user_namespace *mnt_userns,
0071                  struct inode *inode, struct dentry *dentry,
0072                  umode_t mode)
0073 {
0074     char *name;
0075     int ret;
0076 
0077     name = get_dname(dentry);
0078     if (!name)
0079         return -ENOMEM;
0080 
0081     /*
0082      * The mkdir call can call the generic functions that create
0083      * the files within the tracefs system. It is up to the individual
0084      * mkdir routine to handle races.
0085      */
0086     inode_unlock(inode);
0087     ret = tracefs_ops.mkdir(name);
0088     inode_lock(inode);
0089 
0090     kfree(name);
0091 
0092     return ret;
0093 }
0094 
0095 static int tracefs_syscall_rmdir(struct inode *inode, struct dentry *dentry)
0096 {
0097     char *name;
0098     int ret;
0099 
0100     name = get_dname(dentry);
0101     if (!name)
0102         return -ENOMEM;
0103 
0104     /*
0105      * The rmdir call can call the generic functions that create
0106      * the files within the tracefs system. It is up to the individual
0107      * rmdir routine to handle races.
0108      * This time we need to unlock not only the parent (inode) but
0109      * also the directory that is being deleted.
0110      */
0111     inode_unlock(inode);
0112     inode_unlock(d_inode(dentry));
0113 
0114     ret = tracefs_ops.rmdir(name);
0115 
0116     inode_lock_nested(inode, I_MUTEX_PARENT);
0117     inode_lock(d_inode(dentry));
0118 
0119     kfree(name);
0120 
0121     return ret;
0122 }
0123 
0124 static const struct inode_operations tracefs_dir_inode_operations = {
0125     .lookup     = simple_lookup,
0126     .mkdir      = tracefs_syscall_mkdir,
0127     .rmdir      = tracefs_syscall_rmdir,
0128 };
0129 
0130 static struct inode *tracefs_get_inode(struct super_block *sb)
0131 {
0132     struct inode *inode = new_inode(sb);
0133     if (inode) {
0134         inode->i_ino = get_next_ino();
0135         inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
0136     }
0137     return inode;
0138 }
0139 
/*
 * Mount options as parsed by tracefs_parse_options() and consumed by
 * tracefs_apply_options() / tracefs_show_options().
 */
struct tracefs_mount_opts {
    kuid_t uid;     /* value of "uid=" */
    kgid_t gid;     /* value of "gid=" */
    umode_t mode;   /* value of "mode=", TRACEFS_DEFAULT_MODE if absent */
    /* Opt_* bitfield. */
    unsigned int opts;
};
0147 
/*
 * Token ids returned by match_token() for the table below. They are also
 * used as bit numbers (BIT(Opt_*)) in tracefs_mount_opts.opts.
 */
enum {
    Opt_uid,
    Opt_gid,
    Opt_mode,
    Opt_err
};
0154 
/* Patterns handed to match_token(); the NULL pattern maps to Opt_err. */
static const match_table_t tokens = {
    {Opt_uid, "uid=%u"},
    {Opt_gid, "gid=%u"},
    {Opt_mode, "mode=%o"},
    {Opt_err, NULL}
};
0161 
/* Per-superblock private data, stored in sb->s_fs_info by trace_fill_super(). */
struct tracefs_fs_info {
    struct tracefs_mount_opts mount_opts;
};
0165 
0166 static void change_gid(struct dentry *dentry, kgid_t gid)
0167 {
0168     if (!dentry->d_inode)
0169         return;
0170     dentry->d_inode->i_gid = gid;
0171 }
0172 
/*
 * Set the group of @parent and of every dentry below it to @gid.
 *
 * Taken from d_walk, but without the need for handling renames.
 * Nothing can be renamed while walking the list, as tracefs
 * does not support renames. This is only called when mounting
 * or remounting the file system, to set all the files to
 * the given gid.
 *
 * The walk is iterative: it descends into a child as soon as that child
 * has children of its own ("repeat"), and climbs back up to continue
 * with the next sibling once a level is exhausted ("ascend"/"resume").
 */
static void set_gid(struct dentry *parent, kgid_t gid)
{
    struct dentry *this_parent;
    struct list_head *next;

    this_parent = parent;
    spin_lock(&this_parent->d_lock);

    change_gid(this_parent, gid);
repeat:
    /* Start at the first child of the directory currently being walked. */
    next = this_parent->d_subdirs.next;
resume:
    while (next != &this_parent->d_subdirs) {
        struct list_head *tmp = next;
        struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
        next = tmp->next;

        spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);

        change_gid(dentry, gid);

        if (!list_empty(&dentry->d_subdirs)) {
            /*
             * Descend: drop the old parent's lock and keep the child's
             * d_lock held as the new this_parent lock. The
             * spin_release()/spin_acquire() pair only touches dep_map,
             * i.e. it re-annotates the already-held lock (taken with
             * the nested subclass above) rather than re-locking it.
             */
            spin_unlock(&this_parent->d_lock);
            spin_release(&dentry->d_lock.dep_map, _RET_IP_);
            this_parent = dentry;
            spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
            goto repeat;
        }
        spin_unlock(&dentry->d_lock);
    }
    /*
     * All done at this level ... ascend and resume the search.
     */
    rcu_read_lock();
ascend:
    if (this_parent != parent) {
        struct dentry *child = this_parent;
        this_parent = child->d_parent;

        /* Swap the lock held on the finished child for its parent's. */
        spin_unlock(&child->d_lock);
        spin_lock(&this_parent->d_lock);

        /* go into the first sibling still alive */
        do {
            next = child->d_child.next;
            /* No sibling left at this level: climb one more level. */
            if (next == &this_parent->d_subdirs)
                goto ascend;
            child = list_entry(next, struct dentry, d_child);
        } while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED));
        rcu_read_unlock();
        goto resume;
    }
    /* Back at the starting dentry: the whole subtree has been visited. */
    rcu_read_unlock();
    spin_unlock(&this_parent->d_lock);
    return;
}
0236 
0237 static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
0238 {
0239     substring_t args[MAX_OPT_ARGS];
0240     int option;
0241     int token;
0242     kuid_t uid;
0243     kgid_t gid;
0244     char *p;
0245 
0246     opts->opts = 0;
0247     opts->mode = TRACEFS_DEFAULT_MODE;
0248 
0249     while ((p = strsep(&data, ",")) != NULL) {
0250         if (!*p)
0251             continue;
0252 
0253         token = match_token(p, tokens, args);
0254         switch (token) {
0255         case Opt_uid:
0256             if (match_int(&args[0], &option))
0257                 return -EINVAL;
0258             uid = make_kuid(current_user_ns(), option);
0259             if (!uid_valid(uid))
0260                 return -EINVAL;
0261             opts->uid = uid;
0262             break;
0263         case Opt_gid:
0264             if (match_int(&args[0], &option))
0265                 return -EINVAL;
0266             gid = make_kgid(current_user_ns(), option);
0267             if (!gid_valid(gid))
0268                 return -EINVAL;
0269             opts->gid = gid;
0270             break;
0271         case Opt_mode:
0272             if (match_octal(&args[0], &option))
0273                 return -EINVAL;
0274             opts->mode = option & S_IALLUGO;
0275             break;
0276         /*
0277          * We might like to report bad mount options here;
0278          * but traditionally tracefs has ignored all mount options
0279          */
0280         }
0281 
0282         opts->opts |= BIT(token);
0283     }
0284 
0285     return 0;
0286 }
0287 
0288 static int tracefs_apply_options(struct super_block *sb, bool remount)
0289 {
0290     struct tracefs_fs_info *fsi = sb->s_fs_info;
0291     struct inode *inode = d_inode(sb->s_root);
0292     struct tracefs_mount_opts *opts = &fsi->mount_opts;
0293 
0294     /*
0295      * On remount, only reset mode/uid/gid if they were provided as mount
0296      * options.
0297      */
0298 
0299     if (!remount || opts->opts & BIT(Opt_mode)) {
0300         inode->i_mode &= ~S_IALLUGO;
0301         inode->i_mode |= opts->mode;
0302     }
0303 
0304     if (!remount || opts->opts & BIT(Opt_uid))
0305         inode->i_uid = opts->uid;
0306 
0307     if (!remount || opts->opts & BIT(Opt_gid)) {
0308         /* Set all the group ids to the mount option */
0309         set_gid(sb->s_root, opts->gid);
0310     }
0311 
0312     return 0;
0313 }
0314 
0315 static int tracefs_remount(struct super_block *sb, int *flags, char *data)
0316 {
0317     int err;
0318     struct tracefs_fs_info *fsi = sb->s_fs_info;
0319 
0320     sync_filesystem(sb);
0321     err = tracefs_parse_options(data, &fsi->mount_opts);
0322     if (err)
0323         goto fail;
0324 
0325     tracefs_apply_options(sb, true);
0326 
0327 fail:
0328     return err;
0329 }
0330 
0331 static int tracefs_show_options(struct seq_file *m, struct dentry *root)
0332 {
0333     struct tracefs_fs_info *fsi = root->d_sb->s_fs_info;
0334     struct tracefs_mount_opts *opts = &fsi->mount_opts;
0335 
0336     if (!uid_eq(opts->uid, GLOBAL_ROOT_UID))
0337         seq_printf(m, ",uid=%u",
0338                from_kuid_munged(&init_user_ns, opts->uid));
0339     if (!gid_eq(opts->gid, GLOBAL_ROOT_GID))
0340         seq_printf(m, ",gid=%u",
0341                from_kgid_munged(&init_user_ns, opts->gid));
0342     if (opts->mode != TRACEFS_DEFAULT_MODE)
0343         seq_printf(m, ",mode=%o", opts->mode);
0344 
0345     return 0;
0346 }
0347 
0348 static const struct super_operations tracefs_super_operations = {
0349     .statfs     = simple_statfs,
0350     .remount_fs = tracefs_remount,
0351     .show_options   = tracefs_show_options,
0352 };
0353 
0354 static int trace_fill_super(struct super_block *sb, void *data, int silent)
0355 {
0356     static const struct tree_descr trace_files[] = {{""}};
0357     struct tracefs_fs_info *fsi;
0358     int err;
0359 
0360     fsi = kzalloc(sizeof(struct tracefs_fs_info), GFP_KERNEL);
0361     sb->s_fs_info = fsi;
0362     if (!fsi) {
0363         err = -ENOMEM;
0364         goto fail;
0365     }
0366 
0367     err = tracefs_parse_options(data, &fsi->mount_opts);
0368     if (err)
0369         goto fail;
0370 
0371     err  =  simple_fill_super(sb, TRACEFS_MAGIC, trace_files);
0372     if (err)
0373         goto fail;
0374 
0375     sb->s_op = &tracefs_super_operations;
0376 
0377     tracefs_apply_options(sb, false);
0378 
0379     return 0;
0380 
0381 fail:
0382     kfree(fsi);
0383     sb->s_fs_info = NULL;
0384     return err;
0385 }
0386 
0387 static struct dentry *trace_mount(struct file_system_type *fs_type,
0388             int flags, const char *dev_name,
0389             void *data)
0390 {
0391     return mount_single(fs_type, flags, data, trace_fill_super);
0392 }
0393 
/*
 * File system type registered by tracefs_init() and used as the type
 * argument for simple_pin_fs() throughout this file.
 */
static struct file_system_type trace_fs_type = {
    .owner =    THIS_MODULE,
    .name =     "tracefs",
    .mount =    trace_mount,
    .kill_sb =  kill_litter_super,
};
MODULE_ALIAS_FS("tracefs");
0401 
0402 static struct dentry *start_creating(const char *name, struct dentry *parent)
0403 {
0404     struct dentry *dentry;
0405     int error;
0406 
0407     pr_debug("tracefs: creating file '%s'\n",name);
0408 
0409     error = simple_pin_fs(&trace_fs_type, &tracefs_mount,
0410                   &tracefs_mount_count);
0411     if (error)
0412         return ERR_PTR(error);
0413 
0414     /* If the parent is not specified, we create it in the root.
0415      * We need the root dentry to do this, which is in the super
0416      * block. A pointer to that is in the struct vfsmount that we
0417      * have around.
0418      */
0419     if (!parent)
0420         parent = tracefs_mount->mnt_root;
0421 
0422     inode_lock(d_inode(parent));
0423     if (unlikely(IS_DEADDIR(d_inode(parent))))
0424         dentry = ERR_PTR(-ENOENT);
0425     else
0426         dentry = lookup_one_len(name, parent, strlen(name));
0427     if (!IS_ERR(dentry) && d_inode(dentry)) {
0428         dput(dentry);
0429         dentry = ERR_PTR(-EEXIST);
0430     }
0431 
0432     if (IS_ERR(dentry)) {
0433         inode_unlock(d_inode(parent));
0434         simple_release_fs(&tracefs_mount, &tracefs_mount_count);
0435     }
0436 
0437     return dentry;
0438 }
0439 
0440 static struct dentry *failed_creating(struct dentry *dentry)
0441 {
0442     inode_unlock(d_inode(dentry->d_parent));
0443     dput(dentry);
0444     simple_release_fs(&tracefs_mount, &tracefs_mount_count);
0445     return NULL;
0446 }
0447 
0448 static struct dentry *end_creating(struct dentry *dentry)
0449 {
0450     inode_unlock(d_inode(dentry->d_parent));
0451     return dentry;
0452 }
0453 
0454 /**
0455  * tracefs_create_file - create a file in the tracefs filesystem
0456  * @name: a pointer to a string containing the name of the file to create.
0457  * @mode: the permission that the file should have.
0458  * @parent: a pointer to the parent dentry for this file.  This should be a
0459  *          directory dentry if set.  If this parameter is NULL, then the
0460  *          file will be created in the root of the tracefs filesystem.
0461  * @data: a pointer to something that the caller will want to get to later
0462  *        on.  The inode.i_private pointer will point to this value on
0463  *        the open() call.
0464  * @fops: a pointer to a struct file_operations that should be used for
0465  *        this file.
0466  *
0467  * This is the basic "create a file" function for tracefs.  It allows for a
0468  * wide range of flexibility in creating a file, or a directory (if you want
0469  * to create a directory, the tracefs_create_dir() function is
0470  * recommended to be used instead.)
0471  *
0472  * This function will return a pointer to a dentry if it succeeds.  This
0473  * pointer must be passed to the tracefs_remove() function when the file is
0474  * to be removed (no automatic cleanup happens if your module is unloaded,
0475  * you are responsible here.)  If an error occurs, %NULL will be returned.
0476  *
0477  * If tracefs is not enabled in the kernel, the value -%ENODEV will be
0478  * returned.
0479  */
0480 struct dentry *tracefs_create_file(const char *name, umode_t mode,
0481                    struct dentry *parent, void *data,
0482                    const struct file_operations *fops)
0483 {
0484     struct dentry *dentry;
0485     struct inode *inode;
0486 
0487     if (security_locked_down(LOCKDOWN_TRACEFS))
0488         return NULL;
0489 
0490     if (!(mode & S_IFMT))
0491         mode |= S_IFREG;
0492     BUG_ON(!S_ISREG(mode));
0493     dentry = start_creating(name, parent);
0494 
0495     if (IS_ERR(dentry))
0496         return NULL;
0497 
0498     inode = tracefs_get_inode(dentry->d_sb);
0499     if (unlikely(!inode))
0500         return failed_creating(dentry);
0501 
0502     inode->i_mode = mode;
0503     inode->i_fop = fops ? fops : &tracefs_file_operations;
0504     inode->i_private = data;
0505     inode->i_uid = d_inode(dentry->d_parent)->i_uid;
0506     inode->i_gid = d_inode(dentry->d_parent)->i_gid;
0507     d_instantiate(dentry, inode);
0508     fsnotify_create(d_inode(dentry->d_parent), dentry);
0509     return end_creating(dentry);
0510 }
0511 
0512 static struct dentry *__create_dir(const char *name, struct dentry *parent,
0513                    const struct inode_operations *ops)
0514 {
0515     struct dentry *dentry = start_creating(name, parent);
0516     struct inode *inode;
0517 
0518     if (IS_ERR(dentry))
0519         return NULL;
0520 
0521     inode = tracefs_get_inode(dentry->d_sb);
0522     if (unlikely(!inode))
0523         return failed_creating(dentry);
0524 
0525     /* Do not set bits for OTH */
0526     inode->i_mode = S_IFDIR | S_IRWXU | S_IRUSR| S_IRGRP | S_IXUSR | S_IXGRP;
0527     inode->i_op = ops;
0528     inode->i_fop = &simple_dir_operations;
0529     inode->i_uid = d_inode(dentry->d_parent)->i_uid;
0530     inode->i_gid = d_inode(dentry->d_parent)->i_gid;
0531 
0532     /* directory inodes start off with i_nlink == 2 (for "." entry) */
0533     inc_nlink(inode);
0534     d_instantiate(dentry, inode);
0535     inc_nlink(d_inode(dentry->d_parent));
0536     fsnotify_mkdir(d_inode(dentry->d_parent), dentry);
0537     return end_creating(dentry);
0538 }
0539 
0540 /**
0541  * tracefs_create_dir - create a directory in the tracefs filesystem
0542  * @name: a pointer to a string containing the name of the directory to
0543  *        create.
0544  * @parent: a pointer to the parent dentry for this file.  This should be a
0545  *          directory dentry if set.  If this parameter is NULL, then the
0546  *          directory will be created in the root of the tracefs filesystem.
0547  *
0548  * This function creates a directory in tracefs with the given name.
0549  *
0550  * This function will return a pointer to a dentry if it succeeds.  This
0551  * pointer must be passed to the tracefs_remove() function when the file is
0552  * to be removed. If an error occurs, %NULL will be returned.
0553  *
0554  * If tracing is not enabled in the kernel, the value -%ENODEV will be
0555  * returned.
0556  */
0557 struct dentry *tracefs_create_dir(const char *name, struct dentry *parent)
0558 {
0559     return __create_dir(name, parent, &simple_dir_inode_operations);
0560 }
0561 
0562 /**
0563  * tracefs_create_instance_dir - create the tracing instances directory
0564  * @name: The name of the instances directory to create
0565  * @parent: The parent directory that the instances directory will exist
0566  * @mkdir: The function to call when a mkdir is performed.
0567  * @rmdir: The function to call when a rmdir is performed.
0568  *
0569  * Only one instances directory is allowed.
0570  *
0571  * The instances directory is special as it allows for mkdir and rmdir
0572  * to be done by userspace. When a mkdir or rmdir is performed, the inode
0573  * locks are released and the methods passed in (@mkdir and @rmdir) are
0574  * called without locks and with the name of the directory being created
0575  * within the instances directory.
0576  *
0577  * Returns the dentry of the instances directory.
0578  */
0579 __init struct dentry *tracefs_create_instance_dir(const char *name,
0580                       struct dentry *parent,
0581                       int (*mkdir)(const char *name),
0582                       int (*rmdir)(const char *name))
0583 {
0584     struct dentry *dentry;
0585 
0586     /* Only allow one instance of the instances directory. */
0587     if (WARN_ON(tracefs_ops.mkdir || tracefs_ops.rmdir))
0588         return NULL;
0589 
0590     dentry = __create_dir(name, parent, &tracefs_dir_inode_operations);
0591     if (!dentry)
0592         return NULL;
0593 
0594     tracefs_ops.mkdir = mkdir;
0595     tracefs_ops.rmdir = rmdir;
0596 
0597     return dentry;
0598 }
0599 
0600 static void remove_one(struct dentry *victim)
0601 {
0602     simple_release_fs(&tracefs_mount, &tracefs_mount_count);
0603 }
0604 
0605 /**
0606  * tracefs_remove - recursively removes a directory
0607  * @dentry: a pointer to a the dentry of the directory to be removed.
0608  *
0609  * This function recursively removes a directory tree in tracefs that
0610  * was previously created with a call to another tracefs function
0611  * (like tracefs_create_file() or variants thereof.)
0612  */
0613 void tracefs_remove(struct dentry *dentry)
0614 {
0615     if (IS_ERR_OR_NULL(dentry))
0616         return;
0617 
0618     simple_pin_fs(&trace_fs_type, &tracefs_mount, &tracefs_mount_count);
0619     simple_recursive_removal(dentry, remove_one);
0620     simple_release_fs(&tracefs_mount, &tracefs_mount_count);
0621 }
0622 
0623 /**
0624  * tracefs_initialized - Tells whether tracefs has been registered
0625  */
0626 bool tracefs_initialized(void)
0627 {
0628     return tracefs_registered;
0629 }
0630 
0631 static int __init tracefs_init(void)
0632 {
0633     int retval;
0634 
0635     retval = sysfs_create_mount_point(kernel_kobj, "tracing");
0636     if (retval)
0637         return -EINVAL;
0638 
0639     retval = register_filesystem(&trace_fs_type);
0640     if (!retval)
0641         tracefs_registered = true;
0642 
0643     return retval;
0644 }
0645 core_initcall(tracefs_init);