// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/fs/namespace.c
 *
 * (C) Copyright Al Viro 2000, 2001
 *
 * Based on code from fs/super.c, copyright Linus Torvalds and others.
 * Heavily rewritten.
 */

#include <linux/syscalls.h>
#include <linux/export.h>
#include <linux/capability.h>
#include <linux/mnt_namespace.h>
#include <linux/user_namespace.h>
#include <linux/namei.h>
#include <linux/security.h>
#include <linux/cred.h>
#include <linux/idr.h>
#include <linux/init.h>		/* init_rootfs */
#include <linux/fs_struct.h>	/* get_fs_root et al. */
#include <linux/fsnotify.h>	/* fsnotify_vfsmount_delete */
#include <linux/file.h>
#include <linux/uaccess.h>
#include <linux/proc_ns.h>
#include <linux/magic.h>
#include <linux/memblock.h>
#include <linux/proc_fs.h>
#include <linux/task_work.h>
#include <linux/sched/task.h>
#include <uapi/linux/mount.h>
#include <linux/fs_context.h>
#include <linux/shmem_fs.h>
#include <linux/mnt_idmapping.h>

#include "pnode.h"
#include "internal.h"

/* Maximum number of mounts in a mount namespace */
static unsigned int sysctl_mount_max __read_mostly = 100000;

static unsigned int m_hash_mask __read_mostly;
static unsigned int m_hash_shift __read_mostly;
static unsigned int mp_hash_mask __read_mostly;
static unsigned int mp_hash_shift __read_mostly;

static __initdata unsigned long mhash_entries;
static int __init set_mhash_entries(char *str)
{
	if (!str)
		return 0;
	mhash_entries = simple_strtoul(str, &str, 0);
	return 1;
}
__setup("mhash_entries=", set_mhash_entries);

static __initdata unsigned long mphash_entries;
static int __init set_mphash_entries(char *str)
{
	if (!str)
		return 0;
	mphash_entries = simple_strtoul(str, &str, 0);
	return 1;
}
__setup("mphash_entries=", set_mphash_entries);
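
/*
 * "mhash_entries=" and "mphash_entries=" are documented boot parameters:
 * they override the default sizing of the mount and mountpoint hash
 * tables. The values are consumed at boot by mnt_init(), which lies
 * outside this excerpt.
 */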

static u64 event;
static DEFINE_IDA(mnt_id_ida);
static DEFINE_IDA(mnt_group_ida);

static struct hlist_head *mount_hashtable __read_mostly;
static struct hlist_head *mountpoint_hashtable __read_mostly;
static struct kmem_cache *mnt_cache __read_mostly;
static DECLARE_RWSEM(namespace_sem);
static HLIST_HEAD(unmounted);	/* protected by namespace_sem */
static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */

struct mount_kattr {
	unsigned int attr_set;
	unsigned int attr_clr;
	unsigned int propagation;
	unsigned int lookup_flags;
	bool recurse;
	struct user_namespace *mnt_userns;
};

/* /sys/fs */
struct kobject *fs_kobj;
EXPORT_SYMBOL_GPL(fs_kobj);

/*
 * vfsmount lock may be taken for read to prevent changes to the
 * vfsmount hash, ie. during mountpoint lookups or walking back
 * up the tree.
 *
 * It should be taken for write in all cases where the vfsmount
 * lock should be taken for read.
 */
__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);

static inline void lock_mount_hash(void)
{
	write_seqlock(&mount_lock);
}

static inline void unlock_mount_hash(void)
{
	write_sequnlock(&mount_lock);
}

static inline struct hlist_head *m_hash(struct vfsmount *mnt, struct dentry *dentry)
{
	unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
	tmp += ((unsigned long)dentry / L1_CACHE_BYTES);
	tmp = tmp + (tmp >> m_hash_shift);
	return &mount_hashtable[tmp & m_hash_mask];
}

static inline struct hlist_head *mp_hash(struct dentry *dentry)
{
	unsigned long tmp = ((unsigned long)dentry / L1_CACHE_BYTES);
	tmp = tmp + (tmp >> mp_hash_shift);
	return &mountpoint_hashtable[tmp & mp_hash_mask];
}
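
/*
 * Note on the hash functions above: the pointer values are divided by
 * L1_CACHE_BYTES because the low bits of slab-allocated addresses carry
 * little entropy, and the high bits are folded in before masking.
 * m_hash_shift/m_hash_mask (and the mp_* pair) follow from the table
 * sizes chosen at boot.
 */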

static int mnt_alloc_id(struct mount *mnt)
{
	int res = ida_alloc(&mnt_id_ida, GFP_KERNEL);

	if (res < 0)
		return res;
	mnt->mnt_id = res;
	return 0;
}

static void mnt_free_id(struct mount *mnt)
{
	ida_free(&mnt_id_ida, mnt->mnt_id);
}

/*
 * Allocate a new peer group ID
 */
static int mnt_alloc_group_id(struct mount *mnt)
{
	int res = ida_alloc_min(&mnt_group_ida, 1, GFP_KERNEL);

	if (res < 0)
		return res;
	mnt->mnt_group_id = res;
	return 0;
}

/*
 * Release a peer group ID
 */
void mnt_release_group_id(struct mount *mnt)
{
	ida_free(&mnt_group_ida, mnt->mnt_group_id);
	mnt->mnt_group_id = 0;
}

/*
 * vfsmount lock must be held for read
 */
static inline void mnt_add_count(struct mount *mnt, int n)
{
#ifdef CONFIG_SMP
	this_cpu_add(mnt->mnt_pcp->mnt_count, n);
#else
	preempt_disable();
	mnt->mnt_count += n;
	preempt_enable();
#endif
}

/*
 * vfsmount lock must be held for write
 */
int mnt_get_count(struct mount *mnt)
{
#ifdef CONFIG_SMP
	int count = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count;
	}

	return count;
#else
	return mnt->mnt_count;
#endif
}

static struct mount *alloc_vfsmnt(const char *name)
{
	struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
	if (mnt) {
		int err;

		err = mnt_alloc_id(mnt);
		if (err)
			goto out_free_cache;

		if (name) {
			mnt->mnt_devname = kstrdup_const(name,
							 GFP_KERNEL_ACCOUNT);
			if (!mnt->mnt_devname)
				goto out_free_id;
		}

#ifdef CONFIG_SMP
		mnt->mnt_pcp = alloc_percpu(struct mnt_pcp);
		if (!mnt->mnt_pcp)
			goto out_free_devname;

		this_cpu_add(mnt->mnt_pcp->mnt_count, 1);
#else
		mnt->mnt_count = 1;
		mnt->mnt_writers = 0;
#endif

		INIT_HLIST_NODE(&mnt->mnt_hash);
		INIT_LIST_HEAD(&mnt->mnt_child);
		INIT_LIST_HEAD(&mnt->mnt_mounts);
		INIT_LIST_HEAD(&mnt->mnt_list);
		INIT_LIST_HEAD(&mnt->mnt_expire);
		INIT_LIST_HEAD(&mnt->mnt_share);
		INIT_LIST_HEAD(&mnt->mnt_slave_list);
		INIT_LIST_HEAD(&mnt->mnt_slave);
		INIT_HLIST_NODE(&mnt->mnt_mp_list);
		INIT_LIST_HEAD(&mnt->mnt_umounting);
		INIT_HLIST_HEAD(&mnt->mnt_stuck_children);
		mnt->mnt.mnt_userns = &init_user_ns;
	}
	return mnt;

#ifdef CONFIG_SMP
out_free_devname:
	kfree_const(mnt->mnt_devname);
#endif
out_free_id:
	mnt_free_id(mnt);
out_free_cache:
	kmem_cache_free(mnt_cache, mnt);
	return NULL;
}

/*
 * Most r/o & frozen checks on a fs are for operations that take discrete
 * amounts of time, like a write() or unlink().  We must keep track of when
 * those operations start (for permission checks) and when they end, so that
 * we can determine when writes are able to occur to a filesystem.
 */
/*
 * __mnt_is_readonly: check whether a mount is read-only
 * @mnt: the mount to check for its write status
 *
 * This shouldn't be used directly - callers should use mnt_is_readonly(),
 * which also checks for a pending r/o remount.
 */
bool __mnt_is_readonly(struct vfsmount *mnt)
{
	return (mnt->mnt_flags & MNT_READONLY) || sb_rdonly(mnt->mnt_sb);
}
EXPORT_SYMBOL_GPL(__mnt_is_readonly);

static inline void mnt_inc_writers(struct mount *mnt)
{
#ifdef CONFIG_SMP
	this_cpu_inc(mnt->mnt_pcp->mnt_writers);
#else
	mnt->mnt_writers++;
#endif
}

static inline void mnt_dec_writers(struct mount *mnt)
{
#ifdef CONFIG_SMP
	this_cpu_dec(mnt->mnt_pcp->mnt_writers);
#else
	mnt->mnt_writers--;
#endif
}

static unsigned int mnt_get_writers(struct mount *mnt)
{
#ifdef CONFIG_SMP
	unsigned int count = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers;
	}

	return count;
#else
	return mnt->mnt_writers;
#endif
}

static int mnt_is_readonly(struct vfsmount *mnt)
{
	if (mnt->mnt_sb->s_readonly_remount)
		return 1;
	/* Order wrt setting s_flags/s_readonly_remount in do_remount() */
	smp_rmb();
	return __mnt_is_readonly(mnt);
}

/**
 * __mnt_want_write - get write access to a mount without freeze protection
 * @m: the mount on which to take a write
 *
 * This tells the low-level filesystem that a write is about to be performed to
 * it, and makes sure that writes are allowed (mount is read-write) before
 * returning success. This operation does not protect against filesystem being
 * frozen. When the write operation is finished, __mnt_drop_write() must be
 * called. This is effectively a refcount.
 */
int __mnt_want_write(struct vfsmount *m)
{
	struct mount *mnt = real_mount(m);
	int ret = 0;

	preempt_disable();
	mnt_inc_writers(mnt);
	/*
	 * The store to mnt_inc_writers must be visible before we pass
	 * MNT_WRITE_HOLD loop below, so that the slowpath can see our
	 * incremented count after it has set MNT_WRITE_HOLD.
	 */
	smp_mb();
	might_lock(&mount_lock.lock);
	while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) {
		if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
			cpu_relax();
		} else {
			/*
			 * This prevents priority inversion, if the task
			 * setting MNT_WRITE_HOLD got preempted on a remote
			 * CPU, and it prevents life lock if the task setting
			 * MNT_WRITE_HOLD has a lower priority and is bound to
			 * the same CPU as the task that is spinning here.
			 */
			preempt_enable();
			lock_mount_hash();
			unlock_mount_hash();
			preempt_disable();
		}
	}
	/*
	 * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
	 * be set to match its requirements. So we must not load that until
	 * MNT_WRITE_HOLD is cleared.
	 */
	smp_rmb();
	if (mnt_is_readonly(m)) {
		mnt_dec_writers(mnt);
		ret = -EROFS;
	}
	preempt_enable();

	return ret;
}

/**
 * mnt_want_write - get write access to a mount
 * @m: the mount on which to take a write
 *
 * This tells the low-level filesystem that a write is about to be performed to
 * it, and makes sure that writes are allowed (mount is read-write, filesystem
 * is not frozen) before returning success.  When the write operation is
 * finished, mnt_drop_write() must be called.  This is effectively a refcount.
 */
int mnt_want_write(struct vfsmount *m)
{
	int ret;

	sb_start_write(m->mnt_sb);
	ret = __mnt_want_write(m);
	if (ret)
		sb_end_write(m->mnt_sb);
	return ret;
}
EXPORT_SYMBOL_GPL(mnt_want_write);
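
/*
 * Typical caller pattern (a sketch; vfs_do_write_op() is a hypothetical
 * callee standing in for whatever operation needs write access):
 *
 *	err = mnt_want_write(path->mnt);
 *	if (err)
 *		return err;
 *	err = vfs_do_write_op(path);
 *	mnt_drop_write(path->mnt);
 *	return err;
 */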

/**
 * __mnt_want_write_file - get write access to a file's mount
 * @file: the file who's mount on which to take a write
 *
 * This is like __mnt_want_write, but if the file is already open for writing
 * it skips incrementing mnt_writers (since the open file already has a
 * reference) and instead only does the check for emergency r/o remounts.
 * This must be paired with __mnt_drop_write_file.
 */
int __mnt_want_write_file(struct file *file)
{
	if (file->f_mode & FMODE_WRITER) {
		/*
		 * Superblock may have become readonly while there are still
		 * writable fd's, e.g. due to a fs error with errors=remount-ro
		 */
		if (__mnt_is_readonly(file->f_path.mnt))
			return -EROFS;
		return 0;
	}
	return __mnt_want_write(file->f_path.mnt);
}

/**
 * mnt_want_write_file - get write access to a file's mount
 * @file: the file who's mount on which to take a write
 *
 * This is like mnt_want_write, but if the file is already open for writing it
 * skips incrementing mnt_writers (since the open file already has a reference)
 * and instead only does the freeze protection and the check for emergency r/o
 * remounts.  This must be paired with mnt_drop_write_file.
 */
int mnt_want_write_file(struct file *file)
{
	int ret;

	sb_start_write(file_inode(file)->i_sb);
	ret = __mnt_want_write_file(file);
	if (ret)
		sb_end_write(file_inode(file)->i_sb);
	return ret;
}
EXPORT_SYMBOL_GPL(mnt_want_write_file);

/**
 * __mnt_drop_write - give up write access to a mount
 * @mnt: the mount on which to give up write access
 *
 * Tells the low-level filesystem that we are done
 * performing writes to it.  Must be matched with
 * __mnt_want_write() call above.
 */
void __mnt_drop_write(struct vfsmount *mnt)
{
	preempt_disable();
	mnt_dec_writers(real_mount(mnt));
	preempt_enable();
}

/**
 * mnt_drop_write - give up write access to a mount
 * @mnt: the mount on which to give up write access
 *
 * Tells the low-level filesystem that we are done performing writes to it and
 * also allows filesystem to be frozen again.  Must be matched with
 * mnt_want_write() call above.
 */
void mnt_drop_write(struct vfsmount *mnt)
{
	__mnt_drop_write(mnt);
	sb_end_write(mnt->mnt_sb);
}
EXPORT_SYMBOL_GPL(mnt_drop_write);

void __mnt_drop_write_file(struct file *file)
{
	if (!(file->f_mode & FMODE_WRITER))
		__mnt_drop_write(file->f_path.mnt);
}

void mnt_drop_write_file(struct file *file)
{
	__mnt_drop_write_file(file);
	sb_end_write(file_inode(file)->i_sb);
}
EXPORT_SYMBOL(mnt_drop_write_file);

/**
 * mnt_hold_writers - prevent write access to the given mount
 * @mnt: mnt to prevent write access to
 *
 * Prevents write access to @mnt if there are no active writers for @mnt.
 * This function needs to be called and return successfully before changing
 * properties of @mnt that need to remain stable for callers with write access
 * to @mnt.
 *
 * After this function has been called successfully callers must pair it with
 * a call to mnt_unhold_writers() in order to stop preventing write access to
 * @mnt.
 *
 * Context: This function expects lock_mount_hash() to be held serializing
 *          setting MNT_WRITE_HOLD.
 * Return: On success 0 is returned.
 *	   On error, -EBUSY is returned.
 */
static inline int mnt_hold_writers(struct mount *mnt)
{
	mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
	/*
	 * After storing MNT_WRITE_HOLD, we'll read the counters. This store
	 * should be visible before we do.
	 */
	smp_mb();

	/*
	 * With writers on hold, if this value is zero, then there are
	 * definitely no active writers (although held writers may subsequently
	 * increment the count, they'll have to wait, and decrement it after
	 * seeing MNT_READONLY).
	 *
	 * It is OK to have the count incremented on one CPU and decremented on
	 * another: the sum will add up correctly, and we only ever read the
	 * per-cpu counters here; we never write the sum back.
	 */
	if (mnt_get_writers(mnt) > 0)
		return -EBUSY;

	return 0;
}

/**
 * mnt_unhold_writers - stop preventing write access to the given mount
 * @mnt: mnt to stop preventing write access to
 *
 * Stop preventing write access to @mnt allowing callers to gain write access
 * to @mnt again.
 *
 * This function can only be called after a successful call to
 * mnt_hold_writers().
 *
 * Context: This function expects lock_mount_hash() to be held.
 */
static inline void mnt_unhold_writers(struct mount *mnt)
{
	/*
	 * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers
	 * that become unheld will see MNT_READONLY.
	 */
	smp_wmb();
	mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
}

static int mnt_make_readonly(struct mount *mnt)
{
	int ret;

	ret = mnt_hold_writers(mnt);
	if (!ret)
		mnt->mnt.mnt_flags |= MNT_READONLY;
	mnt_unhold_writers(mnt);
	return ret;
}

int sb_prepare_remount_readonly(struct super_block *sb)
{
	struct mount *mnt;
	int err = 0;

	/* Racy optimization.  Recheck the counter under MNT_WRITE_HOLD */
	if (atomic_long_read(&sb->s_remove_count))
		return -EBUSY;

	lock_mount_hash();
	list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
		if (!(mnt->mnt.mnt_flags & MNT_READONLY)) {
			err = mnt_hold_writers(mnt);
			if (err)
				break;
		}
	}
	if (!err && atomic_long_read(&sb->s_remove_count))
		err = -EBUSY;

	if (!err) {
		sb->s_readonly_remount = 1;
		smp_wmb();
	}
	list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
		if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD)
			mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
	}
	unlock_mount_hash();

	return err;
}

static void free_vfsmnt(struct mount *mnt)
{
	struct user_namespace *mnt_userns;

	mnt_userns = mnt_user_ns(&mnt->mnt);
	if (!initial_idmapping(mnt_userns))
		put_user_ns(mnt_userns);
	kfree_const(mnt->mnt_devname);
#ifdef CONFIG_SMP
	free_percpu(mnt->mnt_pcp);
#endif
	kmem_cache_free(mnt_cache, mnt);
}

static void delayed_free_vfsmnt(struct rcu_head *head)
{
	free_vfsmnt(container_of(head, struct mount, mnt_rcu));
}

/* call under rcu_read_lock */
int __legitimize_mnt(struct vfsmount *bastard, unsigned seq)
{
	struct mount *mnt;
	if (read_seqretry(&mount_lock, seq))
		return 1;
	if (bastard == NULL)
		return 0;
	mnt = real_mount(bastard);
	mnt_add_count(mnt, 1);
	smp_mb();		/* see mntput_no_expire() */
	if (likely(!read_seqretry(&mount_lock, seq)))
		return 0;
	if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
		mnt_add_count(mnt, -1);
		return 1;
	}
	lock_mount_hash();
	if (unlikely(bastard->mnt_flags & MNT_DOOMED)) {
		mnt_add_count(mnt, -1);
		unlock_mount_hash();
		return 1;
	}
	unlock_mount_hash();
	/* caller will mntput() */
	return -1;
}

/* call under rcu_read_lock */
static bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
{
	int res = __legitimize_mnt(bastard, seq);
	if (likely(!res))
		return true;
	if (unlikely(res < 0)) {
		rcu_read_unlock();
		mntput(bastard);
		rcu_read_lock();
	}
	return false;
}

/*
 * find the first mount at @dentry on vfsmount @mnt.
 * call under rcu_read_lock()
 */
struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
{
	struct hlist_head *head = m_hash(mnt, dentry);
	struct mount *p;

	hlist_for_each_entry_rcu(p, head, mnt_hash)
		if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
			return p;
	return NULL;
}

/*
 * lookup_mnt - Return the first child mount mounted at path
 *
 * "First" means first mounted chronologically.  If you create the
 * following mounts:
 *
 * mount /dev/sda1 /mnt
 * mount /dev/sda2 /mnt
 * mount /dev/sda3 /mnt
 *
 * Then lookup_mnt() on the base /mnt dentry in the root mount will
 * return successively the root dentry and vfsmount of /dev/sda1, then
 * /dev/sda2, then /dev/sda3, then NULL.
 *
 * lookup_mnt takes a reference to the found vfsmount.
 */
struct vfsmount *lookup_mnt(const struct path *path)
{
	struct mount *child_mnt;
	struct vfsmount *m;
	unsigned seq;

	rcu_read_lock();
	do {
		seq = read_seqbegin(&mount_lock);
		child_mnt = __lookup_mnt(path->mnt, path->dentry);
		m = child_mnt ? &child_mnt->mnt : NULL;
	} while (!legitimize_mnt(m, seq));
	rcu_read_unlock();
	return m;
}
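
/*
 * Note the lockless pattern above: the hash walk runs under
 * rcu_read_lock() with a mount_lock sequence sample; legitimize_mnt()
 * takes the reference and re-checks the sequence, and the loop retries
 * whenever a concurrent mount-table change raced with the walk.
 */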

static inline void lock_ns_list(struct mnt_namespace *ns)
{
	spin_lock(&ns->ns_lock);
}

static inline void unlock_ns_list(struct mnt_namespace *ns)
{
	spin_unlock(&ns->ns_lock);
}

static inline bool mnt_is_cursor(struct mount *mnt)
{
	return mnt->mnt.mnt_flags & MNT_CURSOR;
}

/*
 * __is_local_mountpoint - Test to see if dentry is a mountpoint in the
 *                         current mount namespace.
 *
 * The common case is dentries are not mountpoints at all and that
 * test is handled inline.  For the slow case when we are actually
 * dealing with a mountpoint of some kind, walk through all of the
 * mounts in the current mount namespace and test to see if the dentry
 * is a mountpoint.
 *
 * The mount list is read protected on entry to __is_local_mountpoint,
 * and must remain read protected until the test is done.
 */
bool __is_local_mountpoint(struct dentry *dentry)
{
	struct mnt_namespace *ns = current->nsproxy->mnt_ns;
	struct mount *mnt;
	bool is_covered = false;

	down_read(&namespace_sem);
	lock_ns_list(ns);
	list_for_each_entry(mnt, &ns->list, mnt_list) {
		if (mnt_is_cursor(mnt))
			continue;
		is_covered = (mnt->mnt_mountpoint == dentry);
		if (is_covered)
			break;
	}
	unlock_ns_list(ns);
	up_read(&namespace_sem);

	return is_covered;
}

static struct mountpoint *lookup_mountpoint(struct dentry *dentry)
{
	struct hlist_head *chain = mp_hash(dentry);
	struct mountpoint *mp;

	hlist_for_each_entry(mp, chain, m_hash) {
		if (mp->m_dentry == dentry) {
			mp->m_count++;
			return mp;
		}
	}
	return NULL;
}

static struct mountpoint *get_mountpoint(struct dentry *dentry)
{
	struct mountpoint *mp, *new = NULL;
	int ret;

	if (d_mountpoint(dentry)) {
		/* might be worth a WARN_ON() */
		if (d_unlinked(dentry))
			return ERR_PTR(-ENOENT);
mountpoint:
		read_seqlock_excl(&mount_lock);
		mp = lookup_mountpoint(dentry);
		read_sequnlock_excl(&mount_lock);
		if (mp)
			goto done;
	}

	if (!new)
		new = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
	if (!new)
		return ERR_PTR(-ENOMEM);

	/* Exactly one process may set d_mounted */
	ret = d_set_mounted(dentry);

	/* Someone else set d_mounted? */
	if (ret == -EBUSY)
		goto mountpoint;

	/* The dentry is not available as a mountpoint? */
	mp = ERR_PTR(ret);
	if (ret)
		goto done;

	/* Add the new mountpoint to the hash table */
	read_seqlock_excl(&mount_lock);
	new->m_dentry = dget(dentry);
	new->m_count = 1;
	hlist_add_head(&new->m_hash, mp_hash(dentry));
	INIT_HLIST_HEAD(&new->m_list);
	read_sequnlock_excl(&mount_lock);

	mp = new;
	new = NULL;
done:
	kfree(new);
	return mp;
}

/*
 * vfsmount lock must be held.  Additionally, the caller is responsible
 * for serializing calls for given disposal list.
 */
static void __put_mountpoint(struct mountpoint *mp, struct list_head *list)
{
	if (!--mp->m_count) {
		struct dentry *dentry = mp->m_dentry;
		BUG_ON(!hlist_empty(&mp->m_list));
		spin_lock(&dentry->d_lock);
		dentry->d_flags &= ~DCACHE_MOUNTED;
		spin_unlock(&dentry->d_lock);
		dput_to_list(dentry, list);
		hlist_del(&mp->m_hash);
		kfree(mp);
	}
}

/* called with namespace_lock and vfsmount lock */
static void put_mountpoint(struct mountpoint *mp)
{
	__put_mountpoint(mp, &ex_mountpoints);
}

static inline int check_mnt(struct mount *mnt)
{
	return mnt->mnt_ns == current->nsproxy->mnt_ns;
}

/*
 * vfsmount lock must be held for write
 */
static void touch_mnt_namespace(struct mnt_namespace *ns)
{
	if (ns) {
		ns->event = ++event;
		wake_up_interruptible(&ns->poll);
	}
}

/*
 * vfsmount lock must be held for write
 */
static void __touch_mnt_namespace(struct mnt_namespace *ns)
{
	if (ns && ns->event != event) {
		ns->event = event;
		wake_up_interruptible(&ns->poll);
	}
}

/*
 * vfsmount lock must be held for write
 */
static struct mountpoint *unhash_mnt(struct mount *mnt)
{
	struct mountpoint *mp;
	mnt->mnt_parent = mnt;
	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
	list_del_init(&mnt->mnt_child);
	hlist_del_init_rcu(&mnt->mnt_hash);
	hlist_del_init(&mnt->mnt_mp_list);
	mp = mnt->mnt_mp;
	mnt->mnt_mp = NULL;
	return mp;
}

/*
 * vfsmount lock must be held for write
 */
static void umount_mnt(struct mount *mnt)
{
	put_mountpoint(unhash_mnt(mnt));
}

/*
 * vfsmount lock must be held for write
 */
void mnt_set_mountpoint(struct mount *mnt,
			struct mountpoint *mp,
			struct mount *child_mnt)
{
	mp->m_count++;
	mnt_add_count(mnt, 1);	/* essentially, that's mntget */
	child_mnt->mnt_mountpoint = mp->m_dentry;
	child_mnt->mnt_parent = mnt;
	child_mnt->mnt_mp = mp;
	hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list);
}

static void __attach_mnt(struct mount *mnt, struct mount *parent)
{
	hlist_add_head_rcu(&mnt->mnt_hash,
			   m_hash(&parent->mnt, mnt->mnt_mountpoint));
	list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
}

/*
 * vfsmount lock must be held for write
 */
static void attach_mnt(struct mount *mnt,
			struct mount *parent,
			struct mountpoint *mp)
{
	mnt_set_mountpoint(parent, mp, mnt);
	__attach_mnt(mnt, parent);
}

void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt)
{
	struct mountpoint *old_mp = mnt->mnt_mp;
	struct mount *old_parent = mnt->mnt_parent;

	list_del_init(&mnt->mnt_child);
	hlist_del_init(&mnt->mnt_mp_list);
	hlist_del_init_rcu(&mnt->mnt_hash);

	attach_mnt(mnt, parent, mp);

	put_mountpoint(old_mp);
	mnt_add_count(old_parent, -1);
}

/*
 * vfsmount lock must be held for write
 */
static void commit_tree(struct mount *mnt)
{
	struct mount *parent = mnt->mnt_parent;
	struct mount *m;
	LIST_HEAD(head);
	struct mnt_namespace *n = parent->mnt_ns;

	BUG_ON(parent == mnt);

	list_add_tail(&head, &mnt->mnt_list);
	list_for_each_entry(m, &head, mnt_list)
		m->mnt_ns = n;

	list_splice(&head, n->list.prev);

	n->mounts += n->pending_mounts;
	n->pending_mounts = 0;

	__attach_mnt(mnt, parent);
	touch_mnt_namespace(n);
}

static struct mount *next_mnt(struct mount *p, struct mount *root)
{
	struct list_head *next = p->mnt_mounts.next;
	if (next == &p->mnt_mounts) {
		while (1) {
			if (p == root)
				return NULL;
			next = p->mnt_child.next;
			if (next != &p->mnt_parent->mnt_mounts)
				break;
			p = p->mnt_parent;
		}
	}
	return list_entry(next, struct mount, mnt_child);
}

static struct mount *skip_mnt_tree(struct mount *p)
{
	struct list_head *prev = p->mnt_mounts.prev;
	while (prev != &p->mnt_mounts) {
		p = list_entry(prev, struct mount, mnt_child);
		prev = p->mnt_mounts.prev;
	}
	return p;
}

/**
 * vfs_create_mount - Create a mount for a configured superblock
 * @fc: The configuration context with the superblock attached
 *
 * Create a mount to an already configured superblock.  If necessary, the
 * caller should invoke vfs_get_tree() before calling this.
 *
 * Note that this does not attach the mount to anything.
 */
struct vfsmount *vfs_create_mount(struct fs_context *fc)
{
	struct mount *mnt;
	struct user_namespace *fs_userns;

	if (!fc->root)
		return ERR_PTR(-EINVAL);

	mnt = alloc_vfsmnt(fc->source ?: "none");
	if (!mnt)
		return ERR_PTR(-ENOMEM);

	if (fc->sb_flags & SB_KERNMOUNT)
		mnt->mnt.mnt_flags = MNT_INTERNAL;

	atomic_inc(&fc->root->d_sb->s_active);
	mnt->mnt.mnt_sb = fc->root->d_sb;
	mnt->mnt.mnt_root = dget(fc->root);
	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
	mnt->mnt_parent = mnt;

	fs_userns = mnt->mnt.mnt_sb->s_user_ns;
	if (!initial_idmapping(fs_userns))
		mnt->mnt.mnt_userns = get_user_ns(fs_userns);

	lock_mount_hash();
	list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts);
	unlock_mount_hash();
	return &mnt->mnt;
}
EXPORT_SYMBOL(vfs_create_mount);

struct vfsmount *fc_mount(struct fs_context *fc)
{
	int err = vfs_get_tree(fc);
	if (!err) {
		up_write(&fc->root->d_sb->s_umount);
		return vfs_create_mount(fc);
	}
	return ERR_PTR(err);
}
EXPORT_SYMBOL(fc_mount);

struct vfsmount *vfs_kern_mount(struct file_system_type *type,
				int flags, const char *name,
				void *data)
{
	struct fs_context *fc;
	struct vfsmount *mnt;
	int ret = 0;

	if (!type)
		return ERR_PTR(-EINVAL);

	fc = fs_context_for_mount(type, flags);
	if (IS_ERR(fc))
		return ERR_CAST(fc);

	if (name)
		ret = vfs_parse_fs_string(fc, "source",
					  name, strlen(name));
	if (!ret)
		ret = parse_monolithic_mount_data(fc, data);
	if (!ret)
		mnt = fc_mount(fc);
	else
		mnt = ERR_PTR(ret);

	put_fs_context(fc);
	return mnt;
}
EXPORT_SYMBOL_GPL(vfs_kern_mount);
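
/*
 * Usage sketch for in-kernel mounts (assumes a registered
 * file_system_type, here called example_fs_type, which is not part of
 * this file):
 *
 *	struct vfsmount *mnt;
 *
 *	mnt = vfs_kern_mount(&example_fs_type, 0, "example", NULL);
 *	if (IS_ERR(mnt))
 *		return PTR_ERR(mnt);
 *	...
 *	mntput(mnt);
 */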

struct vfsmount *
vfs_submount(const struct dentry *mountpoint, struct file_system_type *type,
	     const char *name, void *data)
{
	/* Until it is worked out how to pass the user namespace
	 * through from the parent mount to the submount don't support
	 * unprivileged mounts with submounts.
	 */
	if (mountpoint->d_sb->s_user_ns != &init_user_ns)
		return ERR_PTR(-EPERM);

	return vfs_kern_mount(type, SB_SUBMOUNT, name, data);
}
EXPORT_SYMBOL_GPL(vfs_submount);

static struct mount *clone_mnt(struct mount *old, struct dentry *root,
					int flag)
{
	struct super_block *sb = old->mnt.mnt_sb;
	struct mount *mnt;
	int err;

	mnt = alloc_vfsmnt(old->mnt_devname);
	if (!mnt)
		return ERR_PTR(-ENOMEM);

	if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE))
		mnt->mnt_group_id = 0; /* not a peer of original */
	else
		mnt->mnt_group_id = old->mnt_group_id;

	if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) {
		err = mnt_alloc_group_id(mnt);
		if (err)
			goto out_free;
	}

	mnt->mnt.mnt_flags = old->mnt.mnt_flags;
	mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL);

	atomic_inc(&sb->s_active);
	mnt->mnt.mnt_userns = mnt_user_ns(&old->mnt);
	if (!initial_idmapping(mnt->mnt.mnt_userns))
		mnt->mnt.mnt_userns = get_user_ns(mnt->mnt.mnt_userns);
	mnt->mnt.mnt_sb = sb;
	mnt->mnt.mnt_root = dget(root);
	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
	mnt->mnt_parent = mnt;
	lock_mount_hash();
	list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
	unlock_mount_hash();

	if ((flag & CL_SLAVE) ||
	    ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) {
		list_add(&mnt->mnt_slave, &old->mnt_slave_list);
		mnt->mnt_master = old;
		CLEAR_MNT_SHARED(mnt);
	} else if (!(flag & CL_PRIVATE)) {
		if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old))
			list_add(&mnt->mnt_share, &old->mnt_share);
		if (IS_MNT_SLAVE(old))
			list_add(&mnt->mnt_slave, &old->mnt_slave);
		mnt->mnt_master = old->mnt_master;
	} else {
		CLEAR_MNT_SHARED(mnt);
	}
	if (flag & CL_MAKE_SHARED)
		set_mnt_shared(mnt);

	/* stick the duplicate mount on the same expiry list
	 * as the original if that was on one */
	if (flag & CL_EXPIRE) {
		if (!list_empty(&old->mnt_expire))
			list_add(&mnt->mnt_expire, &old->mnt_expire);
	}

	return mnt;

out_free:
	mnt_free_id(mnt);
	free_vfsmnt(mnt);
	return ERR_PTR(err);
}

static void cleanup_mnt(struct mount *mnt)
{
	struct hlist_node *p;
	struct mount *m;
	/*
	 * The warning here probably indicates that somebody messed
	 * up a mnt_want/drop_write() pair.  If this happens, the
	 * filesystem was probably unable to make r/w->r/o transitions.
	 * The locking used to deal with mnt_count decrement provides barriers,
	 * so mnt_get_writers() below is safe.
	 */
	WARN_ON(mnt_get_writers(mnt));
	if (unlikely(mnt->mnt_pins.first))
		mnt_pin_kill(mnt);
	hlist_for_each_entry_safe(m, p, &mnt->mnt_stuck_children, mnt_umount) {
		hlist_del(&m->mnt_umount);
		mntput(&m->mnt);
	}
	fsnotify_vfsmount_delete(&mnt->mnt);
	dput(mnt->mnt.mnt_root);
	deactivate_super(mnt->mnt.mnt_sb);
	mnt_free_id(mnt);
	call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt);
}

static void __cleanup_mnt(struct rcu_head *head)
{
	cleanup_mnt(container_of(head, struct mount, mnt_rcu));
}

static LLIST_HEAD(delayed_mntput_list);
static void delayed_mntput(struct work_struct *unused)
{
	struct llist_node *node = llist_del_all(&delayed_mntput_list);
	struct mount *m, *t;

	llist_for_each_entry_safe(m, t, node, mnt_llist)
		cleanup_mnt(m);
}
static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);

static void mntput_no_expire(struct mount *mnt)
{
	LIST_HEAD(list);
	int count;

	rcu_read_lock();
	if (likely(READ_ONCE(mnt->mnt_ns))) {
		/*
		 * Since we don't do lock_mount_hash() here,
		 * ->mnt_ns can change under us.  However, if it's
		 * non-NULL, then there's a reference that won't
		 * be dropped until after an RCU delay done after
		 * turning ->mnt_ns NULL.  So if we observe it
		 * non-NULL under rcu_read_lock(), the reference
		 * we are dropping is not the final one.
		 */
		mnt_add_count(mnt, -1);
		rcu_read_unlock();
		return;
	}
	lock_mount_hash();
	/*
	 * make sure that if __legitimize_mnt() has not seen us grab
	 * mount_lock, we'll see their refcount increment here.
	 */
	smp_mb();
	mnt_add_count(mnt, -1);
	count = mnt_get_count(mnt);
	if (count != 0) {
		WARN_ON(count < 0);
		rcu_read_unlock();
		unlock_mount_hash();
		return;
	}
	if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) {
		rcu_read_unlock();
		unlock_mount_hash();
		return;
	}
	mnt->mnt.mnt_flags |= MNT_DOOMED;
	rcu_read_unlock();

	list_del(&mnt->mnt_instance);

	if (unlikely(!list_empty(&mnt->mnt_mounts))) {
		struct mount *p, *tmp;
		list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) {
			__put_mountpoint(unhash_mnt(p), &list);
			hlist_add_head(&p->mnt_umount, &mnt->mnt_stuck_children);
		}
	}
	unlock_mount_hash();
	shrink_dentry_list(&list);

	if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) {
		struct task_struct *task = current;
		if (likely(!(task->flags & PF_KTHREAD))) {
			init_task_work(&mnt->mnt_rcu, __cleanup_mnt);
			if (!task_work_add(task, &mnt->mnt_rcu, TWA_RESUME))
				return;
		}
		if (llist_add(&mnt->mnt_llist, &delayed_mntput_list))
			schedule_delayed_work(&delayed_mntput_work, 1);
		return;
	}
	cleanup_mnt(mnt);
}

void mntput(struct vfsmount *mnt)
{
	if (mnt) {
		struct mount *m = real_mount(mnt);
		/* avoid cacheline pingpong, hope gcc doesn't get "smart" */
		if (unlikely(m->mnt_expiry_mark))
			m->mnt_expiry_mark = 0;
		mntput_no_expire(m);
	}
}
EXPORT_SYMBOL(mntput);

struct vfsmount *mntget(struct vfsmount *mnt)
{
	if (mnt)
		mnt_add_count(real_mount(mnt), 1);
	return mnt;
}
EXPORT_SYMBOL(mntget);
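
/*
 * mntget()/mntput() are the reference-counting pair for a vfsmount; every
 * mntget() (and every helper that returns a counted reference, such as
 * lookup_mnt() above) must eventually be balanced by an mntput().
 */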

/**
 * path_is_mountpoint() - Check if path is a mount in the current namespace.
 * @path: path to check
 *
 * d_mountpoint() can only be used reliably to establish if a dentry is
 * not mounted in any namespace and that common case is handled inline.
 * d_mountpoint() isn't aware of the possibility there may be multiple
 * mounts using a given dentry in a different namespace. This function
 * checks if the passed in path is a mountpoint in the caller's mount
 * namespace.
 */
bool path_is_mountpoint(const struct path *path)
{
	unsigned seq;
	bool res;

	if (!d_mountpoint(path->dentry))
		return false;

	rcu_read_lock();
	do {
		seq = read_seqbegin(&mount_lock);
		res = __path_is_mountpoint(path);
	} while (read_seqretry(&mount_lock, seq));
	rcu_read_unlock();

	return res;
}
EXPORT_SYMBOL(path_is_mountpoint);

struct vfsmount *mnt_clone_internal(const struct path *path)
{
	struct mount *p;
	p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE);
	if (IS_ERR(p))
		return ERR_CAST(p);
	p->mnt.mnt_flags |= MNT_INTERNAL;
	return &p->mnt;
}

#ifdef CONFIG_PROC_FS
static struct mount *mnt_list_next(struct mnt_namespace *ns,
				   struct list_head *p)
{
	struct mount *mnt, *ret = NULL;

	lock_ns_list(ns);
	list_for_each_continue(p, &ns->list) {
		mnt = list_entry(p, typeof(*mnt), mnt_list);
		if (!mnt_is_cursor(mnt)) {
			ret = mnt;
			break;
		}
	}
	unlock_ns_list(ns);

	return ret;
}

/* iterator; we want it to have access to namespace_sem, thus here... */
static void *m_start(struct seq_file *m, loff_t *pos)
{
	struct proc_mounts *p = m->private;
	struct list_head *prev;

	down_read(&namespace_sem);
	if (!*pos) {
		prev = &p->ns->list;
	} else {
		prev = &p->cursor.mnt_list;

		/* Read after we'd reached the end? */
		if (list_empty(prev))
			return NULL;
	}

	return mnt_list_next(p->ns, prev);
}

static void *m_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct proc_mounts *p = m->private;
	struct mount *mnt = v;

	++*pos;
	return mnt_list_next(p->ns, &mnt->mnt_list);
}

static void m_stop(struct seq_file *m, void *v)
{
	struct proc_mounts *p = m->private;
	struct mount *mnt = v;

	lock_ns_list(p->ns);
	if (mnt)
		list_move_tail(&p->cursor.mnt_list, &mnt->mnt_list);
	else
		list_del_init(&p->cursor.mnt_list);
	unlock_ns_list(p->ns);
	up_read(&namespace_sem);
}

static int m_show(struct seq_file *m, void *v)
{
	struct proc_mounts *p = m->private;
	struct mount *r = v;
	return p->show(m, &r->mnt);
}

const struct seq_operations mounts_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= m_show,
};

void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor)
{
	down_read(&namespace_sem);
	lock_ns_list(ns);
	list_del(&cursor->mnt_list);
	unlock_ns_list(ns);
	up_read(&namespace_sem);
}
#endif	/* CONFIG_PROC_FS */

/**
 * may_umount_tree - check if a mount tree is busy
 * @m: root of mount tree
 *
 * This is called to check if a tree of mounts has any
 * open files, pwds, chroots or sub mounts that are
 * busy.
 */
int may_umount_tree(struct vfsmount *m)
{
	struct mount *mnt = real_mount(m);
	int actual_refs = 0;
	int minimum_refs = 0;
	struct mount *p;
	BUG_ON(!m);

	/* write lock needed for mnt_get_count */
	lock_mount_hash();
	for (p = mnt; p; p = next_mnt(p, mnt)) {
		actual_refs += mnt_get_count(p);
		minimum_refs += 2;
	}
	unlock_mount_hash();

	if (actual_refs > minimum_refs)
		return 0;

	return 1;
}

EXPORT_SYMBOL(may_umount_tree);

/**
 * may_umount - check if a mount point is busy
 * @mnt: root of mount
 *
 * This is called to check if a mount point has any
 * open files, pwds, chroots or sub mounts. If the
 * mount has sub mounts this will return busy
 * regardless of whether the sub mounts are busy.
 *
 * Doesn't take quota and stuff into account. IOW, in some cases it will
 * give false negatives. The main reason why it's here is that we need
 * a non-destructive way to look for easily umountable filesystems.
 */
int may_umount(struct vfsmount *mnt)
{
	int ret = 1;
	down_read(&namespace_sem);
	lock_mount_hash();
	if (propagate_mount_busy(real_mount(mnt), 2))
		ret = 0;
	unlock_mount_hash();
	up_read(&namespace_sem);
	return ret;
}

EXPORT_SYMBOL(may_umount);

static void namespace_unlock(void)
{
	struct hlist_head head;
	struct hlist_node *p;
	struct mount *m;
	LIST_HEAD(list);

	hlist_move_list(&unmounted, &head);
	list_splice_init(&ex_mountpoints, &list);

	up_write(&namespace_sem);

	shrink_dentry_list(&list);

	if (likely(hlist_empty(&head)))
		return;

	synchronize_rcu_expedited();

	hlist_for_each_entry_safe(m, p, &head, mnt_umount) {
		hlist_del(&m->mnt_umount);
		mntput(&m->mnt);
	}
}

static inline void namespace_lock(void)
{
	down_write(&namespace_sem);
}

enum umount_tree_flags {
	UMOUNT_SYNC = 1,
	UMOUNT_PROPAGATE = 2,
	UMOUNT_CONNECTED = 4,
};

static bool disconnect_mount(struct mount *mnt, enum umount_tree_flags how)
{
	/* Leaving mounts connected is only valid for lazy umounts */
	if (how & UMOUNT_SYNC)
		return true;

	/* A mount without a parent has nothing to be connected to */
	if (!mnt_has_parent(mnt))
		return true;

	/* Because the reference counting rules change when mounts are
	 * unmounted and connected, umounted mounts may not be
	 * connected to mounted mounts.
	 */
	if (!(mnt->mnt_parent->mnt.mnt_flags & MNT_UMOUNT))
		return true;

	/* Has it been requested that the mount remain connected? */
	if (how & UMOUNT_CONNECTED)
		return false;

	/* Is the mount locked such that it needs to remain connected? */
	if (IS_MNT_LOCKED(mnt))
		return false;

	/* By default disconnect the mount */
	return true;
}

/*
 * mount_lock must be held
 * namespace_sem must be held for write
 */
static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
{
	LIST_HEAD(tmp_list);
	struct mount *p;

	if (how & UMOUNT_PROPAGATE)
		propagate_mount_unlock(mnt);

	/* Gather the mounts to umount */
	for (p = mnt; p; p = next_mnt(p, mnt)) {
		p->mnt.mnt_flags |= MNT_UMOUNT;
		list_move(&p->mnt_list, &tmp_list);
	}

	/* Hide the mounts from mnt_mounts */
	list_for_each_entry(p, &tmp_list, mnt_list) {
		list_del_init(&p->mnt_child);
	}

	/* Add propagated mounts to the tmp_list */
	if (how & UMOUNT_PROPAGATE)
		propagate_umount(&tmp_list);

	while (!list_empty(&tmp_list)) {
		struct mnt_namespace *ns;
		bool disconnect;
		p = list_first_entry(&tmp_list, struct mount, mnt_list);
		list_del_init(&p->mnt_expire);
		list_del_init(&p->mnt_list);
		ns = p->mnt_ns;
		if (ns) {
			ns->mounts--;
			__touch_mnt_namespace(ns);
		}
		p->mnt_ns = NULL;
		if (how & UMOUNT_SYNC)
			p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;

		disconnect = disconnect_mount(p, how);
		if (mnt_has_parent(p)) {
			mnt_add_count(p->mnt_parent, -1);
			if (!disconnect) {
				/* Don't forget about p */
				list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts);
			} else {
				umount_mnt(p);
			}
		}
		change_mnt_propagation(p, MS_PRIVATE);
		if (disconnect)
			hlist_add_head(&p->mnt_umount, &unmounted);
	}
}

static void shrink_submounts(struct mount *mnt);

static int do_umount_root(struct super_block *sb)
{
	int ret = 0;

	down_write(&sb->s_umount);
	if (!sb_rdonly(sb)) {
		struct fs_context *fc;

		fc = fs_context_for_reconfigure(sb->s_root, SB_RDONLY,
						SB_RDONLY);
		if (IS_ERR(fc)) {
			ret = PTR_ERR(fc);
		} else {
			ret = parse_monolithic_mount_data(fc, NULL);
			if (!ret)
				ret = reconfigure_super(fc);
			put_fs_context(fc);
		}
	}
	up_write(&sb->s_umount);
	return ret;
}

static int do_umount(struct mount *mnt, int flags)
{
	struct super_block *sb = mnt->mnt.mnt_sb;
	int retval;

	retval = security_sb_umount(&mnt->mnt, flags);
	if (retval)
		return retval;

	/*
	 * Allow userspace to request a mountpoint be expired rather than
	 * unmounting unconditionally. Unmount only happens if:
	 *  (1) the mark is already set (the mark is cleared by mntput())
	 *  (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount]
	 */
	if (flags & MNT_EXPIRE) {
		if (&mnt->mnt == current->fs->root.mnt ||
		    flags & (MNT_FORCE | MNT_DETACH))
			return -EINVAL;

		/*
		 * probably don't strictly need the lock here if we examined
		 * all race cases, but it's a slowpath.
		 */
		lock_mount_hash();
		if (mnt_get_count(mnt) != 2) {
			unlock_mount_hash();
			return -EBUSY;
		}
		unlock_mount_hash();

		if (!xchg(&mnt->mnt_expiry_mark, 1))
			return -EAGAIN;
	}

	/*
	 * If we may have to abort operations to get out of this
	 * mount, and they will themselves hold resources we must
	 * allow the fs to do things. In the Unix tradition of
	 * 'Gee that's tricky lets do it in userspace' the umount_begin
	 * might fail to complete on the first run through as other tasks
	 * must return, and the like. That's for the mount program to worry
	 * about for the moment.
	 */
	if (flags & MNT_FORCE && sb->s_op->umount_begin) {
		sb->s_op->umount_begin(sb);
	}

	/*
	 * No sense to grab the lock for this test, but test itself looks
	 * somewhat bogus. Suggestions for better replacement?
	 * Ho-hum... In principle, we might treat that as umount + switch
	 * to rootfs. GC would eventually take care of the old vfsmount.
	 * Actually it makes sense, especially if rootfs would contain a
	 * /reboot - static binary that would close all descriptors and
	 * call reboot(9). Then init(8) could umount root and exec /reboot.
	 */
	if (&mnt->mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) {
		/*
		 * Special case for "unmounting" root ...
		 * we just try to remount it readonly.
		 */
		if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
			return -EPERM;
		return do_umount_root(sb);
	}

	namespace_lock();
	lock_mount_hash();

	/* Recheck MNT_LOCKED with the locks held */
	retval = -EINVAL;
	if (mnt->mnt.mnt_flags & MNT_LOCKED)
		goto out;

	event++;
	if (flags & MNT_DETACH) {
		if (!list_empty(&mnt->mnt_list))
			umount_tree(mnt, UMOUNT_PROPAGATE);
		retval = 0;
	} else {
		shrink_submounts(mnt);
		retval = -EBUSY;
		if (!propagate_mount_busy(mnt, 2)) {
			if (!list_empty(&mnt->mnt_list))
				umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
			retval = 0;
		}
	}
out:
	unlock_mount_hash();
	namespace_unlock();
	return retval;
}

/*
 * __detach_mounts - lazily unmount all mounts on the specified dentry
 *
 * During unlink, rmdir, and d_drop it is possible to lose the path
 * to an existing mountpoint, and wind up leaking the mount.
 * detach_mounts allows lazily unmounting those mounts instead of
 * leaking them.
 *
 * The caller may hold dentry->d_inode->i_rwsem.
 */
void __detach_mounts(struct dentry *dentry)
{
	struct mountpoint *mp;
	struct mount *mnt;

	namespace_lock();
	lock_mount_hash();
	mp = lookup_mountpoint(dentry);
	if (!mp)
		goto out_unlock;

	event++;
	while (!hlist_empty(&mp->m_list)) {
		mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
		if (mnt->mnt.mnt_flags & MNT_UMOUNT) {
			umount_mnt(mnt);
			hlist_add_head(&mnt->mnt_umount, &unmounted);
		}
		else umount_tree(mnt, UMOUNT_CONNECTED);
	}
	put_mountpoint(mp);
out_unlock:
	unlock_mount_hash();
	namespace_unlock();
}

/*
 * Is the caller allowed to modify his namespace?
 */
bool may_mount(void)
{
	return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
}

static void warn_mandlock(void)
{
	pr_warn_once("=======================================================\n"
		     "WARNING: The mand mount option has been deprecated and\n"
		     "         is ignored by this kernel. Remove the mand\n"
		     "         option from the mount to silence this warning.\n"
		     "=======================================================\n");
}

static int can_umount(const struct path *path, int flags)
{
	struct mount *mnt = real_mount(path->mnt);

	if (!may_mount())
		return -EPERM;
	if (path->dentry != path->mnt->mnt_root)
		return -EINVAL;
	if (!check_mnt(mnt))
		return -EINVAL;
	if (mnt->mnt.mnt_flags & MNT_LOCKED) /* Check optimistically */
		return -EINVAL;
	if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
		return -EPERM;
	return 0;
}

// caller is responsible for flags being sane
int path_umount(struct path *path, int flags)
{
	struct mount *mnt = real_mount(path->mnt);
	int ret;

	ret = can_umount(path, flags);
	if (!ret)
		ret = do_umount(mnt, flags);

	/* we mustn't call path_put() as that would clear mnt_expiry_mark */
	dput(path->dentry);
	mntput_no_expire(mnt);
	return ret;
}

static int ksys_umount(char __user *name, int flags)
{
	int lookup_flags = LOOKUP_MOUNTPOINT;
	struct path path;
	int ret;

	// basic validity checks done first
	if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW))
		return -EINVAL;

	if (!(flags & UMOUNT_NOFOLLOW))
		lookup_flags |= LOOKUP_FOLLOW;
	ret = user_path_at(AT_FDCWD, name, lookup_flags, &path);
	if (ret)
		return ret;
	return path_umount(&path, flags);
}

SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
{
	return ksys_umount(name, flags);
}
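
/*
 * From userspace this is reached via umount(2)/umount2(2); for example,
 * umount2("/mnt", MNT_DETACH) performs the lazy-unmount path above.
 */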

#ifdef __ARCH_WANT_SYS_OLDUMOUNT

/*
 *	The 2.0 compatible umount. No flags.
 */
SYSCALL_DEFINE1(oldumount, char __user *, name)
{
	return ksys_umount(name, 0);
}

#endif

static bool is_mnt_ns_file(struct dentry *dentry)
{
	/* Is this a proxy for a mount namespace? */
	return dentry->d_op == &ns_dentry_operations &&
	       dentry->d_fsdata == &mntns_operations;
}

static struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
{
	return container_of(ns, struct mnt_namespace, ns);
}

struct ns_common *from_mnt_ns(struct mnt_namespace *mnt)
{
	return &mnt->ns;
}

static bool mnt_ns_loop(struct dentry *dentry)
{
	/* Could bind mounting the mount namespace inode cause a
	 * mount namespace loop?
	 */
	struct mnt_namespace *mnt_ns;
	if (!is_mnt_ns_file(dentry))
		return false;

	mnt_ns = to_mnt_ns(get_proc_ns(dentry->d_inode));
	return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
}

struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
					int flag)
{
	struct mount *res, *p, *q, *r, *parent;

	if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(mnt))
		return ERR_PTR(-EINVAL);

	if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(dentry))
		return ERR_PTR(-EINVAL);

	res = q = clone_mnt(mnt, dentry, flag);
	if (IS_ERR(q))
		return q;

	q->mnt_mountpoint = mnt->mnt_mountpoint;

	p = mnt;
	list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
		struct mount *s;
		if (!is_subdir(r->mnt_mountpoint, dentry))
			continue;

		for (s = r; s; s = next_mnt(s, r)) {
			if (!(flag & CL_COPY_UNBINDABLE) &&
			    IS_MNT_UNBINDABLE(s)) {
				if (s->mnt.mnt_flags & MNT_LOCKED) {
					/* Both unbindable and locked. */
					q = ERR_PTR(-EPERM);
					goto out;
				} else {
					s = skip_mnt_tree(s);
					continue;
				}
			}
			if (!(flag & CL_COPY_MNT_NS_FILE) &&
			    is_mnt_ns_file(s->mnt.mnt_root)) {
				s = skip_mnt_tree(s);
				continue;
			}
			while (p != s->mnt_parent) {
				p = p->mnt_parent;
				q = q->mnt_parent;
			}
			p = s;
			parent = q;
			q = clone_mnt(p, p->mnt.mnt_root, flag);
			if (IS_ERR(q))
				goto out;
			lock_mount_hash();
			list_add_tail(&q->mnt_list, &res->mnt_list);
			attach_mnt(q, parent, p->mnt_mp);
			unlock_mount_hash();
		}
	}
	return res;
out:
	if (res) {
		lock_mount_hash();
		umount_tree(res, UMOUNT_SYNC);
		unlock_mount_hash();
	}
	return q;
}

/* Caller should check returned pointer for errors */
struct vfsmount *collect_mounts(const struct path *path)
{
	struct mount *tree;
	namespace_lock();
	if (!check_mnt(real_mount(path->mnt)))
		tree = ERR_PTR(-EINVAL);
	else
		tree = copy_tree(real_mount(path->mnt), path->dentry,
				 CL_COPY_ALL | CL_PRIVATE);
	namespace_unlock();
	if (IS_ERR(tree))
		return ERR_CAST(tree);
	return &tree->mnt;
}

static void free_mnt_ns(struct mnt_namespace *);
static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *, bool);

void dissolve_on_fput(struct vfsmount *mnt)
{
	struct mnt_namespace *ns;
	namespace_lock();
	lock_mount_hash();
	ns = real_mount(mnt)->mnt_ns;
	if (ns) {
		if (is_anon_ns(ns))
			umount_tree(real_mount(mnt), UMOUNT_CONNECTED);
		else
			ns = NULL;
	}
	unlock_mount_hash();
	namespace_unlock();
	if (ns)
		free_mnt_ns(ns);
}

void drop_collected_mounts(struct vfsmount *mnt)
{
	namespace_lock();
	lock_mount_hash();
	umount_tree(real_mount(mnt), 0);
	unlock_mount_hash();
	namespace_unlock();
}

static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
{
	struct mount *child;

	list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
		if (!is_subdir(child->mnt_mountpoint, dentry))
			continue;

		if (child->mnt.mnt_flags & MNT_LOCKED)
			return true;
	}
	return false;
}

/**
 * clone_private_mount - create a private clone of a path
 * @path: path to clone
 *
 * This creates a new vfsmount, which will be the clone of @path.  The new
 * mount will not be attached anywhere in the namespace and will be private
 * (i.e. changes to the originating mount won't be propagated into this).
 *
 * Release with mntput().
 */
struct vfsmount *clone_private_mount(const struct path *path)
{
	struct mount *old_mnt = real_mount(path->mnt);
	struct mount *new_mnt;

	down_read(&namespace_sem);
	if (IS_MNT_UNBINDABLE(old_mnt))
		goto invalid;

	if (!check_mnt(old_mnt))
		goto invalid;

	if (has_locked_children(old_mnt, path->dentry))
		goto invalid;

	new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
	up_read(&namespace_sem);

	if (IS_ERR(new_mnt))
		return ERR_CAST(new_mnt);

	/* Longterm mount to be removed by kern_unmount*() */
	new_mnt->mnt_ns = MNT_NS_INTERNAL;

	return &new_mnt->mnt;

invalid:
	up_read(&namespace_sem);
	return ERR_PTR(-EINVAL);
}
EXPORT_SYMBOL_GPL(clone_private_mount);

int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
		   struct vfsmount *root)
{
	struct mount *mnt;
	int res = f(root, arg);
	if (res)
		return res;
	list_for_each_entry(mnt, &real_mount(root)->mnt_list, mnt_list) {
		res = f(&mnt->mnt, arg);
		if (res)
			return res;
	}
	return 0;
}

static void lock_mnt_tree(struct mount *mnt)
{
	struct mount *p;

	for (p = mnt; p; p = next_mnt(p, mnt)) {
		int flags = p->mnt.mnt_flags;
		/* Don't allow unprivileged users to change mount flags */
		flags |= MNT_LOCK_ATIME;

		if (flags & MNT_READONLY)
			flags |= MNT_LOCK_READONLY;

		if (flags & MNT_NODEV)
			flags |= MNT_LOCK_NODEV;

		if (flags & MNT_NOSUID)
			flags |= MNT_LOCK_NOSUID;

		if (flags & MNT_NOEXEC)
			flags |= MNT_LOCK_NOEXEC;
		/* Don't allow unprivileged users to reveal what is under a mount */
		if (list_empty(&p->mnt_expire))
			flags |= MNT_LOCKED;
		p->mnt.mnt_flags = flags;
	}
}

static void cleanup_group_ids(struct mount *mnt, struct mount *end)
{
	struct mount *p;

	for (p = mnt; p != end; p = next_mnt(p, mnt)) {
		if (p->mnt_group_id && !IS_MNT_SHARED(p))
			mnt_release_group_id(p);
	}
}

static int invent_group_ids(struct mount *mnt, bool recurse)
{
	struct mount *p;

	for (p = mnt; p; p = recurse ? next_mnt(p, mnt) : NULL) {
		if (!p->mnt_group_id && !IS_MNT_SHARED(p)) {
			int err = mnt_alloc_group_id(p);
			if (err) {
				cleanup_group_ids(mnt, p);
				return err;
			}
		}
	}

	return 0;
}

int count_mounts(struct mnt_namespace *ns, struct mount *mnt)
{
	unsigned int max = READ_ONCE(sysctl_mount_max);
	unsigned int mounts = 0;
	struct mount *p;

	if (ns->mounts >= max)
		return -ENOSPC;
	max -= ns->mounts;
	if (ns->pending_mounts >= max)
		return -ENOSPC;
	max -= ns->pending_mounts;

	for (p = mnt; p; p = next_mnt(p, mnt))
		mounts++;

	if (mounts > max)
		return -ENOSPC;

	ns->pending_mounts += mounts;
	return 0;
}

/*
 *  @source_mnt : mount tree to be attached
 *  @dest_mnt   : mount the tree is attached to
 *  @dest_mp    : mountpoint the tree is attached at
 *
 *  NOTE: the table below explains the semantics when a source mount
 *  of a given type is attached to a destination mount of a given type.
 * ---------------------------------------------------------------------------
 * |         BIND MOUNT OPERATION                                            |
 * |**************************************************************************
 * | source-->| shared        |       private  |       slave    | unbindable |
 * | dest     |               |                |                |            |
 * |   |      |               |                |                |            |
 * |   v      |               |                |                |            |
 * |**************************************************************************
 * |  shared  | shared (++)   |     shared (+) |     shared(+++)|  invalid   |
 * |          |               |                |                |            |
 * |non-shared| shared (+)    |      private   |      slave (*) |  invalid   |
 * ***************************************************************************
 * A bind operation clones the source mount and mounts the clone on the
 * destination mount.
 *
 * (++)  the cloned mount is propagated to all the mounts in the propagation
 *	 tree of the destination mount and the cloned mount is added to
 *	 the peer group of the source mount.
 * (+)   the cloned mount is created under the destination mount and is marked
 *       as shared. The cloned mount is added to the peer group of the source
 *       mount.
 * (+++) the mount is propagated to all the mounts in the propagation tree
 *       of the destination mount and the cloned mount is made slave
 *       of the same master as that of the source mount. The cloned mount
 *       is marked as 'shared and slave'.
 * (*)   the cloned mount is made a slave of the same master as that of the
 *	 source mount.
 *
 * ---------------------------------------------------------------------------
 * |		MOVE MOUNT OPERATION                                         |
 * |**************************************************************************
 * | source-->| shared        |       private  |       slave    | unbindable |
 * | dest     |               |                |                |            |
 * |   |      |               |                |                |            |
 * |   v      |               |                |                |            |
 * |**************************************************************************
 * |  shared  | shared (+)    |     shared (+) |    shared(+++) |  invalid   |
 * |          |               |                |                |            |
 * |non-shared| shared (+*)   |      private   |    slave (*)   | unbindable |
 * ***************************************************************************
 *
 * (+)   the mount is moved to the destination and is then propagated to
 *	 all the mounts in the propagation tree of the destination mount.
 * (+*)  the mount is moved to the destination.
 * (+++) the mount is moved to the destination and is then propagated to
 *	 all the mounts belonging to the destination mount's propagation tree.
 *	 the mount is marked as 'shared and slave'.
 * (*)	 the mount continues to be a slave of the master of its source mount.
 *
 * if the source mount is a tree, the operations explained above are
 * applied to each mount in the tree.
 * Must be called without spinlocks held, because this function can sleep
 * in allocations.
 */
static int attach_recursive_mnt(struct mount *source_mnt,
			struct mount *dest_mnt,
			struct mountpoint *dest_mp,
			bool moving)
{
	struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
	HLIST_HEAD(tree_list);
	struct mnt_namespace *ns = dest_mnt->mnt_ns;
	struct mountpoint *smp;
	struct mount *child, *p;
	struct hlist_node *n;
	int err;

	/* Preallocate a mountpoint in case the new mounts need
	 * to be tucked under other mounts.
	 */
	smp = get_mountpoint(source_mnt->mnt.mnt_root);
	if (IS_ERR(smp))
		return PTR_ERR(smp);

	/* Is there space to add these mounts to the mount namespace? */
	if (!moving) {
		err = count_mounts(ns, source_mnt);
		if (err)
			goto out;
	}

	if (IS_MNT_SHARED(dest_mnt)) {
		err = invent_group_ids(source_mnt, true);
		if (err)
			goto out;
		err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
		lock_mount_hash();
		if (err)
			goto out_cleanup_ids;
		for (p = source_mnt; p; p = next_mnt(p, source_mnt))
			set_mnt_shared(p);
	} else {
		lock_mount_hash();
	}
	if (moving) {
		unhash_mnt(source_mnt);
		attach_mnt(source_mnt, dest_mnt, dest_mp);
		touch_mnt_namespace(source_mnt->mnt_ns);
	} else {
		if (source_mnt->mnt_ns) {
			/* move from anon - the caller will destroy */
			list_del_init(&source_mnt->mnt_ns->list);
		}
		mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
		commit_tree(source_mnt);
	}

	hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
		struct mount *q;
		hlist_del_init(&child->mnt_hash);
		q = __lookup_mnt(&child->mnt_parent->mnt,
				 child->mnt_mountpoint);
		if (q)
			mnt_change_mountpoint(child, smp, q);
		/* Notice when we are propagating across user namespaces */
		if (child->mnt_parent->mnt_ns->user_ns != user_ns)
			lock_mnt_tree(child);
		child->mnt.mnt_flags &= ~MNT_LOCKED;
		commit_tree(child);
	}
	put_mountpoint(smp);
	unlock_mount_hash();

	return 0;

out_cleanup_ids:
	while (!hlist_empty(&tree_list)) {
		child = hlist_entry(tree_list.first, struct mount, mnt_hash);
		child->mnt_parent->mnt_ns->pending_mounts = 0;
		umount_tree(child, UMOUNT_SYNC);
	}
	unlock_mount_hash();
	cleanup_group_ids(source_mnt, NULL);
out:
	ns->pending_mounts = 0;

	read_seqlock_excl(&mount_lock);
	put_mountpoint(smp);
	read_sequnlock_excl(&mount_lock);

	return err;
}

static struct mountpoint *lock_mount(struct path *path)
{
	struct vfsmount *mnt;
	struct dentry *dentry = path->dentry;
retry:
	inode_lock(dentry->d_inode);
	if (unlikely(cant_mount(dentry))) {
		inode_unlock(dentry->d_inode);
		return ERR_PTR(-ENOENT);
	}
	namespace_lock();
	mnt = lookup_mnt(path);
	if (likely(!mnt)) {
		struct mountpoint *mp = get_mountpoint(dentry);
		if (IS_ERR(mp)) {
			namespace_unlock();
			inode_unlock(dentry->d_inode);
			return mp;
		}
		return mp;
	}
	namespace_unlock();
	inode_unlock(path->dentry->d_inode);
	path_put(path);
	path->mnt = mnt;
	dentry = path->dentry = dget(mnt->mnt_root);
	goto retry;
}

static void unlock_mount(struct mountpoint *where)
{
	struct dentry *dentry = where->m_dentry;

	read_seqlock_excl(&mount_lock);
	put_mountpoint(where);
	read_sequnlock_excl(&mount_lock);

	namespace_unlock();
	inode_unlock(dentry->d_inode);
}

static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
{
	if (mnt->mnt.mnt_sb->s_flags & SB_NOUSER)
		return -EINVAL;

	if (d_is_dir(mp->m_dentry) !=
	      d_is_dir(mnt->mnt.mnt_root))
		return -ENOTDIR;

	return attach_recursive_mnt(mnt, p, mp, false);
}

/*
 * Sanity check the flags to change_mnt_propagation.
 */
static int flags_to_propagation_type(int ms_flags)
{
	int type = ms_flags & ~(MS_REC | MS_SILENT);

	/* Fail if any non-propagation flags are set */
	if (type & ~(MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
		return 0;
	/* Only one propagation flag should be set */
	if (!is_power_of_2(type))
		return 0;
	return type;
}
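
/*
 * For example, mount(2) called as
 * mount(NULL, "/mnt", NULL, MS_SHARED | MS_REC, NULL) reaches
 * do_change_type() below with type MS_SHARED and recursion enabled.
 */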

/*
 * recursively change the type of the mountpoint.
 */
static int do_change_type(struct path *path, int ms_flags)
{
	struct mount *m;
	struct mount *mnt = real_mount(path->mnt);
	int recurse = ms_flags & MS_REC;
	int type;
	int err = 0;

	if (path->dentry != path->mnt->mnt_root)
		return -EINVAL;

	type = flags_to_propagation_type(ms_flags);
	if (!type)
		return -EINVAL;

	namespace_lock();
	if (type == MS_SHARED) {
		err = invent_group_ids(mnt, recurse);
		if (err)
			goto out_unlock;
	}

	lock_mount_hash();
	for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
		change_mnt_propagation(m, type);
	unlock_mount_hash();

out_unlock:
	namespace_unlock();
	return err;
}

static struct mount *__do_loopback(struct path *old_path, int recurse)
{
	struct mount *mnt = ERR_PTR(-EINVAL), *old = real_mount(old_path->mnt);

	if (IS_MNT_UNBINDABLE(old))
		return mnt;

	if (!check_mnt(old) && old_path->dentry->d_op != &ns_dentry_operations)
		return mnt;

	if (!recurse && has_locked_children(old, old_path->dentry))
		return mnt;

	if (recurse)
		mnt = copy_tree(old, old_path->dentry, CL_COPY_MNT_NS_FILE);
	else
		mnt = clone_mnt(old, old_path->dentry, 0);

	if (!IS_ERR(mnt))
		mnt->mnt.mnt_flags &= ~MNT_LOCKED;

	return mnt;
}

/*
 * do loopback mount.
 */
static int do_loopback(struct path *path, const char *old_name,
				int recurse)
{
	struct path old_path;
	struct mount *mnt = NULL, *parent;
	struct mountpoint *mp;
	int err;
	if (!old_name || !*old_name)
		return -EINVAL;
	err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path);
	if (err)
		return err;

	err = -EINVAL;
	if (mnt_ns_loop(old_path.dentry))
		goto out;

	mp = lock_mount(path);
	if (IS_ERR(mp)) {
		err = PTR_ERR(mp);
		goto out;
	}

	parent = real_mount(path->mnt);
	if (!check_mnt(parent))
		goto out2;

	mnt = __do_loopback(&old_path, recurse);
	if (IS_ERR(mnt)) {
		err = PTR_ERR(mnt);
		goto out2;
	}

	err = graft_tree(mnt, parent, mp);
	if (err) {
		lock_mount_hash();
		umount_tree(mnt, UMOUNT_SYNC);
		unlock_mount_hash();
	}
out2:
	unlock_mount(mp);
out:
	path_put(&old_path);
	return err;
}

static struct file *open_detached_copy(struct path *path, bool recursive)
{
	struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
	struct mnt_namespace *ns = alloc_mnt_ns(user_ns, true);
	struct mount *mnt, *p;
	struct file *file;

	if (IS_ERR(ns))
		return ERR_CAST(ns);

	namespace_lock();
	mnt = __do_loopback(path, recursive);
	if (IS_ERR(mnt)) {
		namespace_unlock();
		free_mnt_ns(ns);
		return ERR_CAST(mnt);
	}

	lock_mount_hash();
	for (p = mnt; p; p = next_mnt(p, mnt)) {
		p->mnt_ns = ns;
		ns->mounts++;
	}
	ns->root = mnt;
	list_add_tail(&ns->list, &mnt->mnt_list);
	mntget(&mnt->mnt);
	unlock_mount_hash();
	namespace_unlock();

	mntput(path->mnt);
	path->mnt = &mnt->mnt;
	file = dentry_open(path, O_PATH, current_cred());
	if (IS_ERR(file))
		dissolve_on_fput(path->mnt);
	else
		file->f_mode |= FMODE_NEED_UNMOUNT;
	return file;
}

SYSCALL_DEFINE3(open_tree, int, dfd, const char __user *, filename, unsigned, flags)
{
	struct file *file;
	struct path path;
	int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW;
	bool detached = flags & OPEN_TREE_CLONE;
	int error;
	int fd;

	BUILD_BUG_ON(OPEN_TREE_CLOEXEC != O_CLOEXEC);

	if (flags & ~(AT_EMPTY_PATH | AT_NO_AUTOMOUNT | AT_RECURSIVE |
		      AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLONE |
		      OPEN_TREE_CLOEXEC))
		return -EINVAL;

	if ((flags & (AT_RECURSIVE | OPEN_TREE_CLONE)) == AT_RECURSIVE)
		return -EINVAL;

	if (flags & AT_NO_AUTOMOUNT)
		lookup_flags &= ~LOOKUP_AUTOMOUNT;
	if (flags & AT_SYMLINK_NOFOLLOW)
		lookup_flags &= ~LOOKUP_FOLLOW;
	if (flags & AT_EMPTY_PATH)
		lookup_flags |= LOOKUP_EMPTY;

	if (detached && !may_mount())
		return -EPERM;

	fd = get_unused_fd_flags(flags & O_CLOEXEC);
	if (fd < 0)
		return fd;

	error = user_path_at(dfd, filename, lookup_flags, &path);
	if (unlikely(error)) {
		file = ERR_PTR(error);
	} else {
		if (detached)
			file = open_detached_copy(&path, flags & AT_RECURSIVE);
		else
			file = dentry_open(&path, O_PATH, current_cred());
		path_put(&path);
	}
	if (IS_ERR(file)) {
		put_unused_fd(fd);
		return PTR_ERR(file);
	}
	fd_install(fd, file);
	return fd;
}
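
/*
 * Userspace example: open_tree(AT_FDCWD, "/mnt", OPEN_TREE_CLONE |
 * AT_RECURSIVE) yields an O_PATH fd referring to a detached copy of the
 * tree at /mnt, which can later be attached elsewhere with move_mount(2).
 */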

/*
 * Don't allow locked mount flags to be cleared.
 *
 * No locks need to be held here while testing the various MNT_LOCK
 * flags because those flags can never be cleared once they are set.
 */
static bool can_change_locked_flags(struct mount *mnt, unsigned int mnt_flags)
{
	unsigned int fl = mnt->mnt.mnt_flags;

	if ((fl & MNT_LOCK_READONLY) &&
	    !(mnt_flags & MNT_READONLY))
		return false;

	if ((fl & MNT_LOCK_NODEV) &&
	    !(mnt_flags & MNT_NODEV))
		return false;

	if ((fl & MNT_LOCK_NOSUID) &&
	    !(mnt_flags & MNT_NOSUID))
		return false;

	if ((fl & MNT_LOCK_NOEXEC) &&
	    !(mnt_flags & MNT_NOEXEC))
		return false;

	if ((fl & MNT_LOCK_ATIME) &&
	    ((fl & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK)))
		return false;

	return true;
}

static int change_mount_ro_state(struct mount *mnt, unsigned int mnt_flags)
{
	bool readonly_request = (mnt_flags & MNT_READONLY);

	if (readonly_request == __mnt_is_readonly(&mnt->mnt))
		return 0;

	if (readonly_request)
		return mnt_make_readonly(mnt);

	mnt->mnt.mnt_flags &= ~MNT_READONLY;
	return 0;
}

static void set_mount_attributes(struct mount *mnt, unsigned int mnt_flags)
{
	mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
	mnt->mnt.mnt_flags = mnt_flags;
	touch_mnt_namespace(mnt->mnt_ns);
}

static void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount *mnt)
{
	struct super_block *sb = mnt->mnt_sb;

	if (!__mnt_is_readonly(mnt) &&
	   (!(sb->s_iflags & SB_I_TS_EXPIRY_WARNED)) &&
	   (ktime_get_real_seconds() + TIME_UPTIME_SEC_MAX > sb->s_time_max)) {
		char *buf = (char *)__get_free_page(GFP_KERNEL);
		char *mntpath = buf ? d_path(mountpoint, buf, PAGE_SIZE) : ERR_PTR(-ENOMEM);
		struct tm tm;

		time64_to_tm(sb->s_time_max, 0, &tm);

		pr_warn("%s filesystem being %s at %s supports timestamps until %04ld (0x%llx)\n",
			sb->s_type->name,
			is_mounted(mnt) ? "remounted" : "mounted",
			mntpath,
			tm.tm_year+1900, (unsigned long long)sb->s_time_max);

		free_page((unsigned long)buf);
		sb->s_iflags |= SB_I_TS_EXPIRY_WARNED;
	}
}
2632
2633
2634
2635
2636
2637
2638 static int do_reconfigure_mnt(struct path *path, unsigned int mnt_flags)
2639 {
2640 struct super_block *sb = path->mnt->mnt_sb;
2641 struct mount *mnt = real_mount(path->mnt);
2642 int ret;
2643
2644 if (!check_mnt(mnt))
2645 return -EINVAL;
2646
2647 if (path->dentry != mnt->mnt.mnt_root)
2648 return -EINVAL;
2649
2650 if (!can_change_locked_flags(mnt, mnt_flags))
2651 return -EPERM;
2652
2653
2654
2655
2656
2657 down_read(&sb->s_umount);
2658 lock_mount_hash();
2659 ret = change_mount_ro_state(mnt, mnt_flags);
2660 if (ret == 0)
2661 set_mount_attributes(mnt, mnt_flags);
2662 unlock_mount_hash();
2663 up_read(&sb->s_umount);
2664
2665 mnt_warn_timestamp_expiry(path, &mnt->mnt);
2666
2667 return ret;
2668 }
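
/*
 * Userspace sketch: this path services a remount that only changes
 * per-mountpoint flags, e.g. making an existing bind mount read-only
 * without touching the shared superblock:
 *
 *	mount(NULL, "/mnt/ro-view", NULL,
 *	      MS_REMOUNT | MS_BIND | MS_RDONLY, NULL);
 *
 * The path "/mnt/ro-view" is illustrative only.
 */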
2669
2670 /*
2671  * change filesystem flags. dir should be a physical root of filesystem.
2672  * If you've mounted a non-root directory somewhere and want to do remount
2673  * on it - tough luck.
2674  */
2675 static int do_remount(struct path *path, int ms_flags, int sb_flags,
2676 int mnt_flags, void *data)
2677 {
2678 int err;
2679 struct super_block *sb = path->mnt->mnt_sb;
2680 struct mount *mnt = real_mount(path->mnt);
2681 struct fs_context *fc;
2682
2683 if (!check_mnt(mnt))
2684 return -EINVAL;
2685
2686 if (path->dentry != path->mnt->mnt_root)
2687 return -EINVAL;
2688
2689 if (!can_change_locked_flags(mnt, mnt_flags))
2690 return -EPERM;
2691
2692 fc = fs_context_for_reconfigure(path->dentry, sb_flags, MS_RMT_MASK);
2693 if (IS_ERR(fc))
2694 return PTR_ERR(fc);
2695
2696 fc->oldapi = true;
2697 err = parse_monolithic_mount_data(fc, data);
2698 if (!err) {
2699 down_write(&sb->s_umount);
2700 err = -EPERM;
2701 if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) {
2702 err = reconfigure_super(fc);
2703 if (!err) {
2704 lock_mount_hash();
2705 set_mount_attributes(mnt, mnt_flags);
2706 unlock_mount_hash();
2707 }
2708 }
2709 up_write(&sb->s_umount);
2710 }
2711
2712 mnt_warn_timestamp_expiry(path, &mnt->mnt);
2713
2714 put_fs_context(fc);
2715 return err;
2716 }
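
/*
 * Userspace sketch: a plain MS_REMOUNT (without MS_BIND) ends up here and
 * reconfigures the superblock itself, so filesystem-specific options may
 * ride along in the data page:
 *
 *	mount(NULL, "/mnt/data", NULL, MS_REMOUNT | MS_RDONLY, NULL);
 *	mount(NULL, "/mnt/data", NULL, MS_REMOUNT, "errors=remount-ro");
 *
 * (the second form assumes an ext4-style "errors=" mount option).
 */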
2717
2718 static inline int tree_contains_unbindable(struct mount *mnt)
2719 {
2720 struct mount *p;
2721 for (p = mnt; p; p = next_mnt(p, mnt)) {
2722 if (IS_MNT_UNBINDABLE(p))
2723 return 1;
2724 }
2725 return 0;
2726 }
2727
2728 /*
2729  * Check that there aren't references to earlier/same mount namespaces in the
2730  * specified subtree.  Such references can act as pins for mount namespaces
2731  * that aren't checked by the mount-cycle checking code, thereby allowing
2732  * cycles to be made.
2733  */
2734 static bool check_for_nsfs_mounts(struct mount *subtree)
2735 {
2736 struct mount *p;
2737 bool ret = false;
2738
2739 lock_mount_hash();
2740 for (p = subtree; p; p = next_mnt(p, subtree))
2741 if (mnt_ns_loop(p->mnt.mnt_root))
2742 goto out;
2743
2744 ret = true;
2745 out:
2746 unlock_mount_hash();
2747 return ret;
2748 }
2749
2750 static int do_set_group(struct path *from_path, struct path *to_path)
2751 {
2752 struct mount *from, *to;
2753 int err;
2754
2755 from = real_mount(from_path->mnt);
2756 to = real_mount(to_path->mnt);
2757
2758 namespace_lock();
2759
2760 err = -EINVAL;
2761 /* To and From must be mounted */
2762 if (!is_mounted(&from->mnt))
2763 goto out;
2764 if (!is_mounted(&to->mnt))
2765 goto out;
2766
2767 err = -EPERM;
2768 /* We should be allowed to modify mount namespaces of both mounts */
2769 if (!ns_capable(from->mnt_ns->user_ns, CAP_SYS_ADMIN))
2770 goto out;
2771 if (!ns_capable(to->mnt_ns->user_ns, CAP_SYS_ADMIN))
2772 goto out;
2773
2774 err = -EINVAL;
2775 /* To and From paths should be mount roots */
2776 if (from_path->dentry != from_path->mnt->mnt_root)
2777 goto out;
2778 if (to_path->dentry != to_path->mnt->mnt_root)
2779 goto out;
2780
2781 /* Setting sharing groups is only allowed across same superblock */
2782 if (from->mnt.mnt_sb != to->mnt.mnt_sb)
2783 goto out;
2784
2785 /* From mount root should be wider than To mount root */
2786 if (!is_subdir(to->mnt.mnt_root, from->mnt.mnt_root))
2787 goto out;
2788
2789 /* From mount should not have locked children in place of To's root */
2790 if (has_locked_children(from, to->mnt.mnt_root))
2791 goto out;
2792
2793 /* Setting sharing groups is only allowed on private mounts */
2794 if (IS_MNT_SHARED(to) || IS_MNT_SLAVE(to))
2795 goto out;
2796
2797 /* From mount itself should be in a sharing group (shared or slave) */
2798 if (!IS_MNT_SHARED(from) && !IS_MNT_SLAVE(from))
2799 goto out;
2800
2801 if (IS_MNT_SLAVE(from)) {
2802 struct mount *m = from->mnt_master;
2803
2804 list_add(&to->mnt_slave, &m->mnt_slave_list);
2805 to->mnt_master = m;
2806 }
2807
2808 if (IS_MNT_SHARED(from)) {
2809 to->mnt_group_id = from->mnt_group_id;
2810 list_add(&to->mnt_share, &from->mnt_share);
2811 lock_mount_hash();
2812 set_mnt_shared(to);
2813 unlock_mount_hash();
2814 }
2815
2816 err = 0;
2817 out:
2818 namespace_unlock();
2819 return err;
2820 }
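
/*
 * Userspace sketch: do_set_group() is reached through move_mount(2) with
 * MOVE_MOUNT_SET_GROUP and copies the sharing group (peer or slave state)
 * of one mount onto another mount of the same superblock.  Assuming
 * SYS_move_mount and two open_tree-style fds:
 *
 *	syscall(SYS_move_mount, from_fd, "", to_fd, "",
 *		MOVE_MOUNT_SET_GROUP | MOVE_MOUNT_F_EMPTY_PATH |
 *		MOVE_MOUNT_T_EMPTY_PATH);
 */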
2821
2822 static int do_move_mount(struct path *old_path, struct path *new_path)
2823 {
2824 struct mnt_namespace *ns;
2825 struct mount *p;
2826 struct mount *old;
2827 struct mount *parent;
2828 struct mountpoint *mp, *old_mp;
2829 int err;
2830 bool attached;
2831
2832 mp = lock_mount(new_path);
2833 if (IS_ERR(mp))
2834 return PTR_ERR(mp);
2835
2836 old = real_mount(old_path->mnt);
2837 p = real_mount(new_path->mnt);
2838 parent = old->mnt_parent;
2839 attached = mnt_has_parent(old);
2840 old_mp = old->mnt_mp;
2841 ns = old->mnt_ns;
2842
2843 err = -EINVAL;
2844 /* The mountpoint must be in our namespace. */
2845 if (!check_mnt(p))
2846 goto out;
2847
2848 /* The thing moved must be mounted... */
2849 if (!is_mounted(&old->mnt))
2850 goto out;
2851
2852 /* ... and either ours or the root of anon namespace */
2853 if (!(attached ? check_mnt(old) : is_anon_ns(ns)))
2854 goto out;
2855
2856 if (old->mnt.mnt_flags & MNT_LOCKED)
2857 goto out;
2858
2859 if (old_path->dentry != old_path->mnt->mnt_root)
2860 goto out;
2861
2862 if (d_is_dir(new_path->dentry) !=
2863 d_is_dir(old_path->dentry))
2864 goto out;
2865
2866
2867 /* Don't move a mount residing in a shared parent. */
2868 if (attached && IS_MNT_SHARED(parent))
2869 goto out;
2870 /*
2871  * Don't move a mount tree containing unbindable mounts to a destination
2872  * mount which is shared.
2873  */
2874 if (IS_MNT_SHARED(p) && tree_contains_unbindable(old))
2875 goto out;
2876 err = -ELOOP;
2877 if (!check_for_nsfs_mounts(old))
2878 goto out;
2879 for (; mnt_has_parent(p); p = p->mnt_parent)
2880 if (p == old)
2881 goto out;
2882
2883 err = attach_recursive_mnt(old, real_mount(new_path->mnt), mp,
2884 attached);
2885 if (err)
2886 goto out;
2887
2888 /* if the mount is moved, it should no longer expire
2889  * automatically */
2890 list_del_init(&old->mnt_expire);
2891 if (attached)
2892 put_mountpoint(old_mp);
2893 out:
2894 unlock_mount(mp);
2895 if (!err) {
2896 if (attached)
2897 mntput_no_expire(parent);
2898 else
2899 free_mnt_ns(ns);
2900 }
2901 return err;
2902 }
2903
2904 static int do_move_mount_old(struct path *path, const char *old_name)
2905 {
2906 struct path old_path;
2907 int err;
2908
2909 if (!old_name || !*old_name)
2910 return -EINVAL;
2911
2912 err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
2913 if (err)
2914 return err;
2915
2916 err = do_move_mount(&old_path, path);
2917 path_put(&old_path);
2918 return err;
2919 }
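
/*
 * Userspace sketch: do_move_mount_old() is the legacy MS_MOVE entry point,
 * i.e. "mount --move":
 *
 *	mount("/mnt/old-place", "/mnt/new-place", NULL, MS_MOVE, NULL);
 */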
2920
2921 /*
2922  * add a mount into a namespace's mount tree
2923  */
2924 static int do_add_mount(struct mount *newmnt, struct mountpoint *mp,
2925 const struct path *path, int mnt_flags)
2926 {
2927 struct mount *parent = real_mount(path->mnt);
2928
2929 mnt_flags &= ~MNT_INTERNAL_FLAGS;
2930
2931 if (unlikely(!check_mnt(parent))) {
2932 /* that's acceptable only for automounts done in private ns */
2933 if (!(mnt_flags & MNT_SHRINKABLE))
2934 return -EINVAL;
2935 /* ... and for those we'd better have mountpoint still alive */
2936 if (!parent->mnt_ns)
2937 return -EINVAL;
2938 }
2939
2940 /* Refuse the same filesystem on the same mount point */
2941 if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb &&
2942 path->mnt->mnt_root == path->dentry)
2943 return -EBUSY;
2944
2945 if (d_is_symlink(newmnt->mnt.mnt_root))
2946 return -EINVAL;
2947
2948 newmnt->mnt.mnt_flags = mnt_flags;
2949 return graft_tree(newmnt, parent, mp);
2950 }
2951
2952 static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags);
2953
2954 /*
2955  * Create a new mount using a superblock configuration and request it
2956  * be added to the namespace tree.
2957  */
2958 static int do_new_mount_fc(struct fs_context *fc, struct path *mountpoint,
2959 unsigned int mnt_flags)
2960 {
2961 struct vfsmount *mnt;
2962 struct mountpoint *mp;
2963 struct super_block *sb = fc->root->d_sb;
2964 int error;
2965
2966 error = security_sb_kern_mount(sb);
2967 if (!error && mount_too_revealing(sb, &mnt_flags))
2968 error = -EPERM;
2969
2970 if (unlikely(error)) {
2971 fc_drop_locked(fc);
2972 return error;
2973 }
2974
2975 up_write(&sb->s_umount);
2976
2977 mnt = vfs_create_mount(fc);
2978 if (IS_ERR(mnt))
2979 return PTR_ERR(mnt);
2980
2981 mnt_warn_timestamp_expiry(mountpoint, mnt);
2982
2983 mp = lock_mount(mountpoint);
2984 if (IS_ERR(mp)) {
2985 mntput(mnt);
2986 return PTR_ERR(mp);
2987 }
2988 error = do_add_mount(real_mount(mnt), mp, mountpoint, mnt_flags);
2989 unlock_mount(mp);
2990 if (error < 0)
2991 mntput(mnt);
2992 return error;
2993 }
2994
2995 /*
2996  * create a new mount for userspace and request it to be added into the
2997  * namespace's tree
2998  */
2999 static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
3000 int mnt_flags, const char *name, void *data)
3001 {
3002 struct file_system_type *type;
3003 struct fs_context *fc;
3004 const char *subtype = NULL;
3005 int err = 0;
3006
3007 if (!fstype)
3008 return -EINVAL;
3009
3010 type = get_fs_type(fstype);
3011 if (!type)
3012 return -ENODEV;
3013
3014 if (type->fs_flags & FS_HAS_SUBTYPE) {
3015 subtype = strchr(fstype, '.');
3016 if (subtype) {
3017 subtype++;
3018 if (!*subtype) {
3019 put_filesystem(type);
3020 return -EINVAL;
3021 }
3022 }
3023 }
3024
3025 fc = fs_context_for_mount(type, sb_flags);
3026 put_filesystem(type);
3027 if (IS_ERR(fc))
3028 return PTR_ERR(fc);
3029
3030 if (subtype)
3031 err = vfs_parse_fs_string(fc, "subtype",
3032 subtype, strlen(subtype));
3033 if (!err && name)
3034 err = vfs_parse_fs_string(fc, "source", name, strlen(name));
3035 if (!err)
3036 err = parse_monolithic_mount_data(fc, data);
3037 if (!err && !mount_capable(fc))
3038 err = -EPERM;
3039 if (!err)
3040 err = vfs_get_tree(fc);
3041 if (!err)
3042 err = do_new_mount_fc(fc, path, mnt_flags);
3043
3044 put_fs_context(fc);
3045 return err;
3046 }
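
/*
 * Userspace sketch: an ordinary new mount such as
 *
 *	mount("none", "/tmp/scratch", "tmpfs", MS_NOSUID | MS_NODEV,
 *	      "size=64m");
 *
 * takes the path above: get_fs_type(), a new fs_context, option parsing,
 * vfs_get_tree() and finally do_new_mount_fc().
 */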
3047
3048 int finish_automount(struct vfsmount *m, const struct path *path)
3049 {
3050 struct dentry *dentry = path->dentry;
3051 struct mountpoint *mp;
3052 struct mount *mnt;
3053 int err;
3054
3055 if (!m)
3056 return 0;
3057 if (IS_ERR(m))
3058 return PTR_ERR(m);
3059
3060 mnt = real_mount(m);
3061
3062 /* The new mount record should have at least 2 refs to prevent it being
3063  * expired before we get a chance to add it */
3064 BUG_ON(mnt_get_count(mnt) < 2);
3065
3066 if (m->mnt_sb == path->mnt->mnt_sb &&
3067 m->mnt_root == dentry) {
3068 err = -ELOOP;
3069 goto discard;
3070 }
3071
3072 /*
3073  * we don't want to use lock_mount() - in this case finding something
3074  * that overmounts our mountpoint to be means "quietly drop what we've
3075  * got", not "try to mount it on top".
3076  */
3077 inode_lock(dentry->d_inode);
3078 namespace_lock();
3079 if (unlikely(cant_mount(dentry))) {
3080 err = -ENOENT;
3081 goto discard_locked;
3082 }
3083 rcu_read_lock();
3084 if (unlikely(__lookup_mnt(path->mnt, dentry))) {
3085 rcu_read_unlock();
3086 err = 0;
3087 goto discard_locked;
3088 }
3089 rcu_read_unlock();
3090 mp = get_mountpoint(dentry);
3091 if (IS_ERR(mp)) {
3092 err = PTR_ERR(mp);
3093 goto discard_locked;
3094 }
3095
3096 err = do_add_mount(mnt, mp, path, path->mnt->mnt_flags | MNT_SHRINKABLE);
3097 unlock_mount(mp);
3098 if (unlikely(err))
3099 goto discard;
3100 mntput(m);
3101 return 0;
3102
3103 discard_locked:
3104 namespace_unlock();
3105 inode_unlock(dentry->d_inode);
3106 discard:
3107 /* remove m from any expiration list it may be on */
3108 if (!list_empty(&mnt->mnt_expire)) {
3109 namespace_lock();
3110 list_del_init(&mnt->mnt_expire);
3111 namespace_unlock();
3112 }
3113 mntput(m);
3114 mntput(m);
3115 return err;
3116 }
3117
3118 /**
3119  * mnt_set_expiry - Put a mount on an expiration list
3120  * @mnt: The mount to put on the list.
3121  * @expiry_list: The list to put the mount on.
3122  */
3123 void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
3124 {
3125 namespace_lock();
3126
3127 list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list);
3128
3129 namespace_unlock();
3130 }
3131 EXPORT_SYMBOL(mnt_set_expiry);
3132
3133 /*
3134  * process a list of expirable mountpoints with the intent of discarding any
3135  * mountpoints that aren't in use and are expirable; leave pulling the mount
3136  * count down to zero to the caller
3137  */
3138 void mark_mounts_for_expiry(struct list_head *mounts)
3139 {
3140 struct mount *mnt, *next;
3141 LIST_HEAD(graveyard);
3142
3143 if (list_empty(mounts))
3144 return;
3145
3146 namespace_lock();
3147 lock_mount_hash();
3148
3149 /* extract from the expiration list every vfsmount that matches the
3150  * following criteria:
3151  * - only referenced by its parent vfsmount
3152  * - still marked for expiry (marked on the last call here; marks are
3153  *   cleared by mntput())
3154  */
3155 list_for_each_entry_safe(mnt, next, mounts, mnt_expire) {
3156 if (!xchg(&mnt->mnt_expiry_mark, 1) ||
3157 propagate_mount_busy(mnt, 1))
3158 continue;
3159 list_move(&mnt->mnt_expire, &graveyard);
3160 }
3161 while (!list_empty(&graveyard)) {
3162 mnt = list_first_entry(&graveyard, struct mount, mnt_expire);
3163 touch_mnt_namespace(mnt->mnt_ns);
3164 umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
3165 }
3166 unlock_mount_hash();
3167 namespace_unlock();
3168 }
3169
3170 EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);
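
/*
 * In-kernel usage sketch (an assumed pattern, modelled on automounting
 * filesystems): keep a private expiry list, enter each automounted
 * vfsmount on it, and kick the scan from periodic delayed work.  A mount
 * is torn down on the second pass that finds it unused, since the first
 * pass only sets mnt_expiry_mark.
 *
 *	static LIST_HEAD(example_automount_list);	// hypothetical list
 *
 *	// in ->d_automount(), after creating newmnt:
 *	mnt_set_expiry(newmnt, &example_automount_list);
 *
 *	// from a periodic worker:
 *	mark_mounts_for_expiry(&example_automount_list);
 */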
3171
3172 /*
3173  * Ripoff of 'select_parent()'
3174  *
3175  * search the list of submounts for a given mountpoint, and move any
3176  * shrinkable submounts to the 'graveyard' list.
3177  */
3178 static int select_submounts(struct mount *parent, struct list_head *graveyard)
3179 {
3180 struct mount *this_parent = parent;
3181 struct list_head *next;
3182 int found = 0;
3183
3184 repeat:
3185 next = this_parent->mnt_mounts.next;
3186 resume:
3187 while (next != &this_parent->mnt_mounts) {
3188 struct list_head *tmp = next;
3189 struct mount *mnt = list_entry(tmp, struct mount, mnt_child);
3190
3191 next = tmp->next;
3192 if (!(mnt->mnt.mnt_flags & MNT_SHRINKABLE))
3193 continue;
3194
3195
3196 /* Descend a level if the d_mounts list is non-empty */
3197 if (!list_empty(&mnt->mnt_mounts)) {
3198 this_parent = mnt;
3199 goto repeat;
3200 }
3201
3202 if (!propagate_mount_busy(mnt, 1)) {
3203 list_move_tail(&mnt->mnt_expire, graveyard);
3204 found++;
3205 }
3206 }
3207
3208
3209 /* All done at this level ... ascend and resume the search */
3210 if (this_parent != parent) {
3211 next = this_parent->mnt_child.next;
3212 this_parent = this_parent->mnt_parent;
3213 goto resume;
3214 }
3215 return found;
3216 }
3217
3218 /*
3219  * process a list of expirable mountpoints with the intent of discarding any
3220  * submounts of a specific parent mountpoint
3221  *
3222  * mount_lock must be held for write
3223  */
3224 static void shrink_submounts(struct mount *mnt)
3225 {
3226 LIST_HEAD(graveyard);
3227 struct mount *m;
3228
3229 /* extract submounts of 'mountpoint' from the expiration list */
3230 while (select_submounts(mnt, &graveyard)) {
3231 while (!list_empty(&graveyard)) {
3232 m = list_first_entry(&graveyard, struct mount,
3233 mnt_expire);
3234 touch_mnt_namespace(m->mnt_ns);
3235 umount_tree(m, UMOUNT_PROPAGATE|UMOUNT_SYNC);
3236 }
3237 }
3238 }
3239
3240 static void *copy_mount_options(const void __user *data)
3241 {
3242 char *copy;
3243 unsigned left, offset;
3244
3245 if (!data)
3246 return NULL;
3247
3248 copy = kmalloc(PAGE_SIZE, GFP_KERNEL);
3249 if (!copy)
3250 return ERR_PTR(-ENOMEM);
3251
3252 left = copy_from_user(copy, data, PAGE_SIZE);
3253
3254 /*
3255  * Not all architectures have an exact copy_from_user(). Resort to
3256  * byte at a time.
3257  */
3258 offset = PAGE_SIZE - left;
3259 while (left) {
3260 char c;
3261 if (get_user(c, (const char __user *)data + offset))
3262 break;
3263 copy[offset] = c;
3264 left--;
3265 offset++;
3266 }
3267
3268 if (left == PAGE_SIZE) {
3269 kfree(copy);
3270 return ERR_PTR(-EFAULT);
3271 }
3272
3273 return copy;
3274 }
3275
3276 static char *copy_mount_string(const void __user *data)
3277 {
3278 return data ? strndup_user(data, PATH_MAX) : NULL;
3279 }
3280
3281 /*
3282  * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
3283  * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
3284  *
3285  * data is a (void *) that can point to any structure up to
3286  * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
3287  * information (or be NULL).
3288  *
3289  * Pre-0.97 versions of mount() didn't have a flags word.
3290  * When the flags word was introduced its top half was required
3291  * to have the magic value 0xC0ED, and this remained so until 2.4.0-test9.
3292  * Therefore, if this magic number is present, it carries no information
3293  * and must be discarded.
3294  */
3295 int path_mount(const char *dev_name, struct path *path,
3296 const char *type_page, unsigned long flags, void *data_page)
3297 {
3298 unsigned int mnt_flags = 0, sb_flags;
3299 int ret;
3300
3301 /* Discard magic */
3302 if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
3303 flags &= ~MS_MGC_MSK;
3304
3305 /* Basic sanity checks */
3306 if (data_page)
3307 ((char *)data_page)[PAGE_SIZE - 1] = 0;
3308
3309 if (flags & MS_NOUSER)
3310 return -EINVAL;
3311
3312 ret = security_sb_mount(dev_name, path, type_page, flags, data_page);
3313 if (ret)
3314 return ret;
3315 if (!may_mount())
3316 return -EPERM;
3317 if (flags & SB_MANDLOCK)
3318 warn_mandlock();
3319
3320 /* Default to relatime unless overridden */
3321 if (!(flags & MS_NOATIME))
3322 mnt_flags |= MNT_RELATIME;
3323
3324 /* Separate the per-mountpoint flags */
3325 if (flags & MS_NOSUID)
3326 mnt_flags |= MNT_NOSUID;
3327 if (flags & MS_NODEV)
3328 mnt_flags |= MNT_NODEV;
3329 if (flags & MS_NOEXEC)
3330 mnt_flags |= MNT_NOEXEC;
3331 if (flags & MS_NOATIME)
3332 mnt_flags |= MNT_NOATIME;
3333 if (flags & MS_NODIRATIME)
3334 mnt_flags |= MNT_NODIRATIME;
3335 if (flags & MS_STRICTATIME)
3336 mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
3337 if (flags & MS_RDONLY)
3338 mnt_flags |= MNT_READONLY;
3339 if (flags & MS_NOSYMFOLLOW)
3340 mnt_flags |= MNT_NOSYMFOLLOW;
3341
3342 /* The default atime for remount is preservation */
3343 if ((flags & MS_REMOUNT) &&
3344 ((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME |
3345 MS_STRICTATIME)) == 0)) {
3346 mnt_flags &= ~MNT_ATIME_MASK;
3347 mnt_flags |= path->mnt->mnt_flags & MNT_ATIME_MASK;
3348 }
3349
3350 sb_flags = flags & (SB_RDONLY |
3351 SB_SYNCHRONOUS |
3352 SB_MANDLOCK |
3353 SB_DIRSYNC |
3354 SB_SILENT |
3355 SB_POSIXACL |
3356 SB_LAZYTIME |
3357 SB_I_VERSION);
3358
3359 if ((flags & (MS_REMOUNT | MS_BIND)) == (MS_REMOUNT | MS_BIND))
3360 return do_reconfigure_mnt(path, mnt_flags);
3361 if (flags & MS_REMOUNT)
3362 return do_remount(path, flags, sb_flags, mnt_flags, data_page);
3363 if (flags & MS_BIND)
3364 return do_loopback(path, dev_name, flags & MS_REC);
3365 if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
3366 return do_change_type(path, flags);
3367 if (flags & MS_MOVE)
3368 return do_move_mount_old(path, dev_name);
3369
3370 return do_new_mount(path, type_page, sb_flags, mnt_flags, dev_name,
3371 data_page);
3372 }
3373
3374 long do_mount(const char *dev_name, const char __user *dir_name,
3375 const char *type_page, unsigned long flags, void *data_page)
3376 {
3377 struct path path;
3378 int ret;
3379
3380 ret = user_path_at(AT_FDCWD, dir_name, LOOKUP_FOLLOW, &path);
3381 if (ret)
3382 return ret;
3383 ret = path_mount(dev_name, &path, type_page, flags, data_page);
3384 path_put(&path);
3385 return ret;
3386 }
3387
3388 static struct ucounts *inc_mnt_namespaces(struct user_namespace *ns)
3389 {
3390 return inc_ucount(ns, current_euid(), UCOUNT_MNT_NAMESPACES);
3391 }
3392
3393 static void dec_mnt_namespaces(struct ucounts *ucounts)
3394 {
3395 dec_ucount(ucounts, UCOUNT_MNT_NAMESPACES);
3396 }
3397
3398 static void free_mnt_ns(struct mnt_namespace *ns)
3399 {
3400 if (!is_anon_ns(ns))
3401 ns_free_inum(&ns->ns);
3402 dec_mnt_namespaces(ns->ucounts);
3403 put_user_ns(ns->user_ns);
3404 kfree(ns);
3405 }
3406
3407 /*
3408  * Assign a sequence number so we can detect when we attempt to bind
3409  * mount a reference to an older mount namespace into the current
3410  * mount namespace, preventing reference counting loops.  A 64bit
3411  * number incrementing at 10Ghz will take 12,427 years to wrap which
3412  * is effectively never, so we can ignore the possibility.
3413  */
3414 static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
3415
3416 static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool anon)
3417 {
3418 struct mnt_namespace *new_ns;
3419 struct ucounts *ucounts;
3420 int ret;
3421
3422 ucounts = inc_mnt_namespaces(user_ns);
3423 if (!ucounts)
3424 return ERR_PTR(-ENOSPC);
3425
3426 new_ns = kzalloc(sizeof(struct mnt_namespace), GFP_KERNEL_ACCOUNT);
3427 if (!new_ns) {
3428 dec_mnt_namespaces(ucounts);
3429 return ERR_PTR(-ENOMEM);
3430 }
3431 if (!anon) {
3432 ret = ns_alloc_inum(&new_ns->ns);
3433 if (ret) {
3434 kfree(new_ns);
3435 dec_mnt_namespaces(ucounts);
3436 return ERR_PTR(ret);
3437 }
3438 }
3439 new_ns->ns.ops = &mntns_operations;
3440 if (!anon)
3441 new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
3442 refcount_set(&new_ns->ns.count, 1);
3443 INIT_LIST_HEAD(&new_ns->list);
3444 init_waitqueue_head(&new_ns->poll);
3445 spin_lock_init(&new_ns->ns_lock);
3446 new_ns->user_ns = get_user_ns(user_ns);
3447 new_ns->ucounts = ucounts;
3448 return new_ns;
3449 }
3450
3451 __latent_entropy
3452 struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
3453 struct user_namespace *user_ns, struct fs_struct *new_fs)
3454 {
3455 struct mnt_namespace *new_ns;
3456 struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
3457 struct mount *p, *q;
3458 struct mount *old;
3459 struct mount *new;
3460 int copy_flags;
3461
3462 BUG_ON(!ns);
3463
3464 if (likely(!(flags & CLONE_NEWNS))) {
3465 get_mnt_ns(ns);
3466 return ns;
3467 }
3468
3469 old = ns->root;
3470
3471 new_ns = alloc_mnt_ns(user_ns, false);
3472 if (IS_ERR(new_ns))
3473 return new_ns;
3474
3475 namespace_lock();
3476
3477 copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE;
3478 if (user_ns != ns->user_ns)
3479 copy_flags |= CL_SHARED_TO_SLAVE;
3480 new = copy_tree(old, old->mnt.mnt_root, copy_flags);
3481 if (IS_ERR(new)) {
3482 namespace_unlock();
3483 free_mnt_ns(new_ns);
3484 return ERR_CAST(new);
3485 }
3486 if (user_ns != ns->user_ns) {
3487 lock_mount_hash();
3488 lock_mnt_tree(new);
3489 unlock_mount_hash();
3490 }
3491 new_ns->root = new;
3492 list_add_tail(&new_ns->list, &new->mnt_list);
3493
3494 /*
3495  * Second pass: switch the tsk->fs->* elements and mark new vfsmounts
3496  * as belonging to new namespace.  We have already acquired a private
3497  * fs_struct, so tsk->fs->lock is not needed.
3498  */
3499 p = old;
3500 q = new;
3501 while (p) {
3502 q->mnt_ns = new_ns;
3503 new_ns->mounts++;
3504 if (new_fs) {
3505 if (&p->mnt == new_fs->root.mnt) {
3506 new_fs->root.mnt = mntget(&q->mnt);
3507 rootmnt = &p->mnt;
3508 }
3509 if (&p->mnt == new_fs->pwd.mnt) {
3510 new_fs->pwd.mnt = mntget(&q->mnt);
3511 pwdmnt = &p->mnt;
3512 }
3513 }
3514 p = next_mnt(p, old);
3515 q = next_mnt(q, new);
3516 if (!q)
3517 break;
3518 while (p->mnt.mnt_root != q->mnt.mnt_root)
3519 p = next_mnt(p, old);
3520 }
3521 namespace_unlock();
3522
3523 if (rootmnt)
3524 mntput(rootmnt);
3525 if (pwdmnt)
3526 mntput(pwdmnt);
3527
3528 return new_ns;
3529 }
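
/*
 * Userspace sketch: copy_mnt_ns() runs when a task asks for a private
 * mount namespace:
 *
 *	unshare(CLONE_NEWNS);
 *	mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL);
 *
 * The second call is the conventional follow-up that stops propagation
 * back to the parent namespace on systems where / is mounted shared.
 */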
3530
3531 struct dentry *mount_subtree(struct vfsmount *m, const char *name)
3532 {
3533 struct mount *mnt = real_mount(m);
3534 struct mnt_namespace *ns;
3535 struct super_block *s;
3536 struct path path;
3537 int err;
3538
3539 ns = alloc_mnt_ns(&init_user_ns, true);
3540 if (IS_ERR(ns)) {
3541 mntput(m);
3542 return ERR_CAST(ns);
3543 }
3544 mnt->mnt_ns = ns;
3545 ns->root = mnt;
3546 ns->mounts++;
3547 list_add(&mnt->mnt_list, &ns->list);
3548
3549 err = vfs_path_lookup(m->mnt_root, m,
3550 name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path);
3551
3552 put_mnt_ns(ns);
3553
3554 if (err)
3555 return ERR_PTR(err);
3556
3557 /* trade a vfsmount reference for active sb one */
3558 s = path.mnt->mnt_sb;
3559 atomic_inc(&s->s_active);
3560 mntput(path.mnt);
3561 /* lock the sucker */
3562 down_write(&s->s_umount);
3563 /* ... and return the root of (sub)tree on it */
3564 return path.dentry;
3565 }
3566 EXPORT_SYMBOL(mount_subtree);
3567
3568 SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
3569 char __user *, type, unsigned long, flags, void __user *, data)
3570 {
3571 int ret;
3572 char *kernel_type;
3573 char *kernel_dev;
3574 void *options;
3575
3576 kernel_type = copy_mount_string(type);
3577 ret = PTR_ERR(kernel_type);
3578 if (IS_ERR(kernel_type))
3579 goto out_type;
3580
3581 kernel_dev = copy_mount_string(dev_name);
3582 ret = PTR_ERR(kernel_dev);
3583 if (IS_ERR(kernel_dev))
3584 goto out_dev;
3585
3586 options = copy_mount_options(data);
3587 ret = PTR_ERR(options);
3588 if (IS_ERR(options))
3589 goto out_data;
3590
3591 ret = do_mount(kernel_dev, dir_name, kernel_type, flags, options);
3592
3593 kfree(options);
3594 out_data:
3595 kfree(kernel_dev);
3596 out_dev:
3597 kfree(kernel_type);
3598 out_type:
3599 return ret;
3600 }
3601
3602 #define FSMOUNT_VALID_FLAGS \
3603 (MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NODEV | \
3604 MOUNT_ATTR_NOEXEC | MOUNT_ATTR__ATIME | MOUNT_ATTR_NODIRATIME | \
3605 MOUNT_ATTR_NOSYMFOLLOW)
3606
3607 #define MOUNT_SETATTR_VALID_FLAGS (FSMOUNT_VALID_FLAGS | MOUNT_ATTR_IDMAP)
3608
3609 #define MOUNT_SETATTR_PROPAGATION_FLAGS \
3610 (MS_UNBINDABLE | MS_PRIVATE | MS_SLAVE | MS_SHARED)
3611
3612 static unsigned int attr_flags_to_mnt_flags(u64 attr_flags)
3613 {
3614 unsigned int mnt_flags = 0;
3615
3616 if (attr_flags & MOUNT_ATTR_RDONLY)
3617 mnt_flags |= MNT_READONLY;
3618 if (attr_flags & MOUNT_ATTR_NOSUID)
3619 mnt_flags |= MNT_NOSUID;
3620 if (attr_flags & MOUNT_ATTR_NODEV)
3621 mnt_flags |= MNT_NODEV;
3622 if (attr_flags & MOUNT_ATTR_NOEXEC)
3623 mnt_flags |= MNT_NOEXEC;
3624 if (attr_flags & MOUNT_ATTR_NODIRATIME)
3625 mnt_flags |= MNT_NODIRATIME;
3626 if (attr_flags & MOUNT_ATTR_NOSYMFOLLOW)
3627 mnt_flags |= MNT_NOSYMFOLLOW;
3628
3629 return mnt_flags;
3630 }
3631
3632 /*
3633  * Create a kernel mount representation for a new, prepared superblock
3634  * (specified by fs_fd) and attach to an open_tree-like file descriptor.
3635  */
3636 SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags,
3637 unsigned int, attr_flags)
3638 {
3639 struct mnt_namespace *ns;
3640 struct fs_context *fc;
3641 struct file *file;
3642 struct path newmount;
3643 struct mount *mnt;
3644 struct fd f;
3645 unsigned int mnt_flags = 0;
3646 long ret;
3647
3648 if (!may_mount())
3649 return -EPERM;
3650
3651 if ((flags & ~(FSMOUNT_CLOEXEC)) != 0)
3652 return -EINVAL;
3653
3654 if (attr_flags & ~FSMOUNT_VALID_FLAGS)
3655 return -EINVAL;
3656
3657 mnt_flags = attr_flags_to_mnt_flags(attr_flags);
3658
3659 switch (attr_flags & MOUNT_ATTR__ATIME) {
3660 case MOUNT_ATTR_STRICTATIME:
3661 break;
3662 case MOUNT_ATTR_NOATIME:
3663 mnt_flags |= MNT_NOATIME;
3664 break;
3665 case MOUNT_ATTR_RELATIME:
3666 mnt_flags |= MNT_RELATIME;
3667 break;
3668 default:
3669 return -EINVAL;
3670 }
3671
3672 f = fdget(fs_fd);
3673 if (!f.file)
3674 return -EBADF;
3675
3676 ret = -EINVAL;
3677 if (f.file->f_op != &fscontext_fops)
3678 goto err_fsfd;
3679
3680 fc = f.file->private_data;
3681
3682 ret = mutex_lock_interruptible(&fc->uapi_mutex);
3683 if (ret < 0)
3684 goto err_fsfd;
3685
3686 /* There must be a valid superblock or we can't mount it */
3687 ret = -EINVAL;
3688 if (!fc->root)
3689 goto err_unlock;
3690
3691 ret = -EPERM;
3692 if (mount_too_revealing(fc->root->d_sb, &mnt_flags)) {
3693 pr_warn("VFS: Mount too revealing\n");
3694 goto err_unlock;
3695 }
3696
3697 ret = -EBUSY;
3698 if (fc->phase != FS_CONTEXT_AWAITING_MOUNT)
3699 goto err_unlock;
3700
3701 if (fc->sb_flags & SB_MANDLOCK)
3702 warn_mandlock();
3703
3704 newmount.mnt = vfs_create_mount(fc);
3705 if (IS_ERR(newmount.mnt)) {
3706 ret = PTR_ERR(newmount.mnt);
3707 goto err_unlock;
3708 }
3709 newmount.dentry = dget(fc->root);
3710 newmount.mnt->mnt_flags = mnt_flags;
3711
3712 /* We've done the mount bit - now move the file context into more or
3713  * less the same state as if we'd done an fspick().  We don't want to
3714  * do any memory allocation or anything like that at this point as we
3715  * don't want to have to handle any errors incurred.
3716  */
3717 vfs_clean_context(fc);
3718
3719 ns = alloc_mnt_ns(current->nsproxy->mnt_ns->user_ns, true);
3720 if (IS_ERR(ns)) {
3721 ret = PTR_ERR(ns);
3722 goto err_path;
3723 }
3724 mnt = real_mount(newmount.mnt);
3725 mnt->mnt_ns = ns;
3726 ns->root = mnt;
3727 ns->mounts = 1;
3728 list_add(&mnt->mnt_list, &ns->list);
3729 mntget(newmount.mnt);
3730
3731 /* Attach to an apparent O_PATH fd with a note that we need to unmount
3732  * it, not just simply put it.
3733  */
3734 file = dentry_open(&newmount, O_PATH, fc->cred);
3735 if (IS_ERR(file)) {
3736 dissolve_on_fput(newmount.mnt);
3737 ret = PTR_ERR(file);
3738 goto err_path;
3739 }
3740 file->f_mode |= FMODE_NEED_UNMOUNT;
3741
3742 ret = get_unused_fd_flags((flags & FSMOUNT_CLOEXEC) ? O_CLOEXEC : 0);
3743 if (ret >= 0)
3744 fd_install(ret, file);
3745 else
3746 fput(file);
3747
3748 err_path:
3749 path_put(&newmount);
3750 err_unlock:
3751 mutex_unlock(&fc->uapi_mutex);
3752 err_fsfd:
3753 fdput(f);
3754 return ret;
3755 }
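
/*
 * Userspace sketch of the new-style mount sequence this syscall belongs
 * to, assuming SYS_fsopen/SYS_fsconfig/SYS_fsmount/SYS_move_mount are
 * available:
 *
 *	int fsfd = syscall(SYS_fsopen, "tmpfs", FSOPEN_CLOEXEC);
 *	syscall(SYS_fsconfig, fsfd, FSCONFIG_SET_STRING, "size", "64m", 0);
 *	syscall(SYS_fsconfig, fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
 *	int mfd = syscall(SYS_fsmount, fsfd, FSMOUNT_CLOEXEC,
 *			  MOUNT_ATTR_NOSUID | MOUNT_ATTR_NODEV);
 *	syscall(SYS_move_mount, mfd, "", AT_FDCWD, "/tmp/scratch",
 *		MOVE_MOUNT_F_EMPTY_PATH);
 */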
3756
3757 /*
3758  * Move a mount from one place to another.  In combination with
3759  * fsopen()/fsmount() this is used to install a new mount and in combination
3760  * with open_tree(OPEN_TREE_CLONE [| AT_RECURSIVE]) it can be used to copy
3761  * a mount subtree.
3762  *
3763  * Note the flags value is a combination of MOVE_MOUNT_* flags.
3764  */
3765 SYSCALL_DEFINE5(move_mount,
3766 int, from_dfd, const char __user *, from_pathname,
3767 int, to_dfd, const char __user *, to_pathname,
3768 unsigned int, flags)
3769 {
3770 struct path from_path, to_path;
3771 unsigned int lflags;
3772 int ret = 0;
3773
3774 if (!may_mount())
3775 return -EPERM;
3776
3777 if (flags & ~MOVE_MOUNT__MASK)
3778 return -EINVAL;
3779
3780 /* If someone gives a pathname, they aren't permitted to move
3781  * from an fd that requires unmount as we can't get at the flag
3782  * to clear it afterwards.
3783  */
3784 lflags = 0;
3785 if (flags & MOVE_MOUNT_F_SYMLINKS) lflags |= LOOKUP_FOLLOW;
3786 if (flags & MOVE_MOUNT_F_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT;
3787 if (flags & MOVE_MOUNT_F_EMPTY_PATH) lflags |= LOOKUP_EMPTY;
3788
3789 ret = user_path_at(from_dfd, from_pathname, lflags, &from_path);
3790 if (ret < 0)
3791 return ret;
3792
3793 lflags = 0;
3794 if (flags & MOVE_MOUNT_T_SYMLINKS) lflags |= LOOKUP_FOLLOW;
3795 if (flags & MOVE_MOUNT_T_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT;
3796 if (flags & MOVE_MOUNT_T_EMPTY_PATH) lflags |= LOOKUP_EMPTY;
3797
3798 ret = user_path_at(to_dfd, to_pathname, lflags, &to_path);
3799 if (ret < 0)
3800 goto out_from;
3801
3802 ret = security_move_mount(&from_path, &to_path);
3803 if (ret < 0)
3804 goto out_to;
3805
3806 if (flags & MOVE_MOUNT_SET_GROUP)
3807 ret = do_set_group(&from_path, &to_path);
3808 else
3809 ret = do_move_mount(&from_path, &to_path);
3810
3811 out_to:
3812 path_put(&to_path);
3813 out_from:
3814 path_put(&from_path);
3815 return ret;
3816 }
3817
3818 /*
3819  * Return true if path is reachable from root
3820  *
3821  * namespace_sem or mount_lock is held
3822  */
3823 bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
3824 const struct path *root)
3825 {
3826 while (&mnt->mnt != root->mnt && mnt_has_parent(mnt)) {
3827 dentry = mnt->mnt_mountpoint;
3828 mnt = mnt->mnt_parent;
3829 }
3830 return &mnt->mnt == root->mnt && is_subdir(dentry, root->dentry);
3831 }
3832
3833 bool path_is_under(const struct path *path1, const struct path *path2)
3834 {
3835 bool res;
3836 read_seqlock_excl(&mount_lock);
3837 res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2);
3838 read_sequnlock_excl(&mount_lock);
3839 return res;
3840 }
3841 EXPORT_SYMBOL(path_is_under);
3842
3843 /*
3844  * pivot_root Semantics:
3845  * Moves the root file system of the current process to the directory put_old,
3846  * makes new_root as the new root file system of the current process, and sets
3847  * root/cwd of all processes which had them on the current root to new_root.
3848  *
3849  * Restrictions:
3850  * The new_root and put_old must be directories, and must not be on the
3851  * same file system as the current process root. The put_old must be
3852  * underneath new_root, i.e. adding a non-zero number of /.. to the string
3853  * pointed to by put_old must yield the same directory as new_root. No other
3854  * file system may be mounted on put_old. After all, new_root is a mountpoint.
3855  *
3856  * Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem.
3857  * See Documentation/filesystems/ramfs-rootfs-initramfs.rst for alternatives
3858  * in this situation.
3859  *
3860  * Notes:
3861  *  - we don't move root/cwd if they are not at the root (reason: if something
3862  *    cared enough to change them, it's probably wrong to force them elsewhere)
3863  *  - it's okay to pick a root that isn't the root of a file system, e.g.
3864  *    /nfs/my_root where /nfs is the mount point. It must be a mountpoint,
3865  *    though, so you may need to say mount --bind /nfs/my_root /nfs/my_root
3866  *    first.
3867  */
3868 SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
3869 const char __user *, put_old)
3870 {
3871 struct path new, old, root;
3872 struct mount *new_mnt, *root_mnt, *old_mnt, *root_parent, *ex_parent;
3873 struct mountpoint *old_mp, *root_mp;
3874 int error;
3875
3876 if (!may_mount())
3877 return -EPERM;
3878
3879 error = user_path_at(AT_FDCWD, new_root,
3880 LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &new);
3881 if (error)
3882 goto out0;
3883
3884 error = user_path_at(AT_FDCWD, put_old,
3885 LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &old);
3886 if (error)
3887 goto out1;
3888
3889 error = security_sb_pivotroot(&old, &new);
3890 if (error)
3891 goto out2;
3892
3893 get_fs_root(current->fs, &root);
3894 old_mp = lock_mount(&old);
3895 error = PTR_ERR(old_mp);
3896 if (IS_ERR(old_mp))
3897 goto out3;
3898
3899 error = -EINVAL;
3900 new_mnt = real_mount(new.mnt);
3901 root_mnt = real_mount(root.mnt);
3902 old_mnt = real_mount(old.mnt);
3903 ex_parent = new_mnt->mnt_parent;
3904 root_parent = root_mnt->mnt_parent;
3905 if (IS_MNT_SHARED(old_mnt) ||
3906 IS_MNT_SHARED(ex_parent) ||
3907 IS_MNT_SHARED(root_parent))
3908 goto out4;
3909 if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
3910 goto out4;
3911 if (new_mnt->mnt.mnt_flags & MNT_LOCKED)
3912 goto out4;
3913 error = -ENOENT;
3914 if (d_unlinked(new.dentry))
3915 goto out4;
3916 error = -EBUSY;
3917 if (new_mnt == root_mnt || old_mnt == root_mnt)
3918 goto out4;
3919 error = -EINVAL;
3920 if (root.mnt->mnt_root != root.dentry)
3921 goto out4;
3922 if (!mnt_has_parent(root_mnt))
3923 goto out4;
3924 if (new.mnt->mnt_root != new.dentry)
3925 goto out4;
3926 if (!mnt_has_parent(new_mnt))
3927 goto out4;
3928 /* make sure we can reach put_old from new_root */
3929 if (!is_path_reachable(old_mnt, old.dentry, &new))
3930 goto out4;
3931 /* make certain new is below the root */
3932 if (!is_path_reachable(new_mnt, new.dentry, &root))
3933 goto out4;
3934 lock_mount_hash();
3935 umount_mnt(new_mnt);
3936 root_mp = unhash_mnt(root_mnt);
3937 if (root_mnt->mnt.mnt_flags & MNT_LOCKED) {
3938 new_mnt->mnt.mnt_flags |= MNT_LOCKED;
3939 root_mnt->mnt.mnt_flags &= ~MNT_LOCKED;
3940 }
3941 /* mount old root on put_old */
3942 attach_mnt(root_mnt, old_mnt, old_mp);
3943 /* mount new_root on / */
3944 attach_mnt(new_mnt, root_parent, root_mp);
3945 mnt_add_count(root_parent, -1);
3946 touch_mnt_namespace(current->nsproxy->mnt_ns);
3947 /* A moved mount should not expire automatically */
3948 list_del_init(&new_mnt->mnt_expire);
3949 put_mountpoint(root_mp);
3950 unlock_mount_hash();
3951 chroot_fs_refs(&root, &new);
3952 error = 0;
3953 out4:
3954 unlock_mount(old_mp);
3955 if (!error)
3956 mntput_no_expire(ex_parent);
3957 out3:
3958 path_put(&root);
3959 out2:
3960 path_put(&old);
3961 out1:
3962 path_put(&new);
3963 out0:
3964 return error;
3965 }
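
/*
 * Userspace sketch (container-style root switch), assuming new_root was
 * already made a mount point, e.g. by bind-mounting it onto itself:
 *
 *	mount("/newroot", "/newroot", NULL, MS_BIND | MS_REC, NULL);
 *	mkdir("/newroot/oldroot", 0700);
 *	syscall(SYS_pivot_root, "/newroot", "/newroot/oldroot");
 *	chdir("/");
 *	umount2("/oldroot", MNT_DETACH);
 *
 * This satisfies the checks above: both arguments are directories on
 * mount points, and put_old is reachable from (underneath) new_root.
 */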
3966
3967 static unsigned int recalc_flags(struct mount_kattr *kattr, struct mount *mnt)
3968 {
3969 unsigned int flags = mnt->mnt.mnt_flags;
3970
3971 /* flags to clear */
3972 flags &= ~kattr->attr_clr;
3973 /* flags to raise */
3974 flags |= kattr->attr_set;
3975
3976 return flags;
3977 }
3978
3979 static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
3980 {
3981 struct vfsmount *m = &mnt->mnt;
3982 struct user_namespace *fs_userns = m->mnt_sb->s_user_ns;
3983
3984 if (!kattr->mnt_userns)
3985 return 0;
3986
3987 /*
3988  * Creating an idmapped mount with the filesystem wide idmapping
3989  * doesn't make sense so block that. We don't allow mushy semantics.
3990  */
3991 if (kattr->mnt_userns == fs_userns)
3992 return -EINVAL;
3993
3994 /*
3995  * Once a mount has been idmapped we don't allow it to change its
3996  * mapping. It makes things simpler and callers can just create
3997  * another bind-mount they can idmap if they want to.
3998  */
3999 if (is_idmapped_mnt(m))
4000 return -EPERM;
4001
4002 /* The underlying filesystem doesn't support idmapped mounts yet. */
4003 if (!(m->mnt_sb->s_type->fs_flags & FS_ALLOW_IDMAP))
4004 return -EINVAL;
4005
4006 /* We're not controlling the superblock. */
4007 if (!ns_capable(fs_userns, CAP_SYS_ADMIN))
4008 return -EPERM;
4009
4010 /* Mount has already been visible in the filesystem hierarchy. */
4011 if (!is_anon_ns(mnt->mnt_ns))
4012 return -EINVAL;
4013
4014 return 0;
4015 }
4016
4017 /**
4018  * mnt_allow_writers() - check whether the attribute change allows writers
4019  * @kattr: the new mount attributes
4020  * @mnt: the mount to which @kattr will be applied
4021  *
4022  * Check whether the new mount attributes in @kattr allow concurrent writers.
4023  *
4024  * Return: true if writers may be allowed, false if they need to be held.
4025  */
4026 static inline bool mnt_allow_writers(const struct mount_kattr *kattr,
4027 const struct mount *mnt)
4028 {
4029 return (!(kattr->attr_set & MNT_READONLY) ||
4030 (mnt->mnt.mnt_flags & MNT_READONLY)) &&
4031 !kattr->mnt_userns;
4032 }
4033
4034 static int mount_setattr_prepare(struct mount_kattr *kattr, struct mount *mnt)
4035 {
4036 struct mount *m;
4037 int err;
4038
4039 for (m = mnt; m; m = next_mnt(m, mnt)) {
4040 if (!can_change_locked_flags(m, recalc_flags(kattr, m))) {
4041 err = -EPERM;
4042 break;
4043 }
4044
4045 err = can_idmap_mount(kattr, m);
4046 if (err)
4047 break;
4048
4049 if (!mnt_allow_writers(kattr, m)) {
4050 err = mnt_hold_writers(m);
4051 if (err)
4052 break;
4053 }
4054
4055 if (!kattr->recurse)
4056 return 0;
4057 }
4058
4059 if (err) {
4060 struct mount *p;
4061
4062 /*
4063  * If we had to call mnt_hold_writers() MNT_WRITE_HOLD will
4064  * be set in @mnt_flags. The loop unsets MNT_WRITE_HOLD for all
4065  * mounts and needs to take care to include the first mount.
4066  */
4067 for (p = mnt; p; p = next_mnt(p, mnt)) {
4068 /* If we had to hold writers unblock them. */
4069 if (p->mnt.mnt_flags & MNT_WRITE_HOLD)
4070 mnt_unhold_writers(p);
4071
4072 /*
4073  * We're done once the first mount we changed got
4074  * MNT_WRITE_HOLD unset.
4075  */
4076 if (p == m)
4077 break;
4078 }
4079 }
4080 return err;
4081 }
4082
4083 static void do_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
4084 {
4085 struct user_namespace *mnt_userns, *old_mnt_userns;
4086
4087 if (!kattr->mnt_userns)
4088 return;
4089
4090 /*
4091  * We're the only ones able to change the mount's idmapping. So
4092  * mnt->mnt.mnt_userns is stable and we can retrieve it directly.
4093  */
4094 old_mnt_userns = mnt->mnt.mnt_userns;
4095
4096 mnt_userns = get_user_ns(kattr->mnt_userns);
4097 /* Pairs with smp_load_acquire() in mnt_user_ns(). */
4098 smp_store_release(&mnt->mnt.mnt_userns, mnt_userns);
4099
4100 /*
4101  * If the mount was already idmapped, drop the reference to the
4102  * old user namespace taken when the idmapping was set up.
4103  */
4104 if (!initial_idmapping(old_mnt_userns))
4105 put_user_ns(old_mnt_userns);
4106 }
4107
4108 static void mount_setattr_commit(struct mount_kattr *kattr, struct mount *mnt)
4109 {
4110 struct mount *m;
4111
4112 for (m = mnt; m; m = next_mnt(m, mnt)) {
4113 unsigned int flags;
4114
4115 do_idmap_mount(kattr, m);
4116 flags = recalc_flags(kattr, m);
4117 WRITE_ONCE(m->mnt.mnt_flags, flags);
4118
4119 /* If we had to hold writers unblock them. */
4120 if (m->mnt.mnt_flags & MNT_WRITE_HOLD)
4121 mnt_unhold_writers(m);
4122
4123 if (kattr->propagation)
4124 change_mnt_propagation(m, kattr->propagation);
4125 if (!kattr->recurse)
4126 break;
4127 }
4128 touch_mnt_namespace(mnt->mnt_ns);
4129 }
4130
4131 static int do_mount_setattr(struct path *path, struct mount_kattr *kattr)
4132 {
4133 struct mount *mnt = real_mount(path->mnt);
4134 int err = 0;
4135
4136 if (path->dentry != mnt->mnt.mnt_root)
4137 return -EINVAL;
4138
4139 if (kattr->propagation) {
4140 /*
4141  * Only take namespace_lock() if we're actually changing
4142  * propagation.
4143  */
4144 namespace_lock();
4145 if (kattr->propagation == MS_SHARED) {
4146 err = invent_group_ids(mnt, kattr->recurse);
4147 if (err) {
4148 namespace_unlock();
4149 return err;
4150 }
4151 }
4152 }
4153
4154 err = -EINVAL;
4155 lock_mount_hash();
4156
4157 /* Ensure that this isn't anything purely vfs internal. */
4158 if (!is_mounted(&mnt->mnt))
4159 goto out;
4160
4161 /*
4162  * If this is an attached mount make sure it's located in the callers
4163  * mount namespace. If it's not don't let the caller interact with it.
4164  * If this is a detached mount make sure it has an anonymous mount
4165  * namespace attached to it, i.e. we've created it with OPEN_TREE_CLONE.
4166  */
4167 if (!(mnt_has_parent(mnt) ? check_mnt(mnt) : is_anon_ns(mnt->mnt_ns)))
4168 goto out;
4169
4170 /*
4171  * First, we get the mount tree in a shape where we can change mount
4172  * properties without failure. If we succeeded to do so we commit all
4173  * changes and if we failed we clean up.
4174  */
4175 err = mount_setattr_prepare(kattr, mnt);
4176 if (!err)
4177 mount_setattr_commit(kattr, mnt);
4178
4179 out:
4180 unlock_mount_hash();
4181
4182 if (kattr->propagation) {
4183 namespace_unlock();
4184 if (err)
4185 cleanup_group_ids(mnt, NULL);
4186 }
4187
4188 return err;
4189 }
4190
4191 static int build_mount_idmapped(const struct mount_attr *attr, size_t usize,
4192 struct mount_kattr *kattr, unsigned int flags)
4193 {
4194 int err = 0;
4195 struct ns_common *ns;
4196 struct user_namespace *mnt_userns;
4197 struct file *file;
4198
4199 if (!((attr->attr_set | attr->attr_clr) & MOUNT_ATTR_IDMAP))
4200 return 0;
4201
4202 /*
4203  * We currently do not support clearing an idmapped mount. If this ever
4204  * is a use-case we can revisit this. But for now let's keep it simple
4205  * and not allow it.
4206  */
4207 if (attr->attr_clr & MOUNT_ATTR_IDMAP)
4208 return -EINVAL;
4209
4210 if (attr->userns_fd > INT_MAX)
4211 return -EINVAL;
4212
4213 file = fget(attr->userns_fd);
4214 if (!file)
4215 return -EBADF;
4216
4217 if (!proc_ns_file(file)) {
4218 err = -EINVAL;
4219 goto out_fput;
4220 }
4221
4222 ns = get_proc_ns(file_inode(file));
4223 if (ns->ops->type != CLONE_NEWUSER) {
4224 err = -EINVAL;
4225 goto out_fput;
4226 }
4227
4228 /*
4229  * The initial idmapping cannot be used to create an idmapped
4230  * mount. We use the initial idmapping as an indicator of a mount
4231  * that is not idmapped. It can simply be passed into helpers that
4232  * are aware of idmapped mounts as a convenient shortcut. A user
4233  * can just create a dedicated identity mapping to achieve the same
4234  * result.
4235  */
4236 mnt_userns = container_of(ns, struct user_namespace, ns);
4237 if (initial_idmapping(mnt_userns)) {
4238 err = -EPERM;
4239 goto out_fput;
4240 }
4241
4242 /* We're not controlling the target user namespace. */
4243 if (!ns_capable(mnt_userns, CAP_SYS_ADMIN)) {
4244 err = -EPERM;
4245 goto out_fput;
4246 }
4247
4248 kattr->mnt_userns = get_user_ns(mnt_userns);
4249
4250 out_fput:
4251 fput(file);
4252 return err;
4253 }
4254
4255 static int build_mount_kattr(const struct mount_attr *attr, size_t usize,
4256 struct mount_kattr *kattr, unsigned int flags)
4257 {
4258 unsigned int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW;
4259
4260 if (flags & AT_NO_AUTOMOUNT)
4261 lookup_flags &= ~LOOKUP_AUTOMOUNT;
4262 if (flags & AT_SYMLINK_NOFOLLOW)
4263 lookup_flags &= ~LOOKUP_FOLLOW;
4264 if (flags & AT_EMPTY_PATH)
4265 lookup_flags |= LOOKUP_EMPTY;
4266
4267 *kattr = (struct mount_kattr) {
4268 .lookup_flags = lookup_flags,
4269 .recurse = !!(flags & AT_RECURSIVE),
4270 };
4271
4272 if (attr->propagation & ~MOUNT_SETATTR_PROPAGATION_FLAGS)
4273 return -EINVAL;
4274 if (hweight32(attr->propagation & MOUNT_SETATTR_PROPAGATION_FLAGS) > 1)
4275 return -EINVAL;
4276 kattr->propagation = attr->propagation;
4277
4278 if ((attr->attr_set | attr->attr_clr) & ~MOUNT_SETATTR_VALID_FLAGS)
4279 return -EINVAL;
4280
4281 kattr->attr_set = attr_flags_to_mnt_flags(attr->attr_set);
4282 kattr->attr_clr = attr_flags_to_mnt_flags(attr->attr_clr);
4283
4284 /*
4285  * Since the MOUNT_ATTR_<atime> values are an enum, not a bitmap,
4286  * users wanting to transition to a different atime setting cannot
4287  * simply specify the atime setting in @attr_set, but must also
4288  * specify MOUNT_ATTR__ATIME in the @attr_clr field.
4289  * So ensure that MOUNT_ATTR__ATIME can't be partially set in
4290  * @attr_clr and that @attr_set can't have any atime bits set if
4291  * @attr_clr doesn't have MOUNT_ATTR__ATIME set.
4292  */
4293 if (attr->attr_clr & MOUNT_ATTR__ATIME) {
4294 if ((attr->attr_clr & MOUNT_ATTR__ATIME) != MOUNT_ATTR__ATIME)
4295 return -EINVAL;
4296
4297 /*
4298  * Clear all previous time settings as they are mutually
4299  * exclusive.
4300  */
4301 kattr->attr_clr |= MNT_RELATIME | MNT_NOATIME;
4302 switch (attr->attr_set & MOUNT_ATTR__ATIME) {
4303 case MOUNT_ATTR_RELATIME:
4304 kattr->attr_set |= MNT_RELATIME;
4305 break;
4306 case MOUNT_ATTR_NOATIME:
4307 kattr->attr_set |= MNT_NOATIME;
4308 break;
4309 case MOUNT_ATTR_STRICTATIME:
4310 break;
4311 default:
4312 return -EINVAL;
4313 }
4314 } else {
4315 if (attr->attr_set & MOUNT_ATTR__ATIME)
4316 return -EINVAL;
4317 }
4318
4319 return build_mount_idmapped(attr, usize, kattr, flags);
4320 }
4321
4322 static void finish_mount_kattr(struct mount_kattr *kattr)
4323 {
4324 put_user_ns(kattr->mnt_userns);
4325 kattr->mnt_userns = NULL;
4326 }
4327
4328 SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path,
4329 unsigned int, flags, struct mount_attr __user *, uattr,
4330 size_t, usize)
4331 {
4332 int err;
4333 struct path target;
4334 struct mount_attr attr;
4335 struct mount_kattr kattr;
4336
4337 BUILD_BUG_ON(sizeof(struct mount_attr) != MOUNT_ATTR_SIZE_VER0);
4338
4339 if (flags & ~(AT_EMPTY_PATH |
4340 AT_RECURSIVE |
4341 AT_SYMLINK_NOFOLLOW |
4342 AT_NO_AUTOMOUNT))
4343 return -EINVAL;
4344
4345 if (unlikely(usize > PAGE_SIZE))
4346 return -E2BIG;
4347 if (unlikely(usize < MOUNT_ATTR_SIZE_VER0))
4348 return -EINVAL;
4349
4350 if (!may_mount())
4351 return -EPERM;
4352
4353 err = copy_struct_from_user(&attr, sizeof(attr), uattr, usize);
4354 if (err)
4355 return err;
4356
4357 /* Don't bother walking through the mounts if this is a nop. */
4358 if (attr.attr_set == 0 &&
4359 attr.attr_clr == 0 &&
4360 attr.propagation == 0)
4361 return 0;
4362
4363 err = build_mount_kattr(&attr, usize, &kattr, flags);
4364 if (err)
4365 return err;
4366
4367 err = user_path_at(dfd, path, kattr.lookup_flags, &target);
4368 if (!err) {
4369 err = do_mount_setattr(&target, &kattr);
4370 path_put(&target);
4371 }
4372 finish_mount_kattr(&kattr);
4373 return err;
4374 }
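
/*
 * Userspace sketch: recursively making a (possibly detached) tree
 * read-only and noexec with mount_setattr(2), assuming SYS_mount_setattr
 * is defined and fd refers to the tree:
 *
 *	struct mount_attr attr = {
 *		.attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC,
 *	};
 *	syscall(SYS_mount_setattr, fd, "", AT_EMPTY_PATH | AT_RECURSIVE,
 *		&attr, sizeof(attr));
 *
 * An idmapped mount is requested the same way with MOUNT_ATTR_IDMAP in
 * attr_set and attr.userns_fd pointing at a user namespace fd; as
 * can_idmap_mount() enforces, the target must be a detached
 * (OPEN_TREE_CLONE) mount on a filesystem that sets FS_ALLOW_IDMAP.
 */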
4375
4376 static void __init init_mount_tree(void)
4377 {
4378 struct vfsmount *mnt;
4379 struct mount *m;
4380 struct mnt_namespace *ns;
4381 struct path root;
4382
4383 mnt = vfs_kern_mount(&rootfs_fs_type, 0, "rootfs", NULL);
4384 if (IS_ERR(mnt))
4385 panic("Can't create rootfs");
4386
4387 ns = alloc_mnt_ns(&init_user_ns, false);
4388 if (IS_ERR(ns))
4389 panic("Can't allocate initial namespace");
4390 m = real_mount(mnt);
4391 m->mnt_ns = ns;
4392 ns->root = m;
4393 ns->mounts = 1;
4394 list_add(&m->mnt_list, &ns->list);
4395 init_task.nsproxy->mnt_ns = ns;
4396 get_mnt_ns(ns);
4397
4398 root.mnt = mnt;
4399 root.dentry = mnt->mnt_root;
4400 mnt->mnt_flags |= MNT_LOCKED;
4401
4402 set_fs_pwd(current->fs, &root);
4403 set_fs_root(current->fs, &root);
4404 }
4405
4406 void __init mnt_init(void)
4407 {
4408 int err;
4409
4410 mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount),
4411 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL);
4412
4413 mount_hashtable = alloc_large_system_hash("Mount-cache",
4414 sizeof(struct hlist_head),
4415 mhash_entries, 19,
4416 HASH_ZERO,
4417 &m_hash_shift, &m_hash_mask, 0, 0);
4418 mountpoint_hashtable = alloc_large_system_hash("Mountpoint-cache",
4419 sizeof(struct hlist_head),
4420 mphash_entries, 19,
4421 HASH_ZERO,
4422 &mp_hash_shift, &mp_hash_mask, 0, 0);
4423
4424 if (!mount_hashtable || !mountpoint_hashtable)
4425 panic("Failed to allocate mount hash table\n");
4426
4427 kernfs_init();
4428
4429 err = sysfs_init();
4430 if (err)
4431 printk(KERN_WARNING "%s: sysfs_init error: %d\n",
4432 __func__, err);
4433 fs_kobj = kobject_create_and_add("fs", NULL);
4434 if (!fs_kobj)
4435 printk(KERN_WARNING "%s: kobj create error\n", __func__);
4436 shmem_init();
4437 init_rootfs();
4438 init_mount_tree();
4439 }
4440
4441 void put_mnt_ns(struct mnt_namespace *ns)
4442 {
4443 if (!refcount_dec_and_test(&ns->ns.count))
4444 return;
4445 drop_collected_mounts(&ns->root->mnt);
4446 free_mnt_ns(ns);
4447 }
4448
4449 struct vfsmount *kern_mount(struct file_system_type *type)
4450 {
4451 struct vfsmount *mnt;
4452 mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL);
4453 if (!IS_ERR(mnt)) {
4454 /*
4455  * it is a longterm mount, don't release mnt until
4456  * we unmount before file sys is unregistered
4457  */
4458 real_mount(mnt)->mnt_ns = MNT_NS_INTERNAL;
4459 }
4460 return mnt;
4461 }
4462 EXPORT_SYMBOL_GPL(kern_mount);
4463
4464 void kern_unmount(struct vfsmount *mnt)
4465 {
4466 /* release long term mount so mount point can be released */
4467 if (!IS_ERR_OR_NULL(mnt)) {
4468 real_mount(mnt)->mnt_ns = NULL;
4469 synchronize_rcu();
4470 mntput(mnt);
4471 }
4472 }
4473 EXPORT_SYMBOL(kern_unmount);
4474
4475 void kern_unmount_array(struct vfsmount *mnt[], unsigned int num)
4476 {
4477 unsigned int i;
4478
4479 for (i = 0; i < num; i++)
4480 if (mnt[i])
4481 real_mount(mnt[i])->mnt_ns = NULL;
4482 synchronize_rcu_expedited();
4483 for (i = 0; i < num; i++)
4484 mntput(mnt[i]);
4485 }
4486 EXPORT_SYMBOL(kern_unmount_array);
4487
4488 bool our_mnt(struct vfsmount *mnt)
4489 {
4490 return check_mnt(real_mount(mnt));
4491 }
4492
4493 bool current_chrooted(void)
4494 {
4495 /* Does the current process have a non-standard root */
4496 struct path ns_root;
4497 struct path fs_root;
4498 bool chrooted;
4499
4500 /* Find the namespace root */
4501 ns_root.mnt = &current->nsproxy->mnt_ns->root->mnt;
4502 ns_root.dentry = ns_root.mnt->mnt_root;
4503 path_get(&ns_root);
4504 while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root))
4505 ;
4506
4507 get_fs_root(current->fs, &fs_root);
4508
4509 chrooted = !path_equal(&fs_root, &ns_root);
4510
4511 path_put(&fs_root);
4512 path_put(&ns_root);
4513
4514 return chrooted;
4515 }
4516
4517 static bool mnt_already_visible(struct mnt_namespace *ns,
4518 const struct super_block *sb,
4519 int *new_mnt_flags)
4520 {
4521 int new_flags = *new_mnt_flags;
4522 struct mount *mnt;
4523 bool visible = false;
4524
4525 down_read(&namespace_sem);
4526 lock_ns_list(ns);
4527 list_for_each_entry(mnt, &ns->list, mnt_list) {
4528 struct mount *child;
4529 int mnt_flags;
4530
4531 if (mnt_is_cursor(mnt))
4532 continue;
4533
4534 if (mnt->mnt.mnt_sb->s_type != sb->s_type)
4535 continue;
4536
4537 /* This mount is not fully visible if its root directory
4538  * is not the root directory of the filesystem.
4539  */
4540 if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root)
4541 continue;
4542
4543 /* A local view of the mount flags */
4544 mnt_flags = mnt->mnt.mnt_flags;
4545
4546 /* Don't miss readonly hidden in the superblock flags */
4547 if (sb_rdonly(mnt->mnt.mnt_sb))
4548 mnt_flags |= MNT_LOCK_READONLY;
4549
4550 /* Verify the mount flags are equal to or more permissive
4551  * than the proposed new mount.
4552  */
4553 if ((mnt_flags & MNT_LOCK_READONLY) &&
4554 !(new_flags & MNT_READONLY))
4555 continue;
4556 if ((mnt_flags & MNT_LOCK_ATIME) &&
4557 ((mnt_flags & MNT_ATIME_MASK) != (new_flags & MNT_ATIME_MASK)))
4558 continue;
4559
4560 /* This mount is not fully visible if there are any
4561  * locked child mounts that cover anything except for
4562  * empty directories.
4563  */
4564 list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
4565 struct inode *inode = child->mnt_mountpoint->d_inode;
4566
4567 if (!(child->mnt.mnt_flags & MNT_LOCKED))
4568 continue;
4569
4570 if (!is_empty_dir_inode(inode))
4571 goto next;
4572 }
4573 /* Preserve the locked attributes */
4574 *new_mnt_flags |= mnt_flags & (MNT_LOCK_READONLY | \
4575 MNT_LOCK_ATIME);
4576 visible = true;
4577 goto found;
4578 next: ;
4579 }
4580 found:
4581 unlock_ns_list(ns);
4582 up_read(&namespace_sem);
4583 return visible;
4584 }
4585
4586 static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags)
4587 {
4588 const unsigned long required_iflags = SB_I_NOEXEC | SB_I_NODEV;
4589 struct mnt_namespace *ns = current->nsproxy->mnt_ns;
4590 unsigned long s_iflags;
4591
4592 if (ns->user_ns == &init_user_ns)
4593 return false;
4594
4595 /* Can this filesystem be too revealing? */
4596 s_iflags = sb->s_iflags;
4597 if (!(s_iflags & SB_I_USERNS_VISIBLE))
4598 return false;
4599
4600 if ((s_iflags & required_iflags) != required_iflags) {
4601 WARN_ONCE(1, "Expected s_iflags to contain 0x%lx\n",
4602 required_iflags);
4603 return true;
4604 }
4605
4606 return !mnt_already_visible(ns, sb, new_mnt_flags);
4607 }
4608
4609 bool mnt_may_suid(struct vfsmount *mnt)
4610 {
4611 /*
4612  * Foreign mounts (accessed via fchdir or through /proc
4613  * symlinks) are always treated as if they are nosuid.  This
4614  * prevents namespaces from trusting potentially unsafe
4615  * suid/sgid bits, file caps, or security labels that originate
4616  * in other namespaces.
4617  */
4618 return !(mnt->mnt_flags & MNT_NOSUID) && check_mnt(real_mount(mnt)) &&
4619 current_in_userns(mnt->mnt_sb->s_user_ns);
4620 }
4621
4622 static struct ns_common *mntns_get(struct task_struct *task)
4623 {
4624 struct ns_common *ns = NULL;
4625 struct nsproxy *nsproxy;
4626
4627 task_lock(task);
4628 nsproxy = task->nsproxy;
4629 if (nsproxy) {
4630 ns = &nsproxy->mnt_ns->ns;
4631 get_mnt_ns(to_mnt_ns(ns));
4632 }
4633 task_unlock(task);
4634
4635 return ns;
4636 }
4637
4638 static void mntns_put(struct ns_common *ns)
4639 {
4640 put_mnt_ns(to_mnt_ns(ns));
4641 }
4642
4643 static int mntns_install(struct nsset *nsset, struct ns_common *ns)
4644 {
4645 struct nsproxy *nsproxy = nsset->nsproxy;
4646 struct fs_struct *fs = nsset->fs;
4647 struct mnt_namespace *mnt_ns = to_mnt_ns(ns), *old_mnt_ns;
4648 struct user_namespace *user_ns = nsset->cred->user_ns;
4649 struct path root;
4650 int err;
4651
4652 if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
4653 !ns_capable(user_ns, CAP_SYS_CHROOT) ||
4654 !ns_capable(user_ns, CAP_SYS_ADMIN))
4655 return -EPERM;
4656
4657 if (is_anon_ns(mnt_ns))
4658 return -EINVAL;
4659
4660 if (fs->users != 1)
4661 return -EINVAL;
4662
4663 get_mnt_ns(mnt_ns);
4664 old_mnt_ns = nsproxy->mnt_ns;
4665 nsproxy->mnt_ns = mnt_ns;
4666
4667 /* Find the root */
4668 err = vfs_path_lookup(mnt_ns->root->mnt.mnt_root, &mnt_ns->root->mnt,
4669 "/", LOOKUP_DOWN, &root);
4670 if (err) {
4671 /* revert to the old namespace */
4672 nsproxy->mnt_ns = old_mnt_ns;
4673 put_mnt_ns(mnt_ns);
4674 return err;
4675 }
4676
4677 put_mnt_ns(old_mnt_ns);
4678
4679 /* Update the pwd and root */
4680 set_fs_pwd(fs, &root);
4681 set_fs_root(fs, &root);
4682
4683 path_put(&root);
4684 return 0;
4685 }
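
/*
 * Userspace sketch: mntns_install() runs when a task joins an existing
 * mount namespace via setns(2), e.g.:
 *
 *	int fd = open("/proc/1234/ns/mnt", O_RDONLY | O_CLOEXEC);
 *	if (fd >= 0)
 *		setns(fd, CLONE_NEWNS);
 *
 * (1234 is an illustrative pid.)  On success root and cwd are reset to
 * the target namespace's root, matching the set_fs_pwd()/set_fs_root()
 * calls above; the capability checks explain why privilege is needed in
 * both user namespaces involved.
 */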
4686
4687 static struct user_namespace *mntns_owner(struct ns_common *ns)
4688 {
4689 return to_mnt_ns(ns)->user_ns;
4690 }
4691
4692 const struct proc_ns_operations mntns_operations = {
4693 .name = "mnt",
4694 .type = CLONE_NEWNS,
4695 .get = mntns_get,
4696 .put = mntns_put,
4697 .install = mntns_install,
4698 .owner = mntns_owner,
4699 };
4700
4701 #ifdef CONFIG_SYSCTL
4702 static struct ctl_table fs_namespace_sysctls[] = {
4703 {
4704 .procname = "mount-max",
4705 .data = &sysctl_mount_max,
4706 .maxlen = sizeof(unsigned int),
4707 .mode = 0644,
4708 .proc_handler = proc_dointvec_minmax,
4709 .extra1 = SYSCTL_ONE,
4710 },
4711 { }
4712 };
4713
4714 static int __init init_fs_namespace_sysctls(void)
4715 {
4716 register_sysctl_init("fs", fs_namespace_sysctls);
4717 return 0;
4718 }
4719 fs_initcall(init_fs_namespace_sysctls);
4720
4721 #endif