0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012 #include <linux/slab.h>
0013 #include <linux/export.h>
0014 #include <linux/nsproxy.h>
0015 #include <linux/init_task.h>
0016 #include <linux/mnt_namespace.h>
0017 #include <linux/utsname.h>
0018 #include <linux/pid_namespace.h>
0019 #include <net/net_namespace.h>
0020 #include <linux/ipc_namespace.h>
0021 #include <linux/time_namespace.h>
0022 #include <linux/fs_struct.h>
0023 #include <linux/proc_fs.h>
0024 #include <linux/proc_ns.h>
0025 #include <linux/file.h>
0026 #include <linux/syscalls.h>
0027 #include <linux/cgroup.h>
0028 #include <linux/perf_event.h>
0029
0030 static struct kmem_cache *nsproxy_cachep;
0031
0032 struct nsproxy init_nsproxy = {
0033 .count = ATOMIC_INIT(1),
0034 .uts_ns = &init_uts_ns,
0035 #if defined(CONFIG_POSIX_MQUEUE) || defined(CONFIG_SYSVIPC)
0036 .ipc_ns = &init_ipc_ns,
0037 #endif
0038 .mnt_ns = NULL,
0039 .pid_ns_for_children = &init_pid_ns,
0040 #ifdef CONFIG_NET
0041 .net_ns = &init_net,
0042 #endif
0043 #ifdef CONFIG_CGROUPS
0044 .cgroup_ns = &init_cgroup_ns,
0045 #endif
0046 #ifdef CONFIG_TIME_NS
0047 .time_ns = &init_time_ns,
0048 .time_ns_for_children = &init_time_ns,
0049 #endif
0050 };
0051
0052 static inline struct nsproxy *create_nsproxy(void)
0053 {
0054 struct nsproxy *nsproxy;
0055
0056 nsproxy = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL);
0057 if (nsproxy)
0058 atomic_set(&nsproxy->count, 1);
0059 return nsproxy;
0060 }
0061
0062
0063
0064
0065
0066
0067 static struct nsproxy *create_new_namespaces(unsigned long flags,
0068 struct task_struct *tsk, struct user_namespace *user_ns,
0069 struct fs_struct *new_fs)
0070 {
0071 struct nsproxy *new_nsp;
0072 int err;
0073
0074 new_nsp = create_nsproxy();
0075 if (!new_nsp)
0076 return ERR_PTR(-ENOMEM);
0077
0078 new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, user_ns, new_fs);
0079 if (IS_ERR(new_nsp->mnt_ns)) {
0080 err = PTR_ERR(new_nsp->mnt_ns);
0081 goto out_ns;
0082 }
0083
0084 new_nsp->uts_ns = copy_utsname(flags, user_ns, tsk->nsproxy->uts_ns);
0085 if (IS_ERR(new_nsp->uts_ns)) {
0086 err = PTR_ERR(new_nsp->uts_ns);
0087 goto out_uts;
0088 }
0089
0090 new_nsp->ipc_ns = copy_ipcs(flags, user_ns, tsk->nsproxy->ipc_ns);
0091 if (IS_ERR(new_nsp->ipc_ns)) {
0092 err = PTR_ERR(new_nsp->ipc_ns);
0093 goto out_ipc;
0094 }
0095
0096 new_nsp->pid_ns_for_children =
0097 copy_pid_ns(flags, user_ns, tsk->nsproxy->pid_ns_for_children);
0098 if (IS_ERR(new_nsp->pid_ns_for_children)) {
0099 err = PTR_ERR(new_nsp->pid_ns_for_children);
0100 goto out_pid;
0101 }
0102
0103 new_nsp->cgroup_ns = copy_cgroup_ns(flags, user_ns,
0104 tsk->nsproxy->cgroup_ns);
0105 if (IS_ERR(new_nsp->cgroup_ns)) {
0106 err = PTR_ERR(new_nsp->cgroup_ns);
0107 goto out_cgroup;
0108 }
0109
0110 new_nsp->net_ns = copy_net_ns(flags, user_ns, tsk->nsproxy->net_ns);
0111 if (IS_ERR(new_nsp->net_ns)) {
0112 err = PTR_ERR(new_nsp->net_ns);
0113 goto out_net;
0114 }
0115
0116 new_nsp->time_ns_for_children = copy_time_ns(flags, user_ns,
0117 tsk->nsproxy->time_ns_for_children);
0118 if (IS_ERR(new_nsp->time_ns_for_children)) {
0119 err = PTR_ERR(new_nsp->time_ns_for_children);
0120 goto out_time;
0121 }
0122 new_nsp->time_ns = get_time_ns(tsk->nsproxy->time_ns);
0123
0124 return new_nsp;
0125
0126 out_time:
0127 put_net(new_nsp->net_ns);
0128 out_net:
0129 put_cgroup_ns(new_nsp->cgroup_ns);
0130 out_cgroup:
0131 if (new_nsp->pid_ns_for_children)
0132 put_pid_ns(new_nsp->pid_ns_for_children);
0133 out_pid:
0134 if (new_nsp->ipc_ns)
0135 put_ipc_ns(new_nsp->ipc_ns);
0136 out_ipc:
0137 if (new_nsp->uts_ns)
0138 put_uts_ns(new_nsp->uts_ns);
0139 out_uts:
0140 if (new_nsp->mnt_ns)
0141 put_mnt_ns(new_nsp->mnt_ns);
0142 out_ns:
0143 kmem_cache_free(nsproxy_cachep, new_nsp);
0144 return ERR_PTR(err);
0145 }
0146
0147
0148
0149
0150
0151 int copy_namespaces(unsigned long flags, struct task_struct *tsk)
0152 {
0153 struct nsproxy *old_ns = tsk->nsproxy;
0154 struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns);
0155 struct nsproxy *new_ns;
0156
0157 if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
0158 CLONE_NEWPID | CLONE_NEWNET |
0159 CLONE_NEWCGROUP | CLONE_NEWTIME)))) {
0160 if (likely(old_ns->time_ns_for_children == old_ns->time_ns)) {
0161 get_nsproxy(old_ns);
0162 return 0;
0163 }
0164 } else if (!ns_capable(user_ns, CAP_SYS_ADMIN))
0165 return -EPERM;
0166
0167
0168
0169
0170
0171
0172
0173
0174 if ((flags & (CLONE_NEWIPC | CLONE_SYSVSEM)) ==
0175 (CLONE_NEWIPC | CLONE_SYSVSEM))
0176 return -EINVAL;
0177
0178 new_ns = create_new_namespaces(flags, tsk, user_ns, tsk->fs);
0179 if (IS_ERR(new_ns))
0180 return PTR_ERR(new_ns);
0181
0182 timens_on_fork(new_ns, tsk);
0183
0184 tsk->nsproxy = new_ns;
0185 return 0;
0186 }
0187
0188 void free_nsproxy(struct nsproxy *ns)
0189 {
0190 if (ns->mnt_ns)
0191 put_mnt_ns(ns->mnt_ns);
0192 if (ns->uts_ns)
0193 put_uts_ns(ns->uts_ns);
0194 if (ns->ipc_ns)
0195 put_ipc_ns(ns->ipc_ns);
0196 if (ns->pid_ns_for_children)
0197 put_pid_ns(ns->pid_ns_for_children);
0198 if (ns->time_ns)
0199 put_time_ns(ns->time_ns);
0200 if (ns->time_ns_for_children)
0201 put_time_ns(ns->time_ns_for_children);
0202 put_cgroup_ns(ns->cgroup_ns);
0203 put_net(ns->net_ns);
0204 kmem_cache_free(nsproxy_cachep, ns);
0205 }
0206
0207
0208
0209
0210
0211 int unshare_nsproxy_namespaces(unsigned long unshare_flags,
0212 struct nsproxy **new_nsp, struct cred *new_cred, struct fs_struct *new_fs)
0213 {
0214 struct user_namespace *user_ns;
0215 int err = 0;
0216
0217 if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
0218 CLONE_NEWNET | CLONE_NEWPID | CLONE_NEWCGROUP |
0219 CLONE_NEWTIME)))
0220 return 0;
0221
0222 user_ns = new_cred ? new_cred->user_ns : current_user_ns();
0223 if (!ns_capable(user_ns, CAP_SYS_ADMIN))
0224 return -EPERM;
0225
0226 *new_nsp = create_new_namespaces(unshare_flags, current, user_ns,
0227 new_fs ? new_fs : current->fs);
0228 if (IS_ERR(*new_nsp)) {
0229 err = PTR_ERR(*new_nsp);
0230 goto out;
0231 }
0232
0233 out:
0234 return err;
0235 }
0236
0237 void switch_task_namespaces(struct task_struct *p, struct nsproxy *new)
0238 {
0239 struct nsproxy *ns;
0240
0241 might_sleep();
0242
0243 task_lock(p);
0244 ns = p->nsproxy;
0245 p->nsproxy = new;
0246 task_unlock(p);
0247
0248 if (ns)
0249 put_nsproxy(ns);
0250 }
0251
0252 void exit_task_namespaces(struct task_struct *p)
0253 {
0254 switch_task_namespaces(p, NULL);
0255 }
0256
/*
 * Validate the namespace flags passed to setns().
 *
 * @flags must be non-zero and may only contain CLONE_NEW* bits whose
 * namespace type is available in this kernel configuration.  CLONE_NEWNS is
 * always accepted; every other type depends on its config option.
 *
 * Returns 0 if @flags is acceptable, -EINVAL otherwise.
 */
static int check_setns_flags(unsigned long flags)
{
	/* Collect the set of flags this build supports. */
	unsigned long supported = CLONE_NEWNS;

#ifdef CONFIG_USER_NS
	supported |= CLONE_NEWUSER;
#endif
#ifdef CONFIG_PID_NS
	supported |= CLONE_NEWPID;
#endif
#ifdef CONFIG_UTS_NS
	supported |= CLONE_NEWUTS;
#endif
#ifdef CONFIG_IPC_NS
	supported |= CLONE_NEWIPC;
#endif
#ifdef CONFIG_CGROUPS
	supported |= CLONE_NEWCGROUP;
#endif
#ifdef CONFIG_NET_NS
	supported |= CLONE_NEWNET;
#endif
#ifdef CONFIG_TIME_NS
	supported |= CLONE_NEWTIME;
#endif

	/* Reject an empty request and any bit outside the supported set. */
	if (!flags || (flags & ~supported))
		return -EINVAL;

	return 0;
}
0295
0296 static void put_nsset(struct nsset *nsset)
0297 {
0298 unsigned flags = nsset->flags;
0299
0300 if (flags & CLONE_NEWUSER)
0301 put_cred(nsset_cred(nsset));
0302
0303
0304
0305
0306 if (nsset->fs && (flags & CLONE_NEWNS) && (flags & ~CLONE_NEWNS))
0307 free_fs_struct(nsset->fs);
0308 if (nsset->nsproxy)
0309 free_nsproxy(nsset->nsproxy);
0310 }
0311
0312 static int prepare_nsset(unsigned flags, struct nsset *nsset)
0313 {
0314 struct task_struct *me = current;
0315
0316 nsset->nsproxy = create_new_namespaces(0, me, current_user_ns(), me->fs);
0317 if (IS_ERR(nsset->nsproxy))
0318 return PTR_ERR(nsset->nsproxy);
0319
0320 if (flags & CLONE_NEWUSER)
0321 nsset->cred = prepare_creds();
0322 else
0323 nsset->cred = current_cred();
0324 if (!nsset->cred)
0325 goto out;
0326
0327
0328 if (flags == CLONE_NEWNS) {
0329 nsset->fs = me->fs;
0330 } else if (flags & CLONE_NEWNS) {
0331 nsset->fs = copy_fs_struct(me->fs);
0332 if (!nsset->fs)
0333 goto out;
0334 }
0335
0336 nsset->flags = flags;
0337 return 0;
0338
0339 out:
0340 put_nsset(nsset);
0341 return -ENOMEM;
0342 }
0343
0344 static inline int validate_ns(struct nsset *nsset, struct ns_common *ns)
0345 {
0346 return ns->ops->install(nsset, ns);
0347 }
0348
0349
0350
0351
0352
0353
0354
0355
0356 static int validate_nsset(struct nsset *nsset, struct pid *pid)
0357 {
0358 int ret = 0;
0359 unsigned flags = nsset->flags;
0360 struct user_namespace *user_ns = NULL;
0361 struct pid_namespace *pid_ns = NULL;
0362 struct nsproxy *nsp;
0363 struct task_struct *tsk;
0364
0365
0366 rcu_read_lock();
0367 tsk = pid_task(pid, PIDTYPE_PID);
0368 if (!tsk) {
0369 rcu_read_unlock();
0370 return -ESRCH;
0371 }
0372
0373 if (!ptrace_may_access(tsk, PTRACE_MODE_READ_REALCREDS)) {
0374 rcu_read_unlock();
0375 return -EPERM;
0376 }
0377
0378 task_lock(tsk);
0379 nsp = tsk->nsproxy;
0380 if (nsp)
0381 get_nsproxy(nsp);
0382 task_unlock(tsk);
0383 if (!nsp) {
0384 rcu_read_unlock();
0385 return -ESRCH;
0386 }
0387
0388 #ifdef CONFIG_PID_NS
0389 if (flags & CLONE_NEWPID) {
0390 pid_ns = task_active_pid_ns(tsk);
0391 if (unlikely(!pid_ns)) {
0392 rcu_read_unlock();
0393 ret = -ESRCH;
0394 goto out;
0395 }
0396 get_pid_ns(pid_ns);
0397 }
0398 #endif
0399
0400 #ifdef CONFIG_USER_NS
0401 if (flags & CLONE_NEWUSER)
0402 user_ns = get_user_ns(__task_cred(tsk)->user_ns);
0403 #endif
0404 rcu_read_unlock();
0405
0406
0407
0408
0409
0410
0411
0412 #ifdef CONFIG_USER_NS
0413 if (flags & CLONE_NEWUSER) {
0414 ret = validate_ns(nsset, &user_ns->ns);
0415 if (ret)
0416 goto out;
0417 }
0418 #endif
0419
0420 if (flags & CLONE_NEWNS) {
0421 ret = validate_ns(nsset, from_mnt_ns(nsp->mnt_ns));
0422 if (ret)
0423 goto out;
0424 }
0425
0426 #ifdef CONFIG_UTS_NS
0427 if (flags & CLONE_NEWUTS) {
0428 ret = validate_ns(nsset, &nsp->uts_ns->ns);
0429 if (ret)
0430 goto out;
0431 }
0432 #endif
0433
0434 #ifdef CONFIG_IPC_NS
0435 if (flags & CLONE_NEWIPC) {
0436 ret = validate_ns(nsset, &nsp->ipc_ns->ns);
0437 if (ret)
0438 goto out;
0439 }
0440 #endif
0441
0442 #ifdef CONFIG_PID_NS
0443 if (flags & CLONE_NEWPID) {
0444 ret = validate_ns(nsset, &pid_ns->ns);
0445 if (ret)
0446 goto out;
0447 }
0448 #endif
0449
0450 #ifdef CONFIG_CGROUPS
0451 if (flags & CLONE_NEWCGROUP) {
0452 ret = validate_ns(nsset, &nsp->cgroup_ns->ns);
0453 if (ret)
0454 goto out;
0455 }
0456 #endif
0457
0458 #ifdef CONFIG_NET_NS
0459 if (flags & CLONE_NEWNET) {
0460 ret = validate_ns(nsset, &nsp->net_ns->ns);
0461 if (ret)
0462 goto out;
0463 }
0464 #endif
0465
0466 #ifdef CONFIG_TIME_NS
0467 if (flags & CLONE_NEWTIME) {
0468 ret = validate_ns(nsset, &nsp->time_ns->ns);
0469 if (ret)
0470 goto out;
0471 }
0472 #endif
0473
0474 out:
0475 if (pid_ns)
0476 put_pid_ns(pid_ns);
0477 if (nsp)
0478 put_nsproxy(nsp);
0479 put_user_ns(user_ns);
0480
0481 return ret;
0482 }
0483
0484
0485
0486
0487
0488
0489
0490
0491
0492
0493 static void commit_nsset(struct nsset *nsset)
0494 {
0495 unsigned flags = nsset->flags;
0496 struct task_struct *me = current;
0497
0498 #ifdef CONFIG_USER_NS
0499 if (flags & CLONE_NEWUSER) {
0500
0501 commit_creds(nsset_cred(nsset));
0502 nsset->cred = NULL;
0503 }
0504 #endif
0505
0506
0507 if ((flags & CLONE_NEWNS) && (flags & ~CLONE_NEWNS)) {
0508 set_fs_root(me->fs, &nsset->fs->root);
0509 set_fs_pwd(me->fs, &nsset->fs->pwd);
0510 }
0511
0512 #ifdef CONFIG_IPC_NS
0513 if (flags & CLONE_NEWIPC)
0514 exit_sem(me);
0515 #endif
0516
0517 #ifdef CONFIG_TIME_NS
0518 if (flags & CLONE_NEWTIME)
0519 timens_commit(me, nsset->nsproxy->time_ns);
0520 #endif
0521
0522
0523 switch_task_namespaces(me, nsset->nsproxy);
0524 nsset->nsproxy = NULL;
0525 }
0526
0527 SYSCALL_DEFINE2(setns, int, fd, int, flags)
0528 {
0529 struct file *file;
0530 struct ns_common *ns = NULL;
0531 struct nsset nsset = {};
0532 int err = 0;
0533
0534 file = fget(fd);
0535 if (!file)
0536 return -EBADF;
0537
0538 if (proc_ns_file(file)) {
0539 ns = get_proc_ns(file_inode(file));
0540 if (flags && (ns->ops->type != flags))
0541 err = -EINVAL;
0542 flags = ns->ops->type;
0543 } else if (!IS_ERR(pidfd_pid(file))) {
0544 err = check_setns_flags(flags);
0545 } else {
0546 err = -EINVAL;
0547 }
0548 if (err)
0549 goto out;
0550
0551 err = prepare_nsset(flags, &nsset);
0552 if (err)
0553 goto out;
0554
0555 if (proc_ns_file(file))
0556 err = validate_ns(&nsset, ns);
0557 else
0558 err = validate_nsset(&nsset, file->private_data);
0559 if (!err) {
0560 commit_nsset(&nsset);
0561 perf_event_namespaces(current);
0562 }
0563 put_nsset(&nsset);
0564 out:
0565 fput(file);
0566 return err;
0567 }
0568
0569 int __init nsproxy_cache_init(void)
0570 {
0571 nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC|SLAB_ACCOUNT);
0572 return 0;
0573 }