Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 
0003 #define _GNU_SOURCE
0004 #include <errno.h>
0005 #include <fcntl.h>
0006 #include <limits.h>
0007 #include <linux/types.h>
0008 #include <sched.h>
0009 #include <signal.h>
0010 #include <stdio.h>
0011 #include <stdlib.h>
0012 #include <string.h>
0013 #include <syscall.h>
0014 #include <sys/prctl.h>
0015 #include <sys/wait.h>
0016 #include <unistd.h>
0017 #include <sys/socket.h>
0018 #include <sys/stat.h>
0019 
0020 #include "pidfd.h"
0021 #include "../clone3/clone3_selftests.h"
0022 #include "../kselftest_harness.h"
0023 
/*
 * Slot numbers for the namespace kinds handled by this test. They index the
 * ns_info[] table and the nsfd arrays in the fixture; PIDFD_NS_MAX is the
 * number of slots, not a namespace of its own.
 */
enum {
    PIDFD_NS_USER = 0,
    PIDFD_NS_MNT,
    PIDFD_NS_PID,
    PIDFD_NS_UTS,
    PIDFD_NS_IPC,
    PIDFD_NS_NET,
    PIDFD_NS_CGROUP,
    PIDFD_NS_PIDCLD,    /* the "pid_for_children" entry */
    PIDFD_NS_TIME,
    PIDFD_NS_MAX        /* must stay last */
};
0036 
0037 const struct ns_info {
0038     const char *name;
0039     int flag;
0040 } ns_info[] = {
0041     [PIDFD_NS_USER]   = { "user",             CLONE_NEWUSER,   },
0042     [PIDFD_NS_MNT]    = { "mnt",              CLONE_NEWNS,     },
0043     [PIDFD_NS_PID]    = { "pid",              CLONE_NEWPID,    },
0044     [PIDFD_NS_UTS]    = { "uts",              CLONE_NEWUTS,    },
0045     [PIDFD_NS_IPC]    = { "ipc",              CLONE_NEWIPC,    },
0046     [PIDFD_NS_NET]    = { "net",              CLONE_NEWNET,    },
0047     [PIDFD_NS_CGROUP] = { "cgroup",           CLONE_NEWCGROUP, },
0048     [PIDFD_NS_PIDCLD] = { "pid_for_children", 0,               },
0049     [PIDFD_NS_TIME]   = { "time",             CLONE_NEWTIME,   },
0050 };
0051 
0052 FIXTURE(current_nsset)
0053 {
0054     pid_t pid;
0055     int pidfd;
0056     int nsfds[PIDFD_NS_MAX];
0057 
0058     pid_t child_pid_exited;
0059     int child_pidfd_exited;
0060 
0061     pid_t child_pid1;
0062     int child_pidfd1;
0063     int child_nsfds1[PIDFD_NS_MAX];
0064 
0065     pid_t child_pid2;
0066     int child_pidfd2;
0067     int child_nsfds2[PIDFD_NS_MAX];
0068 };
0069 
/*
 * Thin wrapper around the raw waitid system call. Both pointer arguments of
 * the syscall (the 3rd and the 5th) are passed as NULL, so only the return
 * value of syscall() is reported back to the caller.
 */
static int sys_waitid(int which, pid_t pid, int options)
{
    long rc;

    rc = syscall(__NR_waitid, which, pid, NULL, options, NULL);

    return rc;
}
0074 
0075 pid_t create_child(int *pidfd, unsigned flags)
0076 {
0077     struct __clone_args args = {
0078         .flags      = CLONE_PIDFD | flags,
0079         .exit_signal    = SIGCHLD,
0080         .pidfd      = ptr_to_u64(pidfd),
0081     };
0082 
0083     return sys_clone3(&args, sizeof(struct clone_args));
0084 }
0085 
0086 static bool switch_timens(void)
0087 {
0088     int fd, ret;
0089 
0090     if (unshare(CLONE_NEWTIME))
0091         return false;
0092 
0093     fd = open("/proc/self/ns/time_for_children", O_RDONLY | O_CLOEXEC);
0094     if (fd < 0)
0095         return false;
0096 
0097     ret = setns(fd, CLONE_NEWTIME);
0098     close(fd);
0099     return ret == 0;
0100 }
0101 
/*
 * read() that is restarted for as long as it fails with EINTR.
 *
 * Returns the result of the first read() call that either succeeded or
 * failed with an errno other than EINTR.
 */
static ssize_t read_nointr(int fd, void *buf, size_t count)
{
    for (;;) {
        ssize_t nread = read(fd, buf, count);

        if (nread >= 0 || errno != EINTR)
            return nread;
    }
}
0112 
/*
 * write() that is restarted for as long as it fails with EINTR.
 *
 * Returns the result of the first write() call that either succeeded or
 * failed with an errno other than EINTR. Short writes are not retried.
 */
static ssize_t write_nointr(int fd, const void *buf, size_t count)
{
    for (;;) {
        ssize_t nwritten = write(fd, buf, count);

        if (nwritten >= 0 || errno != EINTR)
            return nwritten;
    }
}
0123 
0124 FIXTURE_SETUP(current_nsset)
0125 {
0126     int i, proc_fd, ret;
0127     int ipc_sockets[2];
0128     char c;
0129 
0130     for (i = 0; i < PIDFD_NS_MAX; i++) {
0131         self->nsfds[i]      = -EBADF;
0132         self->child_nsfds1[i]   = -EBADF;
0133         self->child_nsfds2[i]   = -EBADF;
0134     }
0135 
0136     proc_fd = open("/proc/self/ns", O_DIRECTORY | O_CLOEXEC);
0137     ASSERT_GE(proc_fd, 0) {
0138         TH_LOG("%m - Failed to open /proc/self/ns");
0139     }
0140 
0141     self->pid = getpid();
0142     for (i = 0; i < PIDFD_NS_MAX; i++) {
0143         const struct ns_info *info = &ns_info[i];
0144         self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC);
0145         if (self->nsfds[i] < 0) {
0146             EXPECT_EQ(errno, ENOENT) {
0147                 TH_LOG("%m - Failed to open %s namespace for process %d",
0148                        info->name, self->pid);
0149             }
0150         }
0151     }
0152 
0153     self->pidfd = sys_pidfd_open(self->pid, 0);
0154     EXPECT_GT(self->pidfd, 0) {
0155         TH_LOG("%m - Failed to open pidfd for process %d", self->pid);
0156     }
0157 
0158     /* Create task that exits right away. */
0159     self->child_pid_exited = create_child(&self->child_pidfd_exited,
0160                           CLONE_NEWUSER | CLONE_NEWNET);
0161     EXPECT_GT(self->child_pid_exited, 0);
0162 
0163     if (self->child_pid_exited == 0)
0164         _exit(EXIT_SUCCESS);
0165 
0166     ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED | WNOWAIT), 0);
0167 
0168     self->pidfd = sys_pidfd_open(self->pid, 0);
0169     EXPECT_GE(self->pidfd, 0) {
0170         TH_LOG("%m - Failed to open pidfd for process %d", self->pid);
0171     }
0172 
0173     ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
0174     EXPECT_EQ(ret, 0);
0175 
0176     /* Create tasks that will be stopped. */
0177     self->child_pid1 = create_child(&self->child_pidfd1,
0178                     CLONE_NEWUSER | CLONE_NEWNS |
0179                     CLONE_NEWCGROUP | CLONE_NEWIPC |
0180                     CLONE_NEWUTS | CLONE_NEWPID |
0181                     CLONE_NEWNET);
0182     EXPECT_GE(self->child_pid1, 0);
0183 
0184     if (self->child_pid1 == 0) {
0185         close(ipc_sockets[0]);
0186 
0187         if (!switch_timens())
0188             _exit(EXIT_FAILURE);
0189 
0190         if (write_nointr(ipc_sockets[1], "1", 1) < 0)
0191             _exit(EXIT_FAILURE);
0192 
0193         close(ipc_sockets[1]);
0194 
0195         pause();
0196         _exit(EXIT_SUCCESS);
0197     }
0198 
0199     close(ipc_sockets[1]);
0200     ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
0201     close(ipc_sockets[0]);
0202 
0203     ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
0204     EXPECT_EQ(ret, 0);
0205 
0206     self->child_pid2 = create_child(&self->child_pidfd2,
0207                     CLONE_NEWUSER | CLONE_NEWNS |
0208                     CLONE_NEWCGROUP | CLONE_NEWIPC |
0209                     CLONE_NEWUTS | CLONE_NEWPID |
0210                     CLONE_NEWNET);
0211     EXPECT_GE(self->child_pid2, 0);
0212 
0213     if (self->child_pid2 == 0) {
0214         close(ipc_sockets[0]);
0215 
0216         if (!switch_timens())
0217             _exit(EXIT_FAILURE);
0218 
0219         if (write_nointr(ipc_sockets[1], "1", 1) < 0)
0220             _exit(EXIT_FAILURE);
0221 
0222         close(ipc_sockets[1]);
0223 
0224         pause();
0225         _exit(EXIT_SUCCESS);
0226     }
0227 
0228     close(ipc_sockets[1]);
0229     ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
0230     close(ipc_sockets[0]);
0231 
0232     for (i = 0; i < PIDFD_NS_MAX; i++) {
0233         char p[100];
0234 
0235         const struct ns_info *info = &ns_info[i];
0236 
0237         self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC);
0238         if (self->nsfds[i] < 0) {
0239             EXPECT_EQ(errno, ENOENT) {
0240                 TH_LOG("%m - Failed to open %s namespace for process %d",
0241                        info->name, self->pid);
0242             }
0243         }
0244 
0245         ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s",
0246                    self->child_pid1, info->name);
0247         EXPECT_GT(ret, 0);
0248         EXPECT_LT(ret, sizeof(p));
0249 
0250         self->child_nsfds1[i] = open(p, O_RDONLY | O_CLOEXEC);
0251         if (self->child_nsfds1[i] < 0) {
0252             EXPECT_EQ(errno, ENOENT) {
0253                 TH_LOG("%m - Failed to open %s namespace for process %d",
0254                        info->name, self->child_pid1);
0255             }
0256         }
0257 
0258         ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s",
0259                    self->child_pid2, info->name);
0260         EXPECT_GT(ret, 0);
0261         EXPECT_LT(ret, sizeof(p));
0262 
0263         self->child_nsfds2[i] = open(p, O_RDONLY | O_CLOEXEC);
0264         if (self->child_nsfds2[i] < 0) {
0265             EXPECT_EQ(errno, ENOENT) {
0266                 TH_LOG("%m - Failed to open %s namespace for process %d",
0267                        info->name, self->child_pid1);
0268             }
0269         }
0270     }
0271 
0272     close(proc_fd);
0273 }
0274 
0275 FIXTURE_TEARDOWN(current_nsset)
0276 {
0277     int i;
0278 
0279     ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd1,
0280                     SIGKILL, NULL, 0), 0);
0281     ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd2,
0282                     SIGKILL, NULL, 0), 0);
0283 
0284     for (i = 0; i < PIDFD_NS_MAX; i++) {
0285         if (self->nsfds[i] >= 0)
0286             close(self->nsfds[i]);
0287         if (self->child_nsfds1[i] >= 0)
0288             close(self->child_nsfds1[i]);
0289         if (self->child_nsfds2[i] >= 0)
0290             close(self->child_nsfds2[i]);
0291     }
0292 
0293     if (self->child_pidfd1 >= 0)
0294         EXPECT_EQ(0, close(self->child_pidfd1));
0295     if (self->child_pidfd2 >= 0)
0296         EXPECT_EQ(0, close(self->child_pidfd2));
0297     ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED), 0);
0298     ASSERT_EQ(sys_waitid(P_PID, self->child_pid1, WEXITED), 0);
0299     ASSERT_EQ(sys_waitid(P_PID, self->child_pid2, WEXITED), 0);
0300 }
0301 
/*
 * Open /proc/<pid>/ns/<ns> read-only with O_CLOEXEC.
 *
 * Returns the value of open() (a file descriptor, or -1 with errno set), or
 * -EIO when the path could not be formatted into the local 50-byte buffer.
 */
static int preserve_ns(const int pid, const char *ns)
{
    char path[50];
    int len = snprintf(path, sizeof(path), "/proc/%d/ns/%s", pid, ns);

    if (len >= 0 && (size_t)len < sizeof(path))
        return open(path, O_RDONLY | O_CLOEXEC);

    return -EIO;
}
0313 
/*
 * Check whether the namespace behind @ns_fd1 is the namespace @ns of process
 * @pid2 by comparing the st_dev/st_ino pairs reported by fstat().
 *
 * Returns 1 if both refer to the same namespace, 0 if they differ and -1 if
 * either fstat() call or opening /proc/<pid2>/ns/<ns> failed.
 */
static int in_same_namespace(int ns_fd1, pid_t pid2, const char *ns)
{
    struct stat st_ref, st_other;
    int other_fd;
    int rc;

    if (fstat(ns_fd1, &st_ref) < 0)
        return -1;

    other_fd = preserve_ns(pid2, ns);
    if (other_fd < 0)
        return -1;

    /* The fd is only needed for the stat data; drop it right away. */
    rc = fstat(other_fd, &st_other);
    close(other_fd);
    if (rc < 0)
        return -1;

    /* Same device and inode: same namespace; otherwise different ones. */
    return st_ref.st_dev == st_other.st_dev &&
           st_ref.st_ino == st_other.st_ino;
}
0341 
0342 /* Test that we can't pass garbage to the kernel. */
0343 TEST_F(current_nsset, invalid_flags)
0344 {
0345     ASSERT_NE(setns(self->pidfd, 0), 0);
0346     EXPECT_EQ(errno, EINVAL);
0347 
0348     ASSERT_NE(setns(self->pidfd, -1), 0);
0349     EXPECT_EQ(errno, EINVAL);
0350 
0351     ASSERT_NE(setns(self->pidfd, CLONE_VM), 0);
0352     EXPECT_EQ(errno, EINVAL);
0353 
0354     ASSERT_NE(setns(self->pidfd, CLONE_NEWUSER | CLONE_VM), 0);
0355     EXPECT_EQ(errno, EINVAL);
0356 }
0357 
0358 /* Test that we can't attach to a task that has already exited. */
0359 TEST_F(current_nsset, pidfd_exited_child)
0360 {
0361     int i;
0362     pid_t pid;
0363 
0364     ASSERT_NE(setns(self->child_pidfd_exited, CLONE_NEWUSER | CLONE_NEWNET),
0365           0);
0366     EXPECT_EQ(errno, ESRCH);
0367 
0368     pid = getpid();
0369     for (i = 0; i < PIDFD_NS_MAX; i++) {
0370         const struct ns_info *info = &ns_info[i];
0371         /* Verify that we haven't changed any namespaces. */
0372         if (self->nsfds[i] >= 0)
0373             ASSERT_EQ(in_same_namespace(self->nsfds[i], pid, info->name), 1);
0374     }
0375 }
0376 
0377 TEST_F(current_nsset, pidfd_incremental_setns)
0378 {
0379     int i;
0380     pid_t pid;
0381 
0382     pid = getpid();
0383     for (i = 0; i < PIDFD_NS_MAX; i++) {
0384         const struct ns_info *info = &ns_info[i];
0385         int nsfd;
0386 
0387         if (self->child_nsfds1[i] < 0)
0388             continue;
0389 
0390         if (info->flag) {
0391             ASSERT_EQ(setns(self->child_pidfd1, info->flag), 0) {
0392                 TH_LOG("%m - Failed to setns to %s namespace of %d via pidfd %d",
0393                        info->name, self->child_pid1,
0394                        self->child_pidfd1);
0395             }
0396         }
0397 
0398         /* Verify that we have changed to the correct namespaces. */
0399         if (info->flag == CLONE_NEWPID)
0400             nsfd = self->nsfds[i];
0401         else
0402             nsfd = self->child_nsfds1[i];
0403         ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
0404             TH_LOG("setns failed to place us correctly into %s namespace of %d via pidfd %d",
0405                    info->name, self->child_pid1,
0406                    self->child_pidfd1);
0407         }
0408         TH_LOG("Managed to correctly setns to %s namespace of %d via pidfd %d",
0409                info->name, self->child_pid1, self->child_pidfd1);
0410     }
0411 }
0412 
0413 TEST_F(current_nsset, nsfd_incremental_setns)
0414 {
0415     int i;
0416     pid_t pid;
0417 
0418     pid = getpid();
0419     for (i = 0; i < PIDFD_NS_MAX; i++) {
0420         const struct ns_info *info = &ns_info[i];
0421         int nsfd;
0422 
0423         if (self->child_nsfds1[i] < 0)
0424             continue;
0425 
0426         if (info->flag) {
0427             ASSERT_EQ(setns(self->child_nsfds1[i], info->flag), 0) {
0428                 TH_LOG("%m - Failed to setns to %s namespace of %d via nsfd %d",
0429                        info->name, self->child_pid1,
0430                        self->child_nsfds1[i]);
0431             }
0432         }
0433 
0434         /* Verify that we have changed to the correct namespaces. */
0435         if (info->flag == CLONE_NEWPID)
0436             nsfd = self->nsfds[i];
0437         else
0438             nsfd = self->child_nsfds1[i];
0439         ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
0440             TH_LOG("setns failed to place us correctly into %s namespace of %d via nsfd %d",
0441                    info->name, self->child_pid1,
0442                    self->child_nsfds1[i]);
0443         }
0444         TH_LOG("Managed to correctly setns to %s namespace of %d via nsfd %d",
0445                info->name, self->child_pid1, self->child_nsfds1[i]);
0446     }
0447 }
0448 
0449 TEST_F(current_nsset, pidfd_one_shot_setns)
0450 {
0451     unsigned flags = 0;
0452     int i;
0453     pid_t pid;
0454 
0455     for (i = 0; i < PIDFD_NS_MAX; i++) {
0456         const struct ns_info *info = &ns_info[i];
0457 
0458         if (self->child_nsfds1[i] < 0)
0459             continue;
0460 
0461         flags |= info->flag;
0462         TH_LOG("Adding %s namespace of %d to list of namespaces to attach to",
0463                info->name, self->child_pid1);
0464     }
0465 
0466     ASSERT_EQ(setns(self->child_pidfd1, flags), 0) {
0467         TH_LOG("%m - Failed to setns to namespaces of %d",
0468                self->child_pid1);
0469     }
0470 
0471     pid = getpid();
0472     for (i = 0; i < PIDFD_NS_MAX; i++) {
0473         const struct ns_info *info = &ns_info[i];
0474         int nsfd;
0475 
0476         if (self->child_nsfds1[i] < 0)
0477             continue;
0478 
0479         /* Verify that we have changed to the correct namespaces. */
0480         if (info->flag == CLONE_NEWPID)
0481             nsfd = self->nsfds[i];
0482         else
0483             nsfd = self->child_nsfds1[i];
0484         ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
0485             TH_LOG("setns failed to place us correctly into %s namespace of %d",
0486                    info->name, self->child_pid1);
0487         }
0488         TH_LOG("Managed to correctly setns to %s namespace of %d",
0489                info->name, self->child_pid1);
0490     }
0491 }
0492 
0493 TEST_F(current_nsset, no_foul_play)
0494 {
0495     unsigned flags = 0;
0496     int i;
0497 
0498     for (i = 0; i < PIDFD_NS_MAX; i++) {
0499         const struct ns_info *info = &ns_info[i];
0500 
0501         if (self->child_nsfds1[i] < 0)
0502             continue;
0503 
0504         flags |= info->flag;
0505         if (info->flag) /* No use logging pid_for_children. */
0506             TH_LOG("Adding %s namespace of %d to list of namespaces to attach to",
0507                    info->name, self->child_pid1);
0508     }
0509 
0510     ASSERT_EQ(setns(self->child_pidfd1, flags), 0) {
0511         TH_LOG("%m - Failed to setns to namespaces of %d vid pidfd %d",
0512                self->child_pid1, self->child_pidfd1);
0513     }
0514 
0515     /*
0516      * Can't setns to a user namespace outside of our hierarchy since we
0517      * don't have caps in there and didn't create it. That means that under
0518      * no circumstances should we be able to setns to any of the other
0519      * ones since they aren't owned by our user namespace.
0520      */
0521     for (i = 0; i < PIDFD_NS_MAX; i++) {
0522         const struct ns_info *info = &ns_info[i];
0523 
0524         if (self->child_nsfds2[i] < 0 || !info->flag)
0525             continue;
0526 
0527         ASSERT_NE(setns(self->child_pidfd2, info->flag), 0) {
0528             TH_LOG("Managed to setns to %s namespace of %d via pidfd %d",
0529                    info->name, self->child_pid2,
0530                    self->child_pidfd2);
0531         }
0532         TH_LOG("%m - Correctly failed to setns to %s namespace of %d via pidfd %d",
0533                info->name, self->child_pid2,
0534                self->child_pidfd2);
0535 
0536         ASSERT_NE(setns(self->child_nsfds2[i], info->flag), 0) {
0537             TH_LOG("Managed to setns to %s namespace of %d via nsfd %d",
0538                    info->name, self->child_pid2,
0539                    self->child_nsfds2[i]);
0540         }
0541         TH_LOG("%m - Correctly failed to setns to %s namespace of %d via nsfd %d",
0542                info->name, self->child_pid2,
0543                self->child_nsfds2[i]);
0544     }
0545 }
0546 
/*
 * Test that setns() on a file descriptor obtained from sys_memfd_create()
 * fails; errno is expected to be EINVAL.
 */
TEST(setns_einval)
{
    int fd;

    fd = sys_memfd_create("rostock", 0);
    /* 0 is a valid descriptor; a negative fd makes the rest meaningless. */
    ASSERT_GE(fd, 0);

    ASSERT_NE(setns(fd, 0), 0);
    EXPECT_EQ(errno, EINVAL);
    close(fd);
}
0558 
/* NOTE(review): presumably expands to the program's main() via kselftest_harness.h — confirm. */
TEST_HARNESS_MAIN