0001
0002
0003 #define _GNU_SOURCE
0004 #include <errno.h>
0005 #include <fcntl.h>
0006 #include <limits.h>
0007 #include <linux/types.h>
0008 #include <sched.h>
0009 #include <signal.h>
0010 #include <stdio.h>
0011 #include <stdlib.h>
0012 #include <string.h>
0013 #include <syscall.h>
0014 #include <sys/prctl.h>
0015 #include <sys/wait.h>
0016 #include <unistd.h>
0017 #include <sys/socket.h>
0018 #include <sys/stat.h>
0019
0020 #include "pidfd.h"
0021 #include "../clone3/clone3_selftests.h"
0022 #include "../kselftest_harness.h"
0023
/*
 * Index of every namespace type exercised by these tests. The values are used
 * to index ns_info[] and the per-task namespace fd arrays of the fixture.
 */
enum {
	PIDFD_NS_USER = 0,
	PIDFD_NS_MNT,
	PIDFD_NS_PID,
	PIDFD_NS_UTS,
	PIDFD_NS_IPC,
	PIDFD_NS_NET,
	PIDFD_NS_CGROUP,
	PIDFD_NS_PIDCLD,
	PIDFD_NS_TIME,

	/* Number of entries above; not a namespace itself. */
	PIDFD_NS_MAX,
};
0036
0037 const struct ns_info {
0038 const char *name;
0039 int flag;
0040 } ns_info[] = {
0041 [PIDFD_NS_USER] = { "user", CLONE_NEWUSER, },
0042 [PIDFD_NS_MNT] = { "mnt", CLONE_NEWNS, },
0043 [PIDFD_NS_PID] = { "pid", CLONE_NEWPID, },
0044 [PIDFD_NS_UTS] = { "uts", CLONE_NEWUTS, },
0045 [PIDFD_NS_IPC] = { "ipc", CLONE_NEWIPC, },
0046 [PIDFD_NS_NET] = { "net", CLONE_NEWNET, },
0047 [PIDFD_NS_CGROUP] = { "cgroup", CLONE_NEWCGROUP, },
0048 [PIDFD_NS_PIDCLD] = { "pid_for_children", 0, },
0049 [PIDFD_NS_TIME] = { "time", CLONE_NEWTIME, },
0050 };
0051
/*
 * Per-test state of the current_nsset fixture: the test process itself, one
 * child that exits right after creation, and two children that stay alive.
 * Namespace fd arrays are indexed by PIDFD_NS_*; setup initialises every slot
 * to -EBADF before trying to open it.
 */
FIXTURE(current_nsset)
{
	/* The test process: pid, pidfd and one fd per namespace. */
	pid_t pid;
	int pidfd;
	int nsfds[PIDFD_NS_MAX];

	/* Child that exits immediately after being created. */
	pid_t child_pid_exited;
	int child_pidfd_exited;

	/* First long-lived child: pid, pidfd and one fd per namespace. */
	pid_t child_pid1;
	int child_pidfd1;
	int child_nsfds1[PIDFD_NS_MAX];

	/* Second long-lived child: pid, pidfd and one fd per namespace. */
	pid_t child_pid2;
	int child_pidfd2;
	int child_nsfds2[PIDFD_NS_MAX];
};
0069
/*
 * Invoke the raw waitid system call, passing NULL for both the siginfo and
 * the rusage argument.
 *
 * Returns the system call's result truncated to int (0 on success, -1 with
 * errno set on failure).
 */
static int sys_waitid(int which, pid_t pid, int options)
{
	long rc;

	rc = syscall(__NR_waitid, which, pid, NULL, options, NULL);

	return rc;
}
0074
0075 pid_t create_child(int *pidfd, unsigned flags)
0076 {
0077 struct __clone_args args = {
0078 .flags = CLONE_PIDFD | flags,
0079 .exit_signal = SIGCHLD,
0080 .pidfd = ptr_to_u64(pidfd),
0081 };
0082
0083 return sys_clone3(&args, sizeof(struct clone_args));
0084 }
0085
0086 static bool switch_timens(void)
0087 {
0088 int fd, ret;
0089
0090 if (unshare(CLONE_NEWTIME))
0091 return false;
0092
0093 fd = open("/proc/self/ns/time_for_children", O_RDONLY | O_CLOEXEC);
0094 if (fd < 0)
0095 return false;
0096
0097 ret = setns(fd, CLONE_NEWTIME);
0098 close(fd);
0099 return ret == 0;
0100 }
0101
/*
 * read() that is restarted for as long as it fails with EINTR.
 *
 * Returns whatever the first read() that was not interrupted returned: the
 * number of bytes read, 0 at end of file, or -1 with errno set.
 */
static ssize_t read_nointr(int fd, void *buf, size_t count)
{
	for (;;) {
		ssize_t nread = read(fd, buf, count);

		if (nread >= 0 || errno != EINTR)
			return nread;
	}
}
0112
/*
 * write() that is restarted for as long as it fails with EINTR.
 *
 * Returns whatever the first write() that was not interrupted returned: the
 * number of bytes written, or -1 with errno set.
 */
static ssize_t write_nointr(int fd, const void *buf, size_t count)
{
	for (;;) {
		ssize_t nwritten = write(fd, buf, count);

		if (nwritten >= 0 || errno != EINTR)
			return nwritten;
	}
}
0123
0124 FIXTURE_SETUP(current_nsset)
0125 {
0126 int i, proc_fd, ret;
0127 int ipc_sockets[2];
0128 char c;
0129
0130 for (i = 0; i < PIDFD_NS_MAX; i++) {
0131 self->nsfds[i] = -EBADF;
0132 self->child_nsfds1[i] = -EBADF;
0133 self->child_nsfds2[i] = -EBADF;
0134 }
0135
0136 proc_fd = open("/proc/self/ns", O_DIRECTORY | O_CLOEXEC);
0137 ASSERT_GE(proc_fd, 0) {
0138 TH_LOG("%m - Failed to open /proc/self/ns");
0139 }
0140
0141 self->pid = getpid();
0142 for (i = 0; i < PIDFD_NS_MAX; i++) {
0143 const struct ns_info *info = &ns_info[i];
0144 self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC);
0145 if (self->nsfds[i] < 0) {
0146 EXPECT_EQ(errno, ENOENT) {
0147 TH_LOG("%m - Failed to open %s namespace for process %d",
0148 info->name, self->pid);
0149 }
0150 }
0151 }
0152
0153 self->pidfd = sys_pidfd_open(self->pid, 0);
0154 EXPECT_GT(self->pidfd, 0) {
0155 TH_LOG("%m - Failed to open pidfd for process %d", self->pid);
0156 }
0157
0158
0159 self->child_pid_exited = create_child(&self->child_pidfd_exited,
0160 CLONE_NEWUSER | CLONE_NEWNET);
0161 EXPECT_GT(self->child_pid_exited, 0);
0162
0163 if (self->child_pid_exited == 0)
0164 _exit(EXIT_SUCCESS);
0165
0166 ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED | WNOWAIT), 0);
0167
0168 self->pidfd = sys_pidfd_open(self->pid, 0);
0169 EXPECT_GE(self->pidfd, 0) {
0170 TH_LOG("%m - Failed to open pidfd for process %d", self->pid);
0171 }
0172
0173 ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
0174 EXPECT_EQ(ret, 0);
0175
0176
0177 self->child_pid1 = create_child(&self->child_pidfd1,
0178 CLONE_NEWUSER | CLONE_NEWNS |
0179 CLONE_NEWCGROUP | CLONE_NEWIPC |
0180 CLONE_NEWUTS | CLONE_NEWPID |
0181 CLONE_NEWNET);
0182 EXPECT_GE(self->child_pid1, 0);
0183
0184 if (self->child_pid1 == 0) {
0185 close(ipc_sockets[0]);
0186
0187 if (!switch_timens())
0188 _exit(EXIT_FAILURE);
0189
0190 if (write_nointr(ipc_sockets[1], "1", 1) < 0)
0191 _exit(EXIT_FAILURE);
0192
0193 close(ipc_sockets[1]);
0194
0195 pause();
0196 _exit(EXIT_SUCCESS);
0197 }
0198
0199 close(ipc_sockets[1]);
0200 ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
0201 close(ipc_sockets[0]);
0202
0203 ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
0204 EXPECT_EQ(ret, 0);
0205
0206 self->child_pid2 = create_child(&self->child_pidfd2,
0207 CLONE_NEWUSER | CLONE_NEWNS |
0208 CLONE_NEWCGROUP | CLONE_NEWIPC |
0209 CLONE_NEWUTS | CLONE_NEWPID |
0210 CLONE_NEWNET);
0211 EXPECT_GE(self->child_pid2, 0);
0212
0213 if (self->child_pid2 == 0) {
0214 close(ipc_sockets[0]);
0215
0216 if (!switch_timens())
0217 _exit(EXIT_FAILURE);
0218
0219 if (write_nointr(ipc_sockets[1], "1", 1) < 0)
0220 _exit(EXIT_FAILURE);
0221
0222 close(ipc_sockets[1]);
0223
0224 pause();
0225 _exit(EXIT_SUCCESS);
0226 }
0227
0228 close(ipc_sockets[1]);
0229 ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
0230 close(ipc_sockets[0]);
0231
0232 for (i = 0; i < PIDFD_NS_MAX; i++) {
0233 char p[100];
0234
0235 const struct ns_info *info = &ns_info[i];
0236
0237 self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC);
0238 if (self->nsfds[i] < 0) {
0239 EXPECT_EQ(errno, ENOENT) {
0240 TH_LOG("%m - Failed to open %s namespace for process %d",
0241 info->name, self->pid);
0242 }
0243 }
0244
0245 ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s",
0246 self->child_pid1, info->name);
0247 EXPECT_GT(ret, 0);
0248 EXPECT_LT(ret, sizeof(p));
0249
0250 self->child_nsfds1[i] = open(p, O_RDONLY | O_CLOEXEC);
0251 if (self->child_nsfds1[i] < 0) {
0252 EXPECT_EQ(errno, ENOENT) {
0253 TH_LOG("%m - Failed to open %s namespace for process %d",
0254 info->name, self->child_pid1);
0255 }
0256 }
0257
0258 ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s",
0259 self->child_pid2, info->name);
0260 EXPECT_GT(ret, 0);
0261 EXPECT_LT(ret, sizeof(p));
0262
0263 self->child_nsfds2[i] = open(p, O_RDONLY | O_CLOEXEC);
0264 if (self->child_nsfds2[i] < 0) {
0265 EXPECT_EQ(errno, ENOENT) {
0266 TH_LOG("%m - Failed to open %s namespace for process %d",
0267 info->name, self->child_pid1);
0268 }
0269 }
0270 }
0271
0272 close(proc_fd);
0273 }
0274
0275 FIXTURE_TEARDOWN(current_nsset)
0276 {
0277 int i;
0278
0279 ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd1,
0280 SIGKILL, NULL, 0), 0);
0281 ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd2,
0282 SIGKILL, NULL, 0), 0);
0283
0284 for (i = 0; i < PIDFD_NS_MAX; i++) {
0285 if (self->nsfds[i] >= 0)
0286 close(self->nsfds[i]);
0287 if (self->child_nsfds1[i] >= 0)
0288 close(self->child_nsfds1[i]);
0289 if (self->child_nsfds2[i] >= 0)
0290 close(self->child_nsfds2[i]);
0291 }
0292
0293 if (self->child_pidfd1 >= 0)
0294 EXPECT_EQ(0, close(self->child_pidfd1));
0295 if (self->child_pidfd2 >= 0)
0296 EXPECT_EQ(0, close(self->child_pidfd2));
0297 ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED), 0);
0298 ASSERT_EQ(sys_waitid(P_PID, self->child_pid1, WEXITED), 0);
0299 ASSERT_EQ(sys_waitid(P_PID, self->child_pid2, WEXITED), 0);
0300 }
0301
/*
 * Open /proc/<pid>/ns/<ns> read-only with O_CLOEXEC.
 *
 * Returns the new fd, -1 with errno set if open() fails, or -EIO if the path
 * could not be formatted into the local buffer.
 */
static int preserve_ns(const int pid, const char *ns)
{
	char path[50];
	int len = snprintf(path, sizeof(path), "/proc/%d/ns/%s", pid, ns);

	if (len >= 0 && (size_t)len < sizeof(path))
		return open(path, O_RDONLY | O_CLOEXEC);

	return -EIO;
}
0313
/*
 * Check whether the namespace referred to by @ns_fd1 is the namespace of type
 * @ns that process @pid2 is in, by comparing device and inode number of the
 * two namespace files.
 *
 * Returns 1 if they are the same, 0 if they differ and -1 if either namespace
 * file could not be opened or stat'ed.
 */
static int in_same_namespace(int ns_fd1, pid_t pid2, const char *ns)
{
	struct stat st_first, st_second;
	int other_fd, rc;

	if (fstat(ns_fd1, &st_first) < 0)
		return -1;

	other_fd = preserve_ns(pid2, ns);
	if (other_fd < 0)
		return -1;

	rc = fstat(other_fd, &st_second);
	close(other_fd);
	if (rc < 0)
		return -1;

	/* Same namespace <=> same device and same inode. */
	return st_first.st_dev == st_second.st_dev &&
	       st_first.st_ino == st_second.st_ino;
}
0341
0342
0343 TEST_F(current_nsset, invalid_flags)
0344 {
0345 ASSERT_NE(setns(self->pidfd, 0), 0);
0346 EXPECT_EQ(errno, EINVAL);
0347
0348 ASSERT_NE(setns(self->pidfd, -1), 0);
0349 EXPECT_EQ(errno, EINVAL);
0350
0351 ASSERT_NE(setns(self->pidfd, CLONE_VM), 0);
0352 EXPECT_EQ(errno, EINVAL);
0353
0354 ASSERT_NE(setns(self->pidfd, CLONE_NEWUSER | CLONE_VM), 0);
0355 EXPECT_EQ(errno, EINVAL);
0356 }
0357
0358
0359 TEST_F(current_nsset, pidfd_exited_child)
0360 {
0361 int i;
0362 pid_t pid;
0363
0364 ASSERT_NE(setns(self->child_pidfd_exited, CLONE_NEWUSER | CLONE_NEWNET),
0365 0);
0366 EXPECT_EQ(errno, ESRCH);
0367
0368 pid = getpid();
0369 for (i = 0; i < PIDFD_NS_MAX; i++) {
0370 const struct ns_info *info = &ns_info[i];
0371
0372 if (self->nsfds[i] >= 0)
0373 ASSERT_EQ(in_same_namespace(self->nsfds[i], pid, info->name), 1);
0374 }
0375 }
0376
0377 TEST_F(current_nsset, pidfd_incremental_setns)
0378 {
0379 int i;
0380 pid_t pid;
0381
0382 pid = getpid();
0383 for (i = 0; i < PIDFD_NS_MAX; i++) {
0384 const struct ns_info *info = &ns_info[i];
0385 int nsfd;
0386
0387 if (self->child_nsfds1[i] < 0)
0388 continue;
0389
0390 if (info->flag) {
0391 ASSERT_EQ(setns(self->child_pidfd1, info->flag), 0) {
0392 TH_LOG("%m - Failed to setns to %s namespace of %d via pidfd %d",
0393 info->name, self->child_pid1,
0394 self->child_pidfd1);
0395 }
0396 }
0397
0398
0399 if (info->flag == CLONE_NEWPID)
0400 nsfd = self->nsfds[i];
0401 else
0402 nsfd = self->child_nsfds1[i];
0403 ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
0404 TH_LOG("setns failed to place us correctly into %s namespace of %d via pidfd %d",
0405 info->name, self->child_pid1,
0406 self->child_pidfd1);
0407 }
0408 TH_LOG("Managed to correctly setns to %s namespace of %d via pidfd %d",
0409 info->name, self->child_pid1, self->child_pidfd1);
0410 }
0411 }
0412
0413 TEST_F(current_nsset, nsfd_incremental_setns)
0414 {
0415 int i;
0416 pid_t pid;
0417
0418 pid = getpid();
0419 for (i = 0; i < PIDFD_NS_MAX; i++) {
0420 const struct ns_info *info = &ns_info[i];
0421 int nsfd;
0422
0423 if (self->child_nsfds1[i] < 0)
0424 continue;
0425
0426 if (info->flag) {
0427 ASSERT_EQ(setns(self->child_nsfds1[i], info->flag), 0) {
0428 TH_LOG("%m - Failed to setns to %s namespace of %d via nsfd %d",
0429 info->name, self->child_pid1,
0430 self->child_nsfds1[i]);
0431 }
0432 }
0433
0434
0435 if (info->flag == CLONE_NEWPID)
0436 nsfd = self->nsfds[i];
0437 else
0438 nsfd = self->child_nsfds1[i];
0439 ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
0440 TH_LOG("setns failed to place us correctly into %s namespace of %d via nsfd %d",
0441 info->name, self->child_pid1,
0442 self->child_nsfds1[i]);
0443 }
0444 TH_LOG("Managed to correctly setns to %s namespace of %d via nsfd %d",
0445 info->name, self->child_pid1, self->child_nsfds1[i]);
0446 }
0447 }
0448
0449 TEST_F(current_nsset, pidfd_one_shot_setns)
0450 {
0451 unsigned flags = 0;
0452 int i;
0453 pid_t pid;
0454
0455 for (i = 0; i < PIDFD_NS_MAX; i++) {
0456 const struct ns_info *info = &ns_info[i];
0457
0458 if (self->child_nsfds1[i] < 0)
0459 continue;
0460
0461 flags |= info->flag;
0462 TH_LOG("Adding %s namespace of %d to list of namespaces to attach to",
0463 info->name, self->child_pid1);
0464 }
0465
0466 ASSERT_EQ(setns(self->child_pidfd1, flags), 0) {
0467 TH_LOG("%m - Failed to setns to namespaces of %d",
0468 self->child_pid1);
0469 }
0470
0471 pid = getpid();
0472 for (i = 0; i < PIDFD_NS_MAX; i++) {
0473 const struct ns_info *info = &ns_info[i];
0474 int nsfd;
0475
0476 if (self->child_nsfds1[i] < 0)
0477 continue;
0478
0479
0480 if (info->flag == CLONE_NEWPID)
0481 nsfd = self->nsfds[i];
0482 else
0483 nsfd = self->child_nsfds1[i];
0484 ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
0485 TH_LOG("setns failed to place us correctly into %s namespace of %d",
0486 info->name, self->child_pid1);
0487 }
0488 TH_LOG("Managed to correctly setns to %s namespace of %d",
0489 info->name, self->child_pid1);
0490 }
0491 }
0492
0493 TEST_F(current_nsset, no_foul_play)
0494 {
0495 unsigned flags = 0;
0496 int i;
0497
0498 for (i = 0; i < PIDFD_NS_MAX; i++) {
0499 const struct ns_info *info = &ns_info[i];
0500
0501 if (self->child_nsfds1[i] < 0)
0502 continue;
0503
0504 flags |= info->flag;
0505 if (info->flag)
0506 TH_LOG("Adding %s namespace of %d to list of namespaces to attach to",
0507 info->name, self->child_pid1);
0508 }
0509
0510 ASSERT_EQ(setns(self->child_pidfd1, flags), 0) {
0511 TH_LOG("%m - Failed to setns to namespaces of %d vid pidfd %d",
0512 self->child_pid1, self->child_pidfd1);
0513 }
0514
0515
0516
0517
0518
0519
0520
0521 for (i = 0; i < PIDFD_NS_MAX; i++) {
0522 const struct ns_info *info = &ns_info[i];
0523
0524 if (self->child_nsfds2[i] < 0 || !info->flag)
0525 continue;
0526
0527 ASSERT_NE(setns(self->child_pidfd2, info->flag), 0) {
0528 TH_LOG("Managed to setns to %s namespace of %d via pidfd %d",
0529 info->name, self->child_pid2,
0530 self->child_pidfd2);
0531 }
0532 TH_LOG("%m - Correctly failed to setns to %s namespace of %d via pidfd %d",
0533 info->name, self->child_pid2,
0534 self->child_pidfd2);
0535
0536 ASSERT_NE(setns(self->child_nsfds2[i], info->flag), 0) {
0537 TH_LOG("Managed to setns to %s namespace of %d via nsfd %d",
0538 info->name, self->child_pid2,
0539 self->child_nsfds2[i]);
0540 }
0541 TH_LOG("%m - Correctly failed to setns to %s namespace of %d via nsfd %d",
0542 info->name, self->child_pid2,
0543 self->child_nsfds2[i]);
0544 }
0545 }
0546
/*
 * setns() on a file descriptor that is neither a namespace fd nor a pidfd (a
 * memfd here) must fail with EINVAL.
 */
TEST(setns_einval)
{
	int fd, ret, saved_errno;

	/* 0 is a valid descriptor; without a valid fd the test is meaningless. */
	fd = sys_memfd_create("rostock", 0);
	ASSERT_GE(fd, 0);

	/* Capture the result first so the fd is closed on every path. */
	ret = setns(fd, 0);
	saved_errno = errno;
	close(fd);

	ASSERT_NE(ret, 0);
	EXPECT_EQ(saved_errno, EINVAL);
}
0558
/* Test runner entry point; presumably provided by ../kselftest_harness.h. */
TEST_HARNESS_MAIN