Back to home page

OSCL-LXR

 
 

    


0001 /* SPDX-License-Identifier: GPL-2.0 */
0002 
0003 #define _GNU_SOURCE
0004 
0005 #include <errno.h>
0006 #include <fcntl.h>
0007 #include <linux/limits.h>
0008 #include <poll.h>
0009 #include <signal.h>
0010 #include <stdio.h>
0011 #include <stdlib.h>
0012 #include <string.h>
0013 #include <sys/inotify.h>
0014 #include <sys/stat.h>
0015 #include <sys/types.h>
0016 #include <sys/wait.h>
0017 #include <unistd.h>
0018 
0019 #include "cgroup_util.h"
0020 #include "../clone3/clone3_selftests.h"
0021 
/*
 * Read up to max_len - 1 bytes from the file at @path into @buf and
 * NUL-terminate the result.
 *
 * Returns the number of bytes read on success, or -errno on failure.
 * A zero-sized buffer is rejected with -EINVAL: there would be no room for
 * the terminator and max_len - 1 would wrap around.
 */
static ssize_t read_text(const char *path, char *buf, size_t max_len)
{
    ssize_t len;
    int fd, err;

    if (!max_len)
        return -EINVAL;

    fd = open(path, O_RDONLY);
    if (fd < 0)
        return -errno;

    len = read(fd, buf, max_len - 1);
    /* Capture errno now: close() below is allowed to overwrite it. */
    err = errno;

    if (len >= 0)
        buf[len] = 0;

    close(fd);
    return len < 0 ? -err : len;
}
0040 
/*
 * Append @len bytes from @buf to the existing file at @path.
 *
 * Returns the number of bytes written on success, or -errno on failure.
 */
static ssize_t write_text(const char *path, char *buf, ssize_t len)
{
    ssize_t written;
    int fd, err;

    fd = open(path, O_WRONLY | O_APPEND);
    if (fd < 0)
        return -errno;

    written = write(fd, buf, len);
    /* Capture errno now: close() below is allowed to overwrite it. */
    err = errno;

    close(fd);
    return written < 0 ? -err : written;
}
0054 
/*
 * Build the path "<root>/<name>" in a freshly allocated string.
 *
 * Returns the new string, which the caller must free(), or NULL when the
 * allocation fails.
 */
char *cg_name(const char *root, const char *name)
{
    size_t len = strlen(root) + strlen(name) + 2;
    char *ret = malloc(len);

    /* Do not hand a NULL buffer to snprintf() on allocation failure. */
    if (!ret)
        return NULL;

    snprintf(ret, len, "%s/%s", root, name);

    return ret;
}
0064 
/*
 * Build the path "<root>/<name>_<index>" in a freshly allocated string.
 *
 * The buffer is sized from the formatted length, so every int value of
 * @index (including negative ones) is rendered without truncation.
 *
 * Returns the new string, which the caller must free(), or NULL on failure.
 */
char *cg_name_indexed(const char *root, const char *name, int index)
{
    int need = snprintf(NULL, 0, "%s/%s_%d", root, name, index);
    size_t len;
    char *ret;

    if (need < 0)
        return NULL;

    len = (size_t)need + 1;
    ret = malloc(len);
    if (!ret)
        return NULL;

    snprintf(ret, len, "%s/%s_%d", root, name, index);

    return ret;
}
0074 
/*
 * Build the path "<cgroup>/<control>" in a freshly allocated string.
 *
 * Returns the new string, which the caller must free(), or NULL when the
 * allocation fails.
 */
char *cg_control(const char *cgroup, const char *control)
{
    size_t len = strlen(cgroup) + strlen(control) + 2;
    char *ret = malloc(len);

    /* Do not hand a NULL buffer to snprintf() on allocation failure. */
    if (!ret)
        return NULL;

    snprintf(ret, len, "%s/%s", cgroup, control);

    return ret;
}
0084 
/*
 * Read the file "<cgroup>/<control>" into @buf, which has room for @len
 * bytes, via read_text().
 *
 * Returns 0 on success, or -errno on failure.
 */
int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
{
    char file[PATH_MAX];
    ssize_t nread;

    snprintf(file, sizeof(file), "%s/%s", cgroup, control);
    nread = read_text(file, buf, len);

    if (nread < 0)
        return nread;

    return 0;
}
0096 
/*
 * Compare the beginning of "<cgroup>/<control>" with @expected.
 *
 * The read buffer is strlen(expected) + 1 bytes, so no more than
 * strlen(expected) bytes of the file are read before strcmp() runs.
 *
 * Returns the strcmp() result (0 when equal), or -1 when @expected is NULL,
 * the allocation fails, or the file cannot be read.
 */
int cg_read_strcmp(const char *cgroup, const char *control,
                   const char *expected)
{
    size_t buflen;
    char *data;
    int cmp = -1;

    /* A NULL expectation cannot be compared against anything. */
    if (!expected)
        return -1;

    buflen = strlen(expected) + 1;
    data = malloc(buflen);
    if (!data)
        return -1;

    if (!cg_read(cgroup, control, data, buflen))
        cmp = strcmp(expected, data);

    free(data);
    return cmp;
}
0123 
0124 int cg_read_strstr(const char *cgroup, const char *control, const char *needle)
0125 {
0126     char buf[PAGE_SIZE];
0127 
0128     if (cg_read(cgroup, control, buf, sizeof(buf)))
0129         return -1;
0130 
0131     return strstr(buf, needle) ? 0 : -1;
0132 }
0133 
/*
 * Read "<cgroup>/<control>" and convert its text with atol().
 *
 * Returns the converted value, or -1 when the file cannot be read.
 */
long cg_read_long(const char *cgroup, const char *control)
{
    char text[128];
    int err = cg_read(cgroup, control, text, sizeof(text));

    return err ? -1 : atol(text);
}
0143 
0144 long cg_read_key_long(const char *cgroup, const char *control, const char *key)
0145 {
0146     char buf[PAGE_SIZE];
0147     char *ptr;
0148 
0149     if (cg_read(cgroup, control, buf, sizeof(buf)))
0150         return -1;
0151 
0152     ptr = strstr(buf, key);
0153     if (!ptr)
0154         return -1;
0155 
0156     return atol(ptr + strlen(key));
0157 }
0158 
0159 long cg_read_lc(const char *cgroup, const char *control)
0160 {
0161     char buf[PAGE_SIZE];
0162     const char delim[] = "\n";
0163     char *line;
0164     long cnt = 0;
0165 
0166     if (cg_read(cgroup, control, buf, sizeof(buf)))
0167         return -1;
0168 
0169     for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
0170         cnt++;
0171 
0172     return cnt;
0173 }
0174 
/*
 * Write the NUL-terminated string @buf to the file "<cgroup>/<control>".
 *
 * Returns 0 when the whole string was written, -errno when the write failed,
 * or -EIO when fewer bytes than requested were written.
 */
int cg_write(const char *cgroup, const char *control, char *buf)
{
    char path[PATH_MAX];
    ssize_t len = strlen(buf), ret;

    snprintf(path, sizeof(path), "%s/%s", cgroup, control);
    ret = write_text(path, buf, len);
    if (ret < 0)
        return ret;

    /*
     * A short write must not be reported as success: returning the raw
     * count would yield 0 for a zero-byte write of a non-empty string.
     */
    return ret == len ? 0 : -EIO;
}
0185 
/*
 * Format @value as a decimal number and write it to "<cgroup>/<control>".
 *
 * Returns the cg_write() result, or a negative value when formatting fails.
 */
int cg_write_numeric(const char *cgroup, const char *control, long value)
{
    char buf[64];
    int ret;

    /* %ld matches the signed long argument; %lu did not. */
    ret = snprintf(buf, sizeof(buf), "%ld", value);
    if (ret < 0)
        return ret;

    return cg_write(cgroup, control, buf);
}
0197 
0198 int cg_find_unified_root(char *root, size_t len)
0199 {
0200     char buf[10 * PAGE_SIZE];
0201     char *fs, *mount, *type;
0202     const char delim[] = "\n\t ";
0203 
0204     if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0)
0205         return -1;
0206 
0207     /*
0208      * Example:
0209      * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0
0210      */
0211     for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) {
0212         mount = strtok(NULL, delim);
0213         type = strtok(NULL, delim);
0214         strtok(NULL, delim);
0215         strtok(NULL, delim);
0216         strtok(NULL, delim);
0217 
0218         if (strcmp(type, "cgroup2") == 0) {
0219             strncpy(root, mount, len);
0220             return 0;
0221         }
0222     }
0223 
0224     return -1;
0225 }
0226 
/*
 * Create the directory @cgroup with mode 0755.
 *
 * Returns the mkdir() result: 0 on success, -1 with errno set on failure.
 */
int cg_create(const char *cgroup)
{
    const mode_t mode = 0755;
    int rc = mkdir(cgroup, mode);

    return rc;
}
0231 
0232 int cg_wait_for_proc_count(const char *cgroup, int count)
0233 {
0234     char buf[10 * PAGE_SIZE] = {0};
0235     int attempts;
0236     char *ptr;
0237 
0238     for (attempts = 10; attempts >= 0; attempts--) {
0239         int nr = 0;
0240 
0241         if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
0242             break;
0243 
0244         for (ptr = buf; *ptr; ptr++)
0245             if (*ptr == '\n')
0246                 nr++;
0247 
0248         if (nr >= count)
0249             return 0;
0250 
0251         usleep(100000);
0252     }
0253 
0254     return -1;
0255 }
0256 
0257 int cg_killall(const char *cgroup)
0258 {
0259     char buf[PAGE_SIZE];
0260     char *ptr = buf;
0261 
0262     /* If cgroup.kill exists use it. */
0263     if (!cg_write(cgroup, "cgroup.kill", "1"))
0264         return 0;
0265 
0266     if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
0267         return -1;
0268 
0269     while (ptr < buf + sizeof(buf)) {
0270         int pid = strtol(ptr, &ptr, 10);
0271 
0272         if (pid == 0)
0273             break;
0274         if (*ptr)
0275             ptr++;
0276         else
0277             break;
0278         if (kill(pid, SIGKILL))
0279             return -1;
0280     }
0281 
0282     return 0;
0283 }
0284 
/*
 * Remove the directory @cgroup. While rmdir() fails with EBUSY, the
 * cgroup's processes are killed and the removal is retried after 100 us.
 *
 * Returns 0 on success or when the directory does not exist, otherwise the
 * failing rmdir() result.
 */
int cg_destroy(const char *cgroup)
{
    int rc;

    while ((rc = rmdir(cgroup)) != 0 && errno == EBUSY) {
        cg_killall(cgroup);
        usleep(100);
    }

    /* An already missing cgroup counts as destroyed. */
    if (rc != 0 && errno == ENOENT)
        return 0;

    return rc;
}
0302 
/*
 * Write @pid, formatted as decimal text, to "<cgroup>/cgroup.procs".
 *
 * Returns the cg_write() result.
 */
int cg_enter(const char *cgroup, int pid)
{
    char text[64];
    int rc;

    snprintf(text, sizeof(text), "%d", pid);
    rc = cg_write(cgroup, "cgroup.procs", text);

    return rc;
}
0310 
/*
 * Write "0" to "<cgroup>/cgroup.procs".
 *
 * Returns the cg_write() result.
 */
int cg_enter_current(const char *cgroup)
{
    char self[] = "0";

    return cg_write(cgroup, "cgroup.procs", self);
}
0315 
/*
 * Write "0" to "<cgroup>/cgroup.threads".
 *
 * Returns the cg_write() result.
 */
int cg_enter_current_thread(const char *cgroup)
{
    char self[] = "0";

    return cg_write(cgroup, "cgroup.threads", self);
}
0320 
/*
 * Fork a child, have it write its own PID to "<cgroup>/cgroup.procs", run
 * @fn(@cgroup, @arg) in it, and wait for it to finish.
 *
 * Returns the child's exit status when it exited normally (EXIT_FAILURE if
 * it could not write to cgroup.procs), the negative fork() result when the
 * fork fails, or -1 when the child did not exit normally or could not be
 * waited for.
 */
int cg_run(const char *cgroup,
           int (*fn)(const char *cgroup, void *arg),
           void *arg)
{
    int pid, retcode;

    pid = fork();
    if (pid < 0)
        return pid;

    if (pid == 0) {
        char buf[64];

        snprintf(buf, sizeof(buf), "%d", getpid());
        if (cg_write(cgroup, "cgroup.procs", buf))
            exit(EXIT_FAILURE);
        exit(fn(cgroup, arg));
    }

    /*
     * Retry when interrupted by a signal and bail out on real errors, so
     * retcode is never inspected without having been filled in.
     */
    while (waitpid(pid, &retcode, 0) < 0) {
        if (errno != EINTR)
            return -1;
    }

    if (WIFEXITED(retcode))
        return WEXITSTATUS(retcode);

    return -1;
}
0345 
/*
 * Create a child with clone3() and CLONE_INTO_CGROUP, using @cgroup_fd as
 * the target cgroup, when CLONE_ARGS_SIZE_VER2 is available at build time.
 *
 * Returns the sys_clone3() result. When the call fails with ENOSYS or E2BIG,
 * or when the build has no CLONE_ARGS_SIZE_VER2, errno is set to ENOSYS and
 * -ENOSYS is returned instead.
 */
pid_t clone_into_cgroup(int cgroup_fd)
{
#ifdef CLONE_ARGS_SIZE_VER2
    struct __clone_args args = {
        .flags = CLONE_INTO_CGROUP,
        .exit_signal = SIGCHLD,
        .cgroup = cgroup_fd,
    };
    pid_t child = sys_clone3(&args, sizeof(struct __clone_args));

    if (child >= 0)
        return child;

    /*
     * Verify that this is a genuine test failure:
     * ENOSYS -> clone3() not available
     * E2BIG  -> CLONE_INTO_CGROUP not available
     */
    if (errno != ENOSYS && errno != E2BIG)
        return child;
#endif
    errno = ENOSYS;
    return -ENOSYS;
}
0373 
/*
 * Wait for a state change of child @pid with waitid(), retrying when the
 * call is interrupted by a signal. @options selects the awaited states
 * (WEXITED, WSTOPPED, WCONTINUED); __WALL and __WNOTHREAD are always added.
 *
 * waitid() reports the kind of event in si_code and the plain exit code or
 * signal number in si_status, so the result is decoded from si_code rather
 * than with the wait()-status macros.
 *
 * Returns the exit code for an exited child, the stop signal for a stopped
 * child, 0 for a continued child, and -1 on error or any other event.
 */
int clone_reap(pid_t pid, int options)
{
    siginfo_t info = {
        .si_signo = 0,
    };
    int ret;

    do {
        ret = waitid(P_PID, pid, &info, options | __WALL | __WNOTHREAD);
    } while (ret < 0 && errno == EINTR);

    if (ret < 0)
        return -1;

    if ((options & WEXITED) && info.si_code == CLD_EXITED)
        return info.si_status;

    if ((options & WSTOPPED) && info.si_code == CLD_STOPPED)
        return info.si_status;

    if ((options & WCONTINUED) && info.si_code == CLD_CONTINUED)
        return 0;

    return -1;
}
0406 
/*
 * Open the directory @dir with O_PATH, O_DIRECTORY, O_NOFOLLOW and
 * O_CLOEXEC.
 *
 * Returns the open() result: a file descriptor, or -1 with errno set.
 */
int dirfd_open_opath(const char *dir)
{
    const int flags = O_PATH | O_DIRECTORY | O_NOFOLLOW | O_CLOEXEC;

    return open(dir, flags);
}
0411 
/*
 * Close @fd, when it is non-negative, while preserving the current errno so
 * that an earlier failure can still be reported after the cleanup.
 *
 * Wrapped in do { } while (0) so the macro expands to a single statement and
 * stays correct inside unbraced if/else bodies.
 */
#define close_prot_errno(fd)                                                   \
    do {                                                                   \
        if ((fd) >= 0) {                                               \
            int _e_ = errno;                                       \
            close(fd);                                             \
            errno = _e_;                                           \
        }                                                              \
    } while (0)
0418 
/*
 * Open @cgroup, clone a child directly into it and let the child run
 * @fn(@cgroup, @arg), exiting with its return value. The parent does not
 * wait for the child.
 *
 * Returns the clone_into_cgroup() result in the parent, or -1 when the
 * cgroup directory cannot be opened.
 */
static int clone_into_cgroup_run_nowait(const char *cgroup,
                                        int (*fn)(const char *cgroup, void *arg),
                                        void *arg)
{
    pid_t child;
    int cg_fd = dirfd_open_opath(cgroup);

    if (cg_fd < 0)
        return -1;

    child = clone_into_cgroup(cg_fd);
    /* The descriptor is only needed for the clone call itself. */
    close_prot_errno(cg_fd);

    if (child != 0)
        return child;

    exit(fn(cgroup, arg));
}
0437 
/*
 * Start @fn(@cgroup, @arg) in a child process placed in @cgroup, without
 * waiting for it.
 *
 * Cloning straight into the cgroup is tried first. If that reports ENOSYS,
 * a plain fork() is used and the child writes its own PID to cgroup.procs
 * before running @fn (exiting with EXIT_FAILURE if that write fails).
 *
 * Returns the child's PID in the parent, -1 when the clone attempt fails for
 * any reason other than ENOSYS, or the fork() result on the fallback path.
 */
int cg_run_nowait(const char *cgroup,
                  int (*fn)(const char *cgroup, void *arg),
                  void *arg)
{
    char self[64];
    int child = clone_into_cgroup_run_nowait(cgroup, fn, arg);

    if (child > 0)
        return child;

    /* Genuine test failure. */
    if (child < 0 && errno != ENOSYS)
        return -1;

    child = fork();
    if (child != 0)
        return child;

    /* Only the forked child gets here. */
    snprintf(self, sizeof(self), "%d", getpid());
    if (cg_write(cgroup, "cgroup.procs", self) != 0)
        exit(EXIT_FAILURE);

    exit(fn(cgroup, arg));
}
0464 
/*
 * Open an unnamed temporary file in the current directory, readable and
 * writable through the returned descriptor.
 *
 * open() requires a mode argument whenever O_TMPFILE is given; without it
 * the mode is taken from whatever happens to be in the argument slot.
 *
 * Returns the open() result: a file descriptor, or -1 with errno set.
 */
int get_temp_fd(void)
{
    return open(".", O_TMPFILE | O_RDWR | O_EXCL, S_IRUSR | S_IWUSR);
}
0469 
0470 int alloc_pagecache(int fd, size_t size)
0471 {
0472     char buf[PAGE_SIZE];
0473     struct stat st;
0474     int i;
0475 
0476     if (fstat(fd, &st))
0477         goto cleanup;
0478 
0479     size += st.st_size;
0480 
0481     if (ftruncate(fd, size))
0482         goto cleanup;
0483 
0484     for (i = 0; i < size; i += sizeof(buf))
0485         read(fd, buf, sizeof(buf));
0486 
0487     return 0;
0488 
0489 cleanup:
0490     return -1;
0491 }
0492 
0493 int alloc_anon(const char *cgroup, void *arg)
0494 {
0495     size_t size = (unsigned long)arg;
0496     char *buf, *ptr;
0497 
0498     buf = malloc(size);
0499     for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
0500         *ptr = 0;
0501 
0502     free(buf);
0503     return 0;
0504 }
0505 
0506 int is_swap_enabled(void)
0507 {
0508     char buf[PAGE_SIZE];
0509     const char delim[] = "\n";
0510     int cnt = 0;
0511     char *line;
0512 
0513     if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0)
0514         return -1;
0515 
0516     for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
0517         cnt++;
0518 
0519     return cnt > 1;
0520 }
0521 
/*
 * Write @score, as decimal text, to /proc/<pid>/oom_score_adj.
 *
 * Returns 0 on success, the negative open() result when the file cannot be
 * opened, or the negative dprintf() result when the write fails.
 */
int set_oom_adj_score(int pid, int score)
{
    char file[PATH_MAX];
    int fd, written;

    sprintf(file, "/proc/%d/oom_score_adj", pid);

    fd = open(file, O_WRONLY | O_APPEND);
    if (fd < 0)
        return fd;

    written = dprintf(fd, "%d", score);
    close(fd);

    return written < 0 ? written : 0;
}
0542 
0543 int proc_mount_contains(const char *option)
0544 {
0545     char buf[4 * PAGE_SIZE];
0546     ssize_t read;
0547 
0548     read = read_text("/proc/mounts", buf, sizeof(buf));
0549     if (read < 0)
0550         return read;
0551 
0552     return strstr(buf, option) != NULL;
0553 }
0554 
/*
 * Read /proc/<pid>/<item> into @buf, which has room for @size bytes. When
 * @pid is 0 the file is /proc/thread-self/<item> if @thread is set and
 * /proc/self/<item> otherwise.
 *
 * Returns the number of bytes read, or -1 on failure.
 */
ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size)
{
    char path[PATH_MAX];
    ssize_t ret;

    if (!pid)
        snprintf(path, sizeof(path), "/proc/%s/%s",
                 thread ? "thread-self" : "self", item);
    else
        snprintf(path, sizeof(path), "/proc/%d/%s", pid, item);

    /*
     * Keep the result in a signed variable: stored in the unsigned @size,
     * an error could never compare as negative.
     */
    ret = read_text(path, buf, size);
    return ret < 0 ? -1 : ret;
}
0568 
0569 int proc_read_strstr(int pid, bool thread, const char *item, const char *needle)
0570 {
0571     char buf[PAGE_SIZE];
0572 
0573     if (proc_read_text(pid, thread, item, buf, sizeof(buf)) < 0)
0574         return -1;
0575 
0576     return strstr(buf, needle) ? 0 : -1;
0577 }
0578 
/*
 * Clone a child directly into @cgroup; the child exits immediately with
 * EXIT_SUCCESS and the parent reaps it.
 *
 * Returns 0 when the clone succeeded, -1 when the cgroup directory cannot
 * be opened or the clone fails.
 */
int clone_into_cgroup_run_wait(const char *cgroup)
{
    pid_t child;
    int cg_fd = dirfd_open_opath(cgroup);

    if (cg_fd < 0)
        return -1;

    child = clone_into_cgroup(cg_fd);
    close_prot_errno(cg_fd);

    if (child == 0)
        exit(EXIT_SUCCESS);

    if (child < 0)
        return -1;

    /*
     * We don't care whether this fails. We only care whether the initial
     * clone succeeded.
     */
    (void)clone_reap(child, WEXITED);
    return 0;
}
0603 
/*
 * Create an inotify instance that watches "<cgroup>/<filename>" for
 * IN_MODIFY events.
 *
 * Returns the inotify file descriptor, or -1 when the instance cannot be
 * created, the path cannot be allocated, or the watch cannot be added.
 */
static int __prepare_for_wait(const char *cgroup, const char *filename)
{
    char *path;
    int fd, wd;

    fd = inotify_init1(0);
    if (fd == -1)
        return fd;

    path = cg_control(cgroup, filename);
    if (!path) {
        close(fd);
        return -1;
    }

    wd = inotify_add_watch(fd, path, IN_MODIFY);
    /* The path is only needed to register the watch; do not leak it. */
    free(path);

    if (wd == -1) {
        close(fd);
        fd = -1;
    }

    return fd;
}
0620 
/*
 * Set up an inotify watch on "<cgroup>/cgroup.events".
 *
 * Returns the inotify file descriptor, or -1 on failure.
 */
int cg_prepare_for_wait(const char *cgroup)
{
    static const char events_file[] = "cgroup.events";

    return __prepare_for_wait(cgroup, events_file);
}
0625 
/*
 * Set up an inotify watch on "<cgroup>/memory.events".
 *
 * Returns the inotify file descriptor, or -1 on failure.
 */
int memcg_prepare_for_wait(const char *cgroup)
{
    static const char events_file[] = "memory.events";

    return __prepare_for_wait(cgroup, events_file);
}
0630 
/*
 * Block until @fd becomes readable. poll() is called with a 10 second
 * timeout and is simply called again after a timeout, after EINTR, or when
 * it reports an event other than POLLIN.
 *
 * Returns 0 once POLLIN is reported, -1 when poll() fails with an error
 * other than EINTR.
 */
int cg_wait_for(int fd)
{
    struct pollfd pfd = {
        .fd = fd,
        .events = POLLIN,
    };

    for (;;) {
        int n = poll(&pfd, 1, 10000);

        if (n > 0) {
            if (pfd.revents & POLLIN)
                return 0;
        } else if (n == -1 && errno != EINTR) {
            return -1;
        }
    }
}
0647         }
0648 
0649         if (ret > 0 && fds.revents & POLLIN) {
0650             ret = 0;
0651             break;
0652         }
0653     }
0654 
0655     return ret;
0656 }