// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE

#include <linux/limits.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/wait.h>
#include <errno.h>
#include <sys/sysinfo.h>
#include <pthread.h>

#include "../kselftest.h"
#include "cgroup_util.h"

/*
 * Memory cgroup charging is performed using percpu batches 32 pages
 * big (look at MEMCG_CHARGE_BATCH), whereas memory.stat is exact. So
 * the maximum discrepancy between charge and vmstat entries is the
 * number of cpus multiplied by 32 pages.
 */
#define MAX_VMSTAT_ERROR (4096 * 32 * get_nprocs())

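/*
 * Create negative dentries by stat()ing non-existent files with long
 * names; the number of lookups is passed via arg. The resulting dentry
 * cache memory is charged to the cgroup the helper runs in.
 */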
static int alloc_dcache(const char *cgroup, void *arg)
{
    unsigned long i;
    struct stat st;
    char buf[128];

    for (i = 0; i < (unsigned long)arg; i++) {
        snprintf(buf, sizeof(buf),
            "/something-non-existent-with-a-long-name-%64lu-%d",
             i, getpid());
        stat(buf, &st);
    }

    return 0;
}

/*
 * This test allocates 100000 negative dentries with long names.
 * Then it checks that "slab" in memory.stat is larger than 1M.
 * Then it sets memory.high to 1M and checks that at least 1/2
 * of slab memory has been reclaimed.
 */
static int test_kmem_basic(const char *root)
{
    int ret = KSFT_FAIL;
    char *cg = NULL;
    long slab0, slab1, current;

    cg = cg_name(root, "kmem_basic_test");
    if (!cg)
        goto cleanup;

    if (cg_create(cg))
        goto cleanup;

    if (cg_run(cg, alloc_dcache, (void *)100000))
        goto cleanup;

    slab0 = cg_read_key_long(cg, "memory.stat", "slab ");
    if (slab0 < (1 << 20))
        goto cleanup;

    cg_write(cg, "memory.high", "1M");
    slab1 = cg_read_key_long(cg, "memory.stat", "slab ");
    if (slab1 <= 0)
        goto cleanup;

    current = cg_read_long(cg, "memory.current");
    if (current <= 0)
        goto cleanup;

    if (slab1 < slab0 / 2 && current < slab0 / 2)
        ret = KSFT_PASS;
cleanup:
    cg_destroy(cg);
    free(cg);

    return ret;
}

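/* Per-thread worker: allocate a small amount of dcache slab memory. */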
static void *alloc_kmem_fn(void *arg)
{
    alloc_dcache(NULL, (void *)100);
    return NULL;
}

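/*
 * Spawn 2 * nr_cpus threads, each allocating some slab memory, so that
 * allocations are spread over multiple cpus and their percpu batches.
 */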
static int alloc_kmem_smp(const char *cgroup, void *arg)
{
    int nr_threads = 2 * get_nprocs();
    pthread_t *tinfo;
    unsigned long i;
    int ret = -1;

    tinfo = calloc(nr_threads, sizeof(pthread_t));
    if (tinfo == NULL)
        return -1;

    for (i = 0; i < nr_threads; i++) {
        if (pthread_create(&tinfo[i], NULL, &alloc_kmem_fn,
                   (void *)i)) {
            free(tinfo);
            return -1;
        }
    }

    for (i = 0; i < nr_threads; i++) {
        ret = pthread_join(tinfo[i], NULL);
        if (ret)
            break;
    }

    free(tinfo);
    return ret;
}

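/*
 * Repeatedly create a child cgroup under @parent, run @fn in it via
 * cg_run() and destroy it again, @times times in a row.
 */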
static int cg_run_in_subcgroups(const char *parent,
                int (*fn)(const char *cgroup, void *arg),
                void *arg, int times)
{
    char *child;
    int i;

    for (i = 0; i < times; i++) {
        child = cg_name_indexed(parent, "child", i);
        if (!child)
            return -1;

        if (cg_create(child)) {
            cg_destroy(child);
            free(child);
            return -1;
        }

        if (cg_run(child, fn, arg)) {
            cg_destroy(child);
            free(child);
            return -1;
        }

        cg_destroy(child);
        free(child);
    }

    return 0;
}

/*
 * The test creates and destroys a large number of cgroups. In each cgroup it
 * allocates some slab memory (mostly negative dentries) using 2 * NR_CPUS
 * threads. Then it checks the sanity of numbers on the parent level:
 * the total size of the cgroups should be roughly equal to
 * anon + file + kernel_stack + pagetables + percpu + sock + slab.
 */
static int test_kmem_memcg_deletion(const char *root)
{
    long current, slab, anon, file, kernel_stack, pagetables, percpu, sock, sum;
    int ret = KSFT_FAIL;
    char *parent;

    parent = cg_name(root, "kmem_memcg_deletion_test");
    if (!parent)
        goto cleanup;

    if (cg_create(parent))
        goto cleanup;

    if (cg_write(parent, "cgroup.subtree_control", "+memory"))
        goto cleanup;

    if (cg_run_in_subcgroups(parent, alloc_kmem_smp, NULL, 100))
        goto cleanup;

    current = cg_read_long(parent, "memory.current");
    slab = cg_read_key_long(parent, "memory.stat", "slab ");
    anon = cg_read_key_long(parent, "memory.stat", "anon ");
    file = cg_read_key_long(parent, "memory.stat", "file ");
    kernel_stack = cg_read_key_long(parent, "memory.stat", "kernel_stack ");
    pagetables = cg_read_key_long(parent, "memory.stat", "pagetables ");
    percpu = cg_read_key_long(parent, "memory.stat", "percpu ");
    sock = cg_read_key_long(parent, "memory.stat", "sock ");
    if (current < 0 || slab < 0 || anon < 0 || file < 0 ||
        kernel_stack < 0 || pagetables < 0 || percpu < 0 || sock < 0)
        goto cleanup;

    sum = slab + anon + file + kernel_stack + pagetables + percpu + sock;
    if (labs(sum - current) < MAX_VMSTAT_ERROR) {
        ret = KSFT_PASS;
    } else {
        printf("memory.current = %ld\n", current);
        printf("slab + anon + file + kernel_stack + pagetables + percpu + sock = %ld\n",
               sum);
        printf("slab = %ld\n", slab);
        printf("anon = %ld\n", anon);
        printf("file = %ld\n", file);
        printf("kernel_stack = %ld\n", kernel_stack);
        printf("pagetables = %ld\n", pagetables);
        printf("percpu = %ld\n", percpu);
        printf("sock = %ld\n", sock);
    }

cleanup:
    cg_destroy(parent);
    free(parent);

    return ret;
}

/*
 * The test reads the entire /proc/kpagecgroup. If the read completes
 * successfully (and the kernel doesn't panic), the test is treated as passed.
 */
static int test_kmem_proc_kpagecgroup(const char *root)
{
    unsigned long buf[128];
    int ret = KSFT_FAIL;
    ssize_t len;
    int fd;

    fd = open("/proc/kpagecgroup", O_RDONLY);
    if (fd < 0)
        return ret;

    do {
        len = read(fd, buf, sizeof(buf));
    } while (len > 0);

    if (len == 0)
        ret = KSFT_PASS;

    close(fd);
    return ret;
}

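/* Thread body: just sleep, so that the thread's kernel stack stays allocated. */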
static void *pthread_wait_fn(void *arg)
{
    sleep(100);
    return NULL;
}

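/*
 * Spawn 1000 sleeping threads and check that memory.stat's kernel_stack
 * accounts for at least 1000 pages of their stacks. The threads are not
 * joined; they disappear when the forked test process exits.
 */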
static int spawn_1000_threads(const char *cgroup, void *arg)
{
    int nr_threads = 1000;
    pthread_t *tinfo;
    unsigned long i;
    long stack;
    int ret = -1;

    tinfo = calloc(nr_threads, sizeof(pthread_t));
    if (tinfo == NULL)
        return -1;

    for (i = 0; i < nr_threads; i++) {
        if (pthread_create(&tinfo[i], NULL, &pthread_wait_fn,
                   (void *)i)) {
            free(tinfo);
            return -1;
        }
    }

    stack = cg_read_key_long(cgroup, "memory.stat", "kernel_stack ");
    if (stack >= 4096 * 1000)
        ret = 0;

    free(tinfo);
    return ret;
}

/*
 * The test spawns a process, which spawns 1000 threads. Then it checks
 * that memory.stat's kernel_stack is at least 1000 pages large.
 */
static int test_kmem_kernel_stacks(const char *root)
{
    int ret = KSFT_FAIL;
    char *cg = NULL;

    cg = cg_name(root, "kmem_kernel_stacks_test");
    if (!cg)
        goto cleanup;

    if (cg_create(cg))
        goto cleanup;

    if (cg_run(cg, spawn_1000_threads, NULL))
        goto cleanup;

    ret = KSFT_PASS;
cleanup:
    cg_destroy(cg);
    free(cg);

    return ret;
}

/*
 * This test sequentially creates 30 child cgroups, allocates some
 * kernel memory in each of them, and deletes them. Then it checks
 * that the number of dying cgroups on the parent level is 0.
 */
static int test_kmem_dead_cgroups(const char *root)
{
    int ret = KSFT_FAIL;
    char *parent;
    long dead;
    int i;

    parent = cg_name(root, "kmem_dead_cgroups_test");
    if (!parent)
        goto cleanup;

    if (cg_create(parent))
        goto cleanup;

    if (cg_write(parent, "cgroup.subtree_control", "+memory"))
        goto cleanup;

    if (cg_run_in_subcgroups(parent, alloc_dcache, (void *)100, 30))
        goto cleanup;

    for (i = 0; i < 5; i++) {
        dead = cg_read_key_long(parent, "cgroup.stat",
                    "nr_dying_descendants ");
        if (dead == 0) {
            ret = KSFT_PASS;
            break;
        }
        /*
         * Reclaiming cgroups might take some time,
         * let's wait a bit and repeat.
         */
        sleep(1);
    }

cleanup:
    cg_destroy(parent);
    free(parent);

    return ret;
}

/*
 * This test creates a sub-tree with 1000 memory cgroups.
 * Then it checks that the memory.current on the parent level
 * is greater than 0 and approximately matches the percpu value
 * from memory.stat.
 */
static int test_percpu_basic(const char *root)
{
    int ret = KSFT_FAIL;
    char *parent, *child;
    long current, percpu;
    int i;

    parent = cg_name(root, "percpu_basic_test");
    if (!parent)
        goto cleanup;

    if (cg_create(parent))
        goto cleanup;

    if (cg_write(parent, "cgroup.subtree_control", "+memory"))
        goto cleanup;

    for (i = 0; i < 1000; i++) {
        child = cg_name_indexed(parent, "child", i);
        if (!child)
            return -1;

        if (cg_create(child))
            goto cleanup_children;

        free(child);
    }

    current = cg_read_long(parent, "memory.current");
    percpu = cg_read_key_long(parent, "memory.stat", "percpu ");

    if (current > 0 && percpu > 0 && labs(current - percpu) <
        MAX_VMSTAT_ERROR)
        ret = KSFT_PASS;
    else
        printf("memory.current %ld\npercpu %ld\n",
               current, percpu);

cleanup_children:
    for (i = 0; i < 1000; i++) {
        child = cg_name_indexed(parent, "child", i);
        cg_destroy(child);
        free(child);
    }

cleanup:
    cg_destroy(parent);
    free(parent);

    return ret;
}

#define T(x) { x, #x }
struct kmem_test {
    int (*fn)(const char *root);
    const char *name;
} tests[] = {
    T(test_kmem_basic),
    T(test_kmem_memcg_deletion),
    T(test_kmem_proc_kpagecgroup),
    T(test_kmem_kernel_stacks),
    T(test_kmem_dead_cgroups),
    T(test_percpu_basic),
};
#undef T

int main(int argc, char **argv)
{
    char root[PATH_MAX];
    int i, ret = EXIT_SUCCESS;

    if (cg_find_unified_root(root, sizeof(root)))
        ksft_exit_skip("cgroup v2 isn't mounted\n");

    /*
     * Check that memory controller is available:
     * memory is listed in cgroup.controllers
     */
    if (cg_read_strstr(root, "cgroup.controllers", "memory"))
        ksft_exit_skip("memory controller isn't available\n");

    if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
        if (cg_write(root, "cgroup.subtree_control", "+memory"))
            ksft_exit_skip("Failed to set memory controller\n");

    for (i = 0; i < ARRAY_SIZE(tests); i++) {
        switch (tests[i].fn(root)) {
        case KSFT_PASS:
            ksft_test_result_pass("%s\n", tests[i].name);
            break;
        case KSFT_SKIP:
            ksft_test_result_skip("%s\n", tests[i].name);
            break;
        default:
            ret = EXIT_FAILURE;
            ksft_test_result_fail("%s\n", tests[i].name);
            break;
        }
    }

    return ret;
}