Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Copyright (c) 2019 Alexey Dobriyan <adobriyan@gmail.com>
0003  *
0004  * Permission to use, copy, modify, and distribute this software for any
0005  * purpose with or without fee is hereby granted, provided that the above
0006  * copyright notice and this permission notice appear in all copies.
0007  *
0008  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
0009  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
0010  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
0011  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
0012  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
0013  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
0014  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
0015  */
0016 /*
0017  * Fork and exec tiny 1 page executable which precisely controls its VM.
0018  * Test /proc/$PID/maps
0019  * Test /proc/$PID/smaps
0020  * Test /proc/$PID/smaps_rollup
0021  * Test /proc/$PID/statm
0022  *
0023  * FIXME require CONFIG_TMPFS which can be disabled
0024  * FIXME test other values from "smaps"
0025  * FIXME support other archs
0026  */
0027 #undef NDEBUG
0028 #include <assert.h>
0029 #include <errno.h>
0030 #include <sched.h>
0031 #include <signal.h>
0032 #include <stdbool.h>
0033 #include <stdint.h>
0034 #include <stdio.h>
0035 #include <string.h>
0036 #include <stdlib.h>
0037 #include <sys/mount.h>
0038 #include <sys/types.h>
0039 #include <sys/stat.h>
0040 #include <sys/wait.h>
0041 #include <fcntl.h>
0042 #include <unistd.h>
0043 #include <sys/syscall.h>
0044 #include <sys/uio.h>
0045 #include <linux/kdev_t.h>
0046 #include <sys/time.h>
0047 #include <sys/resource.h>
0048 
0049 #include "../kselftest.h"
0050 
/*
 * Raw execveat(2) invocation through syscall(); the arguments are passed
 * to the kernel unchanged and syscall()'s return value is handed back.
 */
static inline long sys_execveat(int dirfd, const char *pathname, char **argv, char **envp, int flags)
{
    const long rv = syscall(SYS_execveat, dirfd, pathname, argv, envp, flags);

    return rv;
}
0055 
0056 static void make_private_tmp(void)
0057 {
0058     if (unshare(CLONE_NEWNS) == -1) {
0059         if (errno == ENOSYS || errno == EPERM) {
0060             exit(4);
0061         }
0062         exit(1);
0063     }
0064     if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) == -1) {
0065         exit(1);
0066     }
0067     if (mount(NULL, "/tmp", "tmpfs", 0, NULL) == -1) {
0068         exit(1);
0069     }
0070 }
0071 
/* PID of the exec'ed test child; stays -1 until main() has forked it. */
static pid_t pid = -1;

/* atexit() handler: send SIGTERM to the test child, if one was recorded. */
static void ate(void)
{
    if (pid <= 0) {
        return;
    }
    kill(pid, SIGTERM);
}
0079 
/*
 * ELF64 file header, as written at the very start of the generated
 * executable by make_exe(). Field order and widths define the on-disk
 * layout and must not change.
 */
struct elf64_hdr {
    uint8_t e_ident[16];    /* magic, class, data encoding, version, OS ABI */
    uint16_t e_type;        /* object file type */
    uint16_t e_machine;     /* target architecture */
    uint32_t e_version;     /* object file version */
    uint64_t e_entry;       /* virtual address of the entry point */
    uint64_t e_phoff;       /* file offset of the program header table */
    uint64_t e_shoff;       /* file offset of the section header table */
    uint32_t e_flags;       /* processor-specific flags */
    uint16_t e_ehsize;      /* size of this header */
    uint16_t e_phentsize;   /* size of one program header entry */
    uint16_t e_phnum;       /* number of program header entries */
    uint16_t e_shentsize;   /* size of one section header entry */
    uint16_t e_shnum;       /* number of section header entries */
    uint16_t e_shstrndx;    /* section name string table index */
};
0096 
/*
 * ELF64 program header entry. make_exe() writes exactly one of these
 * directly after the file header. Field order and widths define the
 * on-disk layout and must not change.
 */
struct elf64_phdr {
    uint32_t p_type;    /* segment type */
    uint32_t p_flags;   /* segment permission flags */
    uint64_t p_offset;  /* file offset of the segment */
    uint64_t p_vaddr;   /* virtual address of the segment */
    uint64_t p_paddr;   /* physical address (written as 0 here) */
    uint64_t p_filesz;  /* bytes of the segment present in the file */
    uint64_t p_memsz;   /* bytes of the segment in memory */
    uint64_t p_align;   /* segment alignment */
};
0107 
0108 #ifdef __x86_64__
/* Size of the single page the test executable occupies. */
#define PAGE_SIZE 4096
/* Virtual address the test executable is linked at (4 GiB). */
#define VADDR (1UL << 32)
/* Column in a maps/smaps header line where the pathname is placed. */
#define MAPS_OFFSET 73

/*
 * Helpers that expand to the little-endian bytes of an immediate, for use
 * inside a uint8_t array initializer.
 */
#define imm32_le(x) \
    ((x) >> 0) & 0xff, ((x) >> 8) & 0xff, ((x) >> 16) & 0xff, ((x) >> 24) & 0xff
#define imm64_le(x) \
    imm32_le(x), imm32_le((x) >> 32)

/* Two-byte "syscall" instruction. */
#define syscall 0x0f, 0x05

/* mov rdi, imm64 */
#define mov_rdi(x) \
    0x48, 0xbf, imm64_le(x)

/* mov rsi, imm64 */
#define mov_rsi(x) \
    0x48, 0xbe, imm64_le(x)

/* mov eax, imm32 */
#define mov_eax(x) \
    0xb8, imm32_le(x)
0126 
0127 static const uint8_t payload[] = {
0128     /* Casually unmap stack, vDSO and everything else. */
0129     /* munmap */
0130     mov_rdi(VADDR + 4096),
0131     mov_rsi((1ULL << 47) - 4096 - VADDR - 4096),
0132     mov_eax(11),
0133     syscall,
0134 
0135     /* Ping parent. */
0136     /* write(0, &c, 1); */
0137     0x31, 0xff,                 /* xor edi, edi */
0138     0x48, 0x8d, 0x35, 0x00, 0x00, 0x00, 0x00,   /* lea rsi, [rip] */
0139     0xba, 0x01, 0x00, 0x00, 0x00,           /* mov edx, 1 */
0140     mov_eax(1),
0141     syscall,
0142 
0143     /* 1: pause(); */
0144     mov_eax(34),
0145     syscall,
0146 
0147     0xeb, 0xf7, /* jmp 1b */
0148 };
0149 
0150 static int make_exe(const uint8_t *payload, size_t len)
0151 {
0152     struct elf64_hdr h;
0153     struct elf64_phdr ph;
0154 
0155     struct iovec iov[3] = {
0156         {&h, sizeof(struct elf64_hdr)},
0157         {&ph, sizeof(struct elf64_phdr)},
0158         {(void *)payload, len},
0159     };
0160     int fd, fd1;
0161     char buf[64];
0162 
0163     memset(&h, 0, sizeof(h));
0164     h.e_ident[0] = 0x7f;
0165     h.e_ident[1] = 'E';
0166     h.e_ident[2] = 'L';
0167     h.e_ident[3] = 'F';
0168     h.e_ident[4] = 2;
0169     h.e_ident[5] = 1;
0170     h.e_ident[6] = 1;
0171     h.e_ident[7] = 0;
0172     h.e_type = 2;
0173     h.e_machine = 0x3e;
0174     h.e_version = 1;
0175     h.e_entry = VADDR + sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr);
0176     h.e_phoff = sizeof(struct elf64_hdr);
0177     h.e_shoff = 0;
0178     h.e_flags = 0;
0179     h.e_ehsize = sizeof(struct elf64_hdr);
0180     h.e_phentsize = sizeof(struct elf64_phdr);
0181     h.e_phnum = 1;
0182     h.e_shentsize = 0;
0183     h.e_shnum = 0;
0184     h.e_shstrndx = 0;
0185 
0186     memset(&ph, 0, sizeof(ph));
0187     ph.p_type = 1;
0188     ph.p_flags = (1<<2)|1;
0189     ph.p_offset = 0;
0190     ph.p_vaddr = VADDR;
0191     ph.p_paddr = 0;
0192     ph.p_filesz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len;
0193     ph.p_memsz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len;
0194     ph.p_align = 4096;
0195 
0196     fd = openat(AT_FDCWD, "/tmp", O_WRONLY|O_EXCL|O_TMPFILE, 0700);
0197     if (fd == -1) {
0198         exit(1);
0199     }
0200 
0201     if (writev(fd, iov, 3) != sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len) {
0202         exit(1);
0203     }
0204 
0205     /* Avoid ETXTBSY on exec. */
0206     snprintf(buf, sizeof(buf), "/proc/self/fd/%u", fd);
0207     fd1 = open(buf, O_RDONLY|O_CLOEXEC);
0208     close(fd);
0209 
0210     return fd1;
0211 }
0212 #endif
0213 
/*
 * Detected vsyscall mode and the /proc/$PID/maps line expected for it:
 *
 *   g_vsyscall  vsyscall VMA   kernel parameter
 *   0           absent         vsyscall=none
 *   1           r-xp           vsyscall=emulate
 *   2           --xp           vsyscall=xonly
 */
static int g_vsyscall;

/* Points at the str_vsyscall_N[] entry matching g_vsyscall once main() has chosen it. */
static const char *str_vsyscall;

/* Mode 0: no vsyscall line at all. */
static const char str_vsyscall_0[] = "";

/* Mode 1: readable and executable. */
static const char str_vsyscall_1[] =
"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]\n";

/* Mode 2: execute-only. */
static const char str_vsyscall_2[] =
"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0                  [vsyscall]\n";
0227 
0228 #ifdef __x86_64__
0229 static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___)
0230 {
0231     _exit(1);
0232 }
0233 
0234 /*
0235  * vsyscall page can't be unmapped, probe it directly.
0236  */
0237 static void vsyscall(void)
0238 {
0239     pid_t pid;
0240     int wstatus;
0241 
0242     pid = fork();
0243     if (pid < 0) {
0244         fprintf(stderr, "fork, errno %d\n", errno);
0245         exit(1);
0246     }
0247     if (pid == 0) {
0248         struct rlimit rlim = {0, 0};
0249         (void)setrlimit(RLIMIT_CORE, &rlim);
0250 
0251         /* Hide "segfault at ffffffffff600000" messages. */
0252         struct sigaction act;
0253         memset(&act, 0, sizeof(struct sigaction));
0254         act.sa_flags = SA_SIGINFO;
0255         act.sa_sigaction = sigaction_SIGSEGV;
0256         (void)sigaction(SIGSEGV, &act, NULL);
0257 
0258         /* gettimeofday(NULL, NULL); */
0259         asm volatile (
0260             "call %P0"
0261             :
0262             : "i" (0xffffffffff600000), "D" (NULL), "S" (NULL)
0263             : "rax", "rcx", "r11"
0264         );
0265         exit(0);
0266     }
0267     waitpid(pid, &wstatus, 0);
0268     if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0) {
0269         /* vsyscall page exists and is executable. */
0270     } else {
0271         /* vsyscall page doesn't exist. */
0272         g_vsyscall = 0;
0273         return;
0274     }
0275 
0276     pid = fork();
0277     if (pid < 0) {
0278         fprintf(stderr, "fork, errno %d\n", errno);
0279         exit(1);
0280     }
0281     if (pid == 0) {
0282         struct rlimit rlim = {0, 0};
0283         (void)setrlimit(RLIMIT_CORE, &rlim);
0284 
0285         /* Hide "segfault at ffffffffff600000" messages. */
0286         struct sigaction act;
0287         memset(&act, 0, sizeof(struct sigaction));
0288         act.sa_flags = SA_SIGINFO;
0289         act.sa_sigaction = sigaction_SIGSEGV;
0290         (void)sigaction(SIGSEGV, &act, NULL);
0291 
0292         *(volatile int *)0xffffffffff600000UL;
0293         exit(0);
0294     }
0295     waitpid(pid, &wstatus, 0);
0296     if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0) {
0297         /* vsyscall page is readable and executable. */
0298         g_vsyscall = 1;
0299         return;
0300     }
0301 
0302     /* vsyscall page is executable but unreadable. */
0303     g_vsyscall = 2;
0304 }
0305 
0306 int main(void)
0307 {
0308     int pipefd[2];
0309     int exec_fd;
0310 
0311     vsyscall();
0312     switch (g_vsyscall) {
0313     case 0:
0314         str_vsyscall = str_vsyscall_0;
0315         break;
0316     case 1:
0317         str_vsyscall = str_vsyscall_1;
0318         break;
0319     case 2:
0320         str_vsyscall = str_vsyscall_2;
0321         break;
0322     default:
0323         abort();
0324     }
0325 
0326     atexit(ate);
0327 
0328     make_private_tmp();
0329 
0330     /* Reserve fd 0 for 1-byte pipe ping from child. */
0331     close(0);
0332     if (open("/", O_RDONLY|O_DIRECTORY|O_PATH) != 0) {
0333         return 1;
0334     }
0335 
0336     exec_fd = make_exe(payload, sizeof(payload));
0337 
0338     if (pipe(pipefd) == -1) {
0339         return 1;
0340     }
0341     if (dup2(pipefd[1], 0) != 0) {
0342         return 1;
0343     }
0344 
0345     pid = fork();
0346     if (pid == -1) {
0347         return 1;
0348     }
0349     if (pid == 0) {
0350         sys_execveat(exec_fd, "", NULL, NULL, AT_EMPTY_PATH);
0351         return 1;
0352     }
0353 
0354     char _;
0355     if (read(pipefd[0], &_, 1) != 1) {
0356         return 1;
0357     }
0358 
0359     struct stat st;
0360     if (fstat(exec_fd, &st) == -1) {
0361         return 1;
0362     }
0363 
0364     /* Generate "head -n1 /proc/$PID/maps" */
0365     char buf0[256];
0366     memset(buf0, ' ', sizeof(buf0));
0367     int len = snprintf(buf0, sizeof(buf0),
0368             "%08lx-%08lx r-xp 00000000 %02lx:%02lx %llu",
0369             VADDR, VADDR + PAGE_SIZE,
0370             MAJOR(st.st_dev), MINOR(st.st_dev),
0371             (unsigned long long)st.st_ino);
0372     buf0[len] = ' ';
0373     snprintf(buf0 + MAPS_OFFSET, sizeof(buf0) - MAPS_OFFSET,
0374          "/tmp/#%llu (deleted)\n", (unsigned long long)st.st_ino);
0375 
0376     /* Test /proc/$PID/maps */
0377     {
0378         const size_t len = strlen(buf0) + strlen(str_vsyscall);
0379         char buf[256];
0380         ssize_t rv;
0381         int fd;
0382 
0383         snprintf(buf, sizeof(buf), "/proc/%u/maps", pid);
0384         fd = open(buf, O_RDONLY);
0385         if (fd == -1) {
0386             return 1;
0387         }
0388         rv = read(fd, buf, sizeof(buf));
0389         assert(rv == len);
0390         assert(memcmp(buf, buf0, strlen(buf0)) == 0);
0391         if (g_vsyscall > 0) {
0392             assert(memcmp(buf + strlen(buf0), str_vsyscall, strlen(str_vsyscall)) == 0);
0393         }
0394     }
0395 
0396     /* Test /proc/$PID/smaps */
0397     {
0398         char buf[4096];
0399         ssize_t rv;
0400         int fd;
0401 
0402         snprintf(buf, sizeof(buf), "/proc/%u/smaps", pid);
0403         fd = open(buf, O_RDONLY);
0404         if (fd == -1) {
0405             return 1;
0406         }
0407         rv = read(fd, buf, sizeof(buf));
0408         assert(0 <= rv && rv <= sizeof(buf));
0409 
0410         assert(rv >= strlen(buf0));
0411         assert(memcmp(buf, buf0, strlen(buf0)) == 0);
0412 
0413 #define RSS1 "Rss:                   4 kB\n"
0414 #define RSS2 "Rss:                   0 kB\n"
0415 #define PSS1 "Pss:                   4 kB\n"
0416 #define PSS2 "Pss:                   0 kB\n"
0417         assert(memmem(buf, rv, RSS1, strlen(RSS1)) ||
0418                memmem(buf, rv, RSS2, strlen(RSS2)));
0419         assert(memmem(buf, rv, PSS1, strlen(PSS1)) ||
0420                memmem(buf, rv, PSS2, strlen(PSS2)));
0421 
0422         static const char *S[] = {
0423             "Size:                  4 kB\n",
0424             "KernelPageSize:        4 kB\n",
0425             "MMUPageSize:           4 kB\n",
0426             "Anonymous:             0 kB\n",
0427             "AnonHugePages:         0 kB\n",
0428             "Shared_Hugetlb:        0 kB\n",
0429             "Private_Hugetlb:       0 kB\n",
0430             "Locked:                0 kB\n",
0431         };
0432         int i;
0433 
0434         for (i = 0; i < ARRAY_SIZE(S); i++) {
0435             assert(memmem(buf, rv, S[i], strlen(S[i])));
0436         }
0437 
0438         if (g_vsyscall > 0) {
0439             assert(memmem(buf, rv, str_vsyscall, strlen(str_vsyscall)));
0440         }
0441     }
0442 
0443     /* Test /proc/$PID/smaps_rollup */
0444     {
0445         char bufr[256];
0446         memset(bufr, ' ', sizeof(bufr));
0447         len = snprintf(bufr, sizeof(bufr),
0448                 "%08lx-%08lx ---p 00000000 00:00 0",
0449                 VADDR, VADDR + PAGE_SIZE);
0450         bufr[len] = ' ';
0451         snprintf(bufr + MAPS_OFFSET, sizeof(bufr) - MAPS_OFFSET,
0452              "[rollup]\n");
0453 
0454         char buf[1024];
0455         ssize_t rv;
0456         int fd;
0457 
0458         snprintf(buf, sizeof(buf), "/proc/%u/smaps_rollup", pid);
0459         fd = open(buf, O_RDONLY);
0460         if (fd == -1) {
0461             return 1;
0462         }
0463         rv = read(fd, buf, sizeof(buf));
0464         assert(0 <= rv && rv <= sizeof(buf));
0465 
0466         assert(rv >= strlen(bufr));
0467         assert(memcmp(buf, bufr, strlen(bufr)) == 0);
0468 
0469         assert(memmem(buf, rv, RSS1, strlen(RSS1)) ||
0470                memmem(buf, rv, RSS2, strlen(RSS2)));
0471         assert(memmem(buf, rv, PSS1, strlen(PSS1)) ||
0472                memmem(buf, rv, PSS2, strlen(PSS2)));
0473 
0474         static const char *S[] = {
0475             "Anonymous:             0 kB\n",
0476             "AnonHugePages:         0 kB\n",
0477             "Shared_Hugetlb:        0 kB\n",
0478             "Private_Hugetlb:       0 kB\n",
0479             "Locked:                0 kB\n",
0480         };
0481         int i;
0482 
0483         for (i = 0; i < ARRAY_SIZE(S); i++) {
0484             assert(memmem(buf, rv, S[i], strlen(S[i])));
0485         }
0486     }
0487 
0488     /* Test /proc/$PID/statm */
0489     {
0490         char buf[64];
0491         ssize_t rv;
0492         int fd;
0493 
0494         snprintf(buf, sizeof(buf), "/proc/%u/statm", pid);
0495         fd = open(buf, O_RDONLY);
0496         if (fd == -1) {
0497             return 1;
0498         }
0499         rv = read(fd, buf, sizeof(buf));
0500         assert(rv == 7 * 2);
0501 
0502         assert(buf[0] == '1');  /* ->total_vm */
0503         assert(buf[1] == ' ');
0504         assert(buf[2] == '0' || buf[2] == '1'); /* rss */
0505         assert(buf[3] == ' ');
0506         assert(buf[4] == '0' || buf[2] == '1'); /* file rss */
0507         assert(buf[5] == ' ');
0508         assert(buf[6] == '1');  /* ELF executable segments */
0509         assert(buf[7] == ' ');
0510         assert(buf[8] == '0');
0511         assert(buf[9] == ' ');
0512         assert(buf[10] == '0'); /* ->data_vm + ->stack_vm */
0513         assert(buf[11] == ' ');
0514         assert(buf[12] == '0');
0515         assert(buf[13] == '\n');
0516     }
0517 
0518     return 0;
0519 }
0520 #else
/*
 * Build for anything other than x86_64: there is no payload to run, so
 * finish immediately with status 4 (the same status make_private_tmp()
 * uses when unshare() is unavailable; presumably kselftest's "skip" code).
 */
int main(void)
{
    const int status = 4;

    return status;
}
0525 #endif