Back to home page

OSCL-LXR

 
 

    


0001 /* SPDX-License-Identifier: GPL-2.0 */
0002 /*
0003  * syscall_numbering.c - test calling the x86-64 kernel with various
0004  * valid and invalid system call numbers.
0005  *
0006  * Copyright (c) 2018 Andrew Lutomirski
0007  */
0008 
0009 #define _GNU_SOURCE
0010 
0011 #include <stdlib.h>
0012 #include <stdio.h>
0013 #include <stdbool.h>
0014 #include <errno.h>
0015 #include <unistd.h>
0016 #include <string.h>
0017 #include <fcntl.h>
0018 #include <limits.h>
0019 #include <signal.h>
0020 #include <sysexits.h>
0021 
0022 #include <sys/ptrace.h>
0023 #include <sys/user.h>
0024 #include <sys/wait.h>
0025 #include <sys/mman.h>
0026 
0027 #include <linux/ptrace.h>
0028 
/*
 * System call numbers used by the tests below.
 */
enum {
    /* Common system call numbers */
    SYS_READ   = 0,
    SYS_WRITE  = 1,
    SYS_GETPID = 39,

    /* x64-only system call numbers */
    X64_IOCTL  = 16,
    X64_READV  = 19,
    X64_WRITEV = 20,

    /* x32-only system call numbers (without X32_BIT) */
    X32_IOCTL  = 514,
    X32_READV  = 515,
    X32_WRITEV = 516
};

/* Flag bit OR-ed into a system call number to request the x32 variant */
enum { X32_BIT = 0x40000000 };
0043 
/* File descriptor for /dev/null; -1 until main() has opened it */
static int nullfd = -1;

/* Whether this kernel supports x32; set by test_x32() */
static bool with_x32 = false;
0046 
/*
 * The successive ways the tracer interferes with the tracee's system
 * calls.  PTP_DONE is not a real pass; it marks the end of the sequence.
 */
enum ptrace_pass {
    PTP_NOTHING   = 0,  /* stop only, registers are not read */
    PTP_GETREGS   = 1,  /* registers are read but not written */
    PTP_WRITEBACK = 2,  /* registers are read and written back as-is */
    PTP_FUZZRET   = 3,  /* rax is replaced before writing back */
    PTP_FUZZHIGH  = 4,  /* as above, plus upper 32 bits of orig_rax set */
    PTP_INTNUM    = 5,  /* as above, plus orig_rax sign-extended from int */
    PTP_DONE      = 6
};
0056 
0057 static const char * const ptrace_pass_name[] =
0058 {
0059     [PTP_NOTHING]   = "just stop, no data read",
0060     [PTP_GETREGS]   = "only getregs",
0061     [PTP_WRITEBACK] = "getregs, unmodified setregs",
0062     [PTP_FUZZRET]   = "modifying the default return",
0063     [PTP_FUZZHIGH]  = "clobbering the top 32 bits",
0064     [PTP_INTNUM]    = "sign-extending the syscall number",
0065 };
0066 
0067 /*
0068  * Shared memory block between tracer and test
0069  */
0070 struct shared {
0071     unsigned int nerr;  /* Total error count */
0072     unsigned int indent;    /* Message indentation level */
0073     enum ptrace_pass ptrace_pass;
0074     bool probing_syscall;   /* In probe_syscall() */
0075 };
0076 static volatile struct shared *sh;
0077 
0078 static inline unsigned int offset(void)
0079 {
0080     unsigned int level = sh ? sh->indent : 0;
0081 
0082     return 8 + level * 4;
0083 }
0084 
/*
 * msg(lvl, fmt, ...) prints "[lvl]" left-justified in a column whose
 * width comes from offset(), followed by the printf-style message.
 */
#define msg(lvl, fmt, ...) printf("%-*s" fmt, offset(), "[" #lvl "]", \
                  ## __VA_ARGS__)

/* Shorthands for the non-error message levels */
#define run(fmt, ...)  msg(RUN,  fmt, ## __VA_ARGS__)
#define info(fmt, ...) msg(INFO, fmt, ## __VA_ARGS__)
#define ok(fmt, ...)   msg(OK,   fmt, ## __VA_ARGS__)

/*
 * fail() reports a test failure and bumps the shared error counter.
 * It requires the shared block to be mapped (sh != NULL).
 */
#define fail(fmt, ...)                  \
    do {                        \
        msg(FAIL, fmt, ## __VA_ARGS__);     \
        sh->nerr++;             \
    } while (0)

/*
 * crit() reports a fatal setup problem at indentation level 0 and
 * terminates the process with EX_OSERR.  It may be invoked before the
 * shared block exists, so sh is only touched when it is non-NULL.
 */
#define crit(fmt, ...)                  \
    do {                        \
        if (sh)                 \
            sh->indent = 0;         \
        msg(FAIL, fmt, ## __VA_ARGS__);     \
        msg(SKIP, "Unable to run test\n");  \
        exit(EX_OSERR);             \
    } while (0)
0105 
/*
 * Value the tracer stores in rax as the replacement return value, so
 * the tracee can tell that ptrace modified the result.
 */
#define MODIFIED_BY_PTRACE  (-9999)
0108 
0109 /*
0110  * Directly invokes the given syscall with nullfd as the first argument
0111  * and the rest zero. Avoids involving glibc wrappers in case they ever
0112  * end up intercepting some system calls for some reason, or modify
0113  * the system call number itself.
0114  */
0115 static long long probe_syscall(int msb, int lsb)
0116 {
0117     register long long arg1 asm("rdi") = nullfd;
0118     register long long arg2 asm("rsi") = 0;
0119     register long long arg3 asm("rdx") = 0;
0120     register long long arg4 asm("r10") = 0;
0121     register long long arg5 asm("r8")  = 0;
0122     register long long arg6 asm("r9")  = 0;
0123     long long nr = ((long long)msb << 32) | (unsigned int)lsb;
0124     long long ret;
0125 
0126     /*
0127      * We pass in an extra copy of the extended system call number
0128      * in %rbx, so we can examine it from the ptrace handler without
0129      * worrying about it being possibly modified. This is to test
0130      * the validity of struct user regs.orig_rax a.k.a.
0131      * struct pt_regs.orig_ax.
0132      */
0133     sh->probing_syscall = true;
0134     asm volatile("syscall"
0135              : "=a" (ret)
0136              : "a" (nr), "b" (nr),
0137                "r" (arg1), "r" (arg2), "r" (arg3),
0138                "r" (arg4), "r" (arg5), "r" (arg6)
0139              : "rcx", "r11", "memory", "cc");
0140     sh->probing_syscall = false;
0141 
0142     return ret;
0143 }
0144 
0145 static const char *syscall_str(int msb, int start, int end)
0146 {
0147     static char buf[64];
0148     const char * const type = (start & X32_BIT) ? "x32" : "x64";
0149     int lsb = start;
0150 
0151     /*
0152      * Improve readability by stripping the x32 bit, but round
0153      * toward zero so we don't display -1 as -1073741825.
0154      */
0155     if (lsb < 0)
0156         lsb |= X32_BIT;
0157     else
0158         lsb &= ~X32_BIT;
0159 
0160     if (start == end)
0161         snprintf(buf, sizeof buf, "%s syscall %d:%d",
0162              type, msb, lsb);
0163     else
0164         snprintf(buf, sizeof buf, "%s syscalls %d:%d..%d",
0165              type, msb, lsb, lsb + (end-start));
0166 
0167     return buf;
0168 }
0169 
0170 static unsigned int _check_for(int msb, int start, int end, long long expect,
0171                    const char *expect_str)
0172 {
0173     unsigned int err = 0;
0174 
0175     sh->indent++;
0176     if (start != end)
0177         sh->indent++;
0178 
0179     for (int nr = start; nr <= end; nr++) {
0180         long long ret = probe_syscall(msb, nr);
0181 
0182         if (ret != expect) {
0183             fail("%s returned %lld, but it should have returned %s\n",
0184                    syscall_str(msb, nr, nr),
0185                    ret, expect_str);
0186             err++;
0187         }
0188     }
0189 
0190     if (start != end)
0191         sh->indent--;
0192 
0193     if (err) {
0194         if (start != end)
0195             fail("%s had %u failure%s\n",
0196                  syscall_str(msb, start, end),
0197                  err, err == 1 ? "s" : "");
0198     } else {
0199         ok("%s returned %s as expected\n",
0200            syscall_str(msb, start, end), expect_str);
0201     }
0202 
0203     sh->indent--;
0204 
0205     return err;
0206 }
0207 
/* Calls _check_for(), passing the spelling of @expect as its message text */
#define check_for(msb, start, end, expect) \
    _check_for((msb), (start), (end), (expect), #expect)
0210 
/*
 * Checks that a single system call returns 0.
 * Returns true when the check FAILED (nonzero error count).
 */
static bool check_zero(int msb, int nr)
{
    unsigned int mismatches = check_for(msb, nr, nr, 0);

    return mismatches != 0;
}
0215 
/*
 * Checks that a single system call returns -ENOSYS.
 * Returns true when the check FAILED (nonzero error count).
 */
static bool check_enosys(int msb, int nr)
{
    unsigned int mismatches = check_for(msb, nr, nr, -ENOSYS);

    return mismatches != 0;
}
0220 
0221 /*
0222  * Anyone diagnosing a failure will want to know whether the kernel
0223  * supports x32. Tell them. This can also be used to conditionalize
0224  * tests based on existence or nonexistence of x32.
0225  */
0226 static bool test_x32(void)
0227 {
0228     long long ret;
0229     pid_t mypid = getpid();
0230 
0231     run("Checking for x32 by calling x32 getpid()\n");
0232     ret = probe_syscall(0, SYS_GETPID | X32_BIT);
0233 
0234     sh->indent++;
0235     if (ret == mypid) {
0236         info("x32 is supported\n");
0237         with_x32 = true;
0238     } else if (ret == -ENOSYS) {
0239         info("x32 is not supported\n");
0240         with_x32 = false;
0241     } else {
0242         fail("x32 getpid() returned %lld, but it should have returned either %lld or -ENOSYS\n", ret, (long long)mypid);
0243         with_x32 = false;
0244     }
0245     sh->indent--;
0246     return with_x32;
0247 }
0248 
0249 static void test_syscalls_common(int msb)
0250 {
0251     enum ptrace_pass pass = sh->ptrace_pass;
0252 
0253     run("Checking some common syscalls as 64 bit\n");
0254     check_zero(msb, SYS_READ);
0255     check_zero(msb, SYS_WRITE);
0256 
0257     run("Checking some 64-bit only syscalls as 64 bit\n");
0258     check_zero(msb, X64_READV);
0259     check_zero(msb, X64_WRITEV);
0260 
0261     run("Checking out of range system calls\n");
0262     check_for(msb, -64, -2, -ENOSYS);
0263     if (pass >= PTP_FUZZRET)
0264         check_for(msb, -1, -1, MODIFIED_BY_PTRACE);
0265     else
0266         check_for(msb, -1, -1, -ENOSYS);
0267     check_for(msb, X32_BIT-64, X32_BIT-1, -ENOSYS);
0268     check_for(msb, -64-X32_BIT, -1-X32_BIT, -ENOSYS);
0269     check_for(msb, INT_MAX-64, INT_MAX-1, -ENOSYS);
0270 }
0271 
0272 static void test_syscalls_with_x32(int msb)
0273 {
0274     /*
0275      * Syscalls 512-547 are "x32" syscalls.  They are
0276      * intended to be called with the x32 (0x40000000) bit
0277      * set.  Calling them without the x32 bit set is
0278      * nonsense and should not work.
0279      */
0280     run("Checking x32 syscalls as 64 bit\n");
0281     check_for(msb, 512, 547, -ENOSYS);
0282 
0283     run("Checking some common syscalls as x32\n");
0284     check_zero(msb, SYS_READ   | X32_BIT);
0285     check_zero(msb, SYS_WRITE  | X32_BIT);
0286 
0287     run("Checking some x32 syscalls as x32\n");
0288     check_zero(msb, X32_READV  | X32_BIT);
0289     check_zero(msb, X32_WRITEV | X32_BIT);
0290 
0291     run("Checking some 64-bit syscalls as x32\n");
0292     check_enosys(msb, X64_IOCTL  | X32_BIT);
0293     check_enosys(msb, X64_READV  | X32_BIT);
0294     check_enosys(msb, X64_WRITEV | X32_BIT);
0295 }
0296 
0297 static void test_syscalls_without_x32(int msb)
0298 {
0299     run("Checking for absence of x32 system calls\n");
0300     check_for(msb, 0 | X32_BIT, 999 | X32_BIT, -ENOSYS);
0301 }
0302 
0303 static void test_syscall_numbering(void)
0304 {
0305     static const int msbs[] = {
0306         0, 1, -1, X32_BIT-1, X32_BIT, X32_BIT-1, -X32_BIT, INT_MAX,
0307         INT_MIN, INT_MIN+1
0308     };
0309 
0310     sh->indent++;
0311 
0312     /*
0313      * The MSB is supposed to be ignored, so we loop over a few
0314      * to test that out.
0315      */
0316     for (size_t i = 0; i < sizeof(msbs)/sizeof(msbs[0]); i++) {
0317         int msb = msbs[i];
0318         run("Checking system calls with msb = %d (0x%x)\n",
0319             msb, msb);
0320 
0321         sh->indent++;
0322 
0323         test_syscalls_common(msb);
0324         if (with_x32)
0325             test_syscalls_with_x32(msb);
0326         else
0327             test_syscalls_without_x32(msb);
0328 
0329         sh->indent--;
0330     }
0331 
0332     sh->indent--;
0333 }
0334 
0335 static void syscall_numbering_tracee(void)
0336 {
0337     enum ptrace_pass pass;
0338 
0339     if (ptrace(PTRACE_TRACEME, 0, 0, 0)) {
0340         crit("Failed to request tracing\n");
0341         return;
0342     }
0343     raise(SIGSTOP);
0344 
0345     for (sh->ptrace_pass = pass = PTP_NOTHING; pass < PTP_DONE;
0346          sh->ptrace_pass = ++pass) {
0347         run("Running tests under ptrace: %s\n", ptrace_pass_name[pass]);
0348         test_syscall_numbering();
0349     }
0350 }
0351 
0352 static void mess_with_syscall(pid_t testpid, enum ptrace_pass pass)
0353 {
0354     struct user_regs_struct regs;
0355 
0356     sh->probing_syscall = false; /* Do this on entry only */
0357 
0358     /* For these, don't even getregs */
0359     if (pass == PTP_NOTHING || pass == PTP_DONE)
0360         return;
0361 
0362     ptrace(PTRACE_GETREGS, testpid, NULL, &regs);
0363 
0364     if (regs.orig_rax != regs.rbx) {
0365         fail("orig_rax %#llx doesn't match syscall number %#llx\n",
0366              (unsigned long long)regs.orig_rax,
0367              (unsigned long long)regs.rbx);
0368     }
0369 
0370     switch (pass) {
0371     case PTP_GETREGS:
0372         /* Just read, no writeback */
0373         return;
0374     case PTP_WRITEBACK:
0375         /* Write back the same register state verbatim */
0376         break;
0377     case PTP_FUZZRET:
0378         regs.rax = MODIFIED_BY_PTRACE;
0379         break;
0380     case PTP_FUZZHIGH:
0381         regs.rax = MODIFIED_BY_PTRACE;
0382         regs.orig_rax = regs.orig_rax | 0xffffffff00000000ULL;
0383         break;
0384     case PTP_INTNUM:
0385         regs.rax = MODIFIED_BY_PTRACE;
0386         regs.orig_rax = (int)regs.orig_rax;
0387         break;
0388     default:
0389         crit("invalid ptrace_pass\n");
0390         break;
0391     }
0392 
0393     ptrace(PTRACE_SETREGS, testpid, NULL, &regs);
0394 }
0395 
0396 static void syscall_numbering_tracer(pid_t testpid)
0397 {
0398     int wstatus;
0399 
0400     do {
0401         pid_t wpid = waitpid(testpid, &wstatus, 0);
0402         if (wpid < 0 && errno != EINTR)
0403             break;
0404         if (wpid != testpid)
0405             continue;
0406         if (!WIFSTOPPED(wstatus))
0407             break;  /* Thread exited? */
0408 
0409         if (sh->probing_syscall && WSTOPSIG(wstatus) == SIGTRAP)
0410             mess_with_syscall(testpid, sh->ptrace_pass);
0411     } while (sh->ptrace_pass != PTP_DONE &&
0412          !ptrace(PTRACE_SYSCALL, testpid, NULL, NULL));
0413 
0414     ptrace(PTRACE_DETACH, testpid, NULL, NULL);
0415 
0416     /* Wait for the child process to terminate */
0417     while (waitpid(testpid, &wstatus, 0) != testpid || !WIFEXITED(wstatus))
0418         /* wait some more */;
0419 }
0420 
/*
 * Forks a child that runs the tests as the tracee and exits, while the
 * calling process carries on as the tracer.
 */
static void test_traced_syscall_numbering(void)
{
    const pid_t child = fork();

    if (child == 0) {
        /* Child: run the traced tests, then leave without returning */
        syscall_numbering_tracee();
        _exit(0);
    }

    if (child < 0) {
        crit("Unable to launch tracer process\n");
    } else {
        syscall_numbering_tracer(child);
    }
}
0437 
0438 int main(void)
0439 {
0440     unsigned int nerr;
0441 
0442     /*
0443      * It is quite likely to get a segfault on a failure, so make
0444      * sure the message gets out by setting stdout to nonbuffered.
0445      */
0446     setvbuf(stdout, NULL, _IONBF, 0);
0447 
0448     /*
0449      * Harmless file descriptor to work on...
0450      */
0451     nullfd = open("/dev/null", O_RDWR);
0452     if (nullfd < 0) {
0453         crit("Unable to open /dev/null: %s\n", strerror(errno));
0454     }
0455 
0456     /*
0457      * Set up a block of shared memory...
0458      */
0459     sh = mmap(NULL, sysconf(_SC_PAGE_SIZE), PROT_READ|PROT_WRITE,
0460           MAP_ANONYMOUS|MAP_SHARED, 0, 0);
0461     if (sh == MAP_FAILED) {
0462         crit("Unable to allocated shared memory block: %s\n",
0463              strerror(errno));
0464     }
0465 
0466     with_x32 = test_x32();
0467 
0468     run("Running tests without ptrace...\n");
0469     test_syscall_numbering();
0470 
0471     test_traced_syscall_numbering();
0472 
0473     nerr = sh->nerr;
0474     if (!nerr) {
0475         ok("All system calls succeeded or failed as expected\n");
0476         return 0;
0477     } else {
0478         fail("A total of %u system call%s had incorrect behavior\n",
0479              nerr, nerr != 1 ? "s" : "");
0480         return 1;
0481     }
0482 }