0001
0002
0003
0004
0005
0006
0007 #define _GNU_SOURCE
0008 #include <stdio.h>
0009 #include <stdlib.h>
0010 #include <stdbool.h>
0011 #include <string.h>
0012 #include <sys/syscall.h>
0013 #include <unistd.h>
0014 #include <err.h>
0015 #include <sys/user.h>
0016 #include <asm/prctl.h>
0017 #include <sys/prctl.h>
0018 #include <signal.h>
0019 #include <limits.h>
0020 #include <sys/ucontext.h>
0021 #include <sched.h>
0022 #include <linux/futex.h>
0023 #include <pthread.h>
0024 #include <asm/ldt.h>
0025 #include <sys/mman.h>
0026 #include <stddef.h>
0027 #include <sys/ptrace.h>
0028 #include <sys/wait.h>
0029 #include <setjmp.h>
0030
0031 #ifndef __x86_64__
0032 # error This test is 64-bit only
0033 #endif
0034
/*
 * Handshake between read_base() and the SIGSEGV handler: read_base() sets
 * want_segv before a load that is expected to fault; sigsegv() clears it
 * and stores the reported fault address in segv_addr.
 */
static volatile sig_atomic_t want_segv;
static volatile unsigned long segv_addr;

/*
 * Scratch location mapped MAP_SHARED by main() so that a forked child can
 * publish a GS selector value to its parent.
 */
static unsigned short *shared_scratch;

/* Count of failed checks; main() exits non-zero when it is not 0. */
static int nerrs;
0041
/*
 * Install @handler as a three-argument (SA_SIGINFO) handler for @sig.
 *
 * @flags is OR-ed into sa_flags together with SA_SIGINFO and the handler's
 * sa_mask is left empty.  Terminates the process through err() when
 * sigaction() fails.
 */
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
		       int flags)
{
	struct sigaction action;

	memset(&action, 0, sizeof(action));
	sigemptyset(&action.sa_mask);
	action.sa_flags = flags | SA_SIGINFO;
	action.sa_sigaction = handler;

	if (sigaction(sig, &action, NULL) != 0)
		err(1, "sigaction");
}
0053
/*
 * Reset the disposition of @sig to SIG_DFL (no flags, empty mask).
 * Terminates the process through err() when sigaction() fails.
 */
static void clearhandler(int sig)
{
	struct sigaction action;

	memset(&action, 0, sizeof(action));
	sigemptyset(&action.sa_mask);
	action.sa_handler = SIG_DFL;

	if (sigaction(sig, &action, NULL) != 0)
		err(1, "sigaction");
}
0063
0064 static void sigsegv(int sig, siginfo_t *si, void *ctx_void)
0065 {
0066 ucontext_t *ctx = (ucontext_t*)ctx_void;
0067
0068 if (!want_segv) {
0069 clearhandler(SIGSEGV);
0070 return;
0071 }
0072
0073 want_segv = false;
0074 segv_addr = (unsigned long)si->si_addr;
0075
0076 ctx->uc_mcontext.gregs[REG_RIP] += 4;
0077
0078 }
0079
/*
 * Jump target used with sigsetjmp()/siglongjmp().
 * NOTE(review): POSIX names the buffer type for those calls sigjmp_buf;
 * confirm that jmp_buf is interchangeable on every libc this is built with.
 */
static jmp_buf jmpbuf;

/* SIGILL handler: abandon the faulting code and resume at jmpbuf with value 1. */
static void sigill(int sig, siginfo_t *si, void *ctx_void)
{
	(void)sig;
	(void)si;
	(void)ctx_void;

	siglongjmp(jmpbuf, 1);
}
0086
/* Set by main() once an RDFSBASE probe has executed without raising SIGILL. */
static bool have_fsgsbase;
0088
/* Return the value produced by the RDGSBASE instruction. */
static inline unsigned long rdgsbase(void)
{
	unsigned long base;

	__asm__ __volatile__("rdgsbase %0" : "=r" (base) : : "memory");
	return base;
}
0097
/* Return the value produced by the RDFSBASE instruction. */
static inline unsigned long rdfsbase(void)
{
	unsigned long base;

	__asm__ __volatile__("rdfsbase %0" : "=r" (base) : : "memory");
	return base;
}
0106
/* Execute WRGSBASE with @gsbase as its operand. */
static inline void wrgsbase(unsigned long gsbase)
{
	__asm__ __volatile__("wrgsbase %0" : : "r" (gsbase) : "memory");
}
0111
/* Execute WRFSBASE with @fsbase as its operand. */
static inline void wrfsbase(unsigned long fsbase)
{
	__asm__ __volatile__("wrfsbase %0" : : "r" (fsbase) : "memory");
}
0116
0117 enum which_base { FS, GS };
0118
0119 static unsigned long read_base(enum which_base which)
0120 {
0121 unsigned long offset;
0122
0123
0124
0125
0126
0127 want_segv = true;
0128
0129 offset = 0;
0130 if (which == FS) {
0131
0132 asm volatile ("mov %%fs:(%%rcx), %%rax" : : "c" (offset) : "rax");
0133 } else {
0134 asm volatile ("mov %%gs:(%%rcx), %%rax" : : "c" (offset) : "rax");
0135 }
0136 if (!want_segv)
0137 return segv_addr + offset;
0138
0139
0140
0141
0142
0143
0144
0145 offset = (ULONG_MAX >> 1) + 1;
0146 if (which == FS) {
0147 asm volatile ("mov %%fs:(%%rcx), %%rax"
0148 : : "c" (offset) : "rax");
0149 } else {
0150 asm volatile ("mov %%gs:(%%rcx), %%rax"
0151 : : "c" (offset) : "rax");
0152 }
0153 if (!want_segv)
0154 return segv_addr + offset;
0155
0156 abort();
0157 }
0158
0159 static void check_gs_value(unsigned long value)
0160 {
0161 unsigned long base;
0162 unsigned short sel;
0163
0164 printf("[RUN]\tARCH_SET_GS to 0x%lx\n", value);
0165 if (syscall(SYS_arch_prctl, ARCH_SET_GS, value) != 0)
0166 err(1, "ARCH_SET_GS");
0167
0168 asm volatile ("mov %%gs, %0" : "=rm" (sel));
0169 base = read_base(GS);
0170 if (base == value) {
0171 printf("[OK]\tGSBASE was set as expected (selector 0x%hx)\n",
0172 sel);
0173 } else {
0174 nerrs++;
0175 printf("[FAIL]\tGSBASE was not as expected: got 0x%lx (selector 0x%hx)\n",
0176 base, sel);
0177 }
0178
0179 if (syscall(SYS_arch_prctl, ARCH_GET_GS, &base) != 0)
0180 err(1, "ARCH_GET_GS");
0181 if (base == value) {
0182 printf("[OK]\tARCH_GET_GS worked as expected (selector 0x%hx)\n",
0183 sel);
0184 } else {
0185 nerrs++;
0186 printf("[FAIL]\tARCH_GET_GS was not as expected: got 0x%lx (selector 0x%hx)\n",
0187 base, sel);
0188 }
0189 }
0190
0191 static void mov_0_gs(unsigned long initial_base, bool schedule)
0192 {
0193 unsigned long base, arch_base;
0194
0195 printf("[RUN]\tARCH_SET_GS to 0x%lx then mov 0 to %%gs%s\n", initial_base, schedule ? " and schedule " : "");
0196 if (syscall(SYS_arch_prctl, ARCH_SET_GS, initial_base) != 0)
0197 err(1, "ARCH_SET_GS");
0198
0199 if (schedule)
0200 usleep(10);
0201
0202 asm volatile ("mov %0, %%gs" : : "rm" (0));
0203 base = read_base(GS);
0204 if (syscall(SYS_arch_prctl, ARCH_GET_GS, &arch_base) != 0)
0205 err(1, "ARCH_GET_GS");
0206 if (base == arch_base) {
0207 printf("[OK]\tGSBASE is 0x%lx\n", base);
0208 } else {
0209 nerrs++;
0210 printf("[FAIL]\tGSBASE changed to 0x%lx but kernel reports 0x%lx\n", base, arch_base);
0211 }
0212 }
0213
/* GS base the helper thread is asked to install (may be HARD_ZERO). */
static volatile unsigned long remote_base;
static volatile bool remote_hard_zero;

/*
 * Command word shared with the helper thread (see threadproc()):
 *   0 - idle / command completed
 *   1 - run do_remote_base()
 *   2 - run load_gs() and then load selector 0 into GS
 *   3 - exit the helper thread
 */
static volatile unsigned int ftx;

/*
 * Sentinel base value: where it is accepted, the base is set to 0 with
 * ARCH_SET_GS and selector 0 is additionally loaded into GS.
 */
#define HARD_ZERO 0xa1fa5f343cb85fa4
0223
0224 static void do_remote_base()
0225 {
0226 unsigned long to_set = remote_base;
0227 bool hard_zero = false;
0228 if (to_set == HARD_ZERO) {
0229 to_set = 0;
0230 hard_zero = true;
0231 }
0232
0233 if (syscall(SYS_arch_prctl, ARCH_SET_GS, to_set) != 0)
0234 err(1, "ARCH_SET_GS");
0235
0236 if (hard_zero)
0237 asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
0238
0239 unsigned short sel;
0240 asm volatile ("mov %%gs, %0" : "=rm" (sel));
0241 printf("\tother thread: ARCH_SET_GS(0x%lx)%s -- sel is 0x%hx\n",
0242 to_set, hard_zero ? " and clear gs" : "", sel);
0243 }
0244
/*
 * Per-thread GDT entry number passed to, and updated from, the 32-bit
 * set_thread_area call in load_gs().  Starts at -1, i.e. no entry has been
 * obtained yet.
 */
static __thread int set_thread_area_entry_number = -1;

/*
 * Load GS with a selector for a freshly created descriptor whose base is
 * 0xBAADF00D, after first setting the GS base to 0 through ARCH_SET_GS.
 *
 * The descriptor is installed in LDT slot 0 via modify_ldt() when that
 * works; otherwise the 32-bit set_thread_area syscall is issued through
 * int $0x80 to obtain a GDT slot.
 *
 * Returns the selector that was loaded into GS, or 0 (GS untouched) when
 * no descriptor could be created.  Terminates via err() if ARCH_SET_GS or
 * the temporary mapping fails.
 */
static unsigned short load_gs(void)
{
	struct user_desc desc = {
		.entry_number    = 0,
		.base_addr       = 0xBAADF00D,
		.limit           = 0xfffff,
		.seg_32bit       = 1,
		.contents        = 0,
		.read_exec_only  = 0,
		.limit_in_pages  = 1,
		.seg_not_present = 0,
		.useable         = 0
	};
	struct user_desc *low_desc;
	unsigned short gs;
	long ret;

	if (syscall(SYS_arch_prctl, ARCH_SET_GS, 0) != 0)
		err(1, "ARCH_SET_GS");

	if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) == 0) {
		printf("\tusing LDT slot 0\n");
		/* 0x7: index 0, LDT, RPL 3. */
		__asm__ __volatile__("mov %0, %%gs" : : "rm" ((unsigned short)0x7));
		return 0x7;
	}

	/*
	 * modify_ldt() is unavailable.  The int $0x80 path takes a 32-bit
	 * pointer, so the descriptor is staged in a MAP_32BIT mapping.
	 */
	low_desc = mmap(NULL, sizeof(desc),
			PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
	if (low_desc == MAP_FAILED)
		err(1, "mmap");

	memcpy(low_desc, &desc, sizeof(desc));
	low_desc->entry_number = set_thread_area_entry_number;

	/* 32-bit set_thread_area (syscall 243 on the int $0x80 entry). */
	__asm__ __volatile__("int $0x80"
			     : "=a" (ret), "+m" (*low_desc)
			     : "a" (243), "b" (low_desc)
			     : "r8", "r9", "r10", "r11");

	memcpy(&desc, low_desc, sizeof(desc));
	munmap(low_desc, sizeof(desc));

	if (ret != 0) {
		printf("[NOTE]\tcould not create a segment -- test won't do anything\n");
		return 0;
	}

	printf("\tusing GDT slot %d\n", desc.entry_number);
	set_thread_area_entry_number = desc.entry_number;

	/* GDT selector: (index << 3) | RPL 3. */
	gs = (unsigned short)((desc.entry_number << 3) | 0x3);
	__asm__ __volatile__("mov %0, %%gs" : : "rm" (gs));
	return gs;
}
0306
0307 void test_wrbase(unsigned short index, unsigned long base)
0308 {
0309 unsigned short newindex;
0310 unsigned long newbase;
0311
0312 printf("[RUN]\tGS = 0x%hx, GSBASE = 0x%lx\n", index, base);
0313
0314 asm volatile ("mov %0, %%gs" : : "rm" (index));
0315 wrgsbase(base);
0316
0317 remote_base = 0;
0318 ftx = 1;
0319 syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
0320 while (ftx != 0)
0321 syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0);
0322
0323 asm volatile ("mov %%gs, %0" : "=rm" (newindex));
0324 newbase = rdgsbase();
0325
0326 if (newindex == index && newbase == base) {
0327 printf("[OK]\tIndex and base were preserved\n");
0328 } else {
0329 printf("[FAIL]\tAfter switch, GS = 0x%hx and GSBASE = 0x%lx\n",
0330 newindex, newbase);
0331 nerrs++;
0332 }
0333 }
0334
0335 static void *threadproc(void *ctx)
0336 {
0337 while (1) {
0338 while (ftx == 0)
0339 syscall(SYS_futex, &ftx, FUTEX_WAIT, 0, NULL, NULL, 0);
0340 if (ftx == 3)
0341 return NULL;
0342
0343 if (ftx == 1) {
0344 do_remote_base();
0345 } else if (ftx == 2) {
0346
0347
0348
0349
0350
0351 load_gs();
0352 asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
0353 } else {
0354 errx(1, "helper thread got bad command");
0355 }
0356
0357 ftx = 0;
0358 syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
0359 }
0360 }
0361
0362 static void set_gs_and_switch_to(unsigned long local,
0363 unsigned short force_sel,
0364 unsigned long remote)
0365 {
0366 unsigned long base;
0367 unsigned short sel_pre_sched, sel_post_sched;
0368
0369 bool hard_zero = false;
0370 if (local == HARD_ZERO) {
0371 hard_zero = true;
0372 local = 0;
0373 }
0374
0375 printf("[RUN]\tARCH_SET_GS(0x%lx)%s, then schedule to 0x%lx\n",
0376 local, hard_zero ? " and clear gs" : "", remote);
0377 if (force_sel)
0378 printf("\tBefore schedule, set selector to 0x%hx\n", force_sel);
0379 if (syscall(SYS_arch_prctl, ARCH_SET_GS, local) != 0)
0380 err(1, "ARCH_SET_GS");
0381 if (hard_zero)
0382 asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
0383
0384 if (read_base(GS) != local) {
0385 nerrs++;
0386 printf("[FAIL]\tGSBASE wasn't set as expected\n");
0387 }
0388
0389 if (force_sel) {
0390 asm volatile ("mov %0, %%gs" : : "rm" (force_sel));
0391 sel_pre_sched = force_sel;
0392 local = read_base(GS);
0393
0394
0395
0396
0397
0398 asm volatile ("mov %0, %%gs" : : "rm" (force_sel));
0399 } else {
0400 asm volatile ("mov %%gs, %0" : "=rm" (sel_pre_sched));
0401 }
0402
0403 remote_base = remote;
0404 ftx = 1;
0405 syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
0406 while (ftx != 0)
0407 syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0);
0408
0409 asm volatile ("mov %%gs, %0" : "=rm" (sel_post_sched));
0410 base = read_base(GS);
0411 if (base == local && sel_pre_sched == sel_post_sched) {
0412 printf("[OK]\tGS/BASE remained 0x%hx/0x%lx\n",
0413 sel_pre_sched, local);
0414 } else if (base == local && sel_pre_sched >= 1 && sel_pre_sched <= 3 &&
0415 sel_post_sched == 0) {
0416
0417
0418
0419
0420 printf("[OK]\tGS/BASE changed from 0x%hx/0x%lx to 0x%hx/0x%lx because IRET is defective\n",
0421 sel_pre_sched, local, sel_post_sched, base);
0422 } else {
0423 nerrs++;
0424 printf("[FAIL]\tGS/BASE changed from 0x%hx/0x%lx to 0x%hx/0x%lx\n",
0425 sel_pre_sched, local, sel_post_sched, base);
0426 }
0427 }
0428
0429 static void test_unexpected_base(void)
0430 {
0431 unsigned long base;
0432
0433 printf("[RUN]\tARCH_SET_GS(0), clear gs, then manipulate GSBASE in a different thread\n");
0434 if (syscall(SYS_arch_prctl, ARCH_SET_GS, 0) != 0)
0435 err(1, "ARCH_SET_GS");
0436 asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
0437
0438 ftx = 2;
0439 syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
0440 while (ftx != 0)
0441 syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0);
0442
0443 base = read_base(GS);
0444 if (base == 0) {
0445 printf("[OK]\tGSBASE remained 0\n");
0446 } else {
0447 nerrs++;
0448 printf("[FAIL]\tGSBASE changed to 0x%lx\n", base);
0449 }
0450 }
0451
/* Byte offset of register field @r inside struct user_regs_struct. */
#define USER_REGS_OFFSET(r) offsetof(struct user_regs_struct, r)
0453
0454 static void test_ptrace_write_gs_read_base(void)
0455 {
0456 int status;
0457 pid_t child = fork();
0458
0459 if (child < 0)
0460 err(1, "fork");
0461
0462 if (child == 0) {
0463 printf("[RUN]\tPTRACE_POKE GS, read GSBASE back\n");
0464
0465 printf("[RUN]\tARCH_SET_GS to 1\n");
0466 if (syscall(SYS_arch_prctl, ARCH_SET_GS, 1) != 0)
0467 err(1, "ARCH_SET_GS");
0468
0469 if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0)
0470 err(1, "PTRACE_TRACEME");
0471
0472 raise(SIGTRAP);
0473 _exit(0);
0474 }
0475
0476 wait(&status);
0477
0478 if (WSTOPSIG(status) == SIGTRAP) {
0479 unsigned long base;
0480 unsigned long gs_offset = USER_REGS_OFFSET(gs);
0481 unsigned long base_offset = USER_REGS_OFFSET(gs_base);
0482
0483
0484 base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL);
0485 if (base == 1) {
0486 printf("[OK]\tGSBASE started at 1\n");
0487 } else {
0488 nerrs++;
0489 printf("[FAIL]\tGSBASE started at 0x%lx\n", base);
0490 }
0491
0492 printf("[RUN]\tSet GS = 0x7, read GSBASE\n");
0493
0494
0495 if (ptrace(PTRACE_POKEUSER, child, gs_offset, 0x7) != 0)
0496 err(1, "PTRACE_POKEUSER");
0497
0498
0499 base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL);
0500
0501 if (base == 0 || base == 1) {
0502 printf("[OK]\tGSBASE reads as 0x%lx with invalid GS\n", base);
0503 } else {
0504 nerrs++;
0505 printf("[FAIL]\tGSBASE=0x%lx (should be 0 or 1)\n", base);
0506 }
0507 }
0508
0509 ptrace(PTRACE_CONT, child, NULL, NULL);
0510
0511 wait(&status);
0512 if (!WIFEXITED(status))
0513 printf("[WARN]\tChild didn't exit cleanly.\n");
0514 }
0515
0516 static void test_ptrace_write_gsbase(void)
0517 {
0518 int status;
0519 pid_t child = fork();
0520
0521 if (child < 0)
0522 err(1, "fork");
0523
0524 if (child == 0) {
0525 printf("[RUN]\tPTRACE_POKE(), write GSBASE from ptracer\n");
0526
0527 *shared_scratch = load_gs();
0528
0529 if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0)
0530 err(1, "PTRACE_TRACEME");
0531
0532 raise(SIGTRAP);
0533 _exit(0);
0534 }
0535
0536 wait(&status);
0537
0538 if (WSTOPSIG(status) == SIGTRAP) {
0539 unsigned long gs, base;
0540 unsigned long gs_offset = USER_REGS_OFFSET(gs);
0541 unsigned long base_offset = USER_REGS_OFFSET(gs_base);
0542
0543 gs = ptrace(PTRACE_PEEKUSER, child, gs_offset, NULL);
0544
0545 if (gs != *shared_scratch) {
0546 nerrs++;
0547 printf("[FAIL]\tGS is not prepared with nonzero\n");
0548 goto END;
0549 }
0550
0551 if (ptrace(PTRACE_POKEUSER, child, base_offset, 0xFF) != 0)
0552 err(1, "PTRACE_POKEUSER");
0553
0554 gs = ptrace(PTRACE_PEEKUSER, child, gs_offset, NULL);
0555 base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL);
0556
0557
0558
0559
0560
0561
0562
0563 if (gs != *shared_scratch) {
0564 nerrs++;
0565 printf("[FAIL]\tGS changed to %lx\n", gs);
0566
0567
0568
0569
0570
0571
0572
0573
0574
0575 if (gs == 0)
0576 printf("\tNote: this is expected behavior on older kernels.\n");
0577 } else if (have_fsgsbase && (base != 0xFF)) {
0578 nerrs++;
0579 printf("[FAIL]\tGSBASE changed to %lx\n", base);
0580 } else {
0581 printf("[OK]\tGS remained 0x%hx", *shared_scratch);
0582 if (have_fsgsbase)
0583 printf(" and GSBASE changed to 0xFF");
0584 printf("\n");
0585 }
0586 }
0587
0588 END:
0589 ptrace(PTRACE_CONT, child, NULL, NULL);
0590 wait(&status);
0591 if (!WIFEXITED(status))
0592 printf("[WARN]\tChild didn't exit cleanly.\n");
0593 }
0594
0595 int main()
0596 {
0597 pthread_t thread;
0598
0599 shared_scratch = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
0600 MAP_ANONYMOUS | MAP_SHARED, -1, 0);
0601
0602
0603 test_ptrace_write_gs_read_base();
0604
0605
0606 sethandler(SIGILL, sigill, 0);
0607 if (sigsetjmp(jmpbuf, 1) == 0) {
0608 rdfsbase();
0609 have_fsgsbase = true;
0610 printf("\tFSGSBASE instructions are enabled\n");
0611 } else {
0612 printf("\tFSGSBASE instructions are disabled\n");
0613 }
0614 clearhandler(SIGILL);
0615
0616 sethandler(SIGSEGV, sigsegv, 0);
0617
0618 check_gs_value(0);
0619 check_gs_value(1);
0620 check_gs_value(0x200000000);
0621 check_gs_value(0);
0622 check_gs_value(0x200000000);
0623 check_gs_value(1);
0624
0625 for (int sched = 0; sched < 2; sched++) {
0626 mov_0_gs(0, !!sched);
0627 mov_0_gs(1, !!sched);
0628 mov_0_gs(0x200000000, !!sched);
0629 }
0630
0631
0632
0633 cpu_set_t cpuset;
0634 CPU_ZERO(&cpuset);
0635 CPU_SET(0, &cpuset);
0636 if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
0637 err(1, "sched_setaffinity to CPU 0");
0638
0639 if (pthread_create(&thread, 0, threadproc, 0) != 0)
0640 err(1, "pthread_create");
0641
0642 static unsigned long bases_with_hard_zero[] = {
0643 0, HARD_ZERO, 1, 0x200000000,
0644 };
0645
0646 for (int local = 0; local < 4; local++) {
0647 for (int remote = 0; remote < 4; remote++) {
0648 for (unsigned short s = 0; s < 5; s++) {
0649 unsigned short sel = s;
0650 if (s == 4)
0651 asm ("mov %%ss, %0" : "=rm" (sel));
0652 set_gs_and_switch_to(
0653 bases_with_hard_zero[local],
0654 sel,
0655 bases_with_hard_zero[remote]);
0656 }
0657 }
0658 }
0659
0660 test_unexpected_base();
0661
0662 if (have_fsgsbase) {
0663 unsigned short ss;
0664
0665 asm volatile ("mov %%ss, %0" : "=rm" (ss));
0666
0667 test_wrbase(0, 0);
0668 test_wrbase(0, 1);
0669 test_wrbase(0, 0x200000000);
0670 test_wrbase(0, 0xffffffffffffffff);
0671 test_wrbase(ss, 0);
0672 test_wrbase(ss, 1);
0673 test_wrbase(ss, 0x200000000);
0674 test_wrbase(ss, 0xffffffffffffffff);
0675 }
0676
0677 ftx = 3;
0678 syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
0679
0680 if (pthread_join(thread, NULL) != 0)
0681 err(1, "pthread_join");
0682
0683 test_ptrace_write_gsbase();
0684
0685 return nerrs == 0 ? 0 : 1;
0686 }