Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * sigreturn.c - tests for x86 sigreturn(2) and exit-to-userspace
0004  * Copyright (c) 2014-2015 Andrew Lutomirski
0005  *
0006  * This is a series of tests that exercises the sigreturn(2) syscall and
0007  * the IRET / SYSRET paths in the kernel.
0008  *
0009  * For now, this focuses on the effects of unusual CS and SS values,
0010  * and it has a bunch of tests to make sure that ESP/RSP is restored
0011  * properly.
0012  *
0013  * The basic idea behind these tests is to raise(SIGUSR1) to create a
0014  * sigcontext frame, plug in the values to be tested, and then return,
0015  * which implicitly invokes sigreturn(2) and programs the user context
0016  * as desired.
0017  *
0018  * For tests for which we expect sigreturn and the subsequent return to
0019  * user mode to succeed, we return to a short trampoline that generates
0020  * SIGTRAP so that the meat of the tests can be ordinary C code in a
0021  * SIGTRAP handler.
0022  *
0023  * The inner workings of each test are documented below.
0024  *
0025  * Do not run on outdated, unpatched kernels at risk of nasty crashes.
0026  */
0027 
0028 #define _GNU_SOURCE
0029 
0030 #include <sys/time.h>
0031 #include <time.h>
0032 #include <stdlib.h>
0033 #include <sys/syscall.h>
0034 #include <unistd.h>
0035 #include <stdio.h>
0036 #include <string.h>
0037 #include <inttypes.h>
0038 #include <sys/mman.h>
0039 #include <sys/signal.h>
0040 #include <sys/ucontext.h>
0041 #include <asm/ldt.h>
0042 #include <err.h>
0043 #include <setjmp.h>
0044 #include <stddef.h>
0045 #include <stdbool.h>
0046 #include <sys/ptrace.h>
0047 #include <sys/user.h>
0048 
0049 /* Pull in AR_xyz defines. */
0050 typedef unsigned int u32;
0051 typedef unsigned short u16;
0052 #include "../../../../arch/x86/include/asm/desc_defs.h"
0053 
0054 /*
0055  * Copied from asm/ucontext.h, as asm/ucontext.h conflicts badly with the glibc
0056  * headers.
0057  */
0058 #ifdef __x86_64__
0059 /*
0060  * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on
0061  * kernels that save SS in the sigcontext.  All kernels that set
0062  * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp
0063  * regardless of SS (i.e. they implement espfix).
0064  *
0065  * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS
0066  * when delivering a signal that came from 64-bit code.
0067  *
0068  * Sigreturn restores SS as follows:
0069  *
0070  * if (saved SS is valid || UC_STRICT_RESTORE_SS is set ||
0071  *     saved CS is not 64-bit)
0072  *         new SS = saved SS  (will fail IRET and signal if invalid)
0073  * else
0074  *         new SS = a flat 32-bit data segment
0075  */
0076 #define UC_SIGCONTEXT_SS       0x2
0077 #define UC_STRICT_RESTORE_SS   0x4
0078 #endif
0079 
0080 /*
0081  * In principle, this test can run on Linux emulation layers (e.g.
0082  * Illumos "LX branded zones").  Solaris-based kernels reserve LDT
0083  * entries 0-5 for their own internal purposes, so start our LDT
0084  * allocations above that reservation.  (The tests don't pass on LX
0085  * branded zones, but at least this lets them run.)
0086  */
0087 #define LDT_OFFSET 6
0088 
0089 /* An aligned stack accessible through some of our segments. */
0090 static unsigned char stack16[65536] __attribute__((aligned(4096)));
0091 
0092 /*
0093  * An aligned int3 instruction used as a trampoline.  Some of the tests
0094  * want to fish out their ss values, so this trampoline copies ss to ecx
0095  * before the int3.
0096  */
0097 asm (".pushsection .text\n\t"
0098      ".type int3, @function\n\t"
0099      ".align 4096\n\t"
0100      "int3:\n\t"
0101      "mov %ss,%ecx\n\t"
0102      "int3\n\t"
0103      ".size int3, . - int3\n\t"
0104      ".align 4096, 0xcc\n\t"
0105      ".popsection");
0106 extern char int3[4096];
0107 
0108 /*
0109  * At startup, we prepare:
0110  *
0111  * - ldt_nonexistent_sel: An LDT entry that doesn't exist (all-zero
0112  *   descriptor or out of bounds).
0113  * - code16_sel: A 16-bit LDT code segment pointing to int3.
0114  * - data16_sel: A 16-bit LDT data segment pointing to stack16.
0115  * - npcode32_sel: A 32-bit not-present LDT code segment pointing to int3.
0116  * - npdata32_sel: A 32-bit not-present LDT data segment pointing to stack16.
0117  * - gdt_data16_idx: A 16-bit GDT data segment pointing to stack16.
0118  * - gdt_npdata32_idx: A 32-bit not-present GDT data segment pointing to
0119  *   stack16.
0120  *
0121  * For no particularly good reason, xyz_sel is a selector value with the
0122  * RPL and LDT bits filled in, whereas xyz_idx is just an index into the
0123  * descriptor table.  These variables will be zero if their respective
0124  * segments could not be allocated.
0125  */
0126 static unsigned short ldt_nonexistent_sel;
0127 static unsigned short code16_sel, data16_sel, npcode32_sel, npdata32_sel;
0128 
0129 static unsigned short gdt_data16_idx, gdt_npdata32_idx;
0130 
/* Turns a GDT descriptor index into a selector with RPL = 3 (TI clear). */
static unsigned short GDT3(int idx)
{
    const int rpl3 = 3;

    return (idx << 3) | rpl3;
}
0135 
/* Turns an LDT descriptor index into a selector with TI set and RPL = 3. */
static unsigned short LDT3(int idx)
{
    const int ti_ldt = 4;
    const int rpl3 = 3;

    return (idx << 3) | ti_ldt | rpl3;
}
0140 
/*
 * Installs @handler as the SA_SIGINFO-style handler for @sig with an empty
 * signal mask.  @flags is OR-ed into sa_flags.  Exits via err() if
 * sigaction() fails.
 */
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
               int flags)
{
    struct sigaction act;

    memset(&act, 0, sizeof(act));
    sigemptyset(&act.sa_mask);
    act.sa_flags = SA_SIGINFO | flags;
    act.sa_sigaction = handler;

    if (sigaction(sig, &act, 0) != 0)
        err(1, "sigaction");
}
0152 
/* Restores the default disposition for @sig; exits via err() on failure. */
static void clearhandler(int sig)
{
    struct sigaction dfl;

    memset(&dfl, 0, sizeof(dfl));
    sigemptyset(&dfl.sa_mask);
    dfl.sa_handler = SIG_DFL;

    if (sigaction(sig, &dfl, 0) != 0)
        err(1, "sigaction");
}
0162 
/*
 * Asks the kernel (SYS_modify_ldt, function 1) to install @desc.  On
 * success, *@var receives LDT3() of the descriptor's entry number; on
 * failure a [NOTE] naming the segment (@name) is printed and *@var is
 * set to 0.
 */
static void add_ldt(const struct user_desc *desc, unsigned short *var,
            const char *name)
{
    long rc = syscall(SYS_modify_ldt, 1, desc, sizeof(*desc));

    if (rc != 0) {
        printf("[NOTE]\tFailed to create %s segment\n", name);
        *var = 0;
        return;
    }

    *var = LDT3(desc->entry_number);
}
0173 
0174 static void setup_ldt(void)
0175 {
0176     if ((unsigned long)stack16 > (1ULL << 32) - sizeof(stack16))
0177         errx(1, "stack16 is too high\n");
0178     if ((unsigned long)int3 > (1ULL << 32) - sizeof(int3))
0179         errx(1, "int3 is too high\n");
0180 
0181     ldt_nonexistent_sel = LDT3(LDT_OFFSET + 2);
0182 
0183     const struct user_desc code16_desc = {
0184         .entry_number    = LDT_OFFSET + 0,
0185         .base_addr       = (unsigned long)int3,
0186         .limit           = 4095,
0187         .seg_32bit       = 0,
0188         .contents        = 2, /* Code, not conforming */
0189         .read_exec_only  = 0,
0190         .limit_in_pages  = 0,
0191         .seg_not_present = 0,
0192         .useable         = 0
0193     };
0194     add_ldt(&code16_desc, &code16_sel, "code16");
0195 
0196     const struct user_desc data16_desc = {
0197         .entry_number    = LDT_OFFSET + 1,
0198         .base_addr       = (unsigned long)stack16,
0199         .limit           = 0xffff,
0200         .seg_32bit       = 0,
0201         .contents        = 0, /* Data, grow-up */
0202         .read_exec_only  = 0,
0203         .limit_in_pages  = 0,
0204         .seg_not_present = 0,
0205         .useable         = 0
0206     };
0207     add_ldt(&data16_desc, &data16_sel, "data16");
0208 
0209     const struct user_desc npcode32_desc = {
0210         .entry_number    = LDT_OFFSET + 3,
0211         .base_addr       = (unsigned long)int3,
0212         .limit           = 4095,
0213         .seg_32bit       = 1,
0214         .contents        = 2, /* Code, not conforming */
0215         .read_exec_only  = 0,
0216         .limit_in_pages  = 0,
0217         .seg_not_present = 1,
0218         .useable         = 0
0219     };
0220     add_ldt(&npcode32_desc, &npcode32_sel, "npcode32");
0221 
0222     const struct user_desc npdata32_desc = {
0223         .entry_number    = LDT_OFFSET + 4,
0224         .base_addr       = (unsigned long)stack16,
0225         .limit           = 0xffff,
0226         .seg_32bit       = 1,
0227         .contents        = 0, /* Data, grow-up */
0228         .read_exec_only  = 0,
0229         .limit_in_pages  = 0,
0230         .seg_not_present = 1,
0231         .useable         = 0
0232     };
0233     add_ldt(&npdata32_desc, &npdata32_sel, "npdata32");
0234 
0235     struct user_desc gdt_data16_desc = {
0236         .entry_number    = -1,
0237         .base_addr       = (unsigned long)stack16,
0238         .limit           = 0xffff,
0239         .seg_32bit       = 0,
0240         .contents        = 0, /* Data, grow-up */
0241         .read_exec_only  = 0,
0242         .limit_in_pages  = 0,
0243         .seg_not_present = 0,
0244         .useable         = 0
0245     };
0246 
0247     if (syscall(SYS_set_thread_area, &gdt_data16_desc) == 0) {
0248         /*
0249          * This probably indicates vulnerability to CVE-2014-8133.
0250          * Merely getting here isn't definitive, though, and we'll
0251          * diagnose the problem for real later on.
0252          */
0253         printf("[WARN]\tset_thread_area allocated data16 at index %d\n",
0254                gdt_data16_desc.entry_number);
0255         gdt_data16_idx = gdt_data16_desc.entry_number;
0256     } else {
0257         printf("[OK]\tset_thread_area refused 16-bit data\n");
0258     }
0259 
0260     struct user_desc gdt_npdata32_desc = {
0261         .entry_number    = -1,
0262         .base_addr       = (unsigned long)stack16,
0263         .limit           = 0xffff,
0264         .seg_32bit       = 1,
0265         .contents        = 0, /* Data, grow-up */
0266         .read_exec_only  = 0,
0267         .limit_in_pages  = 0,
0268         .seg_not_present = 1,
0269         .useable         = 0
0270     };
0271 
0272     if (syscall(SYS_set_thread_area, &gdt_npdata32_desc) == 0) {
0273         /*
0274          * As a hardening measure, newer kernels don't allow this.
0275          */
0276         printf("[WARN]\tset_thread_area allocated npdata32 at index %d\n",
0277                gdt_npdata32_desc.entry_number);
0278         gdt_npdata32_idx = gdt_npdata32_desc.entry_number;
0279     } else {
0280         printf("[OK]\tset_thread_area refused 16-bit data\n");
0281     }
0282 }
0283 
0284 /* State used by our signal handlers. */
0285 static gregset_t initial_regs, requested_regs, resulting_regs;
0286 
0287 /* Instructions for the SIGUSR1 handler. */
0288 static volatile unsigned short sig_cs, sig_ss;
0289 static volatile sig_atomic_t sig_trapped, sig_err, sig_trapno;
0290 #ifdef __x86_64__
0291 static volatile sig_atomic_t sig_corrupt_final_ss;
0292 #endif
0293 
/*
 * Abstractions for some 32-bit vs 64-bit differences: gregset indices for
 * IP, SP and CX, plus accessors for the CS and SS slots of a saved context.
 */
#ifdef __x86_64__
# define REG_IP REG_RIP
# define REG_SP REG_RSP
# define REG_CX REG_RCX

/* View of gregs[REG_CSGSFS] as four 16-bit selector fields. */
struct selectors {
    unsigned short cs, gs, fs, ss;
};

/* Returns a pointer to the ss field overlaid on gregs[REG_CSGSFS]. */
static unsigned short *ssptr(ucontext_t *ctx)
{
    void *slot = &ctx->uc_mcontext.gregs[REG_CSGSFS];

    return &((struct selectors *)slot)->ss;
}

/* Returns a pointer to the cs field overlaid on gregs[REG_CSGSFS]. */
static unsigned short *csptr(ucontext_t *ctx)
{
    void *slot = &ctx->uc_mcontext.gregs[REG_CSGSFS];

    return &((struct selectors *)slot)->cs;
}
#else
# define REG_IP REG_EIP
# define REG_SP REG_ESP
# define REG_CX REG_ECX

/* Returns a pointer to the REG_SS slot of the saved register set. */
static greg_t *ssptr(ucontext_t *ctx)
{
    return ctx->uc_mcontext.gregs + REG_SS;
}

/* Returns a pointer to the REG_CS slot of the saved register set. */
static greg_t *csptr(ucontext_t *ctx)
{
    return ctx->uc_mcontext.gregs + REG_CS;
}
#endif
0330 
/*
 * Returns the code bitness (16, 32 or 64) of the segment named by selector
 * @cs, or -1 if LAR rejects the selector, the descriptor is not a code
 * segment, or its L/DB bits form an unknown combination.
 */
int cs_bitness(unsigned short cs)
{
    uint32_t valid = 0, ar;

    /*
     * "valid" becomes 1 only when LAR leaves ZF set; otherwise the jnz
     * skips the store and "ar" must not be used.  The explicit "movl"
     * suffix keeps the store unambiguous if the compiler satisfies the
     * "rm" constraint with a memory operand.
     */
    asm ("lar %[cs], %[ar]\n\t"
         "jnz 1f\n\t"
         "movl $1, %[valid]\n\t"
         "1:"
         : [ar] "=r" (ar), [valid] "+rm" (valid)
         : [cs] "r" (cs));

    if (!valid)
        return -1;

    bool db = (ar & (1 << 22));
    bool l = (ar & (1 << 21));

    if (!(ar & (1<<11)))
        return -1;  /* Not code. */

    if (l && !db)
        return 64;
    else if (!l && db)
        return 32;
    else if (!l && !db)
        return 16;
    else
        return -1;  /* Unknown bitness. */
}
0363 
0364 /*
0365  * Checks a given selector for its code bitness or returns -1 if it's not
0366  * a usable code segment selector.
0367  */
0368 bool is_valid_ss(unsigned short cs)
0369 {
0370     uint32_t valid = 0, ar;
0371     asm ("lar %[cs], %[ar]\n\t"
0372          "jnz 1f\n\t"
0373          "mov $1, %[valid]\n\t"
0374          "1:"
0375          : [ar] "=r" (ar), [valid] "+rm" (valid)
0376          : [cs] "r" (cs));
0377 
0378     if (!valid)
0379         return false;
0380 
0381     if ((ar & AR_TYPE_MASK) != AR_TYPE_RWDATA &&
0382         (ar & AR_TYPE_MASK) != AR_TYPE_RWDATA_EXPDOWN)
0383         return false;
0384 
0385     return (ar & AR_P);
0386 }
0387 
0388 /* Number of errors in the current test case. */
0389 static volatile sig_atomic_t nerrs;
0390 
0391 static void validate_signal_ss(int sig, ucontext_t *ctx)
0392 {
0393 #ifdef __x86_64__
0394     bool was_64bit = (cs_bitness(*csptr(ctx)) == 64);
0395 
0396     if (!(ctx->uc_flags & UC_SIGCONTEXT_SS)) {
0397         printf("[FAIL]\tUC_SIGCONTEXT_SS was not set\n");
0398         nerrs++;
0399 
0400         /*
0401          * This happens on Linux 4.1.  The rest will fail, too, so
0402          * return now to reduce the noise.
0403          */
0404         return;
0405     }
0406 
0407     /* UC_STRICT_RESTORE_SS is set iff we came from 64-bit mode. */
0408     if (!!(ctx->uc_flags & UC_STRICT_RESTORE_SS) != was_64bit) {
0409         printf("[FAIL]\tUC_STRICT_RESTORE_SS was wrong in signal %d\n",
0410                sig);
0411         nerrs++;
0412     }
0413 
0414     if (is_valid_ss(*ssptr(ctx))) {
0415         /*
0416          * DOSEMU was written before 64-bit sigcontext had SS, and
0417          * it tries to figure out the signal source SS by looking at
0418          * the physical register.  Make sure that keeps working.
0419          */
0420         unsigned short hw_ss;
0421         asm ("mov %%ss, %0" : "=rm" (hw_ss));
0422         if (hw_ss != *ssptr(ctx)) {
0423             printf("[FAIL]\tHW SS didn't match saved SS\n");
0424             nerrs++;
0425         }
0426     }
0427 #endif
0428 }
0429 
0430 /*
0431  * SIGUSR1 handler.  Sets CS and SS as requested and points IP to the
0432  * int3 trampoline.  Sets SP to a large known value so that we can see
0433  * whether the value round-trips back to user mode correctly.
0434  */
0435 static void sigusr1(int sig, siginfo_t *info, void *ctx_void)
0436 {
0437     ucontext_t *ctx = (ucontext_t*)ctx_void;
0438 
0439     validate_signal_ss(sig, ctx);
0440 
0441     memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
0442 
0443     *csptr(ctx) = sig_cs;
0444     *ssptr(ctx) = sig_ss;
0445 
0446     ctx->uc_mcontext.gregs[REG_IP] =
0447         sig_cs == code16_sel ? 0 : (unsigned long)&int3;
0448     ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL;
0449     ctx->uc_mcontext.gregs[REG_CX] = 0;
0450 
0451 #ifdef __i386__
0452     /*
0453      * Make sure the kernel doesn't inadvertently use DS or ES-relative
0454      * accesses in a region where user DS or ES is loaded.
0455      *
0456      * Skip this for 64-bit builds because long mode doesn't care about
0457      * DS and ES and skipping it increases test coverage a little bit,
0458      * since 64-bit kernels can still run the 32-bit build.
0459      */
0460     ctx->uc_mcontext.gregs[REG_DS] = 0;
0461     ctx->uc_mcontext.gregs[REG_ES] = 0;
0462 #endif
0463 
0464     memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
0465     requested_regs[REG_CX] = *ssptr(ctx);   /* The asm code does this. */
0466 
0467     return;
0468 }
0469 
0470 /*
0471  * Called after a successful sigreturn (via int3) or from a failed
0472  * sigreturn (directly by kernel).  Restores our state so that the
0473  * original raise(SIGUSR1) returns.
0474  */
0475 static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
0476 {
0477     ucontext_t *ctx = (ucontext_t*)ctx_void;
0478 
0479     validate_signal_ss(sig, ctx);
0480 
0481     sig_err = ctx->uc_mcontext.gregs[REG_ERR];
0482     sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO];
0483 
0484     unsigned short ss;
0485     asm ("mov %%ss,%0" : "=r" (ss));
0486 
0487     greg_t asm_ss = ctx->uc_mcontext.gregs[REG_CX];
0488     if (asm_ss != sig_ss && sig == SIGTRAP) {
0489         /* Sanity check failure. */
0490         printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n",
0491                ss, *ssptr(ctx), (unsigned long long)asm_ss);
0492         nerrs++;
0493     }
0494 
0495     memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
0496     memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t));
0497 
0498 #ifdef __x86_64__
0499     if (sig_corrupt_final_ss) {
0500         if (ctx->uc_flags & UC_STRICT_RESTORE_SS) {
0501             printf("[FAIL]\tUC_STRICT_RESTORE_SS was set inappropriately\n");
0502             nerrs++;
0503         } else {
0504             /*
0505              * DOSEMU transitions from 32-bit to 64-bit mode by
0506              * adjusting sigcontext, and it requires that this work
0507              * even if the saved SS is bogus.
0508              */
0509             printf("\tCorrupting SS on return to 64-bit mode\n");
0510             *ssptr(ctx) = 0;
0511         }
0512     }
0513 #endif
0514 
0515     sig_trapped = sig;
0516 }
0517 
0518 #ifdef __x86_64__
0519 /* Tests recovery if !UC_STRICT_RESTORE_SS */
0520 static void sigusr2(int sig, siginfo_t *info, void *ctx_void)
0521 {
0522     ucontext_t *ctx = (ucontext_t*)ctx_void;
0523 
0524     if (!(ctx->uc_flags & UC_STRICT_RESTORE_SS)) {
0525         printf("[FAIL]\traise(2) didn't set UC_STRICT_RESTORE_SS\n");
0526         nerrs++;
0527         return;  /* We can't do the rest. */
0528     }
0529 
0530     ctx->uc_flags &= ~UC_STRICT_RESTORE_SS;
0531     *ssptr(ctx) = 0;
0532 
0533     /* Return.  The kernel should recover without sending another signal. */
0534 }
0535 
0536 static int test_nonstrict_ss(void)
0537 {
0538     clearhandler(SIGUSR1);
0539     clearhandler(SIGTRAP);
0540     clearhandler(SIGSEGV);
0541     clearhandler(SIGILL);
0542     sethandler(SIGUSR2, sigusr2, 0);
0543 
0544     nerrs = 0;
0545 
0546     printf("[RUN]\tClear UC_STRICT_RESTORE_SS and corrupt SS\n");
0547     raise(SIGUSR2);
0548     if (!nerrs)
0549         printf("[OK]\tIt worked\n");
0550 
0551     return nerrs;
0552 }
0553 #endif
0554 
0555 /* Finds a usable code segment of the requested bitness. */
0556 int find_cs(int bitness)
0557 {
0558     unsigned short my_cs;
0559 
0560     asm ("mov %%cs,%0" :  "=r" (my_cs));
0561 
0562     if (cs_bitness(my_cs) == bitness)
0563         return my_cs;
0564     if (cs_bitness(my_cs + (2 << 3)) == bitness)
0565         return my_cs + (2 << 3);
0566     if (my_cs > (2<<3) && cs_bitness(my_cs - (2 << 3)) == bitness)
0567         return my_cs - (2 << 3);
0568     if (cs_bitness(code16_sel) == bitness)
0569         return code16_sel;
0570 
0571     printf("[WARN]\tCould not find %d-bit CS\n", bitness);
0572     return -1;
0573 }
0574 
0575 static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss)
0576 {
0577     int cs = find_cs(cs_bits);
0578     if (cs == -1) {
0579         printf("[SKIP]\tCode segment unavailable for %d-bit CS, %d-bit SS\n",
0580                cs_bits, use_16bit_ss ? 16 : 32);
0581         return 0;
0582     }
0583 
0584     if (force_ss != -1) {
0585         sig_ss = force_ss;
0586     } else {
0587         if (use_16bit_ss) {
0588             if (!data16_sel) {
0589                 printf("[SKIP]\tData segment unavailable for %d-bit CS, 16-bit SS\n",
0590                        cs_bits);
0591                 return 0;
0592             }
0593             sig_ss = data16_sel;
0594         } else {
0595             asm volatile ("mov %%ss,%0" : "=r" (sig_ss));
0596         }
0597     }
0598 
0599     sig_cs = cs;
0600 
0601     printf("[RUN]\tValid sigreturn: %d-bit CS (%hx), %d-bit SS (%hx%s)\n",
0602            cs_bits, sig_cs, use_16bit_ss ? 16 : 32, sig_ss,
0603            (sig_ss & 4) ? "" : ", GDT");
0604 
0605     raise(SIGUSR1);
0606 
0607     nerrs = 0;
0608 
0609     /*
0610      * Check that each register had an acceptable value when the
0611      * int3 trampoline was invoked.
0612      */
0613     for (int i = 0; i < NGREG; i++) {
0614         greg_t req = requested_regs[i], res = resulting_regs[i];
0615 
0616         if (i == REG_TRAPNO || i == REG_IP)
0617             continue;   /* don't care */
0618 
0619         if (i == REG_SP) {
0620             /*
0621              * If we were using a 16-bit stack segment, then
0622              * the kernel is a bit stuck: IRET only restores
0623              * the low 16 bits of ESP/RSP if SS is 16-bit.
0624              * The kernel uses a hack to restore bits 31:16,
0625              * but that hack doesn't help with bits 63:32.
0626              * On Intel CPUs, bits 63:32 end up zeroed, and, on
0627              * AMD CPUs, they leak the high bits of the kernel
0628              * espfix64 stack pointer.  There's very little that
0629              * the kernel can do about it.
0630              *
0631              * Similarly, if we are returning to a 32-bit context,
0632              * the CPU will often lose the high 32 bits of RSP.
0633              */
0634 
0635             if (res == req)
0636                 continue;
0637 
0638             if (cs_bits != 64 && ((res ^ req) & 0xFFFFFFFF) == 0) {
0639                 printf("[NOTE]\tSP: %llx -> %llx\n",
0640                        (unsigned long long)req,
0641                        (unsigned long long)res);
0642                 continue;
0643             }
0644 
0645             printf("[FAIL]\tSP mismatch: requested 0x%llx; got 0x%llx\n",
0646                    (unsigned long long)requested_regs[i],
0647                    (unsigned long long)resulting_regs[i]);
0648             nerrs++;
0649             continue;
0650         }
0651 
0652         bool ignore_reg = false;
0653 #if __i386__
0654         if (i == REG_UESP)
0655             ignore_reg = true;
0656 #else
0657         if (i == REG_CSGSFS) {
0658             struct selectors *req_sels =
0659                 (void *)&requested_regs[REG_CSGSFS];
0660             struct selectors *res_sels =
0661                 (void *)&resulting_regs[REG_CSGSFS];
0662             if (req_sels->cs != res_sels->cs) {
0663                 printf("[FAIL]\tCS mismatch: requested 0x%hx; got 0x%hx\n",
0664                        req_sels->cs, res_sels->cs);
0665                 nerrs++;
0666             }
0667 
0668             if (req_sels->ss != res_sels->ss) {
0669                 printf("[FAIL]\tSS mismatch: requested 0x%hx; got 0x%hx\n",
0670                        req_sels->ss, res_sels->ss);
0671                 nerrs++;
0672             }
0673 
0674             continue;
0675         }
0676 #endif
0677 
0678         /* Sanity check on the kernel */
0679         if (i == REG_CX && req != res) {
0680             printf("[FAIL]\tCX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n",
0681                    (unsigned long long)req,
0682                    (unsigned long long)res);
0683             nerrs++;
0684             continue;
0685         }
0686 
0687         if (req != res && !ignore_reg) {
0688             printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n",
0689                    i, (unsigned long long)req,
0690                    (unsigned long long)res);
0691             nerrs++;
0692         }
0693     }
0694 
0695     if (nerrs == 0)
0696         printf("[OK]\tall registers okay\n");
0697 
0698     return nerrs;
0699 }
0700 
0701 static int test_bad_iret(int cs_bits, unsigned short ss, int force_cs)
0702 {
0703     int cs = force_cs == -1 ? find_cs(cs_bits) : force_cs;
0704     if (cs == -1)
0705         return 0;
0706 
0707     sig_cs = cs;
0708     sig_ss = ss;
0709 
0710     printf("[RUN]\t%d-bit CS (%hx), bogus SS (%hx)\n",
0711            cs_bits, sig_cs, sig_ss);
0712 
0713     sig_trapped = 0;
0714     raise(SIGUSR1);
0715     if (sig_trapped) {
0716         char errdesc[32] = "";
0717         if (sig_err) {
0718             const char *src = (sig_err & 1) ? " EXT" : "";
0719             const char *table;
0720             if ((sig_err & 0x6) == 0x0)
0721                 table = "GDT";
0722             else if ((sig_err & 0x6) == 0x4)
0723                 table = "LDT";
0724             else if ((sig_err & 0x6) == 0x2)
0725                 table = "IDT";
0726             else
0727                 table = "???";
0728 
0729             sprintf(errdesc, "%s%s index %d, ",
0730                 table, src, sig_err >> 3);
0731         }
0732 
0733         char trapname[32];
0734         if (sig_trapno == 13)
0735             strcpy(trapname, "GP");
0736         else if (sig_trapno == 11)
0737             strcpy(trapname, "NP");
0738         else if (sig_trapno == 12)
0739             strcpy(trapname, "SS");
0740         else if (sig_trapno == 32)
0741             strcpy(trapname, "IRET");  /* X86_TRAP_IRET */
0742         else
0743             sprintf(trapname, "%d", sig_trapno);
0744 
0745         printf("[OK]\tGot #%s(0x%lx) (i.e. %s%s)\n",
0746                trapname, (unsigned long)sig_err,
0747                errdesc, strsignal(sig_trapped));
0748         return 0;
0749     } else {
0750         /*
0751          * This also implicitly tests UC_STRICT_RESTORE_SS:
0752          * We check that these signals set UC_STRICT_RESTORE_SS and,
0753          * if UC_STRICT_RESTORE_SS doesn't cause strict behavior,
0754          * then we won't get SIGSEGV.
0755          */
0756         printf("[FAIL]\tDid not get SIGSEGV\n");
0757         return 1;
0758     }
0759 }
0760 
0761 int main()
0762 {
0763     int total_nerrs = 0;
0764     unsigned short my_cs, my_ss;
0765 
0766     asm volatile ("mov %%cs,%0" : "=r" (my_cs));
0767     asm volatile ("mov %%ss,%0" : "=r" (my_ss));
0768     setup_ldt();
0769 
0770     stack_t stack = {
0771         /* Our sigaltstack scratch space. */
0772         .ss_sp = malloc(sizeof(char) * SIGSTKSZ),
0773         .ss_size = SIGSTKSZ,
0774     };
0775     if (sigaltstack(&stack, NULL) != 0)
0776         err(1, "sigaltstack");
0777 
0778     sethandler(SIGUSR1, sigusr1, 0);
0779     sethandler(SIGTRAP, sigtrap, SA_ONSTACK);
0780 
0781     /* Easy cases: return to a 32-bit SS in each possible CS bitness. */
0782     total_nerrs += test_valid_sigreturn(64, false, -1);
0783     total_nerrs += test_valid_sigreturn(32, false, -1);
0784     total_nerrs += test_valid_sigreturn(16, false, -1);
0785 
0786     /*
0787      * Test easy espfix cases: return to a 16-bit LDT SS in each possible
0788      * CS bitness.  NB: with a long mode CS, the SS bitness is irrelevant.
0789      *
0790      * This catches the original missing-espfix-on-64-bit-kernels issue
0791      * as well as CVE-2014-8134.
0792      */
0793     total_nerrs += test_valid_sigreturn(64, true, -1);
0794     total_nerrs += test_valid_sigreturn(32, true, -1);
0795     total_nerrs += test_valid_sigreturn(16, true, -1);
0796 
0797     if (gdt_data16_idx) {
0798         /*
0799          * For performance reasons, Linux skips espfix if SS points
0800          * to the GDT.  If we were able to allocate a 16-bit SS in
0801          * the GDT, see if it leaks parts of the kernel stack pointer.
0802          *
0803          * This tests for CVE-2014-8133.
0804          */
0805         total_nerrs += test_valid_sigreturn(64, true,
0806                             GDT3(gdt_data16_idx));
0807         total_nerrs += test_valid_sigreturn(32, true,
0808                             GDT3(gdt_data16_idx));
0809         total_nerrs += test_valid_sigreturn(16, true,
0810                             GDT3(gdt_data16_idx));
0811     }
0812 
0813 #ifdef __x86_64__
0814     /* Nasty ABI case: check SS corruption handling. */
0815     sig_corrupt_final_ss = 1;
0816     total_nerrs += test_valid_sigreturn(32, false, -1);
0817     total_nerrs += test_valid_sigreturn(32, true, -1);
0818     sig_corrupt_final_ss = 0;
0819 #endif
0820 
0821     /*
0822      * We're done testing valid sigreturn cases.  Now we test states
0823      * for which sigreturn itself will succeed but the subsequent
0824      * entry to user mode will fail.
0825      *
0826      * Depending on the failure mode and the kernel bitness, these
0827      * entry failures can generate SIGSEGV, SIGBUS, or SIGILL.
0828      */
0829     clearhandler(SIGTRAP);
0830     sethandler(SIGSEGV, sigtrap, SA_ONSTACK);
0831     sethandler(SIGBUS, sigtrap, SA_ONSTACK);
0832     sethandler(SIGILL, sigtrap, SA_ONSTACK);  /* 32-bit kernels do this */
0833 
0834     /* Easy failures: invalid SS, resulting in #GP(0) */
0835     test_bad_iret(64, ldt_nonexistent_sel, -1);
0836     test_bad_iret(32, ldt_nonexistent_sel, -1);
0837     test_bad_iret(16, ldt_nonexistent_sel, -1);
0838 
0839     /* These fail because SS isn't a data segment, resulting in #GP(SS) */
0840     test_bad_iret(64, my_cs, -1);
0841     test_bad_iret(32, my_cs, -1);
0842     test_bad_iret(16, my_cs, -1);
0843 
0844     /* Try to return to a not-present code segment, triggering #NP(SS). */
0845     test_bad_iret(32, my_ss, npcode32_sel);
0846 
0847     /*
0848      * Try to return to a not-present but otherwise valid data segment.
0849      * This will cause IRET to fail with #SS on the espfix stack.  This
0850      * exercises CVE-2014-9322.
0851      *
0852      * Note that, if espfix is enabled, 64-bit Linux will lose track
0853      * of the actual cause of failure and report #GP(0) instead.
0854      * This would be very difficult for Linux to avoid, because
0855      * espfix64 causes IRET failures to be promoted to #DF, so the
0856      * original exception frame is never pushed onto the stack.
0857      */
0858     test_bad_iret(32, npdata32_sel, -1);
0859 
0860     /*
0861      * Try to return to a not-present but otherwise valid data
0862      * segment without invoking espfix.  Newer kernels don't allow
0863      * this to happen in the first place.  On older kernels, though,
0864      * this can trigger CVE-2014-9322.
0865      */
0866     if (gdt_npdata32_idx)
0867         test_bad_iret(32, GDT3(gdt_npdata32_idx), -1);
0868 
0869 #ifdef __x86_64__
0870     total_nerrs += test_nonstrict_ss();
0871 #endif
0872 
0873     free(stack.ss_sp);
0874     return total_nerrs ? 1 : 0;
0875 }