Home | History | Annotate | Download | only in x86
      1 /*
      2  * sigreturn.c - tests for x86 sigreturn(2) and exit-to-userspace
      3  * Copyright (c) 2014-2015 Andrew Lutomirski
      4  *
      5  * This program is free software; you can redistribute it and/or modify
      6  * it under the terms and conditions of the GNU General Public License,
      7  * version 2, as published by the Free Software Foundation.
      8  *
      9  * This program is distributed in the hope it will be useful, but
     10  * WITHOUT ANY WARRANTY; without even the implied warranty of
     11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     12  * General Public License for more details.
     13  *
     14  * This is a series of tests that exercises the sigreturn(2) syscall and
     15  * the IRET / SYSRET paths in the kernel.
     16  *
     17  * For now, this focuses on the effects of unusual CS and SS values,
     18  * and it has a bunch of tests to make sure that ESP/RSP is restored
     19  * properly.
     20  *
     21  * The basic idea behind these tests is to raise(SIGUSR1) to create a
     22  * sigcontext frame, plug in the values to be tested, and then return,
     23  * which implicitly invokes sigreturn(2) and programs the user context
     24  * as desired.
     25  *
     26  * For tests for which we expect sigreturn and the subsequent return to
     27  * user mode to succeed, we return to a short trampoline that generates
     28  * SIGTRAP so that the meat of the tests can be ordinary C code in a
     29  * SIGTRAP handler.
     30  *
     31  * The inner workings of each test is documented below.
     32  *
     33  * Do not run on outdated, unpatched kernels at risk of nasty crashes.
     34  */
     35 
     36 #define _GNU_SOURCE
     37 
     38 #include <sys/time.h>
     39 #include <time.h>
     40 #include <stdlib.h>
     41 #include <sys/syscall.h>
     42 #include <unistd.h>
     43 #include <stdio.h>
     44 #include <string.h>
     45 #include <inttypes.h>
     46 #include <sys/mman.h>
     47 #include <sys/signal.h>
     48 #include <sys/ucontext.h>
     49 #include <asm/ldt.h>
     50 #include <err.h>
     51 #include <setjmp.h>
     52 #include <stddef.h>
     53 #include <stdbool.h>
     54 #include <sys/ptrace.h>
     55 #include <sys/user.h>
     56 
     57 /* Pull in AR_xyz defines. */
     58 typedef unsigned int u32;
     59 typedef unsigned short u16;
     60 #include "../../../../arch/x86/include/asm/desc_defs.h"
     61 
     62 /*
     63  * Copied from asm/ucontext.h, as asm/ucontext.h conflicts badly with the glibc
     64  * headers.
     65  */
     66 #ifdef __x86_64__
     67 /*
     68  * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on
     69  * kernels that save SS in the sigcontext.  All kernels that set
     70  * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp
     71  * regardless of SS (i.e. they implement espfix).
     72  *
     73  * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS
     74  * when delivering a signal that came from 64-bit code.
     75  *
     76  * Sigreturn restores SS as follows:
     77  *
     78  * if (saved SS is valid || UC_STRICT_RESTORE_SS is set ||
     79  *     saved CS is not 64-bit)
     80  *         new SS = saved SS  (will fail IRET and signal if invalid)
     81  * else
     82  *         new SS = a flat 32-bit data segment
     83  */
     84 #define UC_SIGCONTEXT_SS       0x2
     85 #define UC_STRICT_RESTORE_SS   0x4
     86 #endif
     87 
     88 /*
     89  * In principle, this test can run on Linux emulation layers (e.g.
     90  * Illumos "LX branded zones").  Solaris-based kernels reserve LDT
     91  * entries 0-5 for their own internal purposes, so start our LDT
     92  * allocations above that reservation.  (The tests don't pass on LX
     93  * branded zones, but at least this lets them run.)
     94  */
     95 #define LDT_OFFSET 6
     96 
     97 /* An aligned stack accessible through some of our segments. */
     98 static unsigned char stack16[65536] __attribute__((aligned(4096)));
     99 
    100 /*
    101  * An aligned int3 instruction used as a trampoline.  Some of the tests
    102  * want to fish out their ss values, so this trampoline copies ss to eax
    103  * before the int3.
    104  */
    105 asm (".pushsection .text\n\t"
    106      ".type int3, @function\n\t"
    107      ".align 4096\n\t"
    108      "int3:\n\t"
    109      "mov %ss,%ecx\n\t"
    110      "int3\n\t"
    111      ".size int3, . - int3\n\t"
    112      ".align 4096, 0xcc\n\t"
    113      ".popsection");
    114 extern char int3[4096];
    115 
    116 /*
    117  * At startup, we prepare:
    118  *
    119  * - ldt_nonexistent_sel: An LDT entry that doesn't exist (all-zero
    120  *   descriptor or out of bounds).
    121  * - code16_sel: A 16-bit LDT code segment pointing to int3.
    122  * - data16_sel: A 16-bit LDT data segment pointing to stack16.
    123  * - npcode32_sel: A 32-bit not-present LDT code segment pointing to int3.
    124  * - npdata32_sel: A 32-bit not-present LDT data segment pointing to stack16.
    125  * - gdt_data16_idx: A 16-bit GDT data segment pointing to stack16.
    126  * - gdt_npdata32_idx: A 32-bit not-present GDT data segment pointing to
    127  *   stack16.
    128  *
    129  * For no particularly good reason, xyz_sel is a selector value with the
    130  * RPL and LDT bits filled in, whereas xyz_idx is just an index into the
    131  * descriptor table.  These variables will be zero if their respective
    132  * segments could not be allocated.
    133  */
    134 static unsigned short ldt_nonexistent_sel;
    135 static unsigned short code16_sel, data16_sel, npcode32_sel, npdata32_sel;
    136 
    137 static unsigned short gdt_data16_idx, gdt_npdata32_idx;
    138 
    139 static unsigned short GDT3(int idx)
    140 {
    141 	return (idx << 3) | 3;
    142 }
    143 
    144 static unsigned short LDT3(int idx)
    145 {
    146 	return (idx << 3) | 7;
    147 }
    148 
    149 /* Our sigaltstack scratch space. */
    150 static char altstack_data[SIGSTKSZ];
    151 
    152 static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
    153 		       int flags)
    154 {
    155 	struct sigaction sa;
    156 	memset(&sa, 0, sizeof(sa));
    157 	sa.sa_sigaction = handler;
    158 	sa.sa_flags = SA_SIGINFO | flags;
    159 	sigemptyset(&sa.sa_mask);
    160 	if (sigaction(sig, &sa, 0))
    161 		err(1, "sigaction");
    162 }
    163 
    164 static void clearhandler(int sig)
    165 {
    166 	struct sigaction sa;
    167 	memset(&sa, 0, sizeof(sa));
    168 	sa.sa_handler = SIG_DFL;
    169 	sigemptyset(&sa.sa_mask);
    170 	if (sigaction(sig, &sa, 0))
    171 		err(1, "sigaction");
    172 }
    173 
    174 static void add_ldt(const struct user_desc *desc, unsigned short *var,
    175 		    const char *name)
    176 {
    177 	if (syscall(SYS_modify_ldt, 1, desc, sizeof(*desc)) == 0) {
    178 		*var = LDT3(desc->entry_number);
    179 	} else {
    180 		printf("[NOTE]\tFailed to create %s segment\n", name);
    181 		*var = 0;
    182 	}
    183 }
    184 
    185 static void setup_ldt(void)
    186 {
    187 	if ((unsigned long)stack16 > (1ULL << 32) - sizeof(stack16))
    188 		errx(1, "stack16 is too high\n");
    189 	if ((unsigned long)int3 > (1ULL << 32) - sizeof(int3))
    190 		errx(1, "int3 is too high\n");
    191 
    192 	ldt_nonexistent_sel = LDT3(LDT_OFFSET + 2);
    193 
    194 	const struct user_desc code16_desc = {
    195 		.entry_number    = LDT_OFFSET + 0,
    196 		.base_addr       = (unsigned long)int3,
    197 		.limit           = 4095,
    198 		.seg_32bit       = 0,
    199 		.contents        = 2, /* Code, not conforming */
    200 		.read_exec_only  = 0,
    201 		.limit_in_pages  = 0,
    202 		.seg_not_present = 0,
    203 		.useable         = 0
    204 	};
    205 	add_ldt(&code16_desc, &code16_sel, "code16");
    206 
    207 	const struct user_desc data16_desc = {
    208 		.entry_number    = LDT_OFFSET + 1,
    209 		.base_addr       = (unsigned long)stack16,
    210 		.limit           = 0xffff,
    211 		.seg_32bit       = 0,
    212 		.contents        = 0, /* Data, grow-up */
    213 		.read_exec_only  = 0,
    214 		.limit_in_pages  = 0,
    215 		.seg_not_present = 0,
    216 		.useable         = 0
    217 	};
    218 	add_ldt(&data16_desc, &data16_sel, "data16");
    219 
    220 	const struct user_desc npcode32_desc = {
    221 		.entry_number    = LDT_OFFSET + 3,
    222 		.base_addr       = (unsigned long)int3,
    223 		.limit           = 4095,
    224 		.seg_32bit       = 1,
    225 		.contents        = 2, /* Code, not conforming */
    226 		.read_exec_only  = 0,
    227 		.limit_in_pages  = 0,
    228 		.seg_not_present = 1,
    229 		.useable         = 0
    230 	};
    231 	add_ldt(&npcode32_desc, &npcode32_sel, "npcode32");
    232 
    233 	const struct user_desc npdata32_desc = {
    234 		.entry_number    = LDT_OFFSET + 4,
    235 		.base_addr       = (unsigned long)stack16,
    236 		.limit           = 0xffff,
    237 		.seg_32bit       = 1,
    238 		.contents        = 0, /* Data, grow-up */
    239 		.read_exec_only  = 0,
    240 		.limit_in_pages  = 0,
    241 		.seg_not_present = 1,
    242 		.useable         = 0
    243 	};
    244 	add_ldt(&npdata32_desc, &npdata32_sel, "npdata32");
    245 
    246 	struct user_desc gdt_data16_desc = {
    247 		.entry_number    = -1,
    248 		.base_addr       = (unsigned long)stack16,
    249 		.limit           = 0xffff,
    250 		.seg_32bit       = 0,
    251 		.contents        = 0, /* Data, grow-up */
    252 		.read_exec_only  = 0,
    253 		.limit_in_pages  = 0,
    254 		.seg_not_present = 0,
    255 		.useable         = 0
    256 	};
    257 
    258 	if (syscall(SYS_set_thread_area, &gdt_data16_desc) == 0) {
    259 		/*
    260 		 * This probably indicates vulnerability to CVE-2014-8133.
    261 		 * Merely getting here isn't definitive, though, and we'll
    262 		 * diagnose the problem for real later on.
    263 		 */
    264 		printf("[WARN]\tset_thread_area allocated data16 at index %d\n",
    265 		       gdt_data16_desc.entry_number);
    266 		gdt_data16_idx = gdt_data16_desc.entry_number;
    267 	} else {
    268 		printf("[OK]\tset_thread_area refused 16-bit data\n");
    269 	}
    270 
    271 	struct user_desc gdt_npdata32_desc = {
    272 		.entry_number    = -1,
    273 		.base_addr       = (unsigned long)stack16,
    274 		.limit           = 0xffff,
    275 		.seg_32bit       = 1,
    276 		.contents        = 0, /* Data, grow-up */
    277 		.read_exec_only  = 0,
    278 		.limit_in_pages  = 0,
    279 		.seg_not_present = 1,
    280 		.useable         = 0
    281 	};
    282 
    283 	if (syscall(SYS_set_thread_area, &gdt_npdata32_desc) == 0) {
    284 		/*
    285 		 * As a hardening measure, newer kernels don't allow this.
    286 		 */
    287 		printf("[WARN]\tset_thread_area allocated npdata32 at index %d\n",
    288 		       gdt_npdata32_desc.entry_number);
    289 		gdt_npdata32_idx = gdt_npdata32_desc.entry_number;
    290 	} else {
    291 		printf("[OK]\tset_thread_area refused 16-bit data\n");
    292 	}
    293 }
    294 
    295 /* State used by our signal handlers. */
    296 static gregset_t initial_regs, requested_regs, resulting_regs;
    297 
    298 /* Instructions for the SIGUSR1 handler. */
    299 static volatile unsigned short sig_cs, sig_ss;
    300 static volatile sig_atomic_t sig_trapped, sig_err, sig_trapno;
    301 #ifdef __x86_64__
    302 static volatile sig_atomic_t sig_corrupt_final_ss;
    303 #endif
    304 
    305 /* Abstractions for some 32-bit vs 64-bit differences. */
    306 #ifdef __x86_64__
    307 # define REG_IP REG_RIP
    308 # define REG_SP REG_RSP
    309 # define REG_CX REG_RCX
    310 
    311 struct selectors {
    312 	unsigned short cs, gs, fs, ss;
    313 };
    314 
    315 static unsigned short *ssptr(ucontext_t *ctx)
    316 {
    317 	struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS];
    318 	return &sels->ss;
    319 }
    320 
    321 static unsigned short *csptr(ucontext_t *ctx)
    322 {
    323 	struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS];
    324 	return &sels->cs;
    325 }
    326 #else
    327 # define REG_IP REG_EIP
    328 # define REG_SP REG_ESP
    329 # define REG_CX REG_ECX
    330 
    331 static greg_t *ssptr(ucontext_t *ctx)
    332 {
    333 	return &ctx->uc_mcontext.gregs[REG_SS];
    334 }
    335 
    336 static greg_t *csptr(ucontext_t *ctx)
    337 {
    338 	return &ctx->uc_mcontext.gregs[REG_CS];
    339 }
    340 #endif
    341 
    342 /*
    343  * Checks a given selector for its code bitness or returns -1 if it's not
    344  * a usable code segment selector.
    345  */
    346 int cs_bitness(unsigned short cs)
    347 {
    348 	uint32_t valid = 0, ar;
    349 	asm ("lar %[cs], %[ar]\n\t"
    350 	     "jnz 1f\n\t"
    351 	     "mov $1, %[valid]\n\t"
    352 	     "1:"
    353 	     : [ar] "=r" (ar), [valid] "+rm" (valid)
    354 	     : [cs] "r" (cs));
    355 
    356 	if (!valid)
    357 		return -1;
    358 
    359 	bool db = (ar & (1 << 22));
    360 	bool l = (ar & (1 << 21));
    361 
    362 	if (!(ar & (1<<11)))
    363 	    return -1;	/* Not code. */
    364 
    365 	if (l && !db)
    366 		return 64;
    367 	else if (!l && db)
    368 		return 32;
    369 	else if (!l && !db)
    370 		return 16;
    371 	else
    372 		return -1;	/* Unknown bitness. */
    373 }
    374 
    375 /*
    376  * Checks a given selector for its code bitness or returns -1 if it's not
    377  * a usable code segment selector.
    378  */
    379 bool is_valid_ss(unsigned short cs)
    380 {
    381 	uint32_t valid = 0, ar;
    382 	asm ("lar %[cs], %[ar]\n\t"
    383 	     "jnz 1f\n\t"
    384 	     "mov $1, %[valid]\n\t"
    385 	     "1:"
    386 	     : [ar] "=r" (ar), [valid] "+rm" (valid)
    387 	     : [cs] "r" (cs));
    388 
    389 	if (!valid)
    390 		return false;
    391 
    392 	if ((ar & AR_TYPE_MASK) != AR_TYPE_RWDATA &&
    393 	    (ar & AR_TYPE_MASK) != AR_TYPE_RWDATA_EXPDOWN)
    394 		return false;
    395 
    396 	return (ar & AR_P);
    397 }
    398 
    399 /* Number of errors in the current test case. */
    400 static volatile sig_atomic_t nerrs;
    401 
    402 static void validate_signal_ss(int sig, ucontext_t *ctx)
    403 {
    404 #ifdef __x86_64__
    405 	bool was_64bit = (cs_bitness(*csptr(ctx)) == 64);
    406 
    407 	if (!(ctx->uc_flags & UC_SIGCONTEXT_SS)) {
    408 		printf("[FAIL]\tUC_SIGCONTEXT_SS was not set\n");
    409 		nerrs++;
    410 
    411 		/*
    412 		 * This happens on Linux 4.1.  The rest will fail, too, so
    413 		 * return now to reduce the noise.
    414 		 */
    415 		return;
    416 	}
    417 
    418 	/* UC_STRICT_RESTORE_SS is set iff we came from 64-bit mode. */
    419 	if (!!(ctx->uc_flags & UC_STRICT_RESTORE_SS) != was_64bit) {
    420 		printf("[FAIL]\tUC_STRICT_RESTORE_SS was wrong in signal %d\n",
    421 		       sig);
    422 		nerrs++;
    423 	}
    424 
    425 	if (is_valid_ss(*ssptr(ctx))) {
    426 		/*
    427 		 * DOSEMU was written before 64-bit sigcontext had SS, and
    428 		 * it tries to figure out the signal source SS by looking at
    429 		 * the physical register.  Make sure that keeps working.
    430 		 */
    431 		unsigned short hw_ss;
    432 		asm ("mov %%ss, %0" : "=rm" (hw_ss));
    433 		if (hw_ss != *ssptr(ctx)) {
    434 			printf("[FAIL]\tHW SS didn't match saved SS\n");
    435 			nerrs++;
    436 		}
    437 	}
    438 #endif
    439 }
    440 
    441 /*
    442  * SIGUSR1 handler.  Sets CS and SS as requested and points IP to the
    443  * int3 trampoline.  Sets SP to a large known value so that we can see
    444  * whether the value round-trips back to user mode correctly.
    445  */
    446 static void sigusr1(int sig, siginfo_t *info, void *ctx_void)
    447 {
    448 	ucontext_t *ctx = (ucontext_t*)ctx_void;
    449 
    450 	validate_signal_ss(sig, ctx);
    451 
    452 	memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
    453 
    454 	*csptr(ctx) = sig_cs;
    455 	*ssptr(ctx) = sig_ss;
    456 
    457 	ctx->uc_mcontext.gregs[REG_IP] =
    458 		sig_cs == code16_sel ? 0 : (unsigned long)&int3;
    459 	ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL;
    460 	ctx->uc_mcontext.gregs[REG_CX] = 0;
    461 
    462 	memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
    463 	requested_regs[REG_CX] = *ssptr(ctx);	/* The asm code does this. */
    464 
    465 	return;
    466 }
    467 
    468 /*
    469  * Called after a successful sigreturn (via int3) or from a failed
    470  * sigreturn (directly by kernel).  Restores our state so that the
    471  * original raise(SIGUSR1) returns.
    472  */
    473 static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
    474 {
    475 	ucontext_t *ctx = (ucontext_t*)ctx_void;
    476 
    477 	validate_signal_ss(sig, ctx);
    478 
    479 	sig_err = ctx->uc_mcontext.gregs[REG_ERR];
    480 	sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO];
    481 
    482 	unsigned short ss;
    483 	asm ("mov %%ss,%0" : "=r" (ss));
    484 
    485 	greg_t asm_ss = ctx->uc_mcontext.gregs[REG_CX];
    486 	if (asm_ss != sig_ss && sig == SIGTRAP) {
    487 		/* Sanity check failure. */
    488 		printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n",
    489 		       ss, *ssptr(ctx), (unsigned long long)asm_ss);
    490 		nerrs++;
    491 	}
    492 
    493 	memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
    494 	memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t));
    495 
    496 #ifdef __x86_64__
    497 	if (sig_corrupt_final_ss) {
    498 		if (ctx->uc_flags & UC_STRICT_RESTORE_SS) {
    499 			printf("[FAIL]\tUC_STRICT_RESTORE_SS was set inappropriately\n");
    500 			nerrs++;
    501 		} else {
    502 			/*
    503 			 * DOSEMU transitions from 32-bit to 64-bit mode by
    504 			 * adjusting sigcontext, and it requires that this work
    505 			 * even if the saved SS is bogus.
    506 			 */
    507 			printf("\tCorrupting SS on return to 64-bit mode\n");
    508 			*ssptr(ctx) = 0;
    509 		}
    510 	}
    511 #endif
    512 
    513 	sig_trapped = sig;
    514 }
    515 
    516 #ifdef __x86_64__
    517 /* Tests recovery if !UC_STRICT_RESTORE_SS */
    518 static void sigusr2(int sig, siginfo_t *info, void *ctx_void)
    519 {
    520 	ucontext_t *ctx = (ucontext_t*)ctx_void;
    521 
    522 	if (!(ctx->uc_flags & UC_STRICT_RESTORE_SS)) {
    523 		printf("[FAIL]\traise(2) didn't set UC_STRICT_RESTORE_SS\n");
    524 		nerrs++;
    525 		return;  /* We can't do the rest. */
    526 	}
    527 
    528 	ctx->uc_flags &= ~UC_STRICT_RESTORE_SS;
    529 	*ssptr(ctx) = 0;
    530 
    531 	/* Return.  The kernel should recover without sending another signal. */
    532 }
    533 
    534 static int test_nonstrict_ss(void)
    535 {
    536 	clearhandler(SIGUSR1);
    537 	clearhandler(SIGTRAP);
    538 	clearhandler(SIGSEGV);
    539 	clearhandler(SIGILL);
    540 	sethandler(SIGUSR2, sigusr2, 0);
    541 
    542 	nerrs = 0;
    543 
    544 	printf("[RUN]\tClear UC_STRICT_RESTORE_SS and corrupt SS\n");
    545 	raise(SIGUSR2);
    546 	if (!nerrs)
    547 		printf("[OK]\tIt worked\n");
    548 
    549 	return nerrs;
    550 }
    551 #endif
    552 
    553 /* Finds a usable code segment of the requested bitness. */
    554 int find_cs(int bitness)
    555 {
    556 	unsigned short my_cs;
    557 
    558 	asm ("mov %%cs,%0" :  "=r" (my_cs));
    559 
    560 	if (cs_bitness(my_cs) == bitness)
    561 		return my_cs;
    562 	if (cs_bitness(my_cs + (2 << 3)) == bitness)
    563 		return my_cs + (2 << 3);
    564 	if (my_cs > (2<<3) && cs_bitness(my_cs - (2 << 3)) == bitness)
    565 	    return my_cs - (2 << 3);
    566 	if (cs_bitness(code16_sel) == bitness)
    567 		return code16_sel;
    568 
    569 	printf("[WARN]\tCould not find %d-bit CS\n", bitness);
    570 	return -1;
    571 }
    572 
    573 static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss)
    574 {
    575 	int cs = find_cs(cs_bits);
    576 	if (cs == -1) {
    577 		printf("[SKIP]\tCode segment unavailable for %d-bit CS, %d-bit SS\n",
    578 		       cs_bits, use_16bit_ss ? 16 : 32);
    579 		return 0;
    580 	}
    581 
    582 	if (force_ss != -1) {
    583 		sig_ss = force_ss;
    584 	} else {
    585 		if (use_16bit_ss) {
    586 			if (!data16_sel) {
    587 				printf("[SKIP]\tData segment unavailable for %d-bit CS, 16-bit SS\n",
    588 				       cs_bits);
    589 				return 0;
    590 			}
    591 			sig_ss = data16_sel;
    592 		} else {
    593 			asm volatile ("mov %%ss,%0" : "=r" (sig_ss));
    594 		}
    595 	}
    596 
    597 	sig_cs = cs;
    598 
    599 	printf("[RUN]\tValid sigreturn: %d-bit CS (%hx), %d-bit SS (%hx%s)\n",
    600 	       cs_bits, sig_cs, use_16bit_ss ? 16 : 32, sig_ss,
    601 	       (sig_ss & 4) ? "" : ", GDT");
    602 
    603 	raise(SIGUSR1);
    604 
    605 	nerrs = 0;
    606 
    607 	/*
    608 	 * Check that each register had an acceptable value when the
    609 	 * int3 trampoline was invoked.
    610 	 */
    611 	for (int i = 0; i < NGREG; i++) {
    612 		greg_t req = requested_regs[i], res = resulting_regs[i];
    613 		if (i == REG_TRAPNO || i == REG_IP)
    614 			continue;	/* don't care */
    615 		if (i == REG_SP) {
    616 			printf("\tSP: %llx -> %llx\n", (unsigned long long)req,
    617 			       (unsigned long long)res);
    618 
    619 			/*
    620 			 * In many circumstances, the high 32 bits of rsp
    621 			 * are zeroed.  For example, we could be a real
    622 			 * 32-bit program, or we could hit any of a number
    623 			 * of poorly-documented IRET or segmented ESP
    624 			 * oddities.  If this happens, it's okay.
    625 			 */
    626 			if (res == (req & 0xFFFFFFFF))
    627 				continue;  /* OK; not expected to work */
    628 		}
    629 
    630 		bool ignore_reg = false;
    631 #if __i386__
    632 		if (i == REG_UESP)
    633 			ignore_reg = true;
    634 #else
    635 		if (i == REG_CSGSFS) {
    636 			struct selectors *req_sels =
    637 				(void *)&requested_regs[REG_CSGSFS];
    638 			struct selectors *res_sels =
    639 				(void *)&resulting_regs[REG_CSGSFS];
    640 			if (req_sels->cs != res_sels->cs) {
    641 				printf("[FAIL]\tCS mismatch: requested 0x%hx; got 0x%hx\n",
    642 				       req_sels->cs, res_sels->cs);
    643 				nerrs++;
    644 			}
    645 
    646 			if (req_sels->ss != res_sels->ss) {
    647 				printf("[FAIL]\tSS mismatch: requested 0x%hx; got 0x%hx\n",
    648 				       req_sels->ss, res_sels->ss);
    649 				nerrs++;
    650 			}
    651 
    652 			continue;
    653 		}
    654 #endif
    655 
    656 		/* Sanity check on the kernel */
    657 		if (i == REG_CX && requested_regs[i] != resulting_regs[i]) {
    658 			printf("[FAIL]\tCX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n",
    659 			       (unsigned long long)requested_regs[i],
    660 			       (unsigned long long)resulting_regs[i]);
    661 			nerrs++;
    662 			continue;
    663 		}
    664 
    665 		if (requested_regs[i] != resulting_regs[i] && !ignore_reg) {
    666 			/*
    667 			 * SP is particularly interesting here.  The
    668 			 * usual cause of failures is that we hit the
    669 			 * nasty IRET case of returning to a 16-bit SS,
    670 			 * in which case bits 16:31 of the *kernel*
    671 			 * stack pointer persist in ESP.
    672 			 */
    673 			printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n",
    674 			       i, (unsigned long long)requested_regs[i],
    675 			       (unsigned long long)resulting_regs[i]);
    676 			nerrs++;
    677 		}
    678 	}
    679 
    680 	if (nerrs == 0)
    681 		printf("[OK]\tall registers okay\n");
    682 
    683 	return nerrs;
    684 }
    685 
    686 static int test_bad_iret(int cs_bits, unsigned short ss, int force_cs)
    687 {
    688 	int cs = force_cs == -1 ? find_cs(cs_bits) : force_cs;
    689 	if (cs == -1)
    690 		return 0;
    691 
    692 	sig_cs = cs;
    693 	sig_ss = ss;
    694 
    695 	printf("[RUN]\t%d-bit CS (%hx), bogus SS (%hx)\n",
    696 	       cs_bits, sig_cs, sig_ss);
    697 
    698 	sig_trapped = 0;
    699 	raise(SIGUSR1);
    700 	if (sig_trapped) {
    701 		char errdesc[32] = "";
    702 		if (sig_err) {
    703 			const char *src = (sig_err & 1) ? " EXT" : "";
    704 			const char *table;
    705 			if ((sig_err & 0x6) == 0x0)
    706 				table = "GDT";
    707 			else if ((sig_err & 0x6) == 0x4)
    708 				table = "LDT";
    709 			else if ((sig_err & 0x6) == 0x2)
    710 				table = "IDT";
    711 			else
    712 				table = "???";
    713 
    714 			sprintf(errdesc, "%s%s index %d, ",
    715 				table, src, sig_err >> 3);
    716 		}
    717 
    718 		char trapname[32];
    719 		if (sig_trapno == 13)
    720 			strcpy(trapname, "GP");
    721 		else if (sig_trapno == 11)
    722 			strcpy(trapname, "NP");
    723 		else if (sig_trapno == 12)
    724 			strcpy(trapname, "SS");
    725 		else if (sig_trapno == 32)
    726 			strcpy(trapname, "IRET");  /* X86_TRAP_IRET */
    727 		else
    728 			sprintf(trapname, "%d", sig_trapno);
    729 
    730 		printf("[OK]\tGot #%s(0x%lx) (i.e. %s%s)\n",
    731 		       trapname, (unsigned long)sig_err,
    732 		       errdesc, strsignal(sig_trapped));
    733 		return 0;
    734 	} else {
    735 		/*
    736 		 * This also implicitly tests UC_STRICT_RESTORE_SS:
    737 		 * We check that these signals set UC_STRICT_RESTORE_SS and,
    738 		 * if UC_STRICT_RESTORE_SS doesn't cause strict behavior,
    739 		 * then we won't get SIGSEGV.
    740 		 */
    741 		printf("[FAIL]\tDid not get SIGSEGV\n");
    742 		return 1;
    743 	}
    744 }
    745 
    746 int main()
    747 {
    748 	int total_nerrs = 0;
    749 	unsigned short my_cs, my_ss;
    750 
    751 	asm volatile ("mov %%cs,%0" : "=r" (my_cs));
    752 	asm volatile ("mov %%ss,%0" : "=r" (my_ss));
    753 	setup_ldt();
    754 
    755 	stack_t stack = {
    756 		.ss_sp = altstack_data,
    757 		.ss_size = SIGSTKSZ,
    758 	};
    759 	if (sigaltstack(&stack, NULL) != 0)
    760 		err(1, "sigaltstack");
    761 
    762 	sethandler(SIGUSR1, sigusr1, 0);
    763 	sethandler(SIGTRAP, sigtrap, SA_ONSTACK);
    764 
    765 	/* Easy cases: return to a 32-bit SS in each possible CS bitness. */
    766 	total_nerrs += test_valid_sigreturn(64, false, -1);
    767 	total_nerrs += test_valid_sigreturn(32, false, -1);
    768 	total_nerrs += test_valid_sigreturn(16, false, -1);
    769 
    770 	/*
    771 	 * Test easy espfix cases: return to a 16-bit LDT SS in each possible
    772 	 * CS bitness.  NB: with a long mode CS, the SS bitness is irrelevant.
    773 	 *
    774 	 * This catches the original missing-espfix-on-64-bit-kernels issue
    775 	 * as well as CVE-2014-8134.
    776 	 */
    777 	total_nerrs += test_valid_sigreturn(64, true, -1);
    778 	total_nerrs += test_valid_sigreturn(32, true, -1);
    779 	total_nerrs += test_valid_sigreturn(16, true, -1);
    780 
    781 	if (gdt_data16_idx) {
    782 		/*
    783 		 * For performance reasons, Linux skips espfix if SS points
    784 		 * to the GDT.  If we were able to allocate a 16-bit SS in
    785 		 * the GDT, see if it leaks parts of the kernel stack pointer.
    786 		 *
    787 		 * This tests for CVE-2014-8133.
    788 		 */
    789 		total_nerrs += test_valid_sigreturn(64, true,
    790 						    GDT3(gdt_data16_idx));
    791 		total_nerrs += test_valid_sigreturn(32, true,
    792 						    GDT3(gdt_data16_idx));
    793 		total_nerrs += test_valid_sigreturn(16, true,
    794 						    GDT3(gdt_data16_idx));
    795 	}
    796 
    797 #ifdef __x86_64__
    798 	/* Nasty ABI case: check SS corruption handling. */
    799 	sig_corrupt_final_ss = 1;
    800 	total_nerrs += test_valid_sigreturn(32, false, -1);
    801 	total_nerrs += test_valid_sigreturn(32, true, -1);
    802 	sig_corrupt_final_ss = 0;
    803 #endif
    804 
    805 	/*
    806 	 * We're done testing valid sigreturn cases.  Now we test states
    807 	 * for which sigreturn itself will succeed but the subsequent
    808 	 * entry to user mode will fail.
    809 	 *
    810 	 * Depending on the failure mode and the kernel bitness, these
    811 	 * entry failures can generate SIGSEGV, SIGBUS, or SIGILL.
    812 	 */
    813 	clearhandler(SIGTRAP);
    814 	sethandler(SIGSEGV, sigtrap, SA_ONSTACK);
    815 	sethandler(SIGBUS, sigtrap, SA_ONSTACK);
    816 	sethandler(SIGILL, sigtrap, SA_ONSTACK);  /* 32-bit kernels do this */
    817 
    818 	/* Easy failures: invalid SS, resulting in #GP(0) */
    819 	test_bad_iret(64, ldt_nonexistent_sel, -1);
    820 	test_bad_iret(32, ldt_nonexistent_sel, -1);
    821 	test_bad_iret(16, ldt_nonexistent_sel, -1);
    822 
    823 	/* These fail because SS isn't a data segment, resulting in #GP(SS) */
    824 	test_bad_iret(64, my_cs, -1);
    825 	test_bad_iret(32, my_cs, -1);
    826 	test_bad_iret(16, my_cs, -1);
    827 
    828 	/* Try to return to a not-present code segment, triggering #NP(SS). */
    829 	test_bad_iret(32, my_ss, npcode32_sel);
    830 
    831 	/*
    832 	 * Try to return to a not-present but otherwise valid data segment.
    833 	 * This will cause IRET to fail with #SS on the espfix stack.  This
    834 	 * exercises CVE-2014-9322.
    835 	 *
    836 	 * Note that, if espfix is enabled, 64-bit Linux will lose track
    837 	 * of the actual cause of failure and report #GP(0) instead.
    838 	 * This would be very difficult for Linux to avoid, because
    839 	 * espfix64 causes IRET failures to be promoted to #DF, so the
    840 	 * original exception frame is never pushed onto the stack.
    841 	 */
    842 	test_bad_iret(32, npdata32_sel, -1);
    843 
    844 	/*
    845 	 * Try to return to a not-present but otherwise valid data
    846 	 * segment without invoking espfix.  Newer kernels don't allow
    847 	 * this to happen in the first place.  On older kernels, though,
    848 	 * this can trigger CVE-2014-9322.
    849 	 */
    850 	if (gdt_npdata32_idx)
    851 		test_bad_iret(32, GDT3(gdt_npdata32_idx), -1);
    852 
    853 #ifdef __x86_64__
    854 	total_nerrs += test_nonstrict_ss();
    855 #endif
    856 
    857 	return total_nerrs ? 1 : 0;
    858 }
    859