Home | History | Annotate | Download | only in x86
      1 /*
      2  * 32-bit syscall ABI conformance test.
      3  *
      4  * Copyright (c) 2015 Denys Vlasenko
      5  *
      6  * This program is free software; you can redistribute it and/or modify
      7  * it under the terms and conditions of the GNU General Public License,
      8  * version 2, as published by the Free Software Foundation.
      9  *
     10  * This program is distributed in the hope it will be useful, but
     11  * WITHOUT ANY WARRANTY; without even the implied warranty of
     12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     13  * General Public License for more details.
     14  */
     15 /*
     16  * Can be built statically:
     17  * gcc -Os -Wall -static -m32 test_syscall_vdso.c thunks_32.S
     18  */
     19 #undef _GNU_SOURCE
     20 #define _GNU_SOURCE 1
     21 #undef __USE_GNU
     22 #define __USE_GNU 1
     23 #include <unistd.h>
     24 #include <stdlib.h>
     25 #include <string.h>
     26 #include <stdio.h>
     27 #include <signal.h>
     28 #include <sys/types.h>
     29 #include <sys/select.h>
     30 #include <sys/time.h>
     31 #include <elf.h>
     32 #include <sys/ptrace.h>
     33 #include <sys/utsname.h>
     34 #include <sys/wait.h>
     35 
     36 #if !defined(__i386__)
     37 int main(int argc, char **argv, char **envp)
     38 {
     39 	printf("[SKIP]\tNot a 32-bit x86 userspace\n");
     40 	return 0;
     41 }
     42 #else
     43 
     44 long syscall_addr;
     45 long get_syscall(char **envp)
     46 {
     47 	Elf32_auxv_t *auxv;
     48 	while (*envp++ != NULL)
     49 		continue;
     50 	for (auxv = (void *)envp; auxv->a_type != AT_NULL; auxv++)
     51 		if (auxv->a_type == AT_SYSINFO)
     52 			return auxv->a_un.a_val;
     53 	printf("[WARN]\tAT_SYSINFO not supplied\n");
     54 	return 0;
     55 }
     56 
/*
 * int80: minimal assembly routine placed in .text that executes
 * "int $0x80" and returns.  Its address is stored in syscall_addr by
 * run_syscall_twice() so the same "call *syscall_addr" sequence can
 * exercise the INT 80 entry path.
 */
asm (
	"	.pushsection .text\n"
	"	.global	int80\n"
	"int80:\n"
	"	int	$0x80\n"
	"	ret\n"
	"	.popsection\n"
);
/* Declared as a data symbol only so that C code can take &int80 */
extern char int80;
     66 
/*
 * Snapshot of the sixteen 64-bit general purpose registers.
 * The member order is fixed: get_regs64 stores each register at
 * index*8 bytes from the start (rax at 0*8 ... r15 at 15*8), and
 * check_regs64() walks r8..r15 as consecutive uint64_t slots.
 */
struct regs64 {
	uint64_t rax, rbx, rcx, rdx;
	uint64_t rsi, rdi, rbp, rsp;
	uint64_t r8,  r9,  r10, r11;
	uint64_t r12, r13, r14, r15;
};
/* The single snapshot instance, written by get_regs64 */
struct regs64 regs64;
/* Set in main(): non-zero when CS reads as 0x23; gates all R8..R15 checks */
int kernel_is_64bit;
/* Set in main(): non-zero on kernels older than 4.17 (see check_regs64()) */
int clobber_ok;
     76 
/*
 * Two routines assembled as 64-bit code (.code64) inside this 32-bit
 * program.  They are not called directly from C; run_syscall() hands
 * their addresses to call64_from_32().
 *
 * get_regs64:    stores all sixteen 64-bit registers into the global
 *                regs64, in struct regs64 member order.
 * poison_regs64: loads r8 with 0x7f7f7f7f7f7f7f7f and each of r9..r15
 *                with the previous register's value plus one - the
 *                sequence check_regs64() expects to find afterwards.
 */
asm (
	"	.pushsection .text\n"
	"	.code64\n"
	"get_regs64:\n"
	/* rax is needed as the base pointer, so park its value on the stack */
	"	push	%rax\n"
	"	mov	$regs64, %eax\n"
	/* ...and pop the saved rax value straight into regs64.rax */
	"	pop	0*8(%rax)\n"
	"	movq	%rbx, 1*8(%rax)\n"
	"	movq	%rcx, 2*8(%rax)\n"
	"	movq	%rdx, 3*8(%rax)\n"
	"	movq	%rsi, 4*8(%rax)\n"
	"	movq	%rdi, 5*8(%rax)\n"
	"	movq	%rbp, 6*8(%rax)\n"
	/* rsp is recorded as it is at this point inside the routine */
	"	movq	%rsp, 7*8(%rax)\n"
	"	movq	%r8,  8*8(%rax)\n"
	"	movq	%r9,  9*8(%rax)\n"
	"	movq	%r10, 10*8(%rax)\n"
	"	movq	%r11, 11*8(%rax)\n"
	"	movq	%r12, 12*8(%rax)\n"
	"	movq	%r13, 13*8(%rax)\n"
	"	movq	%r14, 14*8(%rax)\n"
	"	movq	%r15, 15*8(%rax)\n"
	"	ret\n"
	"poison_regs64:\n"
	/* Build 0x7f7f7f7f7f7f7f7f in r8: low half, shift up, OR low half in */
	"	movq	$0x7f7f7f7f, %r8\n"
	"	shl	$32, %r8\n"
	"	orq	$0x7f7f7f7f, %r8\n"
	/* Each following register = previous register + 1 */
	"	movq	%r8, %r9\n"
	"	incq	%r9\n"
	"	movq	%r9, %r10\n"
	"	incq	%r10\n"
	"	movq	%r10, %r11\n"
	"	incq	%r11\n"
	"	movq	%r11, %r12\n"
	"	incq	%r12\n"
	"	movq	%r12, %r13\n"
	"	incq	%r13\n"
	"	movq	%r13, %r14\n"
	"	incq	%r14\n"
	"	movq	%r14, %r15\n"
	"	incq	%r15\n"
	"	ret\n"
	/* Switch the assembler back to 32-bit mode for the rest of the file */
	"	.code32\n"
	"	.popsection\n"
);
extern void get_regs64(void);
extern void poison_regs64(void);
/*
 * Not defined in this file - presumably provided by thunks_32.S, which the
 * build command at the top of the file links in (confirm).  Used here to
 * run the two 64-bit routines above.
 */
extern unsigned long call64_from_32(void (*function)(void));
    124 extern unsigned long call64_from_32(void (*function)(void));
    125 void print_regs64(void)
    126 {
    127 	if (!kernel_is_64bit)
    128 		return;
    129 	printf("ax:%016llx bx:%016llx cx:%016llx dx:%016llx\n", regs64.rax,  regs64.rbx,  regs64.rcx,  regs64.rdx);
    130 	printf("si:%016llx di:%016llx bp:%016llx sp:%016llx\n", regs64.rsi,  regs64.rdi,  regs64.rbp,  regs64.rsp);
    131 	printf(" 8:%016llx  9:%016llx 10:%016llx 11:%016llx\n", regs64.r8 ,  regs64.r9 ,  regs64.r10,  regs64.r11);
    132 	printf("12:%016llx 13:%016llx 14:%016llx 15:%016llx\n", regs64.r12,  regs64.r13,  regs64.r14,  regs64.r15);
    133 }
    134 
    135 static void get_kernel_version(int *version, int *patchlevel)
    136 {
    137 	int ret, sublevel;
    138 	struct utsname utsname;
    139 
    140 	ret = uname(&utsname);
    141 	if (ret) {
    142 		perror("uname");
    143 		exit(1);
    144 	}
    145 
    146 	ret = sscanf(utsname.release, "%d.%d.%d", version, patchlevel,
    147 		     &sublevel);
    148 	if (ret < 0) {
    149 		perror("sscanf");
    150 		exit(1);
    151 	} else if (ret != 3) {
    152 		printf("Malformed kernel version %s\n", utsname.release);
    153 		exit(1);
    154 	}
    155 }
    156 
    157 int check_regs64(void)
    158 {
    159 	int err = 0;
    160 	int num = 8;
    161 	uint64_t *r64 = &regs64.r8;
    162 	uint64_t expected = 0x7f7f7f7f7f7f7f7fULL;
    163 
    164 	if (!kernel_is_64bit)
    165 		return 0;
    166 
    167 	do {
    168 		if (*r64 == expected++)
    169 			continue; /* register did not change */
    170 		if (syscall_addr != (long)&int80) {
    171 			/*
    172 			 * Non-INT80 syscall entrypoints are allowed to clobber R8+ regs:
    173 			 * either clear them to 0, or for R11, load EFLAGS.
    174 			 */
    175 			if (*r64 == 0)
    176 				continue;
    177 			if (num == 11) {
    178 				printf("[NOTE]\tR11 has changed:%016llx - assuming clobbered by SYSRET insn\n", *r64);
    179 				continue;
    180 			}
    181 		} else {
    182 			/*
    183 			 * INT80 syscall entrypoint can be used by
    184 			 * 64-bit programs too, unlike SYSCALL/SYSENTER.
    185 			 * Therefore it must preserve R12+
    186 			 * (they are callee-saved registers in 64-bit C ABI).
    187 			 *
    188 			 * Starting in Linux 4.17 (and any kernel that
    189 			 * backports the change), R8..11 are preserved.
    190 			 * Historically (and probably unintentionally), they
    191 			 * were clobbered or zeroed.
    192 			 */
    193 			if (clobber_ok && *r64 == 0 && num <= 11) {
    194 				printf("Warning: kernel zeroed r%d, "
    195 				       "allowing on < v4.17\n", num);
    196 				continue;
    197 			}
    198 		}
    199 		printf("[FAIL]\tR%d has changed:%016llx\n", num, *r64);
    200 		err++;
    201 	} while (r64++, ++num < 16);
    202 
    203 	if (!err)
    204 		printf("[OK]\tR8..R15 did not leak kernel data\n");
    205 	return err;
    206 }
    207 
/*
 * Argument storage for the pselect call issued by run_syscall().
 * These are file-scope objects because the inline assembly there refers
 * to them by symbol name (nfds as a memory operand, the rest by address).
 * prep_args() gives them their values before each call.
 */
int nfds;
fd_set rfds;
fd_set wfds;
fd_set efds;
struct timespec timeout;
sigset_t sigmask;
/* Sixth argument: pointer to the signal mask plus its size in bytes */
struct {
	sigset_t *sp;
	int sz;
} sigmask_desc;
    218 
    219 void prep_args()
    220 {
    221 	nfds = 42;
    222 	FD_ZERO(&rfds);
    223 	FD_ZERO(&wfds);
    224 	FD_ZERO(&efds);
    225 	FD_SET(0, &rfds);
    226 	FD_SET(1, &wfds);
    227 	FD_SET(2, &efds);
    228 	timeout.tv_sec = 0;
    229 	timeout.tv_nsec = 123;
    230 	sigemptyset(&sigmask);
    231 	sigaddset(&sigmask, SIGINT);
    232 	sigaddset(&sigmask, SIGUSR2);
    233 	sigaddset(&sigmask, SIGRTMAX);
    234 	sigmask_desc.sp = &sigmask;
    235 	sigmask_desc.sz = 8; /* bytes */
    236 }
    237 
    238 static void print_flags(const char *name, unsigned long r)
    239 {
    240 	static const char *bitarray[] = {
    241 	"\n" ,"c\n" ,/* Carry Flag */
    242 	"0 " ,"1 "  ,/* Bit 1 - always on */
    243 	""   ,"p "  ,/* Parity Flag */
    244 	"0 " ,"3? " ,
    245 	""   ,"a "  ,/* Auxiliary carry Flag */
    246 	"0 " ,"5? " ,
    247 	""   ,"z "  ,/* Zero Flag */
    248 	""   ,"s "  ,/* Sign Flag */
    249 	""   ,"t "  ,/* Trap Flag */
    250 	""   ,"i "  ,/* Interrupt Flag */
    251 	""   ,"d "  ,/* Direction Flag */
    252 	""   ,"o "  ,/* Overflow Flag */
    253 	"0 " ,"1 "  ,/* I/O Privilege Level (2 bits) */
    254 	"0"  ,"1"   ,/* I/O Privilege Level (2 bits) */
    255 	""   ,"n "  ,/* Nested Task */
    256 	"0 " ,"15? ",
    257 	""   ,"r "  ,/* Resume Flag */
    258 	""   ,"v "  ,/* Virtual Mode */
    259 	""   ,"ac " ,/* Alignment Check/Access Control */
    260 	""   ,"vif ",/* Virtual Interrupt Flag */
    261 	""   ,"vip ",/* Virtual Interrupt Pending */
    262 	""   ,"id " ,/* CPUID detection */
    263 	NULL
    264 	};
    265 	const char **bitstr;
    266 	int bit;
    267 
    268 	printf("%s=%016lx ", name, r);
    269 	bitstr = bitarray + 42;
    270 	bit = 21;
    271 	if ((r >> 22) != 0)
    272 		printf("(extra bits are set) ");
    273 	do {
    274 		if (bitstr[(r >> bit) & 1][0])
    275 			fputs(bitstr[(r >> bit) & 1], stdout);
    276 		bitstr -= 2;
    277 		bit--;
    278 	} while (bit >= 0);
    279 }
    280 
    281 int run_syscall(void)
    282 {
    283 	long flags, bad_arg;
    284 
    285 	prep_args();
    286 
    287 	if (kernel_is_64bit)
    288 		call64_from_32(poison_regs64);
    289 	/*print_regs64();*/
    290 
    291 	asm("\n"
    292 	/* Try 6-arg syscall: pselect. It should return quickly */
    293 	"	push	%%ebp\n"
    294 	"	mov	$308, %%eax\n"     /* PSELECT */
    295 	"	mov	nfds, %%ebx\n"     /* ebx  arg1 */
    296 	"	mov	$rfds, %%ecx\n"    /* ecx  arg2 */
    297 	"	mov	$wfds, %%edx\n"    /* edx  arg3 */
    298 	"	mov	$efds, %%esi\n"    /* esi  arg4 */
    299 	"	mov	$timeout, %%edi\n" /* edi  arg5 */
    300 	"	mov	$sigmask_desc, %%ebp\n" /* %ebp arg6 */
    301 	"	push	$0x200ed7\n"      /* set almost all flags */
    302 	"	popf\n"		/* except TF, IOPL, NT, RF, VM, AC, VIF, VIP */
    303 	"	call	*syscall_addr\n"
    304 	/* Check that registers are not clobbered */
    305 	"	pushf\n"
    306 	"	pop	%%eax\n"
    307 	"	cld\n"
    308 	"	cmp	nfds, %%ebx\n"     /* ebx  arg1 */
    309 	"	mov	$1, %%ebx\n"
    310 	"	jne	1f\n"
    311 	"	cmp	$rfds, %%ecx\n"    /* ecx  arg2 */
    312 	"	mov	$2, %%ebx\n"
    313 	"	jne	1f\n"
    314 	"	cmp	$wfds, %%edx\n"    /* edx  arg3 */
    315 	"	mov	$3, %%ebx\n"
    316 	"	jne	1f\n"
    317 	"	cmp	$efds, %%esi\n"    /* esi  arg4 */
    318 	"	mov	$4, %%ebx\n"
    319 	"	jne	1f\n"
    320 	"	cmp	$timeout, %%edi\n" /* edi  arg5 */
    321 	"	mov	$5, %%ebx\n"
    322 	"	jne	1f\n"
    323 	"	cmpl	$sigmask_desc, %%ebp\n" /* %ebp arg6 */
    324 	"	mov	$6, %%ebx\n"
    325 	"	jne	1f\n"
    326 	"	mov	$0, %%ebx\n"
    327 	"1:\n"
    328 	"	pop	%%ebp\n"
    329 	: "=a" (flags), "=b" (bad_arg)
    330 	:
    331 	: "cx", "dx", "si", "di"
    332 	);
    333 
    334 	if (kernel_is_64bit) {
    335 		memset(&regs64, 0x77, sizeof(regs64));
    336 		call64_from_32(get_regs64);
    337 		/*print_regs64();*/
    338 	}
    339 
    340 	/*
    341 	 * On paravirt kernels, flags are not preserved across syscalls.
    342 	 * Thus, we do not consider it a bug if some are changed.
    343 	 * We just show ones which do.
    344 	 */
    345 	if ((0x200ed7 ^ flags) != 0) {
    346 		print_flags("[WARN]\tFlags before", 0x200ed7);
    347 		print_flags("[WARN]\tFlags  after", flags);
    348 		print_flags("[WARN]\tFlags change", (0x200ed7 ^ flags));
    349 	}
    350 
    351 	if (bad_arg) {
    352 		printf("[FAIL]\targ#%ld clobbered\n", bad_arg);
    353 		return 1;
    354 	}
    355 	printf("[OK]\tArguments are preserved across syscall\n");
    356 
    357 	return check_regs64();
    358 }
    359 
    360 int run_syscall_twice()
    361 {
    362 	int exitcode = 0;
    363 	long sv;
    364 
    365 	if (syscall_addr) {
    366 		printf("[RUN]\tExecuting 6-argument 32-bit syscall via VDSO\n");
    367 		exitcode = run_syscall();
    368 	}
    369 	sv = syscall_addr;
    370 	syscall_addr = (long)&int80;
    371 	printf("[RUN]\tExecuting 6-argument 32-bit syscall via INT 80\n");
    372 	exitcode += run_syscall();
    373 	syscall_addr = sv;
    374 	return exitcode;
    375 }
    376 
    377 void ptrace_me()
    378 {
    379 	pid_t pid;
    380 
    381 	fflush(NULL);
    382 	pid = fork();
    383 	if (pid < 0)
    384 		exit(1);
    385 	if (pid == 0) {
    386 		/* child */
    387 		if (ptrace(PTRACE_TRACEME, 0L, 0L, 0L) != 0)
    388 			exit(0);
    389 		raise(SIGSTOP);
    390 		return;
    391 	}
    392 	/* parent */
    393 	printf("[RUN]\tRunning tests under ptrace\n");
    394 	while (1) {
    395 		int status;
    396 		pid = waitpid(-1, &status, __WALL);
    397 		if (WIFEXITED(status))
    398 			exit(WEXITSTATUS(status));
    399 		if (WIFSIGNALED(status))
    400 			exit(WTERMSIG(status));
    401 		if (pid <= 0 || !WIFSTOPPED(status)) /* paranoia */
    402 			exit(255);
    403 		/*
    404 		 * Note: we do not inject sig = WSTOPSIG(status).
    405 		 * We probably should, but careful: do not inject SIGTRAP
    406 		 * generated by syscall entry/exit stops.
    407 		 * That kills the child.
    408 		 */
    409 		ptrace(PTRACE_SYSCALL, pid, 0L, 0L /*sig*/);
    410 	}
    411 }
    412 
    413 int main(int argc, char **argv, char **envp)
    414 {
    415 	int exitcode = 0;
    416 	int cs;
    417 	int version, patchlevel;
    418 
    419 	asm("\n"
    420 	"	movl	%%cs, %%eax\n"
    421 	: "=a" (cs)
    422 	);
    423 	kernel_is_64bit = (cs == 0x23);
    424 	if (!kernel_is_64bit)
    425 		printf("[NOTE]\tNot a 64-bit kernel, won't test R8..R15 leaks\n");
    426 
    427 	get_kernel_version(&version, &patchlevel);
    428 	clobber_ok = version < 4 || (version == 4 && patchlevel < 17);
    429 
    430 	/* This only works for non-static builds:
    431 	 * syscall_addr = dlsym(dlopen("linux-gate.so.1", RTLD_NOW), "__kernel_vsyscall");
    432 	 */
    433 	syscall_addr = get_syscall(envp);
    434 
    435 	exitcode += run_syscall_twice();
    436 	ptrace_me();
    437 	exitcode += run_syscall_twice();
    438 
    439 	return exitcode;
    440 }
    441 #endif
    442