/*
 * 32-bit syscall ABI conformance test.
 *
 * Copyright (c) 2015 Denys Vlasenko
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
/*
 * Can be built statically:
 * gcc -Os -Wall -static -m32 test_syscall_vdso.c thunks_32.S
 */
#undef _GNU_SOURCE
#define _GNU_SOURCE 1
#undef __USE_GNU
#define __USE_GNU 1
#include <unistd.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/select.h>
#include <sys/time.h>
#include <elf.h>
#include <sys/ptrace.h>
#include <sys/utsname.h>
#include <sys/wait.h>

#if !defined(__i386__)
/* Anything other than a 32-bit x86 build has nothing to test. */
int main(int argc, char **argv, char **envp)
{
	printf("[SKIP]\tNot a 32-bit x86 userspace\n");
	return 0;
}
#else

/* Address of the syscall entry point currently under test. */
long syscall_addr;

/*
 * Find the AT_SYSINFO entry in the ELF auxiliary vector.
 *
 * The auxiliary vector is located by walking past the NULL that
 * terminates envp.  Returns the AT_SYSINFO value, or 0 (after printing
 * a warning) if the vector contains no such entry.
 */
long get_syscall(char **envp)
{
	Elf32_auxv_t *auxv;

	/* Skip all environment pointers, including the terminating NULL. */
	while (*envp++ != NULL)
		continue;

	for (auxv = (void *)envp; auxv->a_type != AT_NULL; auxv++)
		if (auxv->a_type == AT_SYSINFO)
			return auxv->a_un.a_val;

	printf("[WARN]\tAT_SYSINFO not supplied\n");
	return 0;
}

/* Minimal "INT 0x80; ret" entry point, callable like the vDSO one. */
asm (
	" .pushsection .text\n"
	" .global int80\n"
	"int80:\n"
	" int $0x80\n"
	" ret\n"
	" .popsection\n"
);
extern char int80;

/* Snapshot of the 64-bit general purpose registers, in get_regs64 order. */
struct regs64 {
	uint64_t rax, rbx, rcx, rdx;
	uint64_t rsi, rdi, rbp, rsp;
	uint64_t r8, r9, r10, r11;
	uint64_t r12, r13, r14, r15;
};
struct regs64 regs64;
int kernel_is_64bit;	/* set by main() from the value of %cs */
int clobber_ok;		/* set by main(): kernel is older than 4.17 */

/*
 * 64-bit helpers, entered through call64_from_32():
 *  get_regs64    - store all 64-bit registers into the regs64 variable
 *  poison_regs64 - load R8 with 0x7f7f7f7f7f7f7f7f and each of R9..R15
 *                  with the previous register's value plus one
 */
asm (
	" .pushsection .text\n"
	" .code64\n"
	"get_regs64:\n"
	" push %rax\n"
	" mov $regs64, %eax\n"
	" pop 0*8(%rax)\n"
	" movq %rbx, 1*8(%rax)\n"
	" movq %rcx, 2*8(%rax)\n"
	" movq %rdx, 3*8(%rax)\n"
	" movq %rsi, 4*8(%rax)\n"
	" movq %rdi, 5*8(%rax)\n"
	" movq %rbp, 6*8(%rax)\n"
	" movq %rsp, 7*8(%rax)\n"
	" movq %r8, 8*8(%rax)\n"
	" movq %r9, 9*8(%rax)\n"
	" movq %r10, 10*8(%rax)\n"
	" movq %r11, 11*8(%rax)\n"
	" movq %r12, 12*8(%rax)\n"
	" movq %r13, 13*8(%rax)\n"
	" movq %r14, 14*8(%rax)\n"
	" movq %r15, 15*8(%rax)\n"
	" ret\n"
	"poison_regs64:\n"
	" movq $0x7f7f7f7f, %r8\n"
	" shl $32, %r8\n"
	" orq $0x7f7f7f7f, %r8\n"
	" movq %r8, %r9\n"
	" incq %r9\n"
	" movq %r9, %r10\n"
	" incq %r10\n"
	" movq %r10, %r11\n"
	" incq %r11\n"
	" movq %r11, %r12\n"
	" incq %r12\n"
	" movq %r12, %r13\n"
	" incq %r13\n"
	" movq %r13, %r14\n"
	" incq %r14\n"
	" movq %r14, %r15\n"
	" incq %r15\n"
	" ret\n"
	" .code32\n"
	" .popsection\n"
);
extern void get_regs64(void);
extern void poison_regs64(void);
/* Not defined in this file; the build note above also links thunks_32.S. */
extern unsigned long call64_from_32(void (*function)(void));

/* Debug helper: dump the regs64 snapshot (no-op on a 32-bit kernel). */
void print_regs64(void)
{
	if (!kernel_is_64bit)
		return;
	printf("ax:%016llx bx:%016llx cx:%016llx dx:%016llx\n", regs64.rax, regs64.rbx, regs64.rcx, regs64.rdx);
	printf("si:%016llx di:%016llx bp:%016llx sp:%016llx\n", regs64.rsi, regs64.rdi, regs64.rbp, regs64.rsp);
	printf(" 8:%016llx 9:%016llx 10:%016llx 11:%016llx\n", regs64.r8 , regs64.r9 , regs64.r10, regs64.r11);
	printf("12:%016llx 13:%016llx 14:%016llx 15:%016llx\n", regs64.r12, regs64.r13, regs64.r14, regs64.r15);
}

/*
 * Parse "version.patchlevel.sublevel" from uname()'s release string and
 * store the first two numbers through the given pointers.
 * Exits with status 1 if uname() fails or the string cannot be parsed.
 */
static void get_kernel_version(int *version, int *patchlevel)
{
	int ret, sublevel;
	struct utsname utsname;

	ret = uname(&utsname);
	if (ret) {
		perror("uname");
		exit(1);
	}

	ret = sscanf(utsname.release, "%d.%d.%d", version, patchlevel,
		     &sublevel);
	if (ret < 0) {
		perror("sscanf");
		exit(1);
	} else if (ret != 3) {
		printf("Malformed kernel version %s\n", utsname.release);
		exit(1);
	}
}

/*
 * Compare the R8..R15 values captured in regs64 against the pattern
 * loaded by poison_regs64.
 *
 * Returns the number of registers holding an unexpected value
 * (always 0 on a 32-bit kernel, where the check is skipped).
 */
int check_regs64(void)
{
	int err = 0;
	int num;
	uint64_t *r64 = &regs64.r8;
	uint64_t expected = 0x7f7f7f7f7f7f7f7fULL;

	if (!kernel_is_64bit)
		return 0;

	/* Register N is expected to hold the poison base value + (N - 8). */
	for (num = 8; num < 16; num++, r64++, expected++) {
		if (*r64 == expected)
			continue; /* register did not change */
		if (syscall_addr != (long)&int80) {
			/*
			 * Non-INT80 syscall entrypoints are allowed to clobber R8+ regs:
			 * either clear them to 0, or for R11, load EFLAGS.
			 */
			if (*r64 == 0)
				continue;
			if (num == 11) {
				printf("[NOTE]\tR11 has changed:%016llx - assuming clobbered by SYSRET insn\n", *r64);
				continue;
			}
		} else {
			/*
			 * INT80 syscall entrypoint can be used by
			 * 64-bit programs too, unlike SYSCALL/SYSENTER.
			 * Therefore it must preserve R12+
			 * (they are callee-saved registers in 64-bit C ABI).
			 *
			 * Starting in Linux 4.17 (and any kernel that
			 * backports the change), R8..11 are preserved.
			 * Historically (and probably unintentionally), they
			 * were clobbered or zeroed.
			 */
			if (clobber_ok && *r64 == 0 && num <= 11) {
				printf("Warning: kernel zeroed r%d, "
				       "allowing on < v4.17\n", num);
				continue;
			}
		}
		printf("[FAIL]\tR%d has changed:%016llx\n", num, *r64);
		err++;
	}

	if (!err)
		printf("[OK]\tR8..R15 did not leak kernel data\n");
	return err;
}

/* Arguments of the pselect call; referenced by name from the inline asm. */
int nfds;
fd_set rfds;
fd_set wfds;
fd_set efds;
struct timespec timeout;
sigset_t sigmask;
struct {
	sigset_t *sp;
	int sz;
} sigmask_desc;

/* (Re)initialize the global pselect arguments before each syscall. */
void prep_args(void)
{
	nfds = 42;
	FD_ZERO(&rfds);
	FD_ZERO(&wfds);
	FD_ZERO(&efds);
	FD_SET(0, &rfds);
	FD_SET(1, &wfds);
	FD_SET(2, &efds);
	timeout.tv_sec = 0;
	timeout.tv_nsec = 123;
	sigemptyset(&sigmask);
	sigaddset(&sigmask, SIGINT);
	sigaddset(&sigmask, SIGUSR2);
	sigaddset(&sigmask, SIGRTMAX);
	sigmask_desc.sp = &sigmask;
	sigmask_desc.sz = 8; /* bytes */
}

/*
 * Print "name=<hex value>" followed by a symbolic decoding of EFLAGS
 * bits 21..0, most significant bit first.
 *
 * bitarray holds one pair of strings per bit: [2*bit] is printed when the
 * bit is clear, [2*bit + 1] when it is set; empty strings are skipped.
 * The bit 0 strings end in '\n', which terminates the output line.
 */
static void print_flags(const char *name, unsigned long r)
{
	static const char *bitarray[] = {
	"\n" ,"c\n" ,/* Carry Flag */
	"0 " ,"1 " ,/* Bit 1 - always on */
	"" ,"p " ,/* Parity Flag */
	"0 " ,"3? " ,
	"" ,"a " ,/* Auxiliary carry Flag */
	"0 " ,"5? " ,
	"" ,"z " ,/* Zero Flag */
	"" ,"s " ,/* Sign Flag */
	"" ,"t " ,/* Trap Flag */
	"" ,"i " ,/* Interrupt Flag */
	"" ,"d " ,/* Direction Flag */
	"" ,"o " ,/* Overflow Flag */
	"0 " ,"1 " ,/* I/O Privilege Level (2 bits) */
	"0" ,"1" ,/* I/O Privilege Level (2 bits) */
	"" ,"n " ,/* Nested Task */
	"0 " ,"15? ",
	"" ,"r " ,/* Resume Flag */
	"" ,"v " ,/* Virtual Mode */
	"" ,"ac " ,/* Alignment Check/Access Control */
	"" ,"vif ",/* Virtual Interrupt Flag */
	"" ,"vip ",/* Virtual Interrupt Pending */
	"" ,"id " ,/* CPUID detection */
	NULL
	};
	const char **bitstr;
	int bit;

	printf("%s=%016lx ", name, r);
	/* Start at the string pair for bit 21 (index 2 * 21) and walk down. */
	bitstr = bitarray + 42;
	bit = 21;
	if ((r >> 22) != 0)
		printf("(extra bits are set) ");
	do {
		if (bitstr[(r >> bit) & 1][0])
			fputs(bitstr[(r >> bit) & 1], stdout);
		bitstr -= 2;
		bit--;
	} while (bit >= 0);
}

/*
 * Issue one 6-argument syscall (pselect, number 308) through
 * syscall_addr and verify what comes back.
 *
 * Checks that the six argument registers still hold their values,
 * reports any EFLAGS changes as warnings only, and on a 64-bit kernel
 * checks R8..R15 via check_regs64().
 *
 * Returns 0 on success, 1 if an argument register was clobbered,
 * otherwise the error count from check_regs64().
 */
int run_syscall(void)
{
	long flags, bad_arg;

	prep_args();

	if (kernel_is_64bit)
		call64_from_32(poison_regs64);
	/*print_regs64();*/

	/*
	 * volatile + "memory": the asm reads the globals set by prep_args()
	 * by symbol name, and the kernel may write to the fd sets and the
	 * timeout, so the compiler must not move or cache memory accesses
	 * across it.  "cc": EFLAGS is rewritten by popf/cmp.
	 */
	asm volatile("\n"
	/* Try 6-arg syscall: pselect. It should return quickly */
	" push %%ebp\n"
	" mov $308, %%eax\n" /* PSELECT */
	" mov nfds, %%ebx\n" /* ebx arg1 */
	" mov $rfds, %%ecx\n" /* ecx arg2 */
	" mov $wfds, %%edx\n" /* edx arg3 */
	" mov $efds, %%esi\n" /* esi arg4 */
	" mov $timeout, %%edi\n" /* edi arg5 */
	" mov $sigmask_desc, %%ebp\n" /* %ebp arg6 */
	" push $0x200ed7\n" /* set almost all flags */
	" popf\n" /* except TF, IOPL, NT, RF, VM, AC, VIF, VIP */
	" call *syscall_addr\n"
	/* Check that registers are not clobbered */
	" pushf\n"
	" pop %%eax\n"
	" cld\n"
	" cmp nfds, %%ebx\n" /* ebx arg1 */
	" mov $1, %%ebx\n"
	" jne 1f\n"
	" cmp $rfds, %%ecx\n" /* ecx arg2 */
	" mov $2, %%ebx\n"
	" jne 1f\n"
	" cmp $wfds, %%edx\n" /* edx arg3 */
	" mov $3, %%ebx\n"
	" jne 1f\n"
	" cmp $efds, %%esi\n" /* esi arg4 */
	" mov $4, %%ebx\n"
	" jne 1f\n"
	" cmp $timeout, %%edi\n" /* edi arg5 */
	" mov $5, %%ebx\n"
	" jne 1f\n"
	" cmpl $sigmask_desc, %%ebp\n" /* %ebp arg6 */
	" mov $6, %%ebx\n"
	" jne 1f\n"
	" mov $0, %%ebx\n"
	"1:\n"
	" pop %%ebp\n"
	: "=a" (flags), "=b" (bad_arg)
	:
	: "cx", "dx", "si", "di", "memory", "cc"
	);

	if (kernel_is_64bit) {
		/* Pre-fill so that registers get_regs64 fails to store stand out. */
		memset(&regs64, 0x77, sizeof(regs64));
		call64_from_32(get_regs64);
		/*print_regs64();*/
	}

	/*
	 * On paravirt kernels, flags are not preserved across syscalls.
	 * Thus, we do not consider it a bug if some are changed.
	 * We just show ones which do.
	 */
	if ((0x200ed7 ^ flags) != 0) {
		print_flags("[WARN]\tFlags before", 0x200ed7);
		print_flags("[WARN]\tFlags after", flags);
		print_flags("[WARN]\tFlags change", (0x200ed7 ^ flags));
	}

	if (bad_arg) {
		printf("[FAIL]\targ#%ld clobbered\n", bad_arg);
		return 1;
	}
	printf("[OK]\tArguments are preserved across syscall\n");

	return check_regs64();
}

/*
 * Run the syscall test through the entry point in syscall_addr (if one
 * was found) and then through INT 80.  syscall_addr is restored
 * afterwards.  Returns the sum of both runs' results.
 */
int run_syscall_twice(void)
{
	int exitcode = 0;
	long sv;

	if (syscall_addr) {
		printf("[RUN]\tExecuting 6-argument 32-bit syscall via VDSO\n");
		exitcode = run_syscall();
	}
	sv = syscall_addr;
	syscall_addr = (long)&int80;
	printf("[RUN]\tExecuting 6-argument 32-bit syscall via INT 80\n");
	exitcode += run_syscall();
	syscall_addr = sv;
	return exitcode;
}

/*
 * Fork; the child requests tracing, stops itself and then returns to the
 * caller so the tests run again under ptrace.  The parent never returns:
 * it resumes the child with PTRACE_SYSCALL after every stop and exits
 * with the child's exit status (or terminating signal number).
 */
void ptrace_me(void)
{
	pid_t pid;

	fflush(NULL);
	pid = fork();
	if (pid < 0) {
		perror("fork");
		exit(1);
	}
	if (pid == 0) {
		/* child */
		if (ptrace(PTRACE_TRACEME, 0L, 0L, 0L) != 0)
			exit(0);
		raise(SIGSTOP);
		return;
	}
	/* parent */
	printf("[RUN]\tRunning tests under ptrace\n");
	while (1) {
		int status;

		pid = waitpid(-1, &status, __WALL);
		/* status is only meaningful if waitpid() succeeded. */
		if (pid <= 0)
			exit(255);
		if (WIFEXITED(status))
			exit(WEXITSTATUS(status));
		if (WIFSIGNALED(status))
			exit(WTERMSIG(status));
		if (!WIFSTOPPED(status)) /* paranoia */
			exit(255);
		/*
		 * Note: we do not inject sig = WSTOPSIG(status).
		 * We probably should, but careful: do not inject SIGTRAP
		 * generated by syscall entry/exit stops.
		 * That kills the child.
		 */
		ptrace(PTRACE_SYSCALL, pid, 0L, 0L /*sig*/);
	}
}

/*
 * Detect a 64-bit kernel from the value of %cs, work out whether
 * R8..R11 clobbering is tolerated (kernel older than 4.17), then run
 * the tests twice: directly and under ptrace.
 * Returns the accumulated failure count.
 */
int main(int argc, char **argv, char **envp)
{
	int exitcode = 0;
	int cs;
	int version, patchlevel;

	asm("\n"
	" movl %%cs, %%eax\n"
	: "=a" (cs)
	);
	kernel_is_64bit = (cs == 0x23);
	if (!kernel_is_64bit)
		printf("[NOTE]\tNot a 64-bit kernel, won't test R8..R15 leaks\n");

	get_kernel_version(&version, &patchlevel);
	clobber_ok = version < 4 || (version == 4 && patchlevel < 17);

	/* This only works for non-static builds:
	 * syscall_addr = dlsym(dlopen("linux-gate.so.1", RTLD_NOW), "__kernel_vsyscall");
	 */
	syscall_addr = get_syscall(envp);

	exitcode += run_syscall_twice();
	ptrace_me();
	exitcode += run_syscall_twice();

	return exitcode;
}
#endif