1 2 /*--------------------------------------------------------------------*/ 3 /*--- Handle system calls. syswrap-main.c ---*/ 4 /*--------------------------------------------------------------------*/ 5 6 /* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2000-2013 Julian Seward 11 jseward (at) acm.org 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 26 02111-1307, USA. 27 28 The GNU General Public License is contained in the file COPYING. 
29 */ 30 31 #include "libvex_guest_offsets.h" 32 #include "libvex_trc_values.h" 33 #include "pub_core_basics.h" 34 #include "pub_core_aspacemgr.h" 35 #include "pub_core_vki.h" 36 #include "pub_core_vkiscnums.h" 37 #include "pub_core_libcsetjmp.h" // to keep _threadstate.h happy 38 #include "pub_core_threadstate.h" 39 #include "pub_core_libcbase.h" 40 #include "pub_core_libcassert.h" 41 #include "pub_core_libcprint.h" 42 #include "pub_core_libcproc.h" // For VG_(getpid)() 43 #include "pub_core_libcsignal.h" 44 #include "pub_core_scheduler.h" // For VG_({acquire,release}_BigLock), 45 // and VG_(vg_yield) 46 #include "pub_core_stacktrace.h" // For VG_(get_and_pp_StackTrace)() 47 #include "pub_core_tooliface.h" 48 #include "pub_core_options.h" 49 #include "pub_core_signals.h" // For VG_SIGVGKILL, VG_(poll_signals) 50 #include "pub_core_syscall.h" 51 #include "pub_core_machine.h" 52 #include "pub_core_syswrap.h" 53 54 #include "priv_types_n_macros.h" 55 #include "priv_syswrap-main.h" 56 57 #if defined(VGO_darwin) 58 #include "priv_syswrap-darwin.h" 59 #endif 60 61 /* Useful info which needs to be recorded somewhere: 62 Use of registers in syscalls is: 63 64 NUM ARG1 ARG2 ARG3 ARG4 ARG5 ARG6 ARG7 ARG8 RESULT 65 LINUX: 66 x86 eax ebx ecx edx esi edi ebp n/a n/a eax (== NUM) 67 amd64 rax rdi rsi rdx r10 r8 r9 n/a n/a rax (== NUM) 68 ppc32 r0 r3 r4 r5 r6 r7 r8 n/a n/a r3+CR0.SO (== ARG1) 69 ppc64 r0 r3 r4 r5 r6 r7 r8 n/a n/a r3+CR0.SO (== ARG1) 70 arm r7 r0 r1 r2 r3 r4 r5 n/a n/a r0 (== ARG1) 71 mips32 v0 a0 a1 a2 a3 stack stack n/a n/a v0 (== NUM) 72 mips64 v0 a0 a1 a2 a3 a4 a5 a6 a7 v0 (== NUM) 73 arm64 x8 x0 x1 x2 x3 x4 x5 n/a n/a x0 ?? (== ARG1??) 74 75 On s390x the svc instruction is used for system calls. The system call 76 number is encoded in the instruction (8 bit immediate field). Since Linux 77 2.6 it is also allowed to use svc 0 with the system call number in r1. 78 This was introduced for system calls >255, but works for all. 
It is 79 also possible to see the svc 0 together with an EXecute instruction, that 80 fills in the immediate field. 81 s390x r1/SVC r2 r3 r4 r5 r6 r7 n/a n/a r2 (== ARG1) 82 83 DARWIN: 84 x86 eax +4 +8 +12 +16 +20 +24 +28 +32 edx:eax, eflags.c 85 amd64 rax rdi rsi rdx rcx r8 r9 +8 +16 rdx:rax, rflags.c 86 87 For x86-darwin, "+N" denotes "in memory at N(%esp)"; ditto 88 amd64-darwin. Apparently 0(%esp) is some kind of return address 89 (perhaps for syscalls done with "sysenter"?) I don't think it is 90 relevant for syscalls done with "int $0x80/1/2". 91 */ 92 93 /* This is the top level of the system-call handler module. All 94 system calls are channelled through here, doing two things: 95 96 * notify the tool of the events (mem/reg reads, writes) happening 97 98 * perform the syscall, usually by passing it along to the kernel 99 unmodified. 100 101 A magical piece of assembly code, do_syscall_for_client_WRK, in 102 syscall-$PLATFORM.S does the tricky bit of passing a syscall to the 103 kernel, whilst having the simulator retain control. 104 */ 105 106 /* The main function is VG_(client_syscall). The simulation calls it 107 whenever a client thread wants to do a syscall. The following is a 108 sketch of what it does. 109 110 * Ensures the root thread's stack is suitably mapped. Tedious and 111 arcane. See big big comment in VG_(client_syscall). 112 113 * First, it rounds up the syscall number and args (which is a 114 platform dependent activity) and puts them in a struct ("args") 115 and also a copy in "orig_args". 116 117 The pre/post wrappers refer to these structs and so no longer 118 need magic macros to access any specific registers. This struct 119 is stored in thread-specific storage. 120 121 122 * The pre-wrapper is called, passing it a pointer to struct 123 "args". 124 125 126 * The pre-wrapper examines the args and pokes the tool 127 appropriately. It may modify the args; this is why "orig_args" 128 is also stored. 
129 130 The pre-wrapper may choose to 'do' the syscall itself, and 131 concludes one of three outcomes: 132 133 Success(N) -- syscall is already complete, with success; 134 result is N 135 136 Fail(N) -- syscall is already complete, with failure; 137 error code is N 138 139 HandToKernel -- (the usual case): this needs to be given to 140 the kernel to be done, using the values in 141 the possibly-modified "args" struct. 142 143 In addition, the pre-wrapper may set some flags: 144 145 MayBlock -- only applicable when outcome==HandToKernel 146 147 PostOnFail -- only applicable when outcome==HandToKernel or Fail 148 149 150 * If the pre-outcome is HandToKernel, the syscall is duly handed 151 off to the kernel (perhaps involving some thread switchery, but 152 that's not important). This reduces the possible set of outcomes 153 to either Success(N) or Fail(N). 154 155 156 * The outcome (Success(N) or Fail(N)) is written back to the guest 157 register(s). This is platform specific: 158 159 x86: Success(N) ==> eax = N 160 Fail(N) ==> eax = -N 161 162 ditto amd64 163 164 ppc32: Success(N) ==> r3 = N, CR0.SO = 0 165 Fail(N) ==> r3 = N, CR0.SO = 1 166 167 Darwin: 168 x86: Success(N) ==> edx:eax = N, cc = 0 169 Fail(N) ==> edx:eax = N, cc = 1 170 171 s390x: Success(N) ==> r2 = N 172 Fail(N) ==> r2 = -N 173 174 * The post wrapper is called if: 175 176 - it exists, and 177 - outcome==Success or (outcome==Fail and PostOnFail is set) 178 179 The post wrapper is passed the adulterated syscall args (struct 180 "args"), and the syscall outcome (viz, Success(N) or Fail(N)). 181 182 There are several other complications, primarily to do with 183 syscalls getting interrupted, explained in comments in the code. 184 */ 185 186 /* CAVEATS for writing wrappers. It is important to follow these! 
187 188 The macros defined in priv_types_n_macros.h are designed to help 189 decouple the wrapper logic from the actual representation of 190 syscall args/results, since these wrappers are designed to work on 191 multiple platforms. 192 193 Sometimes a PRE wrapper will complete the syscall itself, without 194 handing it to the kernel. It will use one of SET_STATUS_Success, 195 SET_STATUS_Failure or SET_STATUS_from_SysRes to set the return 196 value. It is critical to appreciate that use of the macro does not 197 immediately cause the underlying guest state to be updated -- that 198 is done by the driver logic in this file, when the wrapper returns. 199 200 As a result, PRE wrappers of the following form will malfunction: 201 202 PRE(fooble) 203 { 204 ... do stuff ... 205 SET_STATUS_Somehow(...) 206 207 // do something that assumes guest state is up to date 208 } 209 210 In particular, direct or indirect calls to VG_(poll_signals) after 211 setting STATUS can cause the guest state to be read (in order to 212 build signal frames). Do not do this. If you want a signal poll 213 after the syscall goes through, do "*flags |= SfPollAfter" and the 214 driver logic will do it for you. 215 216 ----------- 217 218 Another critical requirement following introduction of new address 219 space manager (JRS, 20050923): 220 221 In a situation where the mappedness of memory has changed, aspacem 222 should be notified BEFORE the tool. 
Hence the following is 223 correct: 224 225 Bool d = VG_(am_notify_munmap)(s->start, s->end+1 - s->start); 226 VG_TRACK( die_mem_munmap, s->start, s->end+1 - s->start ); 227 if (d) 228 VG_(discard_translations)(s->start, s->end+1 - s->start); 229 230 whilst this is wrong: 231 232 VG_TRACK( die_mem_munmap, s->start, s->end+1 - s->start ); 233 Bool d = VG_(am_notify_munmap)(s->start, s->end+1 - s->start); 234 if (d) 235 VG_(discard_translations)(s->start, s->end+1 - s->start); 236 237 The reason is that the tool may itself ask aspacem for more shadow 238 memory as a result of the VG_TRACK call. In such a situation it is 239 critical that aspacem's segment array is up to date -- hence the 240 need to notify aspacem first. 241 242 ----------- 243 244 Also .. take care to call VG_(discard_translations) whenever 245 memory with execute permissions is unmapped. 246 */ 247 248 249 /* --------------------------------------------------------------------- 250 Do potentially blocking syscall for the client, and mess with 251 signal masks at the same time. 252 ------------------------------------------------------------------ */ 253 254 /* Perform a syscall on behalf of a client thread, using a specific 255 signal mask. On completion, the signal mask is set to restore_mask 256 (which presumably blocks almost everything). If a signal happens 257 during the syscall, the handler should call 258 VG_(fixup_guest_state_after_syscall_interrupted) to adjust the 259 thread's context to do the right thing. 260 261 The _WRK function is handwritten assembly, implemented per-platform 262 in coregrind/m_syswrap/syscall-$PLAT.S. It has some very magic 263 properties. See comments at the top of 264 VG_(fixup_guest_state_after_syscall_interrupted) below for details. 265 266 This function (these functions) are required to return zero in case 267 of success (even if the syscall itself failed), and nonzero if the 268 sigprocmask-swizzling calls failed. 
We don't actually care about
   the failure values from sigprocmask, although most of the assembly
   implementations do attempt to return that, using the convention
   0 for success, or 0x8000 | error-code for failure.
*/
#if defined(VGO_linux)
extern
UWord ML_(do_syscall_for_client_WRK)( Word syscallno,
                                      void* guest_state,
                                      const vki_sigset_t *syscall_mask,
                                      const vki_sigset_t *restore_mask,
                                      Word sigsetSzB );
#elif defined(VGO_darwin)
extern
UWord ML_(do_syscall_for_client_unix_WRK)( Word syscallno,
                                           void* guest_state,
                                           const vki_sigset_t *syscall_mask,
                                           const vki_sigset_t *restore_mask,
                                           Word sigsetSzB ); /* unused */
extern
UWord ML_(do_syscall_for_client_mach_WRK)( Word syscallno,
                                           void* guest_state,
                                           const vki_sigset_t *syscall_mask,
                                           const vki_sigset_t *restore_mask,
                                           Word sigsetSzB ); /* unused */
extern
UWord ML_(do_syscall_for_client_mdep_WRK)( Word syscallno,
                                           void* guest_state,
                                           const vki_sigset_t *syscall_mask,
                                           const vki_sigset_t *restore_mask,
                                           Word sigsetSzB ); /* unused */
#else
# error "Unknown OS"
#endif


/* Perform SYSCALLNO on behalf of client thread TST, with the signal
   mask set to *syscall_mask while the kernel is running the call.
   On Darwin the syscall class (Unix/Mach/mdep) is decoded from the
   64-bit-encoded syscall number and dispatched to the matching
   assembly helper.  A nonzero return from the helper means the
   sigprocmask swizzling itself failed, which is fatal -- hence the
   assert at the end.  (The syscall's own success/failure is NOT
   reported here; it is left in the guest state for the caller to
   pick up.) */
static
void do_syscall_for_client ( Int syscallno,
                             ThreadState* tst,
                             const vki_sigset_t* syscall_mask )
{
   vki_sigset_t saved;
   UWord err;
#  if defined(VGO_linux)
   err = ML_(do_syscall_for_client_WRK)(
            syscallno, &tst->arch.vex,
            syscall_mask, &saved, sizeof(vki_sigset_t)
         );
#  elif defined(VGO_darwin)
   switch (VG_DARWIN_SYSNO_CLASS(syscallno)) {
      case VG_DARWIN_SYSCALL_CLASS_UNIX:
         err = ML_(do_syscall_for_client_unix_WRK)(
                  VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
                  syscall_mask, &saved, 0/*unused:sigsetSzB*/
               );
         break;
      case VG_DARWIN_SYSCALL_CLASS_MACH:
         err = ML_(do_syscall_for_client_mach_WRK)(
                  VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
                  syscall_mask, &saved, 0/*unused:sigsetSzB*/
               );
         break;
      case VG_DARWIN_SYSCALL_CLASS_MDEP:
         err = ML_(do_syscall_for_client_mdep_WRK)(
                  VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
                  syscall_mask, &saved, 0/*unused:sigsetSzB*/
               );
         break;
      default:
         vg_assert(0);
         /*NOTREACHED*/
         break;
   }
#  else
#    error "Unknown OS"
#  endif
   /* Helpers return 0 on success, or 0x8000 | errcode if the
      sigprocmask calls inside the helper failed. */
   vg_assert2(
      err == 0,
      "ML_(do_syscall_for_client_WRK): sigprocmask error %d",
      (Int)(err & 0xFFF)
   );
}


/* ---------------------------------------------------------------------
   Impedance matchers and misc helpers
   ------------------------------------------------------------------ */

/* Field-by-field equality of two canonical syscall-argument blocks
   (all 8 args plus the syscall number). */
static
Bool eq_SyscallArgs ( SyscallArgs* a1, SyscallArgs* a2 )
{
   return a1->sysno == a2->sysno
          && a1->arg1 == a2->arg1
          && a1->arg2 == a2->arg2
          && a1->arg3 == a2->arg3
          && a1->arg4 == a2->arg4
          && a1->arg5 == a2->arg5
          && a1->arg6 == a2->arg6
          && a1->arg7 == a2->arg7
          && a1->arg8 == a2->arg8;
}

/* Equality of two syscall statuses.  On Darwin, inequality is treated
   as a debugging event: the components are dumped and we assert. */
static
Bool eq_SyscallStatus ( SyscallStatus* s1, SyscallStatus* s2 )
{
   /* was: return s1->what == s2->what && sr_EQ( s1->sres, s2->sres ); */
   if (s1->what == s2->what && sr_EQ( s1->sres, s2->sres ))
      return True;
#  if defined(VGO_darwin)
   /* Darwin-specific debugging guff */
   vg_assert(s1->what == s2->what);
   VG_(printf)("eq_SyscallStatus:\n");
   VG_(printf)("  {%lu %lu %u}\n", s1->sres._wLO, s1->sres._wHI, s1->sres._mode);
   VG_(printf)("  {%lu %lu %u}\n", s2->sres._wLO, s2->sres._wHI, s2->sres._mode);
   vg_assert(0);
#  endif
   return False;
}

/* Convert between SysRes and SyscallStatus, to the extent possible. */

static
SyscallStatus convert_SysRes_to_SyscallStatus ( SysRes res )
{
   SyscallStatus status;
   status.what = SsComplete;   /* a SysRes always denotes a finished call */
   status.sres = res;
   return status;
}


/* Impedance matchers.
These convert syscall arg or result data from 400 the platform-specific in-guest-state format to the canonical 401 formats, and back. */ 402 403 static 404 void getSyscallArgsFromGuestState ( /*OUT*/SyscallArgs* canonical, 405 /*IN*/ VexGuestArchState* gst_vanilla, 406 /*IN*/ UInt trc ) 407 { 408 #if defined(VGP_x86_linux) 409 VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla; 410 canonical->sysno = gst->guest_EAX; 411 canonical->arg1 = gst->guest_EBX; 412 canonical->arg2 = gst->guest_ECX; 413 canonical->arg3 = gst->guest_EDX; 414 canonical->arg4 = gst->guest_ESI; 415 canonical->arg5 = gst->guest_EDI; 416 canonical->arg6 = gst->guest_EBP; 417 canonical->arg7 = 0; 418 canonical->arg8 = 0; 419 420 #elif defined(VGP_amd64_linux) 421 VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla; 422 canonical->sysno = gst->guest_RAX; 423 canonical->arg1 = gst->guest_RDI; 424 canonical->arg2 = gst->guest_RSI; 425 canonical->arg3 = gst->guest_RDX; 426 canonical->arg4 = gst->guest_R10; 427 canonical->arg5 = gst->guest_R8; 428 canonical->arg6 = gst->guest_R9; 429 canonical->arg7 = 0; 430 canonical->arg8 = 0; 431 432 #elif defined(VGP_ppc32_linux) 433 VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla; 434 canonical->sysno = gst->guest_GPR0; 435 canonical->arg1 = gst->guest_GPR3; 436 canonical->arg2 = gst->guest_GPR4; 437 canonical->arg3 = gst->guest_GPR5; 438 canonical->arg4 = gst->guest_GPR6; 439 canonical->arg5 = gst->guest_GPR7; 440 canonical->arg6 = gst->guest_GPR8; 441 canonical->arg7 = 0; 442 canonical->arg8 = 0; 443 444 #elif defined(VGP_ppc64_linux) 445 VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla; 446 canonical->sysno = gst->guest_GPR0; 447 canonical->arg1 = gst->guest_GPR3; 448 canonical->arg2 = gst->guest_GPR4; 449 canonical->arg3 = gst->guest_GPR5; 450 canonical->arg4 = gst->guest_GPR6; 451 canonical->arg5 = gst->guest_GPR7; 452 canonical->arg6 = gst->guest_GPR8; 453 canonical->arg7 = 0; 454 canonical->arg8 = 0; 455 456 #elif 
defined(VGP_arm_linux) 457 VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla; 458 canonical->sysno = gst->guest_R7; 459 canonical->arg1 = gst->guest_R0; 460 canonical->arg2 = gst->guest_R1; 461 canonical->arg3 = gst->guest_R2; 462 canonical->arg4 = gst->guest_R3; 463 canonical->arg5 = gst->guest_R4; 464 canonical->arg6 = gst->guest_R5; 465 canonical->arg7 = 0; 466 canonical->arg8 = 0; 467 468 #elif defined(VGP_arm64_linux) 469 VexGuestARM64State* gst = (VexGuestARM64State*)gst_vanilla; 470 canonical->sysno = gst->guest_X8; 471 canonical->arg1 = gst->guest_X0; 472 canonical->arg2 = gst->guest_X1; 473 canonical->arg3 = gst->guest_X2; 474 canonical->arg4 = gst->guest_X3; 475 canonical->arg5 = gst->guest_X4; 476 canonical->arg6 = gst->guest_X5; 477 canonical->arg7 = 0; 478 canonical->arg8 = 0; 479 480 #elif defined(VGP_mips32_linux) 481 VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla; 482 canonical->sysno = gst->guest_r2; // v0 483 if (canonical->sysno == __NR_exit) { 484 canonical->arg1 = gst->guest_r4; // a0 485 canonical->arg2 = 0; 486 canonical->arg3 = 0; 487 canonical->arg4 = 0; 488 canonical->arg5 = 0; 489 canonical->arg6 = 0; 490 canonical->arg8 = 0; 491 } else if (canonical->sysno != __NR_syscall) { 492 canonical->arg1 = gst->guest_r4; // a0 493 canonical->arg2 = gst->guest_r5; // a1 494 canonical->arg3 = gst->guest_r6; // a2 495 canonical->arg4 = gst->guest_r7; // a3 496 canonical->arg5 = *((UInt*) (gst->guest_r29 + 16)); // 16(guest_SP/sp) 497 canonical->arg6 = *((UInt*) (gst->guest_r29 + 20)); // 20(sp) 498 canonical->arg8 = 0; 499 } else { 500 // Fixme hack handle syscall() 501 canonical->sysno = gst->guest_r4; // a0 502 canonical->arg1 = gst->guest_r5; // a1 503 canonical->arg2 = gst->guest_r6; // a2 504 canonical->arg3 = gst->guest_r7; // a3 505 canonical->arg4 = *((UInt*) (gst->guest_r29 + 16)); // 16(guest_SP/sp) 506 canonical->arg5 = *((UInt*) (gst->guest_r29 + 20)); // 20(guest_SP/sp) 507 canonical->arg6 = *((UInt*) (gst->guest_r29 + 
24)); // 24(guest_SP/sp) 508 canonical->arg8 = __NR_syscall; 509 } 510 511 #elif defined(VGP_mips64_linux) 512 VexGuestMIPS64State* gst = (VexGuestMIPS64State*)gst_vanilla; 513 canonical->sysno = gst->guest_r2; // v0 514 canonical->arg1 = gst->guest_r4; // a0 515 canonical->arg2 = gst->guest_r5; // a1 516 canonical->arg3 = gst->guest_r6; // a2 517 canonical->arg4 = gst->guest_r7; // a3 518 canonical->arg5 = gst->guest_r8; // a4 519 canonical->arg6 = gst->guest_r9; // a5 520 521 #elif defined(VGP_x86_darwin) 522 VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla; 523 UWord *stack = (UWord *)gst->guest_ESP; 524 // GrP fixme hope syscalls aren't called with really shallow stacks... 525 canonical->sysno = gst->guest_EAX; 526 if (canonical->sysno != 0) { 527 // stack[0] is return address 528 canonical->arg1 = stack[1]; 529 canonical->arg2 = stack[2]; 530 canonical->arg3 = stack[3]; 531 canonical->arg4 = stack[4]; 532 canonical->arg5 = stack[5]; 533 canonical->arg6 = stack[6]; 534 canonical->arg7 = stack[7]; 535 canonical->arg8 = stack[8]; 536 } else { 537 // GrP fixme hack handle syscall() 538 // GrP fixme what about __syscall() ? 539 // stack[0] is return address 540 // DDD: the tool can't see that the params have been shifted! Can 541 // lead to incorrect checking, I think, because the PRRAn/PSARn 542 // macros will mention the pre-shifted args. 
543 canonical->sysno = stack[1]; 544 vg_assert(canonical->sysno != 0); 545 canonical->arg1 = stack[2]; 546 canonical->arg2 = stack[3]; 547 canonical->arg3 = stack[4]; 548 canonical->arg4 = stack[5]; 549 canonical->arg5 = stack[6]; 550 canonical->arg6 = stack[7]; 551 canonical->arg7 = stack[8]; 552 canonical->arg8 = stack[9]; 553 554 PRINT("SYSCALL[%d,?](%s) syscall(%s, ...); please stand by...\n", 555 VG_(getpid)(), /*tid,*/ 556 VG_SYSNUM_STRING(0), VG_SYSNUM_STRING(canonical->sysno)); 557 } 558 559 // Here we determine what kind of syscall it was by looking at the 560 // interrupt kind, and then encode the syscall number using the 64-bit 561 // encoding for Valgrind's internal use. 562 // 563 // DDD: Would it be better to stash the JMP kind into the Darwin 564 // thread state rather than passing in the trc? 565 switch (trc) { 566 case VEX_TRC_JMP_SYS_INT128: 567 // int $0x80 = Unix, 64-bit result 568 vg_assert(canonical->sysno >= 0); 569 canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(canonical->sysno); 570 break; 571 case VEX_TRC_JMP_SYS_SYSENTER: 572 // syscall = Unix, 32-bit result 573 // OR Mach, 32-bit result 574 if (canonical->sysno >= 0) { 575 // GrP fixme hack: 0xffff == I386_SYSCALL_NUMBER_MASK 576 canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(canonical->sysno 577 & 0xffff); 578 } else { 579 canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MACH(-canonical->sysno); 580 } 581 break; 582 case VEX_TRC_JMP_SYS_INT129: 583 // int $0x81 = Mach, 32-bit result 584 vg_assert(canonical->sysno < 0); 585 canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MACH(-canonical->sysno); 586 break; 587 case VEX_TRC_JMP_SYS_INT130: 588 // int $0x82 = mdep, 32-bit result 589 vg_assert(canonical->sysno >= 0); 590 canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MDEP(canonical->sysno); 591 break; 592 default: 593 vg_assert(0); 594 break; 595 } 596 597 #elif defined(VGP_amd64_darwin) 598 VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla; 599 UWord *stack = (UWord 
*)gst->guest_RSP; 600 601 vg_assert(trc == VEX_TRC_JMP_SYS_SYSCALL); 602 603 // GrP fixme hope syscalls aren't called with really shallow stacks... 604 canonical->sysno = gst->guest_RAX; 605 if (canonical->sysno != __NR_syscall) { 606 // stack[0] is return address 607 canonical->arg1 = gst->guest_RDI; 608 canonical->arg2 = gst->guest_RSI; 609 canonical->arg3 = gst->guest_RDX; 610 canonical->arg4 = gst->guest_R10; // not rcx with syscall insn 611 canonical->arg5 = gst->guest_R8; 612 canonical->arg6 = gst->guest_R9; 613 canonical->arg7 = stack[1]; 614 canonical->arg8 = stack[2]; 615 } else { 616 // GrP fixme hack handle syscall() 617 // GrP fixme what about __syscall() ? 618 // stack[0] is return address 619 // DDD: the tool can't see that the params have been shifted! Can 620 // lead to incorrect checking, I think, because the PRRAn/PSARn 621 // macros will mention the pre-shifted args. 622 canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(gst->guest_RDI); 623 vg_assert(canonical->sysno != __NR_syscall); 624 canonical->arg1 = gst->guest_RSI; 625 canonical->arg2 = gst->guest_RDX; 626 canonical->arg3 = gst->guest_R10; // not rcx with syscall insn 627 canonical->arg4 = gst->guest_R8; 628 canonical->arg5 = gst->guest_R9; 629 canonical->arg6 = stack[1]; 630 canonical->arg7 = stack[2]; 631 canonical->arg8 = stack[3]; 632 633 PRINT("SYSCALL[%d,?](%s) syscall(%s, ...); please stand by...\n", 634 VG_(getpid)(), /*tid,*/ 635 VG_SYSNUM_STRING(0), VG_SYSNUM_STRING(canonical->sysno)); 636 } 637 638 // no canonical->sysno adjustment needed 639 640 #elif defined(VGP_s390x_linux) 641 VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla; 642 canonical->sysno = gst->guest_SYSNO; 643 canonical->arg1 = gst->guest_r2; 644 canonical->arg2 = gst->guest_r3; 645 canonical->arg3 = gst->guest_r4; 646 canonical->arg4 = gst->guest_r5; 647 canonical->arg5 = gst->guest_r6; 648 canonical->arg6 = gst->guest_r7; 649 canonical->arg7 = 0; 650 canonical->arg8 = 0; 651 #else 652 # error 
"getSyscallArgsFromGuestState: unknown arch" 653 #endif 654 } 655 656 static 657 void putSyscallArgsIntoGuestState ( /*IN*/ SyscallArgs* canonical, 658 /*OUT*/VexGuestArchState* gst_vanilla ) 659 { 660 #if defined(VGP_x86_linux) 661 VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla; 662 gst->guest_EAX = canonical->sysno; 663 gst->guest_EBX = canonical->arg1; 664 gst->guest_ECX = canonical->arg2; 665 gst->guest_EDX = canonical->arg3; 666 gst->guest_ESI = canonical->arg4; 667 gst->guest_EDI = canonical->arg5; 668 gst->guest_EBP = canonical->arg6; 669 670 #elif defined(VGP_amd64_linux) 671 VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla; 672 gst->guest_RAX = canonical->sysno; 673 gst->guest_RDI = canonical->arg1; 674 gst->guest_RSI = canonical->arg2; 675 gst->guest_RDX = canonical->arg3; 676 gst->guest_R10 = canonical->arg4; 677 gst->guest_R8 = canonical->arg5; 678 gst->guest_R9 = canonical->arg6; 679 680 #elif defined(VGP_ppc32_linux) 681 VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla; 682 gst->guest_GPR0 = canonical->sysno; 683 gst->guest_GPR3 = canonical->arg1; 684 gst->guest_GPR4 = canonical->arg2; 685 gst->guest_GPR5 = canonical->arg3; 686 gst->guest_GPR6 = canonical->arg4; 687 gst->guest_GPR7 = canonical->arg5; 688 gst->guest_GPR8 = canonical->arg6; 689 690 #elif defined(VGP_ppc64_linux) 691 VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla; 692 gst->guest_GPR0 = canonical->sysno; 693 gst->guest_GPR3 = canonical->arg1; 694 gst->guest_GPR4 = canonical->arg2; 695 gst->guest_GPR5 = canonical->arg3; 696 gst->guest_GPR6 = canonical->arg4; 697 gst->guest_GPR7 = canonical->arg5; 698 gst->guest_GPR8 = canonical->arg6; 699 700 #elif defined(VGP_arm_linux) 701 VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla; 702 gst->guest_R7 = canonical->sysno; 703 gst->guest_R0 = canonical->arg1; 704 gst->guest_R1 = canonical->arg2; 705 gst->guest_R2 = canonical->arg3; 706 gst->guest_R3 = canonical->arg4; 707 gst->guest_R4 = canonical->arg5; 
gst->guest_R5 = canonical->arg6;

#elif defined(VGP_arm64_linux)
   VexGuestARM64State* gst = (VexGuestARM64State*)gst_vanilla;
   gst->guest_X8 = canonical->sysno;
   gst->guest_X0 = canonical->arg1;
   gst->guest_X1 = canonical->arg2;
   gst->guest_X2 = canonical->arg3;
   gst->guest_X3 = canonical->arg4;
   gst->guest_X4 = canonical->arg5;
   gst->guest_X5 = canonical->arg6;

#elif defined(VGP_x86_darwin)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_ESP;

   gst->guest_EAX = VG_DARWIN_SYSNO_FOR_KERNEL(canonical->sysno);

   // GrP fixme? gst->guest_TEMP_EFLAG_C = 0;
   // stack[0] is return address
   stack[1] = canonical->arg1;
   stack[2] = canonical->arg2;
   stack[3] = canonical->arg3;
   stack[4] = canonical->arg4;
   stack[5] = canonical->arg5;
   stack[6] = canonical->arg6;
   stack[7] = canonical->arg7;
   stack[8] = canonical->arg8;

#elif defined(VGP_amd64_darwin)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_RSP;

   gst->guest_RAX = VG_DARWIN_SYSNO_FOR_KERNEL(canonical->sysno);
   // GrP fixme? gst->guest_TEMP_EFLAG_C = 0;

   // stack[0] is return address
   gst->guest_RDI = canonical->arg1;
   gst->guest_RSI = canonical->arg2;
   gst->guest_RDX = canonical->arg3;
   gst->guest_RCX = canonical->arg4;
   gst->guest_R8  = canonical->arg5;
   gst->guest_R9  = canonical->arg6;
   stack[1] = canonical->arg7;
   stack[2] = canonical->arg8;

#elif defined(VGP_s390x_linux)
   VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
   gst->guest_SYSNO = canonical->sysno;
   gst->guest_r2 = canonical->arg1;
   gst->guest_r3 = canonical->arg2;
   gst->guest_r4 = canonical->arg3;
   gst->guest_r5 = canonical->arg4;
   gst->guest_r6 = canonical->arg5;
   gst->guest_r7 = canonical->arg6;

#elif defined(VGP_mips32_linux)
   VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla;
   // arg8 == __NR_syscall marks args that were shifted by the
   // indirect-syscall() hack in getSyscallArgsFromGuestState; in
   // that case the shift is undone here and arg8 is cleared.
   if (canonical->arg8 != __NR_syscall) {
      gst->guest_r2 = canonical->sysno;
      gst->guest_r4 = canonical->arg1;
      gst->guest_r5 = canonical->arg2;
      gst->guest_r6 = canonical->arg3;
      gst->guest_r7 = canonical->arg4;
      *((UInt*) (gst->guest_r29 + 16)) = canonical->arg5;    // 16(guest_GPR29/sp)
      *((UInt*) (gst->guest_r29 + 20)) = canonical->arg6;    // 20(sp)
   } else {
      canonical->arg8 = 0;
      gst->guest_r2 = __NR_syscall;
      gst->guest_r4 = canonical->sysno;
      gst->guest_r5 = canonical->arg1;
      gst->guest_r6 = canonical->arg2;
      gst->guest_r7 = canonical->arg3;
      *((UInt*) (gst->guest_r29 + 16)) = canonical->arg4;    // 16(guest_GPR29/sp)
      *((UInt*) (gst->guest_r29 + 20)) = canonical->arg5;    // 20(sp)
      *((UInt*) (gst->guest_r29 + 24)) = canonical->arg6;    // 24(sp)
   }

#elif defined(VGP_mips64_linux)
   VexGuestMIPS64State* gst = (VexGuestMIPS64State*)gst_vanilla;
   gst->guest_r2 = canonical->sysno;
   gst->guest_r4 = canonical->arg1;
   gst->guest_r5 = canonical->arg2;
   gst->guest_r6 = canonical->arg3;
   gst->guest_r7 = canonical->arg4;
   gst->guest_r8 = canonical->arg5;
   gst->guest_r9 = canonical->arg6;
#else
#  error "putSyscallArgsIntoGuestState: unknown arch"
#endif
}

/* Read the platform-specific syscall result out of the guest state
   and convert it into a canonical SyscallStatus (always SsComplete).
   The success/failure encoding is per the table at the top of this
   file (e.g. negated errno in EAX/RAX on x86/amd64 Linux, CR0.SO on
   ppc, carry flag + class on Darwin, a3 on mips). */
static
void getSyscallStatusFromGuestState ( /*OUT*/SyscallStatus*     canonical,
                                      /*IN*/ VexGuestArchState* gst_vanilla )
{
#  if defined(VGP_x86_linux)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_x86_linux)( gst->guest_EAX );
   canonical->what = SsComplete;

#  elif defined(VGP_amd64_linux)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_amd64_linux)( gst->guest_RAX );
   canonical->what = SsComplete;

#  elif defined(VGP_ppc32_linux)
   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
   UInt cr = LibVEX_GuestPPC32_get_CR( gst );
   UInt cr0so = (cr >> 28) & 1;   // CR0.SO is the error flag
   canonical->sres = VG_(mk_SysRes_ppc32_linux)( gst->guest_GPR3, cr0so );
   canonical->what = SsComplete;

#  elif defined(VGP_ppc64_linux)
   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
   UInt cr = LibVEX_GuestPPC64_get_CR( gst );
   UInt cr0so = (cr >> 28) & 1;   // CR0.SO is the error flag
   canonical->sres = VG_(mk_SysRes_ppc64_linux)( gst->guest_GPR3, cr0so );
   canonical->what = SsComplete;

#  elif defined(VGP_arm_linux)
   VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_arm_linux)( gst->guest_R0 );
   canonical->what = SsComplete;

#  elif defined(VGP_arm64_linux)
   VexGuestARM64State* gst = (VexGuestARM64State*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_arm64_linux)( gst->guest_X0 );
   canonical->what = SsComplete;

#  elif defined(VGP_mips32_linux)
   VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla;
   UInt v0 = gst->guest_r2;    // v0
   UInt v1 = gst->guest_r3;    // v1
   UInt a3 = gst->guest_r7;    // a3
   canonical->sres = VG_(mk_SysRes_mips32_linux)( v0, v1, a3 );
   canonical->what = SsComplete;

#  elif defined(VGP_mips64_linux)
   VexGuestMIPS64State* gst = (VexGuestMIPS64State*)gst_vanilla;
   ULong v0 = gst->guest_r2;    // v0
   ULong v1 = gst->guest_r3;    // v1
   ULong a3 = gst->guest_r7;    // a3
   canonical->sres = VG_(mk_SysRes_mips64_linux)(v0, v1, a3);
   canonical->what = SsComplete;

#  elif defined(VGP_x86_darwin)
   /* duplicates logic in m_signals.VG_UCONTEXT_SYSCALL_SYSRES */
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   UInt carry = 1 & LibVEX_GuestX86_get_eflags(gst);
   UInt err = 0;
   UInt wLO = 0;
   UInt wHI = 0;
   switch (gst->guest_SC_CLASS) {
      case VG_DARWIN_SYSCALL_CLASS_UNIX:
         // int $0x80 = Unix, 64-bit result
         err = carry;
         wLO = gst->guest_EAX;
         wHI = gst->guest_EDX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MACH:
         // int $0x81 = Mach, 32-bit result
         wLO = gst->guest_EAX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MDEP:
         // int $0x82 = mdep, 32-bit result
         wLO = gst->guest_EAX;
         break;
      default:
         vg_assert(0);
         break;
   }
   canonical->sres = VG_(mk_SysRes_x86_darwin)(
                        gst->guest_SC_CLASS, err ? True : False,
                        wHI, wLO
                     );
   canonical->what = SsComplete;

#  elif defined(VGP_amd64_darwin)
   /* duplicates logic in m_signals.VG_UCONTEXT_SYSCALL_SYSRES */
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   ULong carry = 1 & LibVEX_GuestAMD64_get_rflags(gst);
   ULong err = 0;
   ULong wLO = 0;
   ULong wHI = 0;
   switch (gst->guest_SC_CLASS) {
      case VG_DARWIN_SYSCALL_CLASS_UNIX:
         // syscall = Unix, 128-bit result
         err = carry;
         wLO = gst->guest_RAX;
         wHI = gst->guest_RDX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MACH:
         // syscall = Mach, 64-bit result
         wLO = gst->guest_RAX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MDEP:
         // syscall = mdep, 64-bit result
         wLO = gst->guest_RAX;
         break;
      default:
         vg_assert(0);
         break;
   }
   canonical->sres = VG_(mk_SysRes_amd64_darwin)(
                        gst->guest_SC_CLASS, err ?
True : False, 914 wHI, wLO 915 ); 916 canonical->what = SsComplete; 917 918 # elif defined(VGP_s390x_linux) 919 VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla; 920 canonical->sres = VG_(mk_SysRes_s390x_linux)( gst->guest_r2 ); 921 canonical->what = SsComplete; 922 923 # else 924 # error "getSyscallStatusFromGuestState: unknown arch" 925 # endif 926 } 927 928 static 929 void putSyscallStatusIntoGuestState ( /*IN*/ ThreadId tid, 930 /*IN*/ SyscallStatus* canonical, 931 /*OUT*/VexGuestArchState* gst_vanilla ) 932 { 933 # if defined(VGP_x86_linux) 934 VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla; 935 vg_assert(canonical->what == SsComplete); 936 if (sr_isError(canonical->sres)) { 937 /* This isn't exactly right, in that really a Failure with res 938 not in the range 1 .. 4095 is unrepresentable in the 939 Linux-x86 scheme. Oh well. */ 940 gst->guest_EAX = - (Int)sr_Err(canonical->sres); 941 } else { 942 gst->guest_EAX = sr_Res(canonical->sres); 943 } 944 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 945 OFFSET_x86_EAX, sizeof(UWord) ); 946 947 # elif defined(VGP_amd64_linux) 948 VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla; 949 vg_assert(canonical->what == SsComplete); 950 if (sr_isError(canonical->sres)) { 951 /* This isn't exactly right, in that really a Failure with res 952 not in the range 1 .. 4095 is unrepresentable in the 953 Linux-amd64 scheme. Oh well. 
*/ 954 gst->guest_RAX = - (Long)sr_Err(canonical->sres); 955 } else { 956 gst->guest_RAX = sr_Res(canonical->sres); 957 } 958 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 959 OFFSET_amd64_RAX, sizeof(UWord) ); 960 961 # elif defined(VGP_ppc32_linux) 962 VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla; 963 UInt old_cr = LibVEX_GuestPPC32_get_CR(gst); 964 vg_assert(canonical->what == SsComplete); 965 if (sr_isError(canonical->sres)) { 966 /* set CR0.SO */ 967 LibVEX_GuestPPC32_put_CR( old_cr | (1<<28), gst ); 968 gst->guest_GPR3 = sr_Err(canonical->sres); 969 } else { 970 /* clear CR0.SO */ 971 LibVEX_GuestPPC32_put_CR( old_cr & ~(1<<28), gst ); 972 gst->guest_GPR3 = sr_Res(canonical->sres); 973 } 974 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 975 OFFSET_ppc32_GPR3, sizeof(UWord) ); 976 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 977 OFFSET_ppc32_CR0_0, sizeof(UChar) ); 978 979 # elif defined(VGP_ppc64_linux) 980 VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla; 981 UInt old_cr = LibVEX_GuestPPC64_get_CR(gst); 982 vg_assert(canonical->what == SsComplete); 983 if (sr_isError(canonical->sres)) { 984 /* set CR0.SO */ 985 LibVEX_GuestPPC64_put_CR( old_cr | (1<<28), gst ); 986 gst->guest_GPR3 = sr_Err(canonical->sres); 987 } else { 988 /* clear CR0.SO */ 989 LibVEX_GuestPPC64_put_CR( old_cr & ~(1<<28), gst ); 990 gst->guest_GPR3 = sr_Res(canonical->sres); 991 } 992 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 993 OFFSET_ppc64_GPR3, sizeof(UWord) ); 994 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 995 OFFSET_ppc64_CR0_0, sizeof(UChar) ); 996 997 # elif defined(VGP_arm_linux) 998 VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla; 999 vg_assert(canonical->what == SsComplete); 1000 if (sr_isError(canonical->sres)) { 1001 /* This isn't exactly right, in that really a Failure with res 1002 not in the range 1 .. 4095 is unrepresentable in the 1003 Linux-arm scheme. Oh well. 
*/ 1004 gst->guest_R0 = - (Int)sr_Err(canonical->sres); 1005 } else { 1006 gst->guest_R0 = sr_Res(canonical->sres); 1007 } 1008 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 1009 OFFSET_arm_R0, sizeof(UWord) ); 1010 1011 # elif defined(VGP_arm64_linux) 1012 VexGuestARM64State* gst = (VexGuestARM64State*)gst_vanilla; 1013 vg_assert(canonical->what == SsComplete); 1014 if (sr_isError(canonical->sres)) { 1015 /* This isn't exactly right, in that really a Failure with res 1016 not in the range 1 .. 4095 is unrepresentable in the 1017 Linux-arm64 scheme. Oh well. */ 1018 gst->guest_X0 = - (Long)sr_Err(canonical->sres); 1019 } else { 1020 gst->guest_X0 = sr_Res(canonical->sres); 1021 } 1022 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 1023 OFFSET_arm64_X0, sizeof(UWord) ); 1024 1025 #elif defined(VGP_x86_darwin) 1026 VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla; 1027 SysRes sres = canonical->sres; 1028 vg_assert(canonical->what == SsComplete); 1029 /* Unfortunately here we have to break abstraction and look 1030 directly inside 'res', in order to decide what to do. */ 1031 switch (sres._mode) { 1032 case SysRes_MACH: // int $0x81 = Mach, 32-bit result 1033 case SysRes_MDEP: // int $0x82 = mdep, 32-bit result 1034 gst->guest_EAX = sres._wLO; 1035 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 1036 OFFSET_x86_EAX, sizeof(UInt) ); 1037 break; 1038 case SysRes_UNIX_OK: // int $0x80 = Unix, 64-bit result 1039 case SysRes_UNIX_ERR: // int $0x80 = Unix, 64-bit error 1040 gst->guest_EAX = sres._wLO; 1041 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 1042 OFFSET_x86_EAX, sizeof(UInt) ); 1043 gst->guest_EDX = sres._wHI; 1044 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 1045 OFFSET_x86_EDX, sizeof(UInt) ); 1046 LibVEX_GuestX86_put_eflag_c( sres._mode==SysRes_UNIX_ERR ? 1 : 0, 1047 gst ); 1048 // GrP fixme sets defined for entire eflags, not just bit c 1049 // DDD: this breaks exp-ptrcheck. 
1050 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 1051 offsetof(VexGuestX86State, guest_CC_DEP1), sizeof(UInt) ); 1052 break; 1053 default: 1054 vg_assert(0); 1055 break; 1056 } 1057 1058 #elif defined(VGP_amd64_darwin) 1059 VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla; 1060 SysRes sres = canonical->sres; 1061 vg_assert(canonical->what == SsComplete); 1062 /* Unfortunately here we have to break abstraction and look 1063 directly inside 'res', in order to decide what to do. */ 1064 switch (sres._mode) { 1065 case SysRes_MACH: // syscall = Mach, 64-bit result 1066 case SysRes_MDEP: // syscall = mdep, 64-bit result 1067 gst->guest_RAX = sres._wLO; 1068 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 1069 OFFSET_amd64_RAX, sizeof(ULong) ); 1070 break; 1071 case SysRes_UNIX_OK: // syscall = Unix, 128-bit result 1072 case SysRes_UNIX_ERR: // syscall = Unix, 128-bit error 1073 gst->guest_RAX = sres._wLO; 1074 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 1075 OFFSET_amd64_RAX, sizeof(ULong) ); 1076 gst->guest_RDX = sres._wHI; 1077 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 1078 OFFSET_amd64_RDX, sizeof(ULong) ); 1079 LibVEX_GuestAMD64_put_rflag_c( sres._mode==SysRes_UNIX_ERR ? 1 : 0, 1080 gst ); 1081 // GrP fixme sets defined for entire rflags, not just bit c 1082 // DDD: this breaks exp-ptrcheck. 
1083 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 1084 offsetof(VexGuestAMD64State, guest_CC_DEP1), sizeof(ULong) ); 1085 break; 1086 default: 1087 vg_assert(0); 1088 break; 1089 } 1090 1091 # elif defined(VGP_s390x_linux) 1092 VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla; 1093 vg_assert(canonical->what == SsComplete); 1094 if (sr_isError(canonical->sres)) { 1095 gst->guest_r2 = - (Long)sr_Err(canonical->sres); 1096 } else { 1097 gst->guest_r2 = sr_Res(canonical->sres); 1098 } 1099 1100 # elif defined(VGP_mips32_linux) 1101 VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla; 1102 vg_assert(canonical->what == SsComplete); 1103 if (sr_isError(canonical->sres)) { 1104 gst->guest_r2 = (Int)sr_Err(canonical->sres); 1105 gst->guest_r7 = (Int)sr_Err(canonical->sres); 1106 } else { 1107 gst->guest_r2 = sr_Res(canonical->sres); 1108 gst->guest_r3 = sr_ResEx(canonical->sres); 1109 gst->guest_r7 = (Int)sr_Err(canonical->sres); 1110 } 1111 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 1112 OFFSET_mips32_r2, sizeof(UWord) ); 1113 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 1114 OFFSET_mips32_r3, sizeof(UWord) ); 1115 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 1116 OFFSET_mips32_r7, sizeof(UWord) ); 1117 1118 # elif defined(VGP_mips64_linux) 1119 VexGuestMIPS64State* gst = (VexGuestMIPS64State*)gst_vanilla; 1120 vg_assert(canonical->what == SsComplete); 1121 if (sr_isError(canonical->sres)) { 1122 gst->guest_r2 = (Int)sr_Err(canonical->sres); 1123 gst->guest_r7 = (Int)sr_Err(canonical->sres); 1124 } else { 1125 gst->guest_r2 = sr_Res(canonical->sres); 1126 gst->guest_r3 = sr_ResEx(canonical->sres); 1127 gst->guest_r7 = (Int)sr_Err(canonical->sres); 1128 } 1129 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 1130 OFFSET_mips64_r2, sizeof(UWord) ); 1131 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 1132 OFFSET_mips64_r3, sizeof(UWord) ); 1133 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 1134 OFFSET_mips64_r7, sizeof(UWord) ); 1135 1136 # else 
#    error "putSyscallStatusIntoGuestState: unknown arch"
#  endif
}


/* Tell me the offsets in the guest state of the syscall params, so
   that the scalar argument checkers don't have to have this info
   hardwired. */
/* o_argN = offset of arg N in the guest register file;
   s_argN = offset of arg N on the guest stack (for platforms that
   pass some args on the stack); uu_argN = arg N is unused on this
   platform (set to -1, an impossible offset). */
static
void getSyscallArgLayout ( /*OUT*/SyscallArgLayout* layout )
{
   VG_(bzero_inline)(layout, sizeof(*layout));

#if defined(VGP_x86_linux)
   layout->o_sysno  = OFFSET_x86_EAX;
   layout->o_arg1   = OFFSET_x86_EBX;
   layout->o_arg2   = OFFSET_x86_ECX;
   layout->o_arg3   = OFFSET_x86_EDX;
   layout->o_arg4   = OFFSET_x86_ESI;
   layout->o_arg5   = OFFSET_x86_EDI;
   layout->o_arg6   = OFFSET_x86_EBP;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_amd64_linux)
   layout->o_sysno  = OFFSET_amd64_RAX;
   layout->o_arg1   = OFFSET_amd64_RDI;
   layout->o_arg2   = OFFSET_amd64_RSI;
   layout->o_arg3   = OFFSET_amd64_RDX;
   layout->o_arg4   = OFFSET_amd64_R10;
   layout->o_arg5   = OFFSET_amd64_R8;
   layout->o_arg6   = OFFSET_amd64_R9;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_ppc32_linux)
   layout->o_sysno  = OFFSET_ppc32_GPR0;
   layout->o_arg1   = OFFSET_ppc32_GPR3;
   layout->o_arg2   = OFFSET_ppc32_GPR4;
   layout->o_arg3   = OFFSET_ppc32_GPR5;
   layout->o_arg4   = OFFSET_ppc32_GPR6;
   layout->o_arg5   = OFFSET_ppc32_GPR7;
   layout->o_arg6   = OFFSET_ppc32_GPR8;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_ppc64_linux)
   layout->o_sysno  = OFFSET_ppc64_GPR0;
   layout->o_arg1   = OFFSET_ppc64_GPR3;
   layout->o_arg2   = OFFSET_ppc64_GPR4;
   layout->o_arg3   = OFFSET_ppc64_GPR5;
   layout->o_arg4   = OFFSET_ppc64_GPR6;
   layout->o_arg5   = OFFSET_ppc64_GPR7;
   layout->o_arg6   = OFFSET_ppc64_GPR8;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_arm_linux)
   layout->o_sysno  = OFFSET_arm_R7;
   layout->o_arg1   = OFFSET_arm_R0;
   layout->o_arg2   = OFFSET_arm_R1;
   layout->o_arg3   = OFFSET_arm_R2;
   layout->o_arg4   = OFFSET_arm_R3;
   layout->o_arg5   = OFFSET_arm_R4;
   layout->o_arg6   = OFFSET_arm_R5;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_arm64_linux)
   layout->o_sysno  = OFFSET_arm64_X8;
   layout->o_arg1   = OFFSET_arm64_X0;
   layout->o_arg2   = OFFSET_arm64_X1;
   layout->o_arg3   = OFFSET_arm64_X2;
   layout->o_arg4   = OFFSET_arm64_X3;
   layout->o_arg5   = OFFSET_arm64_X4;
   layout->o_arg6   = OFFSET_arm64_X5;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_mips32_linux)
   layout->o_sysno  = OFFSET_mips32_r2;
   layout->o_arg1   = OFFSET_mips32_r4;
   layout->o_arg2   = OFFSET_mips32_r5;
   layout->o_arg3   = OFFSET_mips32_r6;
   layout->o_arg4   = OFFSET_mips32_r7;
   /* args 5 and 6 are passed on the stack on mips32 */
   layout->s_arg5   = sizeof(UWord) * 4;
   layout->s_arg6   = sizeof(UWord) * 5;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_mips64_linux)
   layout->o_sysno  = OFFSET_mips64_r2;
   layout->o_arg1   = OFFSET_mips64_r4;
   layout->o_arg2   = OFFSET_mips64_r5;
   layout->o_arg3   = OFFSET_mips64_r6;
   layout->o_arg4   = OFFSET_mips64_r7;
   layout->o_arg5   = OFFSET_mips64_r8;
   layout->o_arg6   = OFFSET_mips64_r9;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_x86_darwin)
   layout->o_sysno  = OFFSET_x86_EAX;
   // syscall parameters are on stack in C convention
   layout->s_arg1   = sizeof(UWord) * 1;
   layout->s_arg2   = sizeof(UWord) * 2;
   layout->s_arg3   = sizeof(UWord) * 3;
   layout->s_arg4   = sizeof(UWord) * 4;
   layout->s_arg5   = sizeof(UWord) * 5;
   layout->s_arg6   = sizeof(UWord) * 6;
   layout->s_arg7   = sizeof(UWord) * 7;
   layout->s_arg8   = sizeof(UWord) * 8;

#elif defined(VGP_amd64_darwin)
   layout->o_sysno  = OFFSET_amd64_RAX;
   layout->o_arg1   = OFFSET_amd64_RDI;
   layout->o_arg2   = OFFSET_amd64_RSI;
   layout->o_arg3   = OFFSET_amd64_RDX;
   layout->o_arg4   = OFFSET_amd64_RCX;
   layout->o_arg5   = OFFSET_amd64_R8;
   layout->o_arg6   = OFFSET_amd64_R9;
   /* args 7 and 8 overflow onto the stack */
   layout->s_arg7   = sizeof(UWord) * 1;
   layout->s_arg8   = sizeof(UWord) * 2;

#elif defined(VGP_s390x_linux)
   layout->o_sysno  = OFFSET_s390x_SYSNO;
   layout->o_arg1   = OFFSET_s390x_r2;
   layout->o_arg2   = OFFSET_s390x_r3;
   layout->o_arg3   = OFFSET_s390x_r4;
   layout->o_arg4   = OFFSET_s390x_r5;
   layout->o_arg5   = OFFSET_s390x_r6;
   layout->o_arg6   = OFFSET_s390x_r7;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */
#else
#  error "getSyscallLayout: unknown arch"
#endif
}


/* ---------------------------------------------------------------------
   The main driver logic
   ------------------------------------------------------------------ */

/* Finding the handlers for a given syscall, or faking up one
   when no handler is found. */

/* Pre-handler used for syscalls that have no wrapper: warn the user,
   optionally print a stack trace, and fail the call with ENOSYS. */
static
void bad_before ( ThreadId tid,
                  SyscallArgLayout* layout,
                  /*MOD*/SyscallArgs* args,
                  /*OUT*/SyscallStatus* status,
                  /*OUT*/UWord* flags )
{
   VG_(dmsg)("WARNING: unhandled syscall: %s\n",
             VG_SYSNUM_STRING_EXTRA(args->sysno));
   if (VG_(clo_verbosity) > 1) {
      VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
   }
   VG_(dmsg)("You may be able to write your own handler.\n");
   VG_(dmsg)("Read the file README_MISSING_SYSCALL_OR_IOCTL.\n");
   VG_(dmsg)("Nevertheless we consider this a bug. Please report\n");
   VG_(dmsg)("it at http://valgrind.org/support/bug_reports.html.\n");

   SET_STATUS_Failure(VKI_ENOSYS);
}

/* Fallback entry returned when no real wrapper exists. */
static SyscallTableEntry bad_sys =
   { bad_before, NULL };

/* Look up the wrapper table entry for 'syscallno'.  Never returns
   NULL: if the syscall is unknown, returns &bad_sys instead. */
static const SyscallTableEntry* get_syscall_entry ( Int syscallno )
{
   const SyscallTableEntry* sys = NULL;

#  if defined(VGO_linux)
   sys = ML_(get_linux_syscall_entry)( syscallno );

#  elif defined(VGO_darwin)
   /* Darwin splits syscalls into three classes, each with its own
      table; decode class and index from the (encoded) number. */
   Int idx = VG_DARWIN_SYSNO_INDEX(syscallno);

   switch (VG_DARWIN_SYSNO_CLASS(syscallno)) {
      case VG_DARWIN_SYSCALL_CLASS_UNIX:
         if (idx >= 0 && idx < ML_(syscall_table_size) &&
             ML_(syscall_table)[idx].before != NULL)
            sys = &ML_(syscall_table)[idx];
         break;
      case VG_DARWIN_SYSCALL_CLASS_MACH:
         if (idx >= 0 && idx < ML_(mach_trap_table_size) &&
             ML_(mach_trap_table)[idx].before != NULL)
            sys = &ML_(mach_trap_table)[idx];
         break;
      case VG_DARWIN_SYSCALL_CLASS_MDEP:
         if (idx >= 0 && idx < ML_(mdep_trap_table_size) &&
             ML_(mdep_trap_table)[idx].before != NULL)
            sys = &ML_(mdep_trap_table)[idx];
         break;
      default:
         vg_assert(0);
         break;
   }

#  else
#    error Unknown OS
#  endif

   return sys == NULL ? &bad_sys : sys;
}


/* Add and remove signals from mask so that we end up telling the
   kernel the state we actually want rather than what the client
   wants.
*/
static void sanitize_client_sigmask(vki_sigset_t *mask)
{
   /* These must never be blocked while a syscall runs: KILL/STOP
      cannot be blocked anyway, and VG_SIGVGKILL is Valgrind's own
      thread-termination signal. */
   VG_(sigdelset)(mask, VKI_SIGKILL);
   VG_(sigdelset)(mask, VKI_SIGSTOP);
   VG_(sigdelset)(mask, VG_SIGVGKILL); /* never block */
}

/* Per-thread record of the syscall currently in progress: the
   original args as fetched from the guest state, the (possibly
   pre-handler-modified) args, the completion status, and the Sf*
   flags requested by the pre-handler. */
typedef
   struct {
      SyscallArgs   orig_args;
      SyscallArgs   args;
      SyscallStatus status;
      UWord         flags;
   }
   SyscallInfo;

SyscallInfo syscallInfo[VG_N_THREADS];


/* The scheduler needs to be able to zero out these records after a
   fork, hence this is exported from m_syswrap. */
void VG_(clear_syscallInfo) ( Int tid )
{
   vg_assert(tid >= 0 && tid < VG_N_THREADS);
   VG_(memset)( & syscallInfo[tid], 0, sizeof( syscallInfo[tid] ));
   syscallInfo[tid].status.what = SsIdle;
}

/* One-time lazy initialisation: mark every thread's syscall record
   as idle. */
static void ensure_initialised ( void )
{
   Int i;
   static Bool init_done = False;
   if (init_done)
      return;
   init_done = True;
   for (i = 0; i < VG_N_THREADS; i++) {
      VG_(clear_syscallInfo)( i );
   }
}

/* --- This is the main function of this file. --- */
/* Handle a syscall request from the client: fetch args from the
   guest state, run the pre-handler, hand the call to the kernel
   (sync or async), write the result back, then run post-syscall
   actions. */

void VG_(client_syscall) ( ThreadId tid, UInt trc )
{
   Word sysno;
   ThreadState* tst;
   const SyscallTableEntry* ent;
   SyscallArgLayout layout;
   SyscallInfo* sci;

   ensure_initialised();

   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

   tst = VG_(get_ThreadState)(tid);

   /* BEGIN ensure root thread's stack is suitably mapped */
   /* In rare cases the syscall is entered with SP pointing below the
      mapped area of the stack (SP was moved down just before the
      syscall, with no intervening memory reference to trigger
      VG_(extend_stack)).  Natively the kernel auto-extends the
      stack, but here the kernel doesn't know the segment above SP is
      a grow-down segment, so the syscall would fail -- diverging
      from native behaviour.  Seen e.g. with sys_readlink on
      amd64-linux; see bug #156404
      (https://bugs.kde.org/show_bug.cgi?id=156404).

      Fix: call VG_(extend_stack) for this thread with the lowest
      valid stack address (sp - redzone), so the pages down to that
      address are mapped.  Two filters keep this cheap: only the main
      thread (tid == 1) has a growdown stack (VG_(extend_stack) is a
      no-op for non-root threads); and we first probe with
      VG_(am_find_nsegment), which roughly halves the number of
      segment lookups since VG_(extend_stack) would do two itself.

      TODO: the test "seg->kind == SkAnonC" only checks the page is
      mapped somehow, not that it has the right permissions; fixing
      that also requires fixing the corresponding test in
      VG_(extend_stack).

      All this guff is of course Linux-specific.  Hence the ifdef. */
#  if defined(VGO_linux)
   if (tid == 1/*ROOT THREAD*/) {
      Addr stackMin = VG_(get_SP)(tid) - VG_STACK_REDZONE_SZB;
      NSegment const* seg = VG_(am_find_nsegment)(stackMin);
      if (seg && seg->kind == SkAnonC) {
         /* stackMin is already mapped.  Nothing to do. */
      } else {
         (void)VG_(extend_stack)( stackMin,
                                  tst->client_stack_szB );
      }
   }
#  endif
   /* END ensure root thread's stack is suitably mapped */

   /* First off, get the syscall args and number.  This is a
      platform-dependent action. */

   sci = & syscallInfo[tid];
   vg_assert(sci->status.what == SsIdle);

   getSyscallArgsFromGuestState( &sci->orig_args, &tst->arch.vex, trc );

   /* Copy .orig_args to .args.  The pre-handler may modify .args, but
      we want to keep the originals too, just in case. */
   sci->args = sci->orig_args;

   /* Save the syscall number in the thread state in case the syscall
      is interrupted by a signal. */
   sysno = sci->orig_args.sysno;

   /* It's sometimes useful, as a crude debugging hack, to get a
      stack trace at each (or selected) syscalls. */
   if (0 && sysno == __NR_ioctl) {
      VG_(umsg)("\nioctl:\n");
      VG_(get_and_pp_StackTrace)(tid, 10);
      VG_(umsg)("\n");
   }

#  if defined(VGO_darwin)
   /* Record the syscall class in the guest state.  If the syscall is
      interrupted by a signal, m_signals.async_signalhandler must
      build a SysRes from the post-signal machine state, and on
      Darwin the return convention depends on the class.  Linux has a
      single return convention, so no such stashing is needed there. */
   tst->arch.vex.guest_SC_CLASS = VG_DARWIN_SYSNO_CLASS(sysno);
#  endif

   /* The default what-to-do-next thing is hand the syscall to the
      kernel, so we pre-set that here.  Set .sres to something
      harmless looking (is irrelevant because .what is not
      SsComplete.) */
   sci->status.what = SsHandToKernel;
   sci->status.sres = VG_(mk_SysRes_Error)(0);
   sci->flags = 0;

   /* Fetch the syscall's handlers.  If no handlers exist for this
      syscall, we are given dummy handlers which force an immediate
      return with ENOSYS. */
   ent = get_syscall_entry(sysno);

   /* Fetch the layout information, which tells us where in the guest
      state the syscall args reside.  This info is needed so the
      scalar syscall argument checks (PRE_REG_READ calls) know which
      bits of the guest state to inspect. */
   getSyscallArgLayout( &layout );

   /* Make sure the tmp signal mask matches the real signal mask;
      sigsuspend may change this. */
   vg_assert(VG_(iseqsigset)(&tst->sig_mask, &tst->tmp_sig_mask));

   /* Right, we're finally ready to Party.  Call the pre-handler and
      see what we get back.  At this point:

        sci->status.what  is Unset (we don't know yet).
        sci->orig_args    contains the original args.
        sci->args         is the same as sci->orig_args.
        sci->flags        is zero.
   */

   PRINT("SYSCALL[%d,%d](%s) ",
      VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno));

   /* Do any pre-syscall actions the tool requested. */
   if (VG_(needs).syscall_wrapper) {
      UWord tmpv[8];
      tmpv[0] = sci->orig_args.arg1;
      tmpv[1] = sci->orig_args.arg2;
      tmpv[2] = sci->orig_args.arg3;
      tmpv[3] = sci->orig_args.arg4;
      tmpv[4] = sci->orig_args.arg5;
      tmpv[5] = sci->orig_args.arg6;
      tmpv[6] = sci->orig_args.arg7;
      tmpv[7] = sci->orig_args.arg8;
      VG_TDICT_CALL(tool_pre_syscall, tid, sysno,
                    &tmpv[0], sizeof(tmpv)/sizeof(tmpv[0]));
   }

   vg_assert(ent);
   vg_assert(ent->before);
   (ent->before)( tid,
                  &layout,
                  &sci->args, &sci->status, &sci->flags );

   /* The pre-handler may have modified:
         sci->args
         sci->status
         sci->flags
      All else remains unchanged.
      Although the args may be modified, pre handlers are not allowed
      to change the syscall number.
   */
   /* Now we proceed according to what the pre-handler decided. */
   vg_assert(sci->status.what == SsHandToKernel
             || sci->status.what == SsComplete);
   vg_assert(sci->args.sysno == sci->orig_args.sysno);

   if (sci->status.what == SsComplete && !sr_isError(sci->status.sres)) {
      /* The pre-handler completed the syscall itself, declaring
         success. */
      if (sci->flags & SfNoWriteResult) {
         PRINT(" --> [pre-success] NoWriteResult");
      } else {
         PRINT(" --> [pre-success] Success(0x%llx:0x%llx)",
               (ULong)sr_ResHI(sci->status.sres),
               (ULong)sr_Res(sci->status.sres));
      }
      /* In this case the allowable flags are to ask for a signal-poll
         and/or a yield after the call.  Changing the args isn't
         allowed. */
      vg_assert(0 == (sci->flags
                      & ~(SfPollAfter | SfYieldAfter | SfNoWriteResult)));
      vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
   }

   else
   if (sci->status.what == SsComplete && sr_isError(sci->status.sres)) {
      /* The pre-handler decided to fail syscall itself. */
      PRINT(" --> [pre-fail] Failure(0x%llx)", (ULong)sr_Err(sci->status.sres));
      /* In this case, the pre-handler is also allowed to ask for the
         post-handler to be run anyway.  Changing the args is not
         allowed. */
      vg_assert(0 == (sci->flags & ~(SfMayBlock | SfPostOnFail | SfPollAfter)));
      vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
   }

   else
   if (sci->status.what != SsHandToKernel) {
      /* huh?! */
      vg_assert(0);
   }

   else /* (sci->status.what == HandToKernel) */ {
      /* Ok, this is the usual case -- and the complicated one.  There
         are two subcases: sync and async.  async is the general case
         and is to be used when there is any possibility that the
         syscall might block [a fact that the pre-handler must tell us
         via the sci->flags field.]  Because the tidying-away /
         context-switch overhead of the async case could be large, if
         we are sure that the syscall will not block, we fast-track it
         by doing it directly in this thread, which is a lot
         simpler. */

      /* Check that the given flags are allowable: MayBlock, PollAfter
         and PostOnFail are ok. */
      vg_assert(0 == (sci->flags & ~(SfMayBlock | SfPostOnFail | SfPollAfter)));

      if (sci->flags & SfMayBlock) {

         /* Syscall may block, so run it asynchronously */
         vki_sigset_t mask;

         PRINT(" --> [async] ... \n");

         mask = tst->sig_mask;
         sanitize_client_sigmask(&mask);

         /* Gack.  More impedance matching.  Copy the possibly
            modified syscall args back into the guest state. */
         /* JRS 2009-Mar-16: if the syscall args are possibly modified,
            then this assertion is senseless:
              vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
            The case that exposed it was sys_posix_spawn on Darwin,
            which heavily modifies its arguments but then lets the call
            go through anyway, with SfToBlock set, hence we end up here. */
         putSyscallArgsIntoGuestState( &sci->args, &tst->arch.vex );

         /* Drop the bigLock */
         VG_(release_BigLock)(tid, VgTs_WaitSys, "VG_(client_syscall)[async]");
         /* Urr.  We're now in a race against other threads trying to
            acquire the bigLock.  I guess that doesn't matter provided
            that do_syscall_for_client only touches thread-local
            state. */

         /* Do the call, which operates directly on the guest state,
            not on our abstracted copies of the args/result. */
         do_syscall_for_client(sysno, tst, &mask);

         /* do_syscall_for_client may not return if the syscall was
            interrupted by a signal.  In that case, flow of control is
            first to m_signals.async_sighandler, which calls
            VG_(fixup_guest_state_after_syscall_interrupted), which
            fixes up the guest state, and possibly calls
            VG_(post_syscall).  Once that's done, control drops back
            to the scheduler. */

         /* Darwin: do_syscall_for_client may not return if the
            syscall was workq_ops(WQOPS_THREAD_RETURN) and the kernel
            responded by starting the thread at wqthread_hijack(reuse=1)
            (to run another workqueue item).  In that case,
            wqthread_hijack calls ML_(wqthread_continue), which is
            similar to
            VG_(fixup_guest_state_after_syscall_interrupted). */

         /* Reacquire the lock */
         VG_(acquire_BigLock)(tid, "VG_(client_syscall)[async]");

         /* Even more impedance matching.  Extract the syscall status
            from the guest state. */
         getSyscallStatusFromGuestState( &sci->status, &tst->arch.vex );
         vg_assert(sci->status.what == SsComplete);

         /* Be decorative, if required. */
         if (VG_(clo_trace_syscalls)) {
            Bool failed = sr_isError(sci->status.sres);
            if (failed) {
               PRINT("SYSCALL[%d,%d](%s) ... [async] --> Failure(0x%llx)",
                     VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno),
                     (ULong)sr_Err(sci->status.sres));
            } else {
               PRINT("SYSCALL[%d,%d](%s) ... [async] --> "
                     "Success(0x%llx:0x%llx)",
                     VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno),
                     (ULong)sr_ResHI(sci->status.sres),
                     (ULong)sr_Res(sci->status.sres) );
            }
         }

      } else {

         /* run the syscall directly */
         /* The pre-handler may have modified the syscall args, but
            since we're passing values in ->args directly to the
            kernel, there's no point in flushing them back to the
            guest state.  Indeed doing so could be construed as
            incorrect. */
         SysRes sres
            = VG_(do_syscall)(sysno, sci->args.arg1, sci->args.arg2,
                                     sci->args.arg3, sci->args.arg4,
                                     sci->args.arg5, sci->args.arg6,
                                     sci->args.arg7, sci->args.arg8 );
         sci->status = convert_SysRes_to_SyscallStatus(sres);

         /* Be decorative, if required. */
         if (VG_(clo_trace_syscalls)) {
            Bool failed = sr_isError(sci->status.sres);
            if (failed) {
               PRINT("[sync] --> Failure(0x%llx)",
                     (ULong)sr_Err(sci->status.sres) );
            } else {
               PRINT("[sync] --> Success(0x%llx:0x%llx)",
                     (ULong)sr_ResHI(sci->status.sres),
                     (ULong)sr_Res(sci->status.sres) );
            }
         }
      }
   }

   vg_assert(sci->status.what == SsComplete);

   vg_assert(VG_(is_running_thread)(tid));

   /* Dump the syscall result back in the guest state.  This is
      a platform-specific action. */
   if (!(sci->flags & SfNoWriteResult))
      putSyscallStatusIntoGuestState( tid, &sci->status, &tst->arch.vex );

   /* Situation now:
      - the guest state is now correctly modified following the syscall
      - modified args, original args and syscall status are still
        available in the syscallInfo[] entry for this syscall.

      Now go on to do the post-syscall actions (read on down ..)
   */
   PRINT(" ");
   VG_(post_syscall)(tid);
   PRINT("\n");
}


/* Perform post syscall actions.  The expected state on entry is
   precisely as at the end of VG_(client_syscall), that is:

   - guest state up to date following the syscall
   - modified args, original args and syscall status are still
     available in the syscallInfo[] entry for this syscall.
   - syscall status matches what's in the guest state.

   There are two ways to get here: the normal way -- being called by
   VG_(client_syscall), and the unusual way, from
   VG_(fixup_guest_state_after_syscall_interrupted).
   Darwin: there's a third way, ML_(wqthread_continue).
1786 */ 1787 void VG_(post_syscall) (ThreadId tid) 1788 { 1789 SyscallInfo* sci; 1790 const SyscallTableEntry* ent; 1791 SyscallStatus test_status; 1792 ThreadState* tst; 1793 Word sysno; 1794 1795 /* Preliminaries */ 1796 vg_assert(VG_(is_valid_tid)(tid)); 1797 vg_assert(tid >= 1 && tid < VG_N_THREADS); 1798 vg_assert(VG_(is_running_thread)(tid)); 1799 1800 tst = VG_(get_ThreadState)(tid); 1801 sci = & syscallInfo[tid]; 1802 1803 /* m_signals.sigvgkill_handler might call here even when not in 1804 a syscall. */ 1805 if (sci->status.what == SsIdle || sci->status.what == SsHandToKernel) { 1806 sci->status.what = SsIdle; 1807 return; 1808 } 1809 1810 /* Validate current syscallInfo entry. In particular we require 1811 that the current .status matches what's actually in the guest 1812 state. At least in the normal case where we have actually 1813 previously written the result into the guest state. */ 1814 vg_assert(sci->status.what == SsComplete); 1815 1816 getSyscallStatusFromGuestState( &test_status, &tst->arch.vex ); 1817 if (!(sci->flags & SfNoWriteResult)) 1818 vg_assert(eq_SyscallStatus( &sci->status, &test_status )); 1819 /* Failure of the above assertion on Darwin can indicate a problem 1820 in the syscall wrappers that pre-fail or pre-succeed the 1821 syscall, by calling SET_STATUS_Success or SET_STATUS_Failure, 1822 when they really should call SET_STATUS_from_SysRes. The former 1823 create a UNIX-class syscall result on Darwin, which may not be 1824 correct for the syscall; if that's the case then this assertion 1825 fires. See PRE(thread_fast_set_cthread_self) for an example. On 1826 non-Darwin platforms this assertion is should never fail, and this 1827 comment is completely irrelevant. */ 1828 /* Ok, looks sane */ 1829 1830 /* Get the system call number. Because the pre-handler isn't 1831 allowed to mess with it, it should be the same for both the 1832 original and potentially-modified args. 
*/ 1833 vg_assert(sci->args.sysno == sci->orig_args.sysno); 1834 sysno = sci->args.sysno; 1835 ent = get_syscall_entry(sysno); 1836 1837 /* pre: status == Complete (asserted above) */ 1838 /* Consider either success or failure. Now run the post handler if: 1839 - it exists, and 1840 - Success or (Failure and PostOnFail is set) 1841 */ 1842 if (ent->after 1843 && ((!sr_isError(sci->status.sres)) 1844 || (sr_isError(sci->status.sres) 1845 && (sci->flags & SfPostOnFail) ))) { 1846 1847 (ent->after)( tid, &sci->args, &sci->status ); 1848 } 1849 1850 /* Because the post handler might have changed the status (eg, the 1851 post-handler for sys_open can change the result from success to 1852 failure if the kernel supplied a fd that it doesn't like), once 1853 again dump the syscall result back in the guest state.*/ 1854 if (!(sci->flags & SfNoWriteResult)) 1855 putSyscallStatusIntoGuestState( tid, &sci->status, &tst->arch.vex ); 1856 1857 /* Do any post-syscall actions required by the tool. */ 1858 if (VG_(needs).syscall_wrapper) { 1859 UWord tmpv[8]; 1860 tmpv[0] = sci->orig_args.arg1; 1861 tmpv[1] = sci->orig_args.arg2; 1862 tmpv[2] = sci->orig_args.arg3; 1863 tmpv[3] = sci->orig_args.arg4; 1864 tmpv[4] = sci->orig_args.arg5; 1865 tmpv[5] = sci->orig_args.arg6; 1866 tmpv[6] = sci->orig_args.arg7; 1867 tmpv[7] = sci->orig_args.arg8; 1868 VG_TDICT_CALL(tool_post_syscall, tid, 1869 sysno, 1870 &tmpv[0], sizeof(tmpv)/sizeof(tmpv[0]), 1871 sci->status.sres); 1872 } 1873 1874 /* The syscall is done. */ 1875 vg_assert(sci->status.what == SsComplete); 1876 sci->status.what = SsIdle; 1877 1878 /* The pre/post wrappers may have concluded that pending signals 1879 might have been created, and will have set SfPollAfter to 1880 request a poll for them once the syscall is done. */ 1881 if (sci->flags & SfPollAfter) 1882 VG_(poll_signals)(tid); 1883 1884 /* Similarly, the wrappers might have asked for a yield 1885 afterwards. 
*/ 1886 if (sci->flags & SfYieldAfter) 1887 VG_(vg_yield)(); 1888 } 1889 1890 1891 /* --------------------------------------------------------------------- 1892 Dealing with syscalls which get interrupted by a signal: 1893 VG_(fixup_guest_state_after_syscall_interrupted) 1894 ------------------------------------------------------------------ */ 1895 1896 /* Syscalls done on behalf of the client are finally handed off to the 1897 kernel in VG_(client_syscall) above, either by calling 1898 do_syscall_for_client (the async case), or by calling 1899 VG_(do_syscall6) (the sync case). 1900 1901 If the syscall is not interrupted by a signal (it may block and 1902 later unblock, but that's irrelevant here) then those functions 1903 eventually return and so control is passed to VG_(post_syscall). 1904 NB: not sure if the sync case can actually get interrupted, as it 1905 operates with all signals masked. 1906 1907 However, the syscall may get interrupted by an async-signal. In 1908 that case do_syscall_for_client/VG_(do_syscall6) do not 1909 return. Instead we wind up in m_signals.async_sighandler. We need 1910 to fix up the guest state to make it look like the syscall was 1911 interrupted for guest. So async_sighandler calls here, and this 1912 does the fixup. Note that from here we wind up calling 1913 VG_(post_syscall) too. 1914 */ 1915 1916 1917 /* These are addresses within ML_(do_syscall_for_client_WRK). See 1918 syscall-$PLAT.S for details. 
1919 */ 1920 #if defined(VGO_linux) 1921 extern const Addr ML_(blksys_setup); 1922 extern const Addr ML_(blksys_restart); 1923 extern const Addr ML_(blksys_complete); 1924 extern const Addr ML_(blksys_committed); 1925 extern const Addr ML_(blksys_finished); 1926 #elif defined(VGO_darwin) 1927 /* Darwin requires extra uglyness */ 1928 extern const Addr ML_(blksys_setup_MACH); 1929 extern const Addr ML_(blksys_restart_MACH); 1930 extern const Addr ML_(blksys_complete_MACH); 1931 extern const Addr ML_(blksys_committed_MACH); 1932 extern const Addr ML_(blksys_finished_MACH); 1933 extern const Addr ML_(blksys_setup_MDEP); 1934 extern const Addr ML_(blksys_restart_MDEP); 1935 extern const Addr ML_(blksys_complete_MDEP); 1936 extern const Addr ML_(blksys_committed_MDEP); 1937 extern const Addr ML_(blksys_finished_MDEP); 1938 extern const Addr ML_(blksys_setup_UNIX); 1939 extern const Addr ML_(blksys_restart_UNIX); 1940 extern const Addr ML_(blksys_complete_UNIX); 1941 extern const Addr ML_(blksys_committed_UNIX); 1942 extern const Addr ML_(blksys_finished_UNIX); 1943 #else 1944 # error "Unknown OS" 1945 #endif 1946 1947 1948 /* Back up guest state to restart a system call. */ 1949 1950 void ML_(fixup_guest_state_to_restart_syscall) ( ThreadArchState* arch ) 1951 { 1952 #if defined(VGP_x86_linux) 1953 arch->vex.guest_EIP -= 2; // sizeof(int $0x80) 1954 1955 /* Make sure our caller is actually sane, and we're really backing 1956 back over a syscall. 1957 1958 int $0x80 == CD 80 1959 */ 1960 { 1961 UChar *p = (UChar *)arch->vex.guest_EIP; 1962 1963 if (p[0] != 0xcd || p[1] != 0x80) 1964 VG_(message)(Vg_DebugMsg, 1965 "?! restarting over syscall at %#x %02x %02x\n", 1966 arch->vex.guest_EIP, p[0], p[1]); 1967 1968 vg_assert(p[0] == 0xcd && p[1] == 0x80); 1969 } 1970 1971 #elif defined(VGP_amd64_linux) 1972 arch->vex.guest_RIP -= 2; // sizeof(syscall) 1973 1974 /* Make sure our caller is actually sane, and we're really backing 1975 back over a syscall. 
1976 1977 syscall == 0F 05 1978 */ 1979 { 1980 UChar *p = (UChar *)arch->vex.guest_RIP; 1981 1982 if (p[0] != 0x0F || p[1] != 0x05) 1983 VG_(message)(Vg_DebugMsg, 1984 "?! restarting over syscall at %#llx %02x %02x\n", 1985 arch->vex.guest_RIP, p[0], p[1]); 1986 1987 vg_assert(p[0] == 0x0F && p[1] == 0x05); 1988 } 1989 1990 #elif defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux) 1991 arch->vex.guest_CIA -= 4; // sizeof(ppc32 instr) 1992 1993 /* Make sure our caller is actually sane, and we're really backing 1994 back over a syscall. 1995 1996 sc == 44 00 00 02 1997 */ 1998 { 1999 UChar *p = (UChar *)arch->vex.guest_CIA; 2000 2001 if (p[0] != 0x44 || p[1] != 0x0 || p[2] != 0x0 || p[3] != 0x02) 2002 VG_(message)(Vg_DebugMsg, 2003 "?! restarting over syscall at %#llx %02x %02x %02x %02x\n", 2004 arch->vex.guest_CIA + 0ULL, p[0], p[1], p[2], p[3]); 2005 2006 vg_assert(p[0] == 0x44 && p[1] == 0x0 && p[2] == 0x0 && p[3] == 0x2); 2007 } 2008 2009 #elif defined(VGP_arm_linux) 2010 if (arch->vex.guest_R15T & 1) { 2011 // Thumb mode. SVC is a encoded as 2012 // 1101 1111 imm8 2013 // where imm8 is the SVC number, and we only accept 0. 2014 arch->vex.guest_R15T -= 2; // sizeof(thumb 16 bit insn) 2015 UChar* p = (UChar*)(arch->vex.guest_R15T - 1); 2016 Bool valid = p[0] == 0 && p[1] == 0xDF; 2017 if (!valid) { 2018 VG_(message)(Vg_DebugMsg, 2019 "?! restarting over (Thumb) syscall that is not syscall " 2020 "at %#llx %02x %02x\n", 2021 arch->vex.guest_R15T - 1ULL, p[0], p[1]); 2022 } 2023 vg_assert(valid); 2024 // FIXME: NOTE, this really isn't right. We need to back up 2025 // ITSTATE to what it was before the SVC instruction, but we 2026 // don't know what it was. At least assert that it is now 2027 // zero, because if it is nonzero then it must also have 2028 // been nonzero for the SVC itself, which means it was 2029 // conditional. Urk. 2030 vg_assert(arch->vex.guest_ITSTATE == 0); 2031 } else { 2032 // ARM mode. 
SVC is encoded as 2033 // cond 1111 imm24 2034 // where imm24 is the SVC number, and we only accept 0. 2035 arch->vex.guest_R15T -= 4; // sizeof(arm instr) 2036 UChar* p = (UChar*)arch->vex.guest_R15T; 2037 Bool valid = p[0] == 0 && p[1] == 0 && p[2] == 0 2038 && (p[3] & 0xF) == 0xF; 2039 if (!valid) { 2040 VG_(message)(Vg_DebugMsg, 2041 "?! restarting over (ARM) syscall that is not syscall " 2042 "at %#llx %02x %02x %02x %02x\n", 2043 arch->vex.guest_R15T + 0ULL, p[0], p[1], p[2], p[3]); 2044 } 2045 vg_assert(valid); 2046 } 2047 2048 #elif defined(VGP_arm64_linux) 2049 arch->vex.guest_PC -= 4; // sizeof(arm64 instr) 2050 2051 /* Make sure our caller is actually sane, and we're really backing 2052 back over a syscall. 2053 2054 svc #0 == d4 00 00 01 2055 */ 2056 { 2057 UChar *p = (UChar *)arch->vex.guest_PC; 2058 2059 if (p[0] != 0x01 || p[1] != 0x00 || p[2] != 0x00 || p[3] != 0xD4) 2060 VG_(message)( 2061 Vg_DebugMsg, 2062 "?! restarting over syscall at %#llx %02x %02x %02x %02x\n", 2063 arch->vex.guest_PC + 0ULL, p[0], p[1], p[2], p[3] 2064 ); 2065 2066 vg_assert(p[0] == 0x01 && p[1] == 0x00 && p[2] == 0x00 && p[3] == 0xD4); 2067 } 2068 2069 #elif defined(VGP_x86_darwin) 2070 arch->vex.guest_EIP = arch->vex.guest_IP_AT_SYSCALL; 2071 2072 /* Make sure our caller is actually sane, and we're really backing 2073 back over a syscall. 2074 2075 int $0x80 == CD 80 2076 int $0x81 == CD 81 2077 int $0x82 == CD 82 2078 sysenter == 0F 34 2079 */ 2080 { 2081 UChar *p = (UChar *)arch->vex.guest_EIP; 2082 Bool ok = (p[0] == 0xCD && p[1] == 0x80) 2083 || (p[0] == 0xCD && p[1] == 0x81) 2084 || (p[0] == 0xCD && p[1] == 0x82) 2085 || (p[0] == 0x0F && p[1] == 0x34); 2086 if (!ok) 2087 VG_(message)(Vg_DebugMsg, 2088 "?! 
restarting over syscall at %#x %02x %02x\n", 2089 arch->vex.guest_EIP, p[0], p[1]); 2090 vg_assert(ok); 2091 } 2092 2093 #elif defined(VGP_amd64_darwin) 2094 // DDD: #warning GrP fixme amd64 restart unimplemented 2095 vg_assert(0); 2096 2097 #elif defined(VGP_s390x_linux) 2098 arch->vex.guest_IA -= 2; // sizeof(syscall) 2099 2100 /* Make sure our caller is actually sane, and we're really backing 2101 back over a syscall. 2102 2103 syscall == 0A <num> 2104 */ 2105 { 2106 UChar *p = (UChar *)arch->vex.guest_IA; 2107 if (p[0] != 0x0A) 2108 VG_(message)(Vg_DebugMsg, 2109 "?! restarting over syscall at %#llx %02x %02x\n", 2110 arch->vex.guest_IA, p[0], p[1]); 2111 2112 vg_assert(p[0] == 0x0A); 2113 } 2114 2115 #elif defined(VGP_mips32_linux) || defined(VGP_mips64_linux) 2116 2117 arch->vex.guest_PC -= 4; // sizeof(mips instr) 2118 2119 /* Make sure our caller is actually sane, and we're really backing 2120 back over a syscall. 2121 2122 syscall == 00 00 00 0C 2123 big endian 2124 syscall == 0C 00 00 00 2125 */ 2126 { 2127 UChar *p = (UChar *)(arch->vex.guest_PC); 2128 # if defined (VG_LITTLEENDIAN) 2129 if (p[0] != 0x0c || p[1] != 0x00 || p[2] != 0x00 || p[3] != 0x00) 2130 VG_(message)(Vg_DebugMsg, 2131 "?! restarting over syscall at %#llx %02x %02x %02x %02x\n", 2132 (ULong)arch->vex.guest_PC, p[0], p[1], p[2], p[3]); 2133 2134 vg_assert(p[0] == 0x0c && p[1] == 0x00 && p[2] == 0x00 && p[3] == 0x00); 2135 # elif defined (VG_BIGENDIAN) 2136 if (p[0] != 0x00 || p[1] != 0x00 || p[2] != 0x00 || p[3] != 0x0c) 2137 VG_(message)(Vg_DebugMsg, 2138 "?! 
restarting over syscall at %#llx %02x %02x %02x %02x\n", 2139 (ULong)arch->vex.guest_PC, p[0], p[1], p[2], p[3]); 2140 2141 vg_assert(p[0] == 0x00 && p[1] == 0x00 && p[2] == 0x00 && p[3] == 0x0c); 2142 # else 2143 # error "Unknown endianness" 2144 # endif 2145 } 2146 2147 #else 2148 # error "ML_(fixup_guest_state_to_restart_syscall): unknown plat" 2149 #endif 2150 } 2151 2152 2153 /* 2154 Fix up the guest state when a syscall is interrupted by a signal 2155 and so has been forced to return 'sysret'. 2156 2157 To do this, we determine the precise state of the syscall by 2158 looking at the (real) IP at the time the signal happened. The 2159 syscall sequence looks like: 2160 2161 1. unblock signals 2162 2. perform syscall 2163 3. save result to guest state (EAX, RAX, R3+CR0.SO, R0, V0) 2164 4. re-block signals 2165 2166 If a signal 2167 happens at Then Why? 2168 [1-2) restart nothing has happened (restart syscall) 2169 [2] restart syscall hasn't started, or kernel wants to restart 2170 [2-3) save syscall complete, but results not saved 2171 [3-4) syscall complete, results saved 2172 2173 Sometimes we never want to restart an interrupted syscall (because 2174 sigaction says not to), so we only restart if "restart" is True. 2175 2176 This will also call VG_(post_syscall) if the syscall has actually 2177 completed (either because it was interrupted, or because it 2178 actually finished). It will not call VG_(post_syscall) if the 2179 syscall is set up for restart, which means that the pre-wrapper may 2180 get called multiple times. 2181 */ 2182 2183 void 2184 VG_(fixup_guest_state_after_syscall_interrupted)( ThreadId tid, 2185 Addr ip, 2186 SysRes sres, 2187 Bool restart) 2188 { 2189 /* Note that we don't know the syscall number here, since (1) in 2190 general there's no reliable way to get hold of it short of 2191 stashing it in the guest state before the syscall, and (2) in 2192 any case we don't need to know it for the actions done by this 2193 routine. 
2194 2195 Furthermore, 'sres' is only used in the case where the syscall 2196 is complete, but the result has not been committed to the guest 2197 state yet. In any other situation it will be meaningless and 2198 therefore ignored. */ 2199 2200 ThreadState* tst; 2201 SyscallStatus canonical; 2202 ThreadArchState* th_regs; 2203 SyscallInfo* sci; 2204 2205 /* Compute some Booleans indicating which range we're in. */ 2206 Bool outside_range, 2207 in_setup_to_restart, // [1,2) in the .S files 2208 at_restart, // [2] in the .S files 2209 in_complete_to_committed, // [3,4) in the .S files 2210 in_committed_to_finished; // [4,5) in the .S files 2211 2212 # if defined(VGO_linux) 2213 outside_range 2214 = ip < ML_(blksys_setup) || ip >= ML_(blksys_finished); 2215 in_setup_to_restart 2216 = ip >= ML_(blksys_setup) && ip < ML_(blksys_restart); 2217 at_restart 2218 = ip == ML_(blksys_restart); 2219 in_complete_to_committed 2220 = ip >= ML_(blksys_complete) && ip < ML_(blksys_committed); 2221 in_committed_to_finished 2222 = ip >= ML_(blksys_committed) && ip < ML_(blksys_finished); 2223 # elif defined(VGO_darwin) 2224 outside_range 2225 = (ip < ML_(blksys_setup_MACH) || ip >= ML_(blksys_finished_MACH)) 2226 && (ip < ML_(blksys_setup_MDEP) || ip >= ML_(blksys_finished_MDEP)) 2227 && (ip < ML_(blksys_setup_UNIX) || ip >= ML_(blksys_finished_UNIX)); 2228 in_setup_to_restart 2229 = (ip >= ML_(blksys_setup_MACH) && ip < ML_(blksys_restart_MACH)) 2230 || (ip >= ML_(blksys_setup_MDEP) && ip < ML_(blksys_restart_MDEP)) 2231 || (ip >= ML_(blksys_setup_UNIX) && ip < ML_(blksys_restart_UNIX)); 2232 at_restart 2233 = (ip == ML_(blksys_restart_MACH)) 2234 || (ip == ML_(blksys_restart_MDEP)) 2235 || (ip == ML_(blksys_restart_UNIX)); 2236 in_complete_to_committed 2237 = (ip >= ML_(blksys_complete_MACH) && ip < ML_(blksys_committed_MACH)) 2238 || (ip >= ML_(blksys_complete_MDEP) && ip < ML_(blksys_committed_MDEP)) 2239 || (ip >= ML_(blksys_complete_UNIX) && ip < ML_(blksys_committed_UNIX)); 
2240 in_committed_to_finished 2241 = (ip >= ML_(blksys_committed_MACH) && ip < ML_(blksys_finished_MACH)) 2242 || (ip >= ML_(blksys_committed_MDEP) && ip < ML_(blksys_finished_MDEP)) 2243 || (ip >= ML_(blksys_committed_UNIX) && ip < ML_(blksys_finished_UNIX)); 2244 /* Wasn't that just So Much Fun? Does your head hurt yet? Mine does. */ 2245 # else 2246 # error "Unknown OS" 2247 # endif 2248 2249 if (VG_(clo_trace_signals)) 2250 VG_(message)( Vg_DebugMsg, 2251 "interrupted_syscall: tid=%d, ip=0x%llx, " 2252 "restart=%s, sres.isErr=%s, sres.val=%lld\n", 2253 (Int)tid, 2254 (ULong)ip, 2255 restart ? "True" : "False", 2256 sr_isError(sres) ? "True" : "False", 2257 (Long)(sr_isError(sres) ? sr_Err(sres) : sr_Res(sres)) ); 2258 2259 vg_assert(VG_(is_valid_tid)(tid)); 2260 vg_assert(tid >= 1 && tid < VG_N_THREADS); 2261 vg_assert(VG_(is_running_thread)(tid)); 2262 2263 tst = VG_(get_ThreadState)(tid); 2264 th_regs = &tst->arch; 2265 sci = & syscallInfo[tid]; 2266 2267 /* Figure out what the state of the syscall was by examining the 2268 (real) IP at the time of the signal, and act accordingly. */ 2269 if (outside_range) { 2270 if (VG_(clo_trace_signals)) 2271 VG_(message)( Vg_DebugMsg, 2272 " not in syscall at all: hmm, very suspicious\n" ); 2273 /* Looks like we weren't in a syscall at all. Hmm. */ 2274 vg_assert(sci->status.what != SsIdle); 2275 return; 2276 } 2277 2278 /* We should not be here unless this thread had first started up 2279 the machinery for a syscall by calling VG_(client_syscall). 2280 Hence: */ 2281 vg_assert(sci->status.what != SsIdle); 2282 2283 /* now, do one of four fixup actions, depending on where the IP has 2284 got to. 
*/ 2285 2286 if (in_setup_to_restart) { 2287 /* syscall hasn't even started; go around again */ 2288 if (VG_(clo_trace_signals)) 2289 VG_(message)( Vg_DebugMsg, " not started: restarting\n"); 2290 vg_assert(sci->status.what == SsHandToKernel); 2291 ML_(fixup_guest_state_to_restart_syscall)(th_regs); 2292 } 2293 2294 else 2295 if (at_restart) { 2296 /* We're either about to run the syscall, or it was interrupted 2297 and the kernel restarted it. Restart if asked, otherwise 2298 EINTR it. */ 2299 if (restart) { 2300 if (VG_(clo_trace_signals)) 2301 VG_(message)( Vg_DebugMsg, " at syscall instr: restarting\n"); 2302 ML_(fixup_guest_state_to_restart_syscall)(th_regs); 2303 } else { 2304 if (VG_(clo_trace_signals)) 2305 VG_(message)( Vg_DebugMsg, " at syscall instr: returning EINTR\n"); 2306 canonical = convert_SysRes_to_SyscallStatus( 2307 VG_(mk_SysRes_Error)( VKI_EINTR ) 2308 ); 2309 if (!(sci->flags & SfNoWriteResult)) 2310 putSyscallStatusIntoGuestState( tid, &canonical, &th_regs->vex ); 2311 sci->status = canonical; 2312 VG_(post_syscall)(tid); 2313 } 2314 } 2315 2316 else 2317 if (in_complete_to_committed) { 2318 /* Syscall complete, but result hasn't been written back yet. 2319 Write the SysRes we were supplied with back to the guest 2320 state. */ 2321 if (VG_(clo_trace_signals)) 2322 VG_(message)( Vg_DebugMsg, 2323 " completed, but uncommitted: committing\n"); 2324 canonical = convert_SysRes_to_SyscallStatus( sres ); 2325 if (!(sci->flags & SfNoWriteResult)) 2326 putSyscallStatusIntoGuestState( tid, &canonical, &th_regs->vex ); 2327 sci->status = canonical; 2328 VG_(post_syscall)(tid); 2329 } 2330 2331 else 2332 if (in_committed_to_finished) { 2333 /* Result committed, but the signal mask has not been restored; 2334 we expect our caller (the signal handler) will have fixed 2335 this up. 
*/ 2336 if (VG_(clo_trace_signals)) 2337 VG_(message)( Vg_DebugMsg, 2338 " completed and committed: nothing to do\n"); 2339 getSyscallStatusFromGuestState( &sci->status, &th_regs->vex ); 2340 vg_assert(sci->status.what == SsComplete); 2341 VG_(post_syscall)(tid); 2342 } 2343 2344 else 2345 VG_(core_panic)("?? strange syscall interrupt state?"); 2346 2347 /* In all cases, the syscall is now finished (even if we called 2348 ML_(fixup_guest_state_to_restart_syscall), since that just 2349 re-positions the guest's IP for another go at it). So we need 2350 to record that fact. */ 2351 sci->status.what = SsIdle; 2352 } 2353 2354 2355 #if defined(VGO_darwin) 2356 // Clean up after workq_ops(WQOPS_THREAD_RETURN) jumped to wqthread_hijack. 2357 // This is similar to VG_(fixup_guest_state_after_syscall_interrupted). 2358 // This longjmps back to the scheduler. 2359 void ML_(wqthread_continue_NORETURN)(ThreadId tid) 2360 { 2361 ThreadState* tst; 2362 SyscallInfo* sci; 2363 2364 VG_(acquire_BigLock)(tid, "wqthread_continue_NORETURN"); 2365 2366 PRINT("SYSCALL[%d,%d](%s) workq_ops() starting new workqueue item\n", 2367 VG_(getpid)(), tid, VG_SYSNUM_STRING(__NR_workq_ops)); 2368 2369 vg_assert(VG_(is_valid_tid)(tid)); 2370 vg_assert(tid >= 1 && tid < VG_N_THREADS); 2371 vg_assert(VG_(is_running_thread)(tid)); 2372 2373 tst = VG_(get_ThreadState)(tid); 2374 sci = & syscallInfo[tid]; 2375 vg_assert(sci->status.what != SsIdle); 2376 vg_assert(tst->os_state.wq_jmpbuf_valid); // check this BEFORE post_syscall 2377 2378 // Pretend the syscall completed normally, but don't touch the thread state. 
2379 sci->status = convert_SysRes_to_SyscallStatus( VG_(mk_SysRes_Success)(0) ); 2380 sci->flags |= SfNoWriteResult; 2381 VG_(post_syscall)(tid); 2382 2383 sci->status.what = SsIdle; 2384 2385 vg_assert(tst->sched_jmpbuf_valid); 2386 VG_MINIMAL_LONGJMP(tst->sched_jmpbuf); 2387 2388 /* NOTREACHED */ 2389 vg_assert(0); 2390 } 2391 #endif 2392 2393 2394 /* --------------------------------------------------------------------- 2395 A place to store the where-to-call-when-really-done pointer 2396 ------------------------------------------------------------------ */ 2397 2398 // When the final thread is done, where shall I call to shutdown the 2399 // system cleanly? Is set once at startup (in m_main) and never 2400 // changes after that. Is basically a pointer to the exit 2401 // continuation. This is all just a nasty hack to avoid calling 2402 // directly from m_syswrap to m_main at exit, since that would cause 2403 // m_main to become part of a module cycle, which is silly. 2404 void (* VG_(address_of_m_main_shutdown_actions_NORETURN) ) 2405 (ThreadId,VgSchedReturnCode) 2406 = NULL; 2407 2408 /*--------------------------------------------------------------------*/ 2409 /*--- end ---*/ 2410 /*--------------------------------------------------------------------*/ 2411