1 2 /*--------------------------------------------------------------------*/ 3 /*--- Handle system calls. syswrap-main.c ---*/ 4 /*--------------------------------------------------------------------*/ 5 6 /* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2000-2012 Julian Seward 11 jseward (at) acm.org 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 26 02111-1307, USA. 27 28 The GNU General Public License is contained in the file COPYING. 
29 */ 30 31 #include "libvex_guest_offsets.h" 32 #include "libvex_trc_values.h" 33 #include "pub_core_basics.h" 34 #include "pub_core_aspacemgr.h" 35 #include "pub_core_vki.h" 36 #include "pub_core_vkiscnums.h" 37 #include "pub_core_libcsetjmp.h" // to keep _threadstate.h happy 38 #include "pub_core_threadstate.h" 39 #include "pub_core_libcbase.h" 40 #include "pub_core_libcassert.h" 41 #include "pub_core_libcprint.h" 42 #include "pub_core_libcproc.h" // For VG_(getpid)() 43 #include "pub_core_libcsignal.h" 44 #include "pub_core_scheduler.h" // For VG_({acquire,release}_BigLock), 45 // and VG_(vg_yield) 46 #include "pub_core_stacktrace.h" // For VG_(get_and_pp_StackTrace)() 47 #include "pub_core_tooliface.h" 48 #include "pub_core_options.h" 49 #include "pub_core_signals.h" // For VG_SIGVGKILL, VG_(poll_signals) 50 #include "pub_core_syscall.h" 51 #include "pub_core_machine.h" 52 #include "pub_core_syswrap.h" 53 54 #include "priv_types_n_macros.h" 55 #include "priv_syswrap-main.h" 56 57 #if defined(VGO_darwin) 58 #include "priv_syswrap-darwin.h" 59 #endif 60 61 /* Useful info which needs to be recorded somewhere: 62 Use of registers in syscalls is: 63 64 NUM ARG1 ARG2 ARG3 ARG4 ARG5 ARG6 ARG7 ARG8 RESULT 65 LINUX: 66 x86 eax ebx ecx edx esi edi ebp n/a n/a eax (== NUM) 67 amd64 rax rdi rsi rdx r10 r8 r9 n/a n/a rax (== NUM) 68 ppc32 r0 r3 r4 r5 r6 r7 r8 n/a n/a r3+CR0.SO (== ARG1) 69 ppc64 r0 r3 r4 r5 r6 r7 r8 n/a n/a r3+CR0.SO (== ARG1) 70 arm r7 r0 r1 r2 r3 r4 r5 n/a n/a r0 (== ARG1) 71 mips v0 a0 a1 a2 a3 stack stack n/a n/a v0 (== NUM) 72 73 On s390x the svc instruction is used for system calls. The system call 74 number is encoded in the instruction (8 bit immediate field). Since Linux 75 2.6 it is also allowed to use svc 0 with the system call number in r1. 76 This was introduced for system calls >255, but works for all. It is 77 also possible to see the svc 0 together with an EXecute instruction, that 78 fills in the immediate field. 
79 s390x r1/SVC r2 r3 r4 r5 r6 r7 n/a n/a r2 (== ARG1) 80 81 DARWIN: 82 x86 eax +4 +8 +12 +16 +20 +24 +28 +32 edx:eax, eflags.c 83 amd64 rax rdi rsi rdx rcx r8 r9 +8 +16 rdx:rax, rflags.c 84 85 For x86-darwin, "+N" denotes "in memory at N(%esp)"; ditto 86 amd64-darwin. Apparently 0(%esp) is some kind of return address 87 (perhaps for syscalls done with "sysenter"?) I don't think it is 88 relevant for syscalls done with "int $0x80/1/2". 89 */ 90 91 /* This is the top level of the system-call handler module. All 92 system calls are channelled through here, doing two things: 93 94 * notify the tool of the events (mem/reg reads, writes) happening 95 96 * perform the syscall, usually by passing it along to the kernel 97 unmodified. 98 99 A magical piece of assembly code, do_syscall_for_client_WRK, in 100 syscall-$PLATFORM.S does the tricky bit of passing a syscall to the 101 kernel, whilst having the simulator retain control. 102 */ 103 104 /* The main function is VG_(client_syscall). The simulation calls it 105 whenever a client thread wants to do a syscall. The following is a 106 sketch of what it does. 107 108 * Ensures the root thread's stack is suitably mapped. Tedious and 109 arcane. See big big comment in VG_(client_syscall). 110 111 * First, it rounds up the syscall number and args (which is a 112 platform dependent activity) and puts them in a struct ("args") 113 and also a copy in "orig_args". 114 115 The pre/post wrappers refer to these structs and so no longer 116 need magic macros to access any specific registers. This struct 117 is stored in thread-specific storage. 118 119 120 * The pre-wrapper is called, passing it a pointer to struct 121 "args". 122 123 124 * The pre-wrapper examines the args and pokes the tool 125 appropriately. It may modify the args; this is why "orig_args" 126 is also stored. 
127 128 The pre-wrapper may choose to 'do' the syscall itself, and 129 concludes one of three outcomes: 130 131 Success(N) -- syscall is already complete, with success; 132 result is N 133 134 Fail(N) -- syscall is already complete, with failure; 135 error code is N 136 137 HandToKernel -- (the usual case): this needs to be given to 138 the kernel to be done, using the values in 139 the possibly-modified "args" struct. 140 141 In addition, the pre-wrapper may set some flags: 142 143 MayBlock -- only applicable when outcome==HandToKernel 144 145 PostOnFail -- only applicable when outcome==HandToKernel or Fail 146 147 148 * If the pre-outcome is HandToKernel, the syscall is duly handed 149 off to the kernel (perhaps involving some thread switchery, but 150 that's not important). This reduces the possible set of outcomes 151 to either Success(N) or Fail(N). 152 153 154 * The outcome (Success(N) or Fail(N)) is written back to the guest 155 register(s). This is platform specific: 156 157 x86: Success(N) ==> eax = N 158 Fail(N) ==> eax = -N 159 160 ditto amd64 161 162 ppc32: Success(N) ==> r3 = N, CR0.SO = 0 163 Fail(N) ==> r3 = N, CR0.SO = 1 164 165 Darwin: 166 x86: Success(N) ==> edx:eax = N, cc = 0 167 Fail(N) ==> edx:eax = N, cc = 1 168 169 s390x: Success(N) ==> r2 = N 170 Fail(N) ==> r2 = -N 171 172 * The post wrapper is called if: 173 174 - it exists, and 175 - outcome==Success or (outcome==Fail and PostOnFail is set) 176 177 The post wrapper is passed the adulterated syscall args (struct 178 "args"), and the syscall outcome (viz, Success(N) or Fail(N)). 179 180 There are several other complications, primarily to do with 181 syscalls getting interrupted, explained in comments in the code. 182 */ 183 184 /* CAVEATS for writing wrappers. It is important to follow these! 
185 186 The macros defined in priv_types_n_macros.h are designed to help 187 decouple the wrapper logic from the actual representation of 188 syscall args/results, since these wrappers are designed to work on 189 multiple platforms. 190 191 Sometimes a PRE wrapper will complete the syscall itself, without 192 handing it to the kernel. It will use one of SET_STATUS_Success, 193 SET_STATUS_Failure or SET_STATUS_from_SysRes to set the return 194 value. It is critical to appreciate that use of the macro does not 195 immediately cause the underlying guest state to be updated -- that 196 is done by the driver logic in this file, when the wrapper returns. 197 198 As a result, PRE wrappers of the following form will malfunction: 199 200 PRE(fooble) 201 { 202 ... do stuff ... 203 SET_STATUS_Somehow(...) 204 205 // do something that assumes guest state is up to date 206 } 207 208 In particular, direct or indirect calls to VG_(poll_signals) after 209 setting STATUS can cause the guest state to be read (in order to 210 build signal frames). Do not do this. If you want a signal poll 211 after the syscall goes through, do "*flags |= SfPollAfter" and the 212 driver logic will do it for you. 213 214 ----------- 215 216 Another critical requirement following introduction of new address 217 space manager (JRS, 20050923): 218 219 In a situation where the mappedness of memory has changed, aspacem 220 should be notified BEFORE the tool. 
Hence the following is 221 correct: 222 223 Bool d = VG_(am_notify_munmap)(s->start, s->end+1 - s->start); 224 VG_TRACK( die_mem_munmap, s->start, s->end+1 - s->start ); 225 if (d) 226 VG_(discard_translations)(s->start, s->end+1 - s->start); 227 228 whilst this is wrong: 229 230 VG_TRACK( die_mem_munmap, s->start, s->end+1 - s->start ); 231 Bool d = VG_(am_notify_munmap)(s->start, s->end+1 - s->start); 232 if (d) 233 VG_(discard_translations)(s->start, s->end+1 - s->start); 234 235 The reason is that the tool may itself ask aspacem for more shadow 236 memory as a result of the VG_TRACK call. In such a situation it is 237 critical that aspacem's segment array is up to date -- hence the 238 need to notify aspacem first. 239 240 ----------- 241 242 Also .. take care to call VG_(discard_translations) whenever 243 memory with execute permissions is unmapped. 244 */ 245 246 247 /* --------------------------------------------------------------------- 248 Do potentially blocking syscall for the client, and mess with 249 signal masks at the same time. 250 ------------------------------------------------------------------ */ 251 252 /* Perform a syscall on behalf of a client thread, using a specific 253 signal mask. On completion, the signal mask is set to restore_mask 254 (which presumably blocks almost everything). If a signal happens 255 during the syscall, the handler should call 256 VG_(fixup_guest_state_after_syscall_interrupted) to adjust the 257 thread's context to do the right thing. 258 259 The _WRK function is handwritten assembly, implemented per-platform 260 in coregrind/m_syswrap/syscall-$PLAT.S. It has some very magic 261 properties. See comments at the top of 262 VG_(fixup_guest_state_after_syscall_interrupted) below for details. 263 264 This function (these functions) are required to return zero in case 265 of success (even if the syscall itself failed), and nonzero if the 266 sigprocmask-swizzling calls failed. 
   We don't actually care about
   the failure values from sigprocmask, although most of the assembly
   implementations do attempt to return that, using the convention
   0 for success, or 0x8000 | error-code for failure.
*/
#if defined(VGO_linux)
extern
UWord ML_(do_syscall_for_client_WRK)( Word syscallno, 
                                      void* guest_state,
                                      const vki_sigset_t *syscall_mask,
                                      const vki_sigset_t *restore_mask,
                                      Word sigsetSzB );
#elif defined(VGO_darwin)
extern
UWord ML_(do_syscall_for_client_unix_WRK)( Word syscallno, 
                                           void* guest_state,
                                           const vki_sigset_t *syscall_mask,
                                           const vki_sigset_t *restore_mask,
                                           Word sigsetSzB ); /* unused */
extern
UWord ML_(do_syscall_for_client_mach_WRK)( Word syscallno, 
                                           void* guest_state,
                                           const vki_sigset_t *syscall_mask,
                                           const vki_sigset_t *restore_mask,
                                           Word sigsetSzB ); /* unused */
extern
UWord ML_(do_syscall_for_client_mdep_WRK)( Word syscallno, 
                                           void* guest_state,
                                           const vki_sigset_t *syscall_mask,
                                           const vki_sigset_t *restore_mask,
                                           Word sigsetSzB ); /* unused */
#else
#  error "Unknown OS"
#endif


/* Run 'syscallno' on behalf of the client thread 'tst', with the
   thread's signal mask set to 'syscall_mask' for the duration of the
   kernel call (the assembly helper restores the mask afterwards --
   see the comment block above).  On Darwin, dispatches to the helper
   for the syscall's class (Unix/Mach/mdep), which is encoded in the
   canonical 'syscallno'.  Aborts Valgrind if the helper reports that
   its sigprocmask swizzling failed. */
static
void do_syscall_for_client ( Int syscallno,
                             ThreadState* tst,
                             const vki_sigset_t* syscall_mask )
{
   vki_sigset_t saved;
   UWord err;
#  if defined(VGO_linux)
   err = ML_(do_syscall_for_client_WRK)(
            syscallno, &tst->arch.vex,
            syscall_mask, &saved, sizeof(vki_sigset_t)
         );
#  elif defined(VGO_darwin)
   /* The syscall class is encoded in the canonical syscall number;
      strip it off (VG_DARWIN_SYSNO_FOR_KERNEL) before handing the
      number to the kernel. */
   switch (VG_DARWIN_SYSNO_CLASS(syscallno)) {
      case VG_DARWIN_SYSCALL_CLASS_UNIX:
         err = ML_(do_syscall_for_client_unix_WRK)(
                  VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
                  syscall_mask, &saved, 0/*unused:sigsetSzB*/
               );
         break;
      case VG_DARWIN_SYSCALL_CLASS_MACH:
         err = ML_(do_syscall_for_client_mach_WRK)(
                  VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
                  syscall_mask, &saved, 0/*unused:sigsetSzB*/
               );
         break;
      case VG_DARWIN_SYSCALL_CLASS_MDEP:
         err = ML_(do_syscall_for_client_mdep_WRK)(
                  VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
                  syscall_mask, &saved, 0/*unused:sigsetSzB*/
               );
         break;
      default:
         vg_assert(0);
         /*NOTREACHED*/
         break;
   }
#  else
#    error "Unknown OS"
#  endif
   /* Helpers return 0 on success, or 0x8000 | error-code if the
      sigprocmask calls inside them failed; treat that as fatal. */
   vg_assert2(
      err == 0,
      "ML_(do_syscall_for_client_WRK): sigprocmask error %d",
      (Int)(err & 0xFFF)
   );
}


/* ---------------------------------------------------------------------
   Impedance matchers and misc helpers
   ------------------------------------------------------------------ */

/* Field-by-field equality of two canonical syscall-argument sets;
   used to detect whether a wrapper modified the args. */
static
Bool eq_SyscallArgs ( SyscallArgs* a1, SyscallArgs* a2 )
{
   return a1->sysno == a2->sysno
          && a1->arg1 == a2->arg1
          && a1->arg2 == a2->arg2
          && a1->arg3 == a2->arg3
          && a1->arg4 == a2->arg4
          && a1->arg5 == a2->arg5
          && a1->arg6 == a2->arg6
          && a1->arg7 == a2->arg7
          && a1->arg8 == a2->arg8;
}

/* Equality of two syscall outcomes.  On Darwin, an unequal pair is
   additionally dumped and asserted on (debugging aid); elsewhere the
   function simply returns False. */
static
Bool eq_SyscallStatus ( SyscallStatus* s1, SyscallStatus* s2 )
{
   /* was: return s1->what == s2->what && sr_EQ( s1->sres, s2->sres ); */
   if (s1->what == s2->what && sr_EQ( s1->sres, s2->sres ))
      return True;
#  if defined(VGO_darwin)
   /* Darwin-specific debugging guff */
   vg_assert(s1->what == s2->what);
   VG_(printf)("eq_SyscallStatus:\n");
   VG_(printf)("  {%lu %lu %u}\n", s1->sres._wLO, s1->sres._wHI, s1->sres._mode);
   VG_(printf)("  {%lu %lu %u}\n", s2->sres._wLO, s2->sres._wHI, s2->sres._mode);
   vg_assert(0);
#  endif
   return False;
}

/* Convert between SysRes and SyscallStatus, to the extent possible. */

/* Wrap an already-completed SysRes into a SyscallStatus marked
   SsComplete. */
static
SyscallStatus convert_SysRes_to_SyscallStatus ( SysRes res )
{
   SyscallStatus status;
   status.what = SsComplete;
   status.sres = res;
   return status;
}


/* Impedance matchers.
These convert syscall arg or result data from 398 the platform-specific in-guest-state format to the canonical 399 formats, and back. */ 400 401 static 402 void getSyscallArgsFromGuestState ( /*OUT*/SyscallArgs* canonical, 403 /*IN*/ VexGuestArchState* gst_vanilla, 404 /*IN*/ UInt trc ) 405 { 406 #if defined(VGP_x86_linux) 407 VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla; 408 canonical->sysno = gst->guest_EAX; 409 canonical->arg1 = gst->guest_EBX; 410 canonical->arg2 = gst->guest_ECX; 411 canonical->arg3 = gst->guest_EDX; 412 canonical->arg4 = gst->guest_ESI; 413 canonical->arg5 = gst->guest_EDI; 414 canonical->arg6 = gst->guest_EBP; 415 canonical->arg7 = 0; 416 canonical->arg8 = 0; 417 418 #elif defined(VGP_amd64_linux) 419 VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla; 420 canonical->sysno = gst->guest_RAX; 421 canonical->arg1 = gst->guest_RDI; 422 canonical->arg2 = gst->guest_RSI; 423 canonical->arg3 = gst->guest_RDX; 424 canonical->arg4 = gst->guest_R10; 425 canonical->arg5 = gst->guest_R8; 426 canonical->arg6 = gst->guest_R9; 427 canonical->arg7 = 0; 428 canonical->arg8 = 0; 429 430 #elif defined(VGP_ppc32_linux) 431 VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla; 432 canonical->sysno = gst->guest_GPR0; 433 canonical->arg1 = gst->guest_GPR3; 434 canonical->arg2 = gst->guest_GPR4; 435 canonical->arg3 = gst->guest_GPR5; 436 canonical->arg4 = gst->guest_GPR6; 437 canonical->arg5 = gst->guest_GPR7; 438 canonical->arg6 = gst->guest_GPR8; 439 canonical->arg7 = 0; 440 canonical->arg8 = 0; 441 442 #elif defined(VGP_ppc64_linux) 443 VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla; 444 canonical->sysno = gst->guest_GPR0; 445 canonical->arg1 = gst->guest_GPR3; 446 canonical->arg2 = gst->guest_GPR4; 447 canonical->arg3 = gst->guest_GPR5; 448 canonical->arg4 = gst->guest_GPR6; 449 canonical->arg5 = gst->guest_GPR7; 450 canonical->arg6 = gst->guest_GPR8; 451 canonical->arg7 = 0; 452 canonical->arg8 = 0; 453 454 #elif 
defined(VGP_arm_linux) 455 VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla; 456 canonical->sysno = gst->guest_R7; 457 canonical->arg1 = gst->guest_R0; 458 canonical->arg2 = gst->guest_R1; 459 canonical->arg3 = gst->guest_R2; 460 canonical->arg4 = gst->guest_R3; 461 canonical->arg5 = gst->guest_R4; 462 canonical->arg6 = gst->guest_R5; 463 canonical->arg7 = 0; 464 canonical->arg8 = 0; 465 466 #elif defined(VGP_mips32_linux) 467 VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla; 468 canonical->sysno = gst->guest_r2; // v0 469 if (canonical->sysno != __NR_syscall) { 470 canonical->arg1 = gst->guest_r4; // a0 471 canonical->arg2 = gst->guest_r5; // a1 472 canonical->arg3 = gst->guest_r6; // a2 473 canonical->arg4 = gst->guest_r7; // a3 474 canonical->arg5 = *((UInt*) (gst->guest_r29 + 16)); // 16(guest_SP/sp) 475 canonical->arg6 = *((UInt*) (gst->guest_r29 + 20)); // 20(sp) 476 canonical->arg8 = 0; 477 } else { 478 // Fixme hack handle syscall() 479 canonical->sysno = gst->guest_r4; // a0 480 canonical->arg1 = gst->guest_r5; // a1 481 canonical->arg2 = gst->guest_r6; // a2 482 canonical->arg3 = gst->guest_r7; // a3 483 canonical->arg4 = *((UInt*) (gst->guest_r29 + 16)); // 16(guest_SP/sp) 484 canonical->arg5 = *((UInt*) (gst->guest_r29 + 20)); // 20(guest_SP/sp) 485 canonical->arg6 = *((UInt*) (gst->guest_r29 + 24)); // 24(guest_SP/sp) 486 canonical->arg8 = __NR_syscall; 487 } 488 489 #elif defined(VGP_x86_darwin) 490 VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla; 491 UWord *stack = (UWord *)gst->guest_ESP; 492 // GrP fixme hope syscalls aren't called with really shallow stacks... 
493 canonical->sysno = gst->guest_EAX; 494 if (canonical->sysno != 0) { 495 // stack[0] is return address 496 canonical->arg1 = stack[1]; 497 canonical->arg2 = stack[2]; 498 canonical->arg3 = stack[3]; 499 canonical->arg4 = stack[4]; 500 canonical->arg5 = stack[5]; 501 canonical->arg6 = stack[6]; 502 canonical->arg7 = stack[7]; 503 canonical->arg8 = stack[8]; 504 } else { 505 // GrP fixme hack handle syscall() 506 // GrP fixme what about __syscall() ? 507 // stack[0] is return address 508 // DDD: the tool can't see that the params have been shifted! Can 509 // lead to incorrect checking, I think, because the PRRAn/PSARn 510 // macros will mention the pre-shifted args. 511 canonical->sysno = stack[1]; 512 vg_assert(canonical->sysno != 0); 513 canonical->arg1 = stack[2]; 514 canonical->arg2 = stack[3]; 515 canonical->arg3 = stack[4]; 516 canonical->arg4 = stack[5]; 517 canonical->arg5 = stack[6]; 518 canonical->arg6 = stack[7]; 519 canonical->arg7 = stack[8]; 520 canonical->arg8 = stack[9]; 521 522 PRINT("SYSCALL[%d,?](%s) syscall(%s, ...); please stand by...\n", 523 VG_(getpid)(), /*tid,*/ 524 VG_SYSNUM_STRING(0), VG_SYSNUM_STRING(canonical->sysno)); 525 } 526 527 // Here we determine what kind of syscall it was by looking at the 528 // interrupt kind, and then encode the syscall number using the 64-bit 529 // encoding for Valgrind's internal use. 530 // 531 // DDD: Would it be better to stash the JMP kind into the Darwin 532 // thread state rather than passing in the trc? 
533 switch (trc) { 534 case VEX_TRC_JMP_SYS_INT128: 535 // int $0x80 = Unix, 64-bit result 536 vg_assert(canonical->sysno >= 0); 537 canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(canonical->sysno); 538 break; 539 case VEX_TRC_JMP_SYS_SYSENTER: 540 // syscall = Unix, 32-bit result 541 // OR Mach, 32-bit result 542 if (canonical->sysno >= 0) { 543 // GrP fixme hack: 0xffff == I386_SYSCALL_NUMBER_MASK 544 canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(canonical->sysno 545 & 0xffff); 546 } else { 547 canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MACH(-canonical->sysno); 548 } 549 break; 550 case VEX_TRC_JMP_SYS_INT129: 551 // int $0x81 = Mach, 32-bit result 552 vg_assert(canonical->sysno < 0); 553 canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MACH(-canonical->sysno); 554 break; 555 case VEX_TRC_JMP_SYS_INT130: 556 // int $0x82 = mdep, 32-bit result 557 vg_assert(canonical->sysno >= 0); 558 canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MDEP(canonical->sysno); 559 break; 560 default: 561 vg_assert(0); 562 break; 563 } 564 565 #elif defined(VGP_amd64_darwin) 566 VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla; 567 UWord *stack = (UWord *)gst->guest_RSP; 568 569 vg_assert(trc == VEX_TRC_JMP_SYS_SYSCALL); 570 571 // GrP fixme hope syscalls aren't called with really shallow stacks... 572 canonical->sysno = gst->guest_RAX; 573 if (canonical->sysno != __NR_syscall) { 574 // stack[0] is return address 575 canonical->arg1 = gst->guest_RDI; 576 canonical->arg2 = gst->guest_RSI; 577 canonical->arg3 = gst->guest_RDX; 578 canonical->arg4 = gst->guest_R10; // not rcx with syscall insn 579 canonical->arg5 = gst->guest_R8; 580 canonical->arg6 = gst->guest_R9; 581 canonical->arg7 = stack[1]; 582 canonical->arg8 = stack[2]; 583 } else { 584 // GrP fixme hack handle syscall() 585 // GrP fixme what about __syscall() ? 586 // stack[0] is return address 587 // DDD: the tool can't see that the params have been shifted! 
Can 588 // lead to incorrect checking, I think, because the PRRAn/PSARn 589 // macros will mention the pre-shifted args. 590 canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(gst->guest_RDI); 591 vg_assert(canonical->sysno != __NR_syscall); 592 canonical->arg1 = gst->guest_RSI; 593 canonical->arg2 = gst->guest_RDX; 594 canonical->arg3 = gst->guest_R10; // not rcx with syscall insn 595 canonical->arg4 = gst->guest_R8; 596 canonical->arg5 = gst->guest_R9; 597 canonical->arg6 = stack[1]; 598 canonical->arg7 = stack[2]; 599 canonical->arg8 = stack[3]; 600 601 PRINT("SYSCALL[%d,?](%s) syscall(%s, ...); please stand by...\n", 602 VG_(getpid)(), /*tid,*/ 603 VG_SYSNUM_STRING(0), VG_SYSNUM_STRING(canonical->sysno)); 604 } 605 606 // no canonical->sysno adjustment needed 607 608 #elif defined(VGP_s390x_linux) 609 VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla; 610 canonical->sysno = gst->guest_SYSNO; 611 canonical->arg1 = gst->guest_r2; 612 canonical->arg2 = gst->guest_r3; 613 canonical->arg3 = gst->guest_r4; 614 canonical->arg4 = gst->guest_r5; 615 canonical->arg5 = gst->guest_r6; 616 canonical->arg6 = gst->guest_r7; 617 canonical->arg7 = 0; 618 canonical->arg8 = 0; 619 #else 620 # error "getSyscallArgsFromGuestState: unknown arch" 621 #endif 622 } 623 624 static 625 void putSyscallArgsIntoGuestState ( /*IN*/ SyscallArgs* canonical, 626 /*OUT*/VexGuestArchState* gst_vanilla ) 627 { 628 #if defined(VGP_x86_linux) 629 VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla; 630 gst->guest_EAX = canonical->sysno; 631 gst->guest_EBX = canonical->arg1; 632 gst->guest_ECX = canonical->arg2; 633 gst->guest_EDX = canonical->arg3; 634 gst->guest_ESI = canonical->arg4; 635 gst->guest_EDI = canonical->arg5; 636 gst->guest_EBP = canonical->arg6; 637 638 #elif defined(VGP_amd64_linux) 639 VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla; 640 gst->guest_RAX = canonical->sysno; 641 gst->guest_RDI = canonical->arg1; 642 gst->guest_RSI = canonical->arg2; 643 
   gst->guest_RDX = canonical->arg3;
   gst->guest_R10 = canonical->arg4;
   gst->guest_R8  = canonical->arg5;
   gst->guest_R9  = canonical->arg6;

#elif defined(VGP_ppc32_linux)
   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
   gst->guest_GPR0 = canonical->sysno;
   gst->guest_GPR3 = canonical->arg1;
   gst->guest_GPR4 = canonical->arg2;
   gst->guest_GPR5 = canonical->arg3;
   gst->guest_GPR6 = canonical->arg4;
   gst->guest_GPR7 = canonical->arg5;
   gst->guest_GPR8 = canonical->arg6;

#elif defined(VGP_ppc64_linux)
   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
   gst->guest_GPR0 = canonical->sysno;
   gst->guest_GPR3 = canonical->arg1;
   gst->guest_GPR4 = canonical->arg2;
   gst->guest_GPR5 = canonical->arg3;
   gst->guest_GPR6 = canonical->arg4;
   gst->guest_GPR7 = canonical->arg5;
   gst->guest_GPR8 = canonical->arg6;

#elif defined(VGP_arm_linux)
   VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
   gst->guest_R7 = canonical->sysno;
   gst->guest_R0 = canonical->arg1;
   gst->guest_R1 = canonical->arg2;
   gst->guest_R2 = canonical->arg3;
   gst->guest_R3 = canonical->arg4;
   gst->guest_R4 = canonical->arg5;
   gst->guest_R5 = canonical->arg6;

#elif defined(VGP_x86_darwin)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_ESP;

   /* Strip the class bits off the canonical number before writing it
      back to EAX. */
   gst->guest_EAX = VG_DARWIN_SYSNO_FOR_KERNEL(canonical->sysno);

   // GrP fixme? gst->guest_TEMP_EFLAG_C = 0;
   // stack[0] is return address
   stack[1] = canonical->arg1;
   stack[2] = canonical->arg2;
   stack[3] = canonical->arg3;
   stack[4] = canonical->arg4;
   stack[5] = canonical->arg5;
   stack[6] = canonical->arg6;
   stack[7] = canonical->arg7;
   stack[8] = canonical->arg8;

#elif defined(VGP_amd64_darwin)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_RSP;

   gst->guest_RAX = VG_DARWIN_SYSNO_FOR_KERNEL(canonical->sysno);
   // GrP fixme? gst->guest_TEMP_EFLAG_C = 0;

   // stack[0] is return address
   gst->guest_RDI = canonical->arg1;
   gst->guest_RSI = canonical->arg2;
   gst->guest_RDX = canonical->arg3;
   gst->guest_RCX = canonical->arg4;
   gst->guest_R8  = canonical->arg5;
   gst->guest_R9  = canonical->arg6;
   stack[1] = canonical->arg7;
   stack[2] = canonical->arg8;

#elif defined(VGP_s390x_linux)
   VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
   gst->guest_SYSNO = canonical->sysno;
   gst->guest_r2 = canonical->arg1;
   gst->guest_r3 = canonical->arg2;
   gst->guest_r4 = canonical->arg3;
   gst->guest_r5 = canonical->arg4;
   gst->guest_r6 = canonical->arg5;
   gst->guest_r7 = canonical->arg6;

#elif defined(VGP_mips32_linux)
   VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla;
   /* arg8 == __NR_syscall marks args that were shifted by one by
      getSyscallArgsFromGuestState's syscall() hack; undo the shift. */
   if (canonical->arg8 != __NR_syscall) {
      gst->guest_r2 = canonical->sysno;
      gst->guest_r4 = canonical->arg1;
      gst->guest_r5 = canonical->arg2;
      gst->guest_r6 = canonical->arg3;
      gst->guest_r7 = canonical->arg4;
      *((UInt*) (gst->guest_r29 + 16)) = canonical->arg5; // 16(guest_GPR29/sp)
      *((UInt*) (gst->guest_r29 + 20)) = canonical->arg6; // 20(sp)
   } else {
      canonical->arg8 = 0;
      gst->guest_r2 = __NR_syscall;
      gst->guest_r4 = canonical->sysno;
      gst->guest_r5 = canonical->arg1;
      gst->guest_r6 = canonical->arg2;
      gst->guest_r7 = canonical->arg3;
      *((UInt*) (gst->guest_r29 + 16)) = canonical->arg4; // 16(guest_GPR29/sp)
      *((UInt*) (gst->guest_r29 + 20)) = canonical->arg5; // 20(sp)
      *((UInt*) (gst->guest_r29 + 24)) = canonical->arg6; // 24(sp)
   }
#else
#  error "putSyscallArgsIntoGuestState: unknown arch"
#endif
}

/* Read the outcome of a completed syscall out of the guest state into
   canonical (SysRes) form.  'canonical->what' is always set to
   SsComplete. */
static
void getSyscallStatusFromGuestState ( /*OUT*/SyscallStatus*     canonical,
                                      /*IN*/ VexGuestArchState* gst_vanilla )
{
#  if defined(VGP_x86_linux)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_x86_linux)( gst->guest_EAX );
   canonical->what = SsComplete;

#  elif defined(VGP_amd64_linux)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_amd64_linux)( gst->guest_RAX );
   canonical->what = SsComplete;

#  elif defined(VGP_ppc32_linux)
   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
   /* CR0.SO (bit 28 of the assembled CR) is the error flag -- see the
      register-usage table at the top of this file. */
   UInt                cr   = LibVEX_GuestPPC32_get_CR( gst );
   UInt                cr0so = (cr >> 28) & 1;
   canonical->sres = VG_(mk_SysRes_ppc32_linux)( gst->guest_GPR3, cr0so );
   canonical->what = SsComplete;

#  elif defined(VGP_ppc64_linux)
   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
   UInt                cr   = LibVEX_GuestPPC64_get_CR( gst );
   UInt                cr0so = (cr >> 28) & 1;
   canonical->sres = VG_(mk_SysRes_ppc64_linux)( gst->guest_GPR3, cr0so );
   canonical->what = SsComplete;

#  elif defined(VGP_arm_linux)
   VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_arm_linux)( gst->guest_R0 );
   canonical->what = SsComplete;

#  elif defined(VGP_mips32_linux)
   VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla;
   UInt v0 = gst->guest_r2;    // v0
   UInt v1 = gst->guest_r3;    // v1
   UInt a3 = gst->guest_r7;    // a3
   canonical->sres = VG_(mk_SysRes_mips32_linux)( v0, v1, a3 );
   canonical->what = SsComplete;

#  elif defined(VGP_x86_darwin)
   /* duplicates logic in m_signals.VG_UCONTEXT_SYSCALL_SYSRES */
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   /* eflags.C set == syscall failed (see table at top of file). */
   UInt carry = 1 & LibVEX_GuestX86_get_eflags(gst);
   UInt err = 0;
   UInt wLO = 0;
   UInt wHI = 0;
   switch (gst->guest_SC_CLASS) {
      case VG_DARWIN_SYSCALL_CLASS_UNIX:
         // int $0x80 = Unix, 64-bit result
         err = carry;
         wLO = gst->guest_EAX;
         wHI = gst->guest_EDX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MACH:
         // int $0x81 = Mach, 32-bit result
         wLO = gst->guest_EAX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MDEP:
         // int $0x82 = mdep, 32-bit result
         wLO = gst->guest_EAX;
         break;
      default:
         vg_assert(0);
         break;
   }
   canonical->sres = VG_(mk_SysRes_x86_darwin)(
                        gst->guest_SC_CLASS, err ? True : False,
                        wHI, wLO
                     );
   canonical->what = SsComplete;

#  elif defined(VGP_amd64_darwin)
   /* duplicates logic in m_signals.VG_UCONTEXT_SYSCALL_SYSRES */
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   ULong carry = 1 & LibVEX_GuestAMD64_get_rflags(gst);
   ULong err = 0;
   ULong wLO = 0;
   ULong wHI = 0;
   switch (gst->guest_SC_CLASS) {
      case VG_DARWIN_SYSCALL_CLASS_UNIX:
         // syscall = Unix, 128-bit result
         err = carry;
         wLO = gst->guest_RAX;
         wHI = gst->guest_RDX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MACH:
         // syscall = Mach, 64-bit result
         wLO = gst->guest_RAX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MDEP:
         // syscall = mdep, 64-bit result
         wLO = gst->guest_RAX;
         break;
      default:
         vg_assert(0);
         break;
   }
   canonical->sres = VG_(mk_SysRes_amd64_darwin)(
                        gst->guest_SC_CLASS, err ? True : False,
                        wHI, wLO
                     );
   canonical->what = SsComplete;

#  elif defined(VGP_s390x_linux)
   VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_s390x_linux)( gst->guest_r2 );
   canonical->what = SsComplete;

#  else
#    error "getSyscallStatusFromGuestState: unknown arch"
#  endif
}

/* Write a completed syscall outcome back into the guest state's
   result register(s), and notify the tool (VG_TRACK post_reg_write)
   of every guest register written. */
static
void putSyscallStatusIntoGuestState ( /*IN*/ ThreadId tid,
                                      /*IN*/ SyscallStatus*     canonical,
                                      /*OUT*/VexGuestArchState* gst_vanilla )
{
#  if defined(VGP_x86_linux)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   vg_assert(canonical->what == SsComplete);
   if (sr_isError(canonical->sres)) {
      /* This isn't exactly right, in that really a Failure with res
         not in the range 1 .. 4095 is unrepresentable in the
         Linux-x86 scheme.  Oh well. */
      gst->guest_EAX = - (Int)sr_Err(canonical->sres);
   } else {
      gst->guest_EAX = sr_Res(canonical->sres);
   }
   VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
             OFFSET_x86_EAX, sizeof(UWord) );

#  elif defined(VGP_amd64_linux)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   vg_assert(canonical->what == SsComplete);
   if (sr_isError(canonical->sres)) {
      /* This isn't exactly right, in that really a Failure with res
         not in the range 1 .. 4095 is unrepresentable in the
         Linux-amd64 scheme.  Oh well.
*/ 889 gst->guest_RAX = - (Long)sr_Err(canonical->sres); 890 } else { 891 gst->guest_RAX = sr_Res(canonical->sres); 892 } 893 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 894 OFFSET_amd64_RAX, sizeof(UWord) ); 895 896 # elif defined(VGP_ppc32_linux) 897 VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla; 898 UInt old_cr = LibVEX_GuestPPC32_get_CR(gst); 899 vg_assert(canonical->what == SsComplete); 900 if (sr_isError(canonical->sres)) { 901 /* set CR0.SO */ 902 LibVEX_GuestPPC32_put_CR( old_cr | (1<<28), gst ); 903 gst->guest_GPR3 = sr_Err(canonical->sres); 904 } else { 905 /* clear CR0.SO */ 906 LibVEX_GuestPPC32_put_CR( old_cr & ~(1<<28), gst ); 907 gst->guest_GPR3 = sr_Res(canonical->sres); 908 } 909 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 910 OFFSET_ppc32_GPR3, sizeof(UWord) ); 911 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 912 OFFSET_ppc32_CR0_0, sizeof(UChar) ); 913 914 # elif defined(VGP_ppc64_linux) 915 VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla; 916 UInt old_cr = LibVEX_GuestPPC64_get_CR(gst); 917 vg_assert(canonical->what == SsComplete); 918 if (sr_isError(canonical->sres)) { 919 /* set CR0.SO */ 920 LibVEX_GuestPPC64_put_CR( old_cr | (1<<28), gst ); 921 gst->guest_GPR3 = sr_Err(canonical->sres); 922 } else { 923 /* clear CR0.SO */ 924 LibVEX_GuestPPC64_put_CR( old_cr & ~(1<<28), gst ); 925 gst->guest_GPR3 = sr_Res(canonical->sres); 926 } 927 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 928 OFFSET_ppc64_GPR3, sizeof(UWord) ); 929 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 930 OFFSET_ppc64_CR0_0, sizeof(UChar) ); 931 932 # elif defined(VGP_arm_linux) 933 VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla; 934 vg_assert(canonical->what == SsComplete); 935 if (sr_isError(canonical->sres)) { 936 /* This isn't exactly right, in that really a Failure with res 937 not in the range 1 .. 4095 is unrepresentable in the 938 Linux-arm scheme. Oh well. 
*/ 939 gst->guest_R0 = - (Int)sr_Err(canonical->sres); 940 } else { 941 gst->guest_R0 = sr_Res(canonical->sres); 942 } 943 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 944 OFFSET_arm_R0, sizeof(UWord) ); 945 946 #elif defined(VGP_x86_darwin) 947 VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla; 948 SysRes sres = canonical->sres; 949 vg_assert(canonical->what == SsComplete); 950 /* Unfortunately here we have to break abstraction and look 951 directly inside 'res', in order to decide what to do. */ 952 switch (sres._mode) { 953 case SysRes_MACH: // int $0x81 = Mach, 32-bit result 954 case SysRes_MDEP: // int $0x82 = mdep, 32-bit result 955 gst->guest_EAX = sres._wLO; 956 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 957 OFFSET_x86_EAX, sizeof(UInt) ); 958 break; 959 case SysRes_UNIX_OK: // int $0x80 = Unix, 64-bit result 960 case SysRes_UNIX_ERR: // int $0x80 = Unix, 64-bit error 961 gst->guest_EAX = sres._wLO; 962 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 963 OFFSET_x86_EAX, sizeof(UInt) ); 964 gst->guest_EDX = sres._wHI; 965 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 966 OFFSET_x86_EDX, sizeof(UInt) ); 967 LibVEX_GuestX86_put_eflag_c( sres._mode==SysRes_UNIX_ERR ? 1 : 0, 968 gst ); 969 // GrP fixme sets defined for entire eflags, not just bit c 970 // DDD: this breaks exp-ptrcheck. 971 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 972 offsetof(VexGuestX86State, guest_CC_DEP1), sizeof(UInt) ); 973 break; 974 default: 975 vg_assert(0); 976 break; 977 } 978 979 #elif defined(VGP_amd64_darwin) 980 VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla; 981 SysRes sres = canonical->sres; 982 vg_assert(canonical->what == SsComplete); 983 /* Unfortunately here we have to break abstraction and look 984 directly inside 'res', in order to decide what to do. 
*/ 985 switch (sres._mode) { 986 case SysRes_MACH: // syscall = Mach, 64-bit result 987 case SysRes_MDEP: // syscall = mdep, 64-bit result 988 gst->guest_RAX = sres._wLO; 989 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 990 OFFSET_amd64_RAX, sizeof(ULong) ); 991 break; 992 case SysRes_UNIX_OK: // syscall = Unix, 128-bit result 993 case SysRes_UNIX_ERR: // syscall = Unix, 128-bit error 994 gst->guest_RAX = sres._wLO; 995 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 996 OFFSET_amd64_RAX, sizeof(ULong) ); 997 gst->guest_RDX = sres._wHI; 998 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 999 OFFSET_amd64_RDX, sizeof(ULong) ); 1000 LibVEX_GuestAMD64_put_rflag_c( sres._mode==SysRes_UNIX_ERR ? 1 : 0, 1001 gst ); 1002 // GrP fixme sets defined for entire rflags, not just bit c 1003 // DDD: this breaks exp-ptrcheck. 1004 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 1005 offsetof(VexGuestAMD64State, guest_CC_DEP1), sizeof(ULong) ); 1006 break; 1007 default: 1008 vg_assert(0); 1009 break; 1010 } 1011 1012 # elif defined(VGP_s390x_linux) 1013 VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla; 1014 vg_assert(canonical->what == SsComplete); 1015 if (sr_isError(canonical->sres)) { 1016 gst->guest_r2 = - (Long)sr_Err(canonical->sres); 1017 } else { 1018 gst->guest_r2 = sr_Res(canonical->sres); 1019 } 1020 1021 # elif defined(VGP_mips32_linux) 1022 VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla; 1023 vg_assert(canonical->what == SsComplete); 1024 if (sr_isError(canonical->sres)) { 1025 gst->guest_r2 = (Int)sr_Err(canonical->sres); 1026 gst->guest_r7 = (Int)sr_Err(canonical->sres); 1027 } else { 1028 gst->guest_r2 = sr_Res(canonical->sres); 1029 gst->guest_r3 = sr_ResEx(canonical->sres); 1030 gst->guest_r7 = (Int)sr_Err(canonical->sres); 1031 } 1032 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 1033 OFFSET_mips32_r2, sizeof(UWord) ); 1034 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 1035 OFFSET_mips32_r3, sizeof(UWord) ); 1036 VG_TRACK( 
post_reg_write, Vg_CoreSysCall, tid, 1037 OFFSET_mips32_r7, sizeof(UWord) ); 1038 1039 # else 1040 # error "putSyscallStatusIntoGuestState: unknown arch" 1041 # endif 1042 } 1043 1044 1045 /* Tell me the offsets in the guest state of the syscall params, so 1046 that the scalar argument checkers don't have to have this info 1047 hardwired. */ 1048 1049 static 1050 void getSyscallArgLayout ( /*OUT*/SyscallArgLayout* layout ) 1051 { 1052 #if defined(VGP_x86_linux) 1053 layout->o_sysno = OFFSET_x86_EAX; 1054 layout->o_arg1 = OFFSET_x86_EBX; 1055 layout->o_arg2 = OFFSET_x86_ECX; 1056 layout->o_arg3 = OFFSET_x86_EDX; 1057 layout->o_arg4 = OFFSET_x86_ESI; 1058 layout->o_arg5 = OFFSET_x86_EDI; 1059 layout->o_arg6 = OFFSET_x86_EBP; 1060 layout->uu_arg7 = -1; /* impossible value */ 1061 layout->uu_arg8 = -1; /* impossible value */ 1062 1063 #elif defined(VGP_amd64_linux) 1064 layout->o_sysno = OFFSET_amd64_RAX; 1065 layout->o_arg1 = OFFSET_amd64_RDI; 1066 layout->o_arg2 = OFFSET_amd64_RSI; 1067 layout->o_arg3 = OFFSET_amd64_RDX; 1068 layout->o_arg4 = OFFSET_amd64_R10; 1069 layout->o_arg5 = OFFSET_amd64_R8; 1070 layout->o_arg6 = OFFSET_amd64_R9; 1071 layout->uu_arg7 = -1; /* impossible value */ 1072 layout->uu_arg8 = -1; /* impossible value */ 1073 1074 #elif defined(VGP_ppc32_linux) 1075 layout->o_sysno = OFFSET_ppc32_GPR0; 1076 layout->o_arg1 = OFFSET_ppc32_GPR3; 1077 layout->o_arg2 = OFFSET_ppc32_GPR4; 1078 layout->o_arg3 = OFFSET_ppc32_GPR5; 1079 layout->o_arg4 = OFFSET_ppc32_GPR6; 1080 layout->o_arg5 = OFFSET_ppc32_GPR7; 1081 layout->o_arg6 = OFFSET_ppc32_GPR8; 1082 layout->uu_arg7 = -1; /* impossible value */ 1083 layout->uu_arg8 = -1; /* impossible value */ 1084 1085 #elif defined(VGP_ppc64_linux) 1086 layout->o_sysno = OFFSET_ppc64_GPR0; 1087 layout->o_arg1 = OFFSET_ppc64_GPR3; 1088 layout->o_arg2 = OFFSET_ppc64_GPR4; 1089 layout->o_arg3 = OFFSET_ppc64_GPR5; 1090 layout->o_arg4 = OFFSET_ppc64_GPR6; 1091 layout->o_arg5 = OFFSET_ppc64_GPR7; 1092 layout->o_arg6 = 
OFFSET_ppc64_GPR8; 1093 layout->uu_arg7 = -1; /* impossible value */ 1094 layout->uu_arg8 = -1; /* impossible value */ 1095 1096 #elif defined(VGP_arm_linux) 1097 layout->o_sysno = OFFSET_arm_R7; 1098 layout->o_arg1 = OFFSET_arm_R0; 1099 layout->o_arg2 = OFFSET_arm_R1; 1100 layout->o_arg3 = OFFSET_arm_R2; 1101 layout->o_arg4 = OFFSET_arm_R3; 1102 layout->o_arg5 = OFFSET_arm_R4; 1103 layout->o_arg6 = OFFSET_arm_R5; 1104 layout->uu_arg7 = -1; /* impossible value */ 1105 layout->uu_arg8 = -1; /* impossible value */ 1106 1107 #elif defined(VGP_mips32_linux) 1108 layout->o_sysno = OFFSET_mips32_r2; 1109 layout->o_arg1 = OFFSET_mips32_r4; 1110 layout->o_arg2 = OFFSET_mips32_r5; 1111 layout->o_arg3 = OFFSET_mips32_r6; 1112 layout->o_arg4 = OFFSET_mips32_r7; 1113 layout->s_arg5 = sizeof(UWord) * 4; 1114 layout->s_arg6 = sizeof(UWord) * 5; 1115 layout->uu_arg7 = -1; /* impossible value */ 1116 layout->uu_arg8 = -1; /* impossible value */ 1117 1118 #elif defined(VGP_x86_darwin) 1119 layout->o_sysno = OFFSET_x86_EAX; 1120 // syscall parameters are on stack in C convention 1121 layout->s_arg1 = sizeof(UWord) * 1; 1122 layout->s_arg2 = sizeof(UWord) * 2; 1123 layout->s_arg3 = sizeof(UWord) * 3; 1124 layout->s_arg4 = sizeof(UWord) * 4; 1125 layout->s_arg5 = sizeof(UWord) * 5; 1126 layout->s_arg6 = sizeof(UWord) * 6; 1127 layout->s_arg7 = sizeof(UWord) * 7; 1128 layout->s_arg8 = sizeof(UWord) * 8; 1129 1130 #elif defined(VGP_amd64_darwin) 1131 layout->o_sysno = OFFSET_amd64_RAX; 1132 layout->o_arg1 = OFFSET_amd64_RDI; 1133 layout->o_arg2 = OFFSET_amd64_RSI; 1134 layout->o_arg3 = OFFSET_amd64_RDX; 1135 layout->o_arg4 = OFFSET_amd64_RCX; 1136 layout->o_arg5 = OFFSET_amd64_R8; 1137 layout->o_arg6 = OFFSET_amd64_R9; 1138 layout->s_arg7 = sizeof(UWord) * 1; 1139 layout->s_arg8 = sizeof(UWord) * 2; 1140 1141 #elif defined(VGP_s390x_linux) 1142 layout->o_sysno = OFFSET_s390x_SYSNO; 1143 layout->o_arg1 = OFFSET_s390x_r2; 1144 layout->o_arg2 = OFFSET_s390x_r3; 1145 layout->o_arg3 = 
OFFSET_s390x_r4; 1146 layout->o_arg4 = OFFSET_s390x_r5; 1147 layout->o_arg5 = OFFSET_s390x_r6; 1148 layout->o_arg6 = OFFSET_s390x_r7; 1149 layout->uu_arg7 = -1; /* impossible value */ 1150 layout->uu_arg8 = -1; /* impossible value */ 1151 #else 1152 # error "getSyscallLayout: unknown arch" 1153 #endif 1154 } 1155 1156 1157 /* --------------------------------------------------------------------- 1158 The main driver logic 1159 ------------------------------------------------------------------ */ 1160 1161 /* Finding the handlers for a given syscall, or faking up one 1162 when no handler is found. */ 1163 1164 static 1165 void bad_before ( ThreadId tid, 1166 SyscallArgLayout* layout, 1167 /*MOD*/SyscallArgs* args, 1168 /*OUT*/SyscallStatus* status, 1169 /*OUT*/UWord* flags ) 1170 { 1171 VG_(dmsg)("WARNING: unhandled syscall: %s\n", 1172 VG_SYSNUM_STRING_EXTRA(args->sysno)); 1173 if (VG_(clo_verbosity) > 1) { 1174 VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size)); 1175 } 1176 VG_(dmsg)("You may be able to write your own handler.\n"); 1177 VG_(dmsg)("Read the file README_MISSING_SYSCALL_OR_IOCTL.\n"); 1178 VG_(dmsg)("Nevertheless we consider this a bug. 
Please report\n"); 1179 VG_(dmsg)("it at http://valgrind.org/support/bug_reports.html.\n"); 1180 1181 SET_STATUS_Failure(VKI_ENOSYS); 1182 } 1183 1184 static SyscallTableEntry bad_sys = 1185 { bad_before, NULL }; 1186 1187 static const SyscallTableEntry* get_syscall_entry ( Int syscallno ) 1188 { 1189 const SyscallTableEntry* sys = NULL; 1190 1191 # if defined(VGO_linux) 1192 sys = ML_(get_linux_syscall_entry)( syscallno ); 1193 1194 # elif defined(VGO_darwin) 1195 Int idx = VG_DARWIN_SYSNO_INDEX(syscallno); 1196 1197 switch (VG_DARWIN_SYSNO_CLASS(syscallno)) { 1198 case VG_DARWIN_SYSCALL_CLASS_UNIX: 1199 if (idx >= 0 && idx < ML_(syscall_table_size) && 1200 ML_(syscall_table)[idx].before != NULL) 1201 sys = &ML_(syscall_table)[idx]; 1202 break; 1203 case VG_DARWIN_SYSCALL_CLASS_MACH: 1204 if (idx >= 0 && idx < ML_(mach_trap_table_size) && 1205 ML_(mach_trap_table)[idx].before != NULL) 1206 sys = &ML_(mach_trap_table)[idx]; 1207 break; 1208 case VG_DARWIN_SYSCALL_CLASS_MDEP: 1209 if (idx >= 0 && idx < ML_(mdep_trap_table_size) && 1210 ML_(mdep_trap_table)[idx].before != NULL) 1211 sys = &ML_(mdep_trap_table)[idx]; 1212 break; 1213 default: 1214 vg_assert(0); 1215 break; 1216 } 1217 1218 # else 1219 # error Unknown OS 1220 # endif 1221 1222 return sys == NULL ? &bad_sys : sys; 1223 } 1224 1225 1226 /* Add and remove signals from mask so that we end up telling the 1227 kernel the state we actually want rather than what the client 1228 wants. 
   wants. */
static void sanitize_client_sigmask(vki_sigset_t *mask)
{
   /* KILL and STOP cannot be caught or blocked; VG_SIGVGKILL is
      Valgrind's internal thread-kill signal and must never be
      blocked while the client is inside a syscall. */
   VG_(sigdelset)(mask, VKI_SIGKILL);
   VG_(sigdelset)(mask, VKI_SIGSTOP);
   VG_(sigdelset)(mask, VG_SIGVGKILL); /* never block */
}

/* Per-thread record of the syscall currently in flight: the original
   args, the (possibly pre-handler-modified) args, the status, and the
   Sf* flags returned by the pre-handler. */
typedef
   struct {
      SyscallArgs   orig_args;
      SyscallArgs   args;
      SyscallStatus status;
      UWord         flags;
   }
   SyscallInfo;

SyscallInfo syscallInfo[VG_N_THREADS];


/* The scheduler needs to be able to zero out these records after a
   fork, hence this is exported from m_syswrap. */
void VG_(clear_syscallInfo) ( Int tid )
{
   vg_assert(tid >= 0 && tid < VG_N_THREADS);
   VG_(memset)( & syscallInfo[tid], 0, sizeof( syscallInfo[tid] ));
   syscallInfo[tid].status.what = SsIdle;
}

/* One-shot lazy initialisation of syscallInfo[]; all entries start
   out SsIdle. */
static void ensure_initialised ( void )
{
   Int i;
   static Bool init_done = False;
   if (init_done)
      return;
   init_done = True;
   for (i = 0; i < VG_N_THREADS; i++) {
      VG_(clear_syscallInfo)( i );
   }
}

/* --- This is the main function of this file. --- */

void VG_(client_syscall) ( ThreadId tid, UInt trc )
{
   Word                     sysno;
   ThreadState*             tst;
   const SyscallTableEntry* ent;
   SyscallArgLayout         layout;
   SyscallInfo*             sci;

   ensure_initialised();

   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

   tst = VG_(get_ThreadState)(tid);

   /* BEGIN ensure root thread's stack is suitably mapped */
   /* In some rare circumstances, we may do the syscall without the
      bottom page of the stack being mapped, because the stack pointer
      was moved down just a few instructions before the syscall
      instruction, and there have been no memory references since
      then, that would cause a call to VG_(extend_stack) to have
      happened.

      In native execution that's OK: the kernel automagically extends
      the stack's mapped area down to cover the stack pointer (or sp -
      redzone, really).  In simulated normal execution that's OK too,
      since any signals we get from accessing below the mapped area of
      the (guest's) stack lead us to VG_(extend_stack), where we
      simulate the kernel's stack extension logic.  But that leaves
      the problem of entering a syscall with the SP unmapped.  Because
      the kernel doesn't know that the segment immediately above SP is
      supposed to be a grow-down segment, it causes the syscall to
      fail, and thereby causes a divergence between native behaviour
      (syscall succeeds) and simulated behaviour (syscall fails).

      This is quite a rare failure mode.  It has only been seen
      affecting calls to sys_readlink on amd64-linux, and even then it
      requires a certain code sequence around the syscall to trigger
      it.  Here is one:

        extern int my_readlink ( const char* path );
        asm(
        ".text\n"
        ".globl my_readlink\n"
        "my_readlink:\n"
        "\tsubq    $0x1008,%rsp\n"
        "\tmovq    %rdi,%rdi\n"              // path is in rdi
        "\tmovq    %rsp,%rsi\n"              // &buf[0] -> rsi
        "\tmovl    $0x1000,%edx\n"           // sizeof(buf) in rdx
        "\tmovl    $"__NR_READLINK",%eax\n"  // syscall number
        "\tsyscall\n"
        "\taddq    $0x1008,%rsp\n"
        "\tret\n"
        ".previous\n"
        );

      For more details, see bug #156404
      (https://bugs.kde.org/show_bug.cgi?id=156404).

      The fix is actually very simple.  We simply need to call
      VG_(extend_stack) for this thread, handing it the lowest
      possible valid address for stack (sp - redzone), to ensure the
      pages all the way down to that address, are mapped.  Because
      this is a potentially expensive and frequent operation, we
      filter in two ways:

      First, only the main thread (tid=1) has a growdown stack.  So
      ignore all others.  It is conceivable, although highly unlikely,
      that the main thread exits, and later another thread is
      allocated tid=1, but that's harmless, I believe;
      VG_(extend_stack) will do nothing when applied to a non-root
      thread.

      Secondly, first call VG_(am_find_nsegment) directly, to see if
      the page holding (sp - redzone) is mapped correctly.  If so, do
      nothing.  This is almost always the case.  VG_(extend_stack)
      calls VG_(am_find_nsegment) twice, so this optimisation -- and
      that's all it is -- more or less halves the number of calls to
      VG_(am_find_nsegment) required.

      TODO: the test "seg->kind == SkAnonC" is really inadequate,
      because although it tests whether the segment is mapped
      _somehow_, it doesn't check that it has the right permissions
      (r,w, maybe x) ?  We could test that here, but it will also be
      necessary to fix the corresponding test in VG_(extend_stack).

      All this guff is of course Linux-specific.  Hence the ifdef.
   */
#  if defined(VGO_linux)
   if (tid == 1/*ROOT THREAD*/) {
      Addr stackMin = VG_(get_SP)(tid) - VG_STACK_REDZONE_SZB;
      NSegment const* seg = VG_(am_find_nsegment)(stackMin);
      if (seg && seg->kind == SkAnonC) {
         /* stackMin is already mapped.  Nothing to do. */
      } else {
         (void)VG_(extend_stack)( stackMin,
                                  tst->client_stack_szB );
      }
   }
#  endif
   /* END ensure root thread's stack is suitably mapped */

   /* First off, get the syscall args and number.  This is a
      platform-dependent action. */

   sci = & syscallInfo[tid];
   vg_assert(sci->status.what == SsIdle);

   getSyscallArgsFromGuestState( &sci->orig_args, &tst->arch.vex, trc );

   /* Copy .orig_args to .args.  The pre-handler may modify .args, but
      we want to keep the originals too, just in case. */
   sci->args = sci->orig_args;

   /* Save the syscall number in the thread state in case the syscall
      is interrupted by a signal. */
   sysno = sci->orig_args.sysno;

   /* It's sometimes useful, as a crude debugging hack, to get a
      stack trace at each (or selected) syscalls. */
   if (0 && sysno == __NR_ioctl) {
      VG_(umsg)("\nioctl:\n");
      VG_(get_and_pp_StackTrace)(tid, 10);
      VG_(umsg)("\n");
   }

#  if defined(VGO_darwin)
   /* Record syscall class.  But why?  Because the syscall might be
      interrupted by a signal, and in the signal handler (which will
      be m_signals.async_signalhandler) we will need to build a SysRes
      reflecting the syscall return result.  In order to do that we
      need to know the syscall class.  Hence stash it in the guest
      state of this thread.  This madness is not needed on Linux
      because it only has a single syscall return convention and so
      there is no ambiguity involved in converting the post-signal
      machine state into a SysRes. */
   tst->arch.vex.guest_SC_CLASS = VG_DARWIN_SYSNO_CLASS(sysno);
#  endif

   /* The default what-to-do-next thing is hand the syscall to the
      kernel, so we pre-set that here.  Set .sres to something
      harmless looking (is irrelevant because .what is not
      SsComplete.) */
   sci->status.what = SsHandToKernel;
   sci->status.sres = VG_(mk_SysRes_Error)(0);
   sci->flags       = 0;

   /* Fetch the syscall's handlers.  If no handlers exist for this
      syscall, we are given dummy handlers which force an immediate
      return with ENOSYS. */
   ent = get_syscall_entry(sysno);

   /* Fetch the layout information, which tells us where in the guest
      state the syscall args reside.  This is a platform-dependent
      action.  This info is needed so that the scalar syscall argument
      checks (PRE_REG_READ calls) know which bits of the guest state
      they need to inspect. */
   getSyscallArgLayout( &layout );

   /* Make sure the tmp signal mask matches the real signal mask;
      sigsuspend may change this. */
   vg_assert(VG_(iseqsigset)(&tst->sig_mask, &tst->tmp_sig_mask));

   /* Right, we're finally ready to Party.  Call the pre-handler and
      see what we get back.  At this point: 

        sci->status.what  is Unset (we don't know yet).
        sci->orig_args    contains the original args.
        sci->args         is the same as sci->orig_args.
        sci->flags        is zero.
   */

   PRINT("SYSCALL[%d,%d](%s) ",
      VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno));

   /* Do any pre-syscall actions */
   if (VG_(needs).syscall_wrapper) {
      UWord tmpv[8];
      tmpv[0] = sci->orig_args.arg1;
      tmpv[1] = sci->orig_args.arg2;
      tmpv[2] = sci->orig_args.arg3;
      tmpv[3] = sci->orig_args.arg4;
      tmpv[4] = sci->orig_args.arg5;
      tmpv[5] = sci->orig_args.arg6;
      tmpv[6] = sci->orig_args.arg7;
      tmpv[7] = sci->orig_args.arg8;
      VG_TDICT_CALL(tool_pre_syscall, tid, sysno,
                    &tmpv[0], sizeof(tmpv)/sizeof(tmpv[0]));
   }

   vg_assert(ent);
   vg_assert(ent->before);
   (ent->before)( tid,
                  &layout, 
                  &sci->args, &sci->status, &sci->flags );
   
   /* The pre-handler may have modified:
         sci->args
         sci->status
         sci->flags
      All else remains unchanged. 
      Although the args may be modified, pre handlers are not allowed
      to change the syscall number.
   */
   /* Now we proceed according to what the pre-handler decided. */
   vg_assert(sci->status.what == SsHandToKernel
             || sci->status.what == SsComplete);
   vg_assert(sci->args.sysno == sci->orig_args.sysno);

   if (sci->status.what == SsComplete && !sr_isError(sci->status.sres)) {
      /* The pre-handler completed the syscall itself, declaring
         success. */
      if (sci->flags & SfNoWriteResult) {
         PRINT(" --> [pre-success] NoWriteResult");
      } else {
         PRINT(" --> [pre-success] Success(0x%llx:0x%llx)",
               (ULong)sr_ResHI(sci->status.sres),
               (ULong)sr_Res(sci->status.sres));
      }                                      
      /* In this case the allowable flags are to ask for a signal-poll
         and/or a yield after the call.  Changing the args isn't
         allowed. */
      vg_assert(0 == (sci->flags 
                      & ~(SfPollAfter | SfYieldAfter | SfNoWriteResult)));
      vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
   }

   else
   if (sci->status.what == SsComplete && sr_isError(sci->status.sres)) {
      /* The pre-handler decided to fail syscall itself. */
      PRINT(" --> [pre-fail] Failure(0x%llx)", (ULong)sr_Err(sci->status.sres));
      /* In this case, the pre-handler is also allowed to ask for the
         post-handler to be run anyway.  Changing the args is not
         allowed. */
      vg_assert(0 == (sci->flags & ~(SfMayBlock | SfPostOnFail | SfPollAfter)));
      vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
   }

   else
   if (sci->status.what != SsHandToKernel) {
      /* huh?! */
      vg_assert(0);
   }

   else /* (sci->status.what == HandToKernel) */ {
      /* Ok, this is the usual case -- and the complicated one.  There
         are two subcases: sync and async.  async is the general case
         and is to be used when there is any possibility that the
         syscall might block [a fact that the pre-handler must tell us
         via the sci->flags field.]  Because the tidying-away /
         context-switch overhead of the async case could be large, if
         we are sure that the syscall will not block, we fast-track it
         by doing it directly in this thread, which is a lot
         simpler. */

      /* Check that the given flags are allowable: MayBlock, PollAfter
         and PostOnFail are ok. */
      vg_assert(0 == (sci->flags & ~(SfMayBlock | SfPostOnFail | SfPollAfter)));

      if (sci->flags & SfMayBlock) {

         /* Syscall may block, so run it asynchronously */
         vki_sigset_t mask;

         PRINT(" --> [async] ... \n");

         mask = tst->sig_mask;
         sanitize_client_sigmask(&mask);

         /* Gack.  More impedance matching.  Copy the possibly
            modified syscall args back into the guest state. */
         /* JRS 2009-Mar-16: if the syscall args are possibly modified,
            then this assertion is senseless:
              vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
            The case that exposed it was sys_posix_spawn on Darwin,
            which heavily modifies its arguments but then lets the call
            go through anyway, with SfToBlock set, hence we end up here. */
         putSyscallArgsIntoGuestState( &sci->args, &tst->arch.vex );

         /* Drop the bigLock */
         VG_(release_BigLock)(tid, VgTs_WaitSys, "VG_(client_syscall)[async]");
         /* Urr.  We're now in a race against other threads trying to
            acquire the bigLock.  I guess that doesn't matter provided
            that do_syscall_for_client only touches thread-local
            state. */

         /* Do the call, which operates directly on the guest state,
            not on our abstracted copies of the args/result. */
         do_syscall_for_client(sysno, tst, &mask);

         /* do_syscall_for_client may not return if the syscall was
            interrupted by a signal.  In that case, flow of control is
            first to m_signals.async_sighandler, which calls
            VG_(fixup_guest_state_after_syscall_interrupted), which
            fixes up the guest state, and possibly calls
            VG_(post_syscall).  Once that's done, control drops back
            to the scheduler.  */

         /* Darwin: do_syscall_for_client may not return if the 
            syscall was workq_ops(WQOPS_THREAD_RETURN) and the kernel 
            responded by starting the thread at wqthread_hijack(reuse=1)
            (to run another workqueue item).  In that case, wqthread_hijack 
            calls ML_(wqthread_continue), which is similar to 
            VG_(fixup_guest_state_after_syscall_interrupted). */

         /* Reacquire the lock */
         VG_(acquire_BigLock)(tid, "VG_(client_syscall)[async]");

         /* Even more impedance matching.  Extract the syscall status
            from the guest state. */
         getSyscallStatusFromGuestState( &sci->status, &tst->arch.vex );
         vg_assert(sci->status.what == SsComplete);

         /* Be decorative, if required. */
         if (VG_(clo_trace_syscalls)) {
            Bool failed = sr_isError(sci->status.sres);
            if (failed) {
               PRINT("SYSCALL[%d,%d](%s) ... [async] --> Failure(0x%llx)",
                     VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno),
                     (ULong)sr_Err(sci->status.sres));
            } else {
               PRINT("SYSCALL[%d,%d](%s) ... [async] --> "
                     "Success(0x%llx:0x%llx)",
                     VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno),
                     (ULong)sr_ResHI(sci->status.sres),
                     (ULong)sr_Res(sci->status.sres) );
            }
         }

      } else {

         /* run the syscall directly */
         /* The pre-handler may have modified the syscall args, but
            since we're passing values in ->args directly to the
            kernel, there's no point in flushing them back to the
            guest state.  Indeed doing so could be construed as
            incorrect. */
         SysRes sres 
            = VG_(do_syscall)(sysno, sci->args.arg1, sci->args.arg2, 
                                     sci->args.arg3, sci->args.arg4, 
                                     sci->args.arg5, sci->args.arg6,
                                     sci->args.arg7, sci->args.arg8 );
         sci->status = convert_SysRes_to_SyscallStatus(sres);

         /* Be decorative, if required. */
         if (VG_(clo_trace_syscalls)) {
            Bool failed = sr_isError(sci->status.sres);
            if (failed) {
               PRINT("[sync] --> Failure(0x%llx)",
                     (ULong)sr_Err(sci->status.sres) );
            } else {
               PRINT("[sync] --> Success(0x%llx:0x%llx)",
                     (ULong)sr_ResHI(sci->status.sres),
                     (ULong)sr_Res(sci->status.sres) );
            }
         }
      }
   }

   vg_assert(sci->status.what == SsComplete);

   vg_assert(VG_(is_running_thread)(tid));

   /* Dump the syscall result back in the guest state.  This is
      a platform-specific action. */
   if (!(sci->flags & SfNoWriteResult))
      putSyscallStatusIntoGuestState( tid, &sci->status, &tst->arch.vex );

   /* Situation now:
      - the guest state is now correctly modified following the syscall
      - modified args, original args and syscall status are still
        available in the syscallInfo[] entry for this syscall.

      Now go on to do the post-syscall actions (read on down ..)
   */
   PRINT(" ");
   VG_(post_syscall)(tid);
   PRINT("\n");
}


/* Perform post syscall actions.  The expected state on entry is
   precisely as at the end of VG_(client_syscall), that is:

   - guest state up to date following the syscall
   - modified args, original args and syscall status are still
     available in the syscallInfo[] entry for this syscall.
   - syscall status matches what's in the guest state.

   There are two ways to get here: the normal way -- being called by
   VG_(client_syscall), and the unusual way, from
   VG_(fixup_guest_state_after_syscall_interrupted).
   Darwin: there's a third way, ML_(wqthread_continue). 
*/
void VG_(post_syscall) (ThreadId tid)
{
   SyscallInfo*             sci;
   const SyscallTableEntry* ent;
   SyscallStatus            test_status;
   ThreadState*             tst;
   Word sysno;

   /* Preliminaries */
   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

   tst = VG_(get_ThreadState)(tid);
   sci = & syscallInfo[tid];

   /* m_signals.sigvgkill_handler might call here even when not in
      a syscall. */
   if (sci->status.what == SsIdle || sci->status.what == SsHandToKernel) {
      sci->status.what = SsIdle;
      return;
   }

   /* Validate current syscallInfo entry.  In particular we require
      that the current .status matches what's actually in the guest
      state.  At least in the normal case where we have actually
      previously written the result into the guest state. */
   vg_assert(sci->status.what == SsComplete);

   getSyscallStatusFromGuestState( &test_status, &tst->arch.vex );
   if (!(sci->flags & SfNoWriteResult))
      vg_assert(eq_SyscallStatus( &sci->status, &test_status ));
   /* Failure of the above assertion on Darwin can indicate a problem
      in the syscall wrappers that pre-fail or pre-succeed the
      syscall, by calling SET_STATUS_Success or SET_STATUS_Failure,
      when they really should call SET_STATUS_from_SysRes.  The former
      create a UNIX-class syscall result on Darwin, which may not be
      correct for the syscall; if that's the case then this assertion
      fires.  See PRE(thread_fast_set_cthread_self) for an example.  On
      non-Darwin platforms this assertion should never fail, and this
      comment is completely irrelevant. */
   /* Ok, looks sane */

   /* Get the system call number.  Because the pre-handler isn't
      allowed to mess with it, it should be the same for both the
      original and potentially-modified args. */
   vg_assert(sci->args.sysno == sci->orig_args.sysno);
   sysno = sci->args.sysno;
   ent = get_syscall_entry(sysno);

   /* pre: status == Complete (asserted above) */
   /* Consider either success or failure.  Now run the post handler if:
      - it exists, and
      - Success or (Failure and PostOnFail is set)
   */
   if (ent->after
       && ((!sr_isError(sci->status.sres))
           || (sr_isError(sci->status.sres)
               && (sci->flags & SfPostOnFail) ))) {

      (ent->after)( tid, &sci->args, &sci->status );
   }

   /* Because the post handler might have changed the status (eg, the
      post-handler for sys_open can change the result from success to
      failure if the kernel supplied a fd that it doesn't like), once
      again dump the syscall result back in the guest state.*/
   if (!(sci->flags & SfNoWriteResult))
      putSyscallStatusIntoGuestState( tid, &sci->status, &tst->arch.vex );

   /* Do any post-syscall actions required by the tool. */
   if (VG_(needs).syscall_wrapper) {
      UWord tmpv[8];
      tmpv[0] = sci->orig_args.arg1;
      tmpv[1] = sci->orig_args.arg2;
      tmpv[2] = sci->orig_args.arg3;
      tmpv[3] = sci->orig_args.arg4;
      tmpv[4] = sci->orig_args.arg5;
      tmpv[5] = sci->orig_args.arg6;
      tmpv[6] = sci->orig_args.arg7;
      tmpv[7] = sci->orig_args.arg8;
      VG_TDICT_CALL(tool_post_syscall, tid, 
                    sysno,
                    &tmpv[0], sizeof(tmpv)/sizeof(tmpv[0]),
                    sci->status.sres);
   }

   /* The syscall is done. */
   vg_assert(sci->status.what == SsComplete);
   sci->status.what = SsIdle;

   /* The pre/post wrappers may have concluded that pending signals
      might have been created, and will have set SfPollAfter to
      request a poll for them once the syscall is done. */
   if (sci->flags & SfPollAfter)
      VG_(poll_signals)(tid);

   /* Similarly, the wrappers might have asked for a yield
      afterwards. */
   if (sci->flags & SfYieldAfter)
      VG_(vg_yield)();
}


/* ---------------------------------------------------------------------
   Dealing with syscalls which get interrupted by a signal:
      VG_(fixup_guest_state_after_syscall_interrupted)
   ------------------------------------------------------------------ */

/* Syscalls done on behalf of the client are finally handed off to the
   kernel in VG_(client_syscall) above, either by calling
   do_syscall_for_client (the async case), or by calling
   VG_(do_syscall6) (the sync case).

   If the syscall is not interrupted by a signal (it may block and
   later unblock, but that's irrelevant here) then those functions
   eventually return and so control is passed to VG_(post_syscall).
   NB: not sure if the sync case can actually get interrupted, as it
   operates with all signals masked.

   However, the syscall may get interrupted by an async-signal.  In
   that case do_syscall_for_client/VG_(do_syscall6) do not
   return.  Instead we wind up in m_signals.async_sighandler.  We need
   to fix up the guest state to make it look like the syscall was
   interrupted for guest.  So async_sighandler calls here, and this
   does the fixup.  Note that from here we wind up calling
   VG_(post_syscall) too.
*/


/* These are addresses within ML_(do_syscall_for_client_WRK).  See
   syscall-$PLAT.S for details. 
*/
#if defined(VGO_linux)
extern const Addr ML_(blksys_setup);
extern const Addr ML_(blksys_restart);
extern const Addr ML_(blksys_complete);
extern const Addr ML_(blksys_committed);
extern const Addr ML_(blksys_finished);
#elif defined(VGO_darwin)
/* Darwin requires extra ugliness: each of the three syscall flavours
   (MACH, MDEP, UNIX) has its own copy of the five marker addresses. */
extern const Addr ML_(blksys_setup_MACH);
extern const Addr ML_(blksys_restart_MACH);
extern const Addr ML_(blksys_complete_MACH);
extern const Addr ML_(blksys_committed_MACH);
extern const Addr ML_(blksys_finished_MACH);
extern const Addr ML_(blksys_setup_MDEP);
extern const Addr ML_(blksys_restart_MDEP);
extern const Addr ML_(blksys_complete_MDEP);
extern const Addr ML_(blksys_committed_MDEP);
extern const Addr ML_(blksys_finished_MDEP);
extern const Addr ML_(blksys_setup_UNIX);
extern const Addr ML_(blksys_restart_UNIX);
extern const Addr ML_(blksys_complete_UNIX);
extern const Addr ML_(blksys_committed_UNIX);
extern const Addr ML_(blksys_finished_UNIX);
#else
# error "Unknown OS"
#endif


/* Back up guest state to restart a system call.  This winds the guest
   program counter back over the syscall instruction (or, on Darwin,
   restores the saved pre-syscall IP), so that when the guest resumes
   it re-executes the syscall.  In every case we then check that the
   bytes at the new PC really are this platform's syscall instruction,
   and assert if not: restarting from anywhere else would mean our
   caller's idea of the syscall location is wrong. */

void ML_(fixup_guest_state_to_restart_syscall) ( ThreadArchState* arch )
{
#if defined(VGP_x86_linux)
   arch->vex.guest_EIP -= 2;             // sizeof(int $0x80)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      int $0x80 == CD 80
   */
   {
      UChar *p = (UChar *)arch->vex.guest_EIP;

      if (p[0] != 0xcd || p[1] != 0x80)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#x %02x %02x\n",
                      arch->vex.guest_EIP, p[0], p[1]);

      vg_assert(p[0] == 0xcd && p[1] == 0x80);
   }

#elif defined(VGP_amd64_linux)
   arch->vex.guest_RIP -= 2;             // sizeof(syscall)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      syscall == 0F 05
   */
   {
      UChar *p = (UChar *)arch->vex.guest_RIP;

      if (p[0] != 0x0F || p[1] != 0x05)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#llx %02x %02x\n",
                      arch->vex.guest_RIP, p[0], p[1]);

      vg_assert(p[0] == 0x0F && p[1] == 0x05);
   }

#elif defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)
   arch->vex.guest_CIA -= 4;             // sizeof(ppc32 instr)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      sc == 44 00 00 02
   */
   {
      UChar *p = (UChar *)arch->vex.guest_CIA;

      if (p[0] != 0x44 || p[1] != 0x0 || p[2] != 0x0 || p[3] != 0x02)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
                      arch->vex.guest_CIA + 0ULL, p[0], p[1], p[2], p[3]);

      vg_assert(p[0] == 0x44 && p[1] == 0x0 && p[2] == 0x0 && p[3] == 0x2);
   }

#elif defined(VGP_arm_linux)
   if (arch->vex.guest_R15T & 1) {
      // Thumb mode.  SVC is encoded as
      //   1101 1111 imm8
      // where imm8 is the SVC number, and we only accept 0.
      arch->vex.guest_R15T -= 2;   // sizeof(thumb 16 bit insn)
      // R15T carries the Thumb bit (bit 0) set, so subtract 1 to get
      // the real instruction address before inspecting the bytes.
      UChar* p     = (UChar*)(arch->vex.guest_R15T - 1);
      Bool   valid = p[0] == 0 && p[1] == 0xDF;
      if (!valid) {
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over (Thumb) syscall that is not syscall "
                      "at %#llx %02x %02x\n",
                      arch->vex.guest_R15T - 1ULL, p[0], p[1]);
      }
      vg_assert(valid);
      // FIXME: NOTE, this really isn't right.  We need to back up
      // ITSTATE to what it was before the SVC instruction, but we
      // don't know what it was.  At least assert that it is now
      // zero, because if it is nonzero then it must also have
      // been nonzero for the SVC itself, which means it was
      // conditional.  Urk.
      vg_assert(arch->vex.guest_ITSTATE == 0);
   } else {
      // ARM mode.  SVC is encoded as
      //   cond 1111 imm24
      // where imm24 is the SVC number, and we only accept 0.
      arch->vex.guest_R15T -= 4;   // sizeof(arm instr)
      UChar* p     = (UChar*)arch->vex.guest_R15T;
      Bool   valid = p[0] == 0 && p[1] == 0 && p[2] == 0
                     && (p[3] & 0xF) == 0xF;
      if (!valid) {
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over (ARM) syscall that is not syscall "
                      "at %#llx %02x %02x %02x %02x\n",
                      arch->vex.guest_R15T + 0ULL, p[0], p[1], p[2], p[3]);
      }
      vg_assert(valid);
   }

#elif defined(VGP_x86_darwin)
   // On Darwin the pre-syscall IP was stashed in guest_IP_AT_SYSCALL,
   // so restore it rather than decrementing the current PC.
   arch->vex.guest_EIP = arch->vex.guest_IP_AT_SYSCALL;

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      int $0x80 == CD 80
      int $0x81 == CD 81
      int $0x82 == CD 82
      sysenter  == 0F 34
   */
   {
      UChar *p = (UChar *)arch->vex.guest_EIP;
      Bool  ok = (p[0] == 0xCD && p[1] == 0x80)
                 || (p[0] == 0xCD && p[1] == 0x81)
                 || (p[0] == 0xCD && p[1] == 0x82)
                 || (p[0] == 0x0F && p[1] == 0x34);
      if (!ok)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#x %02x %02x\n",
                      arch->vex.guest_EIP, p[0], p[1]);
      vg_assert(ok);
   }

#elif defined(VGP_amd64_darwin)
   // DDD: #warning GrP fixme amd64 restart unimplemented
   vg_assert(0);

#elif defined(VGP_s390x_linux)
   arch->vex.guest_IA -= 2;             // sizeof(syscall)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      syscall == 0A <num>
      (only the 0A opcode byte is checked; the second byte is the
      syscall number immediate and may be anything.)
   */
   {
      UChar *p = (UChar *)arch->vex.guest_IA;
      if (p[0] != 0x0A)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#llx %02x %02x\n",
                      arch->vex.guest_IA, p[0], p[1]);

      vg_assert(p[0] == 0x0A);
   }

#elif defined(VGP_mips32_linux)

   arch->vex.guest_PC -= 4;             // sizeof(mips instr)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      little endian
      syscall == 0C 00 00 00
      big endian
      syscall == 00 00 00 0C
   */
   {
      UChar *p = (UChar *)(arch->vex.guest_PC);
#     if defined (VG_LITTLEENDIAN)
      if (p[0] != 0x0c || p[1] != 0x00 || p[2] != 0x00 || p[3] != 0x00)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#x %02x %02x %02x %02x\n",
                      arch->vex.guest_PC, p[0], p[1], p[2], p[3]);

      vg_assert(p[0] == 0x0c && p[1] == 0x00 && p[2] == 0x00 && p[3] == 0x00);
#     elif defined (VG_BIGENDIAN)
      if (p[0] != 0x00 || p[1] != 0x00 || p[2] != 0x00 || p[3] != 0x0c)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#x %02x %02x %02x %02x\n",
                      arch->vex.guest_PC, p[0], p[1], p[2], p[3]);

      vg_assert(p[0] == 0x00 && p[1] == 0x00 && p[2] == 0x00 && p[3] == 0x0c);
#     else
#        error "Unknown endianness"
#     endif
   }

#else
#  error "ML_(fixup_guest_state_to_restart_syscall): unknown plat"
#endif
}


/*
   Fix up the guest state when a syscall is interrupted by a signal
   and so has been forced to return 'sysret'.

   To do this, we determine the precise state of the syscall by
   looking at the (real) IP at the time the signal happened.  The
   syscall sequence looks like:

     1. unblock signals
     2. perform syscall
     3. save result to guest state (EAX, RAX, R3+CR0.SO, R0, V0)
     4. re-block signals

   If a signal
   happens at      Then     Why?
   [1-2)           restart  nothing has happened (restart syscall)
   [2]             restart  syscall hasn't started, or kernel wants to restart
   [2-3)           save     syscall complete, but results not saved
   [3-4)                    syscall complete, results saved

   Sometimes we never want to restart an interrupted syscall (because
   sigaction says not to), so we only restart if "restart" is True.

   This will also call VG_(post_syscall) if the syscall has actually
   completed (either because it was interrupted, or because it
   actually finished).  It will not call VG_(post_syscall) if the
   syscall is set up for restart, which means that the pre-wrapper may
   get called multiple times.
*/

void
VG_(fixup_guest_state_after_syscall_interrupted)( ThreadId tid,
                                                  Addr     ip,
                                                  SysRes   sres,
                                                  Bool     restart)
{
   /* Note that we don't know the syscall number here, since (1) in
      general there's no reliable way to get hold of it short of
      stashing it in the guest state before the syscall, and (2) in
      any case we don't need to know it for the actions done by this
      routine.

      Furthermore, 'sres' is only used in the case where the syscall
      is complete, but the result has not been committed to the guest
      state yet.  In any other situation it will be meaningless and
      therefore ignored. */

   ThreadState*     tst;
   SyscallStatus    canonical;
   ThreadArchState* th_regs;
   SyscallInfo*     sci;

   /* Compute some Booleans indicating which range we're in.  The
      range boundaries are the ML_(blksys_*) marker addresses exported
      by the platform-specific syscall-$PLAT.S files (declared extern
      above). */
   Bool outside_range,
        in_setup_to_restart,      // [1,2) in the .S files
        at_restart,               // [2]   in the .S files
        in_complete_to_committed, // [3,4) in the .S files
        in_committed_to_finished; // [4,5) in the .S files

#  if defined(VGO_linux)
   outside_range
      = ip < ML_(blksys_setup) || ip >= ML_(blksys_finished);
   in_setup_to_restart
      = ip >= ML_(blksys_setup) && ip < ML_(blksys_restart);
   at_restart
      = ip == ML_(blksys_restart);
   in_complete_to_committed
      = ip >= ML_(blksys_complete) && ip < ML_(blksys_committed);
   in_committed_to_finished
      = ip >= ML_(blksys_committed) && ip < ML_(blksys_finished);
#  elif defined(VGO_darwin)
   /* On Darwin each predicate must consider all three syscall
      flavours (MACH, MDEP, UNIX), since the interrupted syscall could
      have gone through any of their dispatch stubs. */
   outside_range
      =  (ip < ML_(blksys_setup_MACH) || ip >= ML_(blksys_finished_MACH))
      && (ip < ML_(blksys_setup_MDEP) || ip >= ML_(blksys_finished_MDEP))
      && (ip < ML_(blksys_setup_UNIX) || ip >= ML_(blksys_finished_UNIX));
   in_setup_to_restart
      =  (ip >= ML_(blksys_setup_MACH) && ip < ML_(blksys_restart_MACH))
      || (ip >= ML_(blksys_setup_MDEP) && ip < ML_(blksys_restart_MDEP))
      || (ip >= ML_(blksys_setup_UNIX) && ip < ML_(blksys_restart_UNIX));
   at_restart
      =  (ip == ML_(blksys_restart_MACH))
      || (ip == ML_(blksys_restart_MDEP))
      || (ip == ML_(blksys_restart_UNIX));
   in_complete_to_committed
      =  (ip >= ML_(blksys_complete_MACH) && ip < ML_(blksys_committed_MACH))
      || (ip >= ML_(blksys_complete_MDEP) && ip < ML_(blksys_committed_MDEP))
      || (ip >= ML_(blksys_complete_UNIX) && ip < ML_(blksys_committed_UNIX));
   in_committed_to_finished
      =  (ip >= ML_(blksys_committed_MACH) && ip < ML_(blksys_finished_MACH))
      || (ip >= ML_(blksys_committed_MDEP) && ip < ML_(blksys_finished_MDEP))
      || (ip >= ML_(blksys_committed_UNIX) && ip < ML_(blksys_finished_UNIX));
   /* Wasn't that just So Much Fun?  Does your head hurt yet?  Mine does. */
#  else
#    error "Unknown OS"
#  endif

   if (VG_(clo_trace_signals))
      VG_(message)( Vg_DebugMsg,
                    "interrupted_syscall: tid=%d, ip=0x%llx, "
                    "restart=%s, sres.isErr=%s, sres.val=%lld\n",
                    (Int)tid,
                    (ULong)ip,
                    restart ? "True" : "False",
                    sr_isError(sres) ? "True" : "False",
                    (Long)(sr_isError(sres) ? sr_Err(sres) : sr_Res(sres)) );

   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

   tst     = VG_(get_ThreadState)(tid);
   th_regs = &tst->arch;
   sci     = & syscallInfo[tid];

   /* Figure out what the state of the syscall was by examining the
      (real) IP at the time of the signal, and act accordingly. */
   if (outside_range) {
      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg,
                       "  not in syscall at all: hmm, very suspicious\n" );
      /* Looks like we weren't in a syscall at all.  Hmm.  Nothing to
         fix up, so just return to the caller. */
      vg_assert(sci->status.what != SsIdle);
      return;
   }

   /* We should not be here unless this thread had first started up
      the machinery for a syscall by calling VG_(client_syscall).
      Hence: */
   vg_assert(sci->status.what != SsIdle);

   /* now, do one of four fixup actions, depending on where the IP has
      got to. */

   if (in_setup_to_restart) {
      /* syscall hasn't even started; go around again */
      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg, "  not started: restarting\n");
      vg_assert(sci->status.what == SsHandToKernel);
      ML_(fixup_guest_state_to_restart_syscall)(th_regs);
   }

   else
   if (at_restart) {
      /* We're either about to run the syscall, or it was interrupted
         and the kernel restarted it.  Restart if asked, otherwise
         EINTR it.  In the EINTR case, fake up a failure status,
         commit it to the guest state (unless the wrapper asked us not
         to via SfNoWriteResult) and run the post-syscall machinery,
         exactly as if the kernel itself had returned EINTR. */
      if (restart) {
         if (VG_(clo_trace_signals))
            VG_(message)( Vg_DebugMsg, "  at syscall instr: restarting\n");
         ML_(fixup_guest_state_to_restart_syscall)(th_regs);
      } else {
         if (VG_(clo_trace_signals))
            VG_(message)( Vg_DebugMsg, "  at syscall instr: returning EINTR\n");
         canonical = convert_SysRes_to_SyscallStatus(
                        VG_(mk_SysRes_Error)( VKI_EINTR )
                     );
         if (!(sci->flags & SfNoWriteResult))
            putSyscallStatusIntoGuestState( tid, &canonical, &th_regs->vex );
         sci->status = canonical;
         VG_(post_syscall)(tid);
      }
   }

   else
   if (in_complete_to_committed) {
      /* Syscall complete, but result hasn't been written back yet.
         Write the SysRes we were supplied with back to the guest
         state. */
      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg,
                       "  completed, but uncommitted: committing\n");
      canonical = convert_SysRes_to_SyscallStatus( sres );
      if (!(sci->flags & SfNoWriteResult))
         putSyscallStatusIntoGuestState( tid, &canonical, &th_regs->vex );
      sci->status = canonical;
      VG_(post_syscall)(tid);
   }

   else
   if (in_committed_to_finished) {
      /* Result committed, but the signal mask has not been restored;
         we expect our caller (the signal handler) will have fixed
         this up. */
      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg,
                       "  completed and committed: nothing to do\n");
      getSyscallStatusFromGuestState( &sci->status, &th_regs->vex );
      vg_assert(sci->status.what == SsComplete);
      VG_(post_syscall)(tid);
   }

   else
      VG_(core_panic)("?? strange syscall interrupt state?");

   /* In all cases, the syscall is now finished (even if we called
      ML_(fixup_guest_state_to_restart_syscall), since that just
      re-positions the guest's IP for another go at it).  So we need
      to record that fact. */
   sci->status.what = SsIdle;
}


#if defined(VGO_darwin)
// Clean up after workq_ops(WQOPS_THREAD_RETURN) jumped to wqthread_hijack.
// This is similar to VG_(fixup_guest_state_after_syscall_interrupted).
// This longjmps back to the scheduler and therefore never returns.
void ML_(wqthread_continue_NORETURN)(ThreadId tid)
{
   ThreadState*     tst;
   SyscallInfo*     sci;

   VG_(acquire_BigLock)(tid, "wqthread_continue_NORETURN");

   PRINT("SYSCALL[%d,%d](%s) workq_ops() starting new workqueue item\n",
         VG_(getpid)(), tid, VG_SYSNUM_STRING(__NR_workq_ops));

   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

   tst = VG_(get_ThreadState)(tid);
   sci = & syscallInfo[tid];
   vg_assert(sci->status.what != SsIdle);
   vg_assert(tst->os_state.wq_jmpbuf_valid);  // check this BEFORE post_syscall

   // Pretend the syscall completed normally, but don't touch the thread state.
   sci->status = convert_SysRes_to_SyscallStatus( VG_(mk_SysRes_Success)(0) );
   sci->flags |= SfNoWriteResult;
   VG_(post_syscall)(tid);

   sci->status.what = SsIdle;

   vg_assert(tst->sched_jmpbuf_valid);
   VG_MINIMAL_LONGJMP(tst->sched_jmpbuf);

   /* NOTREACHED */
   vg_assert(0);
}
#endif


/* ---------------------------------------------------------------------
   A place to store the where-to-call-when-really-done pointer
   ------------------------------------------------------------------ */

// When the final thread is done, where shall I call to shutdown the
// system cleanly?  Is set once at startup (in m_main) and never
// changes after that.  Is basically a pointer to the exit
// continuation.  This is all just a nasty hack to avoid calling
// directly from m_syswrap to m_main at exit, since that would cause
// m_main to become part of a module cycle, which is silly.
void (* VG_(address_of_m_main_shutdown_actions_NORETURN) )
       (ThreadId,VgSchedReturnCode)
   = NULL;

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/