1 2 /*--------------------------------------------------------------------*/ 3 /*--- Handle system calls. syswrap-main.c ---*/ 4 /*--------------------------------------------------------------------*/ 5 6 /* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2000-2010 Julian Seward 11 jseward (at) acm.org 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 26 02111-1307, USA. 27 28 The GNU General Public License is contained in the file COPYING. 
29 */ 30 31 #include "libvex_guest_offsets.h" 32 #include "libvex_trc_values.h" 33 #include "pub_core_basics.h" 34 #include "pub_core_aspacemgr.h" 35 #include "pub_core_vki.h" 36 #include "pub_core_vkiscnums.h" 37 #include "pub_core_threadstate.h" 38 #include "pub_core_libcbase.h" 39 #include "pub_core_libcassert.h" 40 #include "pub_core_libcprint.h" 41 #include "pub_core_libcproc.h" // For VG_(getpid)() 42 #include "pub_core_libcsignal.h" 43 #include "pub_core_scheduler.h" // For VG_({acquire,release}_BigLock), 44 // and VG_(vg_yield) 45 #include "pub_core_stacktrace.h" // For VG_(get_and_pp_StackTrace)() 46 #include "pub_core_tooliface.h" 47 #include "pub_core_options.h" 48 #include "pub_core_signals.h" // For VG_SIGVGKILL, VG_(poll_signals) 49 #include "pub_core_syscall.h" 50 #include "pub_core_machine.h" 51 #include "pub_core_syswrap.h" 52 53 #include "priv_types_n_macros.h" 54 #include "priv_syswrap-main.h" 55 56 #if defined(VGO_darwin) 57 #include "priv_syswrap-darwin.h" 58 #endif 59 60 /* Useful info which needs to be recorded somewhere: 61 Use of registers in syscalls is: 62 63 NUM ARG1 ARG2 ARG3 ARG4 ARG5 ARG6 ARG7 ARG8 RESULT 64 LINUX: 65 x86 eax ebx ecx edx esi edi ebp n/a n/a eax (== NUM) 66 amd64 rax rdi rsi rdx r10 r8 r9 n/a n/a rax (== NUM) 67 ppc32 r0 r3 r4 r5 r6 r7 r8 n/a n/a r3+CR0.SO (== ARG1) 68 ppc64 r0 r3 r4 r5 r6 r7 r8 n/a n/a r3+CR0.SO (== ARG1) 69 arm r7 r0 r1 r2 r3 r4 r5 n/a n/a r0 (== ARG1) 70 71 AIX: 72 ppc32 r2 r3 r4 r5 r6 r7 r8 r9 r10 r3(res),r4(err) 73 ppc64 r2 r3 r4 r5 r6 r7 r8 r9 r10 r3(res),r4(err) 74 75 DARWIN: 76 x86 eax +4 +8 +12 +16 +20 +24 +28 +32 edx:eax, eflags.c 77 amd64 rax rdi rsi rdx rcx r8 r9 +8 +16 rdx:rax, rflags.c 78 79 For x86-darwin, "+N" denotes "in memory at N(%esp)"; ditto 80 amd64-darwin. Apparently 0(%esp) is some kind of return address 81 (perhaps for syscalls done with "sysenter"?) I don't think it is 82 relevant for syscalls done with "int $0x80/1/2". 
83 */ 84 85 /* This is the top level of the system-call handler module. All 86 system calls are channelled through here, doing two things: 87 88 * notify the tool of the events (mem/reg reads, writes) happening 89 90 * perform the syscall, usually by passing it along to the kernel 91 unmodified. 92 93 A magical piece of assembly code, do_syscall_for_client_WRK, in 94 syscall-$PLATFORM.S does the tricky bit of passing a syscall to the 95 kernel, whilst having the simulator retain control. 96 */ 97 98 /* The main function is VG_(client_syscall). The simulation calls it 99 whenever a client thread wants to do a syscall. The following is a 100 sketch of what it does. 101 102 * Ensures the root thread's stack is suitably mapped. Tedious and 103 arcane. See big big comment in VG_(client_syscall). 104 105 * First, it rounds up the syscall number and args (which is a 106 platform dependent activity) and puts them in a struct ("args") 107 and also a copy in "orig_args". 108 109 The pre/post wrappers refer to these structs and so no longer 110 need magic macros to access any specific registers. This struct 111 is stored in thread-specific storage. 112 113 114 * The pre-wrapper is called, passing it a pointer to struct 115 "args". 116 117 118 * The pre-wrapper examines the args and pokes the tool 119 appropriately. It may modify the args; this is why "orig_args" 120 is also stored. 121 122 The pre-wrapper may choose to 'do' the syscall itself, and 123 concludes one of three outcomes: 124 125 Success(N) -- syscall is already complete, with success; 126 result is N 127 128 Fail(N) -- syscall is already complete, with failure; 129 error code is N 130 131 HandToKernel -- (the usual case): this needs to be given to 132 the kernel to be done, using the values in 133 the possibly-modified "args" struct. 
134 135 In addition, the pre-wrapper may set some flags: 136 137 MayBlock -- only applicable when outcome==HandToKernel 138 139 PostOnFail -- only applicable when outcome==HandToKernel or Fail 140 141 142 * If the pre-outcome is HandToKernel, the syscall is duly handed 143 off to the kernel (perhaps involving some thread switchery, but 144 that's not important). This reduces the possible set of outcomes 145 to either Success(N) or Fail(N). 146 147 148 * The outcome (Success(N) or Fail(N)) is written back to the guest 149 register(s). This is platform specific: 150 151 x86: Success(N) ==> eax = N 152 Fail(N) ==> eax = -N 153 154 ditto amd64 155 156 ppc32: Success(N) ==> r3 = N, CR0.SO = 0 157 Fail(N) ==> r3 = N, CR0.SO = 1 158 159 Darwin: 160 x86: Success(N) ==> edx:eax = N, cc = 0 161 Fail(N) ==> edx:eax = N, cc = 1 162 163 * The post wrapper is called if: 164 165 - it exists, and 166 - outcome==Success or (outcome==Fail and PostOnFail is set) 167 168 The post wrapper is passed the adulterated syscall args (struct 169 "args"), and the syscall outcome (viz, Success(N) or Fail(N)). 170 171 There are several other complications, primarily to do with 172 syscalls getting interrupted, explained in comments in the code. 173 */ 174 175 /* CAVEATS for writing wrappers. It is important to follow these! 176 177 The macros defined in priv_types_n_macros.h are designed to help 178 decouple the wrapper logic from the actual representation of 179 syscall args/results, since these wrappers are designed to work on 180 multiple platforms. 181 182 Sometimes a PRE wrapper will complete the syscall itself, without 183 handing it to the kernel. It will use one of SET_STATUS_Success, 184 SET_STATUS_Failure or SET_STATUS_from_SysRes to set the return 185 value. It is critical to appreciate that use of the macro does not 186 immediately cause the underlying guest state to be updated -- that 187 is done by the driver logic in this file, when the wrapper returns. 
188 189 As a result, PRE wrappers of the following form will malfunction: 190 191 PRE(fooble) 192 { 193 ... do stuff ... 194 SET_STATUS_Somehow(...) 195 196 // do something that assumes guest state is up to date 197 } 198 199 In particular, direct or indirect calls to VG_(poll_signals) after 200 setting STATUS can cause the guest state to be read (in order to 201 build signal frames). Do not do this. If you want a signal poll 202 after the syscall goes through, do "*flags |= SfPollAfter" and the 203 driver logic will do it for you. 204 205 ----------- 206 207 Another critical requirement following introduction of new address 208 space manager (JRS, 20050923): 209 210 In a situation where the mappedness of memory has changed, aspacem 211 should be notified BEFORE the tool. Hence the following is 212 correct: 213 214 Bool d = VG_(am_notify_munmap)(s->start, s->end+1 - s->start); 215 VG_TRACK( die_mem_munmap, s->start, s->end+1 - s->start ); 216 if (d) 217 VG_(discard_translations)(s->start, s->end+1 - s->start); 218 219 whilst this is wrong: 220 221 VG_TRACK( die_mem_munmap, s->start, s->end+1 - s->start ); 222 Bool d = VG_(am_notify_munmap)(s->start, s->end+1 - s->start); 223 if (d) 224 VG_(discard_translations)(s->start, s->end+1 - s->start); 225 226 The reason is that the tool may itself ask aspacem for more shadow 227 memory as a result of the VG_TRACK call. In such a situation it is 228 critical that aspacem's segment array is up to date -- hence the 229 need to notify aspacem first. 230 231 ----------- 232 233 Also .. take care to call VG_(discard_translations) whenever 234 memory with execute permissions is unmapped. 235 */ 236 237 238 /* --------------------------------------------------------------------- 239 Do potentially blocking syscall for the client, and mess with 240 signal masks at the same time. 
241 ------------------------------------------------------------------ */ 242 243 /* Perform a syscall on behalf of a client thread, using a specific 244 signal mask. On completion, the signal mask is set to restore_mask 245 (which presumably blocks almost everything). If a signal happens 246 during the syscall, the handler should call 247 VG_(fixup_guest_state_after_syscall_interrupted) to adjust the 248 thread's context to do the right thing. 249 250 The _WRK function is handwritten assembly, implemented per-platform 251 in coregrind/m_syswrap/syscall-$PLAT.S. It has some very magic 252 properties. See comments at the top of 253 VG_(fixup_guest_state_after_syscall_interrupted) below for details. 254 255 This function (these functions) are required to return zero in case 256 of success (even if the syscall itself failed), and nonzero if the 257 sigprocmask-swizzling calls failed. We don't actually care about 258 the failure values from sigprocmask, although most of the assembly 259 implementations do attempt to return that, using the convention 260 0 for success, or 0x8000 | error-code for failure. 
*/
#if defined(VGO_linux)
extern
UWord ML_(do_syscall_for_client_WRK)( Word syscallno, 
                                      void* guest_state,
                                      const vki_sigset_t *syscall_mask,
                                      const vki_sigset_t *restore_mask,
                                      Word sigsetSzB );
#elif defined(VGO_aix5)
extern
UWord ML_(do_syscall_for_client_WRK)( Word syscallno, 
                                      void* guest_state,
                                      const vki_sigset_t *syscall_mask,
                                      const vki_sigset_t *restore_mask,
                                      Word sigsetSzB, /* unused */
                                      Word __nr_sigprocmask );
#elif defined(VGO_darwin)
extern
UWord ML_(do_syscall_for_client_unix_WRK)( Word syscallno, 
                                           void* guest_state,
                                           const vki_sigset_t *syscall_mask,
                                           const vki_sigset_t *restore_mask,
                                           Word sigsetSzB ); /* unused */
extern
UWord ML_(do_syscall_for_client_mach_WRK)( Word syscallno, 
                                           void* guest_state,
                                           const vki_sigset_t *syscall_mask,
                                           const vki_sigset_t *restore_mask,
                                           Word sigsetSzB ); /* unused */
extern
UWord ML_(do_syscall_for_client_mdep_WRK)( Word syscallno, 
                                           void* guest_state,
                                           const vki_sigset_t *syscall_mask,
                                           const vki_sigset_t *restore_mask,
                                           Word sigsetSzB ); /* unused */
#else
#  error "Unknown OS"
#endif


/* Perform 'syscallno' on behalf of client thread 'tst', with the
   signal mask 'syscall_mask' in force for the duration of the call.
   This is just a dispatcher onto the per-OS handwritten assembly
   helpers declared above; see the big comment preceding those
   declarations for the contract and return-value convention. */
static
void do_syscall_for_client ( Int syscallno,
                             ThreadState* tst,
                             const vki_sigset_t* syscall_mask )
{
   vki_sigset_t saved;  /* receives the mask in force before the swizzle */
   UWord err;
#  if defined(VGO_linux)
   err = ML_(do_syscall_for_client_WRK)(
            syscallno, &tst->arch.vex,
            syscall_mask, &saved, sizeof(vki_sigset_t)
         );
#  elif defined(VGO_aix5)
   err = ML_(do_syscall_for_client_WRK)(
            syscallno, &tst->arch.vex,
            syscall_mask, &saved, 0/*unused:sigsetSzB*/,
            __NR_rt_sigprocmask
         );
#  elif defined(VGO_darwin)
   /* Darwin has three syscall flavours; the class bits encoded in
      'syscallno' select which assembly helper must carry it out. */
   switch (VG_DARWIN_SYSNO_CLASS(syscallno)) {
      case VG_DARWIN_SYSCALL_CLASS_UNIX:
         err = ML_(do_syscall_for_client_unix_WRK)(
                  VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
                  syscall_mask, &saved, 0/*unused:sigsetSzB*/
               );
         break;
      case VG_DARWIN_SYSCALL_CLASS_MACH:
         err = ML_(do_syscall_for_client_mach_WRK)(
                  VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
                  syscall_mask, &saved, 0/*unused:sigsetSzB*/
               );
         break;
      case VG_DARWIN_SYSCALL_CLASS_MDEP:
         err = ML_(do_syscall_for_client_mdep_WRK)(
                  VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
                  syscall_mask, &saved, 0/*unused:sigsetSzB*/
               );
         break;
      default:
         vg_assert(0);
         /*NOTREACHED*/
         break;
   }
#  else
#    error "Unknown OS"
#  endif
   /* Nonzero 'err' means the sigprocmask swizzling inside the helper
      failed, which must not happen; the syscall's own outcome lives
      in the guest state, not in 'err'. */
   vg_assert2(
      err == 0,
      "ML_(do_syscall_for_client_WRK): sigprocmask error %d",
      (Int)(err & 0xFFF)
   );
}


/* ---------------------------------------------------------------------
   Impedance matchers and misc helpers
   ------------------------------------------------------------------ */

/* True iff all nine fields (sysno and the eight args) are equal. */
static
Bool eq_SyscallArgs ( SyscallArgs* a1, SyscallArgs* a2 )
{
   return a1->sysno == a2->sysno
          && a1->arg1 == a2->arg1
          && a1->arg2 == a2->arg2
          && a1->arg3 == a2->arg3
          && a1->arg4 == a2->arg4
          && a1->arg5 == a2->arg5
          && a1->arg6 == a2->arg6
          && a1->arg7 == a2->arg7
          && a1->arg8 == a2->arg8;
}

/* True iff the two statuses are identical.  On Darwin an inequality
   is additionally dumped and asserted on, as a debugging aid. */
static
Bool eq_SyscallStatus ( SyscallStatus* s1, SyscallStatus* s2 )
{
   /* was: return s1->what == s2->what && sr_EQ( s1->sres, s2->sres ); */
   if (s1->what == s2->what && sr_EQ( s1->sres, s2->sres ))
      return True;
#  if defined(VGO_darwin)
   /* Darwin-specific debugging guff */
   vg_assert(s1->what == s2->what);
   VG_(printf)("eq_SyscallStatus:\n");
   VG_(printf)("  {%lu %lu %u}\n", s1->sres._wLO, s1->sres._wHI, s1->sres._mode);
   VG_(printf)("  {%lu %lu %u}\n", s2->sres._wLO, s2->sres._wHI, s2->sres._mode);
   vg_assert(0);
#  endif
   return False;
}

/* Convert between SysRes and SyscallStatus, to the extent possible.
*/

/* Lift a plain SysRes into a SyscallStatus whose 'what' field marks
   the syscall as complete. */
static
SyscallStatus convert_SysRes_to_SyscallStatus ( SysRes res )
{
   SyscallStatus status;
   status.what = SsComplete;
   status.sres = res;
   return status;
}


/* Impedance matchers.  These convert syscall arg or result data from
   the platform-specific in-guest-state format to the canonical
   formats, and back. */

/* Pull the syscall number and up to 8 arguments out of the guest
   registers (and, on Darwin, the guest stack) into canonical form.
   'trc' is the VEX trap reason; only x86-darwin consults it, to work
   out which syscall flavour (Unix/Mach/mdep) was requested.  See the
   per-platform register table at the top of this file. */
static
void getSyscallArgsFromGuestState ( /*OUT*/SyscallArgs*       canonical,
                                    /*IN*/ VexGuestArchState* gst_vanilla, 
                                    /*IN*/ UInt trc )
{
#if defined(VGP_x86_linux)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   canonical->sysno = gst->guest_EAX;
   canonical->arg1  = gst->guest_EBX;
   canonical->arg2  = gst->guest_ECX;
   canonical->arg3  = gst->guest_EDX;
   canonical->arg4  = gst->guest_ESI;
   canonical->arg5  = gst->guest_EDI;
   canonical->arg6  = gst->guest_EBP;
   canonical->arg7  = 0;  /* Linux syscalls take at most 6 args */
   canonical->arg8  = 0;

#elif defined(VGP_amd64_linux)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   canonical->sysno = gst->guest_RAX;
   canonical->arg1  = gst->guest_RDI;
   canonical->arg2  = gst->guest_RSI;
   canonical->arg3  = gst->guest_RDX;
   canonical->arg4  = gst->guest_R10;
   canonical->arg5  = gst->guest_R8;
   canonical->arg6  = gst->guest_R9;
   canonical->arg7  = 0;
   canonical->arg8  = 0;

#elif defined(VGP_ppc32_linux)
   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
   canonical->sysno = gst->guest_GPR0;
   canonical->arg1  = gst->guest_GPR3;
   canonical->arg2  = gst->guest_GPR4;
   canonical->arg3  = gst->guest_GPR5;
   canonical->arg4  = gst->guest_GPR6;
   canonical->arg5  = gst->guest_GPR7;
   canonical->arg6  = gst->guest_GPR8;
   canonical->arg7  = 0;
   canonical->arg8  = 0;

#elif defined(VGP_ppc64_linux)
   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
   canonical->sysno = gst->guest_GPR0;
   canonical->arg1  = gst->guest_GPR3;
   canonical->arg2  = gst->guest_GPR4;
   canonical->arg3  = gst->guest_GPR5;
   canonical->arg4  = gst->guest_GPR6;
   canonical->arg5  = gst->guest_GPR7;
   canonical->arg6  = gst->guest_GPR8;
   canonical->arg7  = 0;
   canonical->arg8  = 0;

#elif defined(VGP_arm_linux)
   VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
   canonical->sysno = gst->guest_R7;
   canonical->arg1  = gst->guest_R0;
   canonical->arg2  = gst->guest_R1;
   canonical->arg3  = gst->guest_R2;
   canonical->arg4  = gst->guest_R3;
   canonical->arg5  = gst->guest_R4;
   canonical->arg6  = gst->guest_R5;
   canonical->arg7  = 0;
   canonical->arg8  = 0;

#elif defined(VGP_ppc32_aix5)
   /* AIX passes up to 8 register args (r3..r10); sysno is in r2. */
   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
   canonical->sysno = gst->guest_GPR2;
   canonical->arg1  = gst->guest_GPR3;
   canonical->arg2  = gst->guest_GPR4;
   canonical->arg3  = gst->guest_GPR5;
   canonical->arg4  = gst->guest_GPR6;
   canonical->arg5  = gst->guest_GPR7;
   canonical->arg6  = gst->guest_GPR8;
   canonical->arg7  = gst->guest_GPR9;
   canonical->arg8  = gst->guest_GPR10;

#elif defined(VGP_ppc64_aix5)
   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
   canonical->sysno = gst->guest_GPR2;
   canonical->arg1  = gst->guest_GPR3;
   canonical->arg2  = gst->guest_GPR4;
   canonical->arg3  = gst->guest_GPR5;
   canonical->arg4  = gst->guest_GPR6;
   canonical->arg5  = gst->guest_GPR7;
   canonical->arg6  = gst->guest_GPR8;
   canonical->arg7  = gst->guest_GPR9;
   canonical->arg8  = gst->guest_GPR10;

#elif defined(VGP_x86_darwin)
   /* Args live in memory at offsets from %esp; see table at file top. */
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_ESP;
   // GrP fixme hope syscalls aren't called with really shallow stacks...
   canonical->sysno = gst->guest_EAX;
   if (canonical->sysno != 0) {
      // stack[0] is return address
      canonical->arg1  = stack[1];
      canonical->arg2  = stack[2];
      canonical->arg3  = stack[3];
      canonical->arg4  = stack[4];
      canonical->arg5  = stack[5];
      canonical->arg6  = stack[6];
      canonical->arg7  = stack[7];
      canonical->arg8  = stack[8];
   } else {
      /* sysno 0 is the syscall() wrapper: the real number is the
         first stack arg and everything else shifts down by one. */
      // GrP fixme hack handle syscall()
      // GrP fixme what about __syscall() ?
      // stack[0] is return address
      // DDD: the tool can't see that the params have been shifted!  Can
      // lead to incorrect checking, I think, because the PRRAn/PSARn
      // macros will mention the pre-shifted args.
      canonical->sysno = stack[1];
      vg_assert(canonical->sysno != 0);
      canonical->arg1  = stack[2];
      canonical->arg2  = stack[3];
      canonical->arg3  = stack[4];
      canonical->arg4  = stack[5];
      canonical->arg5  = stack[6];
      canonical->arg6  = stack[7];
      canonical->arg7  = stack[8];
      canonical->arg8  = stack[9];

      PRINT("SYSCALL[%d,?](%s) syscall(%s, ...); please stand by...\n",
            VG_(getpid)(), /*tid,*/
            VG_SYSNUM_STRING(0), VG_SYSNUM_STRING(canonical->sysno));
   }

   // Here we determine what kind of syscall it was by looking at the
   // interrupt kind, and then encode the syscall number using the 64-bit
   // encoding for Valgrind's internal use.
   //
   // DDD: Would it be better to stash the JMP kind into the Darwin
   // thread state rather than passing in the trc?
   switch (trc) {
      case VEX_TRC_JMP_SYS_INT128:
         // int $0x80 = Unix, 64-bit result
         vg_assert(canonical->sysno >= 0);
         canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(canonical->sysno);
         break;
      case VEX_TRC_JMP_SYS_SYSENTER:
         // syscall = Unix, 32-bit result
         //       OR Mach, 32-bit result
         if (canonical->sysno >= 0) {
            // GrP fixme hack:  0xffff == I386_SYSCALL_NUMBER_MASK
            canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(canonical->sysno
                                                                & 0xffff);
         } else {
            /* negative sysno denotes a Mach trap */
            canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MACH(-canonical->sysno);
         }
         break;
      case VEX_TRC_JMP_SYS_INT129:
         // int $0x81 = Mach, 32-bit result
         vg_assert(canonical->sysno < 0);
         canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MACH(-canonical->sysno);
         break;
      case VEX_TRC_JMP_SYS_INT130:
         // int $0x82 = mdep, 32-bit result
         vg_assert(canonical->sysno >= 0);
         canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MDEP(canonical->sysno);
         break;
      default: 
         vg_assert(0);
         break;
   }

#elif defined(VGP_amd64_darwin)
   /* First 6 args in registers, 7th/8th on the stack. */
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_RSP;

   vg_assert(trc == VEX_TRC_JMP_SYS_SYSCALL);

   // GrP fixme hope syscalls aren't called with really shallow stacks...
   canonical->sysno = gst->guest_RAX;
   if (canonical->sysno != __NR_syscall) {
      // stack[0] is return address
      canonical->arg1  = gst->guest_RDI;
      canonical->arg2  = gst->guest_RSI;
      canonical->arg3  = gst->guest_RDX;
      canonical->arg4  = gst->guest_R10;  // not rcx with syscall insn
      canonical->arg5  = gst->guest_R8;
      canonical->arg6  = gst->guest_R9;
      canonical->arg7  = stack[1];
      canonical->arg8  = stack[2];
   } else {
      /* syscall() wrapper: real number is in rdi, args shift down. */
      // GrP fixme hack handle syscall()
      // GrP fixme what about __syscall() ?
      // stack[0] is return address
      // DDD: the tool can't see that the params have been shifted!  Can
      // lead to incorrect checking, I think, because the PRRAn/PSARn
      // macros will mention the pre-shifted args.
      canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(gst->guest_RDI);
      vg_assert(canonical->sysno != __NR_syscall);
      canonical->arg1  = gst->guest_RSI;
      canonical->arg2  = gst->guest_RDX;
      canonical->arg3  = gst->guest_R10;  // not rcx with syscall insn
      canonical->arg4  = gst->guest_R8;
      canonical->arg5  = gst->guest_R9;
      canonical->arg6  = stack[1];
      canonical->arg7  = stack[2];
      canonical->arg8  = stack[3];

      PRINT("SYSCALL[%d,?](%s) syscall(%s, ...); please stand by...\n",
            VG_(getpid)(), /*tid,*/
            VG_SYSNUM_STRING(0), VG_SYSNUM_STRING(canonical->sysno));
   }

   // no canonical->sysno adjustment needed

#else
#  error "getSyscallArgsFromGuestState: unknown arch"
#endif
}

/* Dual of getSyscallArgsFromGuestState: copy canonical args back into
   the platform-specific guest registers (and, on Darwin, the guest
   stack slots). */
static 
void putSyscallArgsIntoGuestState ( /*IN*/ SyscallArgs*       canonical,
                                    /*OUT*/VexGuestArchState* gst_vanilla )
{
#if defined(VGP_x86_linux)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   gst->guest_EAX = canonical->sysno;
   gst->guest_EBX = canonical->arg1;
   gst->guest_ECX = canonical->arg2;
   gst->guest_EDX = canonical->arg3;
   gst->guest_ESI = canonical->arg4;
   gst->guest_EDI = canonical->arg5;
   gst->guest_EBP = canonical->arg6;

#elif defined(VGP_amd64_linux)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   gst->guest_RAX = canonical->sysno;
   gst->guest_RDI = canonical->arg1;
   gst->guest_RSI = canonical->arg2;
   gst->guest_RDX = canonical->arg3;
   gst->guest_R10 = canonical->arg4;
   gst->guest_R8  = canonical->arg5;
   gst->guest_R9  = canonical->arg6;

#elif defined(VGP_ppc32_linux)
   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
   gst->guest_GPR0 = canonical->sysno;
   gst->guest_GPR3 = canonical->arg1;
   gst->guest_GPR4 = canonical->arg2;
   gst->guest_GPR5 = canonical->arg3;
   gst->guest_GPR6 = canonical->arg4;
   gst->guest_GPR7 = canonical->arg5;
   gst->guest_GPR8 = canonical->arg6;

#elif defined(VGP_ppc64_linux)
   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
   gst->guest_GPR0 = canonical->sysno;
   gst->guest_GPR3 = canonical->arg1;
   gst->guest_GPR4 = canonical->arg2;
   gst->guest_GPR5 = canonical->arg3;
   gst->guest_GPR6 = canonical->arg4;
   gst->guest_GPR7 = canonical->arg5;
   gst->guest_GPR8 = canonical->arg6;

#elif defined(VGP_arm_linux)
   VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
   gst->guest_R7 = canonical->sysno;
   gst->guest_R0 = canonical->arg1;
   gst->guest_R1 = canonical->arg2;
   gst->guest_R2 = canonical->arg3;
   gst->guest_R3 = canonical->arg4;
   gst->guest_R4 = canonical->arg5;
   gst->guest_R5 = canonical->arg6;

#elif defined(VGP_ppc32_aix5)
   /* AIX: sysno in r2, up to 8 args in r3..r10. */
   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
   gst->guest_GPR2  = canonical->sysno;
   gst->guest_GPR3  = canonical->arg1;
   gst->guest_GPR4  = canonical->arg2;
   gst->guest_GPR5  = canonical->arg3;
   gst->guest_GPR6  = canonical->arg4;
   gst->guest_GPR7  = canonical->arg5;
   gst->guest_GPR8  = canonical->arg6;
   gst->guest_GPR9  = canonical->arg7;
   gst->guest_GPR10 = canonical->arg8;

#elif defined(VGP_ppc64_aix5)
   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
   gst->guest_GPR2  = canonical->sysno;
   gst->guest_GPR3  = canonical->arg1;
   gst->guest_GPR4  = canonical->arg2;
   gst->guest_GPR5  = canonical->arg3;
   gst->guest_GPR6  = canonical->arg4;
   gst->guest_GPR7  = canonical->arg5;
   gst->guest_GPR8  = canonical->arg6;
   gst->guest_GPR9  = canonical->arg7;
   gst->guest_GPR10 = canonical->arg8;

#elif defined(VGP_x86_darwin)
   /* All 8 args go back onto the guest stack, above the return addr. */
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_ESP;

   gst->guest_EAX = VG_DARWIN_SYSNO_FOR_KERNEL(canonical->sysno);

   // GrP fixme? gst->guest_TEMP_EFLAG_C = 0;
   // stack[0] is return address
   stack[1] = canonical->arg1;
   stack[2] = canonical->arg2;
   stack[3] = canonical->arg3;
   stack[4] = canonical->arg4;
   stack[5] = canonical->arg5;
   stack[6] = canonical->arg6;
   stack[7] = canonical->arg7;
   stack[8] = canonical->arg8;

#elif defined(VGP_amd64_darwin)
   /* First 6 args in registers (rcx here, since the kernel sees them
      after the syscall insn has clobbered rcx/r10 handling), 7th/8th
      on the stack -- TODO confirm rcx-vs-r10 rationale against
      syscall-amd64-darwin.S. */
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_RSP;

   gst->guest_RAX = VG_DARWIN_SYSNO_FOR_KERNEL(canonical->sysno);
   // GrP fixme? gst->guest_TEMP_EFLAG_C = 0;

   // stack[0] is return address
   gst->guest_RDI = canonical->arg1;
   gst->guest_RSI = canonical->arg2;
   gst->guest_RDX = canonical->arg3;
   gst->guest_RCX = canonical->arg4;
   gst->guest_R8  = canonical->arg5;
   gst->guest_R9  = canonical->arg6;
   stack[1] = canonical->arg7;
   stack[2] = canonical->arg8;

#else
#  error "putSyscallArgsIntoGuestState: unknown arch"
#endif
}

/* Read the outcome of a completed syscall out of the guest state into
   canonical (SysRes) form.  Which registers/flags encode the result
   is platform-specific; see the table at the top of this file. */
static
void getSyscallStatusFromGuestState ( /*OUT*/SyscallStatus*     canonical,
                                      /*IN*/ VexGuestArchState* gst_vanilla )
{
#  if defined(VGP_x86_linux)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_x86_linux)( gst->guest_EAX );
   canonical->what = SsComplete;

#  elif defined(VGP_amd64_linux)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_amd64_linux)( gst->guest_RAX );
   canonical->what = SsComplete;

#  elif defined(VGP_ppc32_linux)
   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
   UInt cr    = LibVEX_GuestPPC32_get_CR( gst );
   UInt cr0so = (cr >> 28) & 1;  /* CR0.SO bit signals failure on ppc */
   canonical->sres = VG_(mk_SysRes_ppc32_linux)( gst->guest_GPR3, cr0so );
   canonical->what = SsComplete;

#  elif defined(VGP_ppc64_linux)
   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
   UInt cr    = LibVEX_GuestPPC64_get_CR( gst );
   UInt cr0so = (cr >> 28) & 1;  /* CR0.SO bit signals failure on ppc */
   canonical->sres = VG_(mk_SysRes_ppc64_linux)( gst->guest_GPR3, cr0so );
   canonical->what = SsComplete;

#  elif defined(VGP_arm_linux)
   VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_arm_linux)( gst->guest_R0 );
   canonical->what = SsComplete;

#  elif defined(VGP_ppc32_aix5)
   /* AIX returns (result, error) in (r3, r4). */
   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_ppc32_aix5)( gst->guest_GPR3, 
                                                gst->guest_GPR4 );
   canonical->what = SsComplete;

#  elif defined(VGP_ppc64_aix5)
   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_ppc64_aix5)( gst->guest_GPR3, 
                                                gst->guest_GPR4 );
   canonical->what = SsComplete;

#  elif defined(VGP_x86_darwin)
   /* duplicates logic in m_signals.VG_UCONTEXT_SYSCALL_SYSRES */
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   UInt carry = 1 & LibVEX_GuestX86_get_eflags(gst);  /* eflags.C = error */
   UInt err = 0;
   UInt wLO = 0;
   UInt wHI = 0;
   switch (gst->guest_SC_CLASS) {
      case VG_DARWIN_SYSCALL_CLASS_UNIX:
         // int $0x80 = Unix, 64-bit result
         err = carry;
         wLO = gst->guest_EAX;
         wHI = gst->guest_EDX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MACH:
         // int $0x81 = Mach, 32-bit result
         wLO = gst->guest_EAX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MDEP:
         // int $0x82 = mdep, 32-bit result
         wLO = gst->guest_EAX;
         break;
      default: 
         vg_assert(0);
         break;
   }
   canonical->sres = VG_(mk_SysRes_x86_darwin)(
                        gst->guest_SC_CLASS, err ? True : False, 
                        wHI, wLO
                     );
   canonical->what = SsComplete;

#  elif defined(VGP_amd64_darwin)
   /* duplicates logic in m_signals.VG_UCONTEXT_SYSCALL_SYSRES */
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   ULong carry = 1 & LibVEX_GuestAMD64_get_rflags(gst);  /* rflags.C = error */
   ULong err = 0;
   ULong wLO = 0;
   ULong wHI = 0;
   switch (gst->guest_SC_CLASS) {
      case VG_DARWIN_SYSCALL_CLASS_UNIX:
         // syscall = Unix, 128-bit result
         err = carry;
         wLO = gst->guest_RAX;
         wHI = gst->guest_RDX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MACH:
         // syscall = Mach, 64-bit result
         wLO = gst->guest_RAX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MDEP:
         // syscall = mdep, 64-bit result
         wLO = gst->guest_RAX;
         break;
      default: 
         vg_assert(0);
         break;
   }
   canonical->sres = VG_(mk_SysRes_amd64_darwin)(
                        gst->guest_SC_CLASS, err ? True : False, 
                        wHI, wLO
                     );
   canonical->what = SsComplete;

#  else
#    error "getSyscallStatusFromGuestState: unknown arch"
#  endif
}

/* Dual of getSyscallStatusFromGuestState: write the outcome in
   'canonical' back into the guest state, notifying the tool of each
   guest register written (via VG_TRACK post_reg_write). */
static 
void putSyscallStatusIntoGuestState ( /*IN*/ ThreadId tid, 
                                      /*IN*/ SyscallStatus*     canonical,
                                      /*OUT*/VexGuestArchState* gst_vanilla )
{
#  if defined(VGP_x86_linux)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   vg_assert(canonical->what == SsComplete);
   if (sr_isError(canonical->sres)) {
      /* This isn't exactly right, in that really a Failure with res
         not in the range 1 .. 4095 is unrepresentable in the
         Linux-x86 scheme.  Oh well.
*/ 862 gst->guest_EAX = - (Int)sr_Err(canonical->sres); 863 } else { 864 gst->guest_EAX = sr_Res(canonical->sres); 865 } 866 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 867 OFFSET_x86_EAX, sizeof(UWord) ); 868 869 # elif defined(VGP_amd64_linux) 870 VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla; 871 vg_assert(canonical->what == SsComplete); 872 if (sr_isError(canonical->sres)) { 873 /* This isn't exactly right, in that really a Failure with res 874 not in the range 1 .. 4095 is unrepresentable in the 875 Linux-amd64 scheme. Oh well. */ 876 gst->guest_RAX = - (Long)sr_Err(canonical->sres); 877 } else { 878 gst->guest_RAX = sr_Res(canonical->sres); 879 } 880 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 881 OFFSET_amd64_RAX, sizeof(UWord) ); 882 883 # elif defined(VGP_ppc32_linux) 884 VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla; 885 UInt old_cr = LibVEX_GuestPPC32_get_CR(gst); 886 vg_assert(canonical->what == SsComplete); 887 if (sr_isError(canonical->sres)) { 888 /* set CR0.SO */ 889 LibVEX_GuestPPC32_put_CR( old_cr | (1<<28), gst ); 890 gst->guest_GPR3 = sr_Err(canonical->sres); 891 } else { 892 /* clear CR0.SO */ 893 LibVEX_GuestPPC32_put_CR( old_cr & ~(1<<28), gst ); 894 gst->guest_GPR3 = sr_Res(canonical->sres); 895 } 896 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 897 OFFSET_ppc32_GPR3, sizeof(UWord) ); 898 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 899 OFFSET_ppc32_CR0_0, sizeof(UChar) ); 900 901 # elif defined(VGP_ppc64_linux) 902 VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla; 903 UInt old_cr = LibVEX_GuestPPC64_get_CR(gst); 904 vg_assert(canonical->what == SsComplete); 905 if (sr_isError(canonical->sres)) { 906 /* set CR0.SO */ 907 LibVEX_GuestPPC64_put_CR( old_cr | (1<<28), gst ); 908 gst->guest_GPR3 = sr_Err(canonical->sres); 909 } else { 910 /* clear CR0.SO */ 911 LibVEX_GuestPPC64_put_CR( old_cr & ~(1<<28), gst ); 912 gst->guest_GPR3 = sr_Res(canonical->sres); 913 } 914 VG_TRACK( post_reg_write, 
Vg_CoreSysCall, tid, 915 OFFSET_ppc64_GPR3, sizeof(UWord) ); 916 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 917 OFFSET_ppc64_CR0_0, sizeof(UChar) ); 918 919 # elif defined(VGP_arm_linux) 920 VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla; 921 vg_assert(canonical->what == SsComplete); 922 if (sr_isError(canonical->sres)) { 923 /* This isn't exactly right, in that really a Failure with res 924 not in the range 1 .. 4095 is unrepresentable in the 925 Linux-arm scheme. Oh well. */ 926 gst->guest_R0 = - (Int)sr_Err(canonical->sres); 927 } else { 928 gst->guest_R0 = sr_Res(canonical->sres); 929 } 930 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 931 OFFSET_arm_R0, sizeof(UWord) ); 932 933 # elif defined(VGP_ppc32_aix5) 934 VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla; 935 vg_assert(canonical->what == SsComplete); 936 gst->guest_GPR3 = canonical->sres.res; 937 gst->guest_GPR4 = canonical->sres.err; 938 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 939 OFFSET_ppc32_GPR3, sizeof(UWord) ); 940 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 941 OFFSET_ppc32_GPR4, sizeof(UWord) ); 942 943 # elif defined(VGP_ppc64_aix5) 944 VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla; 945 vg_assert(canonical->what == SsComplete); 946 gst->guest_GPR3 = canonical->sres.res; 947 gst->guest_GPR4 = canonical->sres.err; 948 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 949 OFFSET_ppc64_GPR3, sizeof(UWord) ); 950 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 951 OFFSET_ppc64_GPR4, sizeof(UWord) ); 952 953 #elif defined(VGP_x86_darwin) 954 VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla; 955 SysRes sres = canonical->sres; 956 vg_assert(canonical->what == SsComplete); 957 /* Unfortunately here we have to break abstraction and look 958 directly inside 'res', in order to decide what to do. 
*/ 959 switch (sres._mode) { 960 case SysRes_MACH: // int $0x81 = Mach, 32-bit result 961 case SysRes_MDEP: // int $0x82 = mdep, 32-bit result 962 gst->guest_EAX = sres._wLO; 963 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 964 OFFSET_x86_EAX, sizeof(UInt) ); 965 break; 966 case SysRes_UNIX_OK: // int $0x80 = Unix, 64-bit result 967 case SysRes_UNIX_ERR: // int $0x80 = Unix, 64-bit error 968 gst->guest_EAX = sres._wLO; 969 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 970 OFFSET_x86_EAX, sizeof(UInt) ); 971 gst->guest_EDX = sres._wHI; 972 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 973 OFFSET_x86_EDX, sizeof(UInt) ); 974 LibVEX_GuestX86_put_eflag_c( sres._mode==SysRes_UNIX_ERR ? 1 : 0, 975 gst ); 976 // GrP fixme sets defined for entire eflags, not just bit c 977 // DDD: this breaks exp-ptrcheck. 978 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 979 offsetof(VexGuestX86State, guest_CC_DEP1), sizeof(UInt) ); 980 break; 981 default: 982 vg_assert(0); 983 break; 984 } 985 986 #elif defined(VGP_amd64_darwin) 987 VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla; 988 SysRes sres = canonical->sres; 989 vg_assert(canonical->what == SsComplete); 990 /* Unfortunately here we have to break abstraction and look 991 directly inside 'res', in order to decide what to do. 
*/ 992 switch (sres._mode) { 993 case SysRes_MACH: // syscall = Mach, 64-bit result 994 case SysRes_MDEP: // syscall = mdep, 64-bit result 995 gst->guest_RAX = sres._wLO; 996 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 997 OFFSET_amd64_RAX, sizeof(ULong) ); 998 break; 999 case SysRes_UNIX_OK: // syscall = Unix, 128-bit result 1000 case SysRes_UNIX_ERR: // syscall = Unix, 128-bit error 1001 gst->guest_RAX = sres._wLO; 1002 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 1003 OFFSET_amd64_RAX, sizeof(ULong) ); 1004 gst->guest_RDX = sres._wHI; 1005 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 1006 OFFSET_amd64_RDX, sizeof(ULong) ); 1007 LibVEX_GuestAMD64_put_rflag_c( sres._mode==SysRes_UNIX_ERR ? 1 : 0, 1008 gst ); 1009 // GrP fixme sets defined for entire rflags, not just bit c 1010 // DDD: this breaks exp-ptrcheck. 1011 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid, 1012 offsetof(VexGuestAMD64State, guest_CC_DEP1), sizeof(ULong) ); 1013 break; 1014 default: 1015 vg_assert(0); 1016 break; 1017 } 1018 1019 # else 1020 # error "putSyscallStatusIntoGuestState: unknown arch" 1021 # endif 1022 } 1023 1024 1025 /* Tell me the offsets in the guest state of the syscall params, so 1026 that the scalar argument checkers don't have to have this info 1027 hardwired. 
*/

/* Fill in 'layout' with this platform's syscall argument locations.
   o_* fields are offsets into the guest state (register-carried
   args); s_* fields are offsets from the stack pointer, used on
   Darwin where some or all args are passed on the stack in the C
   calling convention (see the table at the top of this file).
   uu_* slots are unused on this platform and set to -1, an
   impossible offset. */
static 
void getSyscallArgLayout ( /*OUT*/SyscallArgLayout* layout )
{
#if defined(VGP_x86_linux)
   layout->o_sysno  = OFFSET_x86_EAX;
   layout->o_arg1   = OFFSET_x86_EBX;
   layout->o_arg2   = OFFSET_x86_ECX;
   layout->o_arg3   = OFFSET_x86_EDX;
   layout->o_arg4   = OFFSET_x86_ESI;
   layout->o_arg5   = OFFSET_x86_EDI;
   layout->o_arg6   = OFFSET_x86_EBP;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_amd64_linux)
   layout->o_sysno  = OFFSET_amd64_RAX;
   layout->o_arg1   = OFFSET_amd64_RDI;
   layout->o_arg2   = OFFSET_amd64_RSI;
   layout->o_arg3   = OFFSET_amd64_RDX;
   layout->o_arg4   = OFFSET_amd64_R10;
   layout->o_arg5   = OFFSET_amd64_R8;
   layout->o_arg6   = OFFSET_amd64_R9;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_ppc32_linux)
   layout->o_sysno  = OFFSET_ppc32_GPR0;
   layout->o_arg1   = OFFSET_ppc32_GPR3;
   layout->o_arg2   = OFFSET_ppc32_GPR4;
   layout->o_arg3   = OFFSET_ppc32_GPR5;
   layout->o_arg4   = OFFSET_ppc32_GPR6;
   layout->o_arg5   = OFFSET_ppc32_GPR7;
   layout->o_arg6   = OFFSET_ppc32_GPR8;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_ppc64_linux)
   layout->o_sysno  = OFFSET_ppc64_GPR0;
   layout->o_arg1   = OFFSET_ppc64_GPR3;
   layout->o_arg2   = OFFSET_ppc64_GPR4;
   layout->o_arg3   = OFFSET_ppc64_GPR5;
   layout->o_arg4   = OFFSET_ppc64_GPR6;
   layout->o_arg5   = OFFSET_ppc64_GPR7;
   layout->o_arg6   = OFFSET_ppc64_GPR8;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_arm_linux)
   layout->o_sysno  = OFFSET_arm_R7;
   layout->o_arg1   = OFFSET_arm_R0;
   layout->o_arg2   = OFFSET_arm_R1;
   layout->o_arg3   = OFFSET_arm_R2;
   layout->o_arg4   = OFFSET_arm_R3;
   layout->o_arg5   = OFFSET_arm_R4;
   layout->o_arg6   = OFFSET_arm_R5;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_ppc32_aix5)
   /* AIX uses GPR2 for the syscall number and supports 8 args. */
   layout->o_sysno  = OFFSET_ppc32_GPR2;
   layout->o_arg1   = OFFSET_ppc32_GPR3;
   layout->o_arg2   = OFFSET_ppc32_GPR4;
   layout->o_arg3   = OFFSET_ppc32_GPR5;
   layout->o_arg4   = OFFSET_ppc32_GPR6;
   layout->o_arg5   = OFFSET_ppc32_GPR7;
   layout->o_arg6   = OFFSET_ppc32_GPR8;
   layout->o_arg7   = OFFSET_ppc32_GPR9;
   layout->o_arg8   = OFFSET_ppc32_GPR10;

#elif defined(VGP_ppc64_aix5)
   layout->o_sysno  = OFFSET_ppc64_GPR2;
   layout->o_arg1   = OFFSET_ppc64_GPR3;
   layout->o_arg2   = OFFSET_ppc64_GPR4;
   layout->o_arg3   = OFFSET_ppc64_GPR5;
   layout->o_arg4   = OFFSET_ppc64_GPR6;
   layout->o_arg5   = OFFSET_ppc64_GPR7;
   layout->o_arg6   = OFFSET_ppc64_GPR8;
   layout->o_arg7   = OFFSET_ppc64_GPR9;
   layout->o_arg8   = OFFSET_ppc64_GPR10;

#elif defined(VGP_x86_darwin)
   layout->o_sysno  = OFFSET_x86_EAX;
   // syscall parameters are on stack in C convention
   layout->s_arg1   = sizeof(UWord) * 1;
   layout->s_arg2   = sizeof(UWord) * 2;
   layout->s_arg3   = sizeof(UWord) * 3;
   layout->s_arg4   = sizeof(UWord) * 4;
   layout->s_arg5   = sizeof(UWord) * 5;
   layout->s_arg6   = sizeof(UWord) * 6;
   layout->s_arg7   = sizeof(UWord) * 7;
   layout->s_arg8   = sizeof(UWord) * 8;

#elif defined(VGP_amd64_darwin)
   /* First six args in registers; args 7 and 8 on the stack. */
   layout->o_sysno  = OFFSET_amd64_RAX;
   layout->o_arg1   = OFFSET_amd64_RDI;
   layout->o_arg2   = OFFSET_amd64_RSI;
   layout->o_arg3   = OFFSET_amd64_RDX;
   layout->o_arg4   = OFFSET_amd64_RCX;
   layout->o_arg5   = OFFSET_amd64_R8;
   layout->o_arg6   = OFFSET_amd64_R9;
   layout->s_arg7   = sizeof(UWord) * 1;
   layout->s_arg8   = sizeof(UWord) * 2;

#else
#error "getSyscallLayout: unknown arch"
#endif
}


/*
---------------------------------------------------------------------
   The main driver logic
   ------------------------------------------------------------------ */

/* Finding the handlers for a given syscall, or faking up one
   when no handler is found. */

/* Dummy pre-handler installed (via bad_sys) for syscalls that have no
   real wrapper: warn the user, optionally show a backtrace, and fail
   the call with ENOSYS without handing it to the kernel. */
static
void bad_before ( ThreadId              tid,
                  SyscallArgLayout*     layout,
                  /*MOD*/SyscallArgs*   args,
                  /*OUT*/SyscallStatus* status,
                  /*OUT*/UWord*         flags )
{
   VG_(dmsg)("WARNING: unhandled syscall: %s\n",
      VG_SYSNUM_STRING_EXTRA(args->sysno));
   if (VG_(clo_verbosity) > 1) {
      VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
   }
   VG_(dmsg)("You may be able to write your own handler.\n");
   VG_(dmsg)("Read the file README_MISSING_SYSCALL_OR_IOCTL.\n");
   VG_(dmsg)("Nevertheless we consider this a bug.  Please report\n");
   VG_(dmsg)("it at http://valgrind.org/support/bug_reports.html.\n");

   SET_STATUS_Failure(VKI_ENOSYS);
}

/* Fallback table entry: warn-and-fail pre-handler, no post-handler. */
static SyscallTableEntry bad_sys =
   { bad_before, NULL };

/* Look up the pre/post handler pair for 'syscallno'.  Never returns
   NULL: if no handler is registered, &bad_sys is returned instead. */
static const SyscallTableEntry* get_syscall_entry ( Int syscallno )
{
   const SyscallTableEntry* sys = NULL;

#  if defined(VGO_linux)
   sys = ML_(get_linux_syscall_entry)( syscallno );

#  elif defined(VGP_ppc32_aix5)
   sys = ML_(get_ppc32_aix5_syscall_entry) ( syscallno );

#  elif defined(VGP_ppc64_aix5)
   sys = ML_(get_ppc64_aix5_syscall_entry) ( syscallno );

#  elif defined(VGO_darwin)
   /* Darwin keeps a separate handler table per syscall class. */
   Int idx = VG_DARWIN_SYSNO_INDEX(syscallno);

   switch (VG_DARWIN_SYSNO_CLASS(syscallno)) {
      case VG_DARWIN_SYSCALL_CLASS_UNIX:
         if (idx >= 0 && idx < ML_(syscall_table_size) &&
             ML_(syscall_table)[idx].before != NULL)
            sys = &ML_(syscall_table)[idx];
         break;
      case VG_DARWIN_SYSCALL_CLASS_MACH:
         if (idx >= 0 && idx < ML_(mach_trap_table_size) &&
             ML_(mach_trap_table)[idx].before != NULL)
            sys = &ML_(mach_trap_table)[idx];
         break;
      case VG_DARWIN_SYSCALL_CLASS_MDEP:
         if (idx >= 0 && idx < ML_(mdep_trap_table_size) &&
             ML_(mdep_trap_table)[idx].before != NULL)
            sys = &ML_(mdep_trap_table)[idx];
         break;
      default:
         vg_assert(0);
         break;
   }

#  else
#    error Unknown OS
#  endif

   return sys == NULL ? &bad_sys : sys;
}


/* Add and remove signals from mask so that we end up telling the
   kernel the state we actually want rather than what the client
   wants. */
static void sanitize_client_sigmask(vki_sigset_t *mask)
{
   /* SIGKILL and SIGSTOP cannot be blocked anyway; VG_SIGVGKILL is
      Valgrind's own thread-kill signal and must never be blocked. */
   VG_(sigdelset)(mask, VKI_SIGKILL);
   VG_(sigdelset)(mask, VKI_SIGSTOP);
   VG_(sigdelset)(mask, VG_SIGVGKILL); /* never block */
}

/* Per-thread record of the syscall currently in flight. */
typedef
   struct {
      SyscallArgs   orig_args; /* args as read from the guest state */
      SyscallArgs   args;      /* possibly modified by the pre-handler */
      SyscallStatus status;    /* SsIdle / SsHandToKernel / SsComplete */
      UWord         flags;     /* Sf* flags set by the pre-handler */
   }
   SyscallInfo;

SyscallInfo syscallInfo[VG_N_THREADS];


/* The scheduler needs to be able to zero out these records after a
   fork, hence this is exported from m_syswrap. */
void VG_(clear_syscallInfo) ( Int tid )
{
   vg_assert(tid >= 0 && tid < VG_N_THREADS);
   VG_(memset)( & syscallInfo[tid], 0, sizeof( syscallInfo[tid] ));
   syscallInfo[tid].status.what = SsIdle;
}

/* One-time lazy initialisation: mark every per-thread syscall record
   as idle.  Called at the top of VG_(client_syscall). */
static void ensure_initialised ( void )
{
   Int i;
   static Bool init_done = False;
   if (init_done)
      return;
   init_done = True;
   for (i = 0; i < VG_N_THREADS; i++) {
      VG_(clear_syscallInfo)( i );
   }
}

/* --- This is the main function of this file.
--- */

/* Execute a syscall on behalf of the client: fetch args from the
   guest state, run the pre-handler, hand the call to the kernel
   (synchronously or asynchronously as the pre-handler directs),
   write the result back into the guest state, and finally run
   VG_(post_syscall).  'trc' is the translation return code that got
   us here, used to decode the args on platforms that need it. */
void VG_(client_syscall) ( ThreadId tid, UInt trc )
{
   Word                     sysno;
   ThreadState*             tst;
   const SyscallTableEntry* ent;
   SyscallArgLayout         layout;
   SyscallInfo*             sci;

   ensure_initialised();

   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

   tst = VG_(get_ThreadState)(tid);

   /* BEGIN ensure root thread's stack is suitably mapped */
   /* In some rare circumstances, we may do the syscall without the
      bottom page of the stack being mapped, because the stack pointer
      was moved down just a few instructions before the syscall
      instruction, and there have been no memory references since
      then, that would cause a call to VG_(extend_stack) to have
      happened.

      In native execution that's OK: the kernel automagically extends
      the stack's mapped area down to cover the stack pointer (or sp -
      redzone, really).  In simulated normal execution that's OK too,
      since any signals we get from accessing below the mapped area of
      the (guest's) stack lead us to VG_(extend_stack), where we
      simulate the kernel's stack extension logic.  But that leaves
      the problem of entering a syscall with the SP unmapped.  Because
      the kernel doesn't know that the segment immediately above SP is
      supposed to be a grow-down segment, it causes the syscall to
      fail, and thereby causes a divergence between native behaviour
      (syscall succeeds) and simulated behaviour (syscall fails).

      This is quite a rare failure mode.  It has only been seen
      affecting calls to sys_readlink on amd64-linux, and even then it
      requires a certain code sequence around the syscall to trigger
      it.  Here is one:

      extern int my_readlink ( const char* path );
      asm(
      ".text\n"
      ".globl my_readlink\n"
      "my_readlink:\n"
      "\tsubq    $0x1008,%rsp\n"
      "\tmovq    %rdi,%rdi\n"              // path is in rdi
      "\tmovq    %rsp,%rsi\n"              // &buf[0] -> rsi
      "\tmovl    $0x1000,%edx\n"           // sizeof(buf) in rdx
      "\tmovl    $"__NR_READLINK",%eax\n"  // syscall number
      "\tsyscall\n"
      "\taddq    $0x1008,%rsp\n"
      "\tret\n"
      ".previous\n"
      );

      For more details, see bug #156404
      (https://bugs.kde.org/show_bug.cgi?id=156404).

      The fix is actually very simple.  We simply need to call
      VG_(extend_stack) for this thread, handing it the lowest
      possible valid address for stack (sp - redzone), to ensure the
      pages all the way down to that address, are mapped.  Because
      this is a potentially expensive and frequent operation, we
      filter in two ways:

      First, only the main thread (tid=1) has a growdown stack.  So
      ignore all others.  It is conceivable, although highly unlikely,
      that the main thread exits, and later another thread is
      allocated tid=1, but that's harmless, I believe;
      VG_(extend_stack) will do nothing when applied to a non-root
      thread.

      Secondly, first call VG_(am_find_nsegment) directly, to see if
      the page holding (sp - redzone) is mapped correctly.  If so, do
      nothing.  This is almost always the case.  VG_(extend_stack)
      calls VG_(am_find_nsegment) twice, so this optimisation -- and
      that's all it is -- more or less halves the number of calls to
      VG_(am_find_nsegment) required.

      TODO: the test "seg->kind == SkAnonC" is really inadequate,
      because although it tests whether the segment is mapped
      _somehow_, it doesn't check that it has the right permissions
      (r,w, maybe x) ?  We could test that here, but it will also be
      necessary to fix the corresponding test in VG_(extend_stack).

      All this guff is of course Linux-specific.  Hence the ifdef.
   */
#  if defined(VGO_linux)
   if (tid == 1/*ROOT THREAD*/) {
      Addr stackMin = VG_(get_SP)(tid) - VG_STACK_REDZONE_SZB;
      NSegment const* seg = VG_(am_find_nsegment)(stackMin);
      if (seg && seg->kind == SkAnonC) {
         /* stackMin is already mapped.  Nothing to do. */
      } else {
         (void)VG_(extend_stack)( stackMin,
                                  tst->client_stack_szB );
      }
   }
#  endif
   /* END ensure root thread's stack is suitably mapped */

   /* First off, get the syscall args and number.  This is a
      platform-dependent action. */

   sci = & syscallInfo[tid];
   vg_assert(sci->status.what == SsIdle);

   getSyscallArgsFromGuestState( &sci->orig_args, &tst->arch.vex, trc );

   /* Copy .orig_args to .args.  The pre-handler may modify .args, but
      we want to keep the originals too, just in case. */
   sci->args = sci->orig_args;

   /* Save the syscall number in the thread state in case the syscall
      is interrupted by a signal. */
   sysno = sci->orig_args.sysno;

#  if defined(VGO_darwin)
   /* Record syscall class.  But why?  Because the syscall might be
      interrupted by a signal, and in the signal handler (which will
      be m_signals.async_signalhandler) we will need to build a SysRes
      reflecting the syscall return result.  In order to do that we
      need to know the syscall class.  Hence stash it in the guest
      state of this thread.  This madness is not needed on Linux or
      AIX5, because those OSs only have a single syscall return
      convention and so there is no ambiguity involved in converting
      the post-signal machine state into a SysRes. */
   tst->arch.vex.guest_SC_CLASS = VG_DARWIN_SYSNO_CLASS(sysno);
#  endif

   /* The default what-to-do-next thing is hand the syscall to the
      kernel, so we pre-set that here.  Set .sres to something
      harmless looking (is irrelevant because .what is not
      SsComplete.) */
   sci->status.what = SsHandToKernel;
   sci->status.sres = VG_(mk_SysRes_Error)(0);
   sci->flags       = 0;

   /* Fetch the syscall's handlers.  If no handlers exist for this
      syscall, we are given dummy handlers which force an immediate
      return with ENOSYS. */
   ent = get_syscall_entry(sysno);

   /* Fetch the layout information, which tells us where in the guest
      state the syscall args reside.  This is a platform-dependent
      action.  This info is needed so that the scalar syscall argument
      checks (PRE_REG_READ calls) know which bits of the guest state
      they need to inspect. */
   getSyscallArgLayout( &layout );

   /* Make sure the tmp signal mask matches the real signal mask;
      sigsuspend may change this. */
   vg_assert(VG_(iseqsigset)(&tst->sig_mask, &tst->tmp_sig_mask));

   /* Right, we're finally ready to Party.  Call the pre-handler and
      see what we get back.  At this point:

        sci->status.what  is Unset (we don't know yet).
        sci->orig_args    contains the original args.
        sci->args         is the same as sci->orig_args.
        sci->flags        is zero.
   */

   PRINT("SYSCALL[%d,%d](%s) ",
      VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno));

   /* Do any pre-syscall actions */
   if (VG_(needs).syscall_wrapper) {
      UWord tmpv[8];
      tmpv[0] = sci->orig_args.arg1;
      tmpv[1] = sci->orig_args.arg2;
      tmpv[2] = sci->orig_args.arg3;
      tmpv[3] = sci->orig_args.arg4;
      tmpv[4] = sci->orig_args.arg5;
      tmpv[5] = sci->orig_args.arg6;
      tmpv[6] = sci->orig_args.arg7;
      tmpv[7] = sci->orig_args.arg8;
      VG_TDICT_CALL(tool_pre_syscall, tid, sysno,
                    &tmpv[0], sizeof(tmpv)/sizeof(tmpv[0]));
   }

   vg_assert(ent);
   vg_assert(ent->before);
   (ent->before)( tid,
                  &layout,
                  &sci->args, &sci->status, &sci->flags );

   /* The pre-handler may have modified:
         sci->args
         sci->status
         sci->flags
      All else remains unchanged.
      Although the args may be modified, pre handlers are not allowed
      to change the syscall number.
   */
   /* Now we proceed according to what the pre-handler decided. */
   vg_assert(sci->status.what == SsHandToKernel
             || sci->status.what == SsComplete);
   vg_assert(sci->args.sysno == sci->orig_args.sysno);

   if (sci->status.what == SsComplete && !sr_isError(sci->status.sres)) {
      /* The pre-handler completed the syscall itself, declaring
         success. */
      if (sci->flags & SfNoWriteResult) {
         PRINT(" --> [pre-success] NoWriteResult");
      } else {
         PRINT(" --> [pre-success] Success(0x%llx:0x%llx)",
               (ULong)sr_ResHI(sci->status.sres),
               (ULong)sr_Res(sci->status.sres));
      }
      /* In this case the allowable flags are to ask for a signal-poll
         and/or a yield after the call.  Changing the args isn't
         allowed. */
      vg_assert(0 == (sci->flags
                      & ~(SfPollAfter | SfYieldAfter | SfNoWriteResult)));
      vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
   }

   else
   if (sci->status.what == SsComplete && sr_isError(sci->status.sres)) {
      /* The pre-handler decided to fail syscall itself. */
      PRINT(" --> [pre-fail] Failure(0x%llx)", (ULong)sr_Err(sci->status.sres));
      /* In this case, the pre-handler is also allowed to ask for the
         post-handler to be run anyway.  Changing the args is not
         allowed. */
      vg_assert(0 == (sci->flags & ~(SfMayBlock | SfPostOnFail | SfPollAfter)));
      vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
   }

   else
   if (sci->status.what != SsHandToKernel) {
      /* huh?! */
      vg_assert(0);
   }

   else /* (sci->status.what == HandToKernel) */ {
      /* Ok, this is the usual case -- and the complicated one.  There
         are two subcases: sync and async.  async is the general case
         and is to be used when there is any possibility that the
         syscall might block [a fact that the pre-handler must tell us
         via the sci->flags field.]  Because the tidying-away /
         context-switch overhead of the async case could be large, if
         we are sure that the syscall will not block, we fast-track it
         by doing it directly in this thread, which is a lot
         simpler. */

      /* Check that the given flags are allowable: MayBlock, PollAfter
         and PostOnFail are ok. */
      vg_assert(0 == (sci->flags & ~(SfMayBlock | SfPostOnFail | SfPollAfter)));

      if (sci->flags & SfMayBlock) {

         /* Syscall may block, so run it asynchronously */
         vki_sigset_t mask;

         PRINT(" --> [async] ... \n");

         mask = tst->sig_mask;
         sanitize_client_sigmask(&mask);

         /* Gack.  More impedance matching.  Copy the possibly
            modified syscall args back into the guest state. */
         /* JRS 2009-Mar-16: if the syscall args are possibly modified,
            then this assertion is senseless:
              vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
            The case that exposed it was sys_posix_spawn on Darwin,
            which heavily modifies its arguments but then lets the call
            go through anyway, with SfToBlock set, hence we end up here. */
         putSyscallArgsIntoGuestState( &sci->args, &tst->arch.vex );

         /* Drop the bigLock */
         VG_(release_BigLock)(tid, VgTs_WaitSys, "VG_(client_syscall)[async]");
         /* Urr.  We're now in a race against other threads trying to
            acquire the bigLock.  I guess that doesn't matter provided
            that do_syscall_for_client only touches thread-local
            state. */

         /* Do the call, which operates directly on the guest state,
            not on our abstracted copies of the args/result. */
         do_syscall_for_client(sysno, tst, &mask);

         /* do_syscall_for_client may not return if the syscall was
            interrupted by a signal.  In that case, flow of control is
            first to m_signals.async_sighandler, which calls
            VG_(fixup_guest_state_after_syscall_interrupted), which
            fixes up the guest state, and possibly calls
            VG_(post_syscall).  Once that's done, control drops back
            to the scheduler. */

         /* Darwin: do_syscall_for_client may not return if the
            syscall was workq_ops(WQOPS_THREAD_RETURN) and the kernel
            responded by starting the thread at wqthread_hijack(reuse=1)
            (to run another workqueue item). In that case, wqthread_hijack
            calls ML_(wqthread_continue), which is similar to
            VG_(fixup_guest_state_after_syscall_interrupted). */

         /* Reacquire the lock */
         VG_(acquire_BigLock)(tid, "VG_(client_syscall)[async]");

         /* Even more impedance matching.  Extract the syscall status
            from the guest state. */
         getSyscallStatusFromGuestState( &sci->status, &tst->arch.vex );
         vg_assert(sci->status.what == SsComplete);

         /* Be decorative, if required. */
         if (VG_(clo_trace_syscalls)) {
            Bool failed = sr_isError(sci->status.sres);
            if (failed) {
               PRINT("SYSCALL[%d,%d](%s) ... [async] --> Failure(0x%llx)",
                     VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno),
                     (ULong)sr_Err(sci->status.sres));
            } else {
               PRINT("SYSCALL[%d,%d](%s) ... [async] --> "
                     "Success(0x%llx:0x%llx)",
                     VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno),
                     (ULong)sr_ResHI(sci->status.sres),
                     (ULong)sr_Res(sci->status.sres) );
            }
         }

      } else {

         /* run the syscall directly */
         /* The pre-handler may have modified the syscall args, but
            since we're passing values in ->args directly to the
            kernel, there's no point in flushing them back to the
            guest state.  Indeed doing so could be construed as
            incorrect. */
         SysRes sres
            = VG_(do_syscall)(sysno, sci->args.arg1, sci->args.arg2,
                                     sci->args.arg3, sci->args.arg4,
                                     sci->args.arg5, sci->args.arg6,
                                     sci->args.arg7, sci->args.arg8 );
         sci->status = convert_SysRes_to_SyscallStatus(sres);

         /* Be decorative, if required. */
         if (VG_(clo_trace_syscalls)) {
            Bool failed = sr_isError(sci->status.sres);
            if (failed) {
               PRINT("[sync] --> Failure(0x%llx)",
                     (ULong)sr_Err(sci->status.sres) );
            } else {
               PRINT("[sync] --> Success(0x%llx:0x%llx)",
                     (ULong)sr_ResHI(sci->status.sres),
                     (ULong)sr_Res(sci->status.sres) );
            }
         }
      }
   }

   vg_assert(sci->status.what == SsComplete);

   vg_assert(VG_(is_running_thread)(tid));

   /* Dump the syscall result back in the guest state.  This is
      a platform-specific action. */
   if (!(sci->flags & SfNoWriteResult))
      putSyscallStatusIntoGuestState( tid, &sci->status, &tst->arch.vex );

   /* Situation now:
      - the guest state is now correctly modified following the syscall
      - modified args, original args and syscall status are still
        available in the syscallInfo[] entry for this syscall.

      Now go on to do the post-syscall actions (read on down ..)
   */
   PRINT(" ");
   VG_(post_syscall)(tid);
   PRINT("\n");
}


/* Perform post syscall actions.  The expected state on entry is
   precisely as at the end of VG_(client_syscall), that is:

   - guest state up to date following the syscall
   - modified args, original args and syscall status are still
     available in the syscallInfo[] entry for this syscall.
   - syscall status matches what's in the guest state.

   There are two ways to get here: the normal way -- being called by
   VG_(client_syscall), and the unusual way, from
   VG_(fixup_guest_state_after_syscall_interrupted).
   Darwin: there's a third way, ML_(wqthread_continue).
*/
void VG_(post_syscall) (ThreadId tid)
{
   SyscallInfo*             sci;
   const SyscallTableEntry* ent;
   SyscallStatus            test_status;
   ThreadState*             tst;
   Word sysno;

   /* Preliminaries */
   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

   tst = VG_(get_ThreadState)(tid);
   sci = & syscallInfo[tid];

   /* m_signals.sigvgkill_handler might call here even when not in
      a syscall. */
   if (sci->status.what == SsIdle || sci->status.what == SsHandToKernel) {
      sci->status.what = SsIdle;
      return;
   }

   /* Validate current syscallInfo entry.  In particular we require
      that the current .status matches what's actually in the guest
      state.  At least in the normal case where we have actually
      previously written the result into the guest state. */
   vg_assert(sci->status.what == SsComplete);

   getSyscallStatusFromGuestState( &test_status, &tst->arch.vex );
   if (!(sci->flags & SfNoWriteResult))
      vg_assert(eq_SyscallStatus( &sci->status, &test_status ));
   /* Failure of the above assertion on Darwin can indicate a problem
      in the syscall wrappers that pre-fail or pre-succeed the
      syscall, by calling SET_STATUS_Success or SET_STATUS_Failure,
      when they really should call SET_STATUS_from_SysRes.  The former
      create a UNIX-class syscall result on Darwin, which may not be
      correct for the syscall; if that's the case then this assertion
      fires.  See PRE(thread_fast_set_cthread_self) for an example.  On
      non-Darwin platforms this assertion should never fail, and this
      comment is completely irrelevant. */
   /* Ok, looks sane */

   /* Get the system call number.  Because the pre-handler isn't
      allowed to mess with it, it should be the same for both the
      original and potentially-modified args.
   */
   vg_assert(sci->args.sysno == sci->orig_args.sysno);
   sysno = sci->args.sysno;
   ent = get_syscall_entry(sysno);

   /* pre: status == Complete (asserted above) */
   /* Consider either success or failure.  Now run the post handler if:
      - it exists, and
      - Success or (Failure and PostOnFail is set)
   */
   if (ent->after
       && ((!sr_isError(sci->status.sres))
           || (sr_isError(sci->status.sres)
               && (sci->flags & SfPostOnFail) ))) {

      (ent->after)( tid, &sci->args, &sci->status );
   }

   /* Because the post handler might have changed the status (eg, the
      post-handler for sys_open can change the result from success to
      failure if the kernel supplied a fd that it doesn't like), once
      again dump the syscall result back in the guest state.*/
   if (!(sci->flags & SfNoWriteResult))
      putSyscallStatusIntoGuestState( tid, &sci->status, &tst->arch.vex );

   /* Do any post-syscall actions required by the tool. */
   if (VG_(needs).syscall_wrapper) {
      UWord tmpv[8];
      tmpv[0] = sci->orig_args.arg1;
      tmpv[1] = sci->orig_args.arg2;
      tmpv[2] = sci->orig_args.arg3;
      tmpv[3] = sci->orig_args.arg4;
      tmpv[4] = sci->orig_args.arg5;
      tmpv[5] = sci->orig_args.arg6;
      tmpv[6] = sci->orig_args.arg7;
      tmpv[7] = sci->orig_args.arg8;
      VG_TDICT_CALL(tool_post_syscall, tid,
                    sysno,
                    &tmpv[0], sizeof(tmpv)/sizeof(tmpv[0]),
                    sci->status.sres);
   }

   /* The syscall is done. */
   vg_assert(sci->status.what == SsComplete);
   sci->status.what = SsIdle;

   /* The pre/post wrappers may have concluded that pending signals
      might have been created, and will have set SfPollAfter to
      request a poll for them once the syscall is done. */
   if (sci->flags & SfPollAfter)
      VG_(poll_signals)(tid);

   /* Similarly, the wrappers might have asked for a yield
      afterwards. */
   if (sci->flags & SfYieldAfter)
      VG_(vg_yield)();
}


/* ---------------------------------------------------------------------
   Dealing with syscalls which get interrupted by a signal:
   VG_(fixup_guest_state_after_syscall_interrupted)
   ------------------------------------------------------------------ */

/* Syscalls done on behalf of the client are finally handed off to the
   kernel in VG_(client_syscall) above, either by calling
   do_syscall_for_client (the async case), or by calling
   VG_(do_syscall6) (the sync case).

   If the syscall is not interrupted by a signal (it may block and
   later unblock, but that's irrelevant here) then those functions
   eventually return and so control is passed to VG_(post_syscall).
   NB: not sure if the sync case can actually get interrupted, as it
   operates with all signals masked.

   However, the syscall may get interrupted by an async-signal.  In
   that case do_syscall_for_client/VG_(do_syscall6) do not
   return.  Instead we wind up in m_signals.async_sighandler.  We need
   to fix up the guest state to make it look like the syscall was
   interrupted for the guest.  So async_sighandler calls here, and this
   does the fixup.  Note that from here we wind up calling
   VG_(post_syscall) too.
*/


/* These are addresses within ML_(do_syscall_for_client_WRK).  See
   syscall-$PLAT.S for details.
*/
#if defined(VGO_linux) || defined(VGO_aix5)
extern const Addr ML_(blksys_setup);
extern const Addr ML_(blksys_restart);
extern const Addr ML_(blksys_complete);
extern const Addr ML_(blksys_committed);
extern const Addr ML_(blksys_finished);
#elif defined(VGO_darwin)
/* Darwin requires extra ugliness: one full set of markers per syscall
   class (Mach traps, machine-dependent traps, UNIX syscalls). */
extern const Addr ML_(blksys_setup_MACH);
extern const Addr ML_(blksys_restart_MACH);
extern const Addr ML_(blksys_complete_MACH);
extern const Addr ML_(blksys_committed_MACH);
extern const Addr ML_(blksys_finished_MACH);
extern const Addr ML_(blksys_setup_MDEP);
extern const Addr ML_(blksys_restart_MDEP);
extern const Addr ML_(blksys_complete_MDEP);
extern const Addr ML_(blksys_committed_MDEP);
extern const Addr ML_(blksys_finished_MDEP);
extern const Addr ML_(blksys_setup_UNIX);
extern const Addr ML_(blksys_restart_UNIX);
extern const Addr ML_(blksys_complete_UNIX);
extern const Addr ML_(blksys_committed_UNIX);
extern const Addr ML_(blksys_finished_UNIX);
#else
# error "Unknown OS"
#endif


/* Back up guest state to restart a system call.

   Moves the guest program counter back over the syscall instruction
   (or, on platforms where the kernel resumes elsewhere, restores it
   from a saved pseudo-register), so that when the thread next runs it
   re-executes the syscall.  Each platform case also sanity-checks that
   the bytes at the adjusted PC really are that platform's syscall
   instruction, and asserts if not. */

void ML_(fixup_guest_state_to_restart_syscall) ( ThreadArchState* arch )
{
#if defined(VGP_x86_linux)
   arch->vex.guest_EIP -= 2;             // sizeof(int $0x80)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      int $0x80 == CD 80
   */
   {
      UChar *p = (UChar *)arch->vex.guest_EIP;

      if (p[0] != 0xcd || p[1] != 0x80)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#x %02x %02x\n",
                      arch->vex.guest_EIP, p[0], p[1]);

      vg_assert(p[0] == 0xcd && p[1] == 0x80);
   }

#elif defined(VGP_amd64_linux)
   arch->vex.guest_RIP -= 2;             // sizeof(syscall)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      syscall == 0F 05
   */
   {
      UChar *p = (UChar *)arch->vex.guest_RIP;

      if (p[0] != 0x0F || p[1] != 0x05)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#llx %02x %02x\n",
                      arch->vex.guest_RIP, p[0], p[1]);

      vg_assert(p[0] == 0x0F && p[1] == 0x05);
   }

#elif defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)
   arch->vex.guest_CIA -= 4;             // sizeof(ppc32 instr)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      sc == 44 00 00 02
   */
   {
      UChar *p = (UChar *)arch->vex.guest_CIA;

      if (p[0] != 0x44 || p[1] != 0x0 || p[2] != 0x0 || p[3] != 0x02)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
                      arch->vex.guest_CIA + 0ULL, p[0], p[1], p[2], p[3]);

      vg_assert(p[0] == 0x44 && p[1] == 0x0 && p[2] == 0x0 && p[3] == 0x2);
   }

#elif defined(VGP_arm_linux)
   if (arch->vex.guest_R15T & 1) {
      // Thumb mode.  SVC is encoded as
      //   1101 1111 imm8
      // where imm8 is the SVC number, and we only accept 0.
      arch->vex.guest_R15T -= 2;   // sizeof(thumb 16 bit insn)
      // Bit 0 of R15T is the Thumb flag, so the real insn address is
      // one below the register value.
      UChar* p     = (UChar*)(arch->vex.guest_R15T - 1);
      Bool   valid = p[0] == 0 && p[1] == 0xDF;
      if (!valid) {
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over (Thumb) syscall that is not syscall "
                      "at %#llx %02x %02x\n",
                      arch->vex.guest_R15T - 1ULL, p[0], p[1]);
      }
      vg_assert(valid);
      // FIXME: NOTE, this really isn't right.  We need to back up
      // ITSTATE to what it was before the SVC instruction, but we
      // don't know what it was.  At least assert that it is now
      // zero, because if it is nonzero then it must also have
      // been nonzero for the SVC itself, which means it was
      // conditional.  Urk.
      vg_assert(arch->vex.guest_ITSTATE == 0);
   } else {
      // ARM mode.  SVC is encoded as
      //   cond 1111 imm24
      // where imm24 is the SVC number, and we only accept 0.
      arch->vex.guest_R15T -= 4;   // sizeof(arm instr)
      UChar* p     = (UChar*)arch->vex.guest_R15T;
      Bool   valid = p[0] == 0 && p[1] == 0 && p[2] == 0
                     && (p[3] & 0xF) == 0xF;
      if (!valid) {
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over (ARM) syscall that is not syscall "
                      "at %#llx %02x %02x %02x %02x\n",
                      arch->vex.guest_R15T + 0ULL, p[0], p[1], p[2], p[3]);
      }
      vg_assert(valid);
   }

#elif defined(VGP_ppc32_aix5) || defined(VGP_ppc64_aix5)
   /* Hmm.  This is problematic, because on AIX the kernel resumes
      after a syscall at LR, not at the insn following SC.  Hence
      there is no obvious way to figure out where the SC is.  Current
      solution is to have a pseudo-register in the guest state,
      CIA_AT_SC, which holds the address of the most recent SC
      executed.  Backing up to that syscall then simply involves
      copying that value back into CIA (the program counter). */
   arch->vex.guest_CIA = arch->vex.guest_CIA_AT_SC;

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      sc == 44 00 00 02
   */
   {
      UChar *p = (UChar *)arch->vex.guest_CIA;

      if (p[0] != 0x44 || p[1] != 0x0 || p[2] != 0x0 || p[3] != 0x02)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#lx %02x %02x %02x %02x\n",
                      (UWord)arch->vex.guest_CIA, p[0], p[1], p[2], p[3]);

      vg_assert(p[0] == 0x44 && p[1] == 0x0 && p[2] == 0x0 && p[3] == 0x2);
   }

#elif defined(VGP_x86_darwin)
   /* Like AIX, Darwin keeps the syscall insn address in a guest
      pseudo-register rather than computing it from EIP. */
   arch->vex.guest_EIP = arch->vex.guest_IP_AT_SYSCALL;

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      int $0x80 == CD 80
      int $0x81 == CD 81
      int $0x82 == CD 82
      sysenter  == 0F 34
   */
   {
       UChar *p = (UChar *)arch->vex.guest_EIP;
       Bool  ok = (p[0] == 0xCD && p[1] == 0x80)
                  || (p[0] == 0xCD && p[1] == 0x81)
                  || (p[0] == 0xCD && p[1] == 0x82)
                  || (p[0] == 0x0F && p[1] == 0x34);
       if (!ok)
           VG_(message)(Vg_DebugMsg,
                        "?! restarting over syscall at %#x %02x %02x\n",
                        arch->vex.guest_EIP, p[0], p[1]);
       vg_assert(ok);
   }

#elif defined(VGP_amd64_darwin)
   // DDD: #warning GrP fixme amd64 restart unimplemented
   vg_assert(0);

#else
#  error "ML_(fixup_guest_state_to_restart_syscall): unknown plat"
#endif
}

/*
   Fix up the guest state when a syscall is interrupted by a signal
   and so has been forced to return 'sysret'.

   To do this, we determine the precise state of the syscall by
   looking at the (real) IP at the time the signal happened.  The
   syscall sequence looks like:

     1. unblock signals
     2. perform syscall
     3. save result to guest state (EAX, RAX, R3+CR0.SO)
     4. re-block signals

   If a signal
   happens at      Then     Why?
   [1-2)           restart  nothing has happened (restart syscall)
   [2]             restart  syscall hasn't started, or kernel wants to restart
   [2-3)           save     syscall complete, but results not saved
   [3-4)           syscall complete, results saved

   Sometimes we never want to restart an interrupted syscall (because
   sigaction says not to), so we only restart if "restart" is True.

   This will also call VG_(post_syscall) if the syscall has actually
   completed (either because it was interrupted, or because it
   actually finished).  It will not call VG_(post_syscall) if the
   syscall is set up for restart, which means that the pre-wrapper may
   get called multiple times.
*/

void
VG_(fixup_guest_state_after_syscall_interrupted)( ThreadId tid,
                                                  Addr     ip,
                                                  SysRes   sres,
                                                  Bool     restart)
{
   /* Note that we don't know the syscall number here, since (1) in
      general there's no reliable way to get hold of it short of
      stashing it in the guest state before the syscall, and (2) in
      any case we don't need to know it for the actions done by this
      routine.

      Furthermore, 'sres' is only used in the case where the syscall
      is complete, but the result has not been committed to the guest
      state yet.  In any other situation it will be meaningless and
      therefore ignored. */

   ThreadState*     tst;
   SyscallStatus    canonical;
   ThreadArchState* th_regs;
   SyscallInfo*     sci;       // per-thread in-progress-syscall record

   /* Compute some Booleans indicating which range we're in.  The
      ranges are delimited by the ML_(blksys_*) labels exported from
      the platform's syscall-$PLAT.S. */
   Bool outside_range,
        in_setup_to_restart,      // [1,2) in the .S files
        at_restart,               // [2]   in the .S files
        in_complete_to_committed, // [3,4) in the .S files
        in_committed_to_finished; // [4,5) in the .S files

#  if defined(VGO_linux) || defined(VGO_aix5)
   outside_range
      = ip < ML_(blksys_setup) || ip >= ML_(blksys_finished);
   in_setup_to_restart
      = ip >= ML_(blksys_setup) && ip < ML_(blksys_restart);
   at_restart
      = ip == ML_(blksys_restart);
   in_complete_to_committed
      = ip >= ML_(blksys_complete) && ip < ML_(blksys_committed);
   in_committed_to_finished
      = ip >= ML_(blksys_committed) && ip < ML_(blksys_finished);
#  elif defined(VGO_darwin)
   /* On Darwin each of the three syscall classes (MACH, MDEP, UNIX)
      has its own copy of the marker labels, so each test is the
      disjunction (or, for outside_range, conjunction) over all three. */
   outside_range
      =  (ip < ML_(blksys_setup_MACH) || ip >= ML_(blksys_finished_MACH))
      && (ip < ML_(blksys_setup_MDEP) || ip >= ML_(blksys_finished_MDEP))
      && (ip < ML_(blksys_setup_UNIX) || ip >= ML_(blksys_finished_UNIX));
   in_setup_to_restart
      =  (ip >= ML_(blksys_setup_MACH) && ip < ML_(blksys_restart_MACH))
      || (ip >= ML_(blksys_setup_MDEP) && ip < ML_(blksys_restart_MDEP))
      || (ip >= ML_(blksys_setup_UNIX) && ip < ML_(blksys_restart_UNIX));
   at_restart
      =  (ip == ML_(blksys_restart_MACH))
      || (ip == ML_(blksys_restart_MDEP))
      || (ip == ML_(blksys_restart_UNIX));
   in_complete_to_committed
      =  (ip >= ML_(blksys_complete_MACH) && ip < ML_(blksys_committed_MACH))
      || (ip >= ML_(blksys_complete_MDEP) && ip < ML_(blksys_committed_MDEP))
      || (ip >= ML_(blksys_complete_UNIX) && ip < ML_(blksys_committed_UNIX));
   in_committed_to_finished
      =  (ip >= ML_(blksys_committed_MACH) && ip < ML_(blksys_finished_MACH))
      || (ip >= ML_(blksys_committed_MDEP) && ip < ML_(blksys_finished_MDEP))
      || (ip >= ML_(blksys_committed_UNIX) && ip < ML_(blksys_finished_UNIX));
   /* Wasn't that just So Much Fun?  Does your head hurt yet?  Mine does. */
#  else
#    error "Unknown OS"
#  endif

   if (VG_(clo_trace_signals))
      VG_(message)( Vg_DebugMsg,
                    "interrupted_syscall: tid=%d, ip=0x%llx, "
                    "restart=%s, sres.isErr=%s, sres.val=%lld\n",
                    (Int)tid,
                    (ULong)ip,
                    restart ? "True" : "False",
                    sr_isError(sres) ? "True" : "False",
                    (Long)(sr_isError(sres) ? sr_Err(sres) : sr_Res(sres)) );

   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

   tst     = VG_(get_ThreadState)(tid);
   th_regs = &tst->arch;
   sci     = & syscallInfo[tid];

   /* Figure out what the state of the syscall was by examining the
      (real) IP at the time of the signal, and act accordingly. */
   if (outside_range) {
      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg,
                       "  not in syscall at all: hmm, very suspicious\n" );
      /* Looks like we weren't in a syscall at all.  Hmm. */
      vg_assert(sci->status.what != SsIdle);
      return;
   }

   /* We should not be here unless this thread had first started up
      the machinery for a syscall by calling VG_(client_syscall).
      Hence: */
   vg_assert(sci->status.what != SsIdle);

   /* now, do one of four fixup actions, depending on where the IP has
      got to. */

   if (in_setup_to_restart) {
      /* syscall hasn't even started; go around again */
      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg, "  not started: restarting\n");
      vg_assert(sci->status.what == SsHandToKernel);
      ML_(fixup_guest_state_to_restart_syscall)(th_regs);
   }

   else
   if (at_restart) {
      /* We're either about to run the syscall, or it was interrupted
         and the kernel restarted it.  Restart if asked, otherwise
         EINTR it. */
      if (restart) {
         if (VG_(clo_trace_signals))
            VG_(message)( Vg_DebugMsg, "  at syscall instr: restarting\n");
         ML_(fixup_guest_state_to_restart_syscall)(th_regs);
      } else {
         if (VG_(clo_trace_signals))
            VG_(message)( Vg_DebugMsg, "  at syscall instr: returning EINTR\n");
         canonical = convert_SysRes_to_SyscallStatus(
                        VG_(mk_SysRes_Error)( VKI_EINTR )
                     );
         /* SfNoWriteResult suppresses the guest-state write, same as
            in VG_(post_syscall); the recorded status is updated
            either way. */
         if (!(sci->flags & SfNoWriteResult))
            putSyscallStatusIntoGuestState( tid, &canonical, &th_regs->vex );
         sci->status = canonical;
         VG_(post_syscall)(tid);
      }
   }

   else
   if (in_complete_to_committed) {
      /* Syscall complete, but result hasn't been written back yet.
         Write the SysRes we were supplied with back to the guest
         state. */
      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg,
                       "  completed, but uncommitted: committing\n");
      canonical = convert_SysRes_to_SyscallStatus( sres );
      if (!(sci->flags & SfNoWriteResult))
         putSyscallStatusIntoGuestState( tid, &canonical, &th_regs->vex );
      sci->status = canonical;
      VG_(post_syscall)(tid);
   }

   else
   if (in_committed_to_finished) {
      /* Result committed, but the signal mask has not been restored;
         we expect our caller (the signal handler) will have fixed
         this up. */
      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg,
                       "  completed and committed: nothing to do\n");
      /* Re-read the already-committed result out of the guest state
         so sci->status is consistent before post_syscall runs. */
      getSyscallStatusFromGuestState( &sci->status, &th_regs->vex );
      vg_assert(sci->status.what == SsComplete);
      VG_(post_syscall)(tid);
   }

   else
      VG_(core_panic)("?? strange syscall interrupt state?");

   /* In all cases, the syscall is now finished (even if we called
      ML_(fixup_guest_state_to_restart_syscall), since that just
      re-positions the guest's IP for another go at it).  So we need
      to record that fact. */
   sci->status.what = SsIdle;
}


#if defined(VGO_darwin)
// Clean up after workq_ops(WQOPS_THREAD_RETURN) jumped to wqthread_hijack.
// This is similar to VG_(fixup_guest_state_after_syscall_interrupted).
// This longjmps back to the scheduler.
void ML_(wqthread_continue_NORETURN)(ThreadId tid)
{
   ThreadState*     tst;
   SyscallInfo*     sci;

   VG_(acquire_BigLock)(tid, "wqthread_continue_NORETURN");

   PRINT("SYSCALL[%d,%d](%s) workq_ops() starting new workqueue item\n",
         VG_(getpid)(), tid, VG_SYSNUM_STRING(__NR_workq_ops));

   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

   tst = VG_(get_ThreadState)(tid);
   sci = & syscallInfo[tid];
   vg_assert(sci->status.what != SsIdle);
   vg_assert(tst->os_state.wq_jmpbuf_valid);  // check this BEFORE post_syscall

   // Pretend the syscall completed normally, but don't touch the thread state.
   // (SfNoWriteResult stops post_syscall writing the fake result back.)
   sci->status = convert_SysRes_to_SyscallStatus( VG_(mk_SysRes_Success)(0) );
   sci->flags |= SfNoWriteResult;
   VG_(post_syscall)(tid);

   sci->status.what = SsIdle;

   vg_assert(tst->sched_jmpbuf_valid);
   __builtin_longjmp(tst->sched_jmpbuf, True);

   /* NOTREACHED */
   vg_assert(0);
}
#endif


/* ---------------------------------------------------------------------
   A place to store the where-to-call-when-really-done pointer
   ------------------------------------------------------------------ */

// When the final thread is done, where shall I call to shutdown the
// system cleanly?  Is set once at startup (in m_main) and never
// changes after that.  Is basically a pointer to the exit
// continuation.  This is all just a nasty hack to avoid calling
// directly from m_syswrap to m_main at exit, since that would cause
// m_main to become part of a module cycle, which is silly.
void (* VG_(address_of_m_main_shutdown_actions_NORETURN) )
       (ThreadId,VgSchedReturnCode)
   = NULL;

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/