      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Thread scheduling.                               scheduler.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2000-2015 Julian Seward
      11       jseward@acm.org
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     26    02111-1307, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 */
     30 
     31 /*
     32    Overview
     33 
     34    Valgrind tries to emulate the kernel's threading as closely as
     35    possible.  The client does all threading via the normal syscalls
     36    (on Linux: clone, etc).  Valgrind emulates this by creating exactly
     37    the same process structure as would be created without Valgrind.
     38    There are no extra threads.
     39 
     40    The main difference is that Valgrind only allows one client thread
     41    to run at once.  This is controlled with the CPU Big Lock,
     42    "the_BigLock".  Any time a thread wants to run client code or
     43    manipulate any shared state (which is anything other than its own
     44    ThreadState entry), it must hold the_BigLock.
     45 
     46    When a thread is about to block in a blocking syscall, it releases
     47    the_BigLock, and re-takes it when it becomes runnable again (either
     48    because the syscall finished, or we took a signal).
     49 
     50    VG_(scheduler) therefore runs in each thread.  It returns only when
     51    the thread is exiting, either because it exited itself, or it was
     52    told to exit by another thread.
     53 
     54    This file is almost entirely OS-independent.  The details of how
     55    the OS handles threading and signalling are abstracted away and
     56    implemented elsewhere.  [Some of the functions have worked their
     57    way back for the moment, until we do an OS port in earnest...]
     58 */
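
         /* A rough sketch of the BigLock protocol around a blocking syscall
            (illustrative only -- the real sequence lives in m_syswrap and in
            the helpers below, and differs in detail; "who" is just a
            caller-supplied description string):

               VG_(acquire_BigLock)(tid, "who");               // become Runnable
               ... run client code, reach a blocking syscall ...
               VG_(release_BigLock)(tid, VgTs_WaitSys, "who"); // lock now free
               ... syscall may block; other threads can run ...
               VG_(acquire_BigLock)(tid, "who");               // runnable again
         */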
     59 
     60 
     61 #include "pub_core_basics.h"
     62 #include "pub_core_debuglog.h"
     63 #include "pub_core_vki.h"
     64 #include "pub_core_vkiscnums.h"  // __NR_sched_yield
     65 #include "pub_core_threadstate.h"
     66 #include "pub_core_clientstate.h"
     67 #include "pub_core_aspacemgr.h"
     68 #include "pub_core_clreq.h"      // for VG_USERREQ__*
     69 #include "pub_core_dispatch.h"
     70 #include "pub_core_errormgr.h"   // For VG_(get_n_errs_found)()
     71 #include "pub_core_gdbserver.h"  // for VG_(gdbserver)/VG_(gdbserver_activity)
     72 #include "pub_core_libcbase.h"
     73 #include "pub_core_libcassert.h"
     74 #include "pub_core_libcprint.h"
     75 #include "pub_core_libcproc.h"
     76 #include "pub_core_libcsignal.h"
     77 #if defined(VGO_darwin)
     78 #include "pub_core_mach.h"
     79 #endif
     80 #include "pub_core_machine.h"
     81 #include "pub_core_mallocfree.h"
     82 #include "pub_core_options.h"
     83 #include "pub_core_replacemalloc.h"
     84 #include "pub_core_sbprofile.h"
     85 #include "pub_core_signals.h"
     86 #include "pub_core_stacks.h"
     87 #include "pub_core_stacktrace.h"    // For VG_(get_and_pp_StackTrace)()
     88 #include "pub_core_syscall.h"
     89 #include "pub_core_syswrap.h"
     90 #include "pub_core_tooliface.h"
     91 #include "pub_core_translate.h"     // For VG_(translate)()
     92 #include "pub_core_transtab.h"
     93 #include "pub_core_debuginfo.h"     // VG_(di_notify_pdb_debuginfo)
     94 #include "priv_sched-lock.h"
     95 #include "pub_core_scheduler.h"     // self
     96 #include "pub_core_redir.h"
     97 #include "libvex_emnote.h"          // VexEmNote
     98 
     99 
    100 /* ---------------------------------------------------------------------
    101    Types and globals for the scheduler.
    102    ------------------------------------------------------------------ */
    103 
     104 /* ThreadId and ThreadState are defined elsewhere. */
    105 
    106 /* Defines the thread-scheduling timeslice, in terms of the number of
    107    basic blocks we attempt to run each thread for.  Smaller values
    108    give finer interleaving but much increased scheduling overheads. */
    109 #define SCHEDULING_QUANTUM   100000
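         /* A sketch of how the quantum is consumed: VG_(scheduler) below loads
            dispatch_ctr with SCHEDULING_QUANTUM at the start of each timeslice,
            run_thread_for_a_while counts it down as translations execute, and
            when it reaches zero the thread releases and re-acquires the_BigLock,
            giving other threads a chance to run. */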
    110 
     111 /* True while running client (generated) code; if False, a fault is Valgrind-internal (ie, a bug) */
    112 Bool VG_(in_generated_code) = False;
    113 
    114 /* 64-bit counter for the number of basic blocks done. */
    115 static ULong bbs_done = 0;
    116 
     117 /* Counter used to decide when to poll for vgdb activity.
     118    When the number of bbs done reaches vgdb_next_poll, the scheduler
     119    will poll for gdbserver activity.  VG_(force_vgdb_poll) and
     120    VG_(disable_vgdb_poll) allow the valgrind core (e.g. m_gdbserver)
     121    to control when the next poll will be done. */
    122 static ULong vgdb_next_poll;
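
         /* The poll decision itself (in run_thread_for_a_while) amounts to,
            roughly:

               if (bbs_done >= vgdb_next_poll) {
                  vgdb_next_poll = VG_(clo_vgdb_poll) ? bbs_done + VG_(clo_vgdb_poll)
                                                      : NO_VGDB_POLL;
                  if (VG_(gdbserver_activity)(tid))
                     VG_(gdbserver)(tid);
               }
         */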
    123 
    124 /* Forwards */
    125 static void do_client_request ( ThreadId tid );
    126 static void scheduler_sanity ( ThreadId tid );
    127 static void mostly_clear_thread_record ( ThreadId tid );
    128 
    129 /* Stats. */
    130 static ULong n_scheduling_events_MINOR = 0;
    131 static ULong n_scheduling_events_MAJOR = 0;
    132 
    133 /* Stats: number of XIndirs, and number that missed in the fast
    134    cache. */
    135 static ULong stats__n_xindirs = 0;
    136 static ULong stats__n_xindir_misses = 0;
    137 
    138 /* And 32-bit temp bins for the above, so that 32-bit platforms don't
    139    have to do 64 bit incs on the hot path through
    140    VG_(cp_disp_xindir). */
    141 /*global*/ UInt VG_(stats__n_xindirs_32) = 0;
    142 /*global*/ UInt VG_(stats__n_xindir_misses_32) = 0;
    143 
    144 /* Sanity checking counts. */
    145 static UInt sanity_fast_count = 0;
    146 static UInt sanity_slow_count = 0;
    147 
    148 void VG_(print_scheduler_stats)(void)
    149 {
    150    VG_(message)(Vg_DebugMsg,
    151       "scheduler: %'llu event checks.\n", bbs_done );
    152    VG_(message)(Vg_DebugMsg,
    153                 "scheduler: %'llu indir transfers, %'llu misses (1 in %llu)\n",
    154                 stats__n_xindirs, stats__n_xindir_misses,
    155                 stats__n_xindirs / (stats__n_xindir_misses
    156                                     ? stats__n_xindir_misses : 1));
    157    VG_(message)(Vg_DebugMsg,
    158       "scheduler: %'llu/%'llu major/minor sched events.\n",
    159       n_scheduling_events_MAJOR, n_scheduling_events_MINOR);
    160    VG_(message)(Vg_DebugMsg,
    161                 "   sanity: %u cheap, %u expensive checks.\n",
    162                 sanity_fast_count, sanity_slow_count );
    163 }
    164 
    165 /*
    166  * Mutual exclusion object used to serialize threads.
    167  */
    168 static struct sched_lock *the_BigLock;
    169 
    170 
    171 /* ---------------------------------------------------------------------
    172    Helper functions for the scheduler.
    173    ------------------------------------------------------------------ */
    174 
    175 static
    176 void print_sched_event ( ThreadId tid, const HChar* what )
    177 {
    178    VG_(message)(Vg_DebugMsg, "  SCHED[%u]: %s\n", tid, what );
    179 }
    180 
    181 /* For showing SB profiles, if the user asks to see them. */
    182 static
    183 void maybe_show_sb_profile ( void )
    184 {
    185    /* DO NOT MAKE NON-STATIC */
    186    static ULong bbs_done_lastcheck = 0;
    187    /* */
    188    vg_assert(VG_(clo_profyle_interval) > 0);
    189    Long delta = (Long)(bbs_done - bbs_done_lastcheck);
    190    vg_assert(delta >= 0);
    191    if ((ULong)delta >= VG_(clo_profyle_interval)) {
    192       bbs_done_lastcheck = bbs_done;
    193       VG_(get_and_show_SB_profile)(bbs_done);
    194    }
    195 }
    196 
    197 static
    198 const HChar* name_of_sched_event ( UInt event )
    199 {
    200    switch (event) {
    201       case VEX_TRC_JMP_INVALICACHE:    return "INVALICACHE";
    202       case VEX_TRC_JMP_FLUSHDCACHE:    return "FLUSHDCACHE";
    203       case VEX_TRC_JMP_NOREDIR:        return "NOREDIR";
    204       case VEX_TRC_JMP_SIGILL:         return "SIGILL";
    205       case VEX_TRC_JMP_SIGTRAP:        return "SIGTRAP";
    206       case VEX_TRC_JMP_SIGSEGV:        return "SIGSEGV";
    207       case VEX_TRC_JMP_SIGBUS:         return "SIGBUS";
    208       case VEX_TRC_JMP_SIGFPE_INTOVF:
    209       case VEX_TRC_JMP_SIGFPE_INTDIV:  return "SIGFPE";
    210       case VEX_TRC_JMP_EMWARN:         return "EMWARN";
    211       case VEX_TRC_JMP_EMFAIL:         return "EMFAIL";
    212       case VEX_TRC_JMP_CLIENTREQ:      return "CLIENTREQ";
    213       case VEX_TRC_JMP_YIELD:          return "YIELD";
    214       case VEX_TRC_JMP_NODECODE:       return "NODECODE";
    215       case VEX_TRC_JMP_MAPFAIL:        return "MAPFAIL";
    216       case VEX_TRC_JMP_SYS_SYSCALL:    return "SYSCALL";
    217       case VEX_TRC_JMP_SYS_INT32:      return "INT32";
    218       case VEX_TRC_JMP_SYS_INT128:     return "INT128";
    219       case VEX_TRC_JMP_SYS_INT129:     return "INT129";
    220       case VEX_TRC_JMP_SYS_INT130:     return "INT130";
    221       case VEX_TRC_JMP_SYS_INT145:     return "INT145";
    222       case VEX_TRC_JMP_SYS_INT210:     return "INT210";
    223       case VEX_TRC_JMP_SYS_SYSENTER:   return "SYSENTER";
    224       case VEX_TRC_JMP_BORING:         return "VEX_BORING";
    225 
    226       case VG_TRC_BORING:              return "VG_BORING";
    227       case VG_TRC_INNER_FASTMISS:      return "FASTMISS";
    228       case VG_TRC_INNER_COUNTERZERO:   return "COUNTERZERO";
    229       case VG_TRC_FAULT_SIGNAL:        return "FAULTSIGNAL";
    230       case VG_TRC_INVARIANT_FAILED:    return "INVFAILED";
    231       case VG_TRC_CHAIN_ME_TO_SLOW_EP: return "CHAIN_ME_SLOW";
    232       case VG_TRC_CHAIN_ME_TO_FAST_EP: return "CHAIN_ME_FAST";
    233       default:                         return "??UNKNOWN??";
    234   }
    235 }
    236 
    237 /* Allocate a completely empty ThreadState record. */
    238 ThreadId VG_(alloc_ThreadState) ( void )
    239 {
    240    Int i;
    241    for (i = 1; i < VG_N_THREADS; i++) {
    242       if (VG_(threads)[i].status == VgTs_Empty) {
    243 	 VG_(threads)[i].status = VgTs_Init;
    244 	 VG_(threads)[i].exitreason = VgSrc_None;
    245          if (VG_(threads)[i].thread_name)
    246             VG_(free)(VG_(threads)[i].thread_name);
    247          VG_(threads)[i].thread_name = NULL;
    248          return i;
    249       }
    250    }
    251    VG_(printf)("Use --max-threads=INT to specify a larger number of threads\n"
    252                "and rerun valgrind\n");
    253    VG_(core_panic)("Max number of threads is too low");
    254    /*NOTREACHED*/
    255 }
    256 
    257 /*
    258    Mark a thread as Runnable.  This will block until the_BigLock is
    259    available, so that we get exclusive access to all the shared
    260    structures and the CPU.  Up until we get the_BigLock, we must not
    261    touch any shared state.
    262 
    263    When this returns, we'll actually be running.
    264  */
    265 void VG_(acquire_BigLock)(ThreadId tid, const HChar* who)
    266 {
    267    ThreadState *tst;
    268 
    269 #if 0
    270    if (VG_(clo_trace_sched)) {
    271       HChar buf[VG_(strlen)(who) + 30];
    272       VG_(sprintf)(buf, "waiting for lock (%s)", who);
    273       print_sched_event(tid, buf);
    274    }
    275 #endif
    276 
    277    /* First, acquire the_BigLock.  We can't do anything else safely
    278       prior to this point.  Even doing debug printing prior to this
    279       point is, technically, wrong. */
    280    VG_(acquire_BigLock_LL)(NULL);
    281 
    282    tst = VG_(get_ThreadState)(tid);
    283 
    284    vg_assert(tst->status != VgTs_Runnable);
    285 
    286    tst->status = VgTs_Runnable;
    287 
    288    if (VG_(running_tid) != VG_INVALID_THREADID)
    289       VG_(printf)("tid %u found %u running\n", tid, VG_(running_tid));
    290    vg_assert(VG_(running_tid) == VG_INVALID_THREADID);
    291    VG_(running_tid) = tid;
    292 
    293    { Addr gsp = VG_(get_SP)(tid);
    294       if (NULL != VG_(tdict).track_new_mem_stack_w_ECU)
    295          VG_(unknown_SP_update_w_ECU)(gsp, gsp, 0/*unknown origin*/);
    296       else
    297          VG_(unknown_SP_update)(gsp, gsp);
    298    }
    299 
    300    if (VG_(clo_trace_sched)) {
    301       HChar buf[VG_(strlen)(who) + 30];
    302       VG_(sprintf)(buf, " acquired lock (%s)", who);
    303       print_sched_event(tid, buf);
    304    }
    305 }
    306 
    307 /*
    308    Set a thread into a sleeping state, and give up exclusive access to
    309    the CPU.  On return, the thread must be prepared to block until it
    310    is ready to run again (generally this means blocking in a syscall,
    311    but it may mean that we remain in a Runnable state and we're just
    312    yielding the CPU to another thread).
    313  */
    314 void VG_(release_BigLock)(ThreadId tid, ThreadStatus sleepstate,
    315                           const HChar* who)
    316 {
    317    ThreadState *tst = VG_(get_ThreadState)(tid);
    318 
    319    vg_assert(tst->status == VgTs_Runnable);
    320 
    321    vg_assert(sleepstate == VgTs_WaitSys ||
    322 	     sleepstate == VgTs_Yielding);
    323 
    324    tst->status = sleepstate;
    325 
    326    vg_assert(VG_(running_tid) == tid);
    327    VG_(running_tid) = VG_INVALID_THREADID;
    328 
    329    if (VG_(clo_trace_sched)) {
    330       const HChar *status = VG_(name_of_ThreadStatus)(sleepstate);
    331       HChar buf[VG_(strlen)(who) + VG_(strlen)(status) + 30];
    332       VG_(sprintf)(buf, "releasing lock (%s) -> %s", who, status);
    333       print_sched_event(tid, buf);
    334    }
    335 
    336    /* Release the_BigLock; this will reschedule any runnable
    337       thread. */
    338    VG_(release_BigLock_LL)(NULL);
    339 }
    340 
    341 static void init_BigLock(void)
    342 {
    343    vg_assert(!the_BigLock);
    344    the_BigLock = ML_(create_sched_lock)();
    345 }
    346 
    347 static void deinit_BigLock(void)
    348 {
    349    ML_(destroy_sched_lock)(the_BigLock);
    350    the_BigLock = NULL;
    351 }
    352 
    353 /* See pub_core_scheduler.h for description */
    354 void VG_(acquire_BigLock_LL) ( const HChar* who )
    355 {
    356    ML_(acquire_sched_lock)(the_BigLock);
    357 }
    358 
    359 /* See pub_core_scheduler.h for description */
    360 void VG_(release_BigLock_LL) ( const HChar* who )
    361 {
    362    ML_(release_sched_lock)(the_BigLock);
    363 }
    364 
    365 Bool VG_(owns_BigLock_LL) ( ThreadId tid )
    366 {
    367    return (ML_(get_sched_lock_owner)(the_BigLock)
    368            == VG_(threads)[tid].os_state.lwpid);
    369 }
    370 
    371 
    372 /* Clear out the ThreadState and release the semaphore. Leaves the
    373    ThreadState in VgTs_Zombie state, so that it doesn't get
    374    reallocated until the caller is really ready. */
    375 void VG_(exit_thread)(ThreadId tid)
    376 {
    377    vg_assert(VG_(is_valid_tid)(tid));
    378    vg_assert(VG_(is_running_thread)(tid));
    379    vg_assert(VG_(is_exiting)(tid));
    380 
    381    mostly_clear_thread_record(tid);
    382    VG_(running_tid) = VG_INVALID_THREADID;
    383 
    384    /* There should still be a valid exitreason for this thread */
    385    vg_assert(VG_(threads)[tid].exitreason != VgSrc_None);
    386 
    387    if (VG_(clo_trace_sched))
    388       print_sched_event(tid, "release lock in VG_(exit_thread)");
    389 
    390    VG_(release_BigLock_LL)(NULL);
    391 }
    392 
    393 /* If 'tid' is blocked in a syscall, send it SIGVGKILL so as to get it
    394    out of the syscall and onto doing the next thing, whatever that is.
     395    If it isn't blocked in a syscall, this has no effect on the thread. */
    396 void VG_(get_thread_out_of_syscall)(ThreadId tid)
    397 {
    398    vg_assert(VG_(is_valid_tid)(tid));
    399    vg_assert(!VG_(is_running_thread)(tid));
    400 
    401    if (VG_(threads)[tid].status == VgTs_WaitSys) {
    402       if (VG_(clo_trace_signals)) {
    403 	 VG_(message)(Vg_DebugMsg,
    404                       "get_thread_out_of_syscall zaps tid %u lwp %d\n",
    405 		      tid, VG_(threads)[tid].os_state.lwpid);
    406       }
    407 #     if defined(VGO_darwin)
    408       {
    409          // GrP fixme use mach primitives on darwin?
    410          // GrP fixme thread_abort_safely?
    411          // GrP fixme race for thread with WaitSys set but not in syscall yet?
    412          extern kern_return_t thread_abort(mach_port_t);
    413          thread_abort(VG_(threads)[tid].os_state.lwpid);
    414       }
    415 #     else
    416       {
    417          __attribute__((unused))
    418          Int r = VG_(tkill)(VG_(threads)[tid].os_state.lwpid, VG_SIGVGKILL);
    419          /* JRS 2009-Mar-20: should we assert for r==0 (tkill succeeded)?
    420             I'm really not sure.  Here's a race scenario which argues
     421             that we shouldn't; but equally I'm not sure the scenario is
    422             even possible, because of constraints caused by the question
    423             of who holds the BigLock when.
    424 
    425             Target thread tid does sys_read on a socket and blocks.  This
    426             function gets called, and we observe correctly that tid's
    427             status is WaitSys but then for whatever reason this function
    428             goes very slowly for a while.  Then data arrives from
    429             wherever, tid's sys_read returns, tid exits.  Then we do
    430             tkill on tid, but tid no longer exists; tkill returns an
    431             error code and the assert fails. */
    432          /* vg_assert(r == 0); */
    433       }
    434 #     endif
    435    }
    436 }
    437 
    438 /*
    439    Yield the CPU for a short time to let some other thread run.
    440  */
    441 void VG_(vg_yield)(void)
    442 {
    443    ThreadId tid = VG_(running_tid);
    444 
    445    vg_assert(tid != VG_INVALID_THREADID);
    446    vg_assert(VG_(threads)[tid].os_state.lwpid == VG_(gettid)());
    447 
    448    VG_(release_BigLock)(tid, VgTs_Yielding, "VG_(vg_yield)");
    449 
    450    /*
    451       Tell the kernel we're yielding.
    452     */
    453 #  if defined(VGO_linux) || defined(VGO_darwin)
    454    VG_(do_syscall0)(__NR_sched_yield);
    455 #  elif defined(VGO_solaris)
    456    VG_(do_syscall0)(__NR_yield);
    457 #  else
    458 #    error Unknown OS
    459 #  endif
    460 
    461    VG_(acquire_BigLock)(tid, "VG_(vg_yield)");
    462 }
    463 
    464 
    465 /* Set the standard set of blocked signals, used whenever we're not
    466    running a client syscall. */
    467 static void block_signals(void)
    468 {
    469    vki_sigset_t mask;
    470 
    471    VG_(sigfillset)(&mask);
    472 
    473    /* Don't block these because they're synchronous */
    474    VG_(sigdelset)(&mask, VKI_SIGSEGV);
    475    VG_(sigdelset)(&mask, VKI_SIGBUS);
    476    VG_(sigdelset)(&mask, VKI_SIGFPE);
    477    VG_(sigdelset)(&mask, VKI_SIGILL);
    478    VG_(sigdelset)(&mask, VKI_SIGTRAP);
    479 
    480    /* Can't block these anyway */
    481    VG_(sigdelset)(&mask, VKI_SIGSTOP);
    482    VG_(sigdelset)(&mask, VKI_SIGKILL);
    483 
    484    VG_(sigprocmask)(VKI_SIG_SETMASK, &mask, NULL);
    485 }
    486 
    487 static void os_state_clear(ThreadState *tst)
    488 {
    489    tst->os_state.lwpid       = 0;
    490    tst->os_state.threadgroup = 0;
    491 #  if defined(VGO_linux)
    492    /* no other fields to clear */
    493 #  elif defined(VGO_darwin)
    494    tst->os_state.post_mach_trap_fn = NULL;
    495    tst->os_state.pthread           = 0;
    496    tst->os_state.func_arg          = 0;
    497    VG_(memset)(&tst->os_state.child_go, 0, sizeof(tst->os_state.child_go));
    498    VG_(memset)(&tst->os_state.child_done, 0, sizeof(tst->os_state.child_done));
    499    tst->os_state.wq_jmpbuf_valid   = False;
    500    tst->os_state.remote_port       = 0;
    501    tst->os_state.msgh_id           = 0;
    502    VG_(memset)(&tst->os_state.mach_args, 0, sizeof(tst->os_state.mach_args));
    503 #  elif defined(VGO_solaris)
    504 #  if defined(VGP_x86_solaris)
    505    tst->os_state.thrptr = 0;
    506 #  endif
    507    tst->os_state.stk_id = (UWord)-1;
    508    tst->os_state.ustack = NULL;
    509    tst->os_state.in_door_return = False;
    510    tst->os_state.door_return_procedure = 0;
    511    tst->os_state.oldcontext = NULL;
    512    tst->os_state.schedctl_data = 0;
    513    tst->os_state.daemon_thread = False;
    514 #  else
    515 #    error "Unknown OS"
    516 #  endif
    517 }
    518 
    519 static void os_state_init(ThreadState *tst)
    520 {
    521    tst->os_state.valgrind_stack_base    = 0;
    522    tst->os_state.valgrind_stack_init_SP = 0;
    523    os_state_clear(tst);
    524 }
    525 
    526 static
    527 void mostly_clear_thread_record ( ThreadId tid )
    528 {
    529    vki_sigset_t savedmask;
    530 
    531    vg_assert(tid >= 0 && tid < VG_N_THREADS);
    532    VG_(cleanup_thread)(&VG_(threads)[tid].arch);
    533    VG_(threads)[tid].tid = tid;
    534 
    535    /* Leave the thread in Zombie, so that it doesn't get reallocated
    536       until the caller is finally done with the thread stack. */
    537    VG_(threads)[tid].status               = VgTs_Zombie;
    538 
    539    VG_(sigemptyset)(&VG_(threads)[tid].sig_mask);
    540    VG_(sigemptyset)(&VG_(threads)[tid].tmp_sig_mask);
    541 
    542    os_state_clear(&VG_(threads)[tid]);
    543 
    544    /* start with no altstack */
    545    VG_(threads)[tid].altstack.ss_sp = (void *)0xdeadbeef;
    546    VG_(threads)[tid].altstack.ss_size = 0;
    547    VG_(threads)[tid].altstack.ss_flags = VKI_SS_DISABLE;
    548 
    549    VG_(clear_out_queued_signals)(tid, &savedmask);
    550 
    551    VG_(threads)[tid].sched_jmpbuf_valid = False;
    552 }
    553 
    554 /*
    555    Called in the child after fork.  If the parent has multiple
    556    threads, then we've inherited a VG_(threads) array describing them,
    557    but only the thread which called fork() is actually alive in the
     558    child.  This function needs to clean up all those other thread
    559    structures.
    560 
     561    Whichever tid in the parent called fork() becomes the
    562    master_tid in the child.  That's because the only living slot in
    563    VG_(threads) in the child after fork is VG_(threads)[tid], and it
    564    would be too hard to try to re-number the thread and relocate the
    565    thread state down to VG_(threads)[1].
    566 
    567    This function also needs to reinitialize the_BigLock, since
    568    otherwise we may end up sharing its state with the parent, which
    569    would be deeply confusing.
    570 */
    571 static void sched_fork_cleanup(ThreadId me)
    572 {
    573    ThreadId tid;
    574    vg_assert(VG_(running_tid) == me);
    575 
    576 #  if defined(VGO_darwin)
    577    // GrP fixme hack reset Mach ports
    578    VG_(mach_init)();
    579 #  endif
    580 
    581    VG_(threads)[me].os_state.lwpid = VG_(gettid)();
    582    VG_(threads)[me].os_state.threadgroup = VG_(getpid)();
    583 
    584    /* clear out all the unused thread slots */
    585    for (tid = 1; tid < VG_N_THREADS; tid++) {
    586       if (tid != me) {
    587          mostly_clear_thread_record(tid);
    588 	 VG_(threads)[tid].status = VgTs_Empty;
    589          VG_(clear_syscallInfo)(tid);
    590       }
    591    }
    592 
    593    /* re-init and take the sema */
    594    deinit_BigLock();
    595    init_BigLock();
    596    VG_(acquire_BigLock_LL)(NULL);
    597 }
    598 
    599 
    600 /* First phase of initialisation of the scheduler.  Initialise the
    601    bigLock, zeroise the VG_(threads) structure and decide on the
    602    ThreadId of the root thread.
    603 */
    604 ThreadId VG_(scheduler_init_phase1) ( void )
    605 {
    606    Int i;
    607    ThreadId tid_main;
    608 
    609    VG_(debugLog)(1,"sched","sched_init_phase1\n");
    610 
    611    if (VG_(clo_fair_sched) != disable_fair_sched
    612        && !ML_(set_sched_lock_impl)(sched_lock_ticket)
    613        && VG_(clo_fair_sched) == enable_fair_sched)
    614    {
    615       VG_(printf)("Error: fair scheduling is not supported on this system.\n");
    616       VG_(exit)(1);
    617    }
    618 
    619    if (VG_(clo_verbosity) > 1) {
    620       VG_(message)(Vg_DebugMsg,
    621                    "Scheduler: using %s scheduler lock implementation.\n",
    622                    ML_(get_sched_lock_name)());
    623    }
    624 
    625    init_BigLock();
    626 
    627    for (i = 0 /* NB; not 1 */; i < VG_N_THREADS; i++) {
    628       /* Paranoia .. completely zero it out. */
    629       VG_(memset)( & VG_(threads)[i], 0, sizeof( VG_(threads)[i] ) );
    630 
    631       VG_(threads)[i].sig_queue = NULL;
    632 
    633       os_state_init(&VG_(threads)[i]);
    634       mostly_clear_thread_record(i);
    635 
    636       VG_(threads)[i].status                    = VgTs_Empty;
    637       VG_(threads)[i].client_stack_szB          = 0;
    638       VG_(threads)[i].client_stack_highest_byte = (Addr)NULL;
    639       VG_(threads)[i].err_disablement_level     = 0;
    640       VG_(threads)[i].thread_name               = NULL;
    641    }
    642 
    643    tid_main = VG_(alloc_ThreadState)();
    644 
    645    /* Bleh.  Unfortunately there are various places in the system that
    646       assume that the main thread has a ThreadId of 1.
    647       - Helgrind (possibly)
    648       - stack overflow message in default_action() in m_signals.c
    649       - definitely a lot more places
    650    */
    651    vg_assert(tid_main == 1);
    652 
    653    return tid_main;
    654 }
    655 
    656 
    657 /* Second phase of initialisation of the scheduler.  Given the root
    658    ThreadId computed by first phase of initialisation, fill in stack
    659    details and acquire bigLock.  Initialise the scheduler.  This is
    660    called at startup.  The caller subsequently initialises the guest
    661    state components of this main thread.
    662 */
    663 void VG_(scheduler_init_phase2) ( ThreadId tid_main,
    664                                   Addr     clstack_end,
    665                                   SizeT    clstack_size )
    666 {
    667    VG_(debugLog)(1,"sched","sched_init_phase2: tid_main=%u, "
    668                    "cls_end=0x%lx, cls_sz=%lu\n",
    669                    tid_main, clstack_end, clstack_size);
    670 
    671    vg_assert(VG_IS_PAGE_ALIGNED(clstack_end+1));
    672    vg_assert(VG_IS_PAGE_ALIGNED(clstack_size));
    673 
    674    VG_(threads)[tid_main].client_stack_highest_byte
    675       = clstack_end;
    676    VG_(threads)[tid_main].client_stack_szB
    677       = clstack_size;
    678 
    679    VG_(atfork)(NULL, NULL, sched_fork_cleanup);
    680 }
    681 
    682 
    683 /* ---------------------------------------------------------------------
    684    Helpers for running translations.
    685    ------------------------------------------------------------------ */
    686 
    687 /* Use gcc's built-in setjmp/longjmp.  longjmp must not restore signal
    688    mask state, but does need to pass "val" through.  jumped must be a
    689    volatile UWord. */
    690 #define SCHEDSETJMP(tid, jumped, stmt)					\
    691    do {									\
    692       ThreadState * volatile _qq_tst = VG_(get_ThreadState)(tid);	\
    693 									\
    694       (jumped) = VG_MINIMAL_SETJMP(_qq_tst->sched_jmpbuf);              \
    695       if ((jumped) == ((UWord)0)) {                                     \
    696 	 vg_assert(!_qq_tst->sched_jmpbuf_valid);			\
    697 	 _qq_tst->sched_jmpbuf_valid = True;				\
    698 	 stmt;								\
    699       }	else if (VG_(clo_trace_sched))					\
    700 	 VG_(printf)("SCHEDSETJMP(line %d) tid %u, jumped=%lu\n",       \
    701                      __LINE__, tid, jumped);                            \
    702       vg_assert(_qq_tst->sched_jmpbuf_valid);				\
    703       _qq_tst->sched_jmpbuf_valid = False;				\
    704    } while(0)
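
         /* Typical use, as in handle_syscall and run_thread_for_a_while below
            (a sketch):

               volatile UWord jumped = 0;
               SCHEDSETJMP(tid, jumped, VG_(client_syscall)(tid, trc));
               if (jumped != (UWord)0) {
                  // got here via a longjmp out of the signal machinery,
                  // not because 'stmt' completed normally
               }
         */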
    705 
    706 
    707 /* Do various guest state alignment checks prior to running a thread.
    708    Specifically, check that what we have matches Vex's guest state
    709    layout requirements.  See libvex.h for details, but in short the
    710    requirements are: There must be no holes in between the primary
    711    guest state, its two copies, and the spill area.  In short, all 4
    712    areas must be aligned on the LibVEX_GUEST_STATE_ALIGN boundary and
    713    be placed back-to-back without holes in between. */
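
         /* In other words, the expected layout is (a sketch; the asserts below
            are the authoritative statement):

               a_vex              primary guest state    sz_vex bytes
               a_vex + 1*sz_vex   shadow 1               sz_vex bytes
               a_vex + 2*sz_vex   shadow 2               sz_vex bytes
               a_vex + 3*sz_vex   spill area             LibVEX_N_SPILL_BYTES

            with each area starting on a LibVEX_GUEST_STATE_ALIGN boundary. */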
    714 static void do_pre_run_checks ( volatile ThreadState* tst )
    715 {
    716    Addr a_vex     = (Addr) & tst->arch.vex;
    717    Addr a_vexsh1  = (Addr) & tst->arch.vex_shadow1;
    718    Addr a_vexsh2  = (Addr) & tst->arch.vex_shadow2;
    719    Addr a_spill   = (Addr) & tst->arch.vex_spill;
    720    UInt sz_vex    = (UInt) sizeof tst->arch.vex;
    721    UInt sz_vexsh1 = (UInt) sizeof tst->arch.vex_shadow1;
    722    UInt sz_vexsh2 = (UInt) sizeof tst->arch.vex_shadow2;
    723    UInt sz_spill  = (UInt) sizeof tst->arch.vex_spill;
    724 
    725    if (0)
    726    VG_(printf)("gst %p %u, sh1 %p %u, "
    727                "sh2 %p %u, spill %p %u\n",
    728                (void*)a_vex, sz_vex,
    729                (void*)a_vexsh1, sz_vexsh1,
    730                (void*)a_vexsh2, sz_vexsh2,
    731                (void*)a_spill, sz_spill );
    732 
    733    vg_assert(sz_vex    % LibVEX_GUEST_STATE_ALIGN == 0);
    734    vg_assert(sz_vexsh1 % LibVEX_GUEST_STATE_ALIGN == 0);
    735    vg_assert(sz_vexsh2 % LibVEX_GUEST_STATE_ALIGN == 0);
    736    vg_assert(sz_spill  % LibVEX_GUEST_STATE_ALIGN == 0);
    737 
    738    vg_assert(a_vex    % LibVEX_GUEST_STATE_ALIGN == 0);
    739    vg_assert(a_vexsh1 % LibVEX_GUEST_STATE_ALIGN == 0);
    740    vg_assert(a_vexsh2 % LibVEX_GUEST_STATE_ALIGN == 0);
    741    vg_assert(a_spill  % LibVEX_GUEST_STATE_ALIGN == 0);
    742 
    743    /* Check that the guest state and its two shadows have the same
    744       size, and that there are no holes in between.  The latter is
    745       important because Memcheck assumes that it can reliably access
    746       the shadows by indexing off a pointer to the start of the
    747       primary guest state area. */
    748    vg_assert(sz_vex == sz_vexsh1);
    749    vg_assert(sz_vex == sz_vexsh2);
    750    vg_assert(a_vex + 1 * sz_vex == a_vexsh1);
    751    vg_assert(a_vex + 2 * sz_vex == a_vexsh2);
    752    /* Also check there's no hole between the second shadow area and
    753       the spill area. */
    754    vg_assert(sz_spill == LibVEX_N_SPILL_BYTES);
    755    vg_assert(a_vex + 3 * sz_vex == a_spill);
    756 
    757 #  if defined(VGA_x86)
    758    /* x86 XMM regs must form an array, ie, have no holes in
    759       between. */
    760    vg_assert(
    761       (offsetof(VexGuestX86State,guest_XMM7)
    762        - offsetof(VexGuestX86State,guest_XMM0))
    763       == (8/*#regs*/-1) * 16/*bytes per reg*/
    764    );
    765    vg_assert(VG_IS_16_ALIGNED(offsetof(VexGuestX86State,guest_XMM0)));
    766    vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestX86State,guest_FPREG)));
    767    vg_assert(8 == offsetof(VexGuestX86State,guest_EAX));
    768    vg_assert(VG_IS_4_ALIGNED(offsetof(VexGuestX86State,guest_EAX)));
    769    vg_assert(VG_IS_4_ALIGNED(offsetof(VexGuestX86State,guest_EIP)));
    770 #  endif
    771 
    772 #  if defined(VGA_amd64)
    773    /* amd64 YMM regs must form an array, ie, have no holes in
    774       between. */
    775    vg_assert(
    776       (offsetof(VexGuestAMD64State,guest_YMM16)
    777        - offsetof(VexGuestAMD64State,guest_YMM0))
    778       == (17/*#regs*/-1) * 32/*bytes per reg*/
    779    );
    780    vg_assert(VG_IS_16_ALIGNED(offsetof(VexGuestAMD64State,guest_YMM0)));
    781    vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State,guest_FPREG)));
    782    vg_assert(16 == offsetof(VexGuestAMD64State,guest_RAX));
    783    vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State,guest_RAX)));
    784    vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State,guest_RIP)));
    785 #  endif
    786 
    787 #  if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
    788    /* ppc guest_state vector regs must be 16 byte aligned for
    789       loads/stores.  This is important! */
    790    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_VSR0));
    791    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_VSR0));
    792    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_VSR0));
    793    /* be extra paranoid .. */
    794    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_VSR1));
    795    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_VSR1));
    796    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_VSR1));
    797 #  endif
    798 
    799 #  if defined(VGA_arm)
    800    /* arm guest_state VFP regs must be 8 byte aligned for
    801       loads/stores.  Let's use 16 just to be on the safe side. */
    802    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_D0));
    803    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_D0));
    804    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_D0));
    805    /* be extra paranoid .. */
    806    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex.guest_D1));
    807    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow1.guest_D1));
    808    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow2.guest_D1));
    809 #  endif
    810 
    811 #  if defined(VGA_arm64)
    812    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex.guest_X0));
    813    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow1.guest_X0));
    814    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow2.guest_X0));
    815    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_Q0));
    816    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_Q0));
    817    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_Q0));
    818 #  endif
    819 
    820 #  if defined(VGA_s390x)
    821    /* no special requirements */
    822 #  endif
    823 
    824 #  if defined(VGA_mips32) || defined(VGA_mips64)
    825    /* no special requirements */
    826 #  endif
    827 }
    828 
    829 // NO_VGDB_POLL value ensures vgdb is not polled, while
    830 // VGDB_POLL_ASAP ensures that the next scheduler call
    831 // will cause a poll.
    832 #define NO_VGDB_POLL    0xffffffffffffffffULL
    833 #define VGDB_POLL_ASAP  0x0ULL
    834 
    835 void VG_(disable_vgdb_poll) (void )
    836 {
    837    vgdb_next_poll = NO_VGDB_POLL;
    838 }
    839 void VG_(force_vgdb_poll) ( void )
    840 {
    841    vgdb_next_poll = VGDB_POLL_ASAP;
    842 }
    843 
    844 /* Run the thread tid for a while, and return a VG_TRC_* value
    845    indicating why VG_(disp_run_translations) stopped, and possibly an
    846    auxiliary word.  Also, only allow the thread to run for at most
    847    *dispatchCtrP events.  If (as is the normal case) use_alt_host_addr
    848    is False, we are running ordinary redir'd translations, and we
    849    should therefore start by looking up the guest next IP in TT.  If
    850    it is True then we ignore the guest next IP and just run from
    851    alt_host_addr, which presumably points at host code for a no-redir
    852    translation.
    853 
    854    Return results are placed in two_words.  two_words[0] is set to the
    855    TRC.  In the case where that is VG_TRC_CHAIN_ME_TO_{SLOW,FAST}_EP,
    856    the address to patch is placed in two_words[1].
    857 */
    858 static
    859 void run_thread_for_a_while ( /*OUT*/HWord* two_words,
    860                               /*MOD*/Int*   dispatchCtrP,
    861                               ThreadId      tid,
    862                               HWord         alt_host_addr,
    863                               Bool          use_alt_host_addr )
    864 {
    865    volatile HWord        jumped         = 0;
    866    volatile ThreadState* tst            = NULL; /* stop gcc complaining */
    867    volatile Int          done_this_time = 0;
    868    volatile HWord        host_code_addr = 0;
    869 
    870    /* Paranoia */
    871    vg_assert(VG_(is_valid_tid)(tid));
    872    vg_assert(VG_(is_running_thread)(tid));
    873    vg_assert(!VG_(is_exiting)(tid));
    874    vg_assert(*dispatchCtrP > 0);
    875 
    876    tst = VG_(get_ThreadState)(tid);
    877    do_pre_run_checks( tst );
    878    /* end Paranoia */
    879 
    880    /* Futz with the XIndir stats counters. */
    881    vg_assert(VG_(stats__n_xindirs_32) == 0);
    882    vg_assert(VG_(stats__n_xindir_misses_32) == 0);
    883 
    884    /* Clear return area. */
    885    two_words[0] = two_words[1] = 0;
    886 
    887    /* Figure out where we're starting from. */
    888    if (use_alt_host_addr) {
    889       /* unusual case -- no-redir translation */
    890       host_code_addr = alt_host_addr;
    891    } else {
    892       /* normal case -- redir translation */
    893       UInt cno = (UInt)VG_TT_FAST_HASH((Addr)tst->arch.vex.VG_INSTR_PTR);
    894       if (LIKELY(VG_(tt_fast)[cno].guest == (Addr)tst->arch.vex.VG_INSTR_PTR))
    895          host_code_addr = VG_(tt_fast)[cno].host;
    896       else {
    897          Addr res = 0;
     898          /* not found in VG_(tt_fast).  Searching the transtab here
     899             improves performance compared to returning directly
     900             to the scheduler. */
    901          Bool  found = VG_(search_transtab)(&res, NULL, NULL,
    902                                             (Addr)tst->arch.vex.VG_INSTR_PTR,
    903                                             True/*upd cache*/
    904                                             );
    905          if (LIKELY(found)) {
    906             host_code_addr = res;
    907          } else {
    908             /* At this point, we know that we intended to start at a
    909                normal redir translation, but it was not found.  In
    910                which case we can return now claiming it's not
    911                findable. */
    912             two_words[0] = VG_TRC_INNER_FASTMISS; /* hmm, is that right? */
    913             return;
    914          }
    915       }
    916    }
    917    /* We have either a no-redir or a redir translation. */
    918    vg_assert(host_code_addr != 0); /* implausible */
    919 
    920    /* there should be no undealt-with signals */
    921    //vg_assert(VG_(threads)[tid].siginfo.si_signo == 0);
    922 
    923    /* Set up event counter stuff for the run. */
    924    tst->arch.vex.host_EvC_COUNTER = *dispatchCtrP;
    925    tst->arch.vex.host_EvC_FAILADDR
    926       = (HWord)VG_(fnptr_to_fnentry)( &VG_(disp_cp_evcheck_fail) );
    927 
    928    if (0) {
    929       vki_sigset_t m;
    930       Int i, err = VG_(sigprocmask)(VKI_SIG_SETMASK, NULL, &m);
    931       vg_assert(err == 0);
    932       VG_(printf)("tid %u: entering code with unblocked signals: ", tid);
    933       for (i = 1; i <= _VKI_NSIG; i++)
    934          if (!VG_(sigismember)(&m, i))
    935             VG_(printf)("%d ", i);
    936       VG_(printf)("\n");
    937    }
    938 
    939    /* Set up return-value area. */
    940 
    941    // Tell the tool this thread is about to run client code
    942    VG_TRACK( start_client_code, tid, bbs_done );
    943 
    944    vg_assert(VG_(in_generated_code) == False);
    945    VG_(in_generated_code) = True;
    946 
    947    SCHEDSETJMP(
    948       tid,
    949       jumped,
    950       VG_(disp_run_translations)(
    951          two_words,
    952          (volatile void*)&tst->arch.vex,
    953          host_code_addr
    954       )
    955    );
    956 
    957    vg_assert(VG_(in_generated_code) == True);
    958    VG_(in_generated_code) = False;
    959 
    960    if (jumped != (HWord)0) {
    961       /* We get here if the client took a fault that caused our signal
    962          handler to longjmp. */
    963       vg_assert(two_words[0] == 0 && two_words[1] == 0); // correct?
    964       two_words[0] = VG_TRC_FAULT_SIGNAL;
    965       two_words[1] = 0;
    966       block_signals();
    967    }
    968 
    969    /* Merge the 32-bit XIndir/miss counters into the 64 bit versions,
    970       and zero out the 32-bit ones in preparation for the next run of
    971       generated code. */
    972    stats__n_xindirs += (ULong)VG_(stats__n_xindirs_32);
    973    VG_(stats__n_xindirs_32) = 0;
    974    stats__n_xindir_misses += (ULong)VG_(stats__n_xindir_misses_32);
    975    VG_(stats__n_xindir_misses_32) = 0;
    976 
    977    /* Inspect the event counter. */
    978    vg_assert((Int)tst->arch.vex.host_EvC_COUNTER >= -1);
    979    vg_assert(tst->arch.vex.host_EvC_FAILADDR
    980              == (HWord)VG_(fnptr_to_fnentry)( &VG_(disp_cp_evcheck_fail)) );
    981 
    982    /* The number of events done this time is the difference between
    983       the event counter originally and what it is now.  Except -- if
    984       it has gone negative (to -1) then the transition 0 to -1 doesn't
    985       correspond to a real executed block, so back it out.  It's like
    986       this because the event checks decrement the counter first and
    987       check it for negativeness second, hence the 0 to -1 transition
    988       causes a bailout and the block it happens in isn't executed. */
    989    {
    990      Int dispatchCtrAfterwards = (Int)tst->arch.vex.host_EvC_COUNTER;
    991      done_this_time = *dispatchCtrP - dispatchCtrAfterwards;
    992      if (dispatchCtrAfterwards == -1) {
    993         done_this_time--;
    994      } else {
    995         /* If the generated code drives the counter below -1, something
    996            is seriously wrong. */
    997         vg_assert(dispatchCtrAfterwards >= 0);
    998      }
    999    }
   1000 
   1001    vg_assert(done_this_time >= 0);
   1002    bbs_done += (ULong)done_this_time;
   1003 
   1004    *dispatchCtrP -= done_this_time;
   1005    vg_assert(*dispatchCtrP >= 0);
   1006 
   1007    // Tell the tool this thread has stopped running client code
   1008    VG_TRACK( stop_client_code, tid, bbs_done );
   1009 
   1010    if (bbs_done >= vgdb_next_poll) {
   1011       if (VG_(clo_vgdb_poll))
   1012          vgdb_next_poll = bbs_done + (ULong)VG_(clo_vgdb_poll);
   1013       else
   1014          /* value was changed due to gdbserver invocation via ptrace */
   1015          vgdb_next_poll = NO_VGDB_POLL;
   1016       if (VG_(gdbserver_activity) (tid))
   1017          VG_(gdbserver) (tid);
   1018    }
   1019 
    1020    /* TRC value and possible auxiliary patch-address word are already
    1021       in two_words[0] and [1] respectively, as a result of the call to
    1022       VG_(disp_run_translations). */
   1023    /* Stay sane .. */
   1024    if (two_words[0] == VG_TRC_CHAIN_ME_TO_SLOW_EP
   1025        || two_words[0] == VG_TRC_CHAIN_ME_TO_FAST_EP) {
   1026       vg_assert(two_words[1] != 0); /* we have a legit patch addr */
   1027    } else {
   1028       vg_assert(two_words[1] == 0); /* nobody messed with it */
   1029    }
   1030 }
   1031 
   1032 
   1033 /* ---------------------------------------------------------------------
   1034    The scheduler proper.
   1035    ------------------------------------------------------------------ */
   1036 
   1037 static void handle_tt_miss ( ThreadId tid )
   1038 {
   1039    Bool found;
   1040    Addr ip = VG_(get_IP)(tid);
   1041 
   1042    /* Trivial event.  Miss in the fast-cache.  Do a full
   1043       lookup for it. */
   1044    found = VG_(search_transtab)( NULL, NULL, NULL,
   1045                                  ip, True/*upd_fast_cache*/ );
   1046    if (UNLIKELY(!found)) {
   1047       /* Not found; we need to request a translation. */
   1048       if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/,
   1049                           bbs_done, True/*allow redirection*/ )) {
   1050          found = VG_(search_transtab)( NULL, NULL, NULL,
   1051                                        ip, True );
   1052          vg_assert2(found, "handle_tt_miss: missing tt_fast entry");
   1053 
   1054       } else {
   1055 	 // If VG_(translate)() fails, it's because it had to throw a
   1056 	 // signal because the client jumped to a bad address.  That
   1057 	 // means that either a signal has been set up for delivery,
   1058 	 // or the thread has been marked for termination.  Either
   1059 	 // way, we just need to go back into the scheduler loop.
   1060       }
   1061    }
   1062 }
   1063 
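         /* The dispatcher returned VG_TRC_CHAIN_ME_TO_{SLOW,FAST}_EP: the code at
            'place_to_chain' wants to be patched so that it jumps directly to the
            translation of this thread's current IP, via either its slow or fast
            entry point (toFastEP).  Make sure that translation exists (creating
            it if needed), then patch via VG_(tt_tc_do_chaining). */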
   1064 static
   1065 void handle_chain_me ( ThreadId tid, void* place_to_chain, Bool toFastEP )
   1066 {
   1067    Bool found          = False;
   1068    Addr ip             = VG_(get_IP)(tid);
   1069    SECno to_sNo         = INV_SNO;
   1070    TTEno to_tteNo       = INV_TTE;
   1071 
   1072    found = VG_(search_transtab)( NULL, &to_sNo, &to_tteNo,
   1073                                  ip, False/*dont_upd_fast_cache*/ );
   1074    if (!found) {
   1075       /* Not found; we need to request a translation. */
   1076       if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/,
   1077                           bbs_done, True/*allow redirection*/ )) {
   1078          found = VG_(search_transtab)( NULL, &to_sNo, &to_tteNo,
   1079                                        ip, False );
   1080          vg_assert2(found, "handle_chain_me: missing tt_fast entry");
   1081       } else {
   1082 	 // If VG_(translate)() fails, it's because it had to throw a
   1083 	 // signal because the client jumped to a bad address.  That
   1084 	 // means that either a signal has been set up for delivery,
   1085 	 // or the thread has been marked for termination.  Either
   1086 	 // way, we just need to go back into the scheduler loop.
   1087         return;
   1088       }
   1089    }
   1090    vg_assert(found);
   1091    vg_assert(to_sNo != INV_SNO);
   1092    vg_assert(to_tteNo != INV_TTE);
   1093 
   1094    /* So, finally we know where to patch through to.  Do the patching
   1095       and update the various admin tables that allow it to be undone
   1096       in the case that the destination block gets deleted. */
   1097    VG_(tt_tc_do_chaining)( place_to_chain,
   1098                            to_sNo, to_tteNo, toFastEP );
   1099 }
   1100 
   1101 static void handle_syscall(ThreadId tid, UInt trc)
   1102 {
   1103    ThreadState * volatile tst = VG_(get_ThreadState)(tid);
   1104    volatile UWord jumped;
   1105 
   1106    /* Syscall may or may not block; either way, it will be
   1107       complete by the time this call returns, and we'll be
   1108       runnable again.  We could take a signal while the
   1109       syscall runs. */
   1110 
   1111    if (VG_(clo_sanity_level) >= 3) {
   1112       HChar buf[50];    // large enough
   1113       VG_(sprintf)(buf, "(BEFORE SYSCALL, tid %u)", tid);
   1114       Bool ok = VG_(am_do_sync_check)(buf, __FILE__, __LINE__);
   1115       vg_assert(ok);
   1116    }
   1117 
   1118    SCHEDSETJMP(tid, jumped, VG_(client_syscall)(tid, trc));
   1119 
   1120    if (VG_(clo_sanity_level) >= 3) {
   1121       HChar buf[50];    // large enough
   1122       VG_(sprintf)(buf, "(AFTER SYSCALL, tid %u)", tid);
   1123       Bool ok = VG_(am_do_sync_check)(buf, __FILE__, __LINE__);
   1124       vg_assert(ok);
   1125    }
   1126 
   1127    if (!VG_(is_running_thread)(tid))
   1128       VG_(printf)("tid %u not running; VG_(running_tid)=%u, tid %u status %u\n",
   1129 		  tid, VG_(running_tid), tid, tst->status);
   1130    vg_assert(VG_(is_running_thread)(tid));
   1131 
   1132    if (jumped != (UWord)0) {
   1133       block_signals();
   1134       VG_(poll_signals)(tid);
   1135    }
   1136 }
   1137 
   1138 /* tid just requested a jump to the noredir version of its current
   1139    program counter.  So make up that translation if needed, run it,
   1140    and return the resulting thread return code in two_words[]. */
   1141 static
   1142 void handle_noredir_jump ( /*OUT*/HWord* two_words,
   1143                            /*MOD*/Int*   dispatchCtrP,
   1144                            ThreadId tid )
   1145 {
   1146    /* Clear return area. */
   1147    two_words[0] = two_words[1] = 0;
   1148 
   1149    Addr  hcode = 0;
   1150    Addr  ip    = VG_(get_IP)(tid);
   1151 
   1152    Bool  found = VG_(search_unredir_transtab)( &hcode, ip );
   1153    if (!found) {
   1154       /* Not found; we need to request a translation. */
   1155       if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/, bbs_done,
   1156                           False/*NO REDIRECTION*/ )) {
   1157 
   1158          found = VG_(search_unredir_transtab)( &hcode, ip );
   1159          vg_assert2(found, "unredir translation missing after creation?!");
   1160       } else {
   1161 	 // If VG_(translate)() fails, it's because it had to throw a
   1162 	 // signal because the client jumped to a bad address.  That
   1163 	 // means that either a signal has been set up for delivery,
   1164 	 // or the thread has been marked for termination.  Either
   1165 	 // way, we just need to go back into the scheduler loop.
   1166          two_words[0] = VG_TRC_BORING;
   1167          return;
   1168       }
   1169 
   1170    }
   1171 
   1172    vg_assert(found);
   1173    vg_assert(hcode != 0);
   1174 
   1175    /* Otherwise run it and return the resulting VG_TRC_* value. */
   1176    vg_assert(*dispatchCtrP > 0); /* so as to guarantee progress */
   1177    run_thread_for_a_while( two_words, dispatchCtrP, tid,
   1178                            hcode, True/*use hcode*/ );
   1179 }
   1180 
   1181 
   1182 /*
   1183    Run a thread until it wants to exit.
   1184 
   1185    We assume that the caller has already called VG_(acquire_BigLock) for
   1186    us, so we own the VCPU.  Also, all signals are blocked.
   1187  */
   1188 VgSchedReturnCode VG_(scheduler) ( ThreadId tid )
   1189 {
   1190    /* Holds the remaining size of this thread's "timeslice". */
   1191    Int dispatch_ctr = 0;
   1192 
   1193    ThreadState *tst = VG_(get_ThreadState)(tid);
   1194    static Bool vgdb_startup_action_done = False;
   1195 
   1196    if (VG_(clo_trace_sched))
   1197       print_sched_event(tid, "entering VG_(scheduler)");
   1198 
   1199    /* Do vgdb initialization (but once). Only the first (main) task
   1200       starting up will do the below.
    1201       Initializing gdbserver earlier than at the first call to
    1202       VG_(scheduler) causes problems:
   1203       * at the end of VG_(scheduler_init_phase2) :
   1204         The main thread is in VgTs_Init state, but in a not yet
   1205         consistent state => the thread cannot be reported to gdb
   1206         (e.g. causes an assert in LibVEX_GuestX86_get_eflags when giving
   1207         back the guest registers to gdb).
   1208       * at end of valgrind_main, just
   1209         before VG_(main_thread_wrapper_NORETURN)(1) :
   1210         The main thread is still in VgTs_Init state but in a
   1211         more advanced state. However, the thread state is not yet
   1212         completely initialized : a.o., the os_state is not yet fully
   1213         set => the thread is then not properly reported to gdb,
    1214         which is then confused (causing e.g. a duplicate thread to be
   1215         shown, without thread id).
   1216       * it would be possible to initialize gdbserver "lower" in the
   1217         call stack (e.g. in VG_(main_thread_wrapper_NORETURN)) but
   1218         these are platform dependent and the place at which
   1219         the thread state is completely initialized is not
   1220         specific anymore to the main thread (so a similar "do it only
   1221         once" would be needed).
   1222 
   1223         => a "once only" initialization here is the best compromise. */
   1224    if (!vgdb_startup_action_done) {
   1225       vg_assert(tid == 1); // it must be the main thread.
   1226       vgdb_startup_action_done = True;
   1227       if (VG_(clo_vgdb) != Vg_VgdbNo) {
    1228          /* If we have to poll, ensure we do an initial poll at the first
    1229             scheduler call.  Otherwise, ensure no poll (unless interrupted
    1230             by ptrace). */
   1231          if (VG_(clo_vgdb_poll))
   1232             VG_(force_vgdb_poll) ();
   1233          else
   1234             VG_(disable_vgdb_poll) ();
   1235 
   1236          vg_assert (VG_(dyn_vgdb_error) == VG_(clo_vgdb_error));
   1237          /* As we are initializing, VG_(dyn_vgdb_error) can't have been
   1238             changed yet. */
   1239 
   1240          VG_(gdbserver_prerun_action) (1);
   1241       } else {
   1242          VG_(disable_vgdb_poll) ();
   1243       }
   1244    }
   1245 
   1246    if (SimHintiS(SimHint_no_nptl_pthread_stackcache, VG_(clo_sim_hints))
   1247        && tid != 1) {
   1248       /* We disable the stack cache the first time we see a thread other
   1249          than the main thread appearing. At this moment, we are sure the pthread
   1250          lib loading is done/variable was initialised by pthread lib/... */
   1251       if (VG_(client__stack_cache_actsize__addr)) {
   1252          if (*VG_(client__stack_cache_actsize__addr) == 0) {
   1253             VG_(debugLog)(1,"sched",
   1254                           "pthread stack cache size disable done"
   1255                           " via kludge\n");
   1256             *VG_(client__stack_cache_actsize__addr) = 1000 * 1000 * 1000;
   1257             /* Set a value big enough to be above the hardcoded maximum stack
   1258                cache size in glibc, small enough to allow a pthread stack size
   1259                to be added without risk of overflow. */
   1260          }
   1261       } else {
   1262           VG_(debugLog)(0,"sched",
   1263                         "WARNING: pthread stack cache cannot be disabled!\n");
   1264           VG_(clo_sim_hints) &= ~SimHint2S(SimHint_no_nptl_pthread_stackcache);
   1265           /* Remove SimHint_no_nptl_pthread_stackcache from VG_(clo_sim_hints)
   1266              to avoid having a msg for all following threads. */
   1267       }
   1268    }
   1269 
   1270    /* set the proper running signal mask */
   1271    block_signals();
   1272 
   1273    vg_assert(VG_(is_running_thread)(tid));
   1274 
   1275    dispatch_ctr = SCHEDULING_QUANTUM;
   1276 
   1277    while (!VG_(is_exiting)(tid)) {
   1278 
   1279       vg_assert(dispatch_ctr >= 0);
   1280       if (dispatch_ctr == 0) {
   1281 
   1282 	 /* Our slice is done, so yield the CPU to another thread.  On
   1283             Linux, this doesn't sleep between sleeping and running,
   1284             since that would take too much time. */
   1285 
   1286 	 /* 4 July 06: it seems that a zero-length nsleep is needed to
   1287             cause async thread cancellation (canceller.c) to terminate
   1288             in finite time; else it is in some kind of race/starvation
   1289             situation and completion is arbitrarily delayed (although
   1290             this is not a deadlock).
   1291 
   1292             Unfortunately these sleeps cause MPI jobs not to terminate
   1293             sometimes (some kind of livelock).  So sleeping once
   1294             every N opportunities appears to work. */
   1295 
   1296 	 /* 3 Aug 06: doing sys__nsleep works but crashes some apps.
   1297             sys_yield also helps the problem, whilst not crashing apps. */
   1298 
   1299 	 VG_(release_BigLock)(tid, VgTs_Yielding,
   1300                                    "VG_(scheduler):timeslice");
   1301 	 /* ------------ now we don't have The Lock ------------ */
   1302 
   1303 	 VG_(acquire_BigLock)(tid, "VG_(scheduler):timeslice");
   1304 	 /* ------------ now we do have The Lock ------------ */
   1305 
   1306 	 /* OK, do some relatively expensive housekeeping stuff */
   1307 	 scheduler_sanity(tid);
   1308 	 VG_(sanity_check_general)(False);
   1309 
   1310 	 /* Look for any pending signals for this thread, and set them up
   1311 	    for delivery */
   1312 	 VG_(poll_signals)(tid);
   1313 
   1314 	 if (VG_(is_exiting)(tid))
   1315 	    break;		/* poll_signals picked up a fatal signal */
   1316 
   1317 	 /* For stats purposes only. */
   1318 	 n_scheduling_events_MAJOR++;
   1319 
   1320 	 /* Figure out how many bbs to ask vg_run_innerloop to do. */
   1321          dispatch_ctr = SCHEDULING_QUANTUM;
   1322 
   1323 	 /* paranoia ... */
   1324 	 vg_assert(tst->tid == tid);
   1325 	 vg_assert(tst->os_state.lwpid == VG_(gettid)());
   1326       }
   1327 
   1328       /* For stats purposes only. */
   1329       n_scheduling_events_MINOR++;
   1330 
   1331       if (0)
   1332          VG_(message)(Vg_DebugMsg, "thread %u: running for %d bbs\n",
   1333                                    tid, dispatch_ctr - 1 );
   1334 
   1335       HWord trc[2]; /* "two_words" */
   1336       run_thread_for_a_while( &trc[0],
   1337                               &dispatch_ctr,
   1338                               tid, 0/*ignored*/, False );
   1339 
   1340       if (VG_(clo_trace_sched) && VG_(clo_verbosity) > 2) {
   1341          const HChar *name = name_of_sched_event(trc[0]);
   1342          HChar buf[VG_(strlen)(name) + 10];    // large enough
   1343 	 VG_(sprintf)(buf, "TRC: %s", name);
   1344 	 print_sched_event(tid, buf);
   1345       }
   1346 
   1347       if (trc[0] == VEX_TRC_JMP_NOREDIR) {
   1348          /* If we got a request to run a no-redir version of
   1349             something, do so now -- handle_noredir_jump just (creates
   1350             and) runs that one translation.  The flip side is that the
   1351             noredir translation can't itself return another noredir
   1352             request -- that would be nonsensical.  It can, however,
   1353             return VG_TRC_BORING, which just means keep going as
   1354             normal. */
   1355          /* Note that the fact that we need to continue with a
   1356             no-redir jump is not recorded anywhere else in this
   1357             thread's state.  So we *must* execute the block right now
   1358             -- we can't fail to execute it and later resume with it,
   1359             because by then we'll have forgotten the fact that it
   1360             should be run as no-redir, but will get run as a normal
   1361             potentially-redir'd, hence screwing up.  This really ought
   1362             to be cleaned up, by noting in the guest state that the
   1363             next block to be executed should be no-redir.  Then we can
   1364             suspend and resume at any point, which isn't the case at
   1365             the moment. */
   1366          /* We can't enter a no-redir translation with the dispatch
   1367             ctr set to zero, for the reasons commented just above --
   1368             we need to force it to execute right now.  So, if the
   1369             dispatch ctr is zero, set it to one.  Note that this would
    1370             have the bad side effect of holding the Big Lock arbitrarily
   1371             long should there be an arbitrarily long sequence of
   1372             back-to-back no-redir translations to run.  But we assert
   1373             just below that this translation cannot request another
   1374             no-redir jump, so we should be safe against that. */
   1375          if (dispatch_ctr == 0) {
   1376             dispatch_ctr = 1;
   1377          }
   1378          handle_noredir_jump( &trc[0],
   1379                               &dispatch_ctr,
   1380                               tid );
   1381          vg_assert(trc[0] != VEX_TRC_JMP_NOREDIR);
   1382 
   1383          /* This can't be allowed to happen, since it means the block
   1384             didn't execute, and we have no way to resume-as-noredir
   1385             after we get more timeslice.  But I don't think it ever
   1386             can, since handle_noredir_jump will assert if the counter
   1387             is zero on entry. */
   1388          vg_assert(trc[0] != VG_TRC_INNER_COUNTERZERO);
   1389          /* This asserts the same thing. */
   1390          vg_assert(dispatch_ctr >= 0);
   1391 
   1392          /* A no-redir translation can't return with a chain-me
   1393             request, since chaining in the no-redir cache is too
   1394             complex. */
   1395          vg_assert(trc[0] != VG_TRC_CHAIN_ME_TO_SLOW_EP
   1396                    && trc[0] != VG_TRC_CHAIN_ME_TO_FAST_EP);
   1397       }
   1398 
   1399       switch (trc[0]) {
   1400       case VEX_TRC_JMP_BORING:
   1401          /* assisted dispatch, no event.  Used by no-redir
   1402             translations to force return to the scheduler. */
   1403       case VG_TRC_BORING:
   1404          /* no special event, just keep going. */
   1405          break;
   1406 
   1407       case VG_TRC_INNER_FASTMISS:
   1408 	 vg_assert(dispatch_ctr >= 0);
   1409 	 handle_tt_miss(tid);
   1410 	 break;
   1411 
   1412       case VG_TRC_CHAIN_ME_TO_SLOW_EP: {
   1413          if (0) VG_(printf)("sched: CHAIN_TO_SLOW_EP: %p\n", (void*)trc[1] );
   1414          handle_chain_me(tid, (void*)trc[1], False);
   1415          break;
   1416       }
   1417 
   1418       case VG_TRC_CHAIN_ME_TO_FAST_EP: {
   1419          if (0) VG_(printf)("sched: CHAIN_TO_FAST_EP: %p\n", (void*)trc[1] );
   1420          handle_chain_me(tid, (void*)trc[1], True);
   1421          break;
   1422       }
   1423 
   1424       case VEX_TRC_JMP_CLIENTREQ:
   1425 	 do_client_request(tid);
   1426 	 break;
   1427 
   1428       case VEX_TRC_JMP_SYS_INT128:  /* x86-linux */
   1429       case VEX_TRC_JMP_SYS_INT129:  /* x86-darwin */
   1430       case VEX_TRC_JMP_SYS_INT130:  /* x86-darwin */
   1431       case VEX_TRC_JMP_SYS_INT145:  /* x86-solaris */
   1432       case VEX_TRC_JMP_SYS_INT210:  /* x86-solaris */
   1433       /* amd64-linux, ppc32-linux, amd64-darwin, amd64-solaris */
   1434       case VEX_TRC_JMP_SYS_SYSCALL:
   1435 	 handle_syscall(tid, trc[0]);
   1436 	 if (VG_(clo_sanity_level) > 2)
   1437 	    VG_(sanity_check_general)(True); /* sanity-check every syscall */
   1438 	 break;
   1439 
   1440       case VEX_TRC_JMP_YIELD:
   1441 	 /* Explicit yield, because this thread is in a spin-lock
   1442 	    or something.  Only let the thread run for a short while
   1443             longer.  Because swapping to another thread is expensive,
   1444             we're prepared to let this thread eat a little more CPU
   1445             before swapping to another.  That means that short term
   1446             spins waiting for hardware to poke memory won't cause a
   1447             thread swap. */
   1448          if (dispatch_ctr > 1000)
   1449             dispatch_ctr = 1000;
   1450 	 break;
   1451 
   1452       case VG_TRC_INNER_COUNTERZERO:
   1453 	 /* Timeslice is out.  Let a new thread be scheduled. */
   1454 	 vg_assert(dispatch_ctr == 0);
   1455 	 break;
   1456 
   1457       case VG_TRC_FAULT_SIGNAL:
   1458 	 /* Everything should be set up (either we're exiting, or
   1459 	    about to start in a signal handler). */
   1460 	 break;
   1461 
   1462       case VEX_TRC_JMP_MAPFAIL:
   1463          /* Failure of arch-specific address translation (x86/amd64
   1464             segment override use) */
   1465          /* jrs 2005 03 11: is this correct? */
   1466          VG_(synth_fault)(tid);
   1467          break;
   1468 
   1469       case VEX_TRC_JMP_EMWARN: {
   1470          static Int  counts[EmNote_NUMBER];
   1471          static Bool counts_initted = False;
   1472          VexEmNote ew;
   1473          const HChar* what;
   1474          Bool      show;
   1475          Int       q;
   1476          if (!counts_initted) {
   1477             counts_initted = True;
   1478             for (q = 0; q < EmNote_NUMBER; q++)
   1479                counts[q] = 0;
   1480          }
   1481          ew   = (VexEmNote)VG_(threads)[tid].arch.vex.guest_EMNOTE;
   1482          what = (ew < 0 || ew >= EmNote_NUMBER)
   1483                    ? "unknown (?!)"
   1484                    : LibVEX_EmNote_string(ew);
   1485          show = (ew < 0 || ew >= EmNote_NUMBER)
   1486                    ? True
   1487                    : counts[ew]++ < 3;
   1488          if (show && VG_(clo_show_emwarns) && !VG_(clo_xml)) {
   1489             VG_(message)( Vg_UserMsg,
   1490                           "Emulation warning: unsupported action:\n");
   1491             VG_(message)( Vg_UserMsg, "  %s\n", what);
   1492             VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
   1493          }
   1494          break;
   1495       }
   1496 
   1497       case VEX_TRC_JMP_EMFAIL: {
   1498          VexEmNote ew;
   1499          const HChar* what;
   1500          ew   = (VexEmNote)VG_(threads)[tid].arch.vex.guest_EMNOTE;
   1501          what = (ew < 0 || ew >= EmNote_NUMBER)
   1502                    ? "unknown (?!)"
   1503                    : LibVEX_EmNote_string(ew);
   1504          VG_(message)( Vg_UserMsg,
   1505                        "Emulation fatal error -- Valgrind cannot continue:\n");
   1506          VG_(message)( Vg_UserMsg, "  %s\n", what);
   1507          VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
   1508          VG_(message)(Vg_UserMsg, "\n");
   1509          VG_(message)(Vg_UserMsg, "Valgrind has to exit now.  Sorry.\n");
   1510          VG_(message)(Vg_UserMsg, "\n");
   1511          VG_(exit)(1);
   1512          break;
   1513       }
   1514 
   1515       case VEX_TRC_JMP_SIGILL:
   1516          VG_(synth_sigill)(tid, VG_(get_IP)(tid));
   1517          break;
   1518 
   1519       case VEX_TRC_JMP_SIGTRAP:
   1520          VG_(synth_sigtrap)(tid);
   1521          break;
   1522 
   1523       case VEX_TRC_JMP_SIGSEGV:
   1524          VG_(synth_fault)(tid);
   1525          break;
   1526 
   1527       case VEX_TRC_JMP_SIGBUS:
   1528          VG_(synth_sigbus)(tid);
   1529          break;
   1530 
   1531       case VEX_TRC_JMP_SIGFPE_INTDIV:
   1532          VG_(synth_sigfpe)(tid, VKI_FPE_INTDIV);
   1533          break;
   1534 
   1535       case VEX_TRC_JMP_SIGFPE_INTOVF:
   1536          VG_(synth_sigfpe)(tid, VKI_FPE_INTOVF);
   1537          break;
   1538 
   1539       case VEX_TRC_JMP_NODECODE: {
   1540          Addr addr = VG_(get_IP)(tid);
   1541 
   1542          if (VG_(clo_sigill_diag)) {
   1543             VG_(umsg)(
   1544                "valgrind: Unrecognised instruction at address %#lx.\n", addr);
   1545             VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
   1546 #        define M(a) VG_(umsg)(a "\n");
   1547          M("Your program just tried to execute an instruction that Valgrind" );
   1548          M("did not recognise.  There are two possible reasons for this."    );
   1549          M("1. Your program has a bug and erroneously jumped to a non-code"  );
   1550          M("   location.  If you are running Memcheck and you just saw a"    );
   1551          M("   warning about a bad jump, it's probably your program's fault.");
   1552          M("2. The instruction is legitimate but Valgrind doesn't handle it,");
   1553          M("   i.e. it's Valgrind's fault.  If you think this is the case or");
   1554          M("   you are not sure, please let us know and we'll try to fix it.");
   1555          M("Either way, Valgrind will now raise a SIGILL signal which will"  );
   1556          M("probably kill your program."                                     );
   1557 #        undef M
   1558          }
   1559 #        if defined(VGA_s390x)
   1560          /* Now that the complaint is out we need to adjust the guest_IA. The
   1561             reason is that -- after raising the exception -- execution will
   1562             continue with the insn that follows the invalid insn. As the first
   1563             2 bits of the invalid insn determine its length in the usual way,
   1564             we can compute the address of the next insn here and adjust the
   1565             guest_IA accordingly. This adjustment is essential and tested by
   1566             none/tests/s390x/op_exception.c (which would loop forever
   1567             otherwise) */
   1568          UChar byte = ((UChar *)addr)[0];
   1569          UInt  insn_length = ((((byte >> 6) + 1) >> 1) + 1) << 1;
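         /* Worked example of the formula above: (byte >> 6) is the value of
            the two top bits, 0..3, which the expression maps to 2,4,4,6
            respectively -- i.e. 00 -> 2-byte, 01/10 -> 4-byte, 11 -> 6-byte
            instruction, the usual s390x length encoding. */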
   1570          Addr  next_insn_addr = addr + insn_length;
   1571          VG_(set_IP)(tid, next_insn_addr);
   1572 #        endif
   1573          VG_(synth_sigill)(tid, addr);
   1574          break;
   1575       }
   1576 
   1577       case VEX_TRC_JMP_INVALICACHE:
   1578          VG_(discard_translations)(
   1579             (Addr)VG_(threads)[tid].arch.vex.guest_CMSTART,
   1580             VG_(threads)[tid].arch.vex.guest_CMLEN,
   1581             "scheduler(VEX_TRC_JMP_INVALICACHE)"
   1582          );
   1583          if (0)
   1584             VG_(printf)("dump translations done.\n");
   1585          break;
   1586 
   1587       case VEX_TRC_JMP_FLUSHDCACHE: {
   1588          void* start = (void*)VG_(threads)[tid].arch.vex.guest_CMSTART;
   1589          SizeT len   = VG_(threads)[tid].arch.vex.guest_CMLEN;
   1590          VG_(debugLog)(2, "sched", "flush_dcache(%p, %lu)\n", start, len);
   1591          VG_(flush_dcache)(start, len);
   1592          break;
   1593       }
   1594 
   1595       case VG_TRC_INVARIANT_FAILED:
   1596          /* This typically happens if, after running generated code,
   1597             it is detected that host CPU settings (eg, FPU/Vector
   1598             control words) are not as they should be.  Vex's code
   1599             generation specifies the state such control words should
   1600             be in on entry to Vex-generated code, and they should be
   1601             unchanged on exit from it.  Failure of this assertion
   1602             usually means a bug in Vex's code generation. */
   1603          //{ UInt xx;
   1604          //  __asm__ __volatile__ (
   1605          //     "\t.word 0xEEF12A10\n"  // fmrx r2,fpscr
   1606          //     "\tmov %0, r2" : "=r"(xx) : : "r2" );
   1607          //  VG_(printf)("QQQQ new fpscr = %08x\n", xx);
   1608          //}
   1609          vg_assert2(0, "VG_(scheduler), phase 3: "
   1610                        "run_innerloop detected host "
   1611                        "state invariant failure", trc);
   1612 
   1613       case VEX_TRC_JMP_SYS_SYSENTER:
   1614          /* Do whatever simulation is appropriate for an x86 sysenter
   1615             instruction.  Note that it is critical to set this thread's
   1616             guest_EIP to point at the code to execute after the
   1617             sysenter, since Vex-generated code will not have set it --
   1618             vex does not know what it should be.  Vex sets the next
   1619             address to zero, so if you don't set guest_EIP, the thread
   1620             will jump to zero afterwards and probably die as a result. */
   1621 #        if defined(VGP_x86_linux)
   1622          vg_assert2(0, "VG_(scheduler), phase 3: "
   1623                        "sysenter_x86 on x86-linux is not supported");
   1624 #        elif defined(VGP_x86_darwin) || defined(VGP_x86_solaris)
   1625          /* return address in client edx */
   1626          VG_(threads)[tid].arch.vex.guest_EIP
   1627             = VG_(threads)[tid].arch.vex.guest_EDX;
   1628          handle_syscall(tid, trc[0]);
   1629 #        else
   1630          vg_assert2(0, "VG_(scheduler), phase 3: "
   1631                        "sysenter_x86 on non-x86 platform?!?!");
   1632 #        endif
   1633          break;
   1634 
   1635       default:
   1636 	 vg_assert2(0, "VG_(scheduler), phase 3: "
   1637                        "unexpected thread return code (%u)", trc[0]);
   1638 	 /* NOTREACHED */
   1639 	 break;
   1640 
   1641       } /* switch (trc) */
   1642 
   1643       if (UNLIKELY(VG_(clo_profyle_sbs)) && VG_(clo_profyle_interval) > 0)
   1644          maybe_show_sb_profile();
   1645    }
   1646 
   1647    if (VG_(clo_trace_sched))
   1648       print_sched_event(tid, "exiting VG_(scheduler)");
   1649 
   1650    vg_assert(VG_(is_exiting)(tid));
   1651 
   1652    return tst->exitreason;
   1653 }
   1654 
   1655 
   1656 /*
    1657    This causes all threads to forcibly exit.  They aren't actually
   1658    dead by the time this returns; you need to call
   1659    VG_(reap_threads)() to wait for them.
   1660  */
   1661 void VG_(nuke_all_threads_except) ( ThreadId me, VgSchedReturnCode src )
   1662 {
   1663    ThreadId tid;
   1664 
   1665    vg_assert(VG_(is_running_thread)(me));
   1666 
   1667    for (tid = 1; tid < VG_N_THREADS; tid++) {
   1668       if (tid == me
   1669           || VG_(threads)[tid].status == VgTs_Empty)
   1670          continue;
   1671       if (0)
   1672          VG_(printf)(
   1673             "VG_(nuke_all_threads_except): nuking tid %u\n", tid);
   1674 
   1675       VG_(threads)[tid].exitreason = src;
   1676       if (src == VgSrc_FatalSig)
   1677          VG_(threads)[tid].os_state.fatalsig = VKI_SIGKILL;
   1678       VG_(get_thread_out_of_syscall)(tid);
   1679    }
   1680 }
   1681 
   1682 
   1683 /* ---------------------------------------------------------------------
   1684    Specifying shadow register values
   1685    ------------------------------------------------------------------ */
   1686 
   1687 #if defined(VGA_x86)
   1688 #  define VG_CLREQ_ARGS       guest_EAX
   1689 #  define VG_CLREQ_RET        guest_EDX
   1690 #elif defined(VGA_amd64)
   1691 #  define VG_CLREQ_ARGS       guest_RAX
   1692 #  define VG_CLREQ_RET        guest_RDX
   1693 #elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
   1694 #  define VG_CLREQ_ARGS       guest_GPR4
   1695 #  define VG_CLREQ_RET        guest_GPR3
   1696 #elif defined(VGA_arm)
   1697 #  define VG_CLREQ_ARGS       guest_R4
   1698 #  define VG_CLREQ_RET        guest_R3
   1699 #elif defined(VGA_arm64)
   1700 #  define VG_CLREQ_ARGS       guest_X4
   1701 #  define VG_CLREQ_RET        guest_X3
   1702 #elif defined (VGA_s390x)
   1703 #  define VG_CLREQ_ARGS       guest_r2
   1704 #  define VG_CLREQ_RET        guest_r3
   1705 #elif defined(VGA_mips32) || defined(VGA_mips64)
   1706 #  define VG_CLREQ_ARGS       guest_r12
   1707 #  define VG_CLREQ_RET        guest_r11
   1708 #elif defined(VGA_tilegx)
   1709 #  define VG_CLREQ_ARGS       guest_r12
   1710 #  define VG_CLREQ_RET        guest_r11
   1711 #else
   1712 #  error Unknown arch
   1713 #endif
   1714 
   1715 #define CLREQ_ARGS(regs)   ((regs).vex.VG_CLREQ_ARGS)
   1716 #define CLREQ_RET(regs)    ((regs).vex.VG_CLREQ_RET)
   1717 #define O_CLREQ_RET        (offsetof(VexGuestArchState, VG_CLREQ_RET))
   1718 
   1719 // These macros write a value to a client's thread register, and tell the
   1720 // tool that it's happened (if necessary).
   1721 
   1722 #define SET_CLREQ_RETVAL(zztid, zzval) \
   1723    do { CLREQ_RET(VG_(threads)[zztid].arch) = (zzval); \
   1724         VG_TRACK( post_reg_write, \
   1725                   Vg_CoreClientReq, zztid, O_CLREQ_RET, sizeof(UWord)); \
   1726    } while (0)
   1727 
   1728 #define SET_CLCALL_RETVAL(zztid, zzval, f) \
   1729    do { CLREQ_RET(VG_(threads)[zztid].arch) = (zzval); \
   1730         VG_TRACK( post_reg_write_clientcall_return, \
   1731                   zztid, O_CLREQ_RET, sizeof(UWord), f); \
   1732    } while (0)
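// For instance, on amd64 SET_CLREQ_RETVAL(tid, 0) expands (per the mappings
// above) to a write of 0 into VG_(threads)[tid].arch.vex.guest_RDX followed
// by a post_reg_write notification, so tools tracking register definedness
// see the update.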
   1733 
   1734 
   1735 /* ---------------------------------------------------------------------
   1736    Handle client requests.
   1737    ------------------------------------------------------------------ */
   1738 
   1739 // OS-specific(?) client requests
   1740 static Bool os_client_request(ThreadId tid, UWord *args)
   1741 {
   1742    Bool handled = True;
   1743 
   1744    vg_assert(VG_(is_running_thread)(tid));
   1745 
   1746    switch(args[0]) {
   1747    case VG_USERREQ__LIBC_FREERES_DONE:
   1748       /* This is equivalent to an exit() syscall, but we don't set the
   1749 	 exitcode (since it might already be set) */
   1750       if (0 || VG_(clo_trace_syscalls) || VG_(clo_trace_sched))
   1751          VG_(message)(Vg_DebugMsg,
   1752                       "__libc_freeres() done; really quitting!\n");
   1753       VG_(threads)[tid].exitreason = VgSrc_ExitThread;
   1754       break;
   1755 
   1756    default:
   1757       handled = False;
   1758       break;
   1759    }
   1760 
   1761    return handled;
   1762 }
   1763 
   1764 
   1765 /* Write out a client message, possibly including a back trace. Return
   1766    the number of characters written. In case of XML output, the format
   1767    string as well as any arguments it requires will be XML'ified.
   1768    I.e. special characters such as the angle brackets will be translated
   1769    into proper escape sequences. */
   1770 static
   1771 Int print_client_message( ThreadId tid, const HChar *format,
   1772                           va_list *vargsp, Bool include_backtrace)
   1773 {
   1774    Int count;
   1775 
   1776    if (VG_(clo_xml)) {
   1777       /* Translate the format string as follows:
   1778          <  -->  &lt;
   1779          >  -->  &gt;
   1780          &  -->  &amp;
   1781          %s -->  %pS
   1782          Yes, yes, it's simplified but in synch with
   1783          myvprintf_str_XML_simplistic and VG_(debugLog_vprintf).
   1784       */
   1785 
   1786       /* Allocate a buffer that is for sure large enough. */
   1787       HChar xml_format[VG_(strlen)(format) * 5 + 1];
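      /* Worst case: every input character expands to "&amp;" (5 chars),
         hence the factor of 5, plus 1 for the trailing NUL. */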
   1788 
   1789       const HChar *p;
   1790       HChar *q = xml_format;
   1791 
   1792       for (p = format; *p; ++p) {
   1793          switch (*p) {
   1794          case '<': VG_(strcpy)(q, "&lt;");  q += 4; break;
   1795          case '>': VG_(strcpy)(q, "&gt;");  q += 4; break;
   1796          case '&': VG_(strcpy)(q, "&amp;"); q += 5; break;
   1797          case '%':
   1798             /* Careful: make sure %%s stays %%s */
   1799             *q++ = *p++;
   1800             if (*p == 's') {
   1801               *q++ = 'p';
   1802               *q++ = 'S';
   1803             } else {
   1804               *q++ = *p;
   1805             }
   1806             break;
   1807 
   1808          default:
   1809             *q++ = *p;
   1810             break;
   1811          }
   1812       }
   1813       *q = '\0';
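      /* Illustrative example of the rewriting done above (not taken from any
         particular run): the format string
            "value <%s> costs %d"
         becomes
            "value &lt;%pS&gt; costs %d"
         so the brackets are escaped here, and the %pS conversion escapes the
         argument string itself when it is later printed. */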
   1814 
   1815       VG_(printf_xml)( "<clientmsg>\n" );
   1816       VG_(printf_xml)( "  <tid>%u</tid>\n", tid );
   1817       const ThreadState *tst = VG_(get_ThreadState)(tid);
   1818       if (tst->thread_name)
   1819          VG_(printf_xml)("  <threadname>%s</threadname>\n", tst->thread_name);
   1820       VG_(printf_xml)( "  <text>" );
   1821       count = VG_(vprintf_xml)( xml_format, *vargsp );
   1822       VG_(printf_xml)( "  </text>\n" );
   1823    } else {
   1824       count = VG_(vmessage)( Vg_ClientMsg, format, *vargsp );
   1825       VG_(message_flush)();
   1826    }
   1827 
   1828    if (include_backtrace)
   1829       VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
   1830 
   1831    if (VG_(clo_xml))
   1832       VG_(printf_xml)( "</clientmsg>\n" );
   1833 
   1834    return count;
   1835 }
   1836 
   1837 
   1838 /* Do a client request for the thread tid.  After the request, tid may
   1839    or may not still be runnable; if not, the scheduler will have to
   1840    choose a new thread to run.
   1841 */
   1842 static
   1843 void do_client_request ( ThreadId tid )
   1844 {
   1845    UWord* arg = (UWord*)(CLREQ_ARGS(VG_(threads)[tid].arch));
   1846    UWord req_no = arg[0];
   1847 
   1848    if (0)
   1849       VG_(printf)("req no = 0x%lx, arg = %p\n", req_no, arg);
   1850    switch (req_no) {
   1851 
   1852       case VG_USERREQ__CLIENT_CALL0: {
   1853          UWord (*f)(ThreadId) = (__typeof__(f))arg[1];
   1854 	 if (f == NULL)
   1855 	    VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL0: func=%p\n", f);
   1856 	 else
   1857 	    SET_CLCALL_RETVAL(tid, f ( tid ), (Addr)f);
   1858          break;
   1859       }
   1860       case VG_USERREQ__CLIENT_CALL1: {
   1861          UWord (*f)(ThreadId, UWord) = (__typeof__(f))arg[1];
   1862 	 if (f == NULL)
   1863 	    VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL1: func=%p\n", f);
   1864 	 else
   1865 	    SET_CLCALL_RETVAL(tid, f ( tid, arg[2] ), (Addr)f );
   1866          break;
   1867       }
   1868       case VG_USERREQ__CLIENT_CALL2: {
   1869          UWord (*f)(ThreadId, UWord, UWord) = (__typeof__(f))arg[1];
   1870 	 if (f == NULL)
   1871 	    VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL2: func=%p\n", f);
   1872 	 else
   1873 	    SET_CLCALL_RETVAL(tid, f ( tid, arg[2], arg[3] ), (Addr)f );
   1874          break;
   1875       }
   1876       case VG_USERREQ__CLIENT_CALL3: {
   1877          UWord (*f)(ThreadId, UWord, UWord, UWord) = (__typeof__(f))arg[1];
   1878 	 if (f == NULL)
   1879 	    VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL3: func=%p\n", f);
   1880 	 else
   1881 	    SET_CLCALL_RETVAL(tid, f ( tid, arg[2], arg[3], arg[4] ), (Addr)f );
   1882          break;
   1883       }
   1884 
   1885       // Nb: this looks like a circular definition, because it kind of is.
   1886       // See comment in valgrind.h to understand what's going on.
   1887       case VG_USERREQ__RUNNING_ON_VALGRIND:
   1888          SET_CLREQ_RETVAL(tid, RUNNING_ON_VALGRIND+1);
   1889          break;
   1890 
   1891       case VG_USERREQ__PRINTF: {
   1892          const HChar* format = (HChar *)arg[1];
   1893          /* JRS 2010-Jan-28: this is DEPRECATED; use the
   1894             _VALIST_BY_REF version instead */
   1895          if (sizeof(va_list) != sizeof(UWord))
   1896             goto va_list_casting_error_NORETURN;
   1897          union {
   1898             va_list vargs;
   1899             unsigned long uw;
   1900          } u;
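         /* On the platforms where this deprecated request is still usable,
            va_list is a single machine word, so the word the client stored
            in arg[2] can be reinterpreted as a va_list through this union
            (hence the sizeof check above). */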
   1901          u.uw = (unsigned long)arg[2];
   1902          Int count =
   1903             print_client_message( tid, format, &u.vargs,
   1904                                   /* include_backtrace */ False );
   1905          SET_CLREQ_RETVAL( tid, count );
   1906          break;
   1907       }
   1908 
   1909       case VG_USERREQ__PRINTF_BACKTRACE: {
   1910          const HChar* format = (HChar *)arg[1];
   1911          /* JRS 2010-Jan-28: this is DEPRECATED; use the
   1912             _VALIST_BY_REF version instead */
   1913          if (sizeof(va_list) != sizeof(UWord))
   1914             goto va_list_casting_error_NORETURN;
   1915          union {
   1916             va_list vargs;
   1917             unsigned long uw;
   1918          } u;
   1919          u.uw = (unsigned long)arg[2];
   1920          Int count =
   1921             print_client_message( tid, format, &u.vargs,
   1922                                   /* include_backtrace */ True );
   1923          SET_CLREQ_RETVAL( tid, count );
   1924          break;
   1925       }
   1926 
   1927       case VG_USERREQ__PRINTF_VALIST_BY_REF: {
   1928          const HChar* format = (HChar *)arg[1];
   1929          va_list* vargsp = (va_list*)arg[2];
   1930          Int count =
   1931             print_client_message( tid, format, vargsp,
   1932                                   /* include_backtrace */ False );
   1933 
   1934          SET_CLREQ_RETVAL( tid, count );
   1935          break;
   1936       }
   1937 
   1938       case VG_USERREQ__PRINTF_BACKTRACE_VALIST_BY_REF: {
   1939          const HChar* format = (HChar *)arg[1];
   1940          va_list* vargsp = (va_list*)arg[2];
   1941          Int count =
   1942             print_client_message( tid, format, vargsp,
   1943                                   /* include_backtrace */ True );
   1944          SET_CLREQ_RETVAL( tid, count );
   1945          break;
   1946       }
   1947 
   1948       case VG_USERREQ__INTERNAL_PRINTF_VALIST_BY_REF: {
   1949          va_list* vargsp = (va_list*)arg[2];
   1950          Int count =
   1951             VG_(vmessage)( Vg_DebugMsg, (HChar *)arg[1], *vargsp );
   1952          VG_(message_flush)();
   1953          SET_CLREQ_RETVAL( tid, count );
   1954          break;
   1955       }
   1956 
   1957       case VG_USERREQ__ADD_IFUNC_TARGET: {
   1958          VG_(redir_add_ifunc_target)( arg[1], arg[2] );
   1959          SET_CLREQ_RETVAL( tid, 0);
   1960          break; }
   1961 
   1962       case VG_USERREQ__STACK_REGISTER: {
   1963          UWord sid = VG_(register_stack)((Addr)arg[1], (Addr)arg[2]);
   1964          SET_CLREQ_RETVAL( tid, sid );
   1965          break; }
   1966 
   1967       case VG_USERREQ__STACK_DEREGISTER: {
   1968          VG_(deregister_stack)(arg[1]);
   1969          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
   1970          break; }
   1971 
   1972       case VG_USERREQ__STACK_CHANGE: {
   1973          VG_(change_stack)(arg[1], (Addr)arg[2], (Addr)arg[3]);
   1974          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
   1975          break; }
   1976 
   1977       case VG_USERREQ__GET_MALLOCFUNCS: {
   1978 	 struct vg_mallocfunc_info *info = (struct vg_mallocfunc_info *)arg[1];
   1979 
   1980 	 info->tl_malloc               = VG_(tdict).tool_malloc;
   1981 	 info->tl_calloc               = VG_(tdict).tool_calloc;
   1982 	 info->tl_realloc              = VG_(tdict).tool_realloc;
   1983 	 info->tl_memalign             = VG_(tdict).tool_memalign;
   1984 	 info->tl___builtin_new        = VG_(tdict).tool___builtin_new;
   1985 	 info->tl___builtin_vec_new    = VG_(tdict).tool___builtin_vec_new;
   1986 	 info->tl_free                 = VG_(tdict).tool_free;
   1987 	 info->tl___builtin_delete     = VG_(tdict).tool___builtin_delete;
   1988 	 info->tl___builtin_vec_delete = VG_(tdict).tool___builtin_vec_delete;
   1989          info->tl_malloc_usable_size   = VG_(tdict).tool_malloc_usable_size;
   1990 
   1991 	 info->mallinfo                = VG_(mallinfo);
   1992 	 info->clo_trace_malloc        = VG_(clo_trace_malloc);
   1993 
   1994          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
   1995 
   1996 	 break;
   1997       }
   1998 
   1999       /* Requests from the client program */
   2000 
   2001       case VG_USERREQ__DISCARD_TRANSLATIONS:
   2002          if (VG_(clo_verbosity) > 2)
   2003             VG_(printf)( "client request: DISCARD_TRANSLATIONS,"
   2004                          " addr %p,  len %lu\n",
   2005                          (void*)arg[1], arg[2] );
   2006 
   2007          VG_(discard_translations)(
   2008             arg[1], arg[2], "scheduler(VG_USERREQ__DISCARD_TRANSLATIONS)"
   2009          );
   2010 
   2011          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
   2012 	 break;
   2013 
   2014       case VG_USERREQ__COUNT_ERRORS:
   2015          SET_CLREQ_RETVAL( tid, VG_(get_n_errs_found)() );
   2016          break;
   2017 
   2018       case VG_USERREQ__LOAD_PDB_DEBUGINFO:
   2019          VG_(di_notify_pdb_debuginfo)( arg[1], arg[2], arg[3], arg[4] );
   2020          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
   2021          break;
   2022 
   2023       case VG_USERREQ__MAP_IP_TO_SRCLOC: {
   2024          Addr   ip    = arg[1];
   2025          HChar* buf64 = (HChar*)arg[2];  // points to a HChar [64] array
   2026          const HChar *buf;  // points to a string of unknown size
   2027 
   2028          VG_(memset)(buf64, 0, 64);
   2029          UInt linenum = 0;
   2030          Bool ok = VG_(get_filename_linenum)(
   2031                       ip, &buf, NULL, &linenum
   2032                    );
   2033          if (ok) {
   2034             /* For backward compatibility truncate the filename to
   2035                49 characters. */
   2036             VG_(strncpy)(buf64, buf, 50);
   2037             buf64[49] = '\0';
   2038             UInt i;
   2039             for (i = 0; i < 50; i++) {
   2040                if (buf64[i] == 0)
   2041                   break;
   2042             }
   2043             VG_(sprintf)(buf64+i, ":%u", linenum);  // safe
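            /* "safe" because i <= 49 at this point, and ":" plus at most 10
               digits of a UInt plus the trailing NUL adds at most 12 bytes,
               so the write ends no later than buf64[60] in the 64-byte
               buffer. */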
   2044          } else {
   2045             buf64[0] = 0;
   2046          }
   2047 
   2048          SET_CLREQ_RETVAL( tid, 0 ); /* return value is meaningless */
   2049          break;
   2050       }
   2051 
   2052       case VG_USERREQ__CHANGE_ERR_DISABLEMENT: {
   2053          Word delta = arg[1];
   2054          vg_assert(delta == 1 || delta == -1);
   2055          ThreadState* tst = VG_(get_ThreadState)(tid);
   2056          vg_assert(tst);
   2057          if (delta == 1 && tst->err_disablement_level < 0xFFFFFFFF) {
   2058             tst->err_disablement_level++;
   2059          }
   2060          else
   2061          if (delta == -1 && tst->err_disablement_level > 0) {
   2062             tst->err_disablement_level--;
   2063          }
   2064          SET_CLREQ_RETVAL( tid, 0 ); /* return value is meaningless */
   2065          break;
   2066       }
   2067 
   2068       case VG_USERREQ__GDB_MONITOR_COMMAND: {
   2069          UWord ret;
   2070          ret = (UWord) VG_(client_monitor_command) ((HChar*)arg[1]);
   2071          SET_CLREQ_RETVAL(tid, ret);
   2072          break;
   2073       }
   2074 
   2075       case VG_USERREQ__MALLOCLIKE_BLOCK:
   2076       case VG_USERREQ__RESIZEINPLACE_BLOCK:
   2077       case VG_USERREQ__FREELIKE_BLOCK:
    2078          // Ignore them if the addr is NULL;  otherwise pass them on to the tool.
   2079          if (!arg[1]) {
   2080             SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
   2081             break;
   2082          } else {
   2083             goto my_default;
   2084          }
   2085 
   2086       case VG_USERREQ__VEX_INIT_FOR_IRI:
   2087          LibVEX_InitIRI ( (IRICB *)arg[1] );
   2088          break;
   2089 
   2090       default:
   2091        my_default:
   2092 	 if (os_client_request(tid, arg)) {
   2093 	    // do nothing, os_client_request() handled it
   2094          } else if (VG_(needs).client_requests) {
   2095 	    UWord ret;
   2096 
   2097             if (VG_(clo_verbosity) > 2)
   2098                VG_(printf)("client request: code %lx,  addr %p,  len %lu\n",
   2099                            arg[0], (void*)arg[1], arg[2] );
   2100 
   2101 	    if ( VG_TDICT_CALL(tool_handle_client_request, tid, arg, &ret) )
   2102 	       SET_CLREQ_RETVAL(tid, ret);
   2103          } else {
   2104 	    static Bool whined = False;
   2105 
   2106 	    if (!whined && VG_(clo_verbosity) > 2) {
   2107                // Allow for requests in core, but defined by tools, which
   2108                // have 0 and 0 in their two high bytes.
   2109                HChar c1 = (arg[0] >> 24) & 0xff;
   2110                HChar c2 = (arg[0] >> 16) & 0xff;
   2111                if (c1 == 0) c1 = '_';
   2112                if (c2 == 0) c2 = '_';
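               /* For example, a tool request built with
                  VG_USERREQ_TOOL_BASE('M','C') carries 'M' and 'C' in its
                  two high bytes, so an unhandled 0x4d430005 (an illustrative
                  value, not a specific real request) would be reported below
                  as "0x4d430005 (MC+0x5)". */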
   2113 	       VG_(message)(Vg_UserMsg, "Warning:\n"
   2114                    "  unhandled client request: 0x%lx (%c%c+0x%lx).  Perhaps\n"
   2115 		   "  VG_(needs).client_requests should be set?\n",
   2116 			    arg[0], c1, c2, arg[0] & 0xffff);
   2117 	       whined = True;
   2118 	    }
   2119          }
   2120          break;
   2121    }
   2122    return;
   2123 
   2124    /*NOTREACHED*/
   2125   va_list_casting_error_NORETURN:
   2126    VG_(umsg)(
   2127       "Valgrind: fatal error - cannot continue: use of the deprecated\n"
   2128       "client requests VG_USERREQ__PRINTF or VG_USERREQ__PRINTF_BACKTRACE\n"
   2129       "on a platform where they cannot be supported.  Please use the\n"
   2130       "equivalent _VALIST_BY_REF versions instead.\n"
   2131       "\n"
   2132       "This is a binary-incompatible change in Valgrind's client request\n"
   2133       "mechanism.  It is unfortunate, but difficult to avoid.  End-users\n"
   2134       "are expected to almost never see this message.  The only case in\n"
   2135       "which you might see this message is if your code uses the macros\n"
   2136       "VALGRIND_PRINTF or VALGRIND_PRINTF_BACKTRACE.  If so, you will need\n"
   2137       "to recompile such code, using the header files from this version of\n"
   2138       "Valgrind, and not any previous version.\n"
   2139       "\n"
   2140       "If you see this mesage in any other circumstances, it is probably\n"
   2141       "a bug in Valgrind.  In this case, please file a bug report at\n"
   2142       "\n"
   2143       "   http://www.valgrind.org/support/bug_reports.html\n"
   2144       "\n"
   2145       "Will now abort.\n"
   2146    );
   2147    vg_assert(0);
   2148 }
   2149 
   2150 
   2151 /* ---------------------------------------------------------------------
   2152    Sanity checking (permanently engaged)
   2153    ------------------------------------------------------------------ */
   2154 
   2155 /* Internal consistency checks on the sched structures. */
   2156 static
   2157 void scheduler_sanity ( ThreadId tid )
   2158 {
   2159    Bool bad = False;
   2160    Int lwpid = VG_(gettid)();
   2161 
   2162    if (!VG_(is_running_thread)(tid)) {
   2163       VG_(message)(Vg_DebugMsg,
   2164 		   "Thread %u is supposed to be running, "
   2165                    "but doesn't own the_BigLock (owned by %u)\n",
   2166 		   tid, VG_(running_tid));
   2167       bad = True;
   2168    }
   2169 
   2170    if (lwpid != VG_(threads)[tid].os_state.lwpid) {
   2171       VG_(message)(Vg_DebugMsg,
   2172                    "Thread %u supposed to be in LWP %d, but we're actually %d\n",
   2173                    tid, VG_(threads)[tid].os_state.lwpid, VG_(gettid)());
   2174       bad = True;
   2175    }
   2176 
   2177    if (lwpid != ML_(get_sched_lock_owner)(the_BigLock)) {
   2178       VG_(message)(Vg_DebugMsg,
   2179                    "Thread (LWPID) %u doesn't own the_BigLock\n",
   2180                    tid);
   2181       bad = True;
   2182    }
   2183 
   2184    if (0) {
   2185       /* Periodically show the state of all threads, for debugging
   2186          purposes. */
   2187       static UInt lasttime = 0;
   2188       UInt now;
   2189       now = VG_(read_millisecond_timer)();
   2190       if ((!bad) && (lasttime + 4000/*ms*/ <= now)) {
   2191          lasttime = now;
   2192          VG_(printf)("\n------------ Sched State at %d ms ------------\n",
   2193                      (Int)now);
   2194          VG_(show_sched_status)(True,  // host_stacktrace
   2195                                 True,  // stack_usage
   2196                                 True); // exited_threads);
   2197       }
   2198    }
   2199 
   2200    /* core_panic also shows the sched status, which is why we don't
   2201       show it above if bad==True. */
   2202    if (bad)
   2203       VG_(core_panic)("scheduler_sanity: failed");
   2204 }
   2205 
   2206 void VG_(sanity_check_general) ( Bool force_expensive )
   2207 {
   2208    ThreadId tid;
   2209 
   2210    static UInt next_slow_check_at = 1;
   2211    static UInt slow_check_interval = 25;
   2212 
   2213    if (VG_(clo_sanity_level) < 1) return;
   2214 
   2215    /* --- First do all the tests that we can do quickly. ---*/
   2216 
   2217    sanity_fast_count++;
   2218 
   2219    /* Check stuff pertaining to the memory check system. */
   2220 
   2221    /* Check that nobody has spuriously claimed that the first or
   2222       last 16 pages of memory have become accessible [...] */
   2223    if (VG_(needs).sanity_checks) {
   2224       vg_assert(VG_TDICT_CALL(tool_cheap_sanity_check));
   2225    }
   2226 
   2227    /* --- Now some more expensive checks. ---*/
   2228 
   2229    /* Once every now and again, check some more expensive stuff.
   2230       Gradually increase the interval between such checks so as not to
   2231       burden long-running programs too much. */
   2232    if ( force_expensive
   2233         || VG_(clo_sanity_level) > 1
   2234         || (VG_(clo_sanity_level) == 1
   2235             && sanity_fast_count == next_slow_check_at)) {
   2236 
   2237       if (0) VG_(printf)("SLOW at %u\n", sanity_fast_count-1);
   2238 
   2239       next_slow_check_at = sanity_fast_count - 1 + slow_check_interval;
   2240       slow_check_interval++;
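      /* With the starting values above (next check at 1, interval 25,
         growing by one each time) and VG_(clo_sanity_level) == 1, the
         expensive checks run at fast-check counts 1, 25, 50, 76, 103, ...,
         the gap widening by one after every slow check. */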
   2241       sanity_slow_count++;
   2242 
   2243       if (VG_(needs).sanity_checks) {
   2244           vg_assert(VG_TDICT_CALL(tool_expensive_sanity_check));
   2245       }
   2246 
   2247       /* Look for stack overruns.  Visit all threads. */
   2248       for (tid = 1; tid < VG_N_THREADS; tid++) {
   2249 	 SizeT    remains;
   2250          VgStack* stack;
   2251 
   2252 	 if (VG_(threads)[tid].status == VgTs_Empty ||
   2253 	     VG_(threads)[tid].status == VgTs_Zombie)
   2254 	    continue;
   2255 
   2256          stack
   2257             = (VgStack*)
   2258               VG_(get_ThreadState)(tid)->os_state.valgrind_stack_base;
   2259          SizeT limit
   2260             = 4096; // Let's say.  Checking more causes lots of L2 misses.
   2261 	 remains
   2262             = VG_(am_get_VgStack_unused_szB)(stack, limit);
   2263 	 if (remains < limit)
   2264 	    VG_(message)(Vg_DebugMsg,
   2265                          "WARNING: Thread %u is within %lu bytes "
   2266                          "of running out of valgrind stack!\n"
   2267                          "Valgrind stack size can be increased "
   2268                          "using --valgrind-stacksize=....\n",
   2269 		         tid, remains);
   2270       }
   2271    }
   2272 
   2273    if (VG_(clo_sanity_level) > 1) {
   2274       /* Check sanity of the low-level memory manager.  Note that bugs
   2275          in the client's code can cause this to fail, so we don't do
   2276          this check unless specially asked for.  And because it's
   2277          potentially very expensive. */
   2278       VG_(sanity_check_malloc_all)();
   2279    }
   2280 }
   2281 
   2282 /*--------------------------------------------------------------------*/
   2283 /*--- end                                                          ---*/
   2284 /*--------------------------------------------------------------------*/
   2285