      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Thread scheduling.                               scheduler.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2000-2013 Julian Seward
      11       jseward@acm.org
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     26    02111-1307, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 */
     30 
     31 /*
     32    Overview
     33 
     34    Valgrind tries to emulate the kernel's threading as closely as
     35    possible.  The client does all threading via the normal syscalls
     36    (on Linux: clone, etc).  Valgrind emulates this by creating exactly
     37    the same process structure as would be created without Valgrind.
     38    There are no extra threads.
     39 
     40    The main difference is that Valgrind only allows one client thread
     41    to run at once.  This is controlled with the CPU Big Lock,
     42    "the_BigLock".  Any time a thread wants to run client code or
     43    manipulate any shared state (which is anything other than its own
     44    ThreadState entry), it must hold the_BigLock.
     45 
     46    When a thread is about to block in a blocking syscall, it releases
     47    the_BigLock, and re-takes it when it becomes runnable again (either
     48    because the syscall finished, or we took a signal).
     49 
     50    VG_(scheduler) therefore runs in each thread.  It returns only when
     51    the thread is exiting, either because it exited itself, or it was
     52    told to exit by another thread.
     53 
     54    This file is almost entirely OS-independent.  The details of how
     55    the OS handles threading and signalling are abstracted away and
     56    implemented elsewhere.  [Some of the functions have worked their
     57    way back for the moment, until we do an OS port in earnest...]
     58 */
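
/* Illustrative only: a minimal sketch of the locking protocol described
   above, using the helpers defined later in this file.  Around a
   blocking syscall a thread roughly does:

      VG_(acquire_BigLock)(tid, "example");               // run client code
      VG_(release_BigLock)(tid, VgTs_WaitSys, "example"); // about to block
      // ... blocked in the kernel; other threads may run ...
      VG_(acquire_BigLock)(tid, "example");               // runnable again
*/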
     59 
     60 
     61 #include "pub_core_basics.h"
     62 #include "pub_core_debuglog.h"
     63 #include "pub_core_vki.h"
     64 #include "pub_core_vkiscnums.h"    // __NR_sched_yield
     65 #include "pub_core_libcsetjmp.h"   // to keep _threadstate.h happy
     66 #include "pub_core_threadstate.h"
     67 #include "pub_core_aspacemgr.h"
     68 #include "pub_core_clreq.h"         // for VG_USERREQ__*
     69 #include "pub_core_dispatch.h"
     70 #include "pub_core_errormgr.h"      // For VG_(get_n_errs_found)()
     71 #include "pub_core_gdbserver.h"     // for VG_(gdbserver) and VG_(gdbserver_activity)
     72 #include "pub_core_libcbase.h"
     73 #include "pub_core_libcassert.h"
     74 #include "pub_core_libcprint.h"
     75 #include "pub_core_libcproc.h"
     76 #include "pub_core_libcsignal.h"
     77 #if defined(VGO_darwin)
     78 #include "pub_core_mach.h"
     79 #endif
     80 #include "pub_core_machine.h"
     81 #include "pub_core_mallocfree.h"
     82 #include "pub_core_options.h"
     83 #include "pub_core_replacemalloc.h"
     84 #include "pub_core_sbprofile.h"
     85 #include "pub_core_signals.h"
     86 #include "pub_core_stacks.h"
     87 #include "pub_core_stacktrace.h"    // For VG_(get_and_pp_StackTrace)()
     88 #include "pub_core_syscall.h"
     89 #include "pub_core_syswrap.h"
     90 #include "pub_core_tooliface.h"
     91 #include "pub_core_translate.h"     // For VG_(translate)()
     92 #include "pub_core_transtab.h"
     93 #include "pub_core_debuginfo.h"     // VG_(di_notify_pdb_debuginfo)
     94 #include "priv_sched-lock.h"
     95 #include "pub_core_scheduler.h"     // self
     96 #include "pub_core_redir.h"
     97 #include "libvex_emnote.h"          // VexEmNote
     98 
     99 
    100 /* ---------------------------------------------------------------------
    101    Types and globals for the scheduler.
    102    ------------------------------------------------------------------ */
    103 
     104 /* ThreadId and ThreadState are defined elsewhere */
    105 
    106 /* Defines the thread-scheduling timeslice, in terms of the number of
    107    basic blocks we attempt to run each thread for.  Smaller values
    108    give finer interleaving but much increased scheduling overheads. */
    109 #define SCHEDULING_QUANTUM   100000
    110 
    111 /* If False, a fault is Valgrind-internal (ie, a bug) */
    112 Bool VG_(in_generated_code) = False;
    113 
    114 /* 64-bit counter for the number of basic blocks done. */
    115 static ULong bbs_done = 0;
    116 
     117 /* Counter controlling when to poll for vgdb activity.
     118    When the number of bbs done reaches vgdb_next_poll, the scheduler
     119    will poll for gdbserver activity.  VG_(force_vgdb_poll) and
     120    VG_(disable_vgdb_poll) allow the valgrind core (e.g. m_gdbserver)
     121    to control when the next poll will be done. */
    122 static ULong vgdb_next_poll;
    123 
    124 /* Forwards */
    125 static void do_client_request ( ThreadId tid );
    126 static void scheduler_sanity ( ThreadId tid );
    127 static void mostly_clear_thread_record ( ThreadId tid );
    128 
    129 /* Stats. */
    130 static ULong n_scheduling_events_MINOR = 0;
    131 static ULong n_scheduling_events_MAJOR = 0;
    132 
    133 /* Stats: number of XIndirs, and number that missed in the fast
    134    cache. */
    135 static ULong stats__n_xindirs = 0;
    136 static ULong stats__n_xindir_misses = 0;
    137 
    138 /* And 32-bit temp bins for the above, so that 32-bit platforms don't
    139    have to do 64 bit incs on the hot path through
    140    VG_(cp_disp_xindir). */
    141 /*global*/ UInt VG_(stats__n_xindirs_32) = 0;
    142 /*global*/ UInt VG_(stats__n_xindir_misses_32) = 0;
    143 
    144 /* Sanity checking counts. */
    145 static UInt sanity_fast_count = 0;
    146 static UInt sanity_slow_count = 0;
    147 
    148 void VG_(print_scheduler_stats)(void)
    149 {
    150    VG_(message)(Vg_DebugMsg,
    151       "scheduler: %'llu event checks.\n", bbs_done );
    152    VG_(message)(Vg_DebugMsg,
    153                 "scheduler: %'llu indir transfers, %'llu misses (1 in %llu)\n",
    154                 stats__n_xindirs, stats__n_xindir_misses,
    155                 stats__n_xindirs / (stats__n_xindir_misses
    156                                     ? stats__n_xindir_misses : 1));
    157    VG_(message)(Vg_DebugMsg,
    158       "scheduler: %'llu/%'llu major/minor sched events.\n",
    159       n_scheduling_events_MAJOR, n_scheduling_events_MINOR);
    160    VG_(message)(Vg_DebugMsg,
    161                 "   sanity: %d cheap, %d expensive checks.\n",
    162                 sanity_fast_count, sanity_slow_count );
    163 }
    164 
    165 /*
    166  * Mutual exclusion object used to serialize threads.
    167  */
    168 static struct sched_lock *the_BigLock;
    169 
    170 
    171 /* ---------------------------------------------------------------------
    172    Helper functions for the scheduler.
    173    ------------------------------------------------------------------ */
    174 
    175 static
    176 void print_sched_event ( ThreadId tid, const HChar* what )
    177 {
    178    VG_(message)(Vg_DebugMsg, "  SCHED[%d]: %s\n", tid, what );
    179 }
    180 
    181 /* For showing SB profiles, if the user asks to see them. */
    182 static
    183 void maybe_show_sb_profile ( void )
    184 {
    185    /* DO NOT MAKE NON-STATIC */
    186    static ULong bbs_done_lastcheck = 0;
    187    /* */
    188    vg_assert(VG_(clo_profyle_interval) > 0);
    189    Long delta = (Long)(bbs_done - bbs_done_lastcheck);
    190    vg_assert(delta >= 0);
    191    if ((ULong)delta >= VG_(clo_profyle_interval)) {
    192       bbs_done_lastcheck = bbs_done;
    193       VG_(get_and_show_SB_profile)(bbs_done);
    194    }
    195 }
    196 
    197 static
    198 const HChar* name_of_sched_event ( UInt event )
    199 {
    200    switch (event) {
    201       case VEX_TRC_JMP_INVALICACHE:    return "INVALICACHE";
    202       case VEX_TRC_JMP_FLUSHDCACHE:    return "FLUSHDCACHE";
    203       case VEX_TRC_JMP_NOREDIR:        return "NOREDIR";
    204       case VEX_TRC_JMP_SIGILL:         return "SIGILL";
    205       case VEX_TRC_JMP_SIGTRAP:        return "SIGTRAP";
    206       case VEX_TRC_JMP_SIGSEGV:        return "SIGSEGV";
    207       case VEX_TRC_JMP_SIGBUS:         return "SIGBUS";
    208       case VEX_TRC_JMP_SIGFPE_INTOVF:
    209       case VEX_TRC_JMP_SIGFPE_INTDIV:  return "SIGFPE";
    210       case VEX_TRC_JMP_EMWARN:         return "EMWARN";
    211       case VEX_TRC_JMP_EMFAIL:         return "EMFAIL";
    212       case VEX_TRC_JMP_CLIENTREQ:      return "CLIENTREQ";
    213       case VEX_TRC_JMP_YIELD:          return "YIELD";
    214       case VEX_TRC_JMP_NODECODE:       return "NODECODE";
    215       case VEX_TRC_JMP_MAPFAIL:        return "MAPFAIL";
    216       case VEX_TRC_JMP_SYS_SYSCALL:    return "SYSCALL";
    217       case VEX_TRC_JMP_SYS_INT32:      return "INT32";
    218       case VEX_TRC_JMP_SYS_INT128:     return "INT128";
    219       case VEX_TRC_JMP_SYS_INT129:     return "INT129";
    220       case VEX_TRC_JMP_SYS_INT130:     return "INT130";
    221       case VEX_TRC_JMP_SYS_SYSENTER:   return "SYSENTER";
    222       case VEX_TRC_JMP_BORING:         return "VEX_BORING";
    223 
    224       case VG_TRC_BORING:              return "VG_BORING";
    225       case VG_TRC_INNER_FASTMISS:      return "FASTMISS";
    226       case VG_TRC_INNER_COUNTERZERO:   return "COUNTERZERO";
    227       case VG_TRC_FAULT_SIGNAL:        return "FAULTSIGNAL";
    228       case VG_TRC_INVARIANT_FAILED:    return "INVFAILED";
    229       case VG_TRC_CHAIN_ME_TO_SLOW_EP: return "CHAIN_ME_SLOW";
    230       case VG_TRC_CHAIN_ME_TO_FAST_EP: return "CHAIN_ME_FAST";
    231       default:                         return "??UNKNOWN??";
    232   }
    233 }
    234 
    235 /* Allocate a completely empty ThreadState record. */
    236 ThreadId VG_(alloc_ThreadState) ( void )
    237 {
    238    Int i;
    239    for (i = 1; i < VG_N_THREADS; i++) {
    240       if (VG_(threads)[i].status == VgTs_Empty) {
    241 	 VG_(threads)[i].status = VgTs_Init;
    242 	 VG_(threads)[i].exitreason = VgSrc_None;
    243          if (VG_(threads)[i].thread_name)
    244             VG_(arena_free)(VG_AR_CORE, VG_(threads)[i].thread_name);
    245          VG_(threads)[i].thread_name = NULL;
    246          return i;
    247       }
    248    }
    249    VG_(printf)("vg_alloc_ThreadState: no free slots available\n");
    250    VG_(printf)("Increase VG_N_THREADS, rebuild and try again.\n");
    251    VG_(core_panic)("VG_N_THREADS is too low");
    252    /*NOTREACHED*/
    253 }
    254 
    255 /*
    256    Mark a thread as Runnable.  This will block until the_BigLock is
    257    available, so that we get exclusive access to all the shared
    258    structures and the CPU.  Up until we get the_BigLock, we must not
    259    touch any shared state.
    260 
    261    When this returns, we'll actually be running.
    262  */
    263 void VG_(acquire_BigLock)(ThreadId tid, const HChar* who)
    264 {
    265    ThreadState *tst;
    266 
    267 #if 0
    268    if (VG_(clo_trace_sched)) {
    269       HChar buf[100];
    270       vg_assert(VG_(strlen)(who) <= 100-50);
    271       VG_(sprintf)(buf, "waiting for lock (%s)", who);
    272       print_sched_event(tid, buf);
    273    }
    274 #endif
    275 
    276    /* First, acquire the_BigLock.  We can't do anything else safely
    277       prior to this point.  Even doing debug printing prior to this
    278       point is, technically, wrong. */
    279    VG_(acquire_BigLock_LL)(NULL);
    280 
    281    tst = VG_(get_ThreadState)(tid);
    282 
    283    vg_assert(tst->status != VgTs_Runnable);
    284 
    285    tst->status = VgTs_Runnable;
    286 
    287    if (VG_(running_tid) != VG_INVALID_THREADID)
    288       VG_(printf)("tid %d found %d running\n", tid, VG_(running_tid));
    289    vg_assert(VG_(running_tid) == VG_INVALID_THREADID);
    290    VG_(running_tid) = tid;
    291 
    292    { Addr gsp = VG_(get_SP)(tid);
    293       if (NULL != VG_(tdict).track_new_mem_stack_w_ECU)
    294          VG_(unknown_SP_update_w_ECU)(gsp, gsp, 0/*unknown origin*/);
    295       else
    296          VG_(unknown_SP_update)(gsp, gsp);
    297    }
    298 
    299    if (VG_(clo_trace_sched)) {
    300       HChar buf[150];
    301       vg_assert(VG_(strlen)(who) <= 150-50);
    302       VG_(sprintf)(buf, " acquired lock (%s)", who);
    303       print_sched_event(tid, buf);
    304    }
    305 }
    306 
    307 /*
    308    Set a thread into a sleeping state, and give up exclusive access to
    309    the CPU.  On return, the thread must be prepared to block until it
    310    is ready to run again (generally this means blocking in a syscall,
    311    but it may mean that we remain in a Runnable state and we're just
    312    yielding the CPU to another thread).
    313  */
    314 void VG_(release_BigLock)(ThreadId tid, ThreadStatus sleepstate,
    315                           const HChar* who)
    316 {
    317    ThreadState *tst = VG_(get_ThreadState)(tid);
    318 
    319    vg_assert(tst->status == VgTs_Runnable);
    320 
    321    vg_assert(sleepstate == VgTs_WaitSys ||
    322 	     sleepstate == VgTs_Yielding);
    323 
    324    tst->status = sleepstate;
    325 
    326    vg_assert(VG_(running_tid) == tid);
    327    VG_(running_tid) = VG_INVALID_THREADID;
    328 
    329    if (VG_(clo_trace_sched)) {
    330       HChar buf[200];
    331       vg_assert(VG_(strlen)(who) <= 200-100);
    332       VG_(sprintf)(buf, "releasing lock (%s) -> %s",
    333                         who, VG_(name_of_ThreadStatus)(sleepstate));
    334       print_sched_event(tid, buf);
    335    }
    336 
    337    /* Release the_BigLock; this will reschedule any runnable
    338       thread. */
    339    VG_(release_BigLock_LL)(NULL);
    340 }
    341 
    342 static void init_BigLock(void)
    343 {
    344    vg_assert(!the_BigLock);
    345    the_BigLock = ML_(create_sched_lock)();
    346 }
    347 
    348 static void deinit_BigLock(void)
    349 {
    350    ML_(destroy_sched_lock)(the_BigLock);
    351    the_BigLock = NULL;
    352 }
    353 
    354 /* See pub_core_scheduler.h for description */
    355 void VG_(acquire_BigLock_LL) ( const HChar* who )
    356 {
    357    ML_(acquire_sched_lock)(the_BigLock);
    358 }
    359 
    360 /* See pub_core_scheduler.h for description */
    361 void VG_(release_BigLock_LL) ( const HChar* who )
    362 {
    363    ML_(release_sched_lock)(the_BigLock);
    364 }
    365 
    366 Bool VG_(owns_BigLock_LL) ( ThreadId tid )
    367 {
    368    return (ML_(get_sched_lock_owner)(the_BigLock)
    369            == VG_(threads)[tid].os_state.lwpid);
    370 }
    371 
    372 
    373 /* Clear out the ThreadState and release the semaphore. Leaves the
    374    ThreadState in VgTs_Zombie state, so that it doesn't get
    375    reallocated until the caller is really ready. */
    376 void VG_(exit_thread)(ThreadId tid)
    377 {
    378    vg_assert(VG_(is_valid_tid)(tid));
    379    vg_assert(VG_(is_running_thread)(tid));
    380    vg_assert(VG_(is_exiting)(tid));
    381 
    382    mostly_clear_thread_record(tid);
    383    VG_(running_tid) = VG_INVALID_THREADID;
    384 
    385    /* There should still be a valid exitreason for this thread */
    386    vg_assert(VG_(threads)[tid].exitreason != VgSrc_None);
    387 
    388    if (VG_(clo_trace_sched))
    389       print_sched_event(tid, "release lock in VG_(exit_thread)");
    390 
    391    VG_(release_BigLock_LL)(NULL);
    392 }
    393 
    394 /* If 'tid' is blocked in a syscall, send it SIGVGKILL so as to get it
    395    out of the syscall and onto doing the next thing, whatever that is.
     396    If it isn't blocked in a syscall, this has no effect on the thread. */
    397 void VG_(get_thread_out_of_syscall)(ThreadId tid)
    398 {
    399    vg_assert(VG_(is_valid_tid)(tid));
    400    vg_assert(!VG_(is_running_thread)(tid));
    401 
    402    if (VG_(threads)[tid].status == VgTs_WaitSys) {
    403       if (VG_(clo_trace_signals)) {
    404 	 VG_(message)(Vg_DebugMsg,
    405                       "get_thread_out_of_syscall zaps tid %d lwp %d\n",
    406 		      tid, VG_(threads)[tid].os_state.lwpid);
    407       }
    408 #     if defined(VGO_darwin)
    409       {
    410          // GrP fixme use mach primitives on darwin?
    411          // GrP fixme thread_abort_safely?
    412          // GrP fixme race for thread with WaitSys set but not in syscall yet?
    413          extern kern_return_t thread_abort(mach_port_t);
    414          thread_abort(VG_(threads)[tid].os_state.lwpid);
    415       }
    416 #     else
    417       {
    418          __attribute__((unused))
    419          Int r = VG_(tkill)(VG_(threads)[tid].os_state.lwpid, VG_SIGVGKILL);
    420          /* JRS 2009-Mar-20: should we assert for r==0 (tkill succeeded)?
    421             I'm really not sure.  Here's a race scenario which argues
     422             that we shouldn't; but equally I'm not sure the scenario is
    423             even possible, because of constraints caused by the question
    424             of who holds the BigLock when.
    425 
    426             Target thread tid does sys_read on a socket and blocks.  This
    427             function gets called, and we observe correctly that tid's
    428             status is WaitSys but then for whatever reason this function
    429             goes very slowly for a while.  Then data arrives from
    430             wherever, tid's sys_read returns, tid exits.  Then we do
    431             tkill on tid, but tid no longer exists; tkill returns an
    432             error code and the assert fails. */
    433          /* vg_assert(r == 0); */
    434       }
    435 #     endif
    436    }
    437 }
    438 
    439 /*
    440    Yield the CPU for a short time to let some other thread run.
    441  */
    442 void VG_(vg_yield)(void)
    443 {
    444    ThreadId tid = VG_(running_tid);
    445 
    446    vg_assert(tid != VG_INVALID_THREADID);
    447    vg_assert(VG_(threads)[tid].os_state.lwpid == VG_(gettid)());
    448 
    449    VG_(release_BigLock)(tid, VgTs_Yielding, "VG_(vg_yield)");
    450 
    451    /*
    452       Tell the kernel we're yielding.
    453     */
    454    VG_(do_syscall0)(__NR_sched_yield);
    455 
    456    VG_(acquire_BigLock)(tid, "VG_(vg_yield)");
    457 }
    458 
    459 
    460 /* Set the standard set of blocked signals, used whenever we're not
    461    running a client syscall. */
    462 static void block_signals(void)
    463 {
    464    vki_sigset_t mask;
    465 
    466    VG_(sigfillset)(&mask);
    467 
    468    /* Don't block these because they're synchronous */
    469    VG_(sigdelset)(&mask, VKI_SIGSEGV);
    470    VG_(sigdelset)(&mask, VKI_SIGBUS);
    471    VG_(sigdelset)(&mask, VKI_SIGFPE);
    472    VG_(sigdelset)(&mask, VKI_SIGILL);
    473    VG_(sigdelset)(&mask, VKI_SIGTRAP);
    474 
    475    /* Can't block these anyway */
    476    VG_(sigdelset)(&mask, VKI_SIGSTOP);
    477    VG_(sigdelset)(&mask, VKI_SIGKILL);
    478 
    479    VG_(sigprocmask)(VKI_SIG_SETMASK, &mask, NULL);
    480 }
    481 
    482 static void os_state_clear(ThreadState *tst)
    483 {
    484    tst->os_state.lwpid       = 0;
    485    tst->os_state.threadgroup = 0;
    486 #  if defined(VGO_linux)
    487    /* no other fields to clear */
    488 #  elif defined(VGO_darwin)
    489    tst->os_state.post_mach_trap_fn = NULL;
    490    tst->os_state.pthread           = 0;
    491    tst->os_state.func_arg          = 0;
    492    VG_(memset)(&tst->os_state.child_go, 0, sizeof(tst->os_state.child_go));
    493    VG_(memset)(&tst->os_state.child_done, 0, sizeof(tst->os_state.child_done));
    494    tst->os_state.wq_jmpbuf_valid   = False;
    495    tst->os_state.remote_port       = 0;
    496    tst->os_state.msgh_id           = 0;
    497    VG_(memset)(&tst->os_state.mach_args, 0, sizeof(tst->os_state.mach_args));
    498 #  else
    499 #    error "Unknown OS"
    500 #  endif
    501 }
    502 
    503 static void os_state_init(ThreadState *tst)
    504 {
    505    tst->os_state.valgrind_stack_base    = 0;
    506    tst->os_state.valgrind_stack_init_SP = 0;
    507    os_state_clear(tst);
    508 }
    509 
    510 static
    511 void mostly_clear_thread_record ( ThreadId tid )
    512 {
    513    vki_sigset_t savedmask;
    514 
    515    vg_assert(tid >= 0 && tid < VG_N_THREADS);
    516    VG_(cleanup_thread)(&VG_(threads)[tid].arch);
    517    VG_(threads)[tid].tid = tid;
    518 
    519    /* Leave the thread in Zombie, so that it doesn't get reallocated
    520       until the caller is finally done with the thread stack. */
    521    VG_(threads)[tid].status               = VgTs_Zombie;
    522 
    523    VG_(sigemptyset)(&VG_(threads)[tid].sig_mask);
    524    VG_(sigemptyset)(&VG_(threads)[tid].tmp_sig_mask);
    525 
    526    os_state_clear(&VG_(threads)[tid]);
    527 
    528    /* start with no altstack */
    529    VG_(threads)[tid].altstack.ss_sp = (void *)0xdeadbeef;
    530    VG_(threads)[tid].altstack.ss_size = 0;
    531    VG_(threads)[tid].altstack.ss_flags = VKI_SS_DISABLE;
    532 
    533    VG_(clear_out_queued_signals)(tid, &savedmask);
    534 
    535    VG_(threads)[tid].sched_jmpbuf_valid = False;
    536 }
    537 
    538 /*
    539    Called in the child after fork.  If the parent has multiple
    540    threads, then we've inherited a VG_(threads) array describing them,
    541    but only the thread which called fork() is actually alive in the
     542    child.  This function needs to clean up all those other thread
    543    structures.
    544 
     545    Whichever tid in the parent called fork() becomes the
    546    master_tid in the child.  That's because the only living slot in
    547    VG_(threads) in the child after fork is VG_(threads)[tid], and it
    548    would be too hard to try to re-number the thread and relocate the
    549    thread state down to VG_(threads)[1].
    550 
    551    This function also needs to reinitialize the_BigLock, since
    552    otherwise we may end up sharing its state with the parent, which
    553    would be deeply confusing.
    554 */
    555 static void sched_fork_cleanup(ThreadId me)
    556 {
    557    ThreadId tid;
    558    vg_assert(VG_(running_tid) == me);
    559 
    560 #  if defined(VGO_darwin)
    561    // GrP fixme hack reset Mach ports
    562    VG_(mach_init)();
    563 #  endif
    564 
    565    VG_(threads)[me].os_state.lwpid = VG_(gettid)();
    566    VG_(threads)[me].os_state.threadgroup = VG_(getpid)();
    567 
    568    /* clear out all the unused thread slots */
    569    for (tid = 1; tid < VG_N_THREADS; tid++) {
    570       if (tid != me) {
    571          mostly_clear_thread_record(tid);
    572 	 VG_(threads)[tid].status = VgTs_Empty;
    573          VG_(clear_syscallInfo)(tid);
    574       }
    575    }
    576 
    577    /* re-init and take the sema */
    578    deinit_BigLock();
    579    init_BigLock();
    580    VG_(acquire_BigLock_LL)(NULL);
    581 }
    582 
    583 
    584 /* First phase of initialisation of the scheduler.  Initialise the
    585    bigLock, zeroise the VG_(threads) structure and decide on the
    586    ThreadId of the root thread.
    587 */
    588 ThreadId VG_(scheduler_init_phase1) ( void )
    589 {
    590    Int i;
    591    ThreadId tid_main;
    592 
    593    VG_(debugLog)(1,"sched","sched_init_phase1\n");
    594 
    595    if (VG_(clo_fair_sched) != disable_fair_sched
    596        && !ML_(set_sched_lock_impl)(sched_lock_ticket)
    597        && VG_(clo_fair_sched) == enable_fair_sched)
    598    {
    599       VG_(printf)("Error: fair scheduling is not supported on this system.\n");
    600       VG_(exit)(1);
    601    }
    602 
    603    if (VG_(clo_verbosity) > 1) {
    604       VG_(message)(Vg_DebugMsg,
    605                    "Scheduler: using %s scheduler lock implementation.\n",
    606                    ML_(get_sched_lock_name)());
    607    }
    608 
    609    init_BigLock();
    610 
    611    for (i = 0 /* NB; not 1 */; i < VG_N_THREADS; i++) {
    612       /* Paranoia .. completely zero it out. */
    613       VG_(memset)( & VG_(threads)[i], 0, sizeof( VG_(threads)[i] ) );
    614 
    615       VG_(threads)[i].sig_queue = NULL;
    616 
    617       os_state_init(&VG_(threads)[i]);
    618       mostly_clear_thread_record(i);
    619 
    620       VG_(threads)[i].status                    = VgTs_Empty;
    621       VG_(threads)[i].client_stack_szB          = 0;
    622       VG_(threads)[i].client_stack_highest_word = (Addr)NULL;
    623       VG_(threads)[i].err_disablement_level     = 0;
    624       VG_(threads)[i].thread_name               = NULL;
    625    }
    626 
    627    tid_main = VG_(alloc_ThreadState)();
    628 
    629    /* Bleh.  Unfortunately there are various places in the system that
    630       assume that the main thread has a ThreadId of 1.
    631       - Helgrind (possibly)
    632       - stack overflow message in default_action() in m_signals.c
    633       - definitely a lot more places
    634    */
    635    vg_assert(tid_main == 1);
    636 
    637    return tid_main;
    638 }
    639 
    640 
    641 /* Second phase of initialisation of the scheduler.  Given the root
    642    ThreadId computed by first phase of initialisation, fill in stack
    643    details and acquire bigLock.  Initialise the scheduler.  This is
    644    called at startup.  The caller subsequently initialises the guest
    645    state components of this main thread.
    646 */
    647 void VG_(scheduler_init_phase2) ( ThreadId tid_main,
    648                                   Addr     clstack_end,
    649                                   SizeT    clstack_size )
    650 {
    651    VG_(debugLog)(1,"sched","sched_init_phase2: tid_main=%d, "
    652                    "cls_end=0x%lx, cls_sz=%ld\n",
    653                    tid_main, clstack_end, clstack_size);
    654 
    655    vg_assert(VG_IS_PAGE_ALIGNED(clstack_end+1));
    656    vg_assert(VG_IS_PAGE_ALIGNED(clstack_size));
    657 
    658    VG_(threads)[tid_main].client_stack_highest_word
    659       = clstack_end + 1 - sizeof(UWord);
    660    VG_(threads)[tid_main].client_stack_szB
    661       = clstack_size;
    662 
    663    VG_(atfork)(NULL, NULL, sched_fork_cleanup);
    664 }
    665 
    666 
    667 /* ---------------------------------------------------------------------
    668    Helpers for running translations.
    669    ------------------------------------------------------------------ */
    670 
    671 /* Use gcc's built-in setjmp/longjmp.  longjmp must not restore signal
    672    mask state, but does need to pass "val" through.  jumped must be a
    673    volatile UWord. */
    674 #define SCHEDSETJMP(tid, jumped, stmt)					\
    675    do {									\
    676       ThreadState * volatile _qq_tst = VG_(get_ThreadState)(tid);	\
    677 									\
    678       (jumped) = VG_MINIMAL_SETJMP(_qq_tst->sched_jmpbuf);              \
    679       if ((jumped) == ((UWord)0)) {                                     \
    680 	 vg_assert(!_qq_tst->sched_jmpbuf_valid);			\
    681 	 _qq_tst->sched_jmpbuf_valid = True;				\
    682 	 stmt;								\
    683       }	else if (VG_(clo_trace_sched))					\
    684 	 VG_(printf)("SCHEDSETJMP(line %d) tid %d, jumped=%ld\n",       \
    685                      __LINE__, tid, jumped);                            \
    686       vg_assert(_qq_tst->sched_jmpbuf_valid);				\
    687       _qq_tst->sched_jmpbuf_valid = False;				\
    688    } while(0)
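
/* Illustrative only: how SCHEDSETJMP is typically used (see
   run_thread_for_a_while and handle_syscall below).  'stmt' runs with a
   valid sched_jmpbuf, so the signal machinery can longjmp back out of
   it:

      volatile UWord jumped;
      SCHEDSETJMP(tid, jumped, VG_(client_syscall)(tid, trc));
      if (jumped != (UWord)0) {
         // got here via the longjmp, not by 'stmt' returning normally
      }
*/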
    689 
    690 
    691 /* Do various guest state alignment checks prior to running a thread.
    692    Specifically, check that what we have matches Vex's guest state
    693    layout requirements.  See libvex.h for details, but in short the
    694    requirements are: There must be no holes in between the primary
     695    guest state, its two copies, and the spill area.  That is, all 4
    696    areas must have a 16-aligned size and be 16-aligned, and placed
    697    back-to-back. */
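
/* Illustrative only: the layout being asserted below.  Each area is
   16-aligned, has a 16-aligned size, and the four areas are placed
   back-to-back:

      a_vexsh1 == a_vex + 1 * sz_vex
      a_vexsh2 == a_vex + 2 * sz_vex
      a_spill  == a_vex + 3 * sz_vex   (and sz_spill == LibVEX_N_SPILL_BYTES)
*/
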
    698 static void do_pre_run_checks ( ThreadState* tst )
    699 {
    700    Addr a_vex     = (Addr) & tst->arch.vex;
    701    Addr a_vexsh1  = (Addr) & tst->arch.vex_shadow1;
    702    Addr a_vexsh2  = (Addr) & tst->arch.vex_shadow2;
    703    Addr a_spill   = (Addr) & tst->arch.vex_spill;
    704    UInt sz_vex    = (UInt) sizeof tst->arch.vex;
    705    UInt sz_vexsh1 = (UInt) sizeof tst->arch.vex_shadow1;
    706    UInt sz_vexsh2 = (UInt) sizeof tst->arch.vex_shadow2;
    707    UInt sz_spill  = (UInt) sizeof tst->arch.vex_spill;
    708 
    709    if (0)
    710    VG_(printf)("gst %p %d, sh1 %p %d, "
    711                "sh2 %p %d, spill %p %d\n",
    712                (void*)a_vex, sz_vex,
    713                (void*)a_vexsh1, sz_vexsh1,
    714                (void*)a_vexsh2, sz_vexsh2,
    715                (void*)a_spill, sz_spill );
    716 
    717    vg_assert(VG_IS_16_ALIGNED(sz_vex));
    718    vg_assert(VG_IS_16_ALIGNED(sz_vexsh1));
    719    vg_assert(VG_IS_16_ALIGNED(sz_vexsh2));
    720    vg_assert(VG_IS_16_ALIGNED(sz_spill));
    721 
    722    vg_assert(VG_IS_16_ALIGNED(a_vex));
    723    vg_assert(VG_IS_16_ALIGNED(a_vexsh1));
    724    vg_assert(VG_IS_16_ALIGNED(a_vexsh2));
    725    vg_assert(VG_IS_16_ALIGNED(a_spill));
    726 
    727    /* Check that the guest state and its two shadows have the same
    728       size, and that there are no holes in between.  The latter is
    729       important because Memcheck assumes that it can reliably access
    730       the shadows by indexing off a pointer to the start of the
    731       primary guest state area. */
    732    vg_assert(sz_vex == sz_vexsh1);
    733    vg_assert(sz_vex == sz_vexsh2);
    734    vg_assert(a_vex + 1 * sz_vex == a_vexsh1);
    735    vg_assert(a_vex + 2 * sz_vex == a_vexsh2);
    736    /* Also check there's no hole between the second shadow area and
    737       the spill area. */
    738    vg_assert(sz_spill == LibVEX_N_SPILL_BYTES);
    739    vg_assert(a_vex + 3 * sz_vex == a_spill);
    740 
    741 #  if defined(VGA_x86)
    742    /* x86 XMM regs must form an array, ie, have no holes in
    743       between. */
    744    vg_assert(
    745       (offsetof(VexGuestX86State,guest_XMM7)
    746        - offsetof(VexGuestX86State,guest_XMM0))
    747       == (8/*#regs*/-1) * 16/*bytes per reg*/
    748    );
    749    vg_assert(VG_IS_16_ALIGNED(offsetof(VexGuestX86State,guest_XMM0)));
    750    vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestX86State,guest_FPREG)));
    751    vg_assert(8 == offsetof(VexGuestX86State,guest_EAX));
    752    vg_assert(VG_IS_4_ALIGNED(offsetof(VexGuestX86State,guest_EAX)));
    753    vg_assert(VG_IS_4_ALIGNED(offsetof(VexGuestX86State,guest_EIP)));
    754 #  endif
    755 
    756 #  if defined(VGA_amd64)
    757    /* amd64 YMM regs must form an array, ie, have no holes in
    758       between. */
    759    vg_assert(
    760       (offsetof(VexGuestAMD64State,guest_YMM16)
    761        - offsetof(VexGuestAMD64State,guest_YMM0))
    762       == (17/*#regs*/-1) * 32/*bytes per reg*/
    763    );
    764    vg_assert(VG_IS_16_ALIGNED(offsetof(VexGuestAMD64State,guest_YMM0)));
    765    vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State,guest_FPREG)));
    766    vg_assert(16 == offsetof(VexGuestAMD64State,guest_RAX));
    767    vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State,guest_RAX)));
    768    vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State,guest_RIP)));
    769 #  endif
    770 
    771 #  if defined(VGA_ppc32) || defined(VGA_ppc64)
    772    /* ppc guest_state vector regs must be 16 byte aligned for
    773       loads/stores.  This is important! */
    774    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_VSR0));
    775    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_VSR0));
    776    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_VSR0));
    777    /* be extra paranoid .. */
    778    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_VSR1));
    779    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_VSR1));
    780    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_VSR1));
    781 #  endif
    782 
    783 #  if defined(VGA_arm)
    784    /* arm guest_state VFP regs must be 8 byte aligned for
    785       loads/stores.  Let's use 16 just to be on the safe side. */
    786    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_D0));
    787    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_D0));
    788    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_D0));
    789    /* be extra paranoid .. */
    790    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex.guest_D1));
    791    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow1.guest_D1));
    792    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow2.guest_D1));
    793 #  endif
    794 
    795 #  if defined(VGA_arm64)
    796    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex.guest_X0));
    797    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow1.guest_X0));
    798    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow2.guest_X0));
    799    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_Q0));
    800    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_Q0));
    801    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_Q0));
    802 #  endif
    803 
    804 #  if defined(VGA_s390x)
    805    /* no special requirements */
    806 #  endif
    807 
    808 #  if defined(VGA_mips32) || defined(VGA_mips64)
    809    /* no special requirements */
    810 #  endif
    811 }
    812 
    813 // NO_VGDB_POLL value ensures vgdb is not polled, while
    814 // VGDB_POLL_ASAP ensures that the next scheduler call
    815 // will cause a poll.
    816 #define NO_VGDB_POLL    0xffffffffffffffffULL
    817 #define VGDB_POLL_ASAP  0x0ULL
    818 
    819 void VG_(disable_vgdb_poll) (void )
    820 {
    821    vgdb_next_poll = NO_VGDB_POLL;
    822 }
    823 void VG_(force_vgdb_poll) ( void )
    824 {
    825    vgdb_next_poll = VGDB_POLL_ASAP;
    826 }
    827 
    828 /* Run the thread tid for a while, and return a VG_TRC_* value
    829    indicating why VG_(disp_run_translations) stopped, and possibly an
    830    auxiliary word.  Also, only allow the thread to run for at most
    831    *dispatchCtrP events.  If (as is the normal case) use_alt_host_addr
    832    is False, we are running ordinary redir'd translations, and we
    833    should therefore start by looking up the guest next IP in TT.  If
    834    it is True then we ignore the guest next IP and just run from
    835    alt_host_addr, which presumably points at host code for a no-redir
    836    translation.
    837 
    838    Return results are placed in two_words.  two_words[0] is set to the
    839    TRC.  In the case where that is VG_TRC_CHAIN_ME_TO_{SLOW,FAST}_EP,
    840    the address to patch is placed in two_words[1].
    841 */
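
/* Illustrative only: how a caller interprets two_words (compare the
   sanity checks at the end of this function and the trc[] switch in
   VG_(scheduler) below):

      HWord two_words[2];
      run_thread_for_a_while( &two_words[0], &dispatch_ctr, tid,
                              0, False );   // no alt_host_addr
      if (two_words[0] == VG_TRC_CHAIN_ME_TO_SLOW_EP
          || two_words[0] == VG_TRC_CHAIN_ME_TO_FAST_EP) {
         // two_words[1] holds the address to patch
      }
*/
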
    842 static
    843 void run_thread_for_a_while ( /*OUT*/HWord* two_words,
    844                               /*MOD*/Int*   dispatchCtrP,
    845                               ThreadId      tid,
    846                               HWord         alt_host_addr,
    847                               Bool          use_alt_host_addr )
    848 {
    849    volatile HWord        jumped         = 0;
    850    volatile ThreadState* tst            = NULL; /* stop gcc complaining */
    851    volatile Int          done_this_time = 0;
    852    volatile HWord        host_code_addr = 0;
    853 
    854    /* Paranoia */
    855    vg_assert(VG_(is_valid_tid)(tid));
    856    vg_assert(VG_(is_running_thread)(tid));
    857    vg_assert(!VG_(is_exiting)(tid));
    858    vg_assert(*dispatchCtrP > 0);
    859 
    860    tst = VG_(get_ThreadState)(tid);
    861    do_pre_run_checks( (ThreadState*)tst );
    862    /* end Paranoia */
    863 
    864    /* Futz with the XIndir stats counters. */
    865    vg_assert(VG_(stats__n_xindirs_32) == 0);
    866    vg_assert(VG_(stats__n_xindir_misses_32) == 0);
    867 
    868    /* Clear return area. */
    869    two_words[0] = two_words[1] = 0;
    870 
    871    /* Figure out where we're starting from. */
    872    if (use_alt_host_addr) {
    873       /* unusual case -- no-redir translation */
    874       host_code_addr = alt_host_addr;
    875    } else {
    876       /* normal case -- redir translation */
    877       UInt cno = (UInt)VG_TT_FAST_HASH((Addr)tst->arch.vex.VG_INSTR_PTR);
    878       if (LIKELY(VG_(tt_fast)[cno].guest == (Addr)tst->arch.vex.VG_INSTR_PTR))
    879          host_code_addr = VG_(tt_fast)[cno].host;
    880       else {
    881          AddrH res   = 0;
     882          /* Not found in VG_(tt_fast).  Searching the transtab here
     883             improves performance compared to returning directly to
     884             the scheduler. */
    885          Bool  found = VG_(search_transtab)(&res, NULL, NULL,
    886                                             (Addr)tst->arch.vex.VG_INSTR_PTR,
    887                                             True/*upd cache*/
    888                                             );
    889          if (LIKELY(found)) {
    890             host_code_addr = res;
    891          } else {
    892             /* At this point, we know that we intended to start at a
    893                normal redir translation, but it was not found.  In
    894                which case we can return now claiming it's not
    895                findable. */
    896             two_words[0] = VG_TRC_INNER_FASTMISS; /* hmm, is that right? */
    897             return;
    898          }
    899       }
    900    }
    901    /* We have either a no-redir or a redir translation. */
    902    vg_assert(host_code_addr != 0); /* implausible */
    903 
    904    /* there should be no undealt-with signals */
    905    //vg_assert(VG_(threads)[tid].siginfo.si_signo == 0);
    906 
    907    /* Set up event counter stuff for the run. */
    908    tst->arch.vex.host_EvC_COUNTER = *dispatchCtrP;
    909    tst->arch.vex.host_EvC_FAILADDR
    910       = (HWord)VG_(fnptr_to_fnentry)( &VG_(disp_cp_evcheck_fail) );
    911 
    912    if (0) {
    913       vki_sigset_t m;
    914       Int i, err = VG_(sigprocmask)(VKI_SIG_SETMASK, NULL, &m);
    915       vg_assert(err == 0);
    916       VG_(printf)("tid %d: entering code with unblocked signals: ", tid);
    917       for (i = 1; i <= _VKI_NSIG; i++)
    918          if (!VG_(sigismember)(&m, i))
    919             VG_(printf)("%d ", i);
    920       VG_(printf)("\n");
    921    }
    922 
    923    /* Set up return-value area. */
    924 
    925    // Tell the tool this thread is about to run client code
    926    VG_TRACK( start_client_code, tid, bbs_done );
    927 
    928    vg_assert(VG_(in_generated_code) == False);
    929    VG_(in_generated_code) = True;
    930 
    931    SCHEDSETJMP(
    932       tid,
    933       jumped,
    934       VG_(disp_run_translations)(
    935          two_words,
    936          (void*)&tst->arch.vex,
    937          host_code_addr
    938       )
    939    );
    940 
    941    vg_assert(VG_(in_generated_code) == True);
    942    VG_(in_generated_code) = False;
    943 
    944    if (jumped != (HWord)0) {
    945       /* We get here if the client took a fault that caused our signal
    946          handler to longjmp. */
    947       vg_assert(two_words[0] == 0 && two_words[1] == 0); // correct?
    948       two_words[0] = VG_TRC_FAULT_SIGNAL;
    949       two_words[1] = 0;
    950       block_signals();
    951    }
    952 
    953    /* Merge the 32-bit XIndir/miss counters into the 64 bit versions,
    954       and zero out the 32-bit ones in preparation for the next run of
    955       generated code. */
    956    stats__n_xindirs += (ULong)VG_(stats__n_xindirs_32);
    957    VG_(stats__n_xindirs_32) = 0;
    958    stats__n_xindir_misses += (ULong)VG_(stats__n_xindir_misses_32);
    959    VG_(stats__n_xindir_misses_32) = 0;
    960 
    961    /* Inspect the event counter. */
    962    vg_assert((Int)tst->arch.vex.host_EvC_COUNTER >= -1);
    963    vg_assert(tst->arch.vex.host_EvC_FAILADDR
    964              == (HWord)VG_(fnptr_to_fnentry)( &VG_(disp_cp_evcheck_fail)) );
    965 
    966    done_this_time = *dispatchCtrP - ((Int)tst->arch.vex.host_EvC_COUNTER + 1);
    967 
    968    vg_assert(done_this_time >= 0);
    969    bbs_done += (ULong)done_this_time;
    970 
    971    *dispatchCtrP -= done_this_time;
    972    vg_assert(*dispatchCtrP >= 0);
    973 
    974    // Tell the tool this thread has stopped running client code
    975    VG_TRACK( stop_client_code, tid, bbs_done );
    976 
    977    if (bbs_done >= vgdb_next_poll) {
    978       if (VG_(clo_vgdb_poll))
    979          vgdb_next_poll = bbs_done + (ULong)VG_(clo_vgdb_poll);
    980       else
    981          /* value was changed due to gdbserver invocation via ptrace */
    982          vgdb_next_poll = NO_VGDB_POLL;
    983       if (VG_(gdbserver_activity) (tid))
    984          VG_(gdbserver) (tid);
    985    }
    986 
    987    /* TRC value and possible auxiliary patch-address word are already
    988       in two_words[0] and [1] respectively, as a result of the call to
     989       VG_(disp_run_translations). */
    990    /* Stay sane .. */
    991    if (two_words[0] == VG_TRC_CHAIN_ME_TO_SLOW_EP
    992        || two_words[0] == VG_TRC_CHAIN_ME_TO_FAST_EP) {
    993       vg_assert(two_words[1] != 0); /* we have a legit patch addr */
    994    } else {
    995       vg_assert(two_words[1] == 0); /* nobody messed with it */
    996    }
    997 }
    998 
    999 
   1000 /* ---------------------------------------------------------------------
   1001    The scheduler proper.
   1002    ------------------------------------------------------------------ */
   1003 
   1004 static void handle_tt_miss ( ThreadId tid )
   1005 {
   1006    Bool found;
   1007    Addr ip = VG_(get_IP)(tid);
   1008 
   1009    /* Trivial event.  Miss in the fast-cache.  Do a full
   1010       lookup for it. */
   1011    found = VG_(search_transtab)( NULL, NULL, NULL,
   1012                                  ip, True/*upd_fast_cache*/ );
   1013    if (UNLIKELY(!found)) {
   1014       /* Not found; we need to request a translation. */
   1015       if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/,
   1016                           bbs_done, True/*allow redirection*/ )) {
   1017          found = VG_(search_transtab)( NULL, NULL, NULL,
   1018                                        ip, True );
   1019          vg_assert2(found, "handle_tt_miss: missing tt_fast entry");
   1020 
   1021       } else {
   1022 	 // If VG_(translate)() fails, it's because it had to throw a
   1023 	 // signal because the client jumped to a bad address.  That
   1024 	 // means that either a signal has been set up for delivery,
   1025 	 // or the thread has been marked for termination.  Either
   1026 	 // way, we just need to go back into the scheduler loop.
   1027       }
   1028    }
   1029 }
   1030 
   1031 static
   1032 void handle_chain_me ( ThreadId tid, void* place_to_chain, Bool toFastEP )
   1033 {
   1034    Bool found          = False;
   1035    Addr ip             = VG_(get_IP)(tid);
   1036    UInt to_sNo         = (UInt)-1;
   1037    UInt to_tteNo       = (UInt)-1;
   1038 
   1039    found = VG_(search_transtab)( NULL, &to_sNo, &to_tteNo,
   1040                                  ip, False/*dont_upd_fast_cache*/ );
   1041    if (!found) {
   1042       /* Not found; we need to request a translation. */
   1043       if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/,
   1044                           bbs_done, True/*allow redirection*/ )) {
   1045          found = VG_(search_transtab)( NULL, &to_sNo, &to_tteNo,
   1046                                        ip, False );
   1047          vg_assert2(found, "handle_chain_me: missing tt_fast entry");
   1048       } else {
   1049 	 // If VG_(translate)() fails, it's because it had to throw a
   1050 	 // signal because the client jumped to a bad address.  That
   1051 	 // means that either a signal has been set up for delivery,
   1052 	 // or the thread has been marked for termination.  Either
   1053 	 // way, we just need to go back into the scheduler loop.
   1054         return;
   1055       }
   1056    }
   1057    vg_assert(found);
   1058    vg_assert(to_sNo != -1);
   1059    vg_assert(to_tteNo != -1);
   1060 
   1061    /* So, finally we know where to patch through to.  Do the patching
   1062       and update the various admin tables that allow it to be undone
   1063       in the case that the destination block gets deleted. */
   1064    VG_(tt_tc_do_chaining)( place_to_chain,
   1065                            to_sNo, to_tteNo, toFastEP );
   1066 }
   1067 
   1068 static void handle_syscall(ThreadId tid, UInt trc)
   1069 {
   1070    ThreadState * volatile tst = VG_(get_ThreadState)(tid);
   1071    volatile UWord jumped;
   1072 
   1073    /* Syscall may or may not block; either way, it will be
   1074       complete by the time this call returns, and we'll be
   1075       runnable again.  We could take a signal while the
   1076       syscall runs. */
   1077 
    1078    if (VG_(clo_sanity_level) >= 3)
   1079       VG_(am_do_sync_check)("(BEFORE SYSCALL)",__FILE__,__LINE__);
   1080 
   1081    SCHEDSETJMP(tid, jumped, VG_(client_syscall)(tid, trc));
   1082 
    1083    if (VG_(clo_sanity_level) >= 3)
   1084       VG_(am_do_sync_check)("(AFTER SYSCALL)",__FILE__,__LINE__);
   1085 
   1086    if (!VG_(is_running_thread)(tid))
   1087       VG_(printf)("tid %d not running; VG_(running_tid)=%d, tid %d status %d\n",
   1088 		  tid, VG_(running_tid), tid, tst->status);
   1089    vg_assert(VG_(is_running_thread)(tid));
   1090 
   1091    if (jumped != (UWord)0) {
   1092       block_signals();
   1093       VG_(poll_signals)(tid);
   1094    }
   1095 }
   1096 
   1097 /* tid just requested a jump to the noredir version of its current
   1098    program counter.  So make up that translation if needed, run it,
   1099    and return the resulting thread return code in two_words[]. */
   1100 static
   1101 void handle_noredir_jump ( /*OUT*/HWord* two_words,
   1102                            /*MOD*/Int*   dispatchCtrP,
   1103                            ThreadId tid )
   1104 {
   1105    /* Clear return area. */
   1106    two_words[0] = two_words[1] = 0;
   1107 
   1108    AddrH hcode = 0;
   1109    Addr  ip    = VG_(get_IP)(tid);
   1110 
   1111    Bool  found = VG_(search_unredir_transtab)( &hcode, ip );
   1112    if (!found) {
   1113       /* Not found; we need to request a translation. */
   1114       if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/, bbs_done,
   1115                           False/*NO REDIRECTION*/ )) {
   1116 
   1117          found = VG_(search_unredir_transtab)( &hcode, ip );
   1118          vg_assert2(found, "unredir translation missing after creation?!");
   1119       } else {
   1120 	 // If VG_(translate)() fails, it's because it had to throw a
   1121 	 // signal because the client jumped to a bad address.  That
   1122 	 // means that either a signal has been set up for delivery,
   1123 	 // or the thread has been marked for termination.  Either
   1124 	 // way, we just need to go back into the scheduler loop.
   1125          two_words[0] = VG_TRC_BORING;
   1126          return;
   1127       }
   1128 
   1129    }
   1130 
   1131    vg_assert(found);
   1132    vg_assert(hcode != 0);
   1133 
   1134    /* Otherwise run it and return the resulting VG_TRC_* value. */
   1135    vg_assert(*dispatchCtrP > 0); /* so as to guarantee progress */
   1136    run_thread_for_a_while( two_words, dispatchCtrP, tid,
   1137                            hcode, True/*use hcode*/ );
   1138 }
   1139 
   1140 
   1141 /*
   1142    Run a thread until it wants to exit.
   1143 
   1144    We assume that the caller has already called VG_(acquire_BigLock) for
   1145    us, so we own the VCPU.  Also, all signals are blocked.
   1146  */
   1147 VgSchedReturnCode VG_(scheduler) ( ThreadId tid )
   1148 {
   1149    /* Holds the remaining size of this thread's "timeslice". */
   1150    Int dispatch_ctr = 0;
   1151 
   1152    ThreadState *tst = VG_(get_ThreadState)(tid);
   1153    static Bool vgdb_startup_action_done = False;
   1154 
   1155    if (VG_(clo_trace_sched))
   1156       print_sched_event(tid, "entering VG_(scheduler)");
   1157 
   1158    /* Do vgdb initialization (but once). Only the first (main) task
   1159       starting up will do the below.
    1160       Initializing gdbserver earlier than at the first
    1161       VG_(scheduler) call causes problems:
   1162       * at the end of VG_(scheduler_init_phase2) :
    1163         The main thread is in VgTs_Init state, but not yet in a
    1164         consistent state => the thread cannot be reported to gdb
   1165         (e.g. causes an assert in LibVEX_GuestX86_get_eflags when giving
   1166         back the guest registers to gdb).
   1167       * at end of valgrind_main, just
   1168         before VG_(main_thread_wrapper_NORETURN)(1) :
   1169         The main thread is still in VgTs_Init state but in a
   1170         more advanced state. However, the thread state is not yet
    1171         completely initialized: among other things, the os_state is
    1172         not yet fully set => the thread is then not properly reported
    1173         to gdb, which gets confused (causing e.g. a duplicate thread to
    1174         be shown, without a thread id).
   1175       * it would be possible to initialize gdbserver "lower" in the
   1176         call stack (e.g. in VG_(main_thread_wrapper_NORETURN)) but
   1177         these are platform dependent and the place at which
   1178         the thread state is completely initialized is not
   1179         specific anymore to the main thread (so a similar "do it only
   1180         once" would be needed).
   1181 
   1182         => a "once only" initialization here is the best compromise. */
   1183    if (!vgdb_startup_action_done) {
   1184       vg_assert(tid == 1); // it must be the main thread.
   1185       vgdb_startup_action_done = True;
   1186       if (VG_(clo_vgdb) != Vg_VgdbNo) {
    1187          /* If we have to poll, ensure we do an initial poll at the first
    1188             scheduler call.  Otherwise, ensure no poll (unless interrupted
    1189             by ptrace). */
   1190          if (VG_(clo_vgdb_poll))
   1191             VG_(force_vgdb_poll) ();
   1192          else
   1193             VG_(disable_vgdb_poll) ();
   1194 
   1195          vg_assert (VG_(dyn_vgdb_error) == VG_(clo_vgdb_error));
   1196          /* As we are initializing, VG_(dyn_vgdb_error) can't have been
   1197             changed yet. */
   1198 
   1199          VG_(gdbserver_prerun_action) (1);
   1200       } else {
   1201          VG_(disable_vgdb_poll) ();
   1202       }
   1203    }
   1204 
   1205    /* set the proper running signal mask */
   1206    block_signals();
   1207 
   1208    vg_assert(VG_(is_running_thread)(tid));
   1209 
   1210    dispatch_ctr = SCHEDULING_QUANTUM;
   1211 
   1212    while (!VG_(is_exiting)(tid)) {
   1213 
   1214       vg_assert(dispatch_ctr >= 0);
   1215       if (dispatch_ctr == 0) {
   1216 
   1217 	 /* Our slice is done, so yield the CPU to another thread.  On
   1218             Linux, this doesn't sleep between sleeping and running,
   1219             since that would take too much time. */
   1220 
   1221 	 /* 4 July 06: it seems that a zero-length nsleep is needed to
   1222             cause async thread cancellation (canceller.c) to terminate
   1223             in finite time; else it is in some kind of race/starvation
   1224             situation and completion is arbitrarily delayed (although
   1225             this is not a deadlock).
   1226 
   1227             Unfortunately these sleeps cause MPI jobs not to terminate
   1228             sometimes (some kind of livelock).  So sleeping once
   1229             every N opportunities appears to work. */
   1230 
   1231 	 /* 3 Aug 06: doing sys__nsleep works but crashes some apps.
   1232             sys_yield also helps the problem, whilst not crashing apps. */
   1233 
   1234 	 VG_(release_BigLock)(tid, VgTs_Yielding,
   1235                                    "VG_(scheduler):timeslice");
   1236 	 /* ------------ now we don't have The Lock ------------ */
   1237 
   1238 	 VG_(acquire_BigLock)(tid, "VG_(scheduler):timeslice");
   1239 	 /* ------------ now we do have The Lock ------------ */
   1240 
   1241 	 /* OK, do some relatively expensive housekeeping stuff */
   1242 	 scheduler_sanity(tid);
   1243 	 VG_(sanity_check_general)(False);
   1244 
   1245 	 /* Look for any pending signals for this thread, and set them up
   1246 	    for delivery */
   1247 	 VG_(poll_signals)(tid);
   1248 
   1249 	 if (VG_(is_exiting)(tid))
   1250 	    break;		/* poll_signals picked up a fatal signal */
   1251 
   1252 	 /* For stats purposes only. */
   1253 	 n_scheduling_events_MAJOR++;
   1254 
    1255 	 /* Figure out how many bbs to ask run_thread_for_a_while to do.
    1256 	    Note that the dispatcher decrements the counter before testing
    1257 	    it for zero, so that if dispatch_ctr is set to N you get at
    1258 	    most N-1 iterations.  Also this means that dispatch_ctr must
   1259 	    exceed zero before entering the innerloop.  Also also, the
   1260 	    decrement is done before the bb is actually run, so you
   1261 	    always get at least one decrement even if nothing happens. */
   1262          // FIXME is this right?
   1263          dispatch_ctr = SCHEDULING_QUANTUM;
   1264 
   1265 	 /* paranoia ... */
   1266 	 vg_assert(tst->tid == tid);
   1267 	 vg_assert(tst->os_state.lwpid == VG_(gettid)());
   1268       }
   1269 
   1270       /* For stats purposes only. */
   1271       n_scheduling_events_MINOR++;
   1272 
   1273       if (0)
   1274          VG_(message)(Vg_DebugMsg, "thread %d: running for %d bbs\n",
   1275                                    tid, dispatch_ctr - 1 );
   1276 
   1277       HWord trc[2]; /* "two_words" */
   1278       run_thread_for_a_while( &trc[0],
   1279                               &dispatch_ctr,
   1280                               tid, 0/*ignored*/, False );
   1281 
   1282       if (VG_(clo_trace_sched) && VG_(clo_verbosity) > 2) {
   1283 	 HChar buf[50];
   1284 	 VG_(sprintf)(buf, "TRC: %s", name_of_sched_event(trc[0]));
   1285 	 print_sched_event(tid, buf);
   1286       }
   1287 
   1288       if (trc[0] == VEX_TRC_JMP_NOREDIR) {
   1289          /* If we got a request to run a no-redir version of
   1290             something, do so now -- handle_noredir_jump just (creates
   1291             and) runs that one translation.  The flip side is that the
   1292             noredir translation can't itself return another noredir
   1293             request -- that would be nonsensical.  It can, however,
   1294             return VG_TRC_BORING, which just means keep going as
   1295             normal. */
   1296          /* Note that the fact that we need to continue with a
   1297             no-redir jump is not recorded anywhere else in this
   1298             thread's state.  So we *must* execute the block right now
   1299             -- we can't fail to execute it and later resume with it,
   1300             because by then we'll have forgotten the fact that it
   1301             should be run as no-redir, and it will get run as a normal,
   1302             potentially-redirected one instead, hence screwing up.  This really ought
   1303             to be cleaned up, by noting in the guest state that the
   1304             next block to be executed should be no-redir.  Then we can
   1305             suspend and resume at any point, which isn't the case at
   1306             the moment. */
   1307          handle_noredir_jump( &trc[0],
   1308                               &dispatch_ctr,
   1309                               tid );
   1310          vg_assert(trc[0] != VEX_TRC_JMP_NOREDIR);
   1311 
   1312          /* This can't be allowed to happen, since it means the block
   1313             didn't execute, and we have no way to resume-as-noredir
   1314             after we get more timeslice.  But I don't think it ever
   1315             can, since handle_noredir_jump will assert if the counter
   1316             is zero on entry. */
   1317          vg_assert(trc[0] != VG_TRC_INNER_COUNTERZERO);
   1318 
   1319          /* A no-redir translation can't return with a chain-me
   1320             request, since chaining in the no-redir cache is too
   1321             complex. */
   1322          vg_assert(trc[0] != VG_TRC_CHAIN_ME_TO_SLOW_EP
   1323                    && trc[0] != VG_TRC_CHAIN_ME_TO_FAST_EP);
   1324       }
   1325 
   1326       switch (trc[0]) {
   1327       case VEX_TRC_JMP_BORING:
   1328          /* assisted dispatch, no event.  Used by no-redir
   1329             translations to force return to the scheduler. */
   1330       case VG_TRC_BORING:
   1331          /* no special event, just keep going. */
   1332          break;
   1333 
   1334       case VG_TRC_INNER_FASTMISS:
   1335 	 vg_assert(dispatch_ctr > 0);
   1336 	 handle_tt_miss(tid);
   1337 	 break;
   1338 
   1339       case VG_TRC_CHAIN_ME_TO_SLOW_EP: {
   1340          if (0) VG_(printf)("sched: CHAIN_TO_SLOW_EP: %p\n", (void*)trc[1] );
   1341          handle_chain_me(tid, (void*)trc[1], False);
   1342          break;
   1343       }
   1344 
   1345       case VG_TRC_CHAIN_ME_TO_FAST_EP: {
   1346          if (0) VG_(printf)("sched: CHAIN_TO_FAST_EP: %p\n", (void*)trc[1] );
   1347          handle_chain_me(tid, (void*)trc[1], True);
   1348          break;
   1349       }
   1350 
   1351       case VEX_TRC_JMP_CLIENTREQ:
   1352 	 do_client_request(tid);
   1353 	 break;
   1354 
   1355       case VEX_TRC_JMP_SYS_INT128:  /* x86-linux */
   1356       case VEX_TRC_JMP_SYS_INT129:  /* x86-darwin */
   1357       case VEX_TRC_JMP_SYS_INT130:  /* x86-darwin */
   1358       case VEX_TRC_JMP_SYS_SYSCALL: /* amd64-linux, ppc32-linux, amd64-darwin */
   1359 	 handle_syscall(tid, trc[0]);
   1360 	 if (VG_(clo_sanity_level) > 2)
   1361 	    VG_(sanity_check_general)(True); /* sanity-check every syscall */
   1362 	 break;
   1363 
   1364       case VEX_TRC_JMP_YIELD:
   1365 	 /* Explicit yield, because this thread is in a spin-lock
   1366 	    or something.  Only let the thread run for a short while
   1367             longer.  Because swapping to another thread is expensive,
   1368             we're prepared to let this thread eat a little more CPU
   1369             before swapping to another.  That means that short-term
   1370             spins waiting for hardware to poke memory won't cause a
   1371             thread swap. */
   1372 	 if (dispatch_ctr > 1000)
   1373             dispatch_ctr = 1000;
   1374 	 break;
   1375 
   1376       case VG_TRC_INNER_COUNTERZERO:
   1377 	 /* Timeslice is out.  Let a new thread be scheduled. */
   1378 	 vg_assert(dispatch_ctr == 0);
   1379 	 break;
   1380 
   1381       case VG_TRC_FAULT_SIGNAL:
   1382 	 /* Everything should be set up (either we're exiting, or
   1383 	    about to start in a signal handler). */
   1384 	 break;
   1385 
   1386       case VEX_TRC_JMP_MAPFAIL:
   1387          /* Failure of arch-specific address translation (x86/amd64
   1388             segment override use) */
   1389          /* jrs 2005 03 11: is this correct? */
   1390          VG_(synth_fault)(tid);
   1391          break;
   1392 
   1393       case VEX_TRC_JMP_EMWARN: {
   1394          static Int  counts[EmNote_NUMBER];
   1395          static Bool counts_initted = False;
   1396          VexEmNote ew;
   1397          const HChar* what;
   1398          Bool      show;
   1399          Int       q;
   1400          if (!counts_initted) {
   1401             counts_initted = True;
   1402             for (q = 0; q < EmNote_NUMBER; q++)
   1403                counts[q] = 0;
   1404          }
   1405          ew   = (VexEmNote)VG_(threads)[tid].arch.vex.guest_EMNOTE;
   1406          what = (ew < 0 || ew >= EmNote_NUMBER)
   1407                    ? "unknown (?!)"
   1408                    : LibVEX_EmNote_string(ew);
   1409          show = (ew < 0 || ew >= EmNote_NUMBER)
   1410                    ? True
   1411                    : counts[ew]++ < 3;
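         /* That is: out-of-range warning codes are always shown, while
            each recognised warning kind is shown at most three times. */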
   1412          if (show && VG_(clo_show_emwarns) && !VG_(clo_xml)) {
   1413             VG_(message)( Vg_UserMsg,
   1414                           "Emulation warning: unsupported action:\n");
   1415             VG_(message)( Vg_UserMsg, "  %s\n", what);
   1416             VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
   1417          }
   1418          break;
   1419       }
   1420 
   1421       case VEX_TRC_JMP_EMFAIL: {
   1422          VexEmNote ew;
   1423          const HChar* what;
   1424          ew   = (VexEmNote)VG_(threads)[tid].arch.vex.guest_EMNOTE;
   1425          what = (ew < 0 || ew >= EmNote_NUMBER)
   1426                    ? "unknown (?!)"
   1427                    : LibVEX_EmNote_string(ew);
   1428          VG_(message)( Vg_UserMsg,
   1429                        "Emulation fatal error -- Valgrind cannot continue:\n");
   1430          VG_(message)( Vg_UserMsg, "  %s\n", what);
   1431          VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
   1432          VG_(message)(Vg_UserMsg, "\n");
   1433          VG_(message)(Vg_UserMsg, "Valgrind has to exit now.  Sorry.\n");
   1434          VG_(message)(Vg_UserMsg, "\n");
   1435          VG_(exit)(1);
   1436          break;
   1437       }
   1438 
   1439       case VEX_TRC_JMP_SIGILL:
   1440          VG_(synth_sigill)(tid, VG_(get_IP)(tid));
   1441          break;
   1442 
   1443       case VEX_TRC_JMP_SIGTRAP:
   1444          VG_(synth_sigtrap)(tid);
   1445          break;
   1446 
   1447       case VEX_TRC_JMP_SIGSEGV:
   1448          VG_(synth_fault)(tid);
   1449          break;
   1450 
   1451       case VEX_TRC_JMP_SIGBUS:
   1452          VG_(synth_sigbus)(tid);
   1453          break;
   1454 
   1455       case VEX_TRC_JMP_SIGFPE_INTDIV:
   1456          VG_(synth_sigfpe)(tid, VKI_FPE_INTDIV);
   1457          break;
   1458 
   1459       case VEX_TRC_JMP_SIGFPE_INTOVF:
   1460          VG_(synth_sigfpe)(tid, VKI_FPE_INTOVF);
   1461          break;
   1462 
   1463       case VEX_TRC_JMP_NODECODE: {
   1464          Addr addr = VG_(get_IP)(tid);
   1465 
   1466          if (VG_(clo_sigill_diag)) {
   1467             VG_(umsg)(
   1468                "valgrind: Unrecognised instruction at address %#lx.\n", addr);
   1469             VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
   1470 #        define M(a) VG_(umsg)(a "\n");
   1471          M("Your program just tried to execute an instruction that Valgrind" );
   1472          M("did not recognise.  There are two possible reasons for this."    );
   1473          M("1. Your program has a bug and erroneously jumped to a non-code"  );
   1474          M("   location.  If you are running Memcheck and you just saw a"    );
   1475          M("   warning about a bad jump, it's probably your program's fault.");
   1476          M("2. The instruction is legitimate but Valgrind doesn't handle it,");
   1477          M("   i.e. it's Valgrind's fault.  If you think this is the case or");
   1478          M("   you are not sure, please let us know and we'll try to fix it.");
   1479          M("Either way, Valgrind will now raise a SIGILL signal which will"  );
   1480          M("probably kill your program."                                     );
   1481 #        undef M
   1482          }
   1483 #        if defined(VGA_s390x)
   1484          /* Now that the complaint is out we need to adjust the guest_IA. The
   1485             reason is that -- after raising the exception -- execution will
   1486             continue with the insn that follows the invalid insn. As the first
   1487             2 bits of the invalid insn determine its length in the usual way,
   1488             we can compute the address of the next insn here and adjust the
   1489             guest_IA accordingly. This adjustment is essential and tested by
   1490             none/tests/s390x/op_exception.c (which would loop forever
   1491             otherwise) */
   1492          UChar byte = ((UChar *)addr)[0];
   1493          UInt  insn_length = ((((byte >> 6) + 1) >> 1) + 1) << 1;
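         /* Worked example of that formula (it encodes the standard s390x
            length coding: the top two bits of the first byte give
            00 -> 2 bytes, 01 or 10 -> 4 bytes, 11 -> 6 bytes).  For
            byte == 0xB2 the top bits are 10, so insn_length == 4. */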
   1494          Addr  next_insn_addr = addr + insn_length;
   1495          VG_(set_IP)(tid, next_insn_addr);
   1496 #        endif
   1497          VG_(synth_sigill)(tid, addr);
   1498          break;
   1499       }
   1500 
   1501       case VEX_TRC_JMP_INVALICACHE:
   1502          VG_(discard_translations)(
   1503             (Addr64)VG_(threads)[tid].arch.vex.guest_CMSTART,
   1504             VG_(threads)[tid].arch.vex.guest_CMLEN,
   1505             "scheduler(VEX_TRC_JMP_INVALICACHE)"
   1506          );
   1507          if (0)
   1508             VG_(printf)("dump translations done.\n");
   1509          break;
   1510 
   1511       case VEX_TRC_JMP_FLUSHDCACHE: {
   1512          void* start = (void*)VG_(threads)[tid].arch.vex.guest_CMSTART;
   1513          SizeT len   = VG_(threads)[tid].arch.vex.guest_CMLEN;
   1514          VG_(debugLog)(2, "sched", "flush_dcache(%p, %lu)\n", start, len);
   1515          VG_(flush_dcache)(start, len);
   1516          break;
   1517       }
   1518 
   1519       case VG_TRC_INVARIANT_FAILED:
   1520          /* This typically happens if, after running generated code,
   1521             it is detected that host CPU settings (eg, FPU/Vector
   1522             control words) are not as they should be.  Vex's code
   1523             generation specifies the state such control words should
   1524             be in on entry to Vex-generated code, and they should be
   1525             unchanged on exit from it.  Failure of this assertion
   1526             usually means a bug in Vex's code generation. */
   1527          //{ UInt xx;
   1528          //  __asm__ __volatile__ (
   1529          //     "\t.word 0xEEF12A10\n"  // fmrx r2,fpscr
   1530          //     "\tmov %0, r2" : "=r"(xx) : : "r2" );
   1531          //  VG_(printf)("QQQQ new fpscr = %08x\n", xx);
   1532          //}
   1533          vg_assert2(0, "VG_(scheduler), phase 3: "
   1534                        "run_innerloop detected host "
   1535                        "state invariant failure", trc);
   1536 
   1537       case VEX_TRC_JMP_SYS_SYSENTER:
   1538          /* Do whatever simulation is appropriate for an x86 sysenter
   1539             instruction.  Note that it is critical to set this thread's
   1540             guest_EIP to point at the code to execute after the
   1541             sysenter, since Vex-generated code will not have set it --
   1542             vex does not know what it should be.  Vex sets the next
   1543             address to zero, so if you don't set guest_EIP, the thread
   1544             will jump to zero afterwards and probably die as a result. */
   1545 #        if defined(VGP_x86_linux)
   1546          vg_assert2(0, "VG_(scheduler), phase 3: "
   1547                        "sysenter_x86 on x86-linux is not supported");
   1548 #        elif defined(VGP_x86_darwin)
   1549          /* return address in client edx */
   1550          VG_(threads)[tid].arch.vex.guest_EIP
   1551             = VG_(threads)[tid].arch.vex.guest_EDX;
   1552          handle_syscall(tid, trc[0]);
   1553 #        else
   1554          vg_assert2(0, "VG_(scheduler), phase 3: "
   1555                        "sysenter_x86 on non-x86 platform?!?!");
   1556 #        endif
   1557          break;
   1558 
   1559       default:
   1560 	 vg_assert2(0, "VG_(scheduler), phase 3: "
   1561                        "unexpected thread return code (%u)", trc[0]);
   1562 	 /* NOTREACHED */
   1563 	 break;
   1564 
   1565       } /* switch (trc) */
   1566 
   1567       if (UNLIKELY(VG_(clo_profyle_sbs)) && VG_(clo_profyle_interval) > 0)
   1568          maybe_show_sb_profile();
   1569    }
   1570 
   1571    if (VG_(clo_trace_sched))
   1572       print_sched_event(tid, "exiting VG_(scheduler)");
   1573 
   1574    vg_assert(VG_(is_exiting)(tid));
   1575 
   1576    return tst->exitreason;
   1577 }
   1578 
   1579 
   1580 /*
   1581 	   This causes all threads to forcibly exit.  They aren't actually
   1582    dead by the time this returns; you need to call
   1583    VG_(reap_threads)() to wait for them.
   1584  */
   1585 void VG_(nuke_all_threads_except) ( ThreadId me, VgSchedReturnCode src )
   1586 {
   1587    ThreadId tid;
   1588 
   1589    vg_assert(VG_(is_running_thread)(me));
   1590 
   1591    for (tid = 1; tid < VG_N_THREADS; tid++) {
   1592       if (tid == me
   1593           || VG_(threads)[tid].status == VgTs_Empty)
   1594          continue;
   1595       if (0)
   1596          VG_(printf)(
   1597             "VG_(nuke_all_threads_except): nuking tid %d\n", tid);
   1598 
   1599       VG_(threads)[tid].exitreason = src;
   1600       if (src == VgSrc_FatalSig)
   1601          VG_(threads)[tid].os_state.fatalsig = VKI_SIGKILL;
   1602       VG_(get_thread_out_of_syscall)(tid);
   1603    }
   1604 }
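
/* A caller is expected to pair the above with VG_(reap_threads), along
   the lines of the following sketch (the reap signature is assumed here):

      VG_(nuke_all_threads_except)(tid, VgSrc_FatalSig);
      VG_(reap_threads)(tid);
*/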
   1605 
   1606 
   1607 /* ---------------------------------------------------------------------
   1608    Specifying shadow register values
   1609    ------------------------------------------------------------------ */
   1610 
   1611 #if defined(VGA_x86)
   1612 #  define VG_CLREQ_ARGS       guest_EAX
   1613 #  define VG_CLREQ_RET        guest_EDX
   1614 #elif defined(VGA_amd64)
   1615 #  define VG_CLREQ_ARGS       guest_RAX
   1616 #  define VG_CLREQ_RET        guest_RDX
   1617 #elif defined(VGA_ppc32) || defined(VGA_ppc64)
   1618 #  define VG_CLREQ_ARGS       guest_GPR4
   1619 #  define VG_CLREQ_RET        guest_GPR3
   1620 #elif defined(VGA_arm)
   1621 #  define VG_CLREQ_ARGS       guest_R4
   1622 #  define VG_CLREQ_RET        guest_R3
   1623 #elif defined(VGA_arm64)
   1624 #  define VG_CLREQ_ARGS       guest_X4
   1625 #  define VG_CLREQ_RET        guest_X3
   1626 #elif defined (VGA_s390x)
   1627 #  define VG_CLREQ_ARGS       guest_r2
   1628 #  define VG_CLREQ_RET        guest_r3
   1629 #elif defined(VGA_mips32) || defined(VGA_mips64)
   1630 #  define VG_CLREQ_ARGS       guest_r12
   1631 #  define VG_CLREQ_RET        guest_r11
   1632 #else
   1633 #  error Unknown arch
   1634 #endif
   1635 
   1636 #define CLREQ_ARGS(regs)   ((regs).vex.VG_CLREQ_ARGS)
   1637 #define CLREQ_RET(regs)    ((regs).vex.VG_CLREQ_RET)
   1638 #define O_CLREQ_RET        (offsetof(VexGuestArchState, VG_CLREQ_RET))
   1639 
   1640 // These macros write a value to a client's thread register, and tell the
   1641 // tool that it's happened (if necessary).
   1642 
   1643 #define SET_CLREQ_RETVAL(zztid, zzval) \
   1644    do { CLREQ_RET(VG_(threads)[zztid].arch) = (zzval); \
   1645         VG_TRACK( post_reg_write, \
   1646                   Vg_CoreClientReq, zztid, O_CLREQ_RET, sizeof(UWord)); \
   1647    } while (0)
   1648 
   1649 #define SET_CLCALL_RETVAL(zztid, zzval, f) \
   1650    do { CLREQ_RET(VG_(threads)[zztid].arch) = (zzval); \
   1651         VG_TRACK( post_reg_write_clientcall_return, \
   1652                   zztid, O_CLREQ_RET, sizeof(UWord), f); \
   1653    } while (0)
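
// For instance, handing an error count back to the client (as done below
// for VG_USERREQ__COUNT_ERRORS) is just
//    SET_CLREQ_RETVAL(tid, VG_(get_n_errs_found)());
// which writes the guest register and issues the post_reg_write event.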
   1654 
   1655 
   1656 /* ---------------------------------------------------------------------
   1657    Handle client requests.
   1658    ------------------------------------------------------------------ */
   1659 
   1660 // OS-specific(?) client requests
   1661 static Bool os_client_request(ThreadId tid, UWord *args)
   1662 {
   1663    Bool handled = True;
   1664 
   1665    vg_assert(VG_(is_running_thread)(tid));
   1666 
   1667    switch(args[0]) {
   1668    case VG_USERREQ__LIBC_FREERES_DONE:
   1669       /* This is equivalent to an exit() syscall, but we don't set the
   1670 	 exitcode (since it might already be set) */
   1671       if (0 || VG_(clo_trace_syscalls) || VG_(clo_trace_sched))
   1672          VG_(message)(Vg_DebugMsg,
   1673                       "__libc_freeres() done; really quitting!\n");
   1674       VG_(threads)[tid].exitreason = VgSrc_ExitThread;
   1675       break;
   1676 
   1677    default:
   1678       handled = False;
   1679       break;
   1680    }
   1681 
   1682    return handled;
   1683 }
   1684 
   1685 
   1686 /* Write out a client message, possibly including a back trace. Return
   1687    the number of characters written. In case of XML output, the format
   1688    string as well as any arguments it requires will be XML'ified.
   1689    I.e. special characters such as the angle brackets will be translated
   1690    into proper escape sequences. */
   1691 static
   1692 Int print_client_message( ThreadId tid, const HChar *format,
   1693                           va_list *vargsp, Bool include_backtrace)
   1694 {
   1695    Int count;
   1696 
   1697    if (VG_(clo_xml)) {
   1698       /* Translate the format string as follows:
   1699          <  -->  &lt;
   1700          >  -->  &gt;
   1701          &  -->  &amp;
   1702          %s -->  %pS
   1703          Yes, yes, it's simplified but in synch with
   1704          myvprintf_str_XML_simplistic and VG_(debugLog_vprintf).
   1705       */
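      /* For example, a format string "count <%s> = %d" comes out of the
         loop below as "count &lt;%pS&gt; = %d". */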
   1706 
   1707       /* Allocate a buffer that is for sure large enough. */
   1708       HChar xml_format[VG_(strlen)(format) * 5 + 1];
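      /* (Worst case is '&' -> "&amp;", a five-for-one expansion, hence
         the factor of 5 above.) */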
   1709 
   1710       const HChar *p;
   1711       HChar *q = xml_format;
   1712 
   1713       for (p = format; *p; ++p) {
   1714          switch (*p) {
   1715          case '<': VG_(strcpy)(q, "&lt;");  q += 4; break;
   1716          case '>': VG_(strcpy)(q, "&gt;");  q += 4; break;
   1717          case '&': VG_(strcpy)(q, "&amp;"); q += 5; break;
   1718          case '%':
   1719             /* Careful: make sure %%s stays %%s */
   1720             *q++ = *p++;
   1721             if (*p == 's') {
   1722               *q++ = 'p';
   1723               *q++ = 'S';
   1724             } else {
   1725               *q++ = *p;
   1726             }
   1727             break;
   1728 
   1729          default:
   1730             *q++ = *p;
   1731             break;
   1732          }
   1733       }
   1734       *q = '\0';
   1735 
   1736       VG_(printf_xml)( "<clientmsg>\n" );
   1737       VG_(printf_xml)( "  <tid>%d</tid>\n", tid );
   1738       VG_(printf_xml)( "  <text>" );
   1739       count = VG_(vprintf_xml)( xml_format, *vargsp );
   1740       VG_(printf_xml)( "  </text>\n" );
   1741    } else {
   1742       count = VG_(vmessage)( Vg_ClientMsg, format, *vargsp );
   1743       VG_(message_flush)();
   1744    }
   1745 
   1746    if (include_backtrace)
   1747       VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
   1748 
   1749    if (VG_(clo_xml))
   1750       VG_(printf_xml)( "</clientmsg>\n" );
   1751 
   1752    return count;
   1753 }
   1754 
   1755 
   1756 /* Do a client request for the thread tid.  After the request, tid may
   1757    or may not still be runnable; if not, the scheduler will have to
   1758    choose a new thread to run.
   1759 */
   1760 static
   1761 void do_client_request ( ThreadId tid )
   1762 {
   1763    UWord* arg = (UWord*)(CLREQ_ARGS(VG_(threads)[tid].arch));
   1764    UWord req_no = arg[0];
   1765 
   1766    if (0)
   1767       VG_(printf)("req no = 0x%llx, arg = %p\n", (ULong)req_no, arg);
   1768    switch (req_no) {
   1769 
   1770       case VG_USERREQ__CLIENT_CALL0: {
   1771          UWord (*f)(ThreadId) = (void*)arg[1];
   1772 	 if (f == NULL)
   1773 	    VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL0: func=%p\n", f);
   1774 	 else
   1775 	    SET_CLCALL_RETVAL(tid, f ( tid ), (Addr)f);
   1776          break;
   1777       }
   1778       case VG_USERREQ__CLIENT_CALL1: {
   1779          UWord (*f)(ThreadId, UWord) = (void*)arg[1];
   1780 	 if (f == NULL)
   1781 	    VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL1: func=%p\n", f);
   1782 	 else
   1783 	    SET_CLCALL_RETVAL(tid, f ( tid, arg[2] ), (Addr)f );
   1784          break;
   1785       }
   1786       case VG_USERREQ__CLIENT_CALL2: {
   1787          UWord (*f)(ThreadId, UWord, UWord) = (void*)arg[1];
   1788 	 if (f == NULL)
   1789 	    VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL2: func=%p\n", f);
   1790 	 else
   1791 	    SET_CLCALL_RETVAL(tid, f ( tid, arg[2], arg[3] ), (Addr)f );
   1792          break;
   1793       }
   1794       case VG_USERREQ__CLIENT_CALL3: {
   1795          UWord (*f)(ThreadId, UWord, UWord, UWord) = (void*)arg[1];
   1796 	 if (f == NULL)
   1797 	    VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL3: func=%p\n", f);
   1798 	 else
   1799 	    SET_CLCALL_RETVAL(tid, f ( tid, arg[2], arg[3], arg[4] ), (Addr)f );
   1800          break;
   1801       }
   1802 
   1803       // Nb: this looks like a circular definition, because it kind of is.
   1804       // See comment in valgrind.h to understand what's going on.
   1805       case VG_USERREQ__RUNNING_ON_VALGRIND:
   1806          SET_CLREQ_RETVAL(tid, RUNNING_ON_VALGRIND+1);
   1807          break;
   1808 
   1809       case VG_USERREQ__PRINTF: {
   1810          const HChar* format = (HChar *)arg[1];
   1811          /* JRS 2010-Jan-28: this is DEPRECATED; use the
   1812             _VALIST_BY_REF version instead */
   1813          if (sizeof(va_list) != sizeof(UWord))
   1814             goto va_list_casting_error_NORETURN;
   1815          union {
   1816             va_list vargs;
   1817             unsigned long uw;
   1818          } u;
   1819          u.uw = (unsigned long)arg[2];
   1820          Int count =
   1821             print_client_message( tid, format, &u.vargs,
   1822                                   /* include_backtrace */ False );
   1823          SET_CLREQ_RETVAL( tid, count );
   1824          break;
   1825       }
   1826 
   1827       case VG_USERREQ__PRINTF_BACKTRACE: {
   1828          const HChar* format = (HChar *)arg[1];
   1829          /* JRS 2010-Jan-28: this is DEPRECATED; use the
   1830             _VALIST_BY_REF version instead */
   1831          if (sizeof(va_list) != sizeof(UWord))
   1832             goto va_list_casting_error_NORETURN;
   1833          union {
   1834             va_list vargs;
   1835             unsigned long uw;
   1836          } u;
   1837          u.uw = (unsigned long)arg[2];
   1838          Int count =
   1839             print_client_message( tid, format, &u.vargs,
   1840                                   /* include_backtrace */ True );
   1841          SET_CLREQ_RETVAL( tid, count );
   1842          break;
   1843       }
   1844 
   1845       case VG_USERREQ__PRINTF_VALIST_BY_REF: {
   1846          const HChar* format = (HChar *)arg[1];
   1847          va_list* vargsp = (va_list*)arg[2];
   1848          Int count =
   1849             print_client_message( tid, format, vargsp,
   1850                                   /* include_backtrace */ False );
   1851 
   1852          SET_CLREQ_RETVAL( tid, count );
   1853          break;
   1854       }
   1855 
   1856       case VG_USERREQ__PRINTF_BACKTRACE_VALIST_BY_REF: {
   1857          const HChar* format = (HChar *)arg[1];
   1858          va_list* vargsp = (va_list*)arg[2];
   1859          Int count =
   1860             print_client_message( tid, format, vargsp,
   1861                                   /* include_backtrace */ True );
   1862          SET_CLREQ_RETVAL( tid, count );
   1863          break;
   1864       }
   1865 
   1866       case VG_USERREQ__INTERNAL_PRINTF_VALIST_BY_REF: {
   1867          va_list* vargsp = (va_list*)arg[2];
   1868          Int count =
   1869             VG_(vmessage)( Vg_DebugMsg, (HChar *)arg[1], *vargsp );
   1870          VG_(message_flush)();
   1871          SET_CLREQ_RETVAL( tid, count );
   1872          break;
   1873       }
   1874 
   1875       case VG_USERREQ__ADD_IFUNC_TARGET: {
   1876          VG_(redir_add_ifunc_target)( arg[1], arg[2] );
   1877          SET_CLREQ_RETVAL( tid, 0);
   1878          break; }
   1879 
   1880       case VG_USERREQ__STACK_REGISTER: {
   1881          UWord sid = VG_(register_stack)((Addr)arg[1], (Addr)arg[2]);
   1882          SET_CLREQ_RETVAL( tid, sid );
   1883          break; }
   1884 
   1885       case VG_USERREQ__STACK_DEREGISTER: {
   1886          VG_(deregister_stack)(arg[1]);
   1887          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
   1888          break; }
   1889 
   1890       case VG_USERREQ__STACK_CHANGE: {
   1891          VG_(change_stack)(arg[1], (Addr)arg[2], (Addr)arg[3]);
   1892          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
   1893          break; }
   1894 
   1895       case VG_USERREQ__GET_MALLOCFUNCS: {
   1896 	 struct vg_mallocfunc_info *info = (struct vg_mallocfunc_info *)arg[1];
   1897 
   1898 	 info->tl_malloc               = VG_(tdict).tool_malloc;
   1899 	 info->tl_calloc               = VG_(tdict).tool_calloc;
   1900 	 info->tl_realloc              = VG_(tdict).tool_realloc;
   1901 	 info->tl_memalign             = VG_(tdict).tool_memalign;
   1902 	 info->tl___builtin_new        = VG_(tdict).tool___builtin_new;
   1903 	 info->tl___builtin_vec_new    = VG_(tdict).tool___builtin_vec_new;
   1904 	 info->tl_free                 = VG_(tdict).tool_free;
   1905 	 info->tl___builtin_delete     = VG_(tdict).tool___builtin_delete;
   1906 	 info->tl___builtin_vec_delete = VG_(tdict).tool___builtin_vec_delete;
   1907          info->tl_malloc_usable_size   = VG_(tdict).tool_malloc_usable_size;
   1908 
   1909 	 info->mallinfo                = VG_(mallinfo);
   1910 	 info->clo_trace_malloc        = VG_(clo_trace_malloc);
   1911 
   1912          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
   1913 
   1914 	 break;
   1915       }
   1916 
   1917       /* Requests from the client program */
   1918 
   1919       case VG_USERREQ__DISCARD_TRANSLATIONS:
   1920          if (VG_(clo_verbosity) > 2)
   1921             VG_(printf)( "client request: DISCARD_TRANSLATIONS,"
   1922                          " addr %p,  len %lu\n",
   1923                          (void*)arg[1], arg[2] );
   1924 
   1925          VG_(discard_translations)(
   1926             arg[1], arg[2], "scheduler(VG_USERREQ__DISCARD_TRANSLATIONS)"
   1927          );
   1928 
   1929          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
   1930 	 break;
   1931 
   1932       case VG_USERREQ__COUNT_ERRORS:
   1933          SET_CLREQ_RETVAL( tid, VG_(get_n_errs_found)() );
   1934          break;
   1935 
   1936       case VG_USERREQ__LOAD_PDB_DEBUGINFO:
   1937          VG_(di_notify_pdb_debuginfo)( arg[1], arg[2], arg[3], arg[4] );
   1938          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
   1939          break;
   1940 
   1941       case VG_USERREQ__MAP_IP_TO_SRCLOC: {
   1942          Addr   ip    = arg[1];
   1943          HChar* buf64 = (HChar*)arg[2];
   1944 
   1945          VG_(memset)(buf64, 0, 64);
   1946          UInt linenum = 0;
   1947          Bool ok = VG_(get_filename_linenum)(
   1948                       ip, &buf64[0], 50, NULL, 0, NULL, &linenum
   1949                    );
   1950          if (ok) {
   1951             /* Find the terminating zero in the first 50 bytes. */
   1952             UInt i;
   1953             for (i = 0; i < 50; i++) {
   1954                if (buf64[i] == 0)
   1955                   break;
   1956             }
   1957             /* We must find a zero somewhere in 0 .. 49.  Else
   1958             /* We must find a zero somewhere in 0 .. 49; otherwise
   1959                VG_(get_filename_linenum) is not properly
   1960                zero-terminating its output. */
   1961             VG_(sprintf)(&buf64[i], ":%u", linenum);
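            /* Worst case: up to 49 filename bytes, ':', 10 digits of a
               UInt line number and a trailing NUL is 61 bytes, which
               fits in the 64-byte buffer. */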
   1962          } else {
   1963             buf64[0] = 0;
   1964          }
   1965 
   1966          SET_CLREQ_RETVAL( tid, 0 ); /* return value is meaningless */
   1967          break;
   1968       }
   1969 
   1970       case VG_USERREQ__CHANGE_ERR_DISABLEMENT: {
   1971          Word delta = arg[1];
   1972          vg_assert(delta == 1 || delta == -1);
   1973          ThreadState* tst = VG_(get_ThreadState)(tid);
   1974          vg_assert(tst);
   1975          if (delta == 1 && tst->err_disablement_level < 0xFFFFFFFF) {
   1976             tst->err_disablement_level++;
   1977          }
   1978          else
   1979          if (delta == -1 && tst->err_disablement_level > 0) {
   1980             tst->err_disablement_level--;
   1981          }
   1982          SET_CLREQ_RETVAL( tid, 0 ); /* return value is meaningless */
   1983          break;
   1984       }
   1985 
   1986       case VG_USERREQ__GDB_MONITOR_COMMAND: {
   1987          UWord ret;
   1988          ret = (UWord) VG_(client_monitor_command) ((HChar*)arg[1]);
   1989          SET_CLREQ_RETVAL(tid, ret);
   1990          break;
   1991       }
   1992 
   1993       case VG_USERREQ__MALLOCLIKE_BLOCK:
   1994       case VG_USERREQ__RESIZEINPLACE_BLOCK:
   1995       case VG_USERREQ__FREELIKE_BLOCK:
   1996          // Ignore them if the addr is NULL;  otherwise pass on to the tool.
   1997          if (!arg[1]) {
   1998             SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
   1999             break;
   2000          } else {
   2001             goto my_default;
   2002          }
   2003 
   2004       case VG_USERREQ__VEX_INIT_FOR_IRI:
   2005          LibVEX_InitIRI ( (IRICB *)arg[1] );
   2006          break;
   2007 
   2008       default:
   2009        my_default:
   2010 	 if (os_client_request(tid, arg)) {
   2011 	    // do nothing, os_client_request() handled it
   2012          } else if (VG_(needs).client_requests) {
   2013 	    UWord ret;
   2014 
   2015             if (VG_(clo_verbosity) > 2)
   2016                VG_(printf)("client request: code %lx,  addr %p,  len %lu\n",
   2017                            arg[0], (void*)arg[1], arg[2] );
   2018 
   2019 	    if ( VG_TDICT_CALL(tool_handle_client_request, tid, arg, &ret) )
   2020 	       SET_CLREQ_RETVAL(tid, ret);
   2021          } else {
   2022 	    static Bool whined = False;
   2023 
   2024 	    if (!whined && VG_(clo_verbosity) > 2) {
   2025                // Allow for requests handled in core but defined by tools,
   2026                // which have zero in both of their two high bytes.
   2027                HChar c1 = (arg[0] >> 24) & 0xff;
   2028                HChar c2 = (arg[0] >> 16) & 0xff;
   2029                if (c1 == 0) c1 = '_';
   2030                if (c2 == 0) c2 = '_';
   2031 	       VG_(message)(Vg_UserMsg, "Warning:\n"
   2032                    "  unhandled client request: 0x%lx (%c%c+0x%lx).  Perhaps\n"
   2033 		   "  VG_(needs).client_requests should be set?\n",
   2034 			    arg[0], c1, c2, arg[0] & 0xffff);
   2035 	       whined = True;
   2036 	    }
   2037          }
   2038          break;
   2039    }
   2040    return;
   2041 
   2042    /*NOTREACHED*/
   2043   va_list_casting_error_NORETURN:
   2044    VG_(umsg)(
   2045       "Valgrind: fatal error - cannot continue: use of the deprecated\n"
   2046       "client requests VG_USERREQ__PRINTF or VG_USERREQ__PRINTF_BACKTRACE\n"
   2047       "on a platform where they cannot be supported.  Please use the\n"
   2048       "equivalent _VALIST_BY_REF versions instead.\n"
   2049       "\n"
   2050       "This is a binary-incompatible change in Valgrind's client request\n"
   2051       "mechanism.  It is unfortunate, but difficult to avoid.  End-users\n"
   2052       "are expected to almost never see this message.  The only case in\n"
   2053       "which you might see this message is if your code uses the macros\n"
   2054       "VALGRIND_PRINTF or VALGRIND_PRINTF_BACKTRACE.  If so, you will need\n"
   2055       "to recompile such code, using the header files from this version of\n"
   2056       "Valgrind, and not any previous version.\n"
   2057       "\n"
   2058       "If you see this message in any other circumstances, it is probably\n"
   2059       "a bug in Valgrind.  In this case, please file a bug report at\n"
   2060       "\n"
   2061       "   http://www.valgrind.org/support/bug_reports.html\n"
   2062       "\n"
   2063       "Will now abort.\n"
   2064    );
   2065    vg_assert(0);
   2066 }
   2067 
   2068 
   2069 /* ---------------------------------------------------------------------
   2070    Sanity checking (permanently engaged)
   2071    ------------------------------------------------------------------ */
   2072 
   2073 /* Internal consistency checks on the sched structures. */
   2074 static
   2075 void scheduler_sanity ( ThreadId tid )
   2076 {
   2077    Bool bad = False;
   2078    Int lwpid = VG_(gettid)();
   2079 
   2080    if (!VG_(is_running_thread)(tid)) {
   2081       VG_(message)(Vg_DebugMsg,
   2082 		   "Thread %d is supposed to be running, "
   2083                    "but doesn't own the_BigLock (owned by %d)\n",
   2084 		   tid, VG_(running_tid));
   2085       bad = True;
   2086    }
   2087 
   2088    if (lwpid != VG_(threads)[tid].os_state.lwpid) {
   2089       VG_(message)(Vg_DebugMsg,
   2090                    "Thread %d supposed to be in LWP %d, but we're actually %d\n",
   2091                    tid, VG_(threads)[tid].os_state.lwpid, VG_(gettid)());
   2092       bad = True;
   2093    }
   2094 
   2095    if (lwpid != ML_(get_sched_lock_owner)(the_BigLock)) {
   2096       VG_(message)(Vg_DebugMsg,
   2097                    "Thread (LWPID) %d doesn't own the_BigLock\n",
   2098                    tid);
   2099       bad = True;
   2100    }
   2101 
   2102    if (0) {
   2103       /* Periodically show the state of all threads, for debugging
   2104          purposes. */
   2105       static UInt lasttime = 0;
   2106       UInt now;
   2107       now = VG_(read_millisecond_timer)();
   2108       if ((!bad) && (lasttime + 4000/*ms*/ <= now)) {
   2109          lasttime = now;
   2110          VG_(printf)("\n------------ Sched State at %d ms ------------\n",
   2111                      (Int)now);
   2112          VG_(show_sched_status)(True,  // host_stacktrace
   2113                                 True,  // valgrind_stack_usage
   2114                                 True); // exited_threads
   2115       }
   2116    }
   2117 
   2118    /* core_panic also shows the sched status, which is why we don't
   2119       show it above if bad==True. */
   2120    if (bad)
   2121       VG_(core_panic)("scheduler_sanity: failed");
   2122 }
   2123 
   2124 void VG_(sanity_check_general) ( Bool force_expensive )
   2125 {
   2126    ThreadId tid;
   2127 
   2128    static UInt next_slow_check_at = 1;
   2129    static UInt slow_check_interval = 25;
   2130 
   2131    if (VG_(clo_sanity_level) < 1) return;
   2132 
   2133    /* --- First do all the tests that we can do quickly. ---*/
   2134 
   2135    sanity_fast_count++;
   2136 
   2137    /* Check stuff pertaining to the memory check system. */
   2138 
   2139    /* Check that nobody has spuriously claimed that the first or
   2140       last 16 pages of memory have become accessible [...] */
   2141    if (VG_(needs).sanity_checks) {
   2142       vg_assert(VG_TDICT_CALL(tool_cheap_sanity_check));
   2143    }
   2144 
   2145    /* --- Now some more expensive checks. ---*/
   2146 
   2147    /* Once every now and again, check some more expensive stuff.
   2148       Gradually increase the interval between such checks so as not to
   2149       burden long-running programs too much. */
   2150    if ( force_expensive
   2151         || VG_(clo_sanity_level) > 1
   2152         || (VG_(clo_sanity_level) == 1
   2153             && sanity_fast_count == next_slow_check_at)) {
   2154 
   2155       if (0) VG_(printf)("SLOW at %d\n", sanity_fast_count-1);
   2156 
   2157       next_slow_check_at = sanity_fast_count - 1 + slow_check_interval;
   2158       slow_check_interval++;
   2159       sanity_slow_count++;
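      /* Assuming sanity_fast_count starts at zero and only this periodic
         path fires, the slow checks land at fast-check counts
         1, 25, 50, 76, 103, ..., with the gap growing by one each time. */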
   2160 
   2161       if (VG_(needs).sanity_checks) {
   2162           vg_assert(VG_TDICT_CALL(tool_expensive_sanity_check));
   2163       }
   2164 
   2165       /* Look for stack overruns.  Visit all threads. */
   2166       for (tid = 1; tid < VG_N_THREADS; tid++) {
   2167 	 SizeT    remains;
   2168          VgStack* stack;
   2169 
   2170 	 if (VG_(threads)[tid].status == VgTs_Empty ||
   2171 	     VG_(threads)[tid].status == VgTs_Zombie)
   2172 	    continue;
   2173 
   2174          stack
   2175             = (VgStack*)
   2176               VG_(get_ThreadState)(tid)->os_state.valgrind_stack_base;
   2177          SizeT limit
   2178             = 4096; // Let's say.  Checking more causes lots of L2 misses.
   2179 	 remains
   2180             = VG_(am_get_VgStack_unused_szB)(stack, limit);
   2181 	 if (remains < limit)
   2182 	    VG_(message)(Vg_DebugMsg,
   2183                          "WARNING: Thread %d is within %ld bytes "
   2184                          "of running out of stack!\n",
   2185 		         tid, remains);
   2186       }
   2187    }
   2188 
   2189    if (VG_(clo_sanity_level) > 1) {
   2190       /* Check sanity of the low-level memory manager.  Note that bugs
   2191          in the client's code can cause this to fail, so we don't do
   2192          this check unless specially asked for.  And because it's
   2193          potentially very expensive. */
   2194       VG_(sanity_check_malloc_all)();
   2195    }
   2196 }
   2197 
   2198 /*--------------------------------------------------------------------*/
   2199 /*--- end                                                          ---*/
   2200 /*--------------------------------------------------------------------*/
   2201