      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Thread scheduling.                               scheduler.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2000-2013 Julian Seward
      11       jseward@acm.org
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     26    02111-1307, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 */
     30 
     31 /*
     32    Overview
     33 
     34    Valgrind tries to emulate the kernel's threading as closely as
     35    possible.  The client does all threading via the normal syscalls
     36    (on Linux: clone, etc).  Valgrind emulates this by creating exactly
     37    the same process structure as would be created without Valgrind.
     38    There are no extra threads.
     39 
     40    The main difference is that Valgrind only allows one client thread
     41    to run at once.  This is controlled with the CPU Big Lock,
     42    "the_BigLock".  Any time a thread wants to run client code or
     43    manipulate any shared state (which is anything other than its own
     44    ThreadState entry), it must hold the_BigLock.
     45 
     46    When a thread is about to block in a blocking syscall, it releases
     47    the_BigLock, and re-takes it when it becomes runnable again (either
     48    because the syscall finished, or we took a signal).
     49 
     50    VG_(scheduler) therefore runs in each thread.  It returns only when
     51    the thread is exiting, either because it exited itself, or it was
     52    told to exit by another thread.
     53 
     54    This file is almost entirely OS-independent.  The details of how
     55    the OS handles threading and signalling are abstracted away and
     56    implemented elsewhere.  [Some of the functions have worked their
     57    way back for the moment, until we do an OS port in earnest...]
     58 */
     59 
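/* Illustrative sketch only (not part of the build): the locking
   discipline described above, as seen from a syscall wrapper's point
   of view.  The helper name do_blocking_syscall is hypothetical; the
   VG_(release_BigLock)/VG_(acquire_BigLock) calls are the real
   interfaces defined later in this file.

      // About to block: give up the lock so another thread can run.
      VG_(release_BigLock)(tid, VgTs_WaitSys, "syscall wrapper");

      res = do_blocking_syscall(...);   // hypothetical helper

      // Runnable again (syscall finished, or a signal arrived):
      // re-take the lock before touching any shared state.
      VG_(acquire_BigLock)(tid, "syscall wrapper");
*/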
     60 
     61 #include "pub_core_basics.h"
     62 #include "pub_core_debuglog.h"
     63 #include "pub_core_vki.h"
     64 #include "pub_core_vkiscnums.h"  // __NR_sched_yield
     65 #include "pub_core_threadstate.h"
     66 #include "pub_core_clientstate.h"
     67 #include "pub_core_aspacemgr.h"
     68 #include "pub_core_clreq.h"      // for VG_USERREQ__*
     69 #include "pub_core_dispatch.h"
     70 #include "pub_core_errormgr.h"   // For VG_(get_n_errs_found)()
     71 #include "pub_core_gdbserver.h"  // for VG_(gdbserver)/VG_(gdbserver_activity)
     72 #include "pub_core_libcbase.h"
     73 #include "pub_core_libcassert.h"
     74 #include "pub_core_libcprint.h"
     75 #include "pub_core_libcproc.h"
     76 #include "pub_core_libcsignal.h"
     77 #if defined(VGO_darwin)
     78 #include "pub_core_mach.h"
     79 #endif
     80 #include "pub_core_machine.h"
     81 #include "pub_core_mallocfree.h"
     82 #include "pub_core_options.h"
     83 #include "pub_core_replacemalloc.h"
     84 #include "pub_core_sbprofile.h"
     85 #include "pub_core_signals.h"
     86 #include "pub_core_stacks.h"
     87 #include "pub_core_stacktrace.h"    // For VG_(get_and_pp_StackTrace)()
     88 #include "pub_core_syscall.h"
     89 #include "pub_core_syswrap.h"
     90 #include "pub_core_tooliface.h"
     91 #include "pub_core_translate.h"     // For VG_(translate)()
     92 #include "pub_core_transtab.h"
     93 #include "pub_core_debuginfo.h"     // VG_(di_notify_pdb_debuginfo)
     94 #include "priv_sched-lock.h"
     95 #include "pub_core_scheduler.h"     // self
     96 #include "pub_core_redir.h"
     97 #include "libvex_emnote.h"          // VexEmNote
     98 
     99 
    100 /* ---------------------------------------------------------------------
    101    Types and globals for the scheduler.
    102    ------------------------------------------------------------------ */
    103 
     104 /* ThreadId and ThreadState are defined elsewhere. */
    105 
    106 /* Defines the thread-scheduling timeslice, in terms of the number of
    107    basic blocks we attempt to run each thread for.  Smaller values
    108    give finer interleaving but much increased scheduling overheads. */
    109 #define SCHEDULING_QUANTUM   100000
    110 
    111 /* If False, a fault is Valgrind-internal (ie, a bug) */
    112 Bool VG_(in_generated_code) = False;
    113 
    114 /* 64-bit counter for the number of basic blocks done. */
    115 static ULong bbs_done = 0;
    116 
     117 /* Counter used to decide when vgdb activity should be checked for.
     118    When the number of bbs done reaches vgdb_next_poll, the scheduler
     119    polls for gdbserver activity.  VG_(force_vgdb_poll) and
     120    VG_(disable_vgdb_poll) allow the valgrind core (e.g. m_gdbserver)
     121    to control when the next poll will be done. */
    122 static ULong vgdb_next_poll;
    123 
    124 /* Forwards */
    125 static void do_client_request ( ThreadId tid );
    126 static void scheduler_sanity ( ThreadId tid );
    127 static void mostly_clear_thread_record ( ThreadId tid );
    128 
    129 /* Stats. */
    130 static ULong n_scheduling_events_MINOR = 0;
    131 static ULong n_scheduling_events_MAJOR = 0;
    132 
    133 /* Stats: number of XIndirs, and number that missed in the fast
    134    cache. */
    135 static ULong stats__n_xindirs = 0;
    136 static ULong stats__n_xindir_misses = 0;
    137 
    138 /* And 32-bit temp bins for the above, so that 32-bit platforms don't
    139    have to do 64 bit incs on the hot path through
    140    VG_(cp_disp_xindir). */
    141 /*global*/ UInt VG_(stats__n_xindirs_32) = 0;
    142 /*global*/ UInt VG_(stats__n_xindir_misses_32) = 0;
    143 
    144 /* Sanity checking counts. */
    145 static UInt sanity_fast_count = 0;
    146 static UInt sanity_slow_count = 0;
    147 
    148 void VG_(print_scheduler_stats)(void)
    149 {
    150    VG_(message)(Vg_DebugMsg,
    151       "scheduler: %'llu event checks.\n", bbs_done );
    152    VG_(message)(Vg_DebugMsg,
    153                 "scheduler: %'llu indir transfers, %'llu misses (1 in %llu)\n",
    154                 stats__n_xindirs, stats__n_xindir_misses,
    155                 stats__n_xindirs / (stats__n_xindir_misses
    156                                     ? stats__n_xindir_misses : 1));
    157    VG_(message)(Vg_DebugMsg,
    158       "scheduler: %'llu/%'llu major/minor sched events.\n",
    159       n_scheduling_events_MAJOR, n_scheduling_events_MINOR);
    160    VG_(message)(Vg_DebugMsg,
    161                 "   sanity: %d cheap, %d expensive checks.\n",
    162                 sanity_fast_count, sanity_slow_count );
    163 }
    164 
    165 /*
    166  * Mutual exclusion object used to serialize threads.
    167  */
    168 static struct sched_lock *the_BigLock;
    169 
    170 
    171 /* ---------------------------------------------------------------------
    172    Helper functions for the scheduler.
    173    ------------------------------------------------------------------ */
    174 
    175 static
    176 void print_sched_event ( ThreadId tid, const HChar* what )
    177 {
    178    VG_(message)(Vg_DebugMsg, "  SCHED[%d]: %s\n", tid, what );
    179 }
    180 
    181 /* For showing SB profiles, if the user asks to see them. */
    182 static
    183 void maybe_show_sb_profile ( void )
    184 {
    185    /* DO NOT MAKE NON-STATIC */
    186    static ULong bbs_done_lastcheck = 0;
    187    /* */
    188    vg_assert(VG_(clo_profyle_interval) > 0);
    189    Long delta = (Long)(bbs_done - bbs_done_lastcheck);
    190    vg_assert(delta >= 0);
    191    if ((ULong)delta >= VG_(clo_profyle_interval)) {
    192       bbs_done_lastcheck = bbs_done;
    193       VG_(get_and_show_SB_profile)(bbs_done);
    194    }
    195 }
    196 
    197 static
    198 const HChar* name_of_sched_event ( UInt event )
    199 {
    200    switch (event) {
    201       case VEX_TRC_JMP_INVALICACHE:    return "INVALICACHE";
    202       case VEX_TRC_JMP_FLUSHDCACHE:    return "FLUSHDCACHE";
    203       case VEX_TRC_JMP_NOREDIR:        return "NOREDIR";
    204       case VEX_TRC_JMP_SIGILL:         return "SIGILL";
    205       case VEX_TRC_JMP_SIGTRAP:        return "SIGTRAP";
    206       case VEX_TRC_JMP_SIGSEGV:        return "SIGSEGV";
    207       case VEX_TRC_JMP_SIGBUS:         return "SIGBUS";
    208       case VEX_TRC_JMP_SIGFPE_INTOVF:
    209       case VEX_TRC_JMP_SIGFPE_INTDIV:  return "SIGFPE";
    210       case VEX_TRC_JMP_EMWARN:         return "EMWARN";
    211       case VEX_TRC_JMP_EMFAIL:         return "EMFAIL";
    212       case VEX_TRC_JMP_CLIENTREQ:      return "CLIENTREQ";
    213       case VEX_TRC_JMP_YIELD:          return "YIELD";
    214       case VEX_TRC_JMP_NODECODE:       return "NODECODE";
    215       case VEX_TRC_JMP_MAPFAIL:        return "MAPFAIL";
    216       case VEX_TRC_JMP_SYS_SYSCALL:    return "SYSCALL";
    217       case VEX_TRC_JMP_SYS_INT32:      return "INT32";
    218       case VEX_TRC_JMP_SYS_INT128:     return "INT128";
    219       case VEX_TRC_JMP_SYS_INT129:     return "INT129";
    220       case VEX_TRC_JMP_SYS_INT130:     return "INT130";
    221       case VEX_TRC_JMP_SYS_SYSENTER:   return "SYSENTER";
    222       case VEX_TRC_JMP_BORING:         return "VEX_BORING";
    223 
    224       case VG_TRC_BORING:              return "VG_BORING";
    225       case VG_TRC_INNER_FASTMISS:      return "FASTMISS";
    226       case VG_TRC_INNER_COUNTERZERO:   return "COUNTERZERO";
    227       case VG_TRC_FAULT_SIGNAL:        return "FAULTSIGNAL";
    228       case VG_TRC_INVARIANT_FAILED:    return "INVFAILED";
    229       case VG_TRC_CHAIN_ME_TO_SLOW_EP: return "CHAIN_ME_SLOW";
    230       case VG_TRC_CHAIN_ME_TO_FAST_EP: return "CHAIN_ME_FAST";
    231       default:                         return "??UNKNOWN??";
    232   }
    233 }
    234 
    235 /* Allocate a completely empty ThreadState record. */
    236 ThreadId VG_(alloc_ThreadState) ( void )
    237 {
    238    Int i;
    239    for (i = 1; i < VG_N_THREADS; i++) {
    240       if (VG_(threads)[i].status == VgTs_Empty) {
    241 	 VG_(threads)[i].status = VgTs_Init;
    242 	 VG_(threads)[i].exitreason = VgSrc_None;
    243          if (VG_(threads)[i].thread_name)
    244             VG_(free)(VG_(threads)[i].thread_name);
    245          VG_(threads)[i].thread_name = NULL;
    246          return i;
    247       }
    248    }
    249    VG_(printf)("Use --max-threads=INT to specify a larger number of threads\n"
    250                "and rerun valgrind\n");
    251    VG_(core_panic)("Max number of threads is too low");
    252    /*NOTREACHED*/
    253 }
    254 
    255 /*
    256    Mark a thread as Runnable.  This will block until the_BigLock is
    257    available, so that we get exclusive access to all the shared
    258    structures and the CPU.  Up until we get the_BigLock, we must not
    259    touch any shared state.
    260 
    261    When this returns, we'll actually be running.
    262  */
    263 void VG_(acquire_BigLock)(ThreadId tid, const HChar* who)
    264 {
    265    ThreadState *tst;
    266 
    267 #if 0
    268    if (VG_(clo_trace_sched)) {
    269       HChar buf[VG_(strlen)(who) + 30];
    270       VG_(sprintf)(buf, "waiting for lock (%s)", who);
    271       print_sched_event(tid, buf);
    272    }
    273 #endif
    274 
    275    /* First, acquire the_BigLock.  We can't do anything else safely
    276       prior to this point.  Even doing debug printing prior to this
    277       point is, technically, wrong. */
    278    VG_(acquire_BigLock_LL)(NULL);
    279 
    280    tst = VG_(get_ThreadState)(tid);
    281 
    282    vg_assert(tst->status != VgTs_Runnable);
    283 
    284    tst->status = VgTs_Runnable;
    285 
    286    if (VG_(running_tid) != VG_INVALID_THREADID)
    287       VG_(printf)("tid %d found %d running\n", tid, VG_(running_tid));
    288    vg_assert(VG_(running_tid) == VG_INVALID_THREADID);
    289    VG_(running_tid) = tid;
    290 
    291    { Addr gsp = VG_(get_SP)(tid);
    292       if (NULL != VG_(tdict).track_new_mem_stack_w_ECU)
    293          VG_(unknown_SP_update_w_ECU)(gsp, gsp, 0/*unknown origin*/);
    294       else
    295          VG_(unknown_SP_update)(gsp, gsp);
    296    }
    297 
    298    if (VG_(clo_trace_sched)) {
    299       HChar buf[VG_(strlen)(who) + 30];
    300       VG_(sprintf)(buf, " acquired lock (%s)", who);
    301       print_sched_event(tid, buf);
    302    }
    303 }
    304 
    305 /*
    306    Set a thread into a sleeping state, and give up exclusive access to
    307    the CPU.  On return, the thread must be prepared to block until it
    308    is ready to run again (generally this means blocking in a syscall,
    309    but it may mean that we remain in a Runnable state and we're just
    310    yielding the CPU to another thread).
    311  */
    312 void VG_(release_BigLock)(ThreadId tid, ThreadStatus sleepstate,
    313                           const HChar* who)
    314 {
    315    ThreadState *tst = VG_(get_ThreadState)(tid);
    316 
    317    vg_assert(tst->status == VgTs_Runnable);
    318 
    319    vg_assert(sleepstate == VgTs_WaitSys ||
    320 	     sleepstate == VgTs_Yielding);
    321 
    322    tst->status = sleepstate;
    323 
    324    vg_assert(VG_(running_tid) == tid);
    325    VG_(running_tid) = VG_INVALID_THREADID;
    326 
    327    if (VG_(clo_trace_sched)) {
    328       const HChar *status = VG_(name_of_ThreadStatus)(sleepstate);
    329       HChar buf[VG_(strlen)(who) + VG_(strlen)(status) + 30];
    330       VG_(sprintf)(buf, "releasing lock (%s) -> %s", who, status);
    331       print_sched_event(tid, buf);
    332    }
    333 
    334    /* Release the_BigLock; this will reschedule any runnable
    335       thread. */
    336    VG_(release_BigLock_LL)(NULL);
    337 }
    338 
    339 static void init_BigLock(void)
    340 {
    341    vg_assert(!the_BigLock);
    342    the_BigLock = ML_(create_sched_lock)();
    343 }
    344 
    345 static void deinit_BigLock(void)
    346 {
    347    ML_(destroy_sched_lock)(the_BigLock);
    348    the_BigLock = NULL;
    349 }
    350 
    351 /* See pub_core_scheduler.h for description */
    352 void VG_(acquire_BigLock_LL) ( const HChar* who )
    353 {
    354    ML_(acquire_sched_lock)(the_BigLock);
    355 }
    356 
    357 /* See pub_core_scheduler.h for description */
    358 void VG_(release_BigLock_LL) ( const HChar* who )
    359 {
    360    ML_(release_sched_lock)(the_BigLock);
    361 }
    362 
    363 Bool VG_(owns_BigLock_LL) ( ThreadId tid )
    364 {
    365    return (ML_(get_sched_lock_owner)(the_BigLock)
    366            == VG_(threads)[tid].os_state.lwpid);
    367 }
    368 
    369 
    370 /* Clear out the ThreadState and release the semaphore. Leaves the
    371    ThreadState in VgTs_Zombie state, so that it doesn't get
    372    reallocated until the caller is really ready. */
    373 void VG_(exit_thread)(ThreadId tid)
    374 {
    375    vg_assert(VG_(is_valid_tid)(tid));
    376    vg_assert(VG_(is_running_thread)(tid));
    377    vg_assert(VG_(is_exiting)(tid));
    378 
    379    mostly_clear_thread_record(tid);
    380    VG_(running_tid) = VG_INVALID_THREADID;
    381 
    382    /* There should still be a valid exitreason for this thread */
    383    vg_assert(VG_(threads)[tid].exitreason != VgSrc_None);
    384 
    385    if (VG_(clo_trace_sched))
    386       print_sched_event(tid, "release lock in VG_(exit_thread)");
    387 
    388    VG_(release_BigLock_LL)(NULL);
    389 }
    390 
    391 /* If 'tid' is blocked in a syscall, send it SIGVGKILL so as to get it
    392    out of the syscall and onto doing the next thing, whatever that is.
     393    If it isn't blocked in a syscall, this has no effect on the thread. */
    394 void VG_(get_thread_out_of_syscall)(ThreadId tid)
    395 {
    396    vg_assert(VG_(is_valid_tid)(tid));
    397    vg_assert(!VG_(is_running_thread)(tid));
    398 
    399    if (VG_(threads)[tid].status == VgTs_WaitSys) {
    400       if (VG_(clo_trace_signals)) {
    401 	 VG_(message)(Vg_DebugMsg,
    402                       "get_thread_out_of_syscall zaps tid %d lwp %d\n",
    403 		      tid, VG_(threads)[tid].os_state.lwpid);
    404       }
    405 #     if defined(VGO_darwin)
    406       {
    407          // GrP fixme use mach primitives on darwin?
    408          // GrP fixme thread_abort_safely?
    409          // GrP fixme race for thread with WaitSys set but not in syscall yet?
    410          extern kern_return_t thread_abort(mach_port_t);
    411          thread_abort(VG_(threads)[tid].os_state.lwpid);
    412       }
    413 #     else
    414       {
    415          __attribute__((unused))
    416          Int r = VG_(tkill)(VG_(threads)[tid].os_state.lwpid, VG_SIGVGKILL);
    417          /* JRS 2009-Mar-20: should we assert for r==0 (tkill succeeded)?
    418             I'm really not sure.  Here's a race scenario which argues
     419            that we shouldn't; but equally I'm not sure the scenario is
    420             even possible, because of constraints caused by the question
    421             of who holds the BigLock when.
    422 
    423             Target thread tid does sys_read on a socket and blocks.  This
    424             function gets called, and we observe correctly that tid's
    425             status is WaitSys but then for whatever reason this function
    426             goes very slowly for a while.  Then data arrives from
    427             wherever, tid's sys_read returns, tid exits.  Then we do
    428             tkill on tid, but tid no longer exists; tkill returns an
    429             error code and the assert fails. */
    430          /* vg_assert(r == 0); */
    431       }
    432 #     endif
    433    }
    434 }
    435 
    436 /*
    437    Yield the CPU for a short time to let some other thread run.
    438  */
    439 void VG_(vg_yield)(void)
    440 {
    441    ThreadId tid = VG_(running_tid);
    442 
    443    vg_assert(tid != VG_INVALID_THREADID);
    444    vg_assert(VG_(threads)[tid].os_state.lwpid == VG_(gettid)());
    445 
    446    VG_(release_BigLock)(tid, VgTs_Yielding, "VG_(vg_yield)");
    447 
    448    /*
    449       Tell the kernel we're yielding.
    450     */
    451    VG_(do_syscall0)(__NR_sched_yield);
    452 
    453    VG_(acquire_BigLock)(tid, "VG_(vg_yield)");
    454 }
    455 
    456 
    457 /* Set the standard set of blocked signals, used whenever we're not
    458    running a client syscall. */
    459 static void block_signals(void)
    460 {
    461    vki_sigset_t mask;
    462 
    463    VG_(sigfillset)(&mask);
    464 
    465    /* Don't block these because they're synchronous */
    466    VG_(sigdelset)(&mask, VKI_SIGSEGV);
    467    VG_(sigdelset)(&mask, VKI_SIGBUS);
    468    VG_(sigdelset)(&mask, VKI_SIGFPE);
    469    VG_(sigdelset)(&mask, VKI_SIGILL);
    470    VG_(sigdelset)(&mask, VKI_SIGTRAP);
    471 
    472    /* Can't block these anyway */
    473    VG_(sigdelset)(&mask, VKI_SIGSTOP);
    474    VG_(sigdelset)(&mask, VKI_SIGKILL);
    475 
    476    VG_(sigprocmask)(VKI_SIG_SETMASK, &mask, NULL);
    477 }
    478 
    479 static void os_state_clear(ThreadState *tst)
    480 {
    481    tst->os_state.lwpid       = 0;
    482    tst->os_state.threadgroup = 0;
    483 #  if defined(VGO_linux)
    484    /* no other fields to clear */
    485 #  elif defined(VGO_darwin)
    486    tst->os_state.post_mach_trap_fn = NULL;
    487    tst->os_state.pthread           = 0;
    488    tst->os_state.func_arg          = 0;
    489    VG_(memset)(&tst->os_state.child_go, 0, sizeof(tst->os_state.child_go));
    490    VG_(memset)(&tst->os_state.child_done, 0, sizeof(tst->os_state.child_done));
    491    tst->os_state.wq_jmpbuf_valid   = False;
    492    tst->os_state.remote_port       = 0;
    493    tst->os_state.msgh_id           = 0;
    494    VG_(memset)(&tst->os_state.mach_args, 0, sizeof(tst->os_state.mach_args));
    495 #  else
    496 #    error "Unknown OS"
    497 #  endif
    498 }
    499 
    500 static void os_state_init(ThreadState *tst)
    501 {
    502    tst->os_state.valgrind_stack_base    = 0;
    503    tst->os_state.valgrind_stack_init_SP = 0;
    504    os_state_clear(tst);
    505 }
    506 
    507 static
    508 void mostly_clear_thread_record ( ThreadId tid )
    509 {
    510    vki_sigset_t savedmask;
    511 
    512    vg_assert(tid >= 0 && tid < VG_N_THREADS);
    513    VG_(cleanup_thread)(&VG_(threads)[tid].arch);
    514    VG_(threads)[tid].tid = tid;
    515 
    516    /* Leave the thread in Zombie, so that it doesn't get reallocated
    517       until the caller is finally done with the thread stack. */
    518    VG_(threads)[tid].status               = VgTs_Zombie;
    519 
    520    VG_(sigemptyset)(&VG_(threads)[tid].sig_mask);
    521    VG_(sigemptyset)(&VG_(threads)[tid].tmp_sig_mask);
    522 
    523    os_state_clear(&VG_(threads)[tid]);
    524 
    525    /* start with no altstack */
    526    VG_(threads)[tid].altstack.ss_sp = (void *)0xdeadbeef;
    527    VG_(threads)[tid].altstack.ss_size = 0;
    528    VG_(threads)[tid].altstack.ss_flags = VKI_SS_DISABLE;
    529 
    530    VG_(clear_out_queued_signals)(tid, &savedmask);
    531 
    532    VG_(threads)[tid].sched_jmpbuf_valid = False;
    533 }
    534 
    535 /*
    536    Called in the child after fork.  If the parent has multiple
    537    threads, then we've inherited a VG_(threads) array describing them,
    538    but only the thread which called fork() is actually alive in the
     539    child.  This function needs to clean up all those other thread
    540    structures.
    541 
     542    Whichever tid in the parent called fork() becomes the
    543    master_tid in the child.  That's because the only living slot in
    544    VG_(threads) in the child after fork is VG_(threads)[tid], and it
    545    would be too hard to try to re-number the thread and relocate the
    546    thread state down to VG_(threads)[1].
    547 
    548    This function also needs to reinitialize the_BigLock, since
    549    otherwise we may end up sharing its state with the parent, which
    550    would be deeply confusing.
    551 */
    552 static void sched_fork_cleanup(ThreadId me)
    553 {
    554    ThreadId tid;
    555    vg_assert(VG_(running_tid) == me);
    556 
    557 #  if defined(VGO_darwin)
    558    // GrP fixme hack reset Mach ports
    559    VG_(mach_init)();
    560 #  endif
    561 
    562    VG_(threads)[me].os_state.lwpid = VG_(gettid)();
    563    VG_(threads)[me].os_state.threadgroup = VG_(getpid)();
    564 
    565    /* clear out all the unused thread slots */
    566    for (tid = 1; tid < VG_N_THREADS; tid++) {
    567       if (tid != me) {
    568          mostly_clear_thread_record(tid);
    569 	 VG_(threads)[tid].status = VgTs_Empty;
    570          VG_(clear_syscallInfo)(tid);
    571       }
    572    }
    573 
    574    /* re-init and take the sema */
    575    deinit_BigLock();
    576    init_BigLock();
    577    VG_(acquire_BigLock_LL)(NULL);
    578 }
    579 
    580 
    581 /* First phase of initialisation of the scheduler.  Initialise the
    582    bigLock, zeroise the VG_(threads) structure and decide on the
    583    ThreadId of the root thread.
    584 */
    585 ThreadId VG_(scheduler_init_phase1) ( void )
    586 {
    587    Int i;
    588    ThreadId tid_main;
    589 
    590    VG_(debugLog)(1,"sched","sched_init_phase1\n");
    591 
    592    if (VG_(clo_fair_sched) != disable_fair_sched
    593        && !ML_(set_sched_lock_impl)(sched_lock_ticket)
    594        && VG_(clo_fair_sched) == enable_fair_sched)
    595    {
    596       VG_(printf)("Error: fair scheduling is not supported on this system.\n");
    597       VG_(exit)(1);
    598    }
    599 
    600    if (VG_(clo_verbosity) > 1) {
    601       VG_(message)(Vg_DebugMsg,
    602                    "Scheduler: using %s scheduler lock implementation.\n",
    603                    ML_(get_sched_lock_name)());
    604    }
    605 
    606    init_BigLock();
    607 
    608    for (i = 0 /* NB; not 1 */; i < VG_N_THREADS; i++) {
    609       /* Paranoia .. completely zero it out. */
    610       VG_(memset)( & VG_(threads)[i], 0, sizeof( VG_(threads)[i] ) );
    611 
    612       VG_(threads)[i].sig_queue = NULL;
    613 
    614       os_state_init(&VG_(threads)[i]);
    615       mostly_clear_thread_record(i);
    616 
    617       VG_(threads)[i].status                    = VgTs_Empty;
    618       VG_(threads)[i].client_stack_szB          = 0;
    619       VG_(threads)[i].client_stack_highest_byte = (Addr)NULL;
    620       VG_(threads)[i].err_disablement_level     = 0;
    621       VG_(threads)[i].thread_name               = NULL;
    622    }
    623 
    624    tid_main = VG_(alloc_ThreadState)();
    625 
    626    /* Bleh.  Unfortunately there are various places in the system that
    627       assume that the main thread has a ThreadId of 1.
    628       - Helgrind (possibly)
    629       - stack overflow message in default_action() in m_signals.c
    630       - definitely a lot more places
    631    */
    632    vg_assert(tid_main == 1);
    633 
    634    return tid_main;
    635 }
    636 
    637 
    638 /* Second phase of initialisation of the scheduler.  Given the root
    639    ThreadId computed by first phase of initialisation, fill in stack
    640    details and acquire bigLock.  Initialise the scheduler.  This is
    641    called at startup.  The caller subsequently initialises the guest
    642    state components of this main thread.
    643 */
    644 void VG_(scheduler_init_phase2) ( ThreadId tid_main,
    645                                   Addr     clstack_end,
    646                                   SizeT    clstack_size )
    647 {
    648    VG_(debugLog)(1,"sched","sched_init_phase2: tid_main=%d, "
    649                    "cls_end=0x%lx, cls_sz=%ld\n",
    650                    tid_main, clstack_end, clstack_size);
    651 
    652    vg_assert(VG_IS_PAGE_ALIGNED(clstack_end+1));
    653    vg_assert(VG_IS_PAGE_ALIGNED(clstack_size));
    654 
    655    VG_(threads)[tid_main].client_stack_highest_byte
    656       = clstack_end;
    657    VG_(threads)[tid_main].client_stack_szB
    658       = clstack_size;
    659 
    660    VG_(atfork)(NULL, NULL, sched_fork_cleanup);
    661 }
    662 
    663 
    664 /* ---------------------------------------------------------------------
    665    Helpers for running translations.
    666    ------------------------------------------------------------------ */
    667 
    668 /* Use gcc's built-in setjmp/longjmp.  longjmp must not restore signal
    669    mask state, but does need to pass "val" through.  jumped must be a
    670    volatile UWord. */
    671 #define SCHEDSETJMP(tid, jumped, stmt)					\
    672    do {									\
    673       ThreadState * volatile _qq_tst = VG_(get_ThreadState)(tid);	\
    674 									\
    675       (jumped) = VG_MINIMAL_SETJMP(_qq_tst->sched_jmpbuf);              \
    676       if ((jumped) == ((UWord)0)) {                                     \
    677 	 vg_assert(!_qq_tst->sched_jmpbuf_valid);			\
    678 	 _qq_tst->sched_jmpbuf_valid = True;				\
    679 	 stmt;								\
    680       }	else if (VG_(clo_trace_sched))					\
    681 	 VG_(printf)("SCHEDSETJMP(line %d) tid %d, jumped=%ld\n",       \
    682                      __LINE__, tid, jumped);                            \
    683       vg_assert(_qq_tst->sched_jmpbuf_valid);				\
    684       _qq_tst->sched_jmpbuf_valid = False;				\
    685    } while(0)
    686 
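/* Illustrative sketch only: how SCHEDSETJMP is typically used in this
   file (see handle_syscall and run_thread_for_a_while below).  If the
   wrapped statement longjmps back -- for example because the client
   took a fault -- 'jumped' is nonzero afterwards.

      volatile UWord jumped;
      SCHEDSETJMP(tid, jumped, VG_(client_syscall)(tid, trc));
      if (jumped != (UWord)0) {
         // the wrapped statement was interrupted; clean up here
      }
*/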
    687 
    688 /* Do various guest state alignment checks prior to running a thread.
    689    Specifically, check that what we have matches Vex's guest state
    690    layout requirements.  See libvex.h for details, but in short the
    691    requirements are: There must be no holes in between the primary
    692    guest state, its two copies, and the spill area.  In short, all 4
    693    areas must be aligned on the LibVEX_GUEST_STATE_ALIGN boundary and
    694    be placed back-to-back without holes in between. */
    695 static void do_pre_run_checks ( volatile ThreadState* tst )
    696 {
    697    Addr a_vex     = (Addr) & tst->arch.vex;
    698    Addr a_vexsh1  = (Addr) & tst->arch.vex_shadow1;
    699    Addr a_vexsh2  = (Addr) & tst->arch.vex_shadow2;
    700    Addr a_spill   = (Addr) & tst->arch.vex_spill;
    701    UInt sz_vex    = (UInt) sizeof tst->arch.vex;
    702    UInt sz_vexsh1 = (UInt) sizeof tst->arch.vex_shadow1;
    703    UInt sz_vexsh2 = (UInt) sizeof tst->arch.vex_shadow2;
    704    UInt sz_spill  = (UInt) sizeof tst->arch.vex_spill;
    705 
    706    if (0)
    707    VG_(printf)("gst %p %d, sh1 %p %d, "
    708                "sh2 %p %d, spill %p %d\n",
    709                (void*)a_vex, sz_vex,
    710                (void*)a_vexsh1, sz_vexsh1,
    711                (void*)a_vexsh2, sz_vexsh2,
    712                (void*)a_spill, sz_spill );
    713 
    714    vg_assert(sz_vex    % LibVEX_GUEST_STATE_ALIGN == 0);
    715    vg_assert(sz_vexsh1 % LibVEX_GUEST_STATE_ALIGN == 0);
    716    vg_assert(sz_vexsh2 % LibVEX_GUEST_STATE_ALIGN == 0);
    717    vg_assert(sz_spill  % LibVEX_GUEST_STATE_ALIGN == 0);
    718 
    719    vg_assert(a_vex    % LibVEX_GUEST_STATE_ALIGN == 0);
    720    vg_assert(a_vexsh1 % LibVEX_GUEST_STATE_ALIGN == 0);
    721    vg_assert(a_vexsh2 % LibVEX_GUEST_STATE_ALIGN == 0);
    722    vg_assert(a_spill  % LibVEX_GUEST_STATE_ALIGN == 0);
    723 
    724    /* Check that the guest state and its two shadows have the same
    725       size, and that there are no holes in between.  The latter is
    726       important because Memcheck assumes that it can reliably access
    727       the shadows by indexing off a pointer to the start of the
    728       primary guest state area. */
    729    vg_assert(sz_vex == sz_vexsh1);
    730    vg_assert(sz_vex == sz_vexsh2);
    731    vg_assert(a_vex + 1 * sz_vex == a_vexsh1);
    732    vg_assert(a_vex + 2 * sz_vex == a_vexsh2);
    733    /* Also check there's no hole between the second shadow area and
    734       the spill area. */
    735    vg_assert(sz_spill == LibVEX_N_SPILL_BYTES);
    736    vg_assert(a_vex + 3 * sz_vex == a_spill);
    737 
    738 #  if defined(VGA_x86)
    739    /* x86 XMM regs must form an array, ie, have no holes in
    740       between. */
    741    vg_assert(
    742       (offsetof(VexGuestX86State,guest_XMM7)
    743        - offsetof(VexGuestX86State,guest_XMM0))
    744       == (8/*#regs*/-1) * 16/*bytes per reg*/
    745    );
    746    vg_assert(VG_IS_16_ALIGNED(offsetof(VexGuestX86State,guest_XMM0)));
    747    vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestX86State,guest_FPREG)));
    748    vg_assert(8 == offsetof(VexGuestX86State,guest_EAX));
    749    vg_assert(VG_IS_4_ALIGNED(offsetof(VexGuestX86State,guest_EAX)));
    750    vg_assert(VG_IS_4_ALIGNED(offsetof(VexGuestX86State,guest_EIP)));
    751 #  endif
    752 
    753 #  if defined(VGA_amd64)
    754    /* amd64 YMM regs must form an array, ie, have no holes in
    755       between. */
    756    vg_assert(
    757       (offsetof(VexGuestAMD64State,guest_YMM16)
    758        - offsetof(VexGuestAMD64State,guest_YMM0))
    759       == (17/*#regs*/-1) * 32/*bytes per reg*/
    760    );
    761    vg_assert(VG_IS_16_ALIGNED(offsetof(VexGuestAMD64State,guest_YMM0)));
    762    vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State,guest_FPREG)));
    763    vg_assert(16 == offsetof(VexGuestAMD64State,guest_RAX));
    764    vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State,guest_RAX)));
    765    vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State,guest_RIP)));
    766 #  endif
    767 
    768 #  if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
    769    /* ppc guest_state vector regs must be 16 byte aligned for
    770       loads/stores.  This is important! */
    771    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_VSR0));
    772    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_VSR0));
    773    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_VSR0));
    774    /* be extra paranoid .. */
    775    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_VSR1));
    776    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_VSR1));
    777    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_VSR1));
    778 #  endif
    779 
    780 #  if defined(VGA_arm)
    781    /* arm guest_state VFP regs must be 8 byte aligned for
    782       loads/stores.  Let's use 16 just to be on the safe side. */
    783    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_D0));
    784    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_D0));
    785    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_D0));
    786    /* be extra paranoid .. */
    787    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex.guest_D1));
    788    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow1.guest_D1));
    789    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow2.guest_D1));
    790 #  endif
    791 
    792 #  if defined(VGA_arm64)
    793    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex.guest_X0));
    794    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow1.guest_X0));
    795    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow2.guest_X0));
    796    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_Q0));
    797    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_Q0));
    798    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_Q0));
    799 #  endif
    800 
    801 #  if defined(VGA_s390x)
    802    /* no special requirements */
    803 #  endif
    804 
    805 #  if defined(VGA_mips32) || defined(VGA_mips64)
    806    /* no special requirements */
    807 #  endif
    808 }
    809 
    810 // NO_VGDB_POLL value ensures vgdb is not polled, while
    811 // VGDB_POLL_ASAP ensures that the next scheduler call
    812 // will cause a poll.
    813 #define NO_VGDB_POLL    0xffffffffffffffffULL
    814 #define VGDB_POLL_ASAP  0x0ULL
    815 
    816 void VG_(disable_vgdb_poll) (void )
    817 {
    818    vgdb_next_poll = NO_VGDB_POLL;
    819 }
    820 void VG_(force_vgdb_poll) ( void )
    821 {
    822    vgdb_next_poll = VGDB_POLL_ASAP;
    823 }
    824 
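/* Illustrative sketch only (the caller below is an assumption, not
   the real m_gdbserver code): how these two hooks are meant to be
   driven by the core.  The poll itself happens at the end of
   run_thread_for_a_while, once bbs_done reaches vgdb_next_poll.

      if (want_a_poll_at_the_next_opportunity)   // hypothetical flag
         VG_(force_vgdb_poll)();     // next scheduler check will poll
      else
         VG_(disable_vgdb_poll)();   // no polling until changed again
*/
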
    825 /* Run the thread tid for a while, and return a VG_TRC_* value
    826    indicating why VG_(disp_run_translations) stopped, and possibly an
    827    auxiliary word.  Also, only allow the thread to run for at most
    828    *dispatchCtrP events.  If (as is the normal case) use_alt_host_addr
    829    is False, we are running ordinary redir'd translations, and we
    830    should therefore start by looking up the guest next IP in TT.  If
    831    it is True then we ignore the guest next IP and just run from
    832    alt_host_addr, which presumably points at host code for a no-redir
    833    translation.
    834 
    835    Return results are placed in two_words.  two_words[0] is set to the
    836    TRC.  In the case where that is VG_TRC_CHAIN_ME_TO_{SLOW,FAST}_EP,
    837    the address to patch is placed in two_words[1].
    838 */
    839 static
    840 void run_thread_for_a_while ( /*OUT*/HWord* two_words,
    841                               /*MOD*/Int*   dispatchCtrP,
    842                               ThreadId      tid,
    843                               HWord         alt_host_addr,
    844                               Bool          use_alt_host_addr )
    845 {
    846    volatile HWord        jumped         = 0;
    847    volatile ThreadState* tst            = NULL; /* stop gcc complaining */
    848    volatile Int          done_this_time = 0;
    849    volatile HWord        host_code_addr = 0;
    850 
    851    /* Paranoia */
    852    vg_assert(VG_(is_valid_tid)(tid));
    853    vg_assert(VG_(is_running_thread)(tid));
    854    vg_assert(!VG_(is_exiting)(tid));
    855    vg_assert(*dispatchCtrP > 0);
    856 
    857    tst = VG_(get_ThreadState)(tid);
    858    do_pre_run_checks( tst );
    859    /* end Paranoia */
    860 
    861    /* Futz with the XIndir stats counters. */
    862    vg_assert(VG_(stats__n_xindirs_32) == 0);
    863    vg_assert(VG_(stats__n_xindir_misses_32) == 0);
    864 
    865    /* Clear return area. */
    866    two_words[0] = two_words[1] = 0;
    867 
    868    /* Figure out where we're starting from. */
    869    if (use_alt_host_addr) {
    870       /* unusual case -- no-redir translation */
    871       host_code_addr = alt_host_addr;
    872    } else {
    873       /* normal case -- redir translation */
    874       UInt cno = (UInt)VG_TT_FAST_HASH((Addr)tst->arch.vex.VG_INSTR_PTR);
    875       if (LIKELY(VG_(tt_fast)[cno].guest == (Addr)tst->arch.vex.VG_INSTR_PTR))
    876          host_code_addr = VG_(tt_fast)[cno].host;
    877       else {
    878          Addr res = 0;
     879          /* Not found in VG_(tt_fast).  Searching the transtab here
     880             improves performance compared to returning directly
     881             to the scheduler. */
    882          Bool  found = VG_(search_transtab)(&res, NULL, NULL,
    883                                             (Addr)tst->arch.vex.VG_INSTR_PTR,
    884                                             True/*upd cache*/
    885                                             );
    886          if (LIKELY(found)) {
    887             host_code_addr = res;
    888          } else {
    889             /* At this point, we know that we intended to start at a
    890                normal redir translation, but it was not found.  In
    891                which case we can return now claiming it's not
    892                findable. */
    893             two_words[0] = VG_TRC_INNER_FASTMISS; /* hmm, is that right? */
    894             return;
    895          }
    896       }
    897    }
    898    /* We have either a no-redir or a redir translation. */
    899    vg_assert(host_code_addr != 0); /* implausible */
    900 
    901    /* there should be no undealt-with signals */
    902    //vg_assert(VG_(threads)[tid].siginfo.si_signo == 0);
    903 
    904    /* Set up event counter stuff for the run. */
    905    tst->arch.vex.host_EvC_COUNTER = *dispatchCtrP;
    906    tst->arch.vex.host_EvC_FAILADDR
    907       = (HWord)VG_(fnptr_to_fnentry)( &VG_(disp_cp_evcheck_fail) );
    908 
    909    if (0) {
    910       vki_sigset_t m;
    911       Int i, err = VG_(sigprocmask)(VKI_SIG_SETMASK, NULL, &m);
    912       vg_assert(err == 0);
    913       VG_(printf)("tid %d: entering code with unblocked signals: ", tid);
    914       for (i = 1; i <= _VKI_NSIG; i++)
    915          if (!VG_(sigismember)(&m, i))
    916             VG_(printf)("%d ", i);
    917       VG_(printf)("\n");
    918    }
    919 
    920    /* Set up return-value area. */
    921 
    922    // Tell the tool this thread is about to run client code
    923    VG_TRACK( start_client_code, tid, bbs_done );
    924 
    925    vg_assert(VG_(in_generated_code) == False);
    926    VG_(in_generated_code) = True;
    927 
    928    SCHEDSETJMP(
    929       tid,
    930       jumped,
    931       VG_(disp_run_translations)(
    932          two_words,
    933          (volatile void*)&tst->arch.vex,
    934          host_code_addr
    935       )
    936    );
    937 
    938    vg_assert(VG_(in_generated_code) == True);
    939    VG_(in_generated_code) = False;
    940 
    941    if (jumped != (HWord)0) {
    942       /* We get here if the client took a fault that caused our signal
    943          handler to longjmp. */
    944       vg_assert(two_words[0] == 0 && two_words[1] == 0); // correct?
    945       two_words[0] = VG_TRC_FAULT_SIGNAL;
    946       two_words[1] = 0;
    947       block_signals();
    948    }
    949 
    950    /* Merge the 32-bit XIndir/miss counters into the 64 bit versions,
    951       and zero out the 32-bit ones in preparation for the next run of
    952       generated code. */
    953    stats__n_xindirs += (ULong)VG_(stats__n_xindirs_32);
    954    VG_(stats__n_xindirs_32) = 0;
    955    stats__n_xindir_misses += (ULong)VG_(stats__n_xindir_misses_32);
    956    VG_(stats__n_xindir_misses_32) = 0;
    957 
    958    /* Inspect the event counter. */
    959    vg_assert((Int)tst->arch.vex.host_EvC_COUNTER >= -1);
    960    vg_assert(tst->arch.vex.host_EvC_FAILADDR
    961              == (HWord)VG_(fnptr_to_fnentry)( &VG_(disp_cp_evcheck_fail)) );
    962 
    963    /* The number of events done this time is the difference between
    964       the event counter originally and what it is now.  Except -- if
    965       it has gone negative (to -1) then the transition 0 to -1 doesn't
    966       correspond to a real executed block, so back it out.  It's like
    967       this because the event checks decrement the counter first and
    968       check it for negativeness second, hence the 0 to -1 transition
    969       causes a bailout and the block it happens in isn't executed. */
    970    {
    971      Int dispatchCtrAfterwards = (Int)tst->arch.vex.host_EvC_COUNTER;
    972      done_this_time = *dispatchCtrP - dispatchCtrAfterwards;
    973      if (dispatchCtrAfterwards == -1) {
    974         done_this_time--;
    975      } else {
    976         /* If the generated code drives the counter below -1, something
    977            is seriously wrong. */
    978         vg_assert(dispatchCtrAfterwards >= 0);
    979      }
    980    }
    981 
    982    vg_assert(done_this_time >= 0);
    983    bbs_done += (ULong)done_this_time;
    984 
    985    *dispatchCtrP -= done_this_time;
    986    vg_assert(*dispatchCtrP >= 0);
    987 
    988    // Tell the tool this thread has stopped running client code
    989    VG_TRACK( stop_client_code, tid, bbs_done );
    990 
    991    if (bbs_done >= vgdb_next_poll) {
    992       if (VG_(clo_vgdb_poll))
    993          vgdb_next_poll = bbs_done + (ULong)VG_(clo_vgdb_poll);
    994       else
    995          /* value was changed due to gdbserver invocation via ptrace */
    996          vgdb_next_poll = NO_VGDB_POLL;
    997       if (VG_(gdbserver_activity) (tid))
    998          VG_(gdbserver) (tid);
    999    }
   1000 
   1001    /* TRC value and possible auxiliary patch-address word are already
   1002       in two_words[0] and [1] respectively, as a result of the call to
    1003       VG_(disp_run_translations). */
   1004    /* Stay sane .. */
   1005    if (two_words[0] == VG_TRC_CHAIN_ME_TO_SLOW_EP
   1006        || two_words[0] == VG_TRC_CHAIN_ME_TO_FAST_EP) {
   1007       vg_assert(two_words[1] != 0); /* we have a legit patch addr */
   1008    } else {
   1009       vg_assert(two_words[1] == 0); /* nobody messed with it */
   1010    }
   1011 }
   1012 
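/* Illustrative sketch only: how a caller interprets the two_words
   results of run_thread_for_a_while; this mirrors the use made of it
   in VG_(scheduler) below.

      HWord trc[2];
      run_thread_for_a_while( &trc[0], &dispatch_ctr, tid,
                              0/*ignored*/, False );
      if (trc[0] == VG_TRC_CHAIN_ME_TO_SLOW_EP
          || trc[0] == VG_TRC_CHAIN_ME_TO_FAST_EP) {
         // trc[1] holds the address to patch
      }
*/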
   1013 
   1014 /* ---------------------------------------------------------------------
   1015    The scheduler proper.
   1016    ------------------------------------------------------------------ */
   1017 
   1018 static void handle_tt_miss ( ThreadId tid )
   1019 {
   1020    Bool found;
   1021    Addr ip = VG_(get_IP)(tid);
   1022 
   1023    /* Trivial event.  Miss in the fast-cache.  Do a full
   1024       lookup for it. */
   1025    found = VG_(search_transtab)( NULL, NULL, NULL,
   1026                                  ip, True/*upd_fast_cache*/ );
   1027    if (UNLIKELY(!found)) {
   1028       /* Not found; we need to request a translation. */
   1029       if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/,
   1030                           bbs_done, True/*allow redirection*/ )) {
   1031          found = VG_(search_transtab)( NULL, NULL, NULL,
   1032                                        ip, True );
   1033          vg_assert2(found, "handle_tt_miss: missing tt_fast entry");
   1034 
   1035       } else {
   1036 	 // If VG_(translate)() fails, it's because it had to throw a
   1037 	 // signal because the client jumped to a bad address.  That
   1038 	 // means that either a signal has been set up for delivery,
   1039 	 // or the thread has been marked for termination.  Either
   1040 	 // way, we just need to go back into the scheduler loop.
   1041       }
   1042    }
   1043 }
   1044 
   1045 static
   1046 void handle_chain_me ( ThreadId tid, void* place_to_chain, Bool toFastEP )
   1047 {
   1048    Bool found          = False;
   1049    Addr ip             = VG_(get_IP)(tid);
   1050    SECno to_sNo         = INV_SNO;
   1051    TTEno to_tteNo       = INV_TTE;
   1052 
   1053    found = VG_(search_transtab)( NULL, &to_sNo, &to_tteNo,
   1054                                  ip, False/*dont_upd_fast_cache*/ );
   1055    if (!found) {
   1056       /* Not found; we need to request a translation. */
   1057       if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/,
   1058                           bbs_done, True/*allow redirection*/ )) {
   1059          found = VG_(search_transtab)( NULL, &to_sNo, &to_tteNo,
   1060                                        ip, False );
   1061          vg_assert2(found, "handle_chain_me: missing tt_fast entry");
   1062       } else {
   1063 	 // If VG_(translate)() fails, it's because it had to throw a
   1064 	 // signal because the client jumped to a bad address.  That
   1065 	 // means that either a signal has been set up for delivery,
   1066 	 // or the thread has been marked for termination.  Either
   1067 	 // way, we just need to go back into the scheduler loop.
   1068         return;
   1069       }
   1070    }
   1071    vg_assert(found);
   1072    vg_assert(to_sNo != INV_SNO);
   1073    vg_assert(to_tteNo != INV_TTE);
   1074 
   1075    /* So, finally we know where to patch through to.  Do the patching
   1076       and update the various admin tables that allow it to be undone
   1077       in the case that the destination block gets deleted. */
   1078    VG_(tt_tc_do_chaining)( place_to_chain,
   1079                            to_sNo, to_tteNo, toFastEP );
   1080 }
   1081 
   1082 static void handle_syscall(ThreadId tid, UInt trc)
   1083 {
   1084    ThreadState * volatile tst = VG_(get_ThreadState)(tid);
   1085    volatile UWord jumped;
   1086 
   1087    /* Syscall may or may not block; either way, it will be
   1088       complete by the time this call returns, and we'll be
   1089       runnable again.  We could take a signal while the
   1090       syscall runs. */
   1091 
   1092    if (VG_(clo_sanity_level) >= 3) {
   1093       HChar buf[50];    // large enough
   1094       VG_(sprintf)(buf, "(BEFORE SYSCALL, tid %d)", tid);
   1095       Bool ok = VG_(am_do_sync_check)(buf, __FILE__, __LINE__);
   1096       vg_assert(ok);
   1097    }
   1098 
   1099    SCHEDSETJMP(tid, jumped, VG_(client_syscall)(tid, trc));
   1100 
   1101    if (VG_(clo_sanity_level) >= 3) {
   1102       HChar buf[50];    // large enough
   1103       VG_(sprintf)(buf, "(AFTER SYSCALL, tid %d)", tid);
   1104       Bool ok = VG_(am_do_sync_check)(buf, __FILE__, __LINE__);
   1105       vg_assert(ok);
   1106    }
   1107 
   1108    if (!VG_(is_running_thread)(tid))
   1109       VG_(printf)("tid %d not running; VG_(running_tid)=%d, tid %d status %d\n",
   1110 		  tid, VG_(running_tid), tid, tst->status);
   1111    vg_assert(VG_(is_running_thread)(tid));
   1112 
   1113    if (jumped != (UWord)0) {
   1114       block_signals();
   1115       VG_(poll_signals)(tid);
   1116    }
   1117 }
   1118 
   1119 /* tid just requested a jump to the noredir version of its current
   1120    program counter.  So make up that translation if needed, run it,
   1121    and return the resulting thread return code in two_words[]. */
   1122 static
   1123 void handle_noredir_jump ( /*OUT*/HWord* two_words,
   1124                            /*MOD*/Int*   dispatchCtrP,
   1125                            ThreadId tid )
   1126 {
   1127    /* Clear return area. */
   1128    two_words[0] = two_words[1] = 0;
   1129 
   1130    Addr  hcode = 0;
   1131    Addr  ip    = VG_(get_IP)(tid);
   1132 
   1133    Bool  found = VG_(search_unredir_transtab)( &hcode, ip );
   1134    if (!found) {
   1135       /* Not found; we need to request a translation. */
   1136       if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/, bbs_done,
   1137                           False/*NO REDIRECTION*/ )) {
   1138 
   1139          found = VG_(search_unredir_transtab)( &hcode, ip );
   1140          vg_assert2(found, "unredir translation missing after creation?!");
   1141       } else {
   1142 	 // If VG_(translate)() fails, it's because it had to throw a
   1143 	 // signal because the client jumped to a bad address.  That
   1144 	 // means that either a signal has been set up for delivery,
   1145 	 // or the thread has been marked for termination.  Either
   1146 	 // way, we just need to go back into the scheduler loop.
   1147          two_words[0] = VG_TRC_BORING;
   1148          return;
   1149       }
   1150 
   1151    }
   1152 
   1153    vg_assert(found);
   1154    vg_assert(hcode != 0);
   1155 
   1156    /* Otherwise run it and return the resulting VG_TRC_* value. */
   1157    vg_assert(*dispatchCtrP > 0); /* so as to guarantee progress */
   1158    run_thread_for_a_while( two_words, dispatchCtrP, tid,
   1159                            hcode, True/*use hcode*/ );
   1160 }
   1161 
   1162 
   1163 /*
   1164    Run a thread until it wants to exit.
   1165 
   1166    We assume that the caller has already called VG_(acquire_BigLock) for
   1167    us, so we own the VCPU.  Also, all signals are blocked.
   1168  */
   1169 VgSchedReturnCode VG_(scheduler) ( ThreadId tid )
   1170 {
   1171    /* Holds the remaining size of this thread's "timeslice". */
   1172    Int dispatch_ctr = 0;
   1173 
   1174    ThreadState *tst = VG_(get_ThreadState)(tid);
   1175    static Bool vgdb_startup_action_done = False;
   1176 
   1177    if (VG_(clo_trace_sched))
   1178       print_sched_event(tid, "entering VG_(scheduler)");
   1179 
   1180    /* Do vgdb initialization (but once). Only the first (main) task
   1181       starting up will do the below.
    1182       Initializing gdbserver earlier than at the first
    1183       VG_(scheduler) call causes problems:
   1184       * at the end of VG_(scheduler_init_phase2) :
   1185         The main thread is in VgTs_Init state, but in a not yet
   1186         consistent state => the thread cannot be reported to gdb
   1187         (e.g. causes an assert in LibVEX_GuestX86_get_eflags when giving
   1188         back the guest registers to gdb).
   1189       * at end of valgrind_main, just
   1190         before VG_(main_thread_wrapper_NORETURN)(1) :
   1191         The main thread is still in VgTs_Init state but in a
   1192         more advanced state. However, the thread state is not yet
   1193         completely initialized : a.o., the os_state is not yet fully
   1194         set => the thread is then not properly reported to gdb,
    1195         which is then confused (causing e.g. a duplicate thread to be
    1196         shown, without a thread id).
   1197       * it would be possible to initialize gdbserver "lower" in the
   1198         call stack (e.g. in VG_(main_thread_wrapper_NORETURN)) but
   1199         these are platform dependent and the place at which
   1200         the thread state is completely initialized is not
   1201         specific anymore to the main thread (so a similar "do it only
   1202         once" would be needed).
   1203 
   1204         => a "once only" initialization here is the best compromise. */
   1205    if (!vgdb_startup_action_done) {
   1206       vg_assert(tid == 1); // it must be the main thread.
   1207       vgdb_startup_action_done = True;
   1208       if (VG_(clo_vgdb) != Vg_VgdbNo) {
    1209          /* If we have to poll, ensure we do an initial poll at the first
    1210             scheduler call.  Otherwise, ensure no poll (unless interrupted
    1211             by ptrace). */
   1212          if (VG_(clo_vgdb_poll))
   1213             VG_(force_vgdb_poll) ();
   1214          else
   1215             VG_(disable_vgdb_poll) ();
   1216 
   1217          vg_assert (VG_(dyn_vgdb_error) == VG_(clo_vgdb_error));
   1218          /* As we are initializing, VG_(dyn_vgdb_error) can't have been
   1219             changed yet. */
   1220 
   1221          VG_(gdbserver_prerun_action) (1);
   1222       } else {
   1223          VG_(disable_vgdb_poll) ();
   1224       }
   1225    }
   1226 
   1227    if (SimHintiS(SimHint_no_nptl_pthread_stackcache, VG_(clo_sim_hints))
   1228        && tid != 1) {
   1229       /* We disable the stack cache the first time we see a thread other
    1230          than the main thread appearing.  At this point, we are sure the pthread
    1231          lib loading is done and the variable was initialised by the pthread lib. */
   1232       if (VG_(client__stack_cache_actsize__addr)) {
   1233          if (*VG_(client__stack_cache_actsize__addr) == 0) {
   1234             VG_(debugLog)(1,"sched",
   1235                           "pthread stack cache size disable done"
   1236                           " via kludge\n");
   1237             *VG_(client__stack_cache_actsize__addr) = 1000 * 1000 * 1000;
   1238             /* Set a value big enough to be above the hardcoded maximum stack
   1239                cache size in glibc, small enough to allow a pthread stack size
   1240                to be added without risk of overflow. */
   1241          }
   1242       } else {
   1243           VG_(debugLog)(0,"sched",
   1244                         "WARNING: pthread stack cache cannot be disabled!\n");
   1245           VG_(clo_sim_hints) &= ~SimHint2S(SimHint_no_nptl_pthread_stackcache);
   1246           /* Remove SimHint_no_nptl_pthread_stackcache from VG_(clo_sim_hints)
   1247              to avoid having a msg for all following threads. */
   1248       }
   1249    }
   1250 
   1251    /* set the proper running signal mask */
   1252    block_signals();
   1253 
   1254    vg_assert(VG_(is_running_thread)(tid));
   1255 
   1256    dispatch_ctr = SCHEDULING_QUANTUM;
   1257 
   1258    while (!VG_(is_exiting)(tid)) {
   1259 
   1260       vg_assert(dispatch_ctr >= 0);
   1261       if (dispatch_ctr == 0) {
   1262 
   1263 	 /* Our slice is done, so yield the CPU to another thread.  On
    1264             Linux, this doesn't sleep between yielding and running,
   1265             since that would take too much time. */
   1266 
   1267 	 /* 4 July 06: it seems that a zero-length nsleep is needed to
   1268             cause async thread cancellation (canceller.c) to terminate
   1269             in finite time; else it is in some kind of race/starvation
   1270             situation and completion is arbitrarily delayed (although
   1271             this is not a deadlock).
   1272 
   1273             Unfortunately these sleeps cause MPI jobs not to terminate
   1274             sometimes (some kind of livelock).  So sleeping once
   1275             every N opportunities appears to work. */
   1276 
   1277 	 /* 3 Aug 06: doing sys__nsleep works but crashes some apps.
   1278             sys_yield also helps the problem, whilst not crashing apps. */
   1279 
   1280 	 VG_(release_BigLock)(tid, VgTs_Yielding,
   1281                                    "VG_(scheduler):timeslice");
   1282 	 /* ------------ now we don't have The Lock ------------ */
   1283 
   1284 	 VG_(acquire_BigLock)(tid, "VG_(scheduler):timeslice");
   1285 	 /* ------------ now we do have The Lock ------------ */
   1286 
   1287 	 /* OK, do some relatively expensive housekeeping stuff */
   1288 	 scheduler_sanity(tid);
   1289 	 VG_(sanity_check_general)(False);
   1290 
   1291 	 /* Look for any pending signals for this thread, and set them up
   1292 	    for delivery */
   1293 	 VG_(poll_signals)(tid);
   1294 
   1295 	 if (VG_(is_exiting)(tid))
   1296 	    break;		/* poll_signals picked up a fatal signal */
   1297 
   1298 	 /* For stats purposes only. */
   1299 	 n_scheduling_events_MAJOR++;
   1300 
   1301 	 /* Figure out how many bbs to ask vg_run_innerloop to do. */
   1302          dispatch_ctr = SCHEDULING_QUANTUM;
   1303 
   1304 	 /* paranoia ... */
   1305 	 vg_assert(tst->tid == tid);
   1306 	 vg_assert(tst->os_state.lwpid == VG_(gettid)());
   1307       }
   1308 
   1309       /* For stats purposes only. */
   1310       n_scheduling_events_MINOR++;
   1311 
   1312       if (0)
   1313          VG_(message)(Vg_DebugMsg, "thread %d: running for %d bbs\n",
   1314                                    tid, dispatch_ctr - 1 );
   1315 
   1316       HWord trc[2]; /* "two_words" */
   1317       run_thread_for_a_while( &trc[0],
   1318                               &dispatch_ctr,
   1319                               tid, 0/*ignored*/, False );
   1320 
   1321       if (VG_(clo_trace_sched) && VG_(clo_verbosity) > 2) {
   1322          const HChar *name = name_of_sched_event(trc[0]);
   1323          HChar buf[VG_(strlen)(name) + 10];    // large enough
   1324 	 VG_(sprintf)(buf, "TRC: %s", name);
   1325 	 print_sched_event(tid, buf);
   1326       }
   1327 
   1328       if (trc[0] == VEX_TRC_JMP_NOREDIR) {
   1329          /* If we got a request to run a no-redir version of
   1330             something, do so now -- handle_noredir_jump just (creates
   1331             and) runs that one translation.  The flip side is that the
   1332             noredir translation can't itself return another noredir
   1333             request -- that would be nonsensical.  It can, however,
   1334             return VG_TRC_BORING, which just means keep going as
   1335             normal. */
   1336          /* Note that the fact that we need to continue with a
   1337             no-redir jump is not recorded anywhere else in this
   1338             thread's state.  So we *must* execute the block right now
   1339             -- we can't fail to execute it and later resume with it,
   1340             because by then we'll have forgotten the fact that it
    1341             should be run as no-redir, but will instead get run as a normal,
    1342             potentially-redirected one, hence screwing up.  This really ought
   1343             to be cleaned up, by noting in the guest state that the
   1344             next block to be executed should be no-redir.  Then we can
   1345             suspend and resume at any point, which isn't the case at
   1346             the moment. */
   1347          /* We can't enter a no-redir translation with the dispatch
   1348             ctr set to zero, for the reasons commented just above --
   1349             we need to force it to execute right now.  So, if the
   1350             dispatch ctr is zero, set it to one.  Note that this would
    1351             have the bad side effect of holding the Big Lock arbitrarily
   1352             long should there be an arbitrarily long sequence of
   1353             back-to-back no-redir translations to run.  But we assert
   1354             just below that this translation cannot request another
   1355             no-redir jump, so we should be safe against that. */
   1356          if (dispatch_ctr == 0) {
   1357             dispatch_ctr = 1;
   1358          }
   1359          handle_noredir_jump( &trc[0],
   1360                               &dispatch_ctr,
   1361                               tid );
   1362          vg_assert(trc[0] != VEX_TRC_JMP_NOREDIR);
   1363 
   1364          /* This can't be allowed to happen, since it means the block
   1365             didn't execute, and we have no way to resume-as-noredir
   1366             after we get more timeslice.  But I don't think it ever
   1367             can, since handle_noredir_jump will assert if the counter
   1368             is zero on entry. */
   1369          vg_assert(trc[0] != VG_TRC_INNER_COUNTERZERO);
   1370          /* This asserts the same thing. */
   1371          vg_assert(dispatch_ctr >= 0);
   1372 
   1373          /* A no-redir translation can't return with a chain-me
   1374             request, since chaining in the no-redir cache is too
   1375             complex. */
   1376          vg_assert(trc[0] != VG_TRC_CHAIN_ME_TO_SLOW_EP
   1377                    && trc[0] != VG_TRC_CHAIN_ME_TO_FAST_EP);
   1378       }
   1379 
   1380       switch (trc[0]) {
   1381       case VEX_TRC_JMP_BORING:
   1382          /* assisted dispatch, no event.  Used by no-redir
   1383             translations to force return to the scheduler. */
   1384       case VG_TRC_BORING:
   1385          /* no special event, just keep going. */
   1386          break;
   1387 
   1388       case VG_TRC_INNER_FASTMISS:
   1389 	 vg_assert(dispatch_ctr >= 0);
   1390 	 handle_tt_miss(tid);
   1391 	 break;
   1392 
   1393       case VG_TRC_CHAIN_ME_TO_SLOW_EP: {
   1394          if (0) VG_(printf)("sched: CHAIN_TO_SLOW_EP: %p\n", (void*)trc[1] );
   1395          handle_chain_me(tid, (void*)trc[1], False);
   1396          break;
   1397       }
   1398 
   1399       case VG_TRC_CHAIN_ME_TO_FAST_EP: {
   1400          if (0) VG_(printf)("sched: CHAIN_TO_FAST_EP: %p\n", (void*)trc[1] );
   1401          handle_chain_me(tid, (void*)trc[1], True);
   1402          break;
   1403       }
   1404 
   1405       case VEX_TRC_JMP_CLIENTREQ:
   1406 	 do_client_request(tid);
   1407 	 break;
   1408 
   1409       case VEX_TRC_JMP_SYS_INT128:  /* x86-linux */
   1410       case VEX_TRC_JMP_SYS_INT129:  /* x86-darwin */
   1411       case VEX_TRC_JMP_SYS_INT130:  /* x86-darwin */
   1412       case VEX_TRC_JMP_SYS_SYSCALL: /* amd64-linux, ppc32-linux, amd64-darwin */
   1413 	 handle_syscall(tid, trc[0]);
   1414 	 if (VG_(clo_sanity_level) > 2)
   1415 	    VG_(sanity_check_general)(True); /* sanity-check every syscall */
   1416 	 break;
   1417 
   1418       case VEX_TRC_JMP_YIELD:
   1419 	 /* Explicit yield, because this thread is in a spin-lock
   1420 	    or something.  Only let the thread run for a short while
   1421             longer.  Because swapping to another thread is expensive,
   1422             we're prepared to let this thread eat a little more CPU
   1423             before swapping to another.  That means that short term
   1424             spins waiting for hardware to poke memory won't cause a
   1425             thread swap. */
   1426          if (dispatch_ctr > 1000)
   1427             dispatch_ctr = 1000;
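                  /* (That is, allow at most ~1000 more basic blocks to run
                     before the timeslice check fires again.) */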
   1428 	 break;
   1429 
   1430       case VG_TRC_INNER_COUNTERZERO:
   1431 	 /* Timeslice is out.  Let a new thread be scheduled. */
   1432 	 vg_assert(dispatch_ctr == 0);
   1433 	 break;
   1434 
   1435       case VG_TRC_FAULT_SIGNAL:
   1436 	 /* Everything should be set up (either we're exiting, or
   1437 	    about to start in a signal handler). */
   1438 	 break;
   1439 
   1440       case VEX_TRC_JMP_MAPFAIL:
   1441          /* Failure of arch-specific address translation (x86/amd64
   1442             segment override use) */
   1443          /* jrs 2005 03 11: is this correct? */
   1444          VG_(synth_fault)(tid);
   1445          break;
   1446 
   1447       case VEX_TRC_JMP_EMWARN: {
   1448          static Int  counts[EmNote_NUMBER];
   1449          static Bool counts_initted = False;
   1450          VexEmNote ew;
   1451          const HChar* what;
   1452          Bool      show;
   1453          Int       q;
   1454          if (!counts_initted) {
   1455             counts_initted = True;
   1456             for (q = 0; q < EmNote_NUMBER; q++)
   1457                counts[q] = 0;
   1458          }
   1459          ew   = (VexEmNote)VG_(threads)[tid].arch.vex.guest_EMNOTE;
   1460          what = (ew < 0 || ew >= EmNote_NUMBER)
   1461                    ? "unknown (?!)"
   1462                    : LibVEX_EmNote_string(ew);
   1463          show = (ew < 0 || ew >= EmNote_NUMBER)
   1464                    ? True
   1465                    : counts[ew]++ < 3;
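                  /* That is: unknown warnings are always shown; each known
                     warning is shown at most 3 times. */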
   1466          if (show && VG_(clo_show_emwarns) && !VG_(clo_xml)) {
   1467             VG_(message)( Vg_UserMsg,
   1468                           "Emulation warning: unsupported action:\n");
   1469             VG_(message)( Vg_UserMsg, "  %s\n", what);
   1470             VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
   1471          }
   1472          break;
   1473       }
   1474 
   1475       case VEX_TRC_JMP_EMFAIL: {
   1476          VexEmNote ew;
   1477          const HChar* what;
   1478          ew   = (VexEmNote)VG_(threads)[tid].arch.vex.guest_EMNOTE;
   1479          what = (ew < 0 || ew >= EmNote_NUMBER)
   1480                    ? "unknown (?!)"
   1481                    : LibVEX_EmNote_string(ew);
   1482          VG_(message)( Vg_UserMsg,
   1483                        "Emulation fatal error -- Valgrind cannot continue:\n");
   1484          VG_(message)( Vg_UserMsg, "  %s\n", what);
   1485          VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
   1486          VG_(message)(Vg_UserMsg, "\n");
   1487          VG_(message)(Vg_UserMsg, "Valgrind has to exit now.  Sorry.\n");
   1488          VG_(message)(Vg_UserMsg, "\n");
   1489          VG_(exit)(1);
   1490          break;
   1491       }
   1492 
   1493       case VEX_TRC_JMP_SIGILL:
   1494          VG_(synth_sigill)(tid, VG_(get_IP)(tid));
   1495          break;
   1496 
   1497       case VEX_TRC_JMP_SIGTRAP:
   1498          VG_(synth_sigtrap)(tid);
   1499          break;
   1500 
   1501       case VEX_TRC_JMP_SIGSEGV:
   1502          VG_(synth_fault)(tid);
   1503          break;
   1504 
   1505       case VEX_TRC_JMP_SIGBUS:
   1506          VG_(synth_sigbus)(tid);
   1507          break;
   1508 
   1509       case VEX_TRC_JMP_SIGFPE_INTDIV:
   1510          VG_(synth_sigfpe)(tid, VKI_FPE_INTDIV);
   1511          break;
   1512 
   1513       case VEX_TRC_JMP_SIGFPE_INTOVF:
   1514          VG_(synth_sigfpe)(tid, VKI_FPE_INTOVF);
   1515          break;
   1516 
   1517       case VEX_TRC_JMP_NODECODE: {
   1518          Addr addr = VG_(get_IP)(tid);
   1519 
   1520          if (VG_(clo_sigill_diag)) {
   1521             VG_(umsg)(
   1522                "valgrind: Unrecognised instruction at address %#lx.\n", addr);
   1523             VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
   1524 #        define M(a) VG_(umsg)(a "\n");
   1525          M("Your program just tried to execute an instruction that Valgrind" );
   1526          M("did not recognise.  There are two possible reasons for this."    );
   1527          M("1. Your program has a bug and erroneously jumped to a non-code"  );
   1528          M("   location.  If you are running Memcheck and you just saw a"    );
   1529          M("   warning about a bad jump, it's probably your program's fault.");
   1530          M("2. The instruction is legitimate but Valgrind doesn't handle it,");
   1531          M("   i.e. it's Valgrind's fault.  If you think this is the case or");
   1532          M("   you are not sure, please let us know and we'll try to fix it.");
   1533          M("Either way, Valgrind will now raise a SIGILL signal which will"  );
   1534          M("probably kill your program."                                     );
   1535 #        undef M
   1536          }
   1537 #        if defined(VGA_s390x)
   1538          /* Now that the complaint is out we need to adjust the guest_IA. The
   1539             reason is that -- after raising the exception -- execution will
   1540             continue with the insn that follows the invalid insn. As the first
   1541             2 bits of the invalid insn determine its length in the usual way,
   1542             we can compute the address of the next insn here and adjust the
   1543             guest_IA accordingly. This adjustment is essential and tested by
   1544             none/tests/s390x/op_exception.c (which would loop forever
   1545             otherwise) */
   1546          UChar byte = ((UChar *)addr)[0];
   1547          UInt  insn_length = ((((byte >> 6) + 1) >> 1) + 1) << 1;
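                  /* (Top two bits of the first byte: 00 -> 2-byte insn,
                     01 or 10 -> 4-byte, 11 -> 6-byte.) */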
   1548          Addr  next_insn_addr = addr + insn_length;
   1549          VG_(set_IP)(tid, next_insn_addr);
   1550 #        endif
   1551          VG_(synth_sigill)(tid, addr);
   1552          break;
   1553       }
   1554 
   1555       case VEX_TRC_JMP_INVALICACHE:
   1556          VG_(discard_translations)(
   1557             (Addr)VG_(threads)[tid].arch.vex.guest_CMSTART,
   1558             VG_(threads)[tid].arch.vex.guest_CMLEN,
   1559             "scheduler(VEX_TRC_JMP_INVALICACHE)"
   1560          );
   1561          if (0)
   1562             VG_(printf)("dump translations done.\n");
   1563          break;
   1564 
   1565       case VEX_TRC_JMP_FLUSHDCACHE: {
   1566          void* start = (void*)VG_(threads)[tid].arch.vex.guest_CMSTART;
   1567          SizeT len   = VG_(threads)[tid].arch.vex.guest_CMLEN;
   1568          VG_(debugLog)(2, "sched", "flush_dcache(%p, %lu)\n", start, len);
   1569          VG_(flush_dcache)(start, len);
   1570          break;
   1571       }
   1572 
   1573       case VG_TRC_INVARIANT_FAILED:
   1574          /* This typically happens if, after running generated code,
   1575             it is detected that host CPU settings (eg, FPU/Vector
   1576             control words) are not as they should be.  Vex's code
   1577             generation specifies the state such control words should
   1578             be in on entry to Vex-generated code, and they should be
   1579             unchanged on exit from it.  Failure of this assertion
   1580             usually means a bug in Vex's code generation. */
   1581          //{ UInt xx;
   1582          //  __asm__ __volatile__ (
   1583          //     "\t.word 0xEEF12A10\n"  // fmrx r2,fpscr
   1584          //     "\tmov %0, r2" : "=r"(xx) : : "r2" );
   1585          //  VG_(printf)("QQQQ new fpscr = %08x\n", xx);
   1586          //}
   1587          vg_assert2(0, "VG_(scheduler), phase 3: "
   1588                        "run_innerloop detected host "
   1589                        "state invariant failure", trc);
   1590 
   1591       case VEX_TRC_JMP_SYS_SYSENTER:
   1592          /* Do whatever simulation is appropriate for an x86 sysenter
   1593             instruction.  Note that it is critical to set this thread's
   1594             guest_EIP to point at the code to execute after the
   1595             sysenter, since Vex-generated code will not have set it --
   1596             vex does not know what it should be.  Vex sets the next
   1597             address to zero, so if you don't set guest_EIP, the thread
   1598             will jump to zero afterwards and probably die as a result. */
   1599 #        if defined(VGP_x86_linux)
   1600          vg_assert2(0, "VG_(scheduler), phase 3: "
   1601                        "sysenter_x86 on x86-linux is not supported");
   1602 #        elif defined(VGP_x86_darwin)
   1603          /* return address in client edx */
   1604          VG_(threads)[tid].arch.vex.guest_EIP
   1605             = VG_(threads)[tid].arch.vex.guest_EDX;
   1606          handle_syscall(tid, trc[0]);
   1607 #        else
   1608          vg_assert2(0, "VG_(scheduler), phase 3: "
   1609                        "sysenter_x86 on non-x86 platform?!?!");
   1610 #        endif
   1611          break;
   1612 
   1613       default:
   1614 	 vg_assert2(0, "VG_(scheduler), phase 3: "
   1615                        "unexpected thread return code (%u)", trc[0]);
   1616 	 /* NOTREACHED */
   1617 	 break;
   1618 
   1619       } /* switch (trc) */
   1620 
   1621       if (UNLIKELY(VG_(clo_profyle_sbs)) && VG_(clo_profyle_interval) > 0)
   1622          maybe_show_sb_profile();
   1623    }
   1624 
   1625    if (VG_(clo_trace_sched))
   1626       print_sched_event(tid, "exiting VG_(scheduler)");
   1627 
   1628    vg_assert(VG_(is_exiting)(tid));
   1629 
   1630    return tst->exitreason;
   1631 }
   1632 
   1633 
   1634 /*
    1635    This causes all threads to forcibly exit.  They aren't actually
   1636    dead by the time this returns; you need to call
   1637    VG_(reap_threads)() to wait for them.
   1638  */
   1639 void VG_(nuke_all_threads_except) ( ThreadId me, VgSchedReturnCode src )
   1640 {
   1641    ThreadId tid;
   1642 
   1643    vg_assert(VG_(is_running_thread)(me));
   1644 
   1645    for (tid = 1; tid < VG_N_THREADS; tid++) {
   1646       if (tid == me
   1647           || VG_(threads)[tid].status == VgTs_Empty)
   1648          continue;
   1649       if (0)
   1650          VG_(printf)(
   1651             "VG_(nuke_all_threads_except): nuking tid %d\n", tid);
   1652 
   1653       VG_(threads)[tid].exitreason = src;
   1654       if (src == VgSrc_FatalSig)
   1655          VG_(threads)[tid].os_state.fatalsig = VKI_SIGKILL;
   1656       VG_(get_thread_out_of_syscall)(tid);
   1657    }
   1658 }
   1659 
   1660 
   1661 /* ---------------------------------------------------------------------
   1662    Specifying shadow register values
   1663    ------------------------------------------------------------------ */
   1664 
   1665 #if defined(VGA_x86)
   1666 #  define VG_CLREQ_ARGS       guest_EAX
   1667 #  define VG_CLREQ_RET        guest_EDX
   1668 #elif defined(VGA_amd64)
   1669 #  define VG_CLREQ_ARGS       guest_RAX
   1670 #  define VG_CLREQ_RET        guest_RDX
   1671 #elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
   1672 #  define VG_CLREQ_ARGS       guest_GPR4
   1673 #  define VG_CLREQ_RET        guest_GPR3
   1674 #elif defined(VGA_arm)
   1675 #  define VG_CLREQ_ARGS       guest_R4
   1676 #  define VG_CLREQ_RET        guest_R3
   1677 #elif defined(VGA_arm64)
   1678 #  define VG_CLREQ_ARGS       guest_X4
   1679 #  define VG_CLREQ_RET        guest_X3
   1680 #elif defined (VGA_s390x)
   1681 #  define VG_CLREQ_ARGS       guest_r2
   1682 #  define VG_CLREQ_RET        guest_r3
   1683 #elif defined(VGA_mips32) || defined(VGA_mips64)
   1684 #  define VG_CLREQ_ARGS       guest_r12
   1685 #  define VG_CLREQ_RET        guest_r11
   1686 #elif defined(VGA_tilegx)
   1687 #  define VG_CLREQ_ARGS       guest_r12
   1688 #  define VG_CLREQ_RET        guest_r11
   1689 #else
   1690 #  error Unknown arch
   1691 #endif
   1692 
   1693 #define CLREQ_ARGS(regs)   ((regs).vex.VG_CLREQ_ARGS)
   1694 #define CLREQ_RET(regs)    ((regs).vex.VG_CLREQ_RET)
   1695 #define O_CLREQ_RET        (offsetof(VexGuestArchState, VG_CLREQ_RET))
   1696 
   1697 // These macros write a value to a client's thread register, and tell the
   1698 // tool that it's happened (if necessary).
   1699 
   1700 #define SET_CLREQ_RETVAL(zztid, zzval) \
   1701    do { CLREQ_RET(VG_(threads)[zztid].arch) = (zzval); \
   1702         VG_TRACK( post_reg_write, \
   1703                   Vg_CoreClientReq, zztid, O_CLREQ_RET, sizeof(UWord)); \
   1704    } while (0)
   1705 
   1706 #define SET_CLCALL_RETVAL(zztid, zzval, f) \
   1707    do { CLREQ_RET(VG_(threads)[zztid].arch) = (zzval); \
   1708         VG_TRACK( post_reg_write_clientcall_return, \
   1709                   zztid, O_CLREQ_RET, sizeof(UWord), f); \
   1710    } while (0)
   1711 
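         /* A minimal usage sketch (the request name VG_USERREQ__EXAMPLE is
            hypothetical, purely for illustration): a handler replies to the
            requesting client thread like this:

               case VG_USERREQ__EXAMPLE:
                  SET_CLREQ_RETVAL( tid, 42 );  // value the client will see
                  break;

            The real handlers in do_client_request() below follow this
            pattern. */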
   1712 
   1713 /* ---------------------------------------------------------------------
   1714    Handle client requests.
   1715    ------------------------------------------------------------------ */
   1716 
   1717 // OS-specific(?) client requests
   1718 static Bool os_client_request(ThreadId tid, UWord *args)
   1719 {
   1720    Bool handled = True;
   1721 
   1722    vg_assert(VG_(is_running_thread)(tid));
   1723 
   1724    switch(args[0]) {
   1725    case VG_USERREQ__LIBC_FREERES_DONE:
   1726       /* This is equivalent to an exit() syscall, but we don't set the
   1727 	 exitcode (since it might already be set) */
   1728       if (0 || VG_(clo_trace_syscalls) || VG_(clo_trace_sched))
   1729          VG_(message)(Vg_DebugMsg,
   1730                       "__libc_freeres() done; really quitting!\n");
   1731       VG_(threads)[tid].exitreason = VgSrc_ExitThread;
   1732       break;
   1733 
   1734    default:
   1735       handled = False;
   1736       break;
   1737    }
   1738 
   1739    return handled;
   1740 }
   1741 
   1742 
   1743 /* Write out a client message, possibly including a back trace. Return
   1744    the number of characters written. In case of XML output, the format
   1745    string as well as any arguments it requires will be XML'ified.
   1746    I.e. special characters such as the angle brackets will be translated
   1747    into proper escape sequences. */
   1748 static
   1749 Int print_client_message( ThreadId tid, const HChar *format,
   1750                           va_list *vargsp, Bool include_backtrace)
   1751 {
   1752    Int count;
   1753 
   1754    if (VG_(clo_xml)) {
   1755       /* Translate the format string as follows:
   1756          <  -->  &lt;
   1757          >  -->  &gt;
   1758          &  -->  &amp;
   1759          %s -->  %pS
   1760          Yes, yes, it's simplified but in synch with
   1761          myvprintf_str_XML_simplistic and VG_(debugLog_vprintf).
   1762       */
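               /* For example, the format string "<p at %p: %s>" is rewritten
                  by the loop below as "&lt;p at %p: %pS&gt;". */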
   1763 
   1764       /* Allocate a buffer that is for sure large enough. */
   1765       HChar xml_format[VG_(strlen)(format) * 5 + 1];
   1766 
   1767       const HChar *p;
   1768       HChar *q = xml_format;
   1769 
   1770       for (p = format; *p; ++p) {
   1771          switch (*p) {
   1772          case '<': VG_(strcpy)(q, "&lt;");  q += 4; break;
   1773          case '>': VG_(strcpy)(q, "&gt;");  q += 4; break;
   1774          case '&': VG_(strcpy)(q, "&amp;"); q += 5; break;
   1775          case '%':
   1776             /* Careful: make sure %%s stays %%s */
   1777             *q++ = *p++;
   1778             if (*p == 's') {
   1779               *q++ = 'p';
   1780               *q++ = 'S';
   1781             } else {
   1782               *q++ = *p;
   1783             }
   1784             break;
   1785 
   1786          default:
   1787             *q++ = *p;
   1788             break;
   1789          }
   1790       }
   1791       *q = '\0';
   1792 
   1793       VG_(printf_xml)( "<clientmsg>\n" );
   1794       VG_(printf_xml)( "  <tid>%d</tid>\n", tid );
   1795       VG_(printf_xml)( "  <text>" );
   1796       count = VG_(vprintf_xml)( xml_format, *vargsp );
   1797       VG_(printf_xml)( "  </text>\n" );
   1798    } else {
   1799       count = VG_(vmessage)( Vg_ClientMsg, format, *vargsp );
   1800       VG_(message_flush)();
   1801    }
   1802 
   1803    if (include_backtrace)
   1804       VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
   1805 
   1806    if (VG_(clo_xml))
   1807       VG_(printf_xml)( "</clientmsg>\n" );
   1808 
   1809    return count;
   1810 }
   1811 
   1812 
   1813 /* Do a client request for the thread tid.  After the request, tid may
   1814    or may not still be runnable; if not, the scheduler will have to
   1815    choose a new thread to run.
   1816 */
   1817 static
   1818 void do_client_request ( ThreadId tid )
   1819 {
   1820    UWord* arg = (UWord*)(CLREQ_ARGS(VG_(threads)[tid].arch));
   1821    UWord req_no = arg[0];
   1822 
   1823    if (0)
   1824       VG_(printf)("req no = 0x%llx, arg = %p\n", (ULong)req_no, arg);
   1825    switch (req_no) {
   1826 
   1827       case VG_USERREQ__CLIENT_CALL0: {
   1828          UWord (*f)(ThreadId) = (__typeof__(f))arg[1];
   1829 	 if (f == NULL)
   1830 	    VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL0: func=%p\n", f);
   1831 	 else
   1832 	    SET_CLCALL_RETVAL(tid, f ( tid ), (Addr)f);
   1833          break;
   1834       }
   1835       case VG_USERREQ__CLIENT_CALL1: {
   1836          UWord (*f)(ThreadId, UWord) = (__typeof__(f))arg[1];
   1837 	 if (f == NULL)
   1838 	    VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL1: func=%p\n", f);
   1839 	 else
   1840 	    SET_CLCALL_RETVAL(tid, f ( tid, arg[2] ), (Addr)f );
   1841          break;
   1842       }
   1843       case VG_USERREQ__CLIENT_CALL2: {
   1844          UWord (*f)(ThreadId, UWord, UWord) = (__typeof__(f))arg[1];
   1845 	 if (f == NULL)
   1846 	    VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL2: func=%p\n", f);
   1847 	 else
   1848 	    SET_CLCALL_RETVAL(tid, f ( tid, arg[2], arg[3] ), (Addr)f );
   1849          break;
   1850       }
   1851       case VG_USERREQ__CLIENT_CALL3: {
   1852          UWord (*f)(ThreadId, UWord, UWord, UWord) = (__typeof__(f))arg[1];
   1853 	 if (f == NULL)
   1854 	    VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL3: func=%p\n", f);
   1855 	 else
   1856 	    SET_CLCALL_RETVAL(tid, f ( tid, arg[2], arg[3], arg[4] ), (Addr)f );
   1857          break;
   1858       }
   1859 
   1860       // Nb: this looks like a circular definition, because it kind of is.
   1861       // See comment in valgrind.h to understand what's going on.
   1862       case VG_USERREQ__RUNNING_ON_VALGRIND:
   1863          SET_CLREQ_RETVAL(tid, RUNNING_ON_VALGRIND+1);
   1864          break;
   1865 
   1866       case VG_USERREQ__PRINTF: {
   1867          const HChar* format = (HChar *)arg[1];
   1868          /* JRS 2010-Jan-28: this is DEPRECATED; use the
   1869             _VALIST_BY_REF version instead */
   1870          if (sizeof(va_list) != sizeof(UWord))
   1871             goto va_list_casting_error_NORETURN;
   1872          union {
   1873             va_list vargs;
   1874             unsigned long uw;
   1875          } u;
   1876          u.uw = (unsigned long)arg[2];
   1877          Int count =
   1878             print_client_message( tid, format, &u.vargs,
   1879                                   /* include_backtrace */ False );
   1880          SET_CLREQ_RETVAL( tid, count );
   1881          break;
   1882       }
   1883 
   1884       case VG_USERREQ__PRINTF_BACKTRACE: {
   1885          const HChar* format = (HChar *)arg[1];
   1886          /* JRS 2010-Jan-28: this is DEPRECATED; use the
   1887             _VALIST_BY_REF version instead */
   1888          if (sizeof(va_list) != sizeof(UWord))
   1889             goto va_list_casting_error_NORETURN;
   1890          union {
   1891             va_list vargs;
   1892             unsigned long uw;
   1893          } u;
   1894          u.uw = (unsigned long)arg[2];
   1895          Int count =
   1896             print_client_message( tid, format, &u.vargs,
   1897                                   /* include_backtrace */ True );
   1898          SET_CLREQ_RETVAL( tid, count );
   1899          break;
   1900       }
   1901 
   1902       case VG_USERREQ__PRINTF_VALIST_BY_REF: {
   1903          const HChar* format = (HChar *)arg[1];
   1904          va_list* vargsp = (va_list*)arg[2];
   1905          Int count =
   1906             print_client_message( tid, format, vargsp,
   1907                                   /* include_backtrace */ False );
   1908 
   1909          SET_CLREQ_RETVAL( tid, count );
   1910          break;
   1911       }
   1912 
   1913       case VG_USERREQ__PRINTF_BACKTRACE_VALIST_BY_REF: {
   1914          const HChar* format = (HChar *)arg[1];
   1915          va_list* vargsp = (va_list*)arg[2];
   1916          Int count =
   1917             print_client_message( tid, format, vargsp,
   1918                                   /* include_backtrace */ True );
   1919          SET_CLREQ_RETVAL( tid, count );
   1920          break;
   1921       }
   1922 
   1923       case VG_USERREQ__INTERNAL_PRINTF_VALIST_BY_REF: {
   1924          va_list* vargsp = (va_list*)arg[2];
   1925          Int count =
   1926             VG_(vmessage)( Vg_DebugMsg, (HChar *)arg[1], *vargsp );
   1927          VG_(message_flush)();
   1928          SET_CLREQ_RETVAL( tid, count );
   1929          break;
   1930       }
   1931 
   1932       case VG_USERREQ__ADD_IFUNC_TARGET: {
   1933          VG_(redir_add_ifunc_target)( arg[1], arg[2] );
   1934          SET_CLREQ_RETVAL( tid, 0);
   1935          break; }
   1936 
   1937       case VG_USERREQ__STACK_REGISTER: {
   1938          UWord sid = VG_(register_stack)((Addr)arg[1], (Addr)arg[2]);
   1939          SET_CLREQ_RETVAL( tid, sid );
   1940          break; }
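               /* (Client side, for reference: VALGRIND_STACK_REGISTER(start, end)
                  returns this id, which can later be passed to
                  VALGRIND_STACK_DEREGISTER.) */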
   1941 
   1942       case VG_USERREQ__STACK_DEREGISTER: {
   1943          VG_(deregister_stack)(arg[1]);
   1944          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
   1945          break; }
   1946 
   1947       case VG_USERREQ__STACK_CHANGE: {
   1948          VG_(change_stack)(arg[1], (Addr)arg[2], (Addr)arg[3]);
   1949          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
   1950          break; }
   1951 
   1952       case VG_USERREQ__GET_MALLOCFUNCS: {
   1953 	 struct vg_mallocfunc_info *info = (struct vg_mallocfunc_info *)arg[1];
   1954 
   1955 	 info->tl_malloc               = VG_(tdict).tool_malloc;
   1956 	 info->tl_calloc               = VG_(tdict).tool_calloc;
   1957 	 info->tl_realloc              = VG_(tdict).tool_realloc;
   1958 	 info->tl_memalign             = VG_(tdict).tool_memalign;
   1959 	 info->tl___builtin_new        = VG_(tdict).tool___builtin_new;
   1960 	 info->tl___builtin_vec_new    = VG_(tdict).tool___builtin_vec_new;
   1961 	 info->tl_free                 = VG_(tdict).tool_free;
   1962 	 info->tl___builtin_delete     = VG_(tdict).tool___builtin_delete;
   1963 	 info->tl___builtin_vec_delete = VG_(tdict).tool___builtin_vec_delete;
   1964          info->tl_malloc_usable_size   = VG_(tdict).tool_malloc_usable_size;
   1965 
   1966 	 info->mallinfo                = VG_(mallinfo);
   1967 	 info->clo_trace_malloc        = VG_(clo_trace_malloc);
   1968 
   1969          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
   1970 
   1971 	 break;
   1972       }
   1973 
   1974       /* Requests from the client program */
   1975 
   1976       case VG_USERREQ__DISCARD_TRANSLATIONS:
   1977          if (VG_(clo_verbosity) > 2)
   1978             VG_(printf)( "client request: DISCARD_TRANSLATIONS,"
   1979                          " addr %p,  len %lu\n",
   1980                          (void*)arg[1], arg[2] );
   1981 
   1982          VG_(discard_translations)(
   1983             arg[1], arg[2], "scheduler(VG_USERREQ__DISCARD_TRANSLATIONS)"
   1984          );
   1985 
   1986          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
   1987 	 break;
   1988 
   1989       case VG_USERREQ__COUNT_ERRORS:
   1990          SET_CLREQ_RETVAL( tid, VG_(get_n_errs_found)() );
   1991          break;
   1992 
   1993       case VG_USERREQ__LOAD_PDB_DEBUGINFO:
   1994          VG_(di_notify_pdb_debuginfo)( arg[1], arg[2], arg[3], arg[4] );
   1995          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
   1996          break;
   1997 
   1998       case VG_USERREQ__MAP_IP_TO_SRCLOC: {
   1999          Addr   ip    = arg[1];
   2000          HChar* buf64 = (HChar*)arg[2];  // points to a HChar [64] array
   2001          const HChar *buf;  // points to a string of unknown size
   2002 
   2003          VG_(memset)(buf64, 0, 64);
   2004          UInt linenum = 0;
   2005          Bool ok = VG_(get_filename_linenum)(
   2006                       ip, &buf, NULL, &linenum
   2007                    );
   2008          if (ok) {
   2009             /* For backward compatibility truncate the filename to
   2010                49 characters. */
   2011             VG_(strncpy)(buf64, buf, 50);
   2012             buf64[49] = '\0';
   2013             UInt i;
   2014             for (i = 0; i < 50; i++) {
   2015                if (buf64[i] == 0)
   2016                   break;
   2017             }
   2018             VG_(sprintf)(buf64+i, ":%u", linenum);  // safe
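                     /* e.g. a hit in "foo.c" at line 12 ends up as
                        "foo.c:12" in buf64 (illustrative values). */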
   2019          } else {
   2020             buf64[0] = 0;
   2021          }
   2022 
   2023          SET_CLREQ_RETVAL( tid, 0 ); /* return value is meaningless */
   2024          break;
   2025       }
   2026 
   2027       case VG_USERREQ__CHANGE_ERR_DISABLEMENT: {
   2028          Word delta = arg[1];
   2029          vg_assert(delta == 1 || delta == -1);
   2030          ThreadState* tst = VG_(get_ThreadState)(tid);
   2031          vg_assert(tst);
   2032          if (delta == 1 && tst->err_disablement_level < 0xFFFFFFFF) {
   2033             tst->err_disablement_level++;
   2034          }
   2035          else
   2036          if (delta == -1 && tst->err_disablement_level > 0) {
   2037             tst->err_disablement_level--;
   2038          }
   2039          SET_CLREQ_RETVAL( tid, 0 ); /* return value is meaningless */
   2040          break;
   2041       }
   2042 
   2043       case VG_USERREQ__GDB_MONITOR_COMMAND: {
   2044          UWord ret;
   2045          ret = (UWord) VG_(client_monitor_command) ((HChar*)arg[1]);
   2046          SET_CLREQ_RETVAL(tid, ret);
   2047          break;
   2048       }
   2049 
   2050       case VG_USERREQ__MALLOCLIKE_BLOCK:
   2051       case VG_USERREQ__RESIZEINPLACE_BLOCK:
   2052       case VG_USERREQ__FREELIKE_BLOCK:
    2053          // Ignore them if the addr is NULL;  otherwise pass them on to the tool.
   2054          if (!arg[1]) {
   2055             SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
   2056             break;
   2057          } else {
   2058             goto my_default;
   2059          }
   2060 
   2061       case VG_USERREQ__VEX_INIT_FOR_IRI:
   2062          LibVEX_InitIRI ( (IRICB *)arg[1] );
   2063          break;
   2064 
   2065       default:
   2066        my_default:
   2067 	 if (os_client_request(tid, arg)) {
   2068 	    // do nothing, os_client_request() handled it
   2069          } else if (VG_(needs).client_requests) {
   2070 	    UWord ret;
   2071 
   2072             if (VG_(clo_verbosity) > 2)
   2073                VG_(printf)("client request: code %lx,  addr %p,  len %lu\n",
   2074                            arg[0], (void*)arg[1], arg[2] );
   2075 
   2076 	    if ( VG_TDICT_CALL(tool_handle_client_request, tid, arg, &ret) )
   2077 	       SET_CLREQ_RETVAL(tid, ret);
   2078          } else {
   2079 	    static Bool whined = False;
   2080 
   2081 	    if (!whined && VG_(clo_verbosity) > 2) {
   2082                // Allow for requests in core, but defined by tools, which
   2083                // have 0 and 0 in their two high bytes.
   2084                HChar c1 = (arg[0] >> 24) & 0xff;
   2085                HChar c2 = (arg[0] >> 16) & 0xff;
   2086                if (c1 == 0) c1 = '_';
   2087                if (c2 == 0) c2 = '_';
   2088 	       VG_(message)(Vg_UserMsg, "Warning:\n"
   2089                    "  unhandled client request: 0x%lx (%c%c+0x%lx).  Perhaps\n"
   2090 		   "  VG_(needs).client_requests should be set?\n",
   2091 			    arg[0], c1, c2, arg[0] & 0xffff);
   2092 	       whined = True;
   2093 	    }
   2094          }
   2095          break;
   2096    }
   2097    return;
   2098 
   2099    /*NOTREACHED*/
   2100   va_list_casting_error_NORETURN:
   2101    VG_(umsg)(
   2102       "Valgrind: fatal error - cannot continue: use of the deprecated\n"
   2103       "client requests VG_USERREQ__PRINTF or VG_USERREQ__PRINTF_BACKTRACE\n"
   2104       "on a platform where they cannot be supported.  Please use the\n"
   2105       "equivalent _VALIST_BY_REF versions instead.\n"
   2106       "\n"
   2107       "This is a binary-incompatible change in Valgrind's client request\n"
   2108       "mechanism.  It is unfortunate, but difficult to avoid.  End-users\n"
   2109       "are expected to almost never see this message.  The only case in\n"
   2110       "which you might see this message is if your code uses the macros\n"
   2111       "VALGRIND_PRINTF or VALGRIND_PRINTF_BACKTRACE.  If so, you will need\n"
   2112       "to recompile such code, using the header files from this version of\n"
   2113       "Valgrind, and not any previous version.\n"
   2114       "\n"
   2115       "If you see this mesage in any other circumstances, it is probably\n"
   2116       "a bug in Valgrind.  In this case, please file a bug report at\n"
   2117       "\n"
   2118       "   http://www.valgrind.org/support/bug_reports.html\n"
   2119       "\n"
   2120       "Will now abort.\n"
   2121    );
   2122    vg_assert(0);
   2123 }
   2124 
   2125 
   2126 /* ---------------------------------------------------------------------
   2127    Sanity checking (permanently engaged)
   2128    ------------------------------------------------------------------ */
   2129 
   2130 /* Internal consistency checks on the sched structures. */
   2131 static
   2132 void scheduler_sanity ( ThreadId tid )
   2133 {
   2134    Bool bad = False;
   2135    Int lwpid = VG_(gettid)();
   2136 
   2137    if (!VG_(is_running_thread)(tid)) {
   2138       VG_(message)(Vg_DebugMsg,
   2139 		   "Thread %d is supposed to be running, "
   2140                    "but doesn't own the_BigLock (owned by %d)\n",
   2141 		   tid, VG_(running_tid));
   2142       bad = True;
   2143    }
   2144 
   2145    if (lwpid != VG_(threads)[tid].os_state.lwpid) {
   2146       VG_(message)(Vg_DebugMsg,
   2147                    "Thread %d supposed to be in LWP %d, but we're actually %d\n",
   2148                    tid, VG_(threads)[tid].os_state.lwpid, VG_(gettid)());
   2149       bad = True;
   2150    }
   2151 
   2152    if (lwpid != ML_(get_sched_lock_owner)(the_BigLock)) {
   2153       VG_(message)(Vg_DebugMsg,
   2154                    "Thread (LWPID) %d doesn't own the_BigLock\n",
   2155                    tid);
   2156       bad = True;
   2157    }
   2158 
   2159    if (0) {
   2160       /* Periodically show the state of all threads, for debugging
   2161          purposes. */
   2162       static UInt lasttime = 0;
   2163       UInt now;
   2164       now = VG_(read_millisecond_timer)();
   2165       if ((!bad) && (lasttime + 4000/*ms*/ <= now)) {
   2166          lasttime = now;
   2167          VG_(printf)("\n------------ Sched State at %d ms ------------\n",
   2168                      (Int)now);
   2169          VG_(show_sched_status)(True,  // host_stacktrace
   2170                                 True,  // stack_usage
    2171                                 True); // exited_threads
   2172       }
   2173    }
   2174 
   2175    /* core_panic also shows the sched status, which is why we don't
   2176       show it above if bad==True. */
   2177    if (bad)
   2178       VG_(core_panic)("scheduler_sanity: failed");
   2179 }
   2180 
   2181 void VG_(sanity_check_general) ( Bool force_expensive )
   2182 {
   2183    ThreadId tid;
   2184 
   2185    static UInt next_slow_check_at = 1;
   2186    static UInt slow_check_interval = 25;
   2187 
   2188    if (VG_(clo_sanity_level) < 1) return;
   2189 
   2190    /* --- First do all the tests that we can do quickly. ---*/
   2191 
   2192    sanity_fast_count++;
   2193 
   2194    /* Check stuff pertaining to the memory check system. */
   2195 
   2196    /* Check that nobody has spuriously claimed that the first or
   2197       last 16 pages of memory have become accessible [...] */
   2198    if (VG_(needs).sanity_checks) {
   2199       vg_assert(VG_TDICT_CALL(tool_cheap_sanity_check));
   2200    }
   2201 
   2202    /* --- Now some more expensive checks. ---*/
   2203 
    2204    /* Every now and again, check some more expensive stuff.
   2205       Gradually increase the interval between such checks so as not to
   2206       burden long-running programs too much. */
   2207    if ( force_expensive
   2208         || VG_(clo_sanity_level) > 1
   2209         || (VG_(clo_sanity_level) == 1
   2210             && sanity_fast_count == next_slow_check_at)) {
   2211 
   2212       if (0) VG_(printf)("SLOW at %d\n", sanity_fast_count-1);
   2213 
   2214       next_slow_check_at = sanity_fast_count - 1 + slow_check_interval;
   2215       slow_check_interval++;
   2216       sanity_slow_count++;
   2217 
   2218       if (VG_(needs).sanity_checks) {
   2219           vg_assert(VG_TDICT_CALL(tool_expensive_sanity_check));
   2220       }
   2221 
   2222       /* Look for stack overruns.  Visit all threads. */
   2223       for (tid = 1; tid < VG_N_THREADS; tid++) {
   2224 	 SizeT    remains;
   2225          VgStack* stack;
   2226 
   2227 	 if (VG_(threads)[tid].status == VgTs_Empty ||
   2228 	     VG_(threads)[tid].status == VgTs_Zombie)
   2229 	    continue;
   2230 
   2231          stack
   2232             = (VgStack*)
   2233               VG_(get_ThreadState)(tid)->os_state.valgrind_stack_base;
   2234          SizeT limit
   2235             = 4096; // Let's say.  Checking more causes lots of L2 misses.
   2236 	 remains
   2237             = VG_(am_get_VgStack_unused_szB)(stack, limit);
   2238 	 if (remains < limit)
   2239 	    VG_(message)(Vg_DebugMsg,
   2240                          "WARNING: Thread %d is within %ld bytes "
   2241                          "of running out of valgrind stack!\n"
   2242                          "Valgrind stack size can be increased "
   2243                          "using --valgrind-stacksize=....\n",
   2244 		         tid, remains);
   2245       }
   2246    }
   2247 
   2248    if (VG_(clo_sanity_level) > 1) {
   2249       /* Check sanity of the low-level memory manager.  Note that bugs
   2250          in the client's code can cause this to fail, so we don't do
   2251          this check unless specially asked for.  And because it's
   2252          potentially very expensive. */
   2253       VG_(sanity_check_malloc_all)();
   2254    }
   2255 }
   2256 
   2257 /*--------------------------------------------------------------------*/
   2258 /*--- end                                                          ---*/
   2259 /*--------------------------------------------------------------------*/
   2260