      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Thread scheduling.                               scheduler.c ---*/
      4 /*--------------------------------------------------------------------*/
      5 
      6 /*
      7    This file is part of Valgrind, a dynamic binary instrumentation
      8    framework.
      9 
     10    Copyright (C) 2000-2011 Julian Seward
     11       jseward (at) acm.org
     12 
     13    This program is free software; you can redistribute it and/or
     14    modify it under the terms of the GNU General Public License as
     15    published by the Free Software Foundation; either version 2 of the
     16    License, or (at your option) any later version.
     17 
     18    This program is distributed in the hope that it will be useful, but
     19    WITHOUT ANY WARRANTY; without even the implied warranty of
     20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     21    General Public License for more details.
     22 
     23    You should have received a copy of the GNU General Public License
     24    along with this program; if not, write to the Free Software
     25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     26    02111-1307, USA.
     27 
     28    The GNU General Public License is contained in the file COPYING.
     29 */
     30 
     31 /*
     32    Overview
     33 
     34    Valgrind tries to emulate the kernel's threading as closely as
     35    possible.  The client does all threading via the normal syscalls
     36    (on Linux: clone, etc).  Valgrind emulates this by creating exactly
     37    the same process structure as would be created without Valgrind.
     38    There are no extra threads.
     39 
     40    The main difference is that Valgrind only allows one client thread
     41    to run at once.  This is controlled with the CPU Big Lock,
     42    "the_BigLock".  Any time a thread wants to run client code or
     43    manipulate any shared state (which is anything other than its own
     44    ThreadState entry), it must hold the_BigLock.
     45 
     46    When a thread is about to block in a blocking syscall, it releases
     47    the_BigLock, and re-takes it when it becomes runnable again (either
     48    because the syscall finished, or we took a signal).
     49 
     50    VG_(scheduler) therefore runs in each thread.  It returns only when
     51    the thread is exiting, either because it exited itself, or it was
     52    told to exit by another thread.
     53 
     54    This file is almost entirely OS-independent.  The details of how
     55    the OS handles threading and signalling are abstracted away and
     56    implemented elsewhere.  [Some of the functions have worked their
     57    way back for the moment, until we do an OS port in earnest...]
     58  */
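
/* The sketch below (deliberately not compiled) illustrates the lock
   discipline described above, as it applies around a blocking
   syscall.  The helper do_blocking_syscall_in_kernel is hypothetical;
   the real logic lives in the syscall wrappers (m_syswrap). */
#if 0
static void example_blocking_syscall ( ThreadId tid )
{
   /* Precondition: we hold the_BigLock and are VgTs_Runnable. */
   VG_(release_BigLock)(tid, VgTs_WaitSys, "example");
   /* ---- lock not held: other threads may run client code ---- */
   do_blocking_syscall_in_kernel(tid);  /* hypothetical; blocks */
   /* ---- syscall completed, or a signal was delivered ---- */
   VG_(acquire_BigLock)(tid, "example");
   /* Postcondition: we hold the_BigLock and are VgTs_Runnable. */
}
#endif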
     59 
     60 #include "pub_core_basics.h"
     61 #include "pub_core_debuglog.h"
     62 #include "pub_core_vki.h"
     63 #include "pub_core_vkiscnums.h"    // __NR_sched_yield
     64 #include "pub_core_libcsetjmp.h"   // to keep _threadstate.h happy
     65 #include "pub_core_threadstate.h"
     66 #include "pub_core_aspacemgr.h"
     67 #include "pub_core_clreq.h"         // for VG_USERREQ__*
     68 #include "pub_core_dispatch.h"
     69 #include "pub_core_errormgr.h"      // For VG_(get_n_errs_found)()
     70 #include "pub_core_gdbserver.h"     // for VG_(gdbserver) and VG_(gdbserver_activity)
     71 #include "pub_core_libcbase.h"
     72 #include "pub_core_libcassert.h"
     73 #include "pub_core_libcprint.h"
     74 #include "pub_core_libcproc.h"
     75 #include "pub_core_libcsignal.h"
     76 #if defined(VGO_darwin)
     77 #include "pub_core_mach.h"
     78 #endif
     79 #include "pub_core_machine.h"
     80 #include "pub_core_mallocfree.h"
     81 #include "pub_core_options.h"
     82 #include "pub_core_replacemalloc.h"
     83 #include "pub_core_signals.h"
     84 #include "pub_core_stacks.h"
     85 #include "pub_core_stacktrace.h"    // For VG_(get_and_pp_StackTrace)()
     86 #include "pub_core_syscall.h"
     87 #include "pub_core_syswrap.h"
     88 #include "pub_core_tooliface.h"
     89 #include "pub_core_translate.h"     // For VG_(translate)()
     90 #include "pub_core_transtab.h"
     91 #include "pub_core_debuginfo.h"     // VG_(di_notify_pdb_debuginfo)
     92 #include "priv_sema.h"
     93 #include "pub_core_scheduler.h"     // self
     94 #include "pub_core_redir.h"
     95 
     96 
     97 /* ---------------------------------------------------------------------
     98    Types and globals for the scheduler.
     99    ------------------------------------------------------------------ */
    100 
/* ThreadId and ThreadState are defined elsewhere. */
    102 
/* Defines the thread-scheduling timeslice, in terms of the number of
   basic blocks we attempt to run each thread for.  Smaller values
   give finer interleaving but much higher scheduling overhead. */
    106 #define SCHEDULING_QUANTUM   100000
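
/* A rough sketch (not compiled) of how the quantum is consumed.  The
   real countdown happens in the hand-written dispatcher loop, not
   here; this just mirrors what the comments and asserts in
   VG_(scheduler) below say about VG_(dispatch_ctr). */
#if 0
VG_(dispatch_ctr) = SCHEDULING_QUANTUM + 1;
for (;;) {
   VG_(dispatch_ctr)--;          /* decrement happens first ...       */
   if (VG_(dispatch_ctr) == 1)   /* ... then the test: quantum spent? */
      break;   /* dispatcher returns VG_TRC_INNER_COUNTERZERO */
   /* ... find and run one translated superblock of client code ... */
}
#endif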
    107 
    108 /* If False, a fault is Valgrind-internal (ie, a bug) */
    109 Bool VG_(in_generated_code) = False;
    110 
    111 /* Counts downwards in VG_(run_innerloop). */
    112 UInt VG_(dispatch_ctr);
    113 
    114 /* 64-bit counter for the number of basic blocks done. */
    115 static ULong bbs_done = 0;
    116 
/* Counter used to decide whether to poll for vgdb activity.
   When the number of bbs done reaches vgdb_next_poll, the scheduler
   will poll for gdbserver activity.  VG_(force_vgdb_poll) and
   VG_(disable_vgdb_poll) allow the valgrind core (e.g. m_gdbserver)
   to control when the next poll will be done. */
    122 static ULong vgdb_next_poll;
    123 
    124 /* Forwards */
    125 static void do_client_request ( ThreadId tid );
    126 static void scheduler_sanity ( ThreadId tid );
    127 static void mostly_clear_thread_record ( ThreadId tid );
    128 
    129 /* Stats. */
    130 static ULong n_scheduling_events_MINOR = 0;
    131 static ULong n_scheduling_events_MAJOR = 0;
    132 
    133 /* Sanity checking counts. */
    134 static UInt sanity_fast_count = 0;
    135 static UInt sanity_slow_count = 0;
    136 
    137 void VG_(print_scheduler_stats)(void)
    138 {
    139    VG_(message)(Vg_DebugMsg,
    140       "scheduler: %'llu jumps (bb entries).\n", bbs_done );
    141    VG_(message)(Vg_DebugMsg,
    142       "scheduler: %'llu/%'llu major/minor sched events.\n",
    143       n_scheduling_events_MAJOR, n_scheduling_events_MINOR);
    144    VG_(message)(Vg_DebugMsg,
    145                 "   sanity: %d cheap, %d expensive checks.\n",
    146                 sanity_fast_count, sanity_slow_count );
    147 }
    148 
    149 /* CPU semaphore, so that threads can run exclusively */
    150 static vg_sema_t the_BigLock;
    151 
    152 // Base address of the NaCl sandbox.
    153 UWord nacl_head;
    154 
    155 // Path to NaCl nexe.
    156 char *nacl_file;
    157 
    158 /* ---------------------------------------------------------------------
    159    Helper functions for the scheduler.
    160    ------------------------------------------------------------------ */
    161 
    162 static
    163 void print_sched_event ( ThreadId tid, Char* what )
    164 {
    165    VG_(message)(Vg_DebugMsg, "  SCHED[%d]: %s\n", tid, what );
    166 }
    167 
    168 /* For showing SB counts, if the user asks to see them. */
    169 #define SHOW_SBCOUNT_EVERY (20ULL * 1000 * 1000)
    170 static ULong bbs_done_lastcheck = 0;
    171 
    172 static
    173 void maybe_show_sb_counts ( void )
    174 {
    175    Long delta = bbs_done - bbs_done_lastcheck;
    176    vg_assert(delta >= 0);
    177    if (UNLIKELY(delta >= SHOW_SBCOUNT_EVERY)) {
    178       VG_(umsg)("%'lld superblocks executed\n", bbs_done);
    179       bbs_done_lastcheck = bbs_done;
    180    }
    181 }
    182 
    183 static
    184 HChar* name_of_sched_event ( UInt event )
    185 {
    186    switch (event) {
    187       case VEX_TRC_JMP_SYS_SYSCALL:   return "SYSCALL";
    188       case VEX_TRC_JMP_SYS_INT32:     return "INT32";
    189       case VEX_TRC_JMP_SYS_INT128:    return "INT128";
    190       case VEX_TRC_JMP_SYS_INT129:    return "INT129";
    191       case VEX_TRC_JMP_SYS_INT130:    return "INT130";
    192       case VEX_TRC_JMP_SYS_SYSENTER:  return "SYSENTER";
    193       case VEX_TRC_JMP_CLIENTREQ:     return "CLIENTREQ";
    194       case VEX_TRC_JMP_YIELD:         return "YIELD";
    195       case VEX_TRC_JMP_YIELD_NOREDIR: return "YIELD_NOREDIR";
    196       case VEX_TRC_JMP_NODECODE:      return "NODECODE";
    197       case VEX_TRC_JMP_MAPFAIL:       return "MAPFAIL";
    198       case VEX_TRC_JMP_NOREDIR:       return "NOREDIR";
    199       case VEX_TRC_JMP_EMWARN:        return "EMWARN";
    200       case VEX_TRC_JMP_TINVAL:        return "TINVAL";
    201       case VG_TRC_INVARIANT_FAILED:   return "INVFAILED";
    202       case VG_TRC_INNER_COUNTERZERO:  return "COUNTERZERO";
    203       case VG_TRC_INNER_FASTMISS:     return "FASTMISS";
    204       case VG_TRC_FAULT_SIGNAL:       return "FAULTSIGNAL";
    205       default:                        return "??UNKNOWN??";
   }
    207 }
    208 
    209 /* Allocate a completely empty ThreadState record. */
    210 ThreadId VG_(alloc_ThreadState) ( void )
    211 {
    212    Int i;
    213    for (i = 1; i < VG_N_THREADS; i++) {
    214       if (VG_(threads)[i].status == VgTs_Empty) {
    215 	 VG_(threads)[i].status = VgTs_Init;
    216 	 VG_(threads)[i].exitreason = VgSrc_None;
    217          return i;
    218       }
    219    }
    220    VG_(printf)("vg_alloc_ThreadState: no free slots available\n");
    221    VG_(printf)("Increase VG_N_THREADS, rebuild and try again.\n");
    222    VG_(core_panic)("VG_N_THREADS is too low");
    223    /*NOTREACHED*/
    224 }
    225 
    226 /*
    227    Mark a thread as Runnable.  This will block until the_BigLock is
    228    available, so that we get exclusive access to all the shared
    229    structures and the CPU.  Up until we get the_BigLock, we must not
    230    touch any shared state.
    231 
    232    When this returns, we'll actually be running.
    233  */
    234 void VG_(acquire_BigLock)(ThreadId tid, HChar* who)
    235 {
    236    ThreadState *tst;
    237 
    238 #if 0
    239    if (VG_(clo_trace_sched)) {
    240       HChar buf[100];
    241       vg_assert(VG_(strlen)(who) <= 100-50);
    242       VG_(sprintf)(buf, "waiting for lock (%s)", who);
    243       print_sched_event(tid, buf);
    244    }
    245 #endif
    246 
    247    /* First, acquire the_BigLock.  We can't do anything else safely
    248       prior to this point.  Even doing debug printing prior to this
    249       point is, technically, wrong. */
    250    ML_(sema_down)(&the_BigLock, False/*not LL*/);
    251 
    252    tst = VG_(get_ThreadState)(tid);
    253 
    254    vg_assert(tst->status != VgTs_Runnable);
    255 
    256    tst->status = VgTs_Runnable;
    257 
    258    if (VG_(running_tid) != VG_INVALID_THREADID)
    259       VG_(printf)("tid %d found %d running\n", tid, VG_(running_tid));
    260    vg_assert(VG_(running_tid) == VG_INVALID_THREADID);
    261    VG_(running_tid) = tid;
    262 
    263    { Addr gsp = VG_(get_SP)(tid);
    264      VG_(unknown_SP_update)(gsp, gsp, 0/*unknown origin*/);
    265    }
    266 
    267    if (VG_(clo_trace_sched)) {
    268       HChar buf[150];
    269       vg_assert(VG_(strlen)(who) <= 150-50);
    270       VG_(sprintf)(buf, " acquired lock (%s)", who);
    271       print_sched_event(tid, buf);
    272    }
    273 }
    274 
    275 /*
    276    Set a thread into a sleeping state, and give up exclusive access to
    277    the CPU.  On return, the thread must be prepared to block until it
    278    is ready to run again (generally this means blocking in a syscall,
    279    but it may mean that we remain in a Runnable state and we're just
    280    yielding the CPU to another thread).
    281  */
    282 void VG_(release_BigLock)(ThreadId tid, ThreadStatus sleepstate, HChar* who)
    283 {
    284    ThreadState *tst = VG_(get_ThreadState)(tid);
    285 
    286    vg_assert(tst->status == VgTs_Runnable);
    287 
    288    vg_assert(sleepstate == VgTs_WaitSys ||
    289 	     sleepstate == VgTs_Yielding);
    290 
    291    tst->status = sleepstate;
    292 
    293    vg_assert(VG_(running_tid) == tid);
    294    VG_(running_tid) = VG_INVALID_THREADID;
    295 
    296    if (VG_(clo_trace_sched)) {
    297       Char buf[200];
    298       vg_assert(VG_(strlen)(who) <= 200-100);
    299       VG_(sprintf)(buf, "releasing lock (%s) -> %s",
    300                         who, VG_(name_of_ThreadStatus)(sleepstate));
    301       print_sched_event(tid, buf);
    302    }
    303 
    304    /* Release the_BigLock; this will reschedule any runnable
    305       thread. */
    306    ML_(sema_up)(&the_BigLock, False/*not LL*/);
    307 }
    308 
    309 /* See pub_core_scheduler.h for description */
    310 void VG_(acquire_BigLock_LL) ( HChar* who )
    311 {
    312   ML_(sema_down)(&the_BigLock, True/*LL*/);
    313 }
    314 
    315 /* See pub_core_scheduler.h for description */
    316 void VG_(release_BigLock_LL) ( HChar* who )
    317 {
    318    ML_(sema_up)(&the_BigLock, True/*LL*/);
    319 }
    320 
    321 
    322 /* Clear out the ThreadState and release the semaphore. Leaves the
    323    ThreadState in VgTs_Zombie state, so that it doesn't get
    324    reallocated until the caller is really ready. */
    325 void VG_(exit_thread)(ThreadId tid)
    326 {
    327    vg_assert(VG_(is_valid_tid)(tid));
    328    vg_assert(VG_(is_running_thread)(tid));
    329    vg_assert(VG_(is_exiting)(tid));
    330 
    331    mostly_clear_thread_record(tid);
    332    VG_(running_tid) = VG_INVALID_THREADID;
    333 
    334    /* There should still be a valid exitreason for this thread */
    335    vg_assert(VG_(threads)[tid].exitreason != VgSrc_None);
    336 
    337    if (VG_(clo_trace_sched))
    338       print_sched_event(tid, "release lock in VG_(exit_thread)");
    339 
    340    ML_(sema_up)(&the_BigLock, False/*not LL*/);
    341 }
    342 
/* If 'tid' is blocked in a syscall, send it SIGVGKILL so as to get it
   out of the syscall and onto doing the next thing, whatever that is.
   If it isn't blocked in a syscall, this has no effect on the thread. */
    346 void VG_(get_thread_out_of_syscall)(ThreadId tid)
    347 {
    348    vg_assert(VG_(is_valid_tid)(tid));
    349    vg_assert(!VG_(is_running_thread)(tid));
    350 
    351    if (VG_(threads)[tid].status == VgTs_WaitSys) {
    352       if (VG_(clo_trace_signals)) {
    353 	 VG_(message)(Vg_DebugMsg,
    354                       "get_thread_out_of_syscall zaps tid %d lwp %d\n",
    355 		      tid, VG_(threads)[tid].os_state.lwpid);
    356       }
    357 #     if defined(VGO_darwin)
    358       {
    359          // GrP fixme use mach primitives on darwin?
    360          // GrP fixme thread_abort_safely?
    361          // GrP fixme race for thread with WaitSys set but not in syscall yet?
    362          extern kern_return_t thread_abort(mach_port_t);
    363          thread_abort(VG_(threads)[tid].os_state.lwpid);
    364       }
    365 #     else
    366       {
    367          __attribute__((unused))
    368          Int r = VG_(tkill)(VG_(threads)[tid].os_state.lwpid, VG_SIGVGKILL);
    369          /* JRS 2009-Mar-20: should we assert for r==0 (tkill succeeded)?
    370             I'm really not sure.  Here's a race scenario which argues
            that we shouldn't; but equally I'm not sure the scenario is
    372             even possible, because of constraints caused by the question
    373             of who holds the BigLock when.
    374 
    375             Target thread tid does sys_read on a socket and blocks.  This
    376             function gets called, and we observe correctly that tid's
    377             status is WaitSys but then for whatever reason this function
    378             goes very slowly for a while.  Then data arrives from
    379             wherever, tid's sys_read returns, tid exits.  Then we do
    380             tkill on tid, but tid no longer exists; tkill returns an
    381             error code and the assert fails. */
    382          /* vg_assert(r == 0); */
    383       }
    384 #     endif
    385    }
    386 }
    387 
    388 /*
    389    Yield the CPU for a short time to let some other thread run.
    390  */
    391 void VG_(vg_yield)(void)
    392 {
    393    ThreadId tid = VG_(running_tid);
    394 
    395    vg_assert(tid != VG_INVALID_THREADID);
    396    vg_assert(VG_(threads)[tid].os_state.lwpid == VG_(gettid)());
    397 
    398    VG_(release_BigLock)(tid, VgTs_Yielding, "VG_(vg_yield)");
    399 
    400    /*
    401       Tell the kernel we're yielding.
    402     */
    403    VG_(do_syscall0)(__NR_sched_yield);
    404 
    405    VG_(acquire_BigLock)(tid, "VG_(vg_yield)");
    406 }
    407 
    408 
    409 /* Set the standard set of blocked signals, used whenever we're not
    410    running a client syscall. */
    411 static void block_signals(void)
    412 {
    413    vki_sigset_t mask;
    414 
    415    VG_(sigfillset)(&mask);
    416 
    417    /* Don't block these because they're synchronous */
    418    VG_(sigdelset)(&mask, VKI_SIGSEGV);
    419    VG_(sigdelset)(&mask, VKI_SIGBUS);
    420    VG_(sigdelset)(&mask, VKI_SIGFPE);
    421    VG_(sigdelset)(&mask, VKI_SIGILL);
    422    VG_(sigdelset)(&mask, VKI_SIGTRAP);
    423 
    424    /* Can't block these anyway */
    425    VG_(sigdelset)(&mask, VKI_SIGSTOP);
    426    VG_(sigdelset)(&mask, VKI_SIGKILL);
    427 
    428    VG_(sigprocmask)(VKI_SIG_SETMASK, &mask, NULL);
    429 }
    430 
    431 static void os_state_clear(ThreadState *tst)
    432 {
    433    tst->os_state.lwpid       = 0;
    434    tst->os_state.threadgroup = 0;
    435 #  if defined(VGO_linux)
    436    /* no other fields to clear */
    437 #  elif defined(VGO_darwin)
    438    tst->os_state.post_mach_trap_fn = NULL;
    439    tst->os_state.pthread           = 0;
    440    tst->os_state.func_arg          = 0;
    441    VG_(memset)(&tst->os_state.child_go, 0, sizeof(tst->os_state.child_go));
    442    VG_(memset)(&tst->os_state.child_done, 0, sizeof(tst->os_state.child_done));
    443    tst->os_state.wq_jmpbuf_valid   = False;
    444    tst->os_state.remote_port       = 0;
    445    tst->os_state.msgh_id           = 0;
    446    VG_(memset)(&tst->os_state.mach_args, 0, sizeof(tst->os_state.mach_args));
    447 #  else
    448 #    error "Unknown OS"
    449 #  endif
    450 }
    451 
    452 static void os_state_init(ThreadState *tst)
    453 {
    454    tst->os_state.valgrind_stack_base    = 0;
    455    tst->os_state.valgrind_stack_init_SP = 0;
    456    os_state_clear(tst);
    457 }
    458 
    459 static
    460 void mostly_clear_thread_record ( ThreadId tid )
    461 {
    462    vki_sigset_t savedmask;
    463 
    464    vg_assert(tid >= 0 && tid < VG_N_THREADS);
    465    VG_(cleanup_thread)(&VG_(threads)[tid].arch);
    466    VG_(threads)[tid].tid = tid;
    467 
    468    /* Leave the thread in Zombie, so that it doesn't get reallocated
    469       until the caller is finally done with the thread stack. */
    470    VG_(threads)[tid].status               = VgTs_Zombie;
    471 
    472    VG_(sigemptyset)(&VG_(threads)[tid].sig_mask);
    473    VG_(sigemptyset)(&VG_(threads)[tid].tmp_sig_mask);
    474 
    475    os_state_clear(&VG_(threads)[tid]);
    476 
    477    /* start with no altstack */
    478    VG_(threads)[tid].altstack.ss_sp = (void *)0xdeadbeef;
    479    VG_(threads)[tid].altstack.ss_size = 0;
    480    VG_(threads)[tid].altstack.ss_flags = VKI_SS_DISABLE;
    481 
    482    VG_(clear_out_queued_signals)(tid, &savedmask);
    483 
    484    VG_(threads)[tid].sched_jmpbuf_valid = False;
    485 }
    486 
    487 /*
    488    Called in the child after fork.  If the parent has multiple
    489    threads, then we've inherited a VG_(threads) array describing them,
    490    but only the thread which called fork() is actually alive in the
   child.  This function needs to clean up all those other thread
    492    structures.
    493 
   Whichever tid in the parent called fork() becomes the master_tid
   in the child.  That's because the only living slot in
    496    VG_(threads) in the child after fork is VG_(threads)[tid], and it
    497    would be too hard to try to re-number the thread and relocate the
    498    thread state down to VG_(threads)[1].
    499 
    500    This function also needs to reinitialize the_BigLock, since
    501    otherwise we may end up sharing its state with the parent, which
    502    would be deeply confusing.
    503 */
    504 static void sched_fork_cleanup(ThreadId me)
    505 {
    506    ThreadId tid;
    507    vg_assert(VG_(running_tid) == me);
    508 
    509 #  if defined(VGO_darwin)
    510    // GrP fixme hack reset Mach ports
    511    VG_(mach_init)();
    512 #  endif
    513 
    514    VG_(threads)[me].os_state.lwpid = VG_(gettid)();
    515    VG_(threads)[me].os_state.threadgroup = VG_(getpid)();
    516 
    517    /* clear out all the unused thread slots */
    518    for (tid = 1; tid < VG_N_THREADS; tid++) {
    519       if (tid != me) {
    520          mostly_clear_thread_record(tid);
    521 	 VG_(threads)[tid].status = VgTs_Empty;
    522          VG_(clear_syscallInfo)(tid);
    523       }
    524    }
    525 
    526    /* re-init and take the sema */
    527    ML_(sema_deinit)(&the_BigLock);
    528    ML_(sema_init)(&the_BigLock);
    529    ML_(sema_down)(&the_BigLock, False/*not LL*/);
    530 }
    531 
    532 
/* First phase of initialisation of the scheduler.  Initialise
   the_BigLock, zeroise the VG_(threads) structure and decide on the
    535    ThreadId of the root thread.
    536 */
    537 ThreadId VG_(scheduler_init_phase1) ( void )
    538 {
    539    Int i;
    540    ThreadId tid_main;
    541 
    542    VG_(debugLog)(1,"sched","sched_init_phase1\n");
    543 
    544    ML_(sema_init)(&the_BigLock);
    545 
    546    for (i = 0 /* NB; not 1 */; i < VG_N_THREADS; i++) {
    547       /* Paranoia .. completely zero it out. */
    548       VG_(memset)( & VG_(threads)[i], 0, sizeof( VG_(threads)[i] ) );
    549 
    550       VG_(threads)[i].sig_queue = NULL;
    551 
    552       os_state_init(&VG_(threads)[i]);
    553       mostly_clear_thread_record(i);
    554 
    555       VG_(threads)[i].status                    = VgTs_Empty;
    556       VG_(threads)[i].client_stack_szB          = 0;
    557       VG_(threads)[i].client_stack_highest_word = (Addr)NULL;
    558       VG_(threads)[i].err_disablement_level     = 0;
    559    }
    560 
    561    tid_main = VG_(alloc_ThreadState)();
    562 
    563    /* Bleh.  Unfortunately there are various places in the system that
    564       assume that the main thread has a ThreadId of 1.
    565       - Helgrind (possibly)
    566       - stack overflow message in default_action() in m_signals.c
    567       - definitely a lot more places
    568    */
    569    vg_assert(tid_main == 1);
    570 
    571    return tid_main;
    572 }
    573 
    574 
    575 /* Second phase of initialisation of the scheduler.  Given the root
    576    ThreadId computed by first phase of initialisation, fill in stack
   details and acquire the_BigLock.  Initialise the scheduler.  This is
    578    called at startup.  The caller subsequently initialises the guest
    579    state components of this main thread.
    580 */
    581 void VG_(scheduler_init_phase2) ( ThreadId tid_main,
    582                                   Addr     clstack_end,
    583                                   SizeT    clstack_size )
    584 {
    585    VG_(debugLog)(1,"sched","sched_init_phase2: tid_main=%d, "
    586                    "cls_end=0x%lx, cls_sz=%ld\n",
    587                    tid_main, clstack_end, clstack_size);
    588 
    589    vg_assert(VG_IS_PAGE_ALIGNED(clstack_end+1));
    590    vg_assert(VG_IS_PAGE_ALIGNED(clstack_size));
    591 
    592    VG_(threads)[tid_main].client_stack_highest_word
    593       = clstack_end + 1 - sizeof(UWord);
    594    VG_(threads)[tid_main].client_stack_szB
    595       = clstack_size;
    596 
    597    VG_(atfork)(NULL, NULL, sched_fork_cleanup);
    598 }
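
/* An illustrative call sequence (not compiled) showing how the two
   init phases above are intended to be used by the startup code (the
   real caller is in m_main.c).  The names clstack_end and clstack_size
   are hypothetical placeholders for values the caller computes. */
#if 0
static void example_startup ( void )
{
   ThreadId tid_main = VG_(scheduler_init_phase1)();
   /* ... the caller then initialises the guest state components of
      tid_main and works out the client stack ... */
   VG_(scheduler_init_phase2)( tid_main,
                               clstack_end,     /* hypothetical */
                               clstack_size );  /* hypothetical */
}
#endif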
    599 
    600 
    601 /* ---------------------------------------------------------------------
    602    Helpers for running translations.
    603    ------------------------------------------------------------------ */
    604 
/* Use a minimal setjmp/longjmp pair (VG_MINIMAL_SETJMP).  The longjmp
   must not restore signal mask state, but does need to pass "val"
   through.  "jumped" must be a volatile UWord. */
    608 #define SCHEDSETJMP(tid, jumped, stmt)					\
    609    do {									\
    610       ThreadState * volatile _qq_tst = VG_(get_ThreadState)(tid);	\
    611 									\
    612       (jumped) = VG_MINIMAL_SETJMP(_qq_tst->sched_jmpbuf);              \
    613       if ((jumped) == ((UWord)0)) {                                     \
    614 	 vg_assert(!_qq_tst->sched_jmpbuf_valid);			\
    615 	 _qq_tst->sched_jmpbuf_valid = True;				\
    616 	 stmt;								\
    617       }	else if (VG_(clo_trace_sched))					\
    618 	 VG_(printf)("SCHEDSETJMP(line %d) tid %d, jumped=%ld\n",       \
    619                      __LINE__, tid, jumped);                            \
    620       vg_assert(_qq_tst->sched_jmpbuf_valid);				\
    621       _qq_tst->sched_jmpbuf_valid = False;				\
    622    } while(0)
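
/* An illustrative use of SCHEDSETJMP (not compiled), mirroring the
   real uses in run_thread_for_a_while and handle_syscall below.  The
   statement runs with sched_jmpbuf valid, so a fault inside it makes
   the signal handler longjmp back here instead of aborting Valgrind.
   some_stmt_that_may_fault is a hypothetical placeholder, and tid is
   assumed to be in scope. */
#if 0
{
   volatile UWord jumped;
   SCHEDSETJMP(tid, jumped, some_stmt_that_may_fault());
   if (jumped != (UWord)0) {
      /* The handler longjmp'd: the statement did not complete. */
   }
}
#endif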
    623 
    624 
    625 /* Do various guest state alignment checks prior to running a thread.
    626    Specifically, check that what we have matches Vex's guest state
   layout requirements.  See libvex.h for details, but in short the
   requirements are: there must be no holes in between the primary
   guest state, its two copies, and the spill area.  That is, all 4
   areas must have a 16-aligned size, be 16-aligned, and be placed
   back-to-back. */
    632 static void do_pre_run_checks ( ThreadState* tst )
    633 {
    634    Addr a_vex     = (Addr) & tst->arch.vex;
    635    Addr a_vexsh1  = (Addr) & tst->arch.vex_shadow1;
    636    Addr a_vexsh2  = (Addr) & tst->arch.vex_shadow2;
    637    Addr a_spill   = (Addr) & tst->arch.vex_spill;
    638    UInt sz_vex    = (UInt) sizeof tst->arch.vex;
    639    UInt sz_vexsh1 = (UInt) sizeof tst->arch.vex_shadow1;
    640    UInt sz_vexsh2 = (UInt) sizeof tst->arch.vex_shadow2;
    641    UInt sz_spill  = (UInt) sizeof tst->arch.vex_spill;
    642 
    643    if (0)
    644    VG_(printf)("gst %p %d, sh1 %p %d, "
    645                "sh2 %p %d, spill %p %d\n",
    646                (void*)a_vex, sz_vex,
    647                (void*)a_vexsh1, sz_vexsh1,
    648                (void*)a_vexsh2, sz_vexsh2,
    649                (void*)a_spill, sz_spill );
    650 
    651    vg_assert(VG_IS_16_ALIGNED(sz_vex));
    652    vg_assert(VG_IS_16_ALIGNED(sz_vexsh1));
    653    vg_assert(VG_IS_16_ALIGNED(sz_vexsh2));
    654    vg_assert(VG_IS_16_ALIGNED(sz_spill));
    655 
    656    vg_assert(VG_IS_16_ALIGNED(a_vex));
    657    vg_assert(VG_IS_16_ALIGNED(a_vexsh1));
    658    vg_assert(VG_IS_16_ALIGNED(a_vexsh2));
    659    vg_assert(VG_IS_16_ALIGNED(a_spill));
    660 
    661    /* Check that the guest state and its two shadows have the same
    662       size, and that there are no holes in between.  The latter is
    663       important because Memcheck assumes that it can reliably access
    664       the shadows by indexing off a pointer to the start of the
    665       primary guest state area. */
    666    vg_assert(sz_vex == sz_vexsh1);
    667    vg_assert(sz_vex == sz_vexsh2);
    668    vg_assert(a_vex + 1 * sz_vex == a_vexsh1);
    669    vg_assert(a_vex + 2 * sz_vex == a_vexsh2);
    670    /* Also check there's no hole between the second shadow area and
    671       the spill area. */
    672    vg_assert(sz_spill == LibVEX_N_SPILL_BYTES);
    673    vg_assert(a_vex + 3 * sz_vex == a_spill);
    674 
    675 #  if defined(VGA_amd64)
   /* amd64 XMM regs must form an array, ie, have no
      holes in between. */
    678    vg_assert(
    679       (offsetof(VexGuestAMD64State,guest_XMM16)
    680        - offsetof(VexGuestAMD64State,guest_XMM0))
    681       == (17/*#regs*/-1) * 16/*bytes per reg*/
    682    );
    683 #  endif
    684 
    685 #  if defined(VGA_ppc32) || defined(VGA_ppc64)
    686    /* ppc guest_state vector regs must be 16 byte aligned for
    687       loads/stores.  This is important! */
    688    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_VSR0));
    689    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_VSR0));
    690    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_VSR0));
    691    /* be extra paranoid .. */
    692    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_VSR1));
    693    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_VSR1));
    694    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_VSR1));
    695 #  endif
    696 
    697 #  if defined(VGA_arm)
    698    /* arm guest_state VFP regs must be 8 byte aligned for
    699       loads/stores. */
    700    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex.guest_D0));
    701    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow1.guest_D0));
    702    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow2.guest_D0));
    703    /* be extra paranoid .. */
    704    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex.guest_D1));
    705    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow1.guest_D1));
    706    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow2.guest_D1));
    707 #  endif
    708 
    709 #  if defined(VGA_s390x)
    710    /* no special requirements */
    711 #  endif
    712 }
    713 
    714 // NO_VGDB_POLL value ensures vgdb is not polled, while
    715 // VGDB_POLL_ASAP ensures that the next scheduler call
    716 // will cause a poll.
    717 #define NO_VGDB_POLL    0xffffffffffffffffULL
    718 #define VGDB_POLL_ASAP  0x0ULL
    719 
    720 void VG_(disable_vgdb_poll) (void )
    721 {
    722    vgdb_next_poll = NO_VGDB_POLL;
    723 }
    724 void VG_(force_vgdb_poll) ( void )
    725 {
    726    vgdb_next_poll = VGDB_POLL_ASAP;
    727 }
    728 
    729 /* Run the thread tid for a while, and return a VG_TRC_* value
    730    indicating why VG_(run_innerloop) stopped. */
    731 static UInt run_thread_for_a_while ( ThreadId tid )
    732 {
    733    volatile UWord        jumped;
    734    volatile ThreadState* tst = NULL; /* stop gcc complaining */
    735    volatile UInt         trc;
    736    volatile Int          dispatch_ctr_SAVED;
    737    volatile Int          done_this_time;
    738 
    739    /* Paranoia */
    740    vg_assert(VG_(is_valid_tid)(tid));
    741    vg_assert(VG_(is_running_thread)(tid));
    742    vg_assert(!VG_(is_exiting)(tid));
    743 
    744    tst = VG_(get_ThreadState)(tid);
    745    do_pre_run_checks( (ThreadState*)tst );
    746    /* end Paranoia */
    747 
    748    trc = 0;
    749    dispatch_ctr_SAVED = VG_(dispatch_ctr);
    750 
    751    /* there should be no undealt-with signals */
    752    //vg_assert(VG_(threads)[tid].siginfo.si_signo == 0);
    753 
    754    if (0) {
    755       vki_sigset_t m;
    756       Int i, err = VG_(sigprocmask)(VKI_SIG_SETMASK, NULL, &m);
    757       vg_assert(err == 0);
    758       VG_(printf)("tid %d: entering code with unblocked signals: ", tid);
    759       for (i = 1; i <= _VKI_NSIG; i++)
    760          if (!VG_(sigismember)(&m, i))
    761             VG_(printf)("%d ", i);
    762       VG_(printf)("\n");
    763    }
    764 
    765    // Tell the tool this thread is about to run client code
    766    VG_TRACK( start_client_code, tid, bbs_done );
    767 
    768    vg_assert(VG_(in_generated_code) == False);
    769    VG_(in_generated_code) = True;
    770 
    771    SCHEDSETJMP(
    772       tid,
    773       jumped,
    774       trc = (UInt)VG_(run_innerloop)( (void*)&tst->arch.vex,
    775                                       VG_(clo_profile_flags) > 0 ? 1 : 0 )
    776    );
    777 
    778    vg_assert(VG_(in_generated_code) == True);
    779    VG_(in_generated_code) = False;
    780 
    781    if (jumped != (UWord)0) {
    782       /* We get here if the client took a fault that caused our signal
    783          handler to longjmp. */
    784       vg_assert(trc == 0);
    785       trc = VG_TRC_FAULT_SIGNAL;
    786       block_signals();
    787    }
    788 
    789    done_this_time = (Int)dispatch_ctr_SAVED - (Int)VG_(dispatch_ctr) - 0;
    790 
    791    vg_assert(done_this_time >= 0);
    792    bbs_done += (ULong)done_this_time;
    793 
    794    // Tell the tool this thread has stopped running client code
    795    VG_TRACK( stop_client_code, tid, bbs_done );
    796 
    797    if (bbs_done >= vgdb_next_poll) {
    798       if (VG_(clo_vgdb_poll))
    799          vgdb_next_poll = bbs_done + (ULong)VG_(clo_vgdb_poll);
    800       else
    801          /* value was changed due to gdbserver invocation via ptrace */
    802          vgdb_next_poll = NO_VGDB_POLL;
    803       if (VG_(gdbserver_activity) (tid))
    804          VG_(gdbserver) (tid);
    805    }
    806 
    807    return trc;
    808 }
    809 
    810 
    811 /* Run a no-redir translation just once, and return the resulting
    812    VG_TRC_* value. */
    813 static UInt run_noredir_translation ( Addr hcode, ThreadId tid )
    814 {
    815    volatile UWord        jumped;
    816    volatile ThreadState* tst;
    817    volatile UWord        argblock[4];
    818    volatile UInt         retval;
    819 
    820    /* Paranoia */
    821    vg_assert(VG_(is_valid_tid)(tid));
    822    vg_assert(VG_(is_running_thread)(tid));
    823    vg_assert(!VG_(is_exiting)(tid));
    824 
    825    tst = VG_(get_ThreadState)(tid);
    826    do_pre_run_checks( (ThreadState*)tst );
    827    /* end Paranoia */
    828 
    829 #  if defined(VGA_ppc32) || defined(VGA_ppc64)
    830    /* I don't think we need to clear this thread's guest_RESVN here,
    831       because we can only get here if run_thread_for_a_while() has
    832       been used immediately before, on this same thread. */
    833 #  endif
    834 
    835    /* There can be 3 outcomes from VG_(run_a_noredir_translation):
    836 
    837       - a signal occurred and the sighandler longjmp'd.  Then both [2]
    838         and [3] are unchanged - hence zero.
    839 
    840       - translation ran normally, set [2] (next guest IP) and set [3]
    841         to whatever [1] was beforehand, indicating a normal (boring)
    842         jump to the next block.
    843 
    844       - translation ran normally, set [2] (next guest IP) and set [3]
    845         to something different from [1] beforehand, which indicates a
    846         TRC_ value.
    847    */
    848    argblock[0] = (UWord)hcode;
    849    argblock[1] = (UWord)&VG_(threads)[tid].arch.vex;
    850    argblock[2] = 0; /* next guest IP is written here */
    851    argblock[3] = 0; /* guest state ptr afterwards is written here */
    852 
    853    // Tell the tool this thread is about to run client code
    854    VG_TRACK( start_client_code, tid, bbs_done );
    855 
    856    vg_assert(VG_(in_generated_code) == False);
    857    VG_(in_generated_code) = True;
    858 
    859    SCHEDSETJMP(
    860       tid,
    861       jumped,
    862       VG_(run_a_noredir_translation)( &argblock[0] )
    863    );
    864 
    865    VG_(in_generated_code) = False;
    866 
    867    if (jumped != (UWord)0) {
    868       /* We get here if the client took a fault that caused our signal
    869          handler to longjmp. */
    870       vg_assert(argblock[2] == 0); /* next guest IP was not written */
    871       vg_assert(argblock[3] == 0); /* trc was not written */
    872       block_signals();
    873       retval = VG_TRC_FAULT_SIGNAL;
    874    } else {
    875       /* store away the guest program counter */
    876       VG_(set_IP)( tid, argblock[2] );
    877       if (argblock[3] == argblock[1])
    878          /* the guest state pointer afterwards was unchanged */
    879          retval = VG_TRC_BORING;
    880       else
    881          retval = (UInt)argblock[3];
    882    }
    883 
    884    bbs_done++;
    885 
    886    // Tell the tool this thread has stopped running client code
    887    VG_TRACK( stop_client_code, tid, bbs_done );
    888 
    889    return retval;
    890 }
    891 
    892 
    893 /* ---------------------------------------------------------------------
    894    The scheduler proper.
    895    ------------------------------------------------------------------ */
    896 
    897 static void handle_tt_miss ( ThreadId tid )
    898 {
    899    Bool found;
    900    Addr ip = VG_(get_IP)(tid);
    901 
    902    /* Trivial event.  Miss in the fast-cache.  Do a full
    903       lookup for it. */
    904    found = VG_(search_transtab)( NULL, ip, True/*upd_fast_cache*/ );
    905    if (UNLIKELY(!found)) {
    906       /* Not found; we need to request a translation. */
    907       if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/,
    908                           bbs_done, True/*allow redirection*/ )) {
    909 	 found = VG_(search_transtab)( NULL, ip, True );
    910          vg_assert2(found, "VG_TRC_INNER_FASTMISS: missing tt_fast entry");
    911 
    912       } else {
    913 	 // If VG_(translate)() fails, it's because it had to throw a
    914 	 // signal because the client jumped to a bad address.  That
    915 	 // means that either a signal has been set up for delivery,
    916 	 // or the thread has been marked for termination.  Either
    917 	 // way, we just need to go back into the scheduler loop.
    918       }
    919    }
    920 }
    921 
    922 static void handle_syscall(ThreadId tid, UInt trc)
    923 {
    924    ThreadState * volatile tst = VG_(get_ThreadState)(tid);
    925    volatile UWord jumped;
    926 
    927    /* Syscall may or may not block; either way, it will be
    928       complete by the time this call returns, and we'll be
    929       runnable again.  We could take a signal while the
    930       syscall runs. */
    931 
   if (VG_(clo_sanity_level) >= 3)
    933       VG_(am_do_sync_check)("(BEFORE SYSCALL)",__FILE__,__LINE__);
    934 
    935    SCHEDSETJMP(tid, jumped, VG_(client_syscall)(tid, trc));
    936 
   if (VG_(clo_sanity_level) >= 3)
    938       VG_(am_do_sync_check)("(AFTER SYSCALL)",__FILE__,__LINE__);
    939 
    940    if (!VG_(is_running_thread)(tid))
    941       VG_(printf)("tid %d not running; VG_(running_tid)=%d, tid %d status %d\n",
    942 		  tid, VG_(running_tid), tid, tst->status);
    943    vg_assert(VG_(is_running_thread)(tid));
    944 
    945    if (jumped != (UWord)0) {
    946       block_signals();
    947       VG_(poll_signals)(tid);
    948    }
    949 }
    950 
    951 /* tid just requested a jump to the noredir version of its current
    952    program counter.  So make up that translation if needed, run it,
    953    and return the resulting thread return code. */
    954 static UInt/*trc*/ handle_noredir_jump ( ThreadId tid )
    955 {
    956    AddrH hcode = 0;
    957    Addr  ip    = VG_(get_IP)(tid);
    958 
    959    Bool  found = VG_(search_unredir_transtab)( &hcode, ip );
    960    if (!found) {
    961       /* Not found; we need to request a translation. */
    962       if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/, bbs_done,
    963                           False/*NO REDIRECTION*/ )) {
    964 
    965          found = VG_(search_unredir_transtab)( &hcode, ip );
    966          vg_assert2(found, "unredir translation missing after creation?!");
    967 
    968       } else {
    969 	 // If VG_(translate)() fails, it's because it had to throw a
    970 	 // signal because the client jumped to a bad address.  That
    971 	 // means that either a signal has been set up for delivery,
    972 	 // or the thread has been marked for termination.  Either
    973 	 // way, we just need to go back into the scheduler loop.
    974          return VG_TRC_BORING;
    975       }
    976 
    977    }
    978 
    979    vg_assert(found);
    980    vg_assert(hcode != 0);
    981 
    982    /* Otherwise run it and return the resulting VG_TRC_* value. */
    983    return run_noredir_translation( hcode, tid );
    984 }
    985 
    986 
    987 /*
    988    Run a thread until it wants to exit.
    989 
    990    We assume that the caller has already called VG_(acquire_BigLock) for
    991    us, so we own the VCPU.  Also, all signals are blocked.
    992  */
    993 VgSchedReturnCode VG_(scheduler) ( ThreadId tid )
    994 {
    995    UInt     trc = VG_TRC_BORING;
    996    ThreadState *tst = VG_(get_ThreadState)(tid);
    997    static Bool vgdb_startup_action_done = False;
    998 
    999    if (VG_(clo_trace_sched))
   1000       print_sched_event(tid, "entering VG_(scheduler)");
   1001 
   /* Do vgdb initialization (but only once).  Only the first (main)
      task starting up will do the below.
      Initializing gdbserver any earlier than the first call to
      VG_(scheduler) causes problems:
      * at the end of VG_(scheduler_init_phase2):
        The main thread is in VgTs_Init state, but not yet in a
        consistent state => the thread cannot be reported to gdb
        (e.g. it causes an assert in LibVEX_GuestX86_get_eflags when
        giving the guest registers back to gdb).
      * at the end of valgrind_main, just before
        VG_(main_thread_wrapper_NORETURN)(1):
        The main thread is still in VgTs_Init state, but in a more
        advanced state.  However, the thread state is not yet
        completely initialized: among other things, the os_state is
        not yet fully set => the thread is then not properly reported
        to gdb, which gets confused (causing e.g. a duplicate thread
        to be shown, without a thread id).
      * it would be possible to initialize gdbserver "lower" in the
        call stack (e.g. in VG_(main_thread_wrapper_NORETURN)), but
        those places are platform dependent, and the point at which
        the thread state is completely initialized is then no longer
        specific to the main thread (so a similar "do it only once"
        guard would be needed anyway).

      => a "once only" initialization here is the best compromise. */
   1027    if (!vgdb_startup_action_done) {
   1028       vg_assert(tid == 1); // it must be the main thread.
   1029       vgdb_startup_action_done = True;
   1030       if (VG_(clo_vgdb) != Vg_VgdbNo) {
         /* If we have to poll, ensure we do an initial poll at the
            first scheduler call.  Otherwise, ensure no poll (unless
            interrupted by ptrace). */
   1034          if (VG_(clo_vgdb_poll))
   1035             VG_(force_vgdb_poll) ();
   1036          else
   1037             VG_(disable_vgdb_poll) ();
   1038 
   1039          vg_assert (VG_(dyn_vgdb_error) == VG_(clo_vgdb_error));
   1040          /* As we are initializing, VG_(dyn_vgdb_error) can't have been
   1041             changed yet. */
   1042 
   1043          VG_(gdbserver_prerun_action) (1);
   1044       } else {
   1045          VG_(disable_vgdb_poll) ();
   1046       }
   1047    }
   1048 
   1049    /* set the proper running signal mask */
   1050    block_signals();
   1051 
   1052    vg_assert(VG_(is_running_thread)(tid));
   1053 
   1054    VG_(dispatch_ctr) = SCHEDULING_QUANTUM + 1;
   1055 
   1056    while (!VG_(is_exiting)(tid)) {
   1057 
   1058       if (VG_(dispatch_ctr) == 1) {
   1059 
	 /* Our slice is done, so yield the CPU to another thread.  On
            Linux this means just yielding, not actually sleeping, since
            sleeping would take too much time. */
   1063 
   1064 	 /* 4 July 06: it seems that a zero-length nsleep is needed to
   1065             cause async thread cancellation (canceller.c) to terminate
   1066             in finite time; else it is in some kind of race/starvation
   1067             situation and completion is arbitrarily delayed (although
   1068             this is not a deadlock).
   1069 
   1070             Unfortunately these sleeps cause MPI jobs not to terminate
   1071             sometimes (some kind of livelock).  So sleeping once
   1072             every N opportunities appears to work. */
   1073 
   1074 	 /* 3 Aug 06: doing sys__nsleep works but crashes some apps.
   1075             sys_yield also helps the problem, whilst not crashing apps. */
   1076 
   1077 	 VG_(release_BigLock)(tid, VgTs_Yielding,
   1078                                    "VG_(scheduler):timeslice");
   1079 	 /* ------------ now we don't have The Lock ------------ */
   1080 
   1081          VG_(do_syscall0)(__NR_sched_yield);
   1082 
   1083 	 VG_(acquire_BigLock)(tid, "VG_(scheduler):timeslice");
   1084 	 /* ------------ now we do have The Lock ------------ */
   1085 
   1086 	 /* OK, do some relatively expensive housekeeping stuff */
   1087 	 scheduler_sanity(tid);
   1088 	 VG_(sanity_check_general)(False);
   1089 
   1090 	 /* Look for any pending signals for this thread, and set them up
   1091 	    for delivery */
   1092 	 VG_(poll_signals)(tid);
   1093 
   1094 	 if (VG_(is_exiting)(tid))
   1095 	    break;		/* poll_signals picked up a fatal signal */
   1096 
   1097 	 /* For stats purposes only. */
   1098 	 n_scheduling_events_MAJOR++;
   1099 
	 /* Figure out how many bbs to ask vg_run_innerloop to do.  Note
	    that it decrements the counter before testing it for zero, so
	    that if VG_(dispatch_ctr) is set to N you get at most N-1
	    iterations.  Also this means that VG_(dispatch_ctr) must
	    exceed zero before entering the innerloop.  Also, the
	    decrement is done before the bb is actually run, so you
	    always get at least one decrement even if nothing happens. */
   1107          VG_(dispatch_ctr) = SCHEDULING_QUANTUM + 1;
   1108 
   1109 	 /* paranoia ... */
   1110 	 vg_assert(tst->tid == tid);
   1111 	 vg_assert(tst->os_state.lwpid == VG_(gettid)());
   1112       }
   1113 
   1114       /* For stats purposes only. */
   1115       n_scheduling_events_MINOR++;
   1116 
   1117       if (0)
   1118          VG_(message)(Vg_DebugMsg, "thread %d: running for %d bbs\n",
   1119                                    tid, VG_(dispatch_ctr) - 1 );
   1120 
   1121       if (trc == VEX_TRC_JMP_YIELD_NOREDIR) {
   1122         trc = handle_noredir_jump(tid);
   1123       } else {
   1124         trc = run_thread_for_a_while ( tid );
   1125       }
   1126 
   1127       if (VG_(clo_trace_sched) && VG_(clo_verbosity) > 2) {
   1128 	 Char buf[50];
   1129 	 VG_(sprintf)(buf, "TRC: %s", name_of_sched_event(trc));
   1130 	 print_sched_event(tid, buf);
   1131       }
   1132 
   1133       if (trc == VEX_TRC_JMP_NOREDIR) {
   1134          /* If we got a request to run a no-redir version of
   1135             something, do so now -- handle_noredir_jump just (creates
   1136             and) runs that one translation.  The flip side is that the
   1137             noredir translation can't itself return another noredir
   1138             request -- that would be nonsensical.  It can, however,
   1139             return VG_TRC_BORING, which just means keep going as
   1140             normal. */
   1141          trc = handle_noredir_jump(tid);
   1142          vg_assert(trc != VEX_TRC_JMP_NOREDIR);
   1143       }
   1144 
   1145       switch (trc) {
   1146       case VG_TRC_BORING:
   1147          /* no special event, just keep going. */
   1148          break;
   1149 
   1150       case VG_TRC_INNER_FASTMISS:
   1151 	 vg_assert(VG_(dispatch_ctr) > 1);
   1152 	 handle_tt_miss(tid);
   1153 	 break;
   1154 
   1155       case VEX_TRC_JMP_CLIENTREQ:
   1156 	 do_client_request(tid);
   1157 	 break;
   1158 
   1159       case VEX_TRC_JMP_SYS_INT128:  /* x86-linux */
   1160       case VEX_TRC_JMP_SYS_INT129:  /* x86-darwin */
   1161       case VEX_TRC_JMP_SYS_INT130:  /* x86-darwin */
   1162       case VEX_TRC_JMP_SYS_SYSCALL: /* amd64-linux, ppc32-linux, amd64-darwin */
   1163 	 handle_syscall(tid, trc);
   1164 	 if (VG_(clo_sanity_level) > 2)
   1165 	    VG_(sanity_check_general)(True); /* sanity-check every syscall */
   1166 	 break;
   1167 
   1168       case VEX_TRC_JMP_YIELD:
   1169 	 /* Explicit yield, because this thread is in a spin-lock
   1170 	    or something.  Only let the thread run for a short while
   1171             longer.  Because swapping to another thread is expensive,
   1172             we're prepared to let this thread eat a little more CPU
   1173             before swapping to another.  That means that short term
   1174             spins waiting for hardware to poke memory won't cause a
   1175             thread swap. */
   1176 	 if (VG_(dispatch_ctr) > 2000)
   1177             VG_(dispatch_ctr) = 2000;
   1178 	 break;
   1179 
   1180       case VEX_TRC_JMP_YIELD_NOREDIR:
   1181          VG_(dispatch_ctr) = 1;
   1182          break;
   1183 
   1184       case VG_TRC_INNER_COUNTERZERO:
   1185 	 /* Timeslice is out.  Let a new thread be scheduled. */
   1186 	 vg_assert(VG_(dispatch_ctr) == 1);
   1187 	 break;
   1188 
   1189       case VG_TRC_FAULT_SIGNAL:
   1190 	 /* Everything should be set up (either we're exiting, or
   1191 	    about to start in a signal handler). */
   1192 	 break;
   1193 
   1194       case VEX_TRC_JMP_MAPFAIL:
   1195          /* Failure of arch-specific address translation (x86/amd64
   1196             segment override use) */
   1197          /* jrs 2005 03 11: is this correct? */
   1198          VG_(synth_fault)(tid);
   1199          break;
   1200 
   1201       case VEX_TRC_JMP_EMWARN: {
   1202          static Int  counts[EmWarn_NUMBER];
   1203          static Bool counts_initted = False;
   1204          VexEmWarn ew;
   1205          HChar*    what;
   1206          Bool      show;
   1207          Int       q;
   1208          if (!counts_initted) {
   1209             counts_initted = True;
   1210             for (q = 0; q < EmWarn_NUMBER; q++)
   1211                counts[q] = 0;
   1212          }
   1213          ew   = (VexEmWarn)VG_(threads)[tid].arch.vex.guest_EMWARN;
   1214          what = (ew < 0 || ew >= EmWarn_NUMBER)
   1215                    ? "unknown (?!)"
   1216                    : LibVEX_EmWarn_string(ew);
   1217          show = (ew < 0 || ew >= EmWarn_NUMBER)
   1218                    ? True
   1219                    : counts[ew]++ < 3;
   1220          if (show && VG_(clo_show_emwarns) && !VG_(clo_xml)) {
   1221             VG_(message)( Vg_UserMsg,
   1222                           "Emulation warning: unsupported action:\n");
   1223             VG_(message)( Vg_UserMsg, "  %s\n", what);
   1224             VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
   1225          }
   1226          break;
   1227       }
   1228 
   1229       case VEX_TRC_JMP_EMFAIL: {
   1230          VexEmWarn ew;
   1231          HChar*    what;
   1232          ew   = (VexEmWarn)VG_(threads)[tid].arch.vex.guest_EMWARN;
   1233          what = (ew < 0 || ew >= EmWarn_NUMBER)
   1234                    ? "unknown (?!)"
   1235                    : LibVEX_EmWarn_string(ew);
   1236          VG_(message)( Vg_UserMsg,
   1237                        "Emulation fatal error -- Valgrind cannot continue:\n");
   1238          VG_(message)( Vg_UserMsg, "  %s\n", what);
   1239          VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
   1240          VG_(message)(Vg_UserMsg, "\n");
   1241          VG_(message)(Vg_UserMsg, "Valgrind has to exit now.  Sorry.\n");
   1242          VG_(message)(Vg_UserMsg, "\n");
   1243          VG_(exit)(1);
   1244          break;
   1245       }
   1246 
   1247       case VEX_TRC_JMP_SIGTRAP:
   1248          VG_(synth_sigtrap)(tid);
   1249          break;
   1250 
   1251       case VEX_TRC_JMP_SIGSEGV:
   1252          VG_(synth_fault)(tid);
   1253          break;
   1254 
   1255       case VEX_TRC_JMP_SIGBUS:
   1256          VG_(synth_sigbus)(tid);
   1257          break;
   1258 
   1259       case VEX_TRC_JMP_NODECODE:
   1260          VG_(umsg)(
   1261             "valgrind: Unrecognised instruction at address %#lx.\n",
   1262             VG_(get_IP)(tid));
   1263          VG_(get_and_pp_StackTrace)(tid, 50);
   1264 #define M(a) VG_(umsg)(a "\n");
   1265    M("Your program just tried to execute an instruction that Valgrind" );
   1266    M("did not recognise.  There are two possible reasons for this."    );
   1267    M("1. Your program has a bug and erroneously jumped to a non-code"  );
   1268    M("   location.  If you are running Memcheck and you just saw a"    );
   1269    M("   warning about a bad jump, it's probably your program's fault.");
   1270    M("2. The instruction is legitimate but Valgrind doesn't handle it,");
   1271    M("   i.e. it's Valgrind's fault.  If you think this is the case or");
   1272    M("   you are not sure, please let us know and we'll try to fix it.");
   1273    M("Either way, Valgrind will now raise a SIGILL signal which will"  );
   1274    M("probably kill your program."                                     );
   1275 #undef M
   1276          VG_(synth_sigill)(tid, VG_(get_IP)(tid));
   1277          break;
   1278 
   1279       case VEX_TRC_JMP_TINVAL:
   1280          VG_(discard_translations)(
   1281             (Addr64)VG_(threads)[tid].arch.vex.guest_TISTART,
   1282             VG_(threads)[tid].arch.vex.guest_TILEN,
   1283             "scheduler(VEX_TRC_JMP_TINVAL)"
   1284          );
   1285          if (0)
   1286             VG_(printf)("dump translations done.\n");
   1287          break;
   1288 
   1289       case VG_TRC_INVARIANT_FAILED:
   1290          /* This typically happens if, after running generated code,
   1291             it is detected that host CPU settings (eg, FPU/Vector
   1292             control words) are not as they should be.  Vex's code
   1293             generation specifies the state such control words should
   1294             be in on entry to Vex-generated code, and they should be
   1295             unchanged on exit from it.  Failure of this assertion
   1296             usually means a bug in Vex's code generation. */
   1297          //{ UInt xx;
   1298          //  __asm__ __volatile__ (
   1299          //     "\t.word 0xEEF12A10\n"  // fmrx r2,fpscr
   1300          //     "\tmov %0, r2" : "=r"(xx) : : "r2" );
   1301          //  VG_(printf)("QQQQ new fpscr = %08x\n", xx);
   1302          //}
   1303          vg_assert2(0, "VG_(scheduler), phase 3: "
   1304                        "run_innerloop detected host "
   1305                        "state invariant failure", trc);
   1306 
   1307       case VEX_TRC_JMP_SYS_SYSENTER:
   1308          /* Do whatever simulation is appropriate for an x86 sysenter
   1309             instruction.  Note that it is critical to set this thread's
   1310             guest_EIP to point at the code to execute after the
   1311             sysenter, since Vex-generated code will not have set it --
   1312             vex does not know what it should be.  Vex sets the next
   1313             address to zero, so if you don't set guest_EIP, the thread
   1314             will jump to zero afterwards and probably die as a result. */
   1315 #        if defined(VGP_x86_linux)
   1316          vg_assert2(0, "VG_(scheduler), phase 3: "
   1317                        "sysenter_x86 on x86-linux is not supported");
   1318 #        elif defined(VGP_x86_darwin)
   1319          /* return address in client edx */
   1320          VG_(threads)[tid].arch.vex.guest_EIP
   1321             = VG_(threads)[tid].arch.vex.guest_EDX;
   1322          handle_syscall(tid, trc);
   1323 #        else
   1324          vg_assert2(0, "VG_(scheduler), phase 3: "
   1325                        "sysenter_x86 on non-x86 platform?!?!");
   1326 #        endif
   1327          break;
   1328 
   1329       default:
   1330 	 vg_assert2(0, "VG_(scheduler), phase 3: "
   1331                        "unexpected thread return code (%u)", trc);
   1332 	 /* NOTREACHED */
   1333 	 break;
   1334 
   1335       } /* switch (trc) */
   1336 
   1337       if (0)
   1338          maybe_show_sb_counts();
   1339    }
   1340 
   1341    if (VG_(clo_trace_sched))
   1342       print_sched_event(tid, "exiting VG_(scheduler)");
   1343 
   1344    vg_assert(VG_(is_exiting)(tid));
   1345 
   1346    return tst->exitreason;
   1347 }
   1348 
   1349 
   1350 /*
   1351    This causes all threads to exit forcibly.  They aren't actually
   1352    dead by the time this returns; you need to call
   1353    VG_(reap_threads)() to wait for them.
   1354  */
   1355 void VG_(nuke_all_threads_except) ( ThreadId me, VgSchedReturnCode src )
   1356 {
   1357    ThreadId tid;
   1358 
   1359    vg_assert(VG_(is_running_thread)(me));
   1360 
   1361    for (tid = 1; tid < VG_N_THREADS; tid++) {
   1362       if (tid == me
   1363           || VG_(threads)[tid].status == VgTs_Empty)
   1364          continue;
   1365       if (0)
   1366          VG_(printf)(
   1367             "VG_(nuke_all_threads_except): nuking tid %d\n", tid);
   1368 
   1369       VG_(threads)[tid].exitreason = src;
   1370       if (src == VgSrc_FatalSig)
   1371          VG_(threads)[tid].os_state.fatalsig = VKI_SIGKILL;
   1372       VG_(get_thread_out_of_syscall)(tid);
   1373    }
   1374 }
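
/* Typical call pattern -- a sketch; the real call sites live elsewhere
   in the core:

      VG_(nuke_all_threads_except)( tid, VgSrc_ExitProcess );
      VG_(reap_threads)( tid );   // wait until they are really gone
*/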
   1375 
   1376 
   1377 /* ---------------------------------------------------------------------
   1378    Specifying shadow register values
   1379    ------------------------------------------------------------------ */
   1380 
   1381 #if defined(VGA_x86)
   1382 #  define VG_CLREQ_ARGS       guest_EAX
   1383 #  define VG_CLREQ_RET        guest_EDX
   1384 #elif defined(VGA_amd64)
   1385 #  define VG_CLREQ_ARGS       guest_RAX
   1386 #  define VG_CLREQ_RET        guest_RDX
   1387 #elif defined(VGA_ppc32) || defined(VGA_ppc64)
   1388 #  define VG_CLREQ_ARGS       guest_GPR4
   1389 #  define VG_CLREQ_RET        guest_GPR3
   1390 #elif defined(VGA_arm)
   1391 #  define VG_CLREQ_ARGS       guest_R4
   1392 #  define VG_CLREQ_RET        guest_R3
   1393 #elif defined(VGA_s390x)
   1394 #  define VG_CLREQ_ARGS       guest_r2
   1395 #  define VG_CLREQ_RET        guest_r3
   1396 #else
   1397 #  error Unknown arch
   1398 #endif
   1399 
   1400 #define CLREQ_ARGS(regs)   ((regs).vex.VG_CLREQ_ARGS)
   1401 #define CLREQ_RET(regs)    ((regs).vex.VG_CLREQ_RET)
   1402 #define O_CLREQ_RET        (offsetof(VexGuestArchState, VG_CLREQ_RET))
   1403 
   1404 // These macros write a value to a client's thread register, and tell the
   1405 // tool that it's happened (if necessary).
   1406 
   1407 #define SET_CLREQ_RETVAL(zztid, zzval) \
   1408    do { CLREQ_RET(VG_(threads)[zztid].arch) = (zzval); \
   1409         VG_TRACK( post_reg_write, \
   1410                   Vg_CoreClientReq, zztid, O_CLREQ_RET, sizeof(UWord)); \
   1411    } while (0)
   1412 
   1413 #define SET_CLCALL_RETVAL(zztid, zzval, f) \
   1414    do { CLREQ_RET(VG_(threads)[zztid].arch) = (zzval); \
   1415         VG_TRACK( post_reg_write_clientcall_return, \
   1416                   zztid, O_CLREQ_RET, sizeof(UWord), f); \
   1417    } while (0)
   1418 
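/* Illustrative (hypothetical) use of the macros above, kept inside
   "#if 0" so it is never compiled: a handler that echoes a request's
   first payload word back to the client.  Writing the reply register
   and telling the tool about it must happen together, which is
   exactly what SET_CLREQ_RETVAL guarantees. */
#if 0
static void example_echo_request ( ThreadId tid, UWord* arg )
{
   /* arg[0] is the request code, arg[1] the payload word. */
   SET_CLREQ_RETVAL(tid, arg[1]);
}
#endif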
   1419 
   1420 /* ---------------------------------------------------------------------
   1421    Handle client requests.
   1422    ------------------------------------------------------------------ */
   1423 
   1424 // OS-specific(?) client requests
   1425 static Bool os_client_request(ThreadId tid, UWord *args)
   1426 {
   1427    Bool handled = True;
   1428 
   1429    vg_assert(VG_(is_running_thread)(tid));
   1430 
   1431    switch(args[0]) {
   1432    case VG_USERREQ__LIBC_FREERES_DONE:
   1433       /* This is equivalent to an exit() syscall, but we don't set the
   1434 	 exitcode (since it might already be set) */
   1435       if (0 || VG_(clo_trace_syscalls) || VG_(clo_trace_sched))
   1436          VG_(message)(Vg_DebugMsg,
   1437                       "__libc_freeres() done; really quitting!\n");
   1438       VG_(threads)[tid].exitreason = VgSrc_ExitThread;
   1439       break;
   1440 
   1441    default:
   1442       handled = False;
   1443       break;
   1444    }
   1445 
   1446    return handled;
   1447 }
   1448 
   1449 
   1450 /* Do a client request for the thread tid.  After the request, tid may
   1451    or may not still be runnable; if not, the scheduler will have to
   1452    choose a new thread to run.
   1453 */
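/* For reference, the client-side view -- a sketch, not part of this
   file: client programs reach this code via the macros in valgrind.h,
   e.g.

      #include <valgrind/valgrind.h>
      if (RUNNING_ON_VALGRIND)
         VALGRIND_PRINTF("running under valgrind\n");

   Those macros put the address of an argument block in the register
   named by VG_CLREQ_ARGS above and execute the magic instruction
   preamble; the dispatcher then lands here with that block as 'arg'. */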
   1454 static
   1455 void do_client_request ( ThreadId tid )
   1456 {
   1457    UWord* arg = (UWord*)(CLREQ_ARGS(VG_(threads)[tid].arch));
   1458    UWord req_no = arg[0];
   1459 
   1460    if (0)
   1461       VG_(printf)("req no = 0x%llx, arg = %p\n", (ULong)req_no, arg);
   1462    switch (req_no) {
   1463 
   1464       case VG_USERREQ__CLIENT_CALL0: {
   1465          UWord (*f)(ThreadId) = (void*)arg[1];
   1466 	 if (f == NULL)
   1467 	    VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL0: func=%p\n", f);
   1468 	 else
   1469 	    SET_CLCALL_RETVAL(tid, f ( tid ), (Addr)f);
   1470          break;
   1471       }
   1472       case VG_USERREQ__CLIENT_CALL1: {
   1473          UWord (*f)(ThreadId, UWord) = (void*)arg[1];
   1474 	 if (f == NULL)
   1475 	    VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL1: func=%p\n", f);
   1476 	 else
   1477 	    SET_CLCALL_RETVAL(tid, f ( tid, arg[2] ), (Addr)f );
   1478          break;
   1479       }
   1480       case VG_USERREQ__CLIENT_CALL2: {
   1481          UWord (*f)(ThreadId, UWord, UWord) = (void*)arg[1];
   1482 	 if (f == NULL)
   1483 	    VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL2: func=%p\n", f);
   1484 	 else
   1485 	    SET_CLCALL_RETVAL(tid, f ( tid, arg[2], arg[3] ), (Addr)f );
   1486          break;
   1487       }
   1488       case VG_USERREQ__CLIENT_CALL3: {
   1489          UWord (*f)(ThreadId, UWord, UWord, UWord) = (void*)arg[1];
   1490 	 if (f == NULL)
   1491 	    VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL3: func=%p\n", f);
   1492 	 else
   1493 	    SET_CLCALL_RETVAL(tid, f ( tid, arg[2], arg[3], arg[4] ), (Addr)f );
   1494          break;
   1495       }
   1496 
   1497       // Nb: this looks like a circular definition, because it kind of is.
   1498       // See comment in valgrind.h to understand what's going on.
   1499       case VG_USERREQ__RUNNING_ON_VALGRIND:
   1500          SET_CLREQ_RETVAL(tid, RUNNING_ON_VALGRIND+1);
   1501          break;
   1502 
   1503       case VG_USERREQ__PRINTF: {
   1504          /* JRS 2010-Jan-28: this is DEPRECATED; use the
   1505             _VALIST_BY_REF version instead */
   1506          if (sizeof(va_list) != sizeof(UWord))
   1507             goto va_list_casting_error_NORETURN;
   1508          union {
   1509             va_list vargs;
   1510             unsigned long uw;
   1511          } u;
   1512          u.uw = (unsigned long)arg[2];
   1513          Int count =
   1514             VG_(vmessage)( Vg_ClientMsg, (char *)arg[1], u.vargs );
   1515          VG_(message_flush)();
   1516          SET_CLREQ_RETVAL( tid, count );
   1517          break;
   1518       }
   1519 
   1520       case VG_USERREQ__PRINTF_BACKTRACE: {
   1521          /* JRS 2010-Jan-28: this is DEPRECATED; use the
   1522             _VALIST_BY_REF version instead */
   1523          if (sizeof(va_list) != sizeof(UWord))
   1524             goto va_list_casting_error_NORETURN;
   1525          union {
   1526             va_list vargs;
   1527             unsigned long uw;
   1528          } u;
   1529          u.uw = (unsigned long)arg[2];
   1530          Int count =
   1531             VG_(vmessage)( Vg_ClientMsg, (char *)arg[1], u.vargs );
   1532          VG_(message_flush)();
   1533          VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
   1534          SET_CLREQ_RETVAL( tid, count );
   1535          break;
   1536       }
   1537 
   1538       case VG_USERREQ__PRINTF_VALIST_BY_REF: {
   1539          va_list* vargsp = (va_list*)arg[2];
   1540          Int count =
   1541             VG_(vmessage)( Vg_ClientMsg, (char *)arg[1], *vargsp );
   1542          VG_(message_flush)();
   1543          SET_CLREQ_RETVAL( tid, count );
   1544          break;
   1545       }
   1546 
   1547       case VG_USERREQ__PRINTF_BACKTRACE_VALIST_BY_REF: {
   1548          va_list* vargsp = (va_list*)arg[2];
   1549          Int count =
   1550             VG_(vmessage)( Vg_ClientMsg, (char *)arg[1], *vargsp );
   1551          VG_(message_flush)();
   1552          VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
   1553          SET_CLREQ_RETVAL( tid, count );
   1554          break;
   1555       }
   1556 
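      /* Client-side contrast between the two families -- assumed usage,
         inferred from the request names: the deprecated requests above
         smuggle a va_list through a UWord, which only works where
         sizeof(va_list) == sizeof(UWord), whereas the _VALIST_BY_REF
         variants pass a pointer to the va_list:

            unsigned long res = 0;
            va_list vargs;
            va_start(vargs, format);
            VALGRIND_DO_CLIENT_REQUEST(res, 0,
               VG_USERREQ__PRINTF_VALIST_BY_REF,
               (UWord)format, (UWord)&vargs, 0, 0, 0);
            va_end(vargs);
      */
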
   1557       case VG_USERREQ__INTERNAL_PRINTF_VALIST_BY_REF: {
   1558          va_list* vargsp = (va_list*)arg[2];
   1559          Int count =
   1560             VG_(vmessage)( Vg_DebugMsg, (char *)arg[1], *vargsp );
   1561          VG_(message_flush)();
   1562          SET_CLREQ_RETVAL( tid, count );
   1563          break;
   1564       }
   1565 
   1566       case VG_USERREQ__ADD_IFUNC_TARGET: {
   1567          VG_(redir_add_ifunc_target)( arg[1], arg[2] );
   1568          SET_CLREQ_RETVAL( tid, 0);
   1569          break; }
   1570 
   1571       case VG_USERREQ__STACK_REGISTER: {
   1572          UWord sid = VG_(register_stack)((Addr)arg[1], (Addr)arg[2]);
   1573          SET_CLREQ_RETVAL( tid, sid );
   1574          break; }
   1575 
   1576       case VG_USERREQ__STACK_DEREGISTER: {
   1577          VG_(deregister_stack)(arg[1]);
   1578          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
   1579          break; }
   1580 
   1581       case VG_USERREQ__STACK_CHANGE: {
   1582          VG_(change_stack)(arg[1], (Addr)arg[2], (Addr)arg[3]);
   1583          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
   1584          break; }
   1585 
   1586       case VG_USERREQ__GET_MALLOCFUNCS: {
   1587 	 struct vg_mallocfunc_info *info = (struct vg_mallocfunc_info *)arg[1];
   1588 
   1589 	 info->tl_malloc               = VG_(tdict).tool_malloc;
   1590 	 info->tl_calloc               = VG_(tdict).tool_calloc;
   1591 	 info->tl_realloc              = VG_(tdict).tool_realloc;
   1592 	 info->tl_memalign             = VG_(tdict).tool_memalign;
   1593 	 info->tl___builtin_new        = VG_(tdict).tool___builtin_new;
   1594 	 info->tl___builtin_vec_new    = VG_(tdict).tool___builtin_vec_new;
   1595 	 info->tl_free                 = VG_(tdict).tool_free;
   1596 	 info->tl___builtin_delete     = VG_(tdict).tool___builtin_delete;
   1597 	 info->tl___builtin_vec_delete = VG_(tdict).tool___builtin_vec_delete;
   1598          info->tl_malloc_usable_size   = VG_(tdict).tool_malloc_usable_size;
   1599 
   1600 	 info->mallinfo                = VG_(mallinfo);
   1601 	 info->clo_trace_malloc        = VG_(clo_trace_malloc);
   1602 
   1603          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
   1604 
   1605 	 break;
   1606       }
   1607 
   1608       /* Requests from the client program */
   1609 
   1610       case VG_USERREQ__DISCARD_TRANSLATIONS:
   1611          if (VG_(clo_verbosity) > 2)
   1612             VG_(printf)( "client request: DISCARD_TRANSLATIONS,"
   1613                          " addr %p,  len %lu\n",
   1614                          (void*)arg[1], arg[2] );
   1615 
   1616          VG_(discard_translations)(
   1617             arg[1], arg[2], "scheduler(VG_USERREQ__DISCARD_TRANSLATIONS)"
   1618          );
   1619 
   1620          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
   1621 	 break;
   1622 
   1623       case VG_USERREQ__COUNT_ERRORS:
   1624          SET_CLREQ_RETVAL( tid, VG_(get_n_errs_found)() );
   1625          break;
   1626 
   1627       case VG_USERREQ__LOAD_PDB_DEBUGINFO:
   1628          VG_(di_notify_pdb_debuginfo)( arg[1], arg[2], arg[3], arg[4] );
   1629          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
   1630          break;
   1631 
   1632       case VG_USERREQ__MAP_IP_TO_SRCLOC: {
   1633          Addr   ip    = arg[1];
   1634          UChar* buf64 = (UChar*)arg[2];
   1635 
   1636          VG_(memset)(buf64, 0, 64);
   1637          UInt linenum = 0;
   1638          Bool ok = VG_(get_filename_linenum)(
   1639                       ip, &buf64[0], 50, NULL, 0, NULL, &linenum
   1640                    );
   1641          if (ok) {
   1642             /* Find the terminating zero in the first 50 bytes. */
   1643             UInt i;
   1644             for (i = 0; i < 50; i++) {
   1645                if (buf64[i] == 0)
   1646                   break;
   1647             }
   1648             /* We must find a zero somewhere in 0 .. 49.  Otherwise
   1649                VG_(get_filename_linenum) failed to zero-terminate
   1650                its output. */
   1651             vg_assert(i < 50);
   1652             VG_(sprintf)(&buf64[i], ":%u", linenum);
   1653          } else {
   1654             buf64[0] = 0;
   1655          }
   1656 
   1657          SET_CLREQ_RETVAL( tid, 0 ); /* return value is meaningless */
   1658          break;
   1659       }
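
      /* For example (illustrative values): a hit in foo.c at line 123
         leaves "foo.c:123" in the client's 64-byte buffer; on failure
         the buffer holds the empty string. */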
   1660 
   1661       case VG_USERREQ__CHANGE_ERR_DISABLEMENT: {
   1662          Word delta = arg[1];
   1663          vg_assert(delta == 1 || delta == -1);
   1664          ThreadState* tst = VG_(get_ThreadState)(tid);
   1665          vg_assert(tst);
   1666          if (delta == 1 && tst->err_disablement_level < 0xFFFFFFFF) {
   1667             tst->err_disablement_level++;
   1668          }
   1669          else
   1670          if (delta == -1 && tst->err_disablement_level > 0) {
   1671             tst->err_disablement_level--;
   1672          }
   1673          SET_CLREQ_RETVAL( tid, 0 ); /* return value is meaningless */
   1674          break;
   1675       }
   1676 
   1677       case VG_USERREQ__MALLOCLIKE_BLOCK:
   1678       case VG_USERREQ__RESIZEINPLACE_BLOCK:
   1679       case VG_USERREQ__FREELIKE_BLOCK:
   1680          // Ignore them if the addr is NULL; otherwise pass them on to the tool.
   1681          if (!arg[1]) {
   1682             SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
   1683             break;
   1684          } else {
   1685             goto my_default;
   1686          }
   1687 
   1688       case VG_USERREQ__NACL_MEM_START: {
   1689          Addr mem_start = arg[1];
   1690          nacl_head = mem_start;
   1691          VG_(printf)("*********************** NaCl mem_start: %p\n", (void*)mem_start);
   1692 
   1693          // At this point all segments in the sandbox belong to nacl_file (the
   1694          // first untrusted binary loaded by sel_ldr), and have correct
   1695          // permissions. Read its debug info.
   1696          NSegment* seg = VG_(am_find_nsegment)(mem_start);
   1697          Int fnIdx = -1;
   1698          while (seg) {
   1699            if (seg->kind == SkFileC) {
   1700              if (fnIdx == seg->fnIdx || fnIdx == -1) {
   1701                fnIdx = seg->fnIdx;
   1702                VG_(printf)("Segment at %p belongs to the loader\n", (void*)seg->start);
   1703                VG_(di_notify_mmap)(seg->start, False, /*glider: don't use fd*/-1);
   1704              }
   1705            }
   1706            seg = VG_(am_next_nsegment)((NSegment*)seg, True);
   1707          }
   1708          goto my_default;
   1709       }
   1710 
   1711       case VG_USERREQ__NACL_FILE: {
   1712          VG_(printf)("*********************** NaCl nacl_file: %s\n", (char*)arg[1]);
   1713          nacl_file = (char*) arg[1];
   1714          goto my_default;
   1715       }
   1716 
   1717       case VG_USERREQ__NACL_MMAP: {
   1718          // Simulate an mmap().
   1719          UWord vma = arg[1]; // Base VMA of the mapping.
   1720          UWord size = arg[2]; // Size of the mapping.
   1721          UWord file_offset = arg[3]; // File offset.
   1722          UWord access = arg[4]; // Access.
   1723          UWord clone_vma = arg[5]; // Another mapping of the same file; only used to find the file name.
   1724          if (!access)
   1725            access = VKI_PROT_READ | VKI_PROT_EXEC;
   1726          VG_(printf)("*********************** NaCl nacl_mmap: %lx %lx %lx %lx\n", vma, size, file_offset, clone_vma);
   1727 
   1728          char* file_name = NULL;
   1729          if (clone_vma) {
   1730            NSegment* seg = VG_(am_find_nsegment)(clone_vma);
   1731            file_name = VG_(am_get_filename)(seg);
   1732            VG_(printf)("*********************** NaCl DSO file_name: %s\n", file_name);
   1733          }
   1734 
   1735          UWord vma_end = vma + size;
   1736          UWord vma_aligned = VG_PGROUNDDN(vma);
   1737          UWord vma_end_aligned = VG_PGROUNDUP(vma_end);
   1738          size = vma_end_aligned - vma_aligned;
   1739          file_offset -= vma - vma_aligned;
   1740          VG_(am_notify_fake_client_mmap)(vma_aligned, size, access,
   1741              0, file_name ? file_name : (VG_(clo_nacl_file) ? VG_(clo_nacl_file) : nacl_file), file_offset);
   1742          // If file_name == NULL, then this is the main (sel_ldr-mapped) nexe,
   1743          // and has incorrect permissions at this point. In that case, wait for
   1744          // NACL_MEM_START to read the debug info.
   1745          if (file_name)
   1746            VG_(di_notify_mmap)(vma_aligned, False, /*glider: don't use fd*/-1);
   1747          goto my_default;
   1748       }
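
      /* Worked example of the rounding above, with illustrative numbers
         and 4K pages: vma = 0x10123, size = 0x2000 gives
         vma_end = 0x12123, vma_aligned = 0x10000 and
         vma_end_aligned = 0x13000; the mapping thus grows to size
         0x3000, and file_offset is pulled back by 0x123 so the file
         data still lands at the original vma. */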
   1749 
   1750 
   1751       default:
   1752        my_default:
   1753 	 if (os_client_request(tid, arg)) {
   1754 	    // do nothing, os_client_request() handled it
   1755          } else if (VG_(needs).client_requests) {
   1756 	    UWord ret;
   1757 
   1758             if (VG_(clo_verbosity) > 2)
   1759                VG_(printf)("client request: code %lx,  addr %p,  len %lu\n",
   1760                            arg[0], (void*)arg[1], arg[2] );
   1761 
   1762 	    if ( VG_TDICT_CALL(tool_handle_client_request, tid, arg, &ret) )
   1763 	       SET_CLREQ_RETVAL(tid, ret);
   1764          } else {
   1765 	    static Bool whined = False;
   1766 
   1767 	    if (!whined && VG_(clo_verbosity) > 2) {
   1768                // Allow for requests that are defined by tools but handled
   1769                // in the core; these have 0 in each of their two high bytes.
   1770                Char c1 = (arg[0] >> 24) & 0xff;
   1771                Char c2 = (arg[0] >> 16) & 0xff;
   1772                if (c1 == 0) c1 = '_';
   1773                if (c2 == 0) c2 = '_';
   1774 	       VG_(message)(Vg_UserMsg, "Warning:\n"
   1775                    "  unhandled client request: 0x%lx (%c%c+0x%lx).  Perhaps\n"
   1776 		   "  VG_(needs).client_requests should be set?\n",
   1777 			    arg[0], c1, c2, arg[0] & 0xffff);
   1778 	       whined = True;
   1779 	    }
   1780          }
   1781          break;
   1782    }
   1783    return;
   1784 
   1785    /*NOTREACHED*/
   1786   va_list_casting_error_NORETURN:
   1787    VG_(umsg)(
   1788       "Valgrind: fatal error - cannot continue: use of the deprecated\n"
   1789       "client requests VG_USERREQ__PRINTF or VG_USERREQ__PRINTF_BACKTRACE\n"
   1790       "on a platform where they cannot be supported.  Please use the\n"
   1791       "equivalent _VALIST_BY_REF versions instead.\n"
   1792       "\n"
   1793       "This is a binary-incompatible change in Valgrind's client request\n"
   1794       "mechanism.  It is unfortunate, but difficult to avoid.  End-users\n"
   1795       "are expected to almost never see this message.  The only case in\n"
   1796       "which you might see this message is if your code uses the macros\n"
   1797       "VALGRIND_PRINTF or VALGRIND_PRINTF_BACKTRACE.  If so, you will need\n"
   1798       "to recompile such code, using the header files from this version of\n"
   1799       "Valgrind, and not any previous version.\n"
   1800       "\n"
   1801       "If you see this message in any other circumstances, it is probably\n"
   1802       "a bug in Valgrind.  In this case, please file a bug report at\n"
   1803       "\n"
   1804       "   http://www.valgrind.org/support/bug_reports.html\n"
   1805       "\n"
   1806       "Will now abort.\n"
   1807    );
   1808    vg_assert(0);
   1809 }
   1810 
   1811 
   1812 /* ---------------------------------------------------------------------
   1813    Sanity checking (permanently engaged)
   1814    ------------------------------------------------------------------ */
   1815 
   1816 /* Internal consistency checks on the sched structures. */
   1817 static
   1818 void scheduler_sanity ( ThreadId tid )
   1819 {
   1820    Bool bad = False;
   1821    static UInt lasttime = 0;
   1822    UInt now;
   1823    Int lwpid = VG_(gettid)();
   1824 
   1825    if (!VG_(is_running_thread)(tid)) {
   1826       VG_(message)(Vg_DebugMsg,
   1827 		   "Thread %d is supposed to be running, "
   1828                    "but doesn't own the_BigLock (owned by %d)\n",
   1829 		   tid, VG_(running_tid));
   1830       bad = True;
   1831    }
   1832 
   1833    if (lwpid != VG_(threads)[tid].os_state.lwpid) {
   1834       VG_(message)(Vg_DebugMsg,
   1835                    "Thread %d supposed to be in LWP %d, but we're actually %d\n",
   1836                    tid, VG_(threads)[tid].os_state.lwpid, lwpid);
   1837       bad = True;
   1838    }
   1839 
   1840 #if !defined(VGO_darwin)
   1841    // GrP fixme
   1842    if (lwpid != the_BigLock.owner_lwpid) {
   1843       VG_(message)(Vg_DebugMsg,
   1844                    "Thread %d (LWPID %d) doesn't own the_BigLock\n",
   1845                    tid, lwpid);
   1846       bad = True;
   1847    }
   1848 #endif
   1849 
   1850    /* Periodically show the state of all threads, for debugging
   1851       purposes. */
   1852    now = VG_(read_millisecond_timer)();
   1853    if (0 && (!bad) && (lasttime + 4000/*ms*/ <= now)) {
   1854       lasttime = now;
   1855       VG_(printf)("\n------------ Sched State at %d ms ------------\n",
   1856                   (Int)now);
   1857       VG_(show_sched_status)();
   1858    }
   1859 
   1860    /* core_panic also shows the sched status, which is why we don't
   1861       show it above if bad==True. */
   1862    if (bad)
   1863       VG_(core_panic)("scheduler_sanity: failed");
   1864 }
   1865 
   1866 void VG_(sanity_check_general) ( Bool force_expensive )
   1867 {
   1868    ThreadId tid;
   1869 
   1870    static UInt next_slow_check_at = 1;
   1871    static UInt slow_check_interval = 25;
   1872 
   1873    if (VG_(clo_sanity_level) < 1) return;
   1874 
   1875    /* --- First do all the tests that we can do quickly. ---*/
   1876 
   1877    sanity_fast_count++;
   1878 
   1879    /* Check stuff pertaining to the memory check system. */
   1880 
   1881    /* Check that nobody has spuriously claimed that the first or
   1882       last 16 pages of memory have become accessible [...] */
   1883    if (VG_(needs).sanity_checks) {
   1884       vg_assert(VG_TDICT_CALL(tool_cheap_sanity_check));
   1885    }
   1886 
   1887    /* --- Now some more expensive checks. ---*/
   1888 
   1889    /* Once every now and again, check some more expensive stuff.
   1890       Gradually increase the interval between such checks so as not to
   1891       burden long-running programs too much. */
   1892    if ( force_expensive
   1893         || VG_(clo_sanity_level) > 1
   1894         || (VG_(clo_sanity_level) == 1
   1895             && sanity_fast_count == next_slow_check_at)) {
   1896 
   1897       if (0) VG_(printf)("SLOW at %d\n", sanity_fast_count-1);
   1898 
   1899       next_slow_check_at = sanity_fast_count - 1 + slow_check_interval;
   1900       slow_check_interval++;
   1901       sanity_slow_count++;
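
      /* With the initial values above, the expensive checks run on
         fast-check numbers 1, 25, 50, 76, 103, ...: each gap is one
         longer than the previous one. */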
   1902 
   1903       if (VG_(needs).sanity_checks) {
   1904           vg_assert(VG_TDICT_CALL(tool_expensive_sanity_check));
   1905       }
   1906 
   1907       /* Look for stack overruns.  Visit all threads. */
   1908       for (tid = 1; tid < VG_N_THREADS; tid++) {
   1909 	 SizeT    remains;
   1910          VgStack* stack;
   1911 
   1912 	 if (VG_(threads)[tid].status == VgTs_Empty ||
   1913 	     VG_(threads)[tid].status == VgTs_Zombie)
   1914 	    continue;
   1915 
   1916          stack
   1917             = (VgStack*)
   1918               VG_(get_ThreadState)(tid)->os_state.valgrind_stack_base;
   1919          SizeT limit
   1920             = 4096; // Let's say.  Checking more causes lots of L2 misses.
   1921 	 remains
   1922             = VG_(am_get_VgStack_unused_szB)(stack, limit);
   1923 	 if (remains < limit)
   1924 	    VG_(message)(Vg_DebugMsg,
   1925                          "WARNING: Thread %d is within %lu bytes "
   1926                          "of running out of stack!\n",
   1927 		         tid, remains);
   1928       }
   1929    }
   1930 
   1931    if (VG_(clo_sanity_level) > 1) {
   1932       /* Check sanity of the low-level memory manager.  Note that bugs
   1933          in the client's code can cause this to fail, so we don't do
   1934          this check unless specially asked for.  And because it's
   1935          potentially very expensive. */
   1936       VG_(sanity_check_malloc_all)();
   1937    }
   1938 }
   1939 
   1940 /*--------------------------------------------------------------------*/
   1941 /*--- end                                                          ---*/
   1942 /*--------------------------------------------------------------------*/
   1943