Home | History | Annotate | Download | only in tsan
      1 /* Copyright (c) 2008-2010, Google Inc.
      2  * All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions are
      6  * met:
      7  *
      8  *     * Redistributions of source code must retain the above copyright
      9  * notice, this list of conditions and the following disclaimer.
     10  *     * Neither the name of Google Inc. nor the names of its
     11  * contributors may be used to endorse or promote products derived from
     12  * this software without specific prior written permission.
     13  *
     14  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     15  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     16  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     17  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     18  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     19  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     20  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 // This file is part of ThreadSanitizer, a dynamic data race detector.
     28 // Author: Konstantin Serebryany.
     29 // Author: Timur Iskhodzhanov.
     30 
     31 #define __STDC_LIMIT_MACROS
     32 #include "pin.H"
     33 
     34 #include <stdio.h>
     35 #include <stdlib.h>
     36 #include <string.h>
     37 #include <map>
     38 #include <assert.h>
     39 
     40 #include "thread_sanitizer.h"
     41 #include "ts_lock.h"
     42 #include "ts_trace_info.h"
     43 #include "ts_race_verifier.h"
     44 #include "common_util.h"
     45 
     46 
// Compiler-specific shims. Exactly one of the two branches below provides
// ATOMIC_READ; the MSVC branch additionally stubs out a few POSIX names.
#if defined(__GNUC__)
# include <cxxabi.h>  // __cxa_demangle
// Atomic read implemented as an atomic "add zero and fetch".
# define ATOMIC_READ(a) __sync_add_and_fetch(a, 0)

#elif defined(_MSC_VER)
namespace WINDOWS
{
// This is the way of including windows.h recommended by PIN docs.
#include<Windows.h>
}

#include <intrin.h>
// popen is stubbed out on Windows: it always evaluates to NULL.
# define popen(x,y) (NULL)
// Atomic read implemented as a compare-exchange of 0 with 0.
# define ATOMIC_READ(a)         _InterlockedCompareExchange(a, 0, 0)
// usleep takes microseconds, Sleep takes milliseconds, hence the /1000.
# define usleep(x) WINDOWS::Sleep((x)/1000)
# define UINTPTR_MAX ((uintptr_t)-1)
#endif

// Refuse to build with NDEBUG defined
// (presumably because assert() must stay active -- TODO confirm).
#ifdef NDEBUG
# error "Please don't define NDEBUG"
#endif
     68 
     69 static void DumpEvent(CONTEXT *ctx, EventType type, int32_t tid, uintptr_t pc,
     70                       uintptr_t a, uintptr_t info);
     71 //------ Global PIN lock ------- {{{1
     72 class ScopedReentrantClientLock {
     73  public:
     74   ScopedReentrantClientLock(int line)
     75     : line_(line) {
     76     // if (line && G_flags->debug_level >= 5)  Printf("??Try  at line %d\n", line);
     77     PIN_LockClient();
     78     if (line && G_flags->debug_level >= 5)  Printf("++Lock at line %d\n", line);
     79   }
     80   ~ScopedReentrantClientLock() {
     81     if (line_ && G_flags->debug_level >= 5) Printf("--Unlock at line %d\n", line_);
     82     PIN_UnlockClient();
     83   }
     84  private:
     85   int line_;
     86 };
     87 
     88 //--------------- Globals ----------------- {{{1
// Output stream defined elsewhere; in this file it receives the raw event
// buffer when running serialized in offline mode (see TLEBFlushUnlocked).
extern FILE *G_out;

// Number of threads created by pthread_create (i.e. not counting main thread).
static int n_created_threads = 0;
// Number of started threads, i.e. the number of CallbackForThreadStart calls.
static int n_started_threads = 0;

// Upper bound on the number of PIN threads, taken from PIN itself.
const uint32_t kMaxThreads = PIN_MAX_THREADS;

// Serializes the ThreadSanitizer callbacks if TS_SERIALIZED==1
static TSLock g_main_ts_lock;

// Serializes calls to pthread_create and CreateThread.
static TSLock g_thread_create_lock;
// Under g_thread_create_lock.
// Initialized to -1, i.e. "no thread".
static THREADID g_tid_of_thread_which_called_create_thread = -1;
    105 
#ifdef _MSC_VER
// On Windows, we need to create a h-b arc between
// RtlQueueWorkItem(callback, x, y) and the call to callback.
// Same for RegisterWaitForSingleObject.
static unordered_set<uintptr_t> *g_windows_thread_pool_calback_set;
// Similarly, we need h-b arcs between the returns from callbacks and
// the related UnregisterWaitEx. Damn, what a stupid interface!
static unordered_map<uintptr_t, uintptr_t> *g_windows_thread_pool_wait_object_map;
#endif
    115 
    116 //--------------- StackFrame ----------------- {{{1
    117 struct StackFrame {
    118   uintptr_t pc;
    119   uintptr_t sp;
    120   StackFrame(uintptr_t p, uintptr_t s) : pc(p), sp(s) { }
    121 };
    122 //--------------- InstrumentedCallFrame ----- {{{1
    123 // Machinery to implement the fast interceptors in PIN
    124 // (i.e. the ones that don't use PIN_CallApplicationFunction).
    125 // We instrument the entry of the interesting function (e.g. malloc)
    126 // and all RET instructions in this function's module (e.g. libc).
    127 // At entry, we push an InstrumentedCallFrame object onto InstrumentedCallStack.
    128 // At every RET instruction we check if the stack is not empty (fast path)
    129 // and if the top contains the current SP. If yes -- this is the function return
    130 // and we pop the stack.
// A single pending "fast interceptor" call: what to invoke when the
// intercepted function returns, plus the values captured at its entry.
struct InstrumentedCallFrame {
  // Signature of the function stored in `callback`. It receives the thread
  // id, this frame and an ADDRINT named `ret`
  // (presumably the intercepted function's return value -- TODO confirm).
  typedef void (*callback_t)(THREADID tid, InstrumentedCallFrame &frame,
                             ADDRINT ret);
  callback_t callback;
  uintptr_t pc;      // pc recorded at function entry.
  uintptr_t sp;      // sp recorded at function entry.
  uintptr_t arg[4];  // Saved arguments; Push() fills arg[0] and arg[1].
};
    139 
    140 struct InstrumentedCallStack {
    141  public:
    142   InstrumentedCallStack() : size_(0) { }
    143 
    144   size_t size() { return size_; }
    145 
    146   void Push(InstrumentedCallFrame::callback_t callback,
    147             uintptr_t pc,
    148             uintptr_t sp,
    149             uintptr_t a0, uintptr_t a1) {
    150     CHECK(size() < TS_ARRAY_SIZE(frames_));
    151     size_++;
    152     Top()->callback = callback;
    153     Top()->pc = pc;
    154     Top()->sp = sp;
    155     Top()->arg[0] = a0;
    156     Top()->arg[1] = a1;
    157   }
    158 
    159   void Pop() {
    160     CHECK(size() > 0);
    161     size_--;
    162   }
    163 
    164   InstrumentedCallFrame *Top() {
    165     CHECK(size() > 0);
    166     return &frames_[size_-1];
    167   }
    168 
    169   void Print() {
    170     for (size_t i = 0; i < size(); i++) {
    171       Printf( " %p\n", frames_[i].sp);
    172       if (i > 0) CHECK(frames_[i].sp <= frames_[i-1].sp);
    173     }
    174   }
    175 
    176  private:
    177   InstrumentedCallFrame frames_[20];
    178   size_t size_;
    179 };
    180 
    181 //--------------- PinThread ----------------- {{{1
// Capacity (in uintptr_t words) of the per-thread event buffer.
// NOTE(review): the "- 2" presumably accounts for the two extra fields of
// ThreadLocalEventBuffer (t and size) -- confirm.
const size_t kThreadLocalEventBufferSize = 2048 - 2;
// The number of mops should be at least 2 less than the size of TLEB
// so that we have space to put SBLOCK_ENTER token and the trace_info ptr.
const size_t kMaxMopsPerTrace = kThreadLocalEventBufferSize - 2;

// PIN register handle; not initialized in this part of the file
// (presumably used for thread-local storage, judging by the name -- TODO confirm).
REG tls_reg;

struct PinThread;
    190 
// Per-thread buffer of encoded events awaiting a flush to ThreadSanitizer.
struct ThreadLocalEventBuffer {
  PinThread *t;   // The thread this buffer belongs to (read by TLEBFlushUnlocked).
  size_t size;    // Number of words of `events` currently in use.
  uintptr_t events[kThreadLocalEventBufferSize];  // Encoded event stream.
};
    196 
// Per-thread state of the PIN front-end; instances live in g_pin_threads,
// indexed by PIN's THREADID.
struct PinThread {
  ThreadLocalEventBuffer tleb;  // Buffered events of this thread.
  int          uniq_tid;  // Tid reported to ThreadSanitizer; PIN's own tid
                          // wraps, so a unique one is kept separately.
  uint32_t     literace_sampling;  // cache of a flag.
  volatile long last_child_tid;
  InstrumentedCallStack ic_stack;  // Pending fast-interceptor calls.
  THREADID     tid;         // PIN thread id.
  THREADID     parent_tid;  // (THREADID)-1 when there is no parent.
  pthread_t    my_ptid;
  size_t       thread_stack_size_if_known;
  size_t       last_child_stack_size_if_known;
  vector<StackFrame> shadow_stack;
  TraceInfo    *trace_info;  // Trace whose mops TLEBAddTrace reserves room for.
  int ignore_accesses;  // if > 0, ignore all memory accesses.
                        // Recomputed by ComputeIgnoreAccesses().
  int ignore_accesses_depth;  // Nesting depth of TLEB_IGNORE_ALL_BEGIN/END.
  int ignore_sync;      // if > 0, ignore all sync events.
  int spin_lock_recursion_depth;
  bool         thread_finished;
  bool         thread_done;  // Set once THR_END has been flushed.
  bool         holding_lock;
  int          n_consumed_events;
#ifdef _MSC_VER
  enum StartupState {
    STARTING,
    CHILD_READY,
    MAY_CONTINUE,
  };
  volatile long startup_state;  // used to handle the CREATE_SUSPENDED flag.
#endif
  char         padding[64];  // avoid any chance of ping-pong.
};
    228 
    229 // Array of pin threads, indexed by pin's THREADID.
// Array of pin threads, indexed by pin's THREADID.
// Allocated elsewhere; this file only indexes into it.
static PinThread *g_pin_threads;

// If true, ignore all accesses in all threads.
// Toggled by the TLEB_GLOBAL_IGNORE_ON/OFF inner events.
extern bool global_ignore;

#ifdef _MSC_VER
// NOTE(review): judging by the name, the set of Windows handles known to be
// thread handles -- confirm against the code that fills it.
static unordered_set<pthread_t> *g_win_handles_which_are_threads;
#endif
    238 
    239 //-------------------- ts_replace ------------------- {{{1
    240 static void ReportAccesRange(THREADID tid, uintptr_t pc, EventType type, uintptr_t x, size_t size) {
    241   if (size && !g_pin_threads[tid].ignore_accesses) {
    242     uintptr_t end = x + size;
    243     for(uintptr_t a = x; a < end; a += 8) {
    244       size_t cur_size = min((uintptr_t)8, end - a);
    245       DumpEvent(0, type, tid, pc, a, cur_size);
    246     }
    247   }
    248 }
    249 
// Hooks and extra parameters defined right before including ts_replace.h
// (presumably consumed by the replacement functions in that header -- TODO
// confirm). Both REPORT_* macros expect `tid` and `pc` to be in scope at the
// expansion site.
#define REPORT_READ_RANGE(x, size) ReportAccesRange(tid, pc, READ, (uintptr_t)x, size)
#define REPORT_WRITE_RANGE(x, size) ReportAccesRange(tid, pc, WRITE, (uintptr_t)x, size)

#define EXTRA_REPLACE_PARAMS THREADID tid, uintptr_t pc,
#define EXTRA_REPLACE_ARGS tid, pc,
#include "ts_replace.h"
    256 
    257 //------------- ThreadSanitizer exports ------------ {{{1
    258 string Demangle(const char *str) {
    259 #if defined(__GNUC__)
    260   int status;
    261   char *demangled = __cxxabiv1::__cxa_demangle(str, 0, 0, &status);
    262   if (demangled) {
    263     string res = demangled;
    264     free(demangled);
    265     return res;
    266   }
    267 #endif
    268   return str;
    269 }
    270 
    271 void PcToStrings(uintptr_t pc, bool demangle,
    272                 string *img_name, string *rtn_name,
    273                 string *file_name, int *line_no) {
    274   if (G_flags->symbolize) {
    275     RTN rtn;
    276     ScopedReentrantClientLock lock(__LINE__);
    277     // ClientLock must be held.
    278     PIN_GetSourceLocation(pc, NULL, line_no, file_name);
    279     *file_name = ConvertToPlatformIndependentPath(*file_name);
    280     rtn = RTN_FindByAddress(pc);
    281     string name;
    282     if (RTN_Valid(rtn)) {
    283       *rtn_name = demangle
    284           ? Demangle(RTN_Name(rtn).c_str())
    285           : RTN_Name(rtn);
    286       *img_name = IMG_Name(SEC_Img(RTN_Sec(rtn)));
    287     }
    288   }
    289 }
    290 
    291 string PcToRtnName(uintptr_t pc, bool demangle) {
    292   string res;
    293   if (G_flags->symbolize) {
    294     {
    295       ScopedReentrantClientLock lock(__LINE__);
    296       RTN rtn = RTN_FindByAddress(pc);
    297       if (RTN_Valid(rtn)) {
    298         res = demangle
    299             ? Demangle(RTN_Name(rtn).c_str())
    300             : RTN_Name(rtn);
    301       }
    302     }
    303   }
    304   return res;
    305 }
    306 
    307 //--------------- ThreadLocalEventBuffer ----------------- {{{1
// The thread-local event buffer (TLEB) is an array of uintptr_t.
// The events are encoded like this:
// { RTN_CALL, call_pc, target_pc, ignore_below }
// { RTN_EXIT }
// { SBLOCK_ENTER, trace_info_of_size_n, addr1, addr2, ... addr_n}
// { THR_START }
// { one of TLEBSpecificEvents }
// { any other event type, pc, a, info }
    313 
// Event codes that exist only inside the thread-local event buffer.
// They are numbered after LAST_EVENT so they never collide with regular
// event types, and are consumed by HandleInnerEvent().
enum TLEBSpecificEvents {
  TLEB_IGNORE_ALL_BEGIN = LAST_EVENT + 1,  // ++ignore_accesses_depth
  TLEB_IGNORE_ALL_END,                     // --ignore_accesses_depth
  TLEB_IGNORE_SYNC_BEGIN,                  // ++ignore_sync
  TLEB_IGNORE_SYNC_END,                    // --ignore_sync
  TLEB_GLOBAL_IGNORE_ON,                   // global_ignore = true
  TLEB_GLOBAL_IGNORE_OFF,                  // global_ignore = false
};
    322 
    323 static bool DumpEventPlainText(EventType type, int32_t tid, uintptr_t pc,
    324                         uintptr_t a, uintptr_t info) {
    325 #if DEBUG == 0 || defined(_MSC_VER)
    326   return false;
    327 #else
    328   if (G_flags->dump_events.empty()) return false;
    329 
    330   static unordered_set<uintptr_t> *pc_set;
    331   if (pc_set == NULL) {
    332     pc_set = new unordered_set<uintptr_t>;
    333   }
    334   static FILE *log_file = NULL;
    335   if (log_file == NULL) {
    336     log_file = popen(("gzip > " + G_flags->dump_events).c_str(), "w");
    337   }
    338   if (G_flags->symbolize && pc_set->insert(pc).second) {
    339     string img_name, rtn_name, file_name;
    340     int line = 0;
    341     PcToStrings(pc, false, &img_name, &rtn_name, &file_name, &line);
    342     if (file_name.empty()) file_name = "unknown";
    343     if (img_name.empty()) img_name = "unknown";
    344     if (rtn_name.empty()) rtn_name = "unknown";
    345     if (line == 0) line = 1;
    346     fprintf(log_file, "#PC %lx %s %s %s %d\n",
    347             (long)pc, img_name.c_str(), rtn_name.c_str(),
    348             file_name.c_str(), line);
    349   }
    350   fprintf(log_file, "%s %x %lx %lx %lx\n", kEventNames[type], tid,
    351           (long)pc, (long)a, (long)info);
    352   return true;
    353 #endif
    354 }
    355 
    356 static void DumpEventInternal(EventType type, int32_t uniq_tid, uintptr_t pc,
    357                               uintptr_t a, uintptr_t info) {
    358   if (DumpEventPlainText(type, uniq_tid, pc, a, info)) return;
    359   // PIN wraps the tid (after 2048), but we need a uniq tid.
    360   Event event(type, uniq_tid, pc, a, info);
    361   ThreadSanitizerHandleOneEvent(&event);
    362 }
    363 
    364 void ComputeIgnoreAccesses(PinThread &t) {
    365   t.ignore_accesses = (t.ignore_accesses_depth != 0) || (global_ignore != 0);
    366 }
    367 
    368 static void HandleInnerEvent(PinThread &t, uintptr_t event) {
    369   DCHECK(event > LAST_EVENT);
    370   if (event == TLEB_IGNORE_ALL_BEGIN){
    371     t.ignore_accesses_depth++;
    372     ComputeIgnoreAccesses(t);
    373   } else if (event == TLEB_IGNORE_ALL_END){
    374     t.ignore_accesses_depth--;
    375     CHECK(t.ignore_accesses_depth >= 0);
    376     ComputeIgnoreAccesses(t);
    377   } else if (event == TLEB_IGNORE_SYNC_BEGIN){
    378     t.ignore_sync++;
    379   } else if (event == TLEB_IGNORE_SYNC_END){
    380     t.ignore_sync--;
    381     CHECK(t.ignore_sync >= 0);
    382   } else if (event == TLEB_GLOBAL_IGNORE_ON){
    383     Report("INFO: GLOBAL IGNORE ON\n");
    384     global_ignore = true;
    385     ComputeIgnoreAccesses(t);
    386   } else if (event == TLEB_GLOBAL_IGNORE_OFF){
    387     Report("INFO: GLOBAL IGNORE OFF\n");
    388     global_ignore = false;
    389     ComputeIgnoreAccesses(t);
    390   } else {
    391     Printf("Event: %ld (last: %ld)\n", event, LAST_EVENT);
    392     CHECK(0);
    393   }
    394 }
    395 
    396 static INLINE bool WantToIgnoreEvent(PinThread &t, uintptr_t event) {
    397   if (t.ignore_sync &&
    398       (event == WRITER_LOCK || event == READER_LOCK || event == UNLOCK ||
    399        event == SIGNAL || event == WAIT)) {
    400     // do nothing, we are ignoring locks.
    401     return true;
    402   } else if (t.ignore_accesses && (event == READ || event == WRITE)) {
    403     // do nothing, we are ignoring mops.
    404     return true;
    405   }
    406   return false;
    407 }
    408 
    409 static INLINE void TLEBFlushUnlocked(ThreadLocalEventBuffer &tleb) {
    410   if (tleb.size == 0) return;
    411   PinThread &t = *tleb.t;
    412   // global_ignore should be always on with race verifier
    413   DCHECK(!g_race_verifier_active || global_ignore);
    414   DCHECK(tleb.size <= kThreadLocalEventBufferSize);
    415   if (DEBUG_MODE && t.thread_done) {
    416     Printf("ACHTUNG!!! an event from a dead thread T%d\n", t.tid);
    417   }
    418   DCHECK(!t.thread_done);
    419 
    420   if (TS_SERIALIZED == 1 || DEBUG_MODE) {
    421     size_t max_idx = TS_ARRAY_SIZE(G_stats->tleb_flush);
    422     size_t idx = min((size_t)u32_log2(tleb.size), max_idx - 1);
    423     CHECK(idx < max_idx);
    424     G_stats->tleb_flush[idx]++;
    425   }
    426 
    427   if (TS_SERIALIZED == 1 && G_flags->offline) {
    428     fwrite(tleb.events, sizeof(uintptr_t), tleb.size, G_out);
    429     tleb.size = 0;
    430     return;
    431   }
    432 
    433   size_t i;
    434   for (i = 0; i < tleb.size; ) {
    435     uintptr_t event = tleb.events[i++];
    436     DCHECK(!g_race_verifier_active ||
    437         event == SBLOCK_ENTER || event == EXPECT_RACE || event == THR_START);
    438     if (event == RTN_EXIT) {
    439       if (DumpEventPlainText(RTN_EXIT, t.uniq_tid, 0, 0, 0)) continue;
    440       ThreadSanitizerHandleRtnExit(t.uniq_tid);
    441     } else if (event == RTN_CALL) {
    442       uintptr_t call_pc = tleb.events[i++];
    443       uintptr_t target_pc = tleb.events[i++];
    444       IGNORE_BELOW_RTN ignore_below = (IGNORE_BELOW_RTN)tleb.events[i++];
    445       if (DumpEventPlainText(RTN_CALL, t.uniq_tid, call_pc,
    446                              target_pc, ignore_below)) continue;
    447       ThreadSanitizerHandleRtnCall(t.uniq_tid, call_pc, target_pc,
    448                                    ignore_below);
    449     } else if (event == SBLOCK_ENTER){
    450       TraceInfo *trace_info = (TraceInfo*) tleb.events[i++];
    451       DCHECK(trace_info);
    452       bool do_this_trace = true;
    453       if (t.ignore_accesses) {
    454         do_this_trace = false;
    455       } else if (t.literace_sampling) {
    456         do_this_trace = !trace_info->LiteRaceSkipTraceRealTid(
    457             t.uniq_tid, t.literace_sampling);
    458       }
    459 
    460       size_t n = trace_info->n_mops();
    461       if (do_this_trace) {
    462         if (DEBUG_MODE && !G_flags->dump_events.empty()) {
    463           DumpEventPlainText(SBLOCK_ENTER, t.uniq_tid, trace_info->pc(), 0, 0);
    464           for (size_t j = 0; j < n; j++) {
    465             MopInfo *mop = trace_info->GetMop(j);
    466             DCHECK(mop->size());
    467             DCHECK(mop);
    468             uintptr_t addr = tleb.events[i + j];
    469             if (addr) {
    470               DumpEventPlainText(mop->is_write() ? WRITE : READ, t.uniq_tid,
    471                                      mop->pc(), addr, mop->size());
    472             }
    473           }
    474         } else {
    475           ThreadSanitizerHandleTrace(t.uniq_tid, trace_info, tleb.events+i);
    476         }
    477       }
    478       i += n;
    479     } else if (event == THR_START) {
    480       uintptr_t parent = -1;
    481       if (t.parent_tid != (THREADID)-1) {
    482         parent = g_pin_threads[t.parent_tid].uniq_tid;
    483       }
    484       DumpEventInternal(THR_START, t.uniq_tid, 0, 0, parent);
    485     } else if (event == THR_END) {
    486       DumpEventInternal(THR_END, t.uniq_tid, 0, 0, 0);
    487       DCHECK(t.thread_finished == true);
    488       DCHECK(t.thread_done == false);
    489       t.thread_done = true;
    490       i += 3;  // consume the unneeded data.
    491       DCHECK(i == tleb.size);  // should be last event in this tleb.
    492     } else if (event > LAST_EVENT) {
    493       HandleInnerEvent(t, event);
    494     } else {
    495       // all other events.
    496       CHECK(event > NOOP && event < LAST_EVENT);
    497       uintptr_t pc    = tleb.events[i++];
    498       uintptr_t a     = tleb.events[i++];
    499       uintptr_t info  = tleb.events[i++];
    500       if (!WantToIgnoreEvent(t, event)) {
    501         DumpEventInternal((EventType)event, t.uniq_tid, pc, a, info);
    502       }
    503     }
    504   }
    505   DCHECK(i == tleb.size);
    506   tleb.size = 0;
    507   if (DEBUG_MODE) { // for sanity checking.
    508     memset(tleb.events, 0xf0, sizeof(tleb.events));
    509   }
    510 }
    511 
    512 static INLINE void TLEBFlushLocked(PinThread &t) {
    513 #if TS_SERIALIZED==1
    514   if (G_flags->dry_run) {
    515     t.tleb.size = 0;
    516     return;
    517   }
    518   CHECK(t.tleb.size <= kThreadLocalEventBufferSize);
    519   G_stats->lock_sites[0]++;
    520   ScopedLock lock(&g_main_ts_lock);
    521   TLEBFlushUnlocked(t.tleb);
    522 #else
    523   TLEBFlushUnlocked(t.tleb);
    524 #endif
    525 }
    526 
    527 static void TLEBAddRtnCall(PinThread &t, uintptr_t call_pc,
    528                            uintptr_t target_pc, IGNORE_BELOW_RTN ignore_below) {
    529   if (TS_SERIALIZED == 0) {
    530     TLEBFlushLocked(t);
    531     ThreadSanitizerHandleRtnCall(t.uniq_tid, call_pc, target_pc,
    532                                  ignore_below);
    533     return;
    534   }
    535   DCHECK(t.tleb.size <= kThreadLocalEventBufferSize);
    536   if (t.tleb.size + 4 > kThreadLocalEventBufferSize) {
    537     TLEBFlushLocked(t);
    538     DCHECK(t.tleb.size == 0);
    539   }
    540   t.tleb.events[t.tleb.size++] = RTN_CALL;
    541   t.tleb.events[t.tleb.size++] = call_pc;
    542   t.tleb.events[t.tleb.size++] = target_pc;
    543   t.tleb.events[t.tleb.size++] = ignore_below;
    544   DCHECK(t.tleb.size <= kThreadLocalEventBufferSize);
    545 }
    546 
    547 static void TLEBAddRtnExit(PinThread &t) {
    548   if (TS_SERIALIZED == 0) {
    549     TLEBFlushLocked(t);
    550     ThreadSanitizerHandleRtnExit(t.uniq_tid);
    551     return;
    552   }
    553   if (t.tleb.size + 1 > kThreadLocalEventBufferSize) {
    554     TLEBFlushLocked(t);
    555   }
    556   t.tleb.events[t.tleb.size++] = RTN_EXIT;
    557   DCHECK(t.tleb.size <= kThreadLocalEventBufferSize);
    558 }
    559 
    560 static INLINE uintptr_t *TLEBAddTrace(PinThread &t) {
    561   size_t n = t.trace_info->n_mops();
    562   DCHECK(n > 0);
    563   if (TS_SERIALIZED == 0) {
    564     TLEBFlushLocked(t);
    565   } else if (t.tleb.size + 2 + n > kThreadLocalEventBufferSize) {
    566     TLEBFlushLocked(t);
    567   }
    568   if (TS_SERIALIZED == 1) {
    569     t.tleb.events[t.tleb.size++] = SBLOCK_ENTER;
    570     t.tleb.events[t.tleb.size++] = (uintptr_t)t.trace_info;
    571   } else {
    572     DCHECK(t.tleb.size == 0);
    573     t.tleb.events[0] = SBLOCK_ENTER;
    574     t.tleb.events[1] = (uintptr_t)t.trace_info;
    575     t.tleb.size += 2;
    576   }
    577   uintptr_t *mop_addresses = &t.tleb.events[t.tleb.size];
    578   // not every address will be written to. so they will stay 0.
    579   for (size_t i = 0; i < n; i++) {
    580     mop_addresses[i] = 0;
    581   }
    582   t.tleb.size += n;
    583   DCHECK(t.tleb.size <= kThreadLocalEventBufferSize);
    584   return mop_addresses;
    585 }
    586 
    587 static void TLEBStartThread(PinThread &t) {
    588   CHECK(t.tleb.size == 0);
    589   t.tleb.events[t.tleb.size++] = THR_START;
    590 }
    591 
    592 static void TLEBSimpleEvent(PinThread &t, uintptr_t event) {
    593   if (g_race_verifier_active)
    594     return;
    595   if (TS_SERIALIZED == 0) {
    596     TLEBFlushLocked(t);
    597     if (event < LAST_EVENT) {
    598       Event e((EventType)event, t.uniq_tid, 0, 0, 0);
    599       ThreadSanitizerHandleOneEvent(&e);
    600     } else {
    601       HandleInnerEvent(t, event);
    602     }
    603     return;
    604   }
    605   if (t.tleb.size + 1 > kThreadLocalEventBufferSize) {
    606     TLEBFlushLocked(t);
    607   }
    608   t.tleb.events[t.tleb.size++] = event;
    609   DCHECK(t.tleb.size <= kThreadLocalEventBufferSize);
    610 }
    611 
    612 static void TLEBAddGenericEventAndFlush(PinThread &t,
    613                                         EventType type, uintptr_t pc,
    614                                         uintptr_t a, uintptr_t info) {
    615   if (TS_SERIALIZED == 0) {
    616     if (WantToIgnoreEvent(t, type)) return;
    617     TLEBFlushLocked(t);
    618     Event e(type, t.uniq_tid, pc, a, info);
    619     ThreadSanitizerHandleOneEvent(&e);
    620     return;
    621   }
    622   if (t.tleb.size + 4 > kThreadLocalEventBufferSize) {
    623     TLEBFlushLocked(t);
    624   }
    625   DCHECK(type > NOOP && type < LAST_EVENT);
    626   t.tleb.events[t.tleb.size++] = type;
    627   t.tleb.events[t.tleb.size++] = pc;
    628   t.tleb.events[t.tleb.size++] = a;
    629   t.tleb.events[t.tleb.size++] = info;
    630   TLEBFlushLocked(t);
    631   DCHECK(t.tleb.size <= kThreadLocalEventBufferSize);
    632 }
    633 
    634 static void UpdateCallStack(PinThread &t, ADDRINT sp);
    635 
    636 // Must be called from its thread (except for THR_END case)!
    637 static void DumpEventWithSp(uintptr_t sp, EventType type, int32_t tid, uintptr_t pc,
    638                             uintptr_t a, uintptr_t info) {
    639   if (!g_race_verifier_active || type == EXPECT_RACE) {
    640     PinThread &t = g_pin_threads[tid];
    641     if (sp) {
    642       UpdateCallStack(t, sp);
    643     }
    644     TLEBAddGenericEventAndFlush(t, type, pc, a, info);
    645   }
    646 }
    647 static void DumpEvent(CONTEXT *ctx, EventType type, int32_t tid, uintptr_t pc,
    648                       uintptr_t a, uintptr_t info) {
    649   DumpEventWithSp(ctx ? PIN_GetContextReg(ctx, REG_STACK_PTR) : 0,
    650             type, tid, pc, a, info);
    651 }
    652 
//--------- Wrapping and replacing --------------- {{{1
    654 static set<string> g_wrapped_functions;
    655 static void InformAboutFunctionWrap(RTN rtn, string name) {
    656   g_wrapped_functions.insert(name);
    657   if (!debug_wrap) return;
    658   Printf("Function wrapped: %s (%s %s)\n", name.c_str(),
    659          RTN_Name(rtn).c_str(), IMG_Name(SEC_Img(RTN_Sec(rtn))).c_str());
    660 }
    661 
    662 static bool RtnMatchesName(const string &rtn_name, const string &name) {
    663   CHECK(name.size() > 0);
    664   size_t pos = rtn_name.find(name);
    665   if (pos == string::npos) {
    666     return false;
    667   }
    668   if (pos == 0 && name.size() == rtn_name.size()) {
    669   //  Printf("Full match: %s %s\n", rtn_name.c_str(), name.c_str());
    670     return true;
    671   }
    672   // match MyFuncName@123
    673   if (pos == 0 && name.size() < rtn_name.size()
    674       && rtn_name[name.size()] == '@') {
    675   //  Printf("Versioned match: %s %s\n", rtn_name.c_str(), name.c_str());
    676     return true;
    677   }
    678   // match _MyFuncName@123
    679   if (pos == 1 && rtn_name[0] == '_' && name.size() < rtn_name.size()
    680       && rtn_name[name.size() + 1] == '@') {
    681     // Printf("Versioned match: %s %s\n", rtn_name.c_str(), name.c_str());
    682     return true;
    683   }
    684 
    685   return false;
    686 }
    687 
// Parameter lists of the "fast" interceptors: thread id, pc, sp and then
// 0 to 3 arguments named arg0..arg2.
#define FAST_WRAP_PARAM0 THREADID tid, ADDRINT pc, ADDRINT sp
#define FAST_WRAP_PARAM1 FAST_WRAP_PARAM0, ADDRINT arg0
#define FAST_WRAP_PARAM2 FAST_WRAP_PARAM1, ADDRINT arg1
#define FAST_WRAP_PARAM3 FAST_WRAP_PARAM2, ADDRINT arg2

// Parameter list matching InstrumentedCallFrame::callback_t.
#define FAST_WRAP_PARAM_AFTER \
  THREADID tid, InstrumentedCallFrame &frame, ADDRINT ret


// Set to a non-zero expression to trace every PUSH_AFTER_CALLBACK1.
#define DEBUG_FAST_INTERCEPTORS 0
//#define DEBUG_FAST_INTERCEPTORS (tid == 1)

// Pushes `callback` with one saved argument onto the current thread's
// InstrumentedCallStack. Expects tid, pc and sp at the expansion site.
// NOTE(review): the debug Printf prints `arg0` from the expansion site, not
// the macro parameter a0.
#define PUSH_AFTER_CALLBACK1(callback, a0) \
  g_pin_threads[tid].ic_stack.Push(callback, pc, sp, a0, 0); \
  if (DEBUG_FAST_INTERCEPTORS) \
    Printf("T%d %s pc=%p sp=%p *sp=(%p) arg0=%p stack_size=%ld\n",\
         tid, __FUNCTION__, pc, sp,\
         ((void**)sp)[0],\
         arg0,\
         g_pin_threads[tid].ic_stack.size()\
         );\


// Shorthands that install Wrap_<name> as the wrapper of function <name>.
// They expect `rtn` (and `img` for WRAP4) at the expansion site.
#define WRAP_NAME(name) Wrap_##name
#define WRAP4(name) WrapFunc4(img, rtn, #name, (AFUNPTR)Wrap_##name)
#define WRAPSTD1(name) WrapStdCallFunc1(rtn, #name, (AFUNPTR)Wrap_##name)
#define WRAPSTD2(name) WrapStdCallFunc2(rtn, #name, (AFUNPTR)Wrap_##name)
#define WRAPSTD3(name) WrapStdCallFunc3(rtn, #name, (AFUNPTR)Wrap_##name)
#define WRAPSTD4(name) WrapStdCallFunc4(rtn, #name, (AFUNPTR)Wrap_##name)
#define WRAPSTD5(name) WrapStdCallFunc5(rtn, #name, (AFUNPTR)Wrap_##name)
#define WRAPSTD6(name) WrapStdCallFunc6(rtn, #name, (AFUNPTR)Wrap_##name)
#define WRAPSTD7(name) WrapStdCallFunc7(rtn, #name, (AFUNPTR)Wrap_##name)
#define WRAPSTD8(name) WrapStdCallFunc8(rtn, #name, (AFUNPTR)Wrap_##name)
#define WRAPSTD10(name) WrapStdCallFunc10(rtn, #name, (AFUNPTR)Wrap_##name)
#define WRAPSTD11(name) WrapStdCallFunc11(rtn, #name, (AFUNPTR)Wrap_##name)
// Parameter lists of the wrappers: thread id, pc, PIN context, the function
// pointer `f`, then 4 to 12 arguments.
#define WRAP_PARAM4  THREADID tid, ADDRINT pc, CONTEXT *ctx, \
                                AFUNPTR f,\
                                uintptr_t arg0, uintptr_t arg1, \
                                uintptr_t arg2, uintptr_t arg3

#define WRAP_PARAM6 WRAP_PARAM4, uintptr_t arg4, uintptr_t arg5
#define WRAP_PARAM8 WRAP_PARAM6, uintptr_t arg6, uintptr_t arg7
#define WRAP_PARAM10 WRAP_PARAM8, uintptr_t arg8, uintptr_t arg9
#define WRAP_PARAM12 WRAP_PARAM10, uintptr_t arg10, uintptr_t arg11
    732 
    733 static uintptr_t CallFun4(CONTEXT *ctx, THREADID tid,
    734                          AFUNPTR f, uintptr_t arg0, uintptr_t arg1,
    735                          uintptr_t arg2, uintptr_t arg3) {
    736   uintptr_t ret = 0xdeadbee1;
    737   PIN_CallApplicationFunction(ctx, tid,
    738                               CALLINGSTD_DEFAULT, (AFUNPTR)(f),
    739                               PIN_PARG(uintptr_t), &ret,
    740                               PIN_PARG(uintptr_t), arg0,
    741                               PIN_PARG(uintptr_t), arg1,
    742                               PIN_PARG(uintptr_t), arg2,
    743                               PIN_PARG(uintptr_t), arg3,
    744                               PIN_PARG_END());
    745   return ret;
    746 }
    747 
    748 static uintptr_t CallFun6(CONTEXT *ctx, THREADID tid,
    749                          AFUNPTR f, uintptr_t arg0, uintptr_t arg1,
    750                          uintptr_t arg2, uintptr_t arg3,
    751                          uintptr_t arg4, uintptr_t arg5) {
    752   uintptr_t ret = 0xdeadbee1;
    753   PIN_CallApplicationFunction(ctx, tid,
    754                               CALLINGSTD_DEFAULT, (AFUNPTR)(f),
    755                               PIN_PARG(uintptr_t), &ret,
    756                               PIN_PARG(uintptr_t), arg0,
    757                               PIN_PARG(uintptr_t), arg1,
    758                               PIN_PARG(uintptr_t), arg2,
    759                               PIN_PARG(uintptr_t), arg3,
    760                               PIN_PARG(uintptr_t), arg4,
    761                               PIN_PARG(uintptr_t), arg5,
    762                               PIN_PARG_END());
    763   return ret;
    764 }
    765 
// Call the wrapped function from inside a wrapper. Both macros rely on the
// names ctx, tid, f and arg0..argN being in scope at the expansion site.
#define CALL_ME_INSIDE_WRAPPER_4() CallFun4(ctx, tid, f, arg0, arg1, arg2, arg3)
#define CALL_ME_INSIDE_WRAPPER_6() CallFun6(ctx, tid, f, arg0, arg1, arg2, arg3, arg4, arg5)
    768 
    769 // Completely replace (i.e. not wrap) a function with 3 (or less) parameters.
    770 // The original function will not be called.
    771 void ReplaceFunc3(IMG img, RTN rtn, const char *name, AFUNPTR replacement_func) {
    772   if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
    773     InformAboutFunctionWrap(rtn, name);
    774     PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
    775                                  CALLINGSTD_DEFAULT,
    776                                  "proto",
    777                                  PIN_PARG(uintptr_t),
    778                                  PIN_PARG(uintptr_t),
    779                                  PIN_PARG(uintptr_t),
    780                                  PIN_PARG_END());
    781     RTN_ReplaceSignature(rtn,
    782                          AFUNPTR(replacement_func),
    783                          IARG_PROTOTYPE, proto,
    784                          IARG_THREAD_ID,
    785                          IARG_INST_PTR,
    786                          IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
    787                          IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
    788                          IARG_FUNCARG_ENTRYPOINT_VALUE, 2,
    789                          IARG_END);
    790     PROTO_Free(proto);
    791   }
    792 }
    793 
    794 // Wrap a function with up to 4 parameters.
    795 void WrapFunc4(IMG img, RTN rtn, const char *name, AFUNPTR replacement_func) {
    796   if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
    797     InformAboutFunctionWrap(rtn, name);
    798     PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
    799                                  CALLINGSTD_DEFAULT,
    800                                  "proto",
    801                                  PIN_PARG(uintptr_t),
    802                                  PIN_PARG(uintptr_t),
    803                                  PIN_PARG(uintptr_t),
    804                                  PIN_PARG(uintptr_t),
    805                                  PIN_PARG_END());
    806     RTN_ReplaceSignature(rtn,
    807                          AFUNPTR(replacement_func),
    808                          IARG_PROTOTYPE, proto,
    809                          IARG_THREAD_ID,
    810                          IARG_INST_PTR,
    811                          IARG_CONTEXT,
    812                          IARG_ORIG_FUNCPTR,
    813                          IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
    814                          IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
    815                          IARG_FUNCARG_ENTRYPOINT_VALUE, 2,
    816                          IARG_FUNCARG_ENTRYPOINT_VALUE, 3,
    817                          IARG_END);
    818     PROTO_Free(proto);
    819   }
    820 }
    821 
    822 // Wrap a function with up to 6 parameters.
    823 void WrapFunc6(IMG img, RTN rtn, const char *name, AFUNPTR replacement_func) {
    824   if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
    825     InformAboutFunctionWrap(rtn, name);
    826     PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
    827                                  CALLINGSTD_DEFAULT,
    828                                  "proto",
    829                                  PIN_PARG(uintptr_t),
    830                                  PIN_PARG(uintptr_t),
    831                                  PIN_PARG(uintptr_t),
    832                                  PIN_PARG(uintptr_t),
    833                                  PIN_PARG(uintptr_t),
    834                                  PIN_PARG(uintptr_t),
    835                                  PIN_PARG_END());
    836     RTN_ReplaceSignature(rtn,
    837                          AFUNPTR(replacement_func),
    838                          IARG_PROTOTYPE, proto,
    839                          IARG_THREAD_ID,
    840                          IARG_INST_PTR,
    841                          IARG_CONTEXT,
    842                          IARG_ORIG_FUNCPTR,
    843                          IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
    844                          IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
    845                          IARG_FUNCARG_ENTRYPOINT_VALUE, 2,
    846                          IARG_FUNCARG_ENTRYPOINT_VALUE, 3,
    847                          IARG_FUNCARG_ENTRYPOINT_VALUE, 4,
    848                          IARG_FUNCARG_ENTRYPOINT_VALUE, 5,
    849                          IARG_END);
    850     PROTO_Free(proto);
    851   }
    852 }
    853 
    854 
    855 //--------- Instrumentation callbacks --------------- {{{1
    856 //---------- Debug -----------------------------------{{{2
// Compile-time switch: when non-zero, DebugOnlyShowPcAndSp prints its
// arguments via ShowPcAndSp; when zero it does nothing.
#define DEB_PR (0)
    858 
    859 static void ShowPcAndSp(const char *where, THREADID tid,
    860                         ADDRINT pc, ADDRINT sp) {
    861     Printf("%s T%d sp=%ld pc=%p %s\n", where, tid, sp, pc,
    862            PcToRtnName(pc, true).c_str());
    863 }
    864 
    865 static void PrintShadowStack(PinThread &t) {
    866   Printf("T%d Shadow stack (%d)\n", t.tid, (int)t.shadow_stack.size());
    867   for (int i = t.shadow_stack.size() - 1; i >= 0; i--) {
    868     uintptr_t pc = t.shadow_stack[i].pc;
    869     uintptr_t sp = t.shadow_stack[i].sp;
    870     Printf("  sp=%ld pc=%lx %s\n", sp, pc, PcToRtnName(pc, true).c_str());
    871   }
    872 }
    873 
    874 static void DebugOnlyShowPcAndSp(const char *where, THREADID tid,
    875                                  ADDRINT pc, ADDRINT sp) {
    876   if (DEB_PR) {
    877     ShowPcAndSp(where, tid, pc, sp);
    878   }
    879 }
    880 
    881 static uintptr_t WRAP_NAME(ThreadSanitizerQuery)(WRAP_PARAM4) {
    882   const char *query = (const char*)arg0;
    883   return (uintptr_t)ThreadSanitizerQuery(query);
    884 }
    885 
    886 //--------- Ignores -------------------------------- {{{2
    887 static void IgnoreMopsBegin(THREADID tid) {
    888   // if (tid != 0) Printf("T%d IgnoreMops++\n", tid);
    889   TLEBSimpleEvent(g_pin_threads[tid], TLEB_IGNORE_ALL_BEGIN);
    890 }
    891 static void IgnoreMopsEnd(THREADID tid) {
    892   // if (tid != 0) Printf("T%d IgnoreMops--\n", tid);
    893   TLEBSimpleEvent(g_pin_threads[tid], TLEB_IGNORE_ALL_END);
    894 }
    895 
    896 static void IgnoreSyncAndMopsBegin(THREADID tid) {
    897   // if (tid != 0) Printf("T%d IgnoreSync++\n", tid);
    898   IgnoreMopsBegin(tid);
    899   TLEBSimpleEvent(g_pin_threads[tid], TLEB_IGNORE_SYNC_BEGIN);
    900 }
    901 static void IgnoreSyncAndMopsEnd(THREADID tid) {
    902   // if (tid != 0) Printf("T%d IgnoreSync--\n", tid);
    903   IgnoreMopsEnd(tid);
    904   TLEBSimpleEvent(g_pin_threads[tid], TLEB_IGNORE_SYNC_END);
    905 }
    906 
    907 //--------- __cxa_guard_* -------------------------- {{{2
    908 // From gcc/cp/decl.c:
    909 // --------------------------------------------------------------
    910 //      Emit code to perform this initialization but once.  This code
    911 //      looks like:
    912 //
    913 //      static <type> guard;
    914 //      if (!guard.first_byte) {
    915 //        if (__cxa_guard_acquire (&guard)) {
    916 //          bool flag = false;
    917 //          try {
    918 //            // Do initialization.
    919 //            flag = true; __cxa_guard_release (&guard);
    920 //            // Register variable for destruction at end of program.
    921 //           } catch {
    922 //          if (!flag) __cxa_guard_abort (&guard);
    923 //         }
    924 //      }
    925 // --------------------------------------------------------------
    926 // So, when __cxa_guard_acquire returns true, we start ignoring all accesses
    927 // and in __cxa_guard_release we stop ignoring them.
    928 // We also need to ignore all accesses inside these two functions.
    929 
    930 static void Before_cxa_guard_acquire(THREADID tid, ADDRINT pc, ADDRINT guard) {
    931   IgnoreMopsBegin(tid);
    932 }
    933 
    934 static void After_cxa_guard_acquire(THREADID tid, ADDRINT pc, ADDRINT ret) {
    935   if (ret) {
    936     // Continue ignoring, it will end in __cxa_guard_release.
    937   } else {
    938     // Stop ignoring, there will be no matching call to __cxa_guard_release.
    939     IgnoreMopsEnd(tid);
    940   }
    941 }
    942 
    943 static void After_cxa_guard_release(THREADID tid, ADDRINT pc) {
    944   IgnoreMopsEnd(tid);
    945 }
    946 
    947 static uintptr_t WRAP_NAME(pthread_once)(WRAP_PARAM4) {
    948   uintptr_t ret;
    949   IgnoreMopsBegin(tid);
    950   ret = CALL_ME_INSIDE_WRAPPER_4();
    951   IgnoreMopsEnd(tid);
    952   return ret;
    953 }
    954 
    955 void TmpCallback1(THREADID tid, ADDRINT pc) {
    956   Printf("%s T%d %lx\n", __FUNCTION__, tid, pc);
    957 }
    958 void TmpCallback2(THREADID tid, ADDRINT pc) {
    959   Printf("%s T%d %lx\n", __FUNCTION__, tid, pc);
    960 }
    961 
    962 //--------- Threads --------------------------------- {{{2
    963 static void HandleThreadCreateBefore(THREADID tid, ADDRINT pc) {
    964   DumpEvent(0, THR_CREATE_BEFORE, tid, pc, 0, 0);
    965   g_thread_create_lock.Lock();
    966   IgnoreMopsBegin(tid);
    967   CHECK(g_tid_of_thread_which_called_create_thread == (THREADID)-1);
    968   g_tid_of_thread_which_called_create_thread = tid;
    969   n_created_threads++;
    970 }
    971 
    972 static void HandleThreadCreateAbort(THREADID tid) {
    973   CHECK(g_tid_of_thread_which_called_create_thread == tid);
    974   g_tid_of_thread_which_called_create_thread = (THREADID)-1;
    975   n_created_threads--;
    976   IgnoreMopsEnd(tid);
    977   g_thread_create_lock.Unlock();
    978 }
    979 
// Completes the bookkeeping started by HandleThreadCreateBefore once the
// thread-creation call has succeeded.
//   tid           - the creating (parent) thread.
//   child_ptid    - the handle/pthread_t of the new thread; stored in the
//                   child's PinThread record.
//   suspend_child - if true (only supported under _MSC_VER), the child is
//                   suspended before it is allowed to continue.
// Returns the PIN thread id of the child. Before returning, ends the ignore
// region, releases g_thread_create_lock and reports THR_CREATE_AFTER.
static THREADID HandleThreadCreateAfter(THREADID tid, pthread_t child_ptid,
                                        bool suspend_child) {
  // Spin, waiting for last_child_tid to appear (i.e. wait for the thread to
  // actually start) so that we know the child's tid. No locks.
  // The field is written by CallbackForThreadStart running in the child.
  while (!ATOMIC_READ(&g_pin_threads[tid].last_child_tid)) {
    YIELD();
  }

  // The creation is no longer in flight: clear the recorded creator.
  CHECK(g_tid_of_thread_which_called_create_thread == tid);
  g_tid_of_thread_which_called_create_thread = -1;

  THREADID last_child_tid = g_pin_threads[tid].last_child_tid;
  CHECK(last_child_tid);

  PinThread &child_t = g_pin_threads[last_child_tid];
  child_t.my_ptid = child_ptid;

#ifdef _MSC_VER
  if (suspend_child) {
    // Wait until the child has announced CHILD_READY (set in
    // Before_BaseThreadInitThunk) before suspending it.
    while (ATOMIC_READ(&child_t.startup_state) != PinThread::CHILD_READY) {
      YIELD();
    }
    // Strictly speaking, PIN forbids calling system functions like this.
    // This may violate application library isolation but
    // a) YIELD == WINDOWS::Sleep, so we violate it anyways
    // b) SuspendThread probably calls NtSuspendThread right away
    WINDOWS::DWORD old_count = WINDOWS::SuspendThread((WINDOWS::HANDLE)child_ptid);  // TODO handle?
    CHECK(old_count == 0);
  }
  // Lets a child spinning in Before_BaseThreadInitThunk proceed.
  child_t.startup_state = PinThread::MAY_CONTINUE;
#else
  CHECK(!suspend_child);  // Not implemented - do we need to?
#endif

  // Read the child's uniq_tid and reset last_child_tid while still holding
  // g_thread_create_lock, so the next creation starts from a clean slate.
  int uniq_tid_of_child = child_t.uniq_tid;
  g_pin_threads[tid].last_child_tid = 0;

  // Close the ignore region and release the lock taken in
  // HandleThreadCreateBefore.
  IgnoreMopsEnd(tid);
  g_thread_create_lock.Unlock();

  DumpEvent(0, THR_CREATE_AFTER, tid, 0, 0, uniq_tid_of_child);
  return last_child_tid;
}
   1023 
   1024 static uintptr_t WRAP_NAME(pthread_create)(WRAP_PARAM4) {
   1025   HandleThreadCreateBefore(tid, pc);
   1026 
   1027   uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
   1028   if (ret != 0) {
   1029     HandleThreadCreateAbort(tid);
   1030     return ret;
   1031   }
   1032 
   1033   pthread_t child_ptid = *(pthread_t*)arg0;
   1034   HandleThreadCreateAfter(tid, child_ptid, false);
   1035 
   1036   return ret;
   1037 }
   1038 
// Thread-start callback: (re)initializes the PinThread record for |tid|,
// links it to the thread that is currently creating a thread (if any) and
// starts the thread's TLEB.
// |flags| and |v| are not used.
void CallbackForThreadStart(THREADID tid, CONTEXT *ctxt,
                            INT32 flags, void *v) {
  // We can not rely on PIN_GetParentTid() since it is broken on Windows.

  // The table of per-thread records is allocated lazily on first use.
  if (g_pin_threads == NULL) {
    g_pin_threads = new PinThread[kMaxThreads];
  }

  bool has_parent = true;
  if (tid == 0) {
    // Main thread or we have attached to a running process.
    has_parent = false;
  } else {
    CHECK(tid > 0);
  }

  CHECK(tid < kMaxThreads);
  PinThread &t = g_pin_threads[tid];
  // Wipe the whole record before filling in the fields below.
  memset(&t, 0, sizeof(PinThread));
  t.uniq_tid = n_started_threads++;
  t.literace_sampling = G_flags->literace_sampling;
  t.tid = tid;
  t.tleb.t = &t;
#if defined(_MSC_VER)
  t.startup_state = PinThread::STARTING;
#endif
  ComputeIgnoreAccesses(t);


  // Point the tool's TLS register at element 2 of this thread's TLEB event
  // array.
  PIN_SetContextReg(ctxt, tls_reg, (ADDRINT)&t.tleb.events[2]);

  t.parent_tid = -1;
  if (has_parent) {
    // The parent is whichever thread is currently between
    // HandleThreadCreateBefore and HandleThreadCreateAfter/Abort.
    t.parent_tid = g_tid_of_thread_which_called_create_thread;
#if !defined(_MSC_VER)  // On Windows, threads may appear out of thin air.
    CHECK(t.parent_tid != (THREADID)-1);
#endif  // _MSC_VER
  }

  if (debug_thread) {
    Printf("T%d ThreadStart parent=%d child=%d\n", tid, t.parent_tid, tid);
  }

  if (has_parent && t.parent_tid != (THREADID)-1) {
    // Publishing last_child_tid is what the parent spins on in
    // HandleThreadCreateAfter.
    g_pin_threads[t.parent_tid].last_child_tid = tid;
    t.thread_stack_size_if_known =
        g_pin_threads[t.parent_tid].last_child_stack_size_if_known;
  } else {
#if defined(_MSC_VER)
    // No known parent: nobody will run the startup handshake for this
    // thread, so mark it as free to continue right away.
    t.startup_state = PinThread::MAY_CONTINUE;
#endif
  }

  // This is a lock-free (thread local) operation.
  TLEBStartThread(t);
/* TODO(timurrrr): investigate and un-comment
#ifdef _MSC_VER
  // Ignore all mops & sync before the real thread code.
  // See the corresponding IgnoreSyncAndMopsEnd in Before_BaseThreadInitThunk.
  IgnoreSyncAndMopsBegin(tid);
#endif
*/
}
   1102 
// Reports THR_STACK_TOP for thread |tid|. The stack block address and size
// are guessed by scanning memory just above |sp| (see the long comment
// below); if nothing plausible is found, a warning is printed and |sp|
// together with t.thread_stack_size_if_known is reported instead.
static void Before_start_thread(THREADID tid, ADDRINT pc, ADDRINT sp) {
  PinThread &t = g_pin_threads[tid];
  if (debug_thread) {
    Printf("T%d Before_start_thread: sp=%p my_ptid=%p diff=%p\n",
         tid, sp, t.my_ptid, t.my_ptid - sp);
  }
  // This is a rather scary hack, but I see no easy way to avoid it.
  // On linux NPTL, the pthread_t structure is the same block of memory
  // as the stack (and the tls?). Somewhere inside the pthread_t
  // object lives the address of stackblock followed by its size
  // (see nptl/descr.h).
  // At the current point we may not know the value of pthread_t (my_ptid),
  // but we do know the current sp, which is a bit less than my_ptid.
  //
  // address                        value
  // ------------------------------------------------
  // 0xffffffffffffffff:
  //
  // stackblock + stackblock_size:
  // my_ptid:
  //
  //                                stackblock_size
  //                                stackblock
  //
  // current_sp:
  //
  //
  // stackblock:
  //
  // 0x0000000000000000:
  // -------------------------------------------------
  //
  // So, we iterate from sp to the higher addresses (but just in case, not more
  // than a few pages) trying to find a pair of values which looks like
  // stackblock and stackblock_size. Oh well.
  // Note that in valgrind we are able to get this info from
  //  pthread_getattr_np (linux) or pthread_get_stackaddr_np (mac),
  // but in PIN we can't call those (can we?).
  //
  // |prev| holds the previous non-zero word seen (zero words are skipped),
  // so a candidate is the pair (prev = stackblock, val = stackblock_size).
  uintptr_t prev = 0;
  for (uintptr_t sp1 = sp; sp1 - sp < 0x2000;
       sp1 += sizeof(uintptr_t)) {
    uintptr_t val = *(uintptr_t*)sp1;
    if (val == 0) continue;
    if (prev &&
        (prev & 0xfff) == 0 && // stack is page aligned
        prev < sp &&           // min stack is < sp
        prev + val > sp &&     // max stack is > sp
        val >= (1 << 15) &&    // stack size is >= 32k
        val <= 128 * (1 << 20) // stack size is hardly > 128M
        ) {
      if (debug_thread) {
        Printf("T%d found stack: %p size=%p\n", tid, prev, val);
      }
      // Report the top of the stack block (base + size) and its size.
      DumpEvent(0, THR_STACK_TOP, tid, pc, prev + val, val);
      return;
    }
    prev = val;
  }
  // The hack above does not always work. (TODO(kcc)). Do something.
  Printf("WARNING: ThreadSanitizerPin is guessing stack size for T%d\n", tid);
  DumpEvent(0, THR_STACK_TOP, tid, pc, sp, t.thread_stack_size_if_known);
}
   1165 
   1166 #ifdef _MSC_VER
   1167 static uintptr_t WRAP_NAME(CreateThread)(WRAP_PARAM6) {
   1168   PinThread &t = g_pin_threads[tid];
   1169   t.last_child_stack_size_if_known = arg1 ? arg1 : 1024 * 1024;
   1170 
   1171   HandleThreadCreateBefore(tid, pc);
   1172 
   1173   // We can't start the thread suspended because we want to get its
   1174   // PIN thread ID before leaving CreateThread.
   1175   // So, we reset the CREATE_SUSPENDED flag and SuspendThread before any client
   1176   // code is executed in the HandleThreadCreateAfter if needed.
   1177   bool should_be_suspended = arg4 & CREATE_SUSPENDED;
   1178   arg4 &= ~CREATE_SUSPENDED;
   1179 
   1180   uintptr_t ret = CALL_ME_INSIDE_WRAPPER_6();
   1181   if (ret == NULL) {
   1182     HandleThreadCreateAbort(tid);
   1183     return ret;
   1184   }
   1185   pthread_t child_ptid = ret;
   1186   THREADID child_tid = HandleThreadCreateAfter(tid, child_ptid,
   1187                                                should_be_suspended);
   1188   {
   1189     ScopedReentrantClientLock lock(__LINE__);
   1190     if (g_win_handles_which_are_threads == NULL) {
   1191       g_win_handles_which_are_threads = new unordered_set<pthread_t>;
   1192     }
   1193     g_win_handles_which_are_threads->insert(child_ptid);
   1194   }
   1195   return ret;
   1196 }
   1197 
// Hook that runs before BaseThreadInitThunk in thread |tid|.
// Reports THR_STACK_TOP using |sp| and the stack size recorded for this
// thread, then, unless the thread is already marked MAY_CONTINUE, announces
// CHILD_READY and spins until the creating thread allows it to continue.
static void Before_BaseThreadInitThunk(THREADID tid, ADDRINT pc, ADDRINT sp) {
  PinThread &t = g_pin_threads[tid];
  size_t stack_size = t.thread_stack_size_if_known;
  // Printf("T%d %s %p %p\n", tid, __FUNCTION__, sp, stack_size);
  /* TODO(timurrrr): investigate and uncomment
  if (tid != 0) {
    // Ignore all mops & sync before the real thread code.
    // See the corresponding IgnoreSyncAndMopsBegin in CallbackForThreadStart.
    IgnoreSyncAndMopsEnd(tid);
    TLEBFlushLocked(t);
    CHECK(t.ignore_sync == 0);
    CHECK(t.ignore_accesses == 0);
  }
  */
  DumpEvent(0, THR_STACK_TOP, tid, pc, sp, stack_size);

#ifdef _MSC_VER
  if (t.startup_state != PinThread::MAY_CONTINUE) {
    // Handshake with HandleThreadCreateAfter: it waits for CHILD_READY when
    // it has to suspend this thread, and sets MAY_CONTINUE afterwards.
    CHECK(t.startup_state == PinThread::STARTING);
    t.startup_state = PinThread::CHILD_READY;
    while (ATOMIC_READ(&t.startup_state) != PinThread::MAY_CONTINUE) {
      YIELD();
    }
    // Corresponds to SIGNAL from ResumeThread if the thread was suspended on
    // start.
    DumpEvent(0, WAIT, tid, pc, t.my_ptid, 0);
  }
#endif
}
   1227 
   1228 static void Before_RtlExitUserThread(THREADID tid, ADDRINT pc) {
   1229   PinThread &t = g_pin_threads[tid];
   1230   if (t.tid != 0) {
   1231     // Once we started exiting the thread, ignore the locking events.
   1232     // This way we will avoid h-b arcs between unrelated threads.
   1233     // We also start ignoring all mops, otherwise we will get tons of race
   1234     // reports from the windows guts.
   1235     IgnoreSyncAndMopsBegin(tid);
   1236   }
   1237 }
   1238 #endif  // _MSC_VER
   1239 
   1240 void CallbackForThreadFini(THREADID tid, const CONTEXT *ctxt,
   1241                           INT32 code, void *v) {
   1242   PinThread &t = g_pin_threads[tid];
   1243   t.thread_finished = true;
   1244   // We can not DumpEvent here,
   1245   // due to possible deadlock with PIN's internal lock.
   1246   if (debug_thread) {
   1247     Printf("T%d Thread finished (ptid=%d)\n", tid, t.my_ptid);
   1248   }
   1249 }
   1250 
   1251 static bool HandleThreadJoinAfter(THREADID tid, pthread_t joined_ptid) {
   1252   THREADID joined_tid = kMaxThreads;
   1253   int max_uniq_tid_found = -1;
   1254 
   1255   // TODO(timurrrr): walking through g_pin_threads may be slow.
   1256   // Do we need to/Can we optimize it?
   1257   for (THREADID j = 1; j < kMaxThreads; j++) {
   1258     if (g_pin_threads[j].thread_finished == false)
   1259       continue;
   1260     if (g_pin_threads[j].my_ptid == joined_ptid) {
   1261       // We search for the thread with the maximum uniq_tid to work around
   1262       // thread HANDLE reuse issues.
   1263       if (max_uniq_tid_found < g_pin_threads[j].uniq_tid) {
   1264         max_uniq_tid_found = g_pin_threads[j].uniq_tid;
   1265         joined_tid = j;
   1266       }
   1267     }
   1268   }
   1269   if (joined_tid == kMaxThreads) {
   1270     // This may happen in the following case:
   1271     //  - A non-joinable thread is created and a handle is assigned to it.
   1272     //  - Since the thread is non-joinable, the handle is then reused
   1273     //  for some other purpose, e.g. for a WaitableEvent.
   1274     //  - We did not yet register the thread fini event.
   1275     //  - We observe WaitForSingleObjectEx(ptid) and think that this is thread
   1276     //  join event, while it is not.
   1277     if (debug_thread)
   1278       Printf("T%d JoinAfter returns false! ptid=%d\n", tid, joined_ptid);
   1279     return false;
   1280   }
   1281   CHECK(joined_tid < kMaxThreads);
   1282   CHECK(joined_tid > 0);
   1283   g_pin_threads[joined_tid].my_ptid = 0;
   1284   int joined_uniq_tid = g_pin_threads[joined_tid].uniq_tid;
   1285 
   1286   if (debug_thread) {
   1287     Printf("T%d JoinAfter   parent=%d child=%d (uniq=%d)\n", tid, tid,
   1288            joined_tid, joined_uniq_tid);
   1289   }
   1290 
   1291   // Here we send an event for a different thread (joined_tid), which is already
   1292   // dead.
   1293   DumpEvent(0, THR_END, joined_tid, 0, 0, 0);
   1294 
   1295 
   1296   DumpEvent(0, THR_JOIN_AFTER, tid, 0, joined_uniq_tid, 0);
   1297   return true;
   1298 }
   1299 
   1300 static uintptr_t WRAP_NAME(pthread_join)(WRAP_PARAM4) {
   1301   if (G_flags->debug_level >= 2)
   1302     Printf("T%d in  pthread_join %p\n", tid, arg0);
   1303   pthread_t joined_ptid = (pthread_t)arg0;
   1304   uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
   1305   HandleThreadJoinAfter(tid, joined_ptid);
   1306   if (G_flags->debug_level >= 2)
   1307     Printf("T%d out pthread_join %p\n", tid, arg0);
   1308   return ret;
   1309 }
   1310 
   1311 static size_t WRAP_NAME(fwrite)(WRAP_PARAM4) {
   1312   void* p = (void*)arg0;
   1313   size_t size = (size_t)arg1 * (size_t)arg2;
   1314   REPORT_READ_RANGE(p, size);
   1315   IgnoreMopsBegin(tid);
   1316   size_t ret = CALL_ME_INSIDE_WRAPPER_4();
   1317   IgnoreMopsEnd(tid);
   1318   return ret;
   1319 }
   1320 
   1321 #ifdef _MSC_VER
   1322 
   1323 
   1324 uintptr_t CallStdCallFun1(CONTEXT *ctx, THREADID tid,
   1325                          AFUNPTR f, uintptr_t arg0) {
   1326   uintptr_t ret = 0xdeadbee1;
   1327   PIN_CallApplicationFunction(ctx, tid,
   1328                               CALLINGSTD_STDCALL, (AFUNPTR)(f),
   1329                               PIN_PARG(uintptr_t), &ret,
   1330                               PIN_PARG(uintptr_t), arg0,
   1331                               PIN_PARG_END());
   1332   return ret;
   1333 }
   1334 
   1335 uintptr_t CallStdCallFun2(CONTEXT *ctx, THREADID tid,
   1336                          AFUNPTR f, uintptr_t arg0, uintptr_t arg1) {
   1337   uintptr_t ret = 0xdeadbee2;
   1338   PIN_CallApplicationFunction(ctx, tid,
   1339                               CALLINGSTD_STDCALL, (AFUNPTR)(f),
   1340                               PIN_PARG(uintptr_t), &ret,
   1341                               PIN_PARG(uintptr_t), arg0,
   1342                               PIN_PARG(uintptr_t), arg1,
   1343                               PIN_PARG_END());
   1344   return ret;
   1345 }
   1346 
   1347 uintptr_t CallStdCallFun3(CONTEXT *ctx, THREADID tid,
   1348                          AFUNPTR f, uintptr_t arg0, uintptr_t arg1,
   1349                          uintptr_t arg2) {
   1350   uintptr_t ret = 0xdeadbee3;
   1351   PIN_CallApplicationFunction(ctx, tid,
   1352                               CALLINGSTD_STDCALL, (AFUNPTR)(f),
   1353                               PIN_PARG(uintptr_t), &ret,
   1354                               PIN_PARG(uintptr_t), arg0,
   1355                               PIN_PARG(uintptr_t), arg1,
   1356                               PIN_PARG(uintptr_t), arg2,
   1357                               PIN_PARG_END());
   1358   return ret;
   1359 }
   1360 
   1361 uintptr_t CallStdCallFun4(CONTEXT *ctx, THREADID tid,
   1362                          AFUNPTR f, uintptr_t arg0, uintptr_t arg1,
   1363                          uintptr_t arg2, uintptr_t arg3) {
   1364   uintptr_t ret = 0xdeadbee4;
   1365   PIN_CallApplicationFunction(ctx, tid,
   1366                               CALLINGSTD_STDCALL, (AFUNPTR)(f),
   1367                               PIN_PARG(uintptr_t), &ret,
   1368                               PIN_PARG(uintptr_t), arg0,
   1369                               PIN_PARG(uintptr_t), arg1,
   1370                               PIN_PARG(uintptr_t), arg2,
   1371                               PIN_PARG(uintptr_t), arg3,
   1372                               PIN_PARG_END());
   1373   return ret;
   1374 }
   1375 
   1376 uintptr_t CallStdCallFun5(CONTEXT *ctx, THREADID tid,
   1377                          AFUNPTR f, uintptr_t arg0, uintptr_t arg1,
   1378                          uintptr_t arg2, uintptr_t arg3,
   1379                          uintptr_t arg4) {
   1380   uintptr_t ret = 0xdeadbee5;
   1381   PIN_CallApplicationFunction(ctx, tid,
   1382                               CALLINGSTD_STDCALL, (AFUNPTR)(f),
   1383                               PIN_PARG(uintptr_t), &ret,
   1384                               PIN_PARG(uintptr_t), arg0,
   1385                               PIN_PARG(uintptr_t), arg1,
   1386                               PIN_PARG(uintptr_t), arg2,
   1387                               PIN_PARG(uintptr_t), arg3,
   1388                               PIN_PARG(uintptr_t), arg4,
   1389                               PIN_PARG_END());
   1390   return ret;
   1391 }
   1392 
   1393 uintptr_t CallStdCallFun6(CONTEXT *ctx, THREADID tid,
   1394                          AFUNPTR f, uintptr_t arg0, uintptr_t arg1,
   1395                          uintptr_t arg2, uintptr_t arg3,
   1396                          uintptr_t arg4, uintptr_t arg5) {
   1397   uintptr_t ret = 0xdeadbee6;
   1398   PIN_CallApplicationFunction(ctx, tid,
   1399                               CALLINGSTD_STDCALL, (AFUNPTR)(f),
   1400                               PIN_PARG(uintptr_t), &ret,
   1401                               PIN_PARG(uintptr_t), arg0,
   1402                               PIN_PARG(uintptr_t), arg1,
   1403                               PIN_PARG(uintptr_t), arg2,
   1404                               PIN_PARG(uintptr_t), arg3,
   1405                               PIN_PARG(uintptr_t), arg4,
   1406                               PIN_PARG(uintptr_t), arg5,
   1407                               PIN_PARG_END());
   1408   return ret;
   1409 }
   1410 
   1411 uintptr_t CallStdCallFun7(CONTEXT *ctx, THREADID tid,
   1412                          AFUNPTR f, uintptr_t arg0, uintptr_t arg1,
   1413                          uintptr_t arg2, uintptr_t arg3,
   1414                          uintptr_t arg4, uintptr_t arg5,
   1415                          uintptr_t arg6) {
   1416   uintptr_t ret = 0xdeadbee7;
   1417   PIN_CallApplicationFunction(ctx, tid,
   1418                               CALLINGSTD_STDCALL, (AFUNPTR)(f),
   1419                               PIN_PARG(uintptr_t), &ret,
   1420                               PIN_PARG(uintptr_t), arg0,
   1421                               PIN_PARG(uintptr_t), arg1,
   1422                               PIN_PARG(uintptr_t), arg2,
   1423                               PIN_PARG(uintptr_t), arg3,
   1424                               PIN_PARG(uintptr_t), arg4,
   1425                               PIN_PARG(uintptr_t), arg5,
   1426                               PIN_PARG(uintptr_t), arg6,
   1427                               PIN_PARG_END());
   1428   return ret;
   1429 }
   1430 
   1431 uintptr_t WRAP_NAME(ResumeThread)(WRAP_PARAM4) {
   1432 //  Printf("T%d %s arg0=%p\n", tid, __FUNCTION__, arg0);
   1433   DumpEvent(ctx, SIGNAL, tid, pc, arg0, 0);
   1434   uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
   1435   return ret;
   1436 }
   1437 uintptr_t WRAP_NAME(RtlInitializeCriticalSection)(WRAP_PARAM4) {
   1438 //  Printf("T%d pc=%p %s: %p\n", tid, pc, __FUNCTION__+8, arg0);
   1439   DumpEvent(ctx, LOCK_CREATE, tid, pc, arg0, 0);
   1440   IgnoreSyncAndMopsBegin(tid);
   1441   uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
   1442   IgnoreSyncAndMopsEnd(tid);
   1443   return ret;
   1444 }
   1445 uintptr_t WRAP_NAME(RtlInitializeCriticalSectionAndSpinCount)(WRAP_PARAM4) {
   1446 //  Printf("T%d pc=%p %s: %p\n", tid, pc, __FUNCTION__+8, arg0);
   1447   DumpEvent(ctx, LOCK_CREATE, tid, pc, arg0, 0);
   1448   IgnoreSyncAndMopsBegin(tid);
   1449   uintptr_t ret = CallStdCallFun2(ctx, tid, f, arg0, arg1);
   1450   IgnoreSyncAndMopsEnd(tid);
   1451   return ret;
   1452 }
   1453 uintptr_t WRAP_NAME(RtlInitializeCriticalSectionEx)(WRAP_PARAM4) {
   1454 //  Printf("T%d pc=%p %s: %p\n", tid, pc, __FUNCTION__+8, arg0);
   1455   DumpEvent(ctx, LOCK_CREATE, tid, pc, arg0, 0);
   1456   IgnoreSyncAndMopsBegin(tid);
   1457   uintptr_t ret = CallStdCallFun3(ctx, tid, f, arg0, arg1, arg2);
   1458   IgnoreSyncAndMopsEnd(tid);
   1459   return ret;
   1460 }
   1461 uintptr_t WRAP_NAME(RtlDeleteCriticalSection)(WRAP_PARAM4) {
   1462 //  Printf("T%d pc=%p %s: %p\n", tid, pc, __FUNCTION__+8, arg0);
   1463   DumpEvent(ctx, LOCK_DESTROY, tid, pc, arg0, 0);
   1464   IgnoreSyncAndMopsBegin(tid);
   1465   uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
   1466   IgnoreSyncAndMopsEnd(tid);
   1467   return ret;
   1468 }
   1469 uintptr_t WRAP_NAME(RtlEnterCriticalSection)(WRAP_PARAM4) {
   1470 //  Printf("T%d pc=%p %s: %p\n", tid, pc, __FUNCTION__+8, arg0);
   1471   uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
   1472   DumpEvent(ctx, WRITER_LOCK, tid, pc, arg0, 0);
   1473   return ret;
   1474 }
   1475 uintptr_t WRAP_NAME(RtlTryEnterCriticalSection)(WRAP_PARAM4) {
   1476   // Printf("T%d pc=%p %s: %p\n", tid, pc, __FUNCTION__+5, arg0);
   1477   uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
   1478   if (ret) {
   1479     DumpEvent(ctx, WRITER_LOCK, tid, pc, arg0, 0);
   1480   }
   1481   return ret;
   1482 }
   1483 uintptr_t WRAP_NAME(RtlLeaveCriticalSection)(WRAP_PARAM4) {
   1484 //  Printf("T%d pc=%p %s: %p\n", tid, pc, __FUNCTION__+8, arg0);
   1485   DumpEvent(ctx, UNLOCK, tid, pc, arg0, 0);
   1486   return CallStdCallFun1(ctx, tid, f, arg0);
   1487 }
   1488 
   1489 uintptr_t WRAP_NAME(DuplicateHandle)(WRAP_PARAM8) {
   1490   Printf("WARNING: DuplicateHandle called for handle 0x%X.\n", arg1);
   1491   Printf("Future events on this handle may be processed incorrectly.\n");
   1492   return CallStdCallFun7(ctx, tid, f, arg0, arg1, arg2, arg3, arg4, arg5, arg6);
   1493 }
   1494 
   1495 uintptr_t WRAP_NAME(SetEvent)(WRAP_PARAM4) {
   1496   //Printf("T%d before pc=%p %s: %p\n", tid, pc, __FUNCTION__+8, arg0);
   1497   DumpEvent(ctx, SIGNAL, tid, pc, arg0, 0);
   1498   uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
   1499   //Printf("T%d after pc=%p %s: %p\n", tid, pc, __FUNCTION__+8, arg0);
   1500   return ret;
   1501 }
   1502 
   1503 uintptr_t InternalWrapCreateSemaphore(WRAP_PARAM4) {
   1504   if (arg3 != NULL) {
   1505     Printf("WARNING: CreateSemaphore called with lpName='%s'.\n", arg3);
   1506     Printf("Future events on this semaphore may be processed incorrectly "
   1507            "if it is reused.\n");
   1508   }
   1509   return CallStdCallFun4(ctx, tid, f, arg0, arg1, arg2, arg3);
   1510 }
   1511 
// Wrapper for CreateSemaphoreA: delegates to InternalWrapCreateSemaphore with
// all wrapper arguments passed through unchanged.
uintptr_t WRAP_NAME(CreateSemaphoreA)(WRAP_PARAM4) {
  return InternalWrapCreateSemaphore(tid, pc, ctx, f, arg0, arg1, arg2, arg3);
}
   1515 
// Wrapper for CreateSemaphoreW: delegates to InternalWrapCreateSemaphore with
// all wrapper arguments passed through unchanged.
uintptr_t WRAP_NAME(CreateSemaphoreW)(WRAP_PARAM4) {
  return InternalWrapCreateSemaphore(tid, pc, ctx, f, arg0, arg1, arg2, arg3);
}
   1519 
   1520 uintptr_t WRAP_NAME(ReleaseSemaphore)(WRAP_PARAM4) {
   1521   DumpEvent(ctx, SIGNAL, tid, pc, arg0, 0);
   1522   return CallStdCallFun3(ctx, tid, f, arg0, arg1, arg2);
   1523 }
   1524 
   1525 uintptr_t WRAP_NAME(RtlInterlockedPushEntrySList)(WRAP_PARAM4) {
   1526   DumpEvent(ctx, SIGNAL, tid, pc, arg1, 0);
   1527   uintptr_t ret = CallStdCallFun2(ctx, tid, f, arg0, arg1);
   1528   // Printf("T%d %s list=%p item=%p\n", tid, __FUNCTION__, arg0, arg1);
   1529   return ret;
   1530 }
   1531 
   1532 uintptr_t WRAP_NAME(RtlInterlockedPopEntrySList)(WRAP_PARAM4) {
   1533   uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
   1534   // Printf("T%d %s list=%p item=%p\n", tid, __FUNCTION__, arg0, ret);
   1535   if (ret) {
   1536     DumpEvent(ctx, WAIT, tid, pc, ret, 0);
   1537   }
   1538   return ret;
   1539 }
   1540 
   1541 uintptr_t WRAP_NAME(RtlAcquireSRWLockExclusive)(WRAP_PARAM4) {
   1542   uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
   1543   DumpEvent(ctx, WRITER_LOCK, tid, pc, arg0, 0);
   1544   return ret;
   1545 }
   1546 uintptr_t WRAP_NAME(RtlAcquireSRWLockShared)(WRAP_PARAM4) {
   1547   uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
   1548   DumpEvent(ctx, READER_LOCK, tid, pc, arg0, 0);
   1549   return ret;
   1550 }
   1551 uintptr_t WRAP_NAME(RtlTryAcquireSRWLockExclusive)(WRAP_PARAM4) {
   1552   // Printf("T%d %s %p\n", tid, __FUNCTION__, arg0);
   1553   uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
   1554   if (ret & 0xFF) {  // Looks like this syscall return value is just 1 byte.
   1555     DumpEvent(ctx, WRITER_LOCK, tid, pc, arg0, 0);
   1556   }
   1557   return ret;
   1558 }
   1559 uintptr_t WRAP_NAME(RtlTryAcquireSRWLockShared)(WRAP_PARAM4) {
   1560   // Printf("T%d %s %p\n", tid, __FUNCTION__, arg0);
   1561   uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
   1562   if (ret & 0xFF) {  // Looks like this syscall return value is just 1 byte.
   1563     DumpEvent(ctx, READER_LOCK, tid, pc, arg0, 0);
   1564   }
   1565   return ret;
   1566 }
   1567 uintptr_t WRAP_NAME(RtlReleaseSRWLockExclusive)(WRAP_PARAM4) {
   1568   // Printf("T%d %s %p\n", tid, __FUNCTION__, arg0);
   1569   DumpEvent(ctx, UNLOCK, tid, pc, arg0, 0);
   1570   uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
   1571   return ret;
   1572 }
   1573 uintptr_t WRAP_NAME(RtlReleaseSRWLockShared)(WRAP_PARAM4) {
   1574   // Printf("T%d %s %p\n", tid, __FUNCTION__, arg0);
   1575   DumpEvent(ctx, UNLOCK, tid, pc, arg0, 0);
   1576   uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
   1577   return ret;
   1578 }
   1579 uintptr_t WRAP_NAME(RtlInitializeSRWLock)(WRAP_PARAM4) {
   1580   // Printf("T%d %s %p\n", tid, __FUNCTION__, arg0);
   1581   DumpEvent(ctx, LOCK_CREATE, tid, pc, arg0, 0);
   1582   uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
   1583   return ret;
   1584 }
   1585 
   1586 uintptr_t WRAP_NAME(RtlWakeConditionVariable)(WRAP_PARAM4) {
   1587   // Printf("T%d %s arg0=%p\n", tid, __FUNCTION__, arg0);
   1588   DumpEvent(ctx, SIGNAL, tid, pc, arg0, 0);
   1589   uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
   1590   return ret;
   1591 }
   1592 uintptr_t WRAP_NAME(RtlWakeAllConditionVariable)(WRAP_PARAM4) {
   1593   // Printf("T%d %s arg0=%p\n", tid, __FUNCTION__, arg0);
   1594   DumpEvent(ctx, SIGNAL, tid, pc, arg0, 0);
   1595   uintptr_t ret = CallStdCallFun1(ctx, tid, f, arg0);
   1596   return ret;
   1597 }
   1598 uintptr_t WRAP_NAME(RtlSleepConditionVariableSRW)(WRAP_PARAM4) {
   1599   // No need to unlock/lock - looks like RtlSleepConditionVariableSRW performs
   1600   // Rtl{Acquire,Release}SRW... calls itself!
   1601   uintptr_t ret = CallStdCallFun4(ctx, tid, f, arg0, arg1, arg2, arg3);
   1602   if ((ret & 0xFF) == 0)
   1603     DumpEvent(ctx, WAIT, tid, pc, arg0, 0);
   1604   // Printf("T%d %s arg0=%p arg1=%p; ret=%d\n", tid, __FUNCTION__, arg0, arg1, ret);
   1605   return ret;
   1606 }
   1607 uintptr_t WRAP_NAME(RtlSleepConditionVariableCS)(WRAP_PARAM4) {
   1608   // TODO(timurrrr): do we need unlock/lock?
   1609   uintptr_t ret = CallStdCallFun3(ctx, tid, f, arg0, arg1, arg2);
   1610   if ((ret & 0xFF) == 0)
   1611     DumpEvent(ctx, WAIT, tid, pc, arg0, 0);
   1612   // Printf("T%d %s arg0=%p arg1=%p; ret=%d\n", tid, __FUNCTION__, arg0, arg1, ret);
   1613   return ret;
   1614 }
   1615 
   1616 uintptr_t WRAP_NAME(RtlQueueWorkItem)(WRAP_PARAM4) {
   1617   // Printf("T%d %s arg0=%p arg1=%p; arg2=%d\n", tid, __FUNCTION__, arg0, arg1, arg2);
   1618   g_windows_thread_pool_calback_set->insert(arg0);
   1619   DumpEvent(ctx, SIGNAL, tid, pc, arg0, 0);
   1620   uintptr_t ret = CallStdCallFun3(ctx, tid, f, arg0, arg1, arg2);
   1621   return ret;
   1622 }
   1623 
   1624 uintptr_t WRAP_NAME(RegisterWaitForSingleObject)(WRAP_PARAM6) {
   1625   // Printf("T%d %s arg0=%p arg2=%p\n", tid, __FUNCTION__, arg0, arg2);
   1626   g_windows_thread_pool_calback_set->insert(arg2);
   1627   DumpEvent(ctx, SIGNAL, tid, pc, arg2, 0);
   1628   uintptr_t ret = CallStdCallFun6(ctx, tid, f, arg0, arg1, arg2, arg3, arg4, arg5);
   1629   if (ret) {
   1630     uintptr_t wait_object = *(uintptr_t*)arg0;
   1631     (*g_windows_thread_pool_wait_object_map)[wait_object] = arg2;
   1632     // Printf("T%d %s *arg0=%p\n", tid, __FUNCTION__, wait_object);
   1633   }
   1634   return ret;
   1635 }
   1636 
   1637 uintptr_t WRAP_NAME(UnregisterWaitEx)(WRAP_PARAM4) {
   1638   CHECK(g_windows_thread_pool_wait_object_map);
   1639   uintptr_t obj = (*g_windows_thread_pool_wait_object_map)[arg0];
   1640   // Printf("T%d %s arg0=%p obj=%p\n", tid, __FUNCTION__, arg0, obj);
   1641   uintptr_t ret = CallStdCallFun2(ctx, tid, f, arg0, arg1);
   1642   if (ret) {
   1643     DumpEvent(ctx, WAIT, tid, pc, obj, 0);
   1644   }
   1645   return ret;
   1646 }
   1647 
   1648 uintptr_t WRAP_NAME(VirtualAlloc)(WRAP_PARAM4) {
   1649   // Printf("T%d VirtualAlloc: %p %p %p %p\n", tid, arg0, arg1, arg2, arg3);
   1650   uintptr_t ret = CallStdCallFun4(ctx, tid, f, arg0, arg1, arg2, arg3);
   1651   return ret;
   1652 }
   1653 
   1654 uintptr_t WRAP_NAME(GlobalAlloc)(WRAP_PARAM4) {
   1655   uintptr_t ret = CallStdCallFun2(ctx, tid, f, arg0, arg1);
   1656   // Printf("T%d %s(%p %p)=%p\n", tid, __FUNCTION__, arg0, arg1, ret);
   1657   if (ret != 0) {
   1658     DumpEvent(ctx, MALLOC, tid, pc, ret, arg1);
   1659   }
   1660   return ret;
   1661 }
   1662 
   1663 uintptr_t WRAP_NAME(ZwAllocateVirtualMemory)(WRAP_PARAM6) {
   1664   // Printf("T%d >>%s(%p %p %p %p %p %p)\n", tid, __FUNCTION__, arg0, arg1, arg2, arg3, arg4, arg5);
   1665   uintptr_t ret = CallStdCallFun6(ctx, tid, f, arg0, arg1, arg2, arg3, arg4, arg5);
   1666   // Printf("T%d <<%s(%p %p) = %p\n", tid, __FUNCTION__, *(void**)arg1, *(void**)arg3, ret);
   1667   if (ret == 0) {
   1668     DumpEvent(ctx, MALLOC, tid, pc, *(uintptr_t*)arg1, *(uintptr_t*)arg3);
   1669   }
   1670   return ret;
   1671 }
   1672 
   1673 uintptr_t WRAP_NAME(AllocateHeap)(WRAP_PARAM4) {
   1674   uintptr_t ret = CallStdCallFun3(ctx, tid, f, arg0, arg1, arg2);
   1675   // Printf("T%d RtlAllocateHeap(%p %p %p)=%p\n", tid, arg0, arg1, arg2, ret);
   1676   if (ret != 0) {
   1677     DumpEvent(ctx, MALLOC, tid, pc, ret, arg3);
   1678   }
   1679   return ret;
   1680 }
   1681 
   1682 uintptr_t WRAP_NAME(HeapCreate)(WRAP_PARAM4) {
   1683   uintptr_t ret = CallStdCallFun3(ctx, tid, f, arg0, arg1, arg2);
   1684   Printf("T%d %s(%p %p %p)=%p\n", tid, __FUNCTION__, arg0, arg1, arg2, ret);
   1685   return ret;
   1686 }
   1687 
// Local stand-in for WAIT_OBJECT_0, compared against the return values of the
// wait wrappers below.
// We don't use the definition of WAIT_OBJECT_0 from winbase.h because
// it can't be compiled here for some reason.
#define WAIT_OBJECT_0_ 0
   1691 
   1692 uintptr_t WRAP_NAME(WaitForSingleObjectEx)(WRAP_PARAM4) {
   1693   if (G_flags->verbosity >= 1) {
   1694     ShowPcAndSp(__FUNCTION__, tid, pc, 0);
   1695     Printf("arg0=%lx arg1=%lx\n", arg0, arg1);
   1696   }
   1697 
   1698   //Printf("T%d before pc=%p %s: %p\n", tid, pc, __FUNCTION__+8, arg0, arg1);
   1699   uintptr_t ret = CallStdCallFun3(ctx, tid, f, arg0, arg1, arg2);
   1700   //Printf("T%d after pc=%p %s: %p\n", tid, pc, __FUNCTION__+8, arg0, arg1);
   1701 
   1702   if (ret == WAIT_OBJECT_0_) {
   1703     bool is_thread_handle = false;
   1704     {
   1705       ScopedReentrantClientLock lock(__LINE__);
   1706       if (g_win_handles_which_are_threads) {
   1707         is_thread_handle = g_win_handles_which_are_threads->count(arg0) > 0;
   1708         g_win_handles_which_are_threads->erase(arg0);
   1709       }
   1710     }
   1711     if (is_thread_handle)
   1712       HandleThreadJoinAfter(tid, arg0);
   1713     DumpEvent(ctx, WAIT, tid, pc, arg0, 0);
   1714   }
   1715 
   1716   return ret;
   1717 }
   1718 
   1719 uintptr_t WRAP_NAME(WaitForMultipleObjectsEx)(WRAP_PARAM6) {
   1720   if (G_flags->verbosity >= 1) {
   1721     ShowPcAndSp(__FUNCTION__, tid, pc, 0);
   1722     Printf("arg0=%lx arg1=%lx arg2=%lx arg3=%lx\n", arg0, arg1, arg2, arg3);
   1723   }
   1724 
   1725   //Printf("T%d before pc=%p %s: %p\n", tid, pc, __FUNCTION__+8, arg0, arg1);
   1726   uintptr_t ret = CallStdCallFun5(ctx, tid, f, arg0, arg1, arg2, arg3, arg4);
   1727   //Printf("T%d after pc=%p %s: %p\n", tid, pc, __FUNCTION__+8, arg0, arg1);
   1728 
   1729   if (ret >= WAIT_OBJECT_0_ && ret < WAIT_OBJECT_0_ + arg0) {
   1730     // TODO(timurrrr): add support for WAIT_ABANDONED_0
   1731 
   1732     int start_id, count;
   1733     if (arg2 /* wait_for_all */ == 1) {
   1734       start_id = 0;
   1735       count = arg0;
   1736     } else {
   1737       start_id = ret - WAIT_OBJECT_0_;
   1738       count = 1;
   1739     }
   1740 
   1741     for (int i = start_id; i < start_id + count; i++) {
   1742       uintptr_t handle = ((uintptr_t*)arg1)[i];
   1743       bool is_thread_handle = false;
   1744       {
   1745         ScopedReentrantClientLock lock(__LINE__);
   1746         if (g_win_handles_which_are_threads) {
   1747           is_thread_handle = g_win_handles_which_are_threads->count(handle) > 0;
   1748           g_win_handles_which_are_threads->erase(handle);
   1749         }
   1750       }
   1751       if (is_thread_handle)
   1752         HandleThreadJoinAfter(tid, handle);
   1753       DumpEvent(ctx, WAIT, tid, pc, handle, 0);
   1754     }
   1755   }
   1756 
   1757   return ret;
   1758 }
   1759 
   1760 #endif  // _MSC_VER
   1761 
   1762 //--------- memory allocation ---------------------- {{{2
   1763 uintptr_t WRAP_NAME(mmap)(WRAP_PARAM6) {
   1764   uintptr_t ret = CALL_ME_INSIDE_WRAPPER_6();
   1765 
   1766   if (ret != (ADDRINT)-1L) {
   1767     DumpEvent(ctx, MMAP, tid, pc, ret, arg1);
   1768   }
   1769 
   1770   return ret;
   1771 }
   1772 
   1773 uintptr_t WRAP_NAME(munmap)(WRAP_PARAM4) {
   1774   PinThread &t = g_pin_threads[tid];
   1775   TLEBFlushLocked(t);
   1776   uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
   1777   if (ret != (uintptr_t)-1L) {
   1778     DumpEvent(ctx, MUNMAP, tid, pc, arg0, arg1);
   1779   }
   1780   return ret;
   1781 }
   1782 
   1783 
   1784 void After_malloc(FAST_WRAP_PARAM_AFTER) {
   1785   size_t size = frame.arg[0];
   1786   if (DEBUG_FAST_INTERCEPTORS)
   1787     Printf("T%d %s %ld %p\n", tid, __FUNCTION__, size, ret);
   1788   IgnoreSyncAndMopsEnd(tid);
   1789   DumpEventWithSp(frame.sp, MALLOC, tid, frame.pc, ret, size);
   1790 }
   1791 
// Fast interceptor run before malloc: opens an ignore region for this thread
// and schedules After_malloc with the requested size (arg0) saved for it.
void Before_malloc(FAST_WRAP_PARAM1) {
  IgnoreSyncAndMopsBegin(tid);
  PUSH_AFTER_CALLBACK1(After_malloc, arg0);
}
   1796 
   1797 void After_free(FAST_WRAP_PARAM_AFTER) {
   1798   if (DEBUG_FAST_INTERCEPTORS)
   1799     Printf("T%d %s %p\n", tid, __FUNCTION__, frame.arg[0]);
   1800   IgnoreSyncAndMopsEnd(tid);
   1801 }
   1802 
// Fast interceptor run before free: emits FREE for the pointer (arg0) first,
// then opens an ignore region and schedules After_free to close it.
void Before_free(FAST_WRAP_PARAM1) {
  DumpEvent(0, FREE, tid, pc, arg0, 0);
  IgnoreSyncAndMopsBegin(tid);
  PUSH_AFTER_CALLBACK1(After_free, arg0);
}
   1808 
// Fast interceptor run before calloc: opens an ignore region and schedules
// After_malloc with the total size (arg0 * arg1) saved for it.
void Before_calloc(FAST_WRAP_PARAM2) {
  IgnoreSyncAndMopsBegin(tid);
  PUSH_AFTER_CALLBACK1(After_malloc, arg0 * arg1);
}
   1813 
// Fast interceptor run before realloc: opens an ignore region and schedules
// After_malloc with the new size (arg1) saved for it. The old block (arg0) is
// not reported as freed.
void Before_realloc(FAST_WRAP_PARAM2) {
  IgnoreSyncAndMopsBegin(tid);
  // TODO: handle FREE? We don't do it in Valgrind right now.
  PUSH_AFTER_CALLBACK1(After_malloc, arg1);
}
   1819 
   1820 // Fast path for INS_InsertIfCall.
   1821 ADDRINT Before_RET_IF(THREADID tid, ADDRINT pc, ADDRINT sp, ADDRINT ret) {
   1822   PinThread &t = g_pin_threads[tid];
   1823   return t.ic_stack.size();
   1824 }
   1825 
   1826 void Before_RET_THEN(THREADID tid, ADDRINT pc, ADDRINT sp, ADDRINT ret) {
   1827   PinThread &t = g_pin_threads[tid];
   1828   if (t.ic_stack.size() == 0) return;
   1829   DCHECK(t.ic_stack.size());
   1830   InstrumentedCallFrame *frame = t.ic_stack.Top();
   1831   if (DEBUG_FAST_INTERCEPTORS) {
   1832     Printf("T%d RET  pc=%p sp=%p *sp=%p frame.sp=%p stack_size %ld\n",
   1833            tid, pc, sp, *(uintptr_t*)sp, frame->sp, t.ic_stack.size());
   1834     t.ic_stack.Print();
   1835   }
   1836   while (frame->sp <= sp) {
   1837     if (DEBUG_FAST_INTERCEPTORS)
   1838       Printf("pop\n");
   1839     frame->callback(tid, *frame, ret);
   1840     t.ic_stack.Pop();
   1841     if (t.ic_stack.size()) {
   1842       frame = t.ic_stack.Top();
   1843     } else {
   1844       break;
   1845     }
   1846   }
   1847 }
   1848 
   1849 uintptr_t WRAP_NAME(malloc)(WRAP_PARAM4) {
   1850   IgnoreSyncAndMopsBegin(tid);
   1851   uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
   1852   IgnoreSyncAndMopsEnd(tid);
   1853 
   1854   DumpEvent(ctx, MALLOC, tid, pc, ret, arg0);
   1855   return ret;
   1856 }
   1857 
   1858 uintptr_t WRAP_NAME(realloc)(WRAP_PARAM4) {
   1859   PinThread &t = g_pin_threads[tid];
   1860   TLEBFlushLocked(t);
   1861   IgnoreSyncAndMopsBegin(tid);
   1862   uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
   1863   IgnoreSyncAndMopsEnd(tid);
   1864 
   1865   // TODO: handle FREE? We don't do it in Valgrind right now.
   1866   DumpEvent(ctx, MALLOC, tid, pc, ret, arg1);
   1867   return ret;
   1868 }
   1869 
   1870 uintptr_t WRAP_NAME(calloc)(WRAP_PARAM4) {
   1871   IgnoreSyncAndMopsBegin(tid);
   1872   uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
   1873   IgnoreSyncAndMopsEnd(tid);
   1874 
   1875   DumpEvent(ctx, MALLOC, tid, pc, ret, arg0*arg1);
   1876   return ret;
   1877 }
   1878 
   1879 uintptr_t WRAP_NAME(free)(WRAP_PARAM4) {
   1880   DumpEvent(ctx, FREE, tid, pc, arg0, 0);
   1881 
   1882   IgnoreSyncAndMopsBegin(tid);
   1883   uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
   1884   IgnoreSyncAndMopsEnd(tid);
   1885   return ret;
   1886 }
   1887 
   1888 
   1889 //-------- Routines and stack ---------------------- {{{2
// Pops every shadow-stack frame whose recorded sp is <= the current sp, i.e.
// frames the thread has already returned from. For each popped frame a
// routine-exit entry is added via TLEBAddRtnExit.
static INLINE void UpdateCallStack(PinThread &t, ADDRINT sp) {
  while (t.shadow_stack.size() > 0 && sp >= t.shadow_stack.back().sp) {
    TLEBAddRtnExit(t);
    size_t size = t.shadow_stack.size();
    CHECK(size < 1000000);  // stay sane.
    uintptr_t popped_pc = t.shadow_stack.back().pc;
#ifdef _MSC_VER
    // h-b edge from here to UnregisterWaitEx.
    // If the routine being left was registered as a thread-pool callback,
    // emit SIGNAL keyed by its address.
    CHECK(g_windows_thread_pool_calback_set);
    if (g_windows_thread_pool_calback_set->count(popped_pc)) {
      DumpEvent(0, SIGNAL, t.tid, 0, popped_pc, 0);
      // Printf("T%d ret %p\n", t.tid, popped_pc);
    }
#endif

    if (debug_rtn) {
      ShowPcAndSp("RET : ", t.tid, popped_pc, sp);
    }
    t.shadow_stack.pop_back();
    // Exactly one frame must have been removed by this iteration.
    CHECK(size - 1 == t.shadow_stack.size());
    if (DEB_PR) {
      Printf("POP SHADOW STACK\n");
      PrintShadowStack(t);
    }
  }
}
   1916 
   1917 void InsertBeforeEvent_SysCall(THREADID tid, ADDRINT sp) {
   1918   PinThread &t = g_pin_threads[tid];
   1919   UpdateCallStack(t, sp);
   1920   TLEBFlushLocked(t);
   1921 }
   1922 
   1923 void InsertBeforeEvent_Call(THREADID tid, ADDRINT pc, ADDRINT target,
   1924                             ADDRINT sp, IGNORE_BELOW_RTN ignore_below) {
   1925   PinThread &t = g_pin_threads[tid];
   1926   DebugOnlyShowPcAndSp(__FUNCTION__, t.tid, pc, sp);
   1927   UpdateCallStack(t, sp);
   1928   TLEBAddRtnCall(t, pc, target, ignore_below);
   1929   t.shadow_stack.push_back(StackFrame(target, sp));
   1930   if (DEB_PR) {
   1931     PrintShadowStack(t);
   1932   }
   1933   if (DEBUG_MODE && debug_rtn) {
   1934     ShowPcAndSp("CALL: ", t.tid, target, sp);
   1935   }
   1936 
   1937 #ifdef _MSC_VER
   1938   // h-b edge from RtlQueueWorkItem to here.
   1939   CHECK(g_windows_thread_pool_calback_set);
   1940   if (g_windows_thread_pool_calback_set->count(target)) {
   1941     DumpEvent(0, WAIT, tid, pc, target, 0);
   1942   }
   1943 #endif
   1944 }
   1945 
   1946 static void OnTraceSerial(THREADID tid, ADDRINT sp, TraceInfo *trace_info,
   1947     uintptr_t **tls_reg_p) {
   1948   PinThread &t = g_pin_threads[tid];
   1949 
   1950   DCHECK(trace_info);
   1951   DCHECK(trace_info->n_mops() > 0);
   1952   DebugOnlyShowPcAndSp(__FUNCTION__, t.tid, trace_info->pc(), sp);
   1953 
   1954   UpdateCallStack(t, sp);
   1955 
   1956   t.trace_info = trace_info;
   1957   trace_info->counter()++;
   1958   *tls_reg_p = TLEBAddTrace(t);
   1959 }
   1960 
// Trace-entry hook that recovers the thread object from tls_reg instead of
// indexing g_pin_threads by tid. The trace is always recorded as the thread's
// current trace; the rest is skipped while t.ignore_accesses is set.
static void OnTraceParallel(uintptr_t *tls_reg, ADDRINT sp, TraceInfo *trace_info) {
  // Get the thread handler directly from tls_reg.
  // NOTE(review): the "- 4" assumes tls_reg points four uintptr_t slots past
  // the start of the owning PinThread - confirm against the PinThread layout.
  PinThread &t = *(PinThread*)(tls_reg - 4);
  t.trace_info = trace_info;
  if (t.ignore_accesses) return;

  DCHECK(trace_info);
  DCHECK(trace_info->n_mops() > 0);
  DebugOnlyShowPcAndSp(__FUNCTION__, t.tid, trace_info->pc(), sp);

  UpdateCallStack(t, sp);


  if (DEBUG_MODE && G_flags->show_stats)  // this stat may be racey; avoid ping-pong.
    trace_info->counter()++;
  // Unlike OnTraceSerial, the value returned by TLEBAddTrace is not stored.
  TLEBAddTrace(t);
}
   1978 
   1979 /* Verify all mop accesses in the last trace of the given thread by registering
   1980    them with RaceVerifier and sleeping a bit. */
   1981 static void OnTraceVerifyInternal(PinThread &t, uintptr_t **tls_reg_p) {
   1982   DCHECK(g_race_verifier_active);
   1983   if (t.trace_info) {
   1984     int need_sleep = 0;
   1985     for (unsigned i = 0; i < t.trace_info->n_mops(); ++i) {
   1986       uintptr_t addr = (*tls_reg_p)[i];
   1987       if (addr) {
   1988         MopInfo *mop = t.trace_info->GetMop(i);
   1989         need_sleep += RaceVerifierStartAccess(t.uniq_tid, addr, mop->pc(),
   1990             mop->is_write());
   1991       }
   1992     }
   1993 
   1994     if (!need_sleep)
   1995       return;
   1996 
   1997     usleep(G_flags->race_verifier_sleep_ms * 1000);
   1998 
   1999     for (unsigned i = 0; i < t.trace_info->n_mops(); ++i) {
   2000       uintptr_t addr = (*tls_reg_p)[i];
   2001       if (addr) {
   2002         MopInfo *mop = t.trace_info->GetMop(i);
   2003         RaceVerifierEndAccess(t.uniq_tid, addr, mop->pc(), mop->is_write());
   2004       }
   2005     }
   2006   }
   2007 }
   2008 
   2009 static void OnTraceNoMopsVerify(THREADID tid, ADDRINT sp,
   2010     uintptr_t **tls_reg_p) {
   2011   PinThread &t = g_pin_threads[tid];
   2012   DCHECK(g_race_verifier_active);
   2013   OnTraceVerifyInternal(t, tls_reg_p);
   2014   t.trace_info = NULL;
   2015 }
   2016 
   2017 static void OnTraceVerify(THREADID tid, ADDRINT sp, TraceInfo *trace_info,
   2018     uintptr_t **tls_reg_p) {
   2019   DCHECK(g_race_verifier_active);
   2020   PinThread &t = g_pin_threads[tid];
   2021   OnTraceVerifyInternal(t, tls_reg_p);
   2022 
   2023   DCHECK(trace_info->n_mops() > 0);
   2024 
   2025   t.trace_info = trace_info;
   2026   trace_info->counter()++;
   2027   *tls_reg_p = TLEBAddTrace(t);
   2028 }
   2029 
   2030 
   2031 //---------- Memory accesses -------------------------- {{{2
   2032 // 'addr' is the section of t.tleb.events which is set in OnTrace.
   2033 // 'idx' is the number of this mop in its trace.
   2034 // 'a' is the actuall address.
   2035 // 'tid' is thread ID, used only in debug mode.
   2036 //
   2037 // In opt mode this is just one instruction! Something like this:
   2038 // mov %rcx,(%rdi,%rdx,8)
   2039 static void OnMop(uintptr_t *addr, THREADID tid, ADDRINT idx, ADDRINT a) {
   2040   if (DEBUG_MODE) {
   2041     PinThread &t= g_pin_threads[tid];
   2042     CHECK(idx < kMaxMopsPerTrace);
   2043     CHECK(idx < t.trace_info->n_mops());
   2044     uintptr_t *ptr = addr + idx;
   2045     CHECK(ptr >= t.tleb.events);
   2046     CHECK(ptr < t.tleb.events + kThreadLocalEventBufferSize);
   2047     if (a == G_flags->trace_addr) {
   2048       Printf("T%d %s %lx\n", t.tid, __FUNCTION__, a);
   2049     }
   2050   }
   2051   addr[idx] = a;
   2052 }
   2053 
   2054 static void On_PredicatedMop(BOOL is_running, uintptr_t *addr,
   2055                              THREADID tid, ADDRINT idx, ADDRINT a) {
   2056   if (is_running) {
   2057     OnMop(addr, tid, idx, a);
   2058   }
   2059 }
   2060 
// First half of the identical-store check: before the store executes, saves
// the word currently at address 'a' into the mop's slot (addr[idx]) so that
// OnMopCheckIdentStoreAfter can compare it with the value after the store.
static void OnMopCheckIdentStoreBefore(uintptr_t *addr, THREADID tid, ADDRINT idx, ADDRINT a) {
  // Write the value of *a to tleb.
  addr[idx] = *(uintptr_t*)a;
}
   2065 static void OnMopCheckIdentStoreAfter(uintptr_t *addr, THREADID tid, ADDRINT idx, ADDRINT a) {
   2066   // Check if the previous value of *a is equal to the new one.
   2067   // If not, we have a regular memory access. If yes, we have an ident operation,
   2068   // which we want to ignore.
   2069   uintptr_t previous_value_of_a = addr[idx];
   2070   uintptr_t new_value_of_a = *(uintptr_t*)a;
   2071   // 111...111 if the values are different, 0 otherwise.
   2072   uintptr_t ne_mask = -(uintptr_t)(new_value_of_a != previous_value_of_a);
   2073   addr[idx] = ne_mask & a;
   2074 }
   2075 
   2076 //---------- I/O; exit------------------------------- {{{2
// Fixed pseudo-address used as the key of the SIGNAL/WAIT events emitted by
// the two I/O hooks below.
static const uintptr_t kIOMagic = 0x1234c678;

// Emits SIGNAL on kIOMagic.
// NOTE(review): presumably attached before I/O calls that act as the sending
// side of a happens-before edge - confirm at the instrumentation site.
static void Before_SignallingIOCall(THREADID tid, ADDRINT pc) {
  DumpEvent(0, SIGNAL, tid, pc, kIOMagic, 0);
}
   2082 
// Emits WAIT on kIOMagic, the same key Before_SignallingIOCall signals on.
static void After_WaitingIOCall(THREADID tid, ADDRINT pc) {
  DumpEvent(0, WAIT, tid, pc, kIOMagic, 0);
}
   2086 
// Fixed pseudo-address used as the key of the SIGNAL/WAIT events emitted by
// On_atexit and On_exit.
static const uintptr_t kAtexitMagic = 0x9876f432;

// Emits SIGNAL on kAtexitMagic.
static void On_atexit(THREADID tid, ADDRINT pc) {
  DumpEvent(0, SIGNAL, tid, pc, kAtexitMagic, 0);
}
   2092 
// Emits WAIT on kAtexitMagic, the same key On_atexit signals on.
static void On_exit(THREADID tid, ADDRINT pc) {
  DumpEvent(0, WAIT, tid, pc, kAtexitMagic, 0);
}
   2096 
   2097 //---------- Synchronization -------------------------- {{{2
   2098 // locks
// Emits UNLOCK for the lock object 'mu'; runs before the real unlock call.
static void Before_pthread_unlock(THREADID tid, ADDRINT pc, ADDRINT mu) {
  DumpEvent(0, UNLOCK, tid, pc, mu, 0);
}
   2102 
// After-callback for pthread_mutex_lock: emits WRITER_LOCK for the mutex saved
// in frame.arg[0], using the sp and pc recorded when the callback was pushed.
static void After_pthread_mutex_lock(FAST_WRAP_PARAM_AFTER) {
  DumpEventWithSp(frame.sp, WRITER_LOCK, tid, frame.pc, frame.arg[0], 0);
}
   2106 
// Fast interceptor run before pthread_mutex_lock: schedules
// After_pthread_mutex_lock with the mutex (arg0) saved for it.
static void Before_pthread_mutex_lock(FAST_WRAP_PARAM1) {
  PUSH_AFTER_CALLBACK1(After_pthread_mutex_lock, arg0);
}
   2110 
   2111 // In some versions of libpthread, pthread_spin_lock is effectively
   2112 // a recursive function. It jumps to its first insn:
   2113 //    beb0:       f0 ff 0f                lock decl (%rdi)
   2114 //    beb3:       75 0b                   jne    bec0 <pthread_spin_lock+0x10>
   2115 //    beb5:       31 c0                   xor    %eax,%eax
   2116 //    beb7:       c3                      retq
   2117 //    beb8:       0f 1f 84 00 00 00 00    nopl   0x0(%rax,%rax,1)
   2118 //    bebf:       00
   2119 //    bec0:       f3 90                   pause
   2120 //    bec2:       83 3f 00                cmpl   $0x0,(%rdi)
   2121 //    bec5:       7f e9  >>>>>>>>>>>>>    jg     beb0 <pthread_spin_lock>
   2122 //    bec7:       eb f7                   jmp    bec0 <pthread_spin_lock+0x10>
   2123 //
   2124 // So, we need to act only when we return from the last (depth=0) invocation.
   2125 static uintptr_t WRAP_NAME(pthread_spin_lock)(WRAP_PARAM4) {
   2126   PinThread &t= g_pin_threads[tid];
   2127   t.spin_lock_recursion_depth++;
   2128   uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
   2129   t.spin_lock_recursion_depth--;
   2130   if (t.spin_lock_recursion_depth == 0) {
   2131     DumpEvent(ctx, WRITER_LOCK, tid, pc, arg0, 0);
   2132   }
   2133   return ret;
   2134 }
   2135 
   2136 static uintptr_t WRAP_NAME(pthread_rwlock_wrlock)(WRAP_PARAM4) {
   2137   uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
   2138   DumpEvent(ctx, WRITER_LOCK, tid, pc, arg0, 0);
   2139   return ret;
   2140 }
   2141 
   2142 static uintptr_t WRAP_NAME(pthread_rwlock_rdlock)(WRAP_PARAM4) {
   2143   uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
   2144   DumpEvent(ctx, READER_LOCK, tid, pc, arg0, 0);
   2145   return ret;
   2146 }
   2147 
   2148 static uintptr_t WRAP_NAME(pthread_mutex_trylock)(WRAP_PARAM4) {
   2149   uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
   2150   if (ret == 0)
   2151     DumpEvent(ctx, WRITER_LOCK, tid, pc, arg0, 0);
   2152   return ret;
   2153 }
   2154 
   2155 static uintptr_t WRAP_NAME(pthread_spin_trylock)(WRAP_PARAM4) {
   2156   uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
   2157   if (ret == 0)
   2158     DumpEvent(ctx, WRITER_LOCK, tid, pc, arg0, 0);
   2159   return ret;
   2160 }
   2161 
   2162 static uintptr_t WRAP_NAME(pthread_spin_init)(WRAP_PARAM4) {
   2163   DumpEvent(ctx, UNLOCK_OR_INIT, tid, pc, arg0, 0);
   2164   uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
   2165   return ret;
   2166 }
   2167 static uintptr_t WRAP_NAME(pthread_spin_destroy)(WRAP_PARAM4) {
   2168   DumpEvent(ctx, LOCK_DESTROY, tid, pc, arg0, 0);
   2169   uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
   2170   return ret;
   2171 }
   2172 static uintptr_t WRAP_NAME(pthread_spin_unlock)(WRAP_PARAM4) {
   2173   DumpEvent(ctx, UNLOCK_OR_INIT, tid, pc, arg0, 0);
   2174   uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
   2175   return ret;
   2176 }
   2177 
   2178 static uintptr_t WRAP_NAME(pthread_rwlock_trywrlock)(WRAP_PARAM4) {
   2179   uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
   2180   if (ret == 0)
   2181     DumpEvent(ctx, WRITER_LOCK, tid, pc, arg0, 0);
   2182   return ret;
   2183 }
   2184 
   2185 static uintptr_t WRAP_NAME(pthread_rwlock_tryrdlock)(WRAP_PARAM4) {
   2186   uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
   2187   if (ret == 0)
   2188     DumpEvent(ctx, READER_LOCK, tid, pc, arg0, 0);
   2189   return ret;
   2190 }
   2191 
   2192 
// Emits LOCK_CREATE for the mutex 'mu'; runs before pthread_mutex_init.
static void Before_pthread_mutex_init(THREADID tid, ADDRINT pc, ADDRINT mu) {
  DumpEvent(0, LOCK_CREATE, tid, pc, mu, 0);
}
// Emits LOCK_CREATE for the rwlock 'mu'; runs before pthread_rwlock_init.
static void Before_pthread_rwlock_init(THREADID tid, ADDRINT pc, ADDRINT mu) {
  DumpEvent(0, LOCK_CREATE, tid, pc, mu, 0);
}
   2199 
// Emits LOCK_DESTROY for the mutex 'mu'; runs before pthread_mutex_destroy.
static void Before_pthread_mutex_destroy(THREADID tid, ADDRINT pc, ADDRINT mu) {
  DumpEvent(0, LOCK_DESTROY, tid, pc, mu, 0);
}
// Emits LOCK_DESTROY for the rwlock 'mu'; runs before pthread_rwlock_destroy.
static void Before_pthread_rwlock_destroy(THREADID tid, ADDRINT pc, ADDRINT mu) {
  DumpEvent(0, LOCK_DESTROY, tid, pc, mu, 0);
}
   2206 
   2207 // barrier
   2208 static uintptr_t WRAP_NAME(pthread_barrier_init)(WRAP_PARAM4) {
   2209   DumpEvent(ctx, CYCLIC_BARRIER_INIT, tid, pc, arg0, arg2);
   2210   uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
   2211   return ret;
   2212 }
   2213 static uintptr_t WRAP_NAME(pthread_barrier_wait)(WRAP_PARAM4) {
   2214   DumpEvent(ctx, CYCLIC_BARRIER_WAIT_BEFORE, tid, pc, arg0, 0);
   2215   uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
   2216   DumpEvent(ctx, CYCLIC_BARRIER_WAIT_AFTER, tid, pc, arg0, 0);
   2217   return ret;
   2218 }
   2219 
   2220 
   2221 // condvar
// Emits SIGNAL for the condition variable 'cv'; runs before the real call.
static void Before_pthread_cond_signal(THREADID tid, ADDRINT pc, ADDRINT cv) {
  DumpEvent(0, SIGNAL, tid, pc, cv, 0);
}
   2225 
   2226 static uintptr_t WRAP_NAME(pthread_cond_wait)(WRAP_PARAM4) {
   2227   DumpEvent(ctx, UNLOCK, tid, pc, arg1, 0);
   2228   uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
   2229   DumpEvent(ctx, WAIT, tid, pc, arg0, 0);
   2230   DumpEvent(ctx, WRITER_LOCK, tid, pc, arg1, 0);
   2231   return ret;
   2232 }
   2233 static uintptr_t WRAP_NAME(pthread_cond_timedwait)(WRAP_PARAM4) {
   2234   DumpEvent(ctx, UNLOCK, tid, pc, arg1, 0);
   2235   uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
   2236   if (ret == 0) {
   2237     DumpEvent(ctx, WAIT, tid, pc, arg0, 0);
   2238   }
   2239   DumpEvent(ctx, WRITER_LOCK, tid, pc, arg1, 0);
   2240   return ret;
   2241 }
   2242 
   2243 // epoll
// Fixed pseudo-address used as the key of the SIGNAL/WAIT events emitted by
// the two epoll hooks below.
static const uintptr_t kSocketMagic = 0xDEADFBAD;

// Emits SIGNAL on kSocketMagic; runs before epoll_ctl.
static void Before_epoll_ctl(THREADID tid, ADDRINT pc) {
  DumpEvent(0, SIGNAL, tid, pc, kSocketMagic, 0);
}
   2249 
// Emits WAIT on kSocketMagic, the same key Before_epoll_ctl signals on; runs
// after epoll_wait.
static void After_epoll_wait(THREADID tid, ADDRINT pc) {
  DumpEvent(0, WAIT, tid, pc, kSocketMagic, 0);
}
   2253 
   2254 // sem
// Runs after sem_open: emits SIGNAL keyed by the returned value 'ret'.
static void After_sem_open(THREADID tid, ADDRINT pc, ADDRINT ret) {
  // TODO(kcc): need to handle it more precise?
  DumpEvent(0, SIGNAL, tid, pc, ret, 0);
}
// Runs before sem_post: emits SIGNAL for the semaphore 'sem'.
static void Before_sem_post(THREADID tid, ADDRINT pc, ADDRINT sem) {
  DumpEvent(0, SIGNAL, tid, pc, sem, 0);
}
   2262 
   2263 static uintptr_t WRAP_NAME(sem_wait)(WRAP_PARAM4) {
   2264   uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
   2265   DumpEvent(ctx, WAIT, tid, pc, arg0, 0);
   2266   return ret;
   2267 }
   2268 static uintptr_t WRAP_NAME(sem_trywait)(WRAP_PARAM4) {
   2269   uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
   2270   if (ret == 0) {
   2271     DumpEvent(ctx, WAIT, tid, pc, arg0, 0);
   2272   }
   2273   return ret;
   2274 }
   2275 
   2276 // etc
   2277 #if defined(__GNUC__)
   2278 uintptr_t WRAP_NAME(lockf)(WRAP_PARAM4) {
   2279   const long offset_magic = 0xFEB0ACC0;
   2280 
   2281   if (arg1 == F_ULOCK)
   2282     DumpEvent(ctx, UNLOCK, tid, pc, arg0 ^ offset_magic, 0);
   2283 
   2284   uintptr_t ret = CALL_ME_INSIDE_WRAPPER_4();
   2285 
   2286   if (arg1 == F_LOCK && ret == 0)
   2287     DumpEvent(ctx, WRITER_LOCK, tid, pc, arg0 ^ offset_magic, 0);
   2288 
   2289   return ret;
   2290 }
   2291 #endif
   2292 
   2293 //--------- Annotations -------------------------- {{{2
   2294 static void On_AnnotateBenignRace(THREADID tid, ADDRINT pc,
   2295                                   ADDRINT file, ADDRINT line,
   2296                                   ADDRINT a, ADDRINT descr) {
   2297   DumpEvent(0, BENIGN_RACE, tid, descr, a, 1);
   2298 }
   2299 
   2300 static void On_AnnotateBenignRaceSized(THREADID tid, ADDRINT pc,
   2301                                        ADDRINT file, ADDRINT line,
   2302                                        ADDRINT a, ADDRINT size, ADDRINT descr) {
   2303   DumpEvent(0, BENIGN_RACE, tid, descr, a, size);
   2304 }
   2305 
   2306 static void On_AnnotateExpectRace(THREADID tid, ADDRINT pc,
   2307                                   ADDRINT file, ADDRINT line,
   2308                                   ADDRINT a, ADDRINT descr) {
   2309   DumpEvent(0, EXPECT_RACE, tid, descr, a, 1);
   2310 }
   2311 
   2312 static void On_AnnotateFlushExpectedRaces(THREADID tid, ADDRINT pc,
   2313                                   ADDRINT file, ADDRINT line) {
   2314   DumpEvent(0, FLUSH_EXPECTED_RACES, 0, 0, 0, 0);
   2315 }
   2316 
   2317 
   2318 static void On_AnnotateTraceMemory(THREADID tid, ADDRINT pc,
   2319                                    ADDRINT file, ADDRINT line,
   2320                                    ADDRINT a) {
   2321   DumpEvent(0, TRACE_MEM, tid, pc, a, 0);
   2322 }
   2323 
   2324 static void On_AnnotateNewMemory(THREADID tid, ADDRINT pc,
   2325                                    ADDRINT file, ADDRINT line,
   2326                                    ADDRINT a, ADDRINT size) {
   2327   DumpEvent(0, MALLOC, tid, pc, a, size);
   2328 }
   2329 
   2330 static void On_AnnotateNoOp(THREADID tid, ADDRINT pc,
   2331                             ADDRINT file, ADDRINT line, ADDRINT a) {
   2332   Printf("%s T%d: %s:%d %p\n", __FUNCTION__, tid, (char*)file, (int)line, a);
   2333   //DumpEvent(0, STACK_TRACE, tid, pc, 0, 0);
   2334 //  PrintShadowStack(tid);
   2335 }
   2336 
   2337 static void On_AnnotateFlushState(THREADID tid, ADDRINT pc,
   2338                                   ADDRINT file, ADDRINT line) {
   2339   DumpEvent(0, FLUSH_STATE, tid, pc, 0, 0);
   2340 }
   2341 
   2342 static void On_AnnotateCondVarSignal(THREADID tid, ADDRINT pc,
   2343                                      ADDRINT file, ADDRINT line, ADDRINT obj) {
   2344   DumpEvent(0, SIGNAL, tid, pc, obj, 0);
   2345 }
   2346 
   2347 static void On_AnnotateCondVarWait(THREADID tid, ADDRINT pc,
   2348                                    ADDRINT file, ADDRINT line, ADDRINT obj) {
   2349   DumpEvent(0, WAIT, tid, pc, obj, 0);
   2350 }
   2351 
   2352 static void On_AnnotateHappensBefore(THREADID tid, ADDRINT pc,
   2353                                      ADDRINT file, ADDRINT line, ADDRINT obj) {
   2354   DumpEvent(0, SIGNAL, tid, pc, obj, 0);
   2355 }
   2356 
   2357 static void On_AnnotateHappensAfter(THREADID tid, ADDRINT pc,
   2358                                     ADDRINT file, ADDRINT line, ADDRINT obj) {
   2359   DumpEvent(0, WAIT, tid, pc, obj, 0);
   2360 }
   2361 
   2362 static void On_AnnotateEnableRaceDetection(THREADID tid, ADDRINT pc,
   2363                                         ADDRINT file, ADDRINT line,
   2364                                         ADDRINT enable) {
   2365   if (!g_race_verifier_active)
   2366     TLEBSimpleEvent(g_pin_threads[tid],
   2367         enable ? TLEB_GLOBAL_IGNORE_OFF : TLEB_GLOBAL_IGNORE_ON);
   2368 }
   2369 
   2370 static void On_AnnotateIgnoreReadsBegin(THREADID tid, ADDRINT pc,
   2371                                         ADDRINT file, ADDRINT line) {
   2372   DumpEvent(0, IGNORE_READS_BEG, tid, pc, 0, 0);
   2373 }
   2374 static void On_AnnotateIgnoreReadsEnd(THREADID tid, ADDRINT pc,
   2375                                       ADDRINT file, ADDRINT line) {
   2376   DumpEvent(0, IGNORE_READS_END, tid, pc, 0, 0);
   2377 }
   2378 static void On_AnnotateIgnoreWritesBegin(THREADID tid, ADDRINT pc,
   2379                                          ADDRINT file, ADDRINT line) {
   2380   DumpEvent(0, IGNORE_WRITES_BEG, tid, pc, 0, 0);
   2381 }
   2382 static void On_AnnotateIgnoreWritesEnd(THREADID tid, ADDRINT pc,
   2383                                        ADDRINT file, ADDRINT line) {
   2384   DumpEvent(0, IGNORE_WRITES_END, tid, pc, 0, 0);
   2385 }
   2386 static void On_AnnotateThreadName(THREADID tid, ADDRINT pc,
   2387                                   ADDRINT file, ADDRINT line,
   2388                                   ADDRINT name) {
   2389   DumpEvent(0, SET_THREAD_NAME, tid, pc, name, 0);
   2390 }
   2391 static void On_AnnotatePublishMemoryRange(THREADID tid, ADDRINT pc,
   2392                                           ADDRINT file, ADDRINT line,
   2393                                           ADDRINT a, ADDRINT size) {
   2394   DumpEvent(0, PUBLISH_RANGE, tid, pc, a, size);
   2395 }
   2396 
   2397 static void On_AnnotateUnpublishMemoryRange(THREADID tid, ADDRINT pc,
   2398                                           ADDRINT file, ADDRINT line,
   2399                                           ADDRINT a, ADDRINT size) {
   2400 //  Printf("T%d %s %lx %lx\n", tid, __FUNCTION__, a, size);
   2401   DumpEvent(0, UNPUBLISH_RANGE, tid, pc, a, size);
   2402 }
   2403 
   2404 
   2405 static void On_AnnotateMutexIsUsedAsCondVar(THREADID tid, ADDRINT pc,
   2406                                             ADDRINT file, ADDRINT line,
   2407                                             ADDRINT mu) {
   2408   DumpEvent(0, HB_LOCK, tid, pc, mu, 0);
   2409 }
   2410 
   2411 static void On_AnnotateMutexIsNotPhb(THREADID tid, ADDRINT pc,
   2412                                      ADDRINT file, ADDRINT line,
   2413                                      ADDRINT mu) {
   2414   DumpEvent(0, NON_HB_LOCK, tid, pc, mu, 0);
   2415 }
   2416 
   2417 static void On_AnnotatePCQCreate(THREADID tid, ADDRINT pc,
   2418                                  ADDRINT file, ADDRINT line,
   2419                                  ADDRINT pcq) {
   2420   DumpEvent(0, PCQ_CREATE, tid, pc, pcq, 0);
   2421 }
   2422 
   2423 static void On_AnnotatePCQDestroy(THREADID tid, ADDRINT pc,
   2424                                   ADDRINT file, ADDRINT line,
   2425                                   ADDRINT pcq) {
   2426   DumpEvent(0, PCQ_DESTROY, tid, pc, pcq, 0);
   2427 }
   2428 
   2429 static void On_AnnotatePCQPut(THREADID tid, ADDRINT pc,
   2430                               ADDRINT file, ADDRINT line,
   2431                               ADDRINT pcq) {
   2432   DumpEvent(0, PCQ_PUT, tid, pc, pcq, 0);
   2433 }
   2434 
   2435 static void On_AnnotatePCQGet(THREADID tid, ADDRINT pc,
   2436                               ADDRINT file, ADDRINT line,
   2437                               ADDRINT pcq) {
   2438   DumpEvent(0, PCQ_GET, tid, pc, pcq, 0);
   2439 }
   2440 
   2441 static void On_AnnotateRWLockCreate(THREADID tid, ADDRINT pc,
   2442                                     ADDRINT file, ADDRINT line,
   2443                                     ADDRINT lock) {
   2444   DumpEvent(0, LOCK_CREATE, tid, pc, lock, 0);
   2445 }
   2446 
   2447 static void On_AnnotateRWLockDestroy(THREADID tid, ADDRINT pc,
   2448                                     ADDRINT file, ADDRINT line,
   2449                                     ADDRINT lock) {
   2450   DumpEvent(0, LOCK_DESTROY, tid, pc, lock, 0);
   2451 }
   2452 
   2453 static void On_AnnotateRWLockAcquired(THREADID tid, ADDRINT pc,
   2454                                      ADDRINT file, ADDRINT line,
   2455                                      ADDRINT lock, ADDRINT is_w) {
   2456   DumpEvent(0, is_w ? WRITER_LOCK : READER_LOCK, tid, pc, lock, 0);
   2457 }
   2458 
   2459 static void On_AnnotateRWLockReleased(THREADID tid, ADDRINT pc,
   2460                                      ADDRINT file, ADDRINT line,
   2461                                      ADDRINT lock, ADDRINT is_w) {
   2462   DumpEvent(0, UNLOCK, tid, pc, lock, 0);
   2463 }
   2464 
   2465 
   2466 int WRAP_NAME(RunningOnValgrind)(WRAP_PARAM4) {
   2467   return 1;
   2468 }
   2469 
   2470 //--------- Instrumentation ----------------------- {{{1
   2471 static bool IgnoreImage(IMG img) {
   2472   string name = IMG_Name(img);
   2473   if (name.find("/ld-") != string::npos)
   2474     return true;
   2475   return false;
   2476 }
   2477 
   2478 static bool IgnoreRtn(RTN rtn) {
   2479   CHECK(rtn != RTN_Invalid());
   2480   ADDRINT rtn_address = RTN_Address(rtn);
   2481   if (ThreadSanitizerWantToInstrumentSblock(rtn_address) == false)
   2482     return true;
   2483   return false;
   2484 }
   2485 
   2486 static bool InstrumentCall(INS ins) {
   2487   // Call.
   2488   if (INS_IsProcedureCall(ins) && !INS_IsSyscall(ins)) {
   2489     IGNORE_BELOW_RTN ignore_below = IGNORE_BELOW_RTN_UNKNOWN;
   2490     if (INS_IsDirectBranchOrCall(ins)) {
   2491       ADDRINT target = INS_DirectBranchOrCallTargetAddress(ins);
   2492       bool ignore = ThreadSanitizerIgnoreAccessesBelowFunction(target);
   2493       ignore_below = ignore ? IGNORE_BELOW_RTN_YES : IGNORE_BELOW_RTN_NO;
   2494     }
   2495     INS_InsertCall(ins, IPOINT_BEFORE,
   2496                    (AFUNPTR)InsertBeforeEvent_Call,
   2497                    IARG_THREAD_ID,
   2498                    IARG_INST_PTR,
   2499                    IARG_BRANCH_TARGET_ADDR,
   2500                    IARG_REG_VALUE, REG_STACK_PTR,
   2501                    IARG_ADDRINT, ignore_below,
   2502                    IARG_END);
   2503     return true;
   2504   }
   2505   if (INS_IsSyscall(ins)) {
   2506     INS_InsertCall(ins, IPOINT_BEFORE,
   2507                    (AFUNPTR)InsertBeforeEvent_SysCall,
   2508                    IARG_THREAD_ID,
   2509                    IARG_REG_VALUE, REG_STACK_PTR,
   2510                    IARG_END);
   2511   }
   2512   return false;
   2513 }
   2514 
   2515 
   2516 // return the number of inserted instrumentations.
   2517 static void InstrumentMopsInBBl(BBL bbl, RTN rtn, TraceInfo *trace_info, uintptr_t instrument_pc, size_t *mop_idx) {
   2518   // compute 'dtor_head', see
   2519   // http://code.google.com/p/data-race-test/wiki/PopularDataRaces#Data_race_on_vptr
   2520   // On x86_64 only the first BB of DTOR is treated as dtor_head.
   2521   // On x86, we have to treat more BBs as dtor_head due to -fPIC.
   2522   // See http://code.google.com/p/chromium/issues/detail?id=61199
   2523   bool dtor_head = false;
   2524 #ifdef TARGET_IA32
   2525   size_t max_offset_for_dtor_head = 32;
   2526 #else
   2527   size_t max_offset_for_dtor_head = 0;
   2528 #endif
   2529 
   2530   if (BBL_Address(bbl) - RTN_Address(rtn) <= max_offset_for_dtor_head) {
   2531     string demangled_rtn_name = Demangle(RTN_Name(rtn).c_str());
   2532     if (demangled_rtn_name.find("::~") != string::npos)
   2533       dtor_head = true;
   2534   }
   2535 
   2536   INS tail = BBL_InsTail(bbl);
   2537   // All memory reads/writes
   2538   for( INS ins = BBL_InsHead(bbl);
   2539        INS_Valid(ins);
   2540        ins = INS_Next(ins) ) {
   2541     if (ins != tail) {
   2542       CHECK(!INS_IsRet(ins));
   2543       CHECK(!INS_IsProcedureCall(ins));
   2544     }
   2545     // bool is_stack = INS_IsStackRead(ins) || INS_IsStackWrite(ins);
   2546     if (INS_IsAtomicUpdate(ins)) continue;
   2547 
   2548     int n_mops = INS_MemoryOperandCount(ins);
   2549     if (n_mops == 0) continue;
   2550 
   2551     string opcode_str = OPCODE_StringShort(INS_Opcode(ins));
   2552     if (trace_info && debug_ins) {
   2553       Printf("  INS: opcode=%s n_mops=%d dis=\"%s\"\n",
   2554              opcode_str.c_str(),  n_mops,
   2555              INS_Disassemble(ins).c_str());
   2556     }
   2557 
   2558     bool ins_ignore_writes = false;
   2559     bool ins_ignore_reads = false;
   2560 
   2561     // CALL writes to stack and (if the call is indirect) reads the target
   2562     // address. We don't want to handle the stack write.
   2563     if (INS_IsCall(ins)) {
   2564       CHECK(n_mops == 1 || n_mops == 2);
   2565       ins_ignore_writes = true;
   2566     }
   2567 
   2568     // PUSH: we ignore the write to stack but we don't ignore the read (if any).
   2569     if (opcode_str == "PUSH") {
   2570       CHECK(n_mops == 1 || n_mops == 2);
   2571       ins_ignore_writes = true;
   2572     }
   2573 
   2574     // POP: we are reading from stack, Ignore it.
   2575     if (opcode_str == "POP") {
   2576       CHECK(n_mops == 1 || n_mops == 2);
   2577       ins_ignore_reads = true;
   2578       continue;
   2579     }
   2580 
   2581     // RET/LEAVE -- ignore it, it just reads the return address and stack.
   2582     if (INS_IsRet(ins) || opcode_str == "LEAVE") {
   2583       CHECK(n_mops == 1);
   2584       continue;
   2585     }
   2586 
   2587     bool is_predicated = INS_IsPredicated(ins);
   2588     for (int i = 0; i < n_mops; i++) {
   2589       if (*mop_idx >= kMaxMopsPerTrace) {
   2590         Report("INFO: too many mops in trace: %d %s\n",
   2591             INS_Address(ins), PcToRtnName(INS_Address(ins), true).c_str());
   2592         return;
   2593       }
   2594       size_t size = INS_MemoryOperandSize(ins, i);
   2595       CHECK(size);
   2596       bool is_write = INS_MemoryOperandIsWritten(ins, i);
   2597 
   2598       if (ins_ignore_writes && is_write) continue;
   2599       if (ins_ignore_reads && !is_write) continue;
   2600       if (instrument_pc && instrument_pc != INS_Address(ins)) continue;
   2601 
   2602       bool check_ident_store = false;
   2603       if (dtor_head && is_write && INS_IsMov(ins) && size == sizeof(void*)) {
   2604         // This is a special case for '*addr = value', where we want to ignore the
   2605         // access if *addr == value before the store.
   2606         CHECK(!is_predicated);
   2607         check_ident_store = true;
   2608       }
   2609 
   2610       if (trace_info) {
   2611         if (debug_ins) {
   2612           Printf("    size=%ld is_w=%d\n", size, (int)is_write);
   2613         }
   2614         IPOINT point = IPOINT_BEFORE;
   2615         AFUNPTR on_mop_callback = (AFUNPTR)OnMop;
   2616         if (check_ident_store) {
   2617           INS_InsertCall(ins, IPOINT_BEFORE,
   2618             (AFUNPTR)OnMopCheckIdentStoreBefore,
   2619             IARG_REG_VALUE, tls_reg,
   2620             IARG_THREAD_ID,
   2621             IARG_ADDRINT, *mop_idx,
   2622             IARG_MEMORYOP_EA, i,
   2623             IARG_END);
   2624           // This is just a MOV, so we can insert the instrumentation code
   2625           // after the insn.
   2626           point = IPOINT_AFTER;
   2627           on_mop_callback = (AFUNPTR)OnMopCheckIdentStoreAfter;
   2628         }
   2629 
   2630         MopInfo *mop = trace_info->GetMop(*mop_idx);
   2631         new (mop) MopInfo(INS_Address(ins), size, is_write, false);
   2632         if (is_predicated) {
   2633           INS_InsertPredicatedCall(ins, point,
   2634               (AFUNPTR)On_PredicatedMop,
   2635               IARG_EXECUTING,
   2636               IARG_REG_VALUE, tls_reg,
   2637               IARG_THREAD_ID,
   2638               IARG_ADDRINT, *mop_idx,
   2639               IARG_MEMORYOP_EA, i,
   2640               IARG_END);
   2641         } else {
   2642           INS_InsertCall(ins, point,
   2643               on_mop_callback,
   2644               IARG_REG_VALUE, tls_reg,
   2645               IARG_THREAD_ID,
   2646               IARG_ADDRINT, *mop_idx,
   2647               IARG_MEMORYOP_EA, i,
   2648               IARG_END);
   2649         }
   2650       }
   2651       (*mop_idx)++;
   2652     }
   2653   }
   2654 }
   2655 
   2656 void CallbackForTRACE(TRACE trace, void *v) {
   2657   CHECK(n_started_threads > 0);
   2658 
   2659   RTN rtn = TRACE_Rtn(trace);
   2660   bool ignore_memory = false;
   2661   string img_name = "<>";
   2662   string rtn_name = "<>";
   2663   if (RTN_Valid(rtn)) {
   2664     SEC sec = RTN_Sec(rtn);
   2665     IMG img = SEC_Img(sec);
   2666     rtn_name = RTN_Name(rtn);
   2667     img_name = IMG_Name(img);
   2668 
   2669     if (IgnoreImage(img)) {
   2670       // Printf("Ignoring memory accesses in %s\n", IMG_Name(img).c_str());
   2671       ignore_memory = true;
   2672     } else if (IgnoreRtn(rtn)) {
   2673       ignore_memory = true;
   2674     }
   2675   }
   2676 
   2677   uintptr_t instrument_pc = 0;
   2678   if (g_race_verifier_active) {
   2679     // Check if this trace looks like part of a possible race report.
   2680     uintptr_t min_pc = UINTPTR_MAX;
   2681     uintptr_t max_pc = 0;
   2682     for(BBL bbl = TRACE_BblHead(trace); BBL_Valid(bbl); bbl = BBL_Next(bbl)) {
   2683       min_pc = MIN(min_pc, INS_Address(BBL_InsHead(bbl)));
   2684       max_pc = MAX(max_pc, INS_Address(BBL_InsTail(bbl)));
   2685     }
   2686 
   2687     bool verify_trace = RaceVerifierGetAddresses(min_pc, max_pc, &instrument_pc);
   2688     if (!verify_trace)
   2689       ignore_memory = true;
   2690   }
   2691 
   2692   size_t n_mops = 0;
   2693   // count the mops.
   2694   for(BBL bbl = TRACE_BblHead(trace); BBL_Valid(bbl); bbl = BBL_Next(bbl)) {
   2695     if (!ignore_memory) {
   2696       InstrumentMopsInBBl(bbl, rtn, NULL, instrument_pc, &n_mops);
   2697     }
   2698     INS tail = BBL_InsTail(bbl);
   2699     if (INS_IsRet(tail)) {
   2700 #if 0
   2701       INS_InsertIfCall(tail, IPOINT_BEFORE,
   2702                        (AFUNPTR)Before_RET_IF,
   2703                        IARG_THREAD_ID,
   2704                        IARG_END);
   2705 
   2706       INS_InsertThenCall(
   2707 #else
   2708         INS_InsertCall(
   2709 #endif
   2710           tail, IPOINT_BEFORE,
   2711           (AFUNPTR)Before_RET_THEN,
   2712           IARG_THREAD_ID,
   2713           IARG_INST_PTR,
   2714           IARG_REG_VALUE, REG_STACK_PTR,
   2715           IARG_FUNCRET_EXITPOINT_VALUE,
   2716           IARG_END);
   2717     }
   2718   }
   2719 
   2720   // Handle the head of the trace
   2721   INS head = BBL_InsHead(TRACE_BblHead(trace));
   2722   CHECK(n_mops <= kMaxMopsPerTrace);
   2723 
   2724   TraceInfo *trace_info = NULL;
   2725   if (n_mops) {
   2726     trace_info = TraceInfo::NewTraceInfo(n_mops, INS_Address(head));
   2727     if (TS_SERIALIZED == 0) {
   2728       // TODO(kcc): implement race verifier here.
   2729       INS_InsertCall(head, IPOINT_BEFORE,
   2730                      (AFUNPTR)OnTraceParallel,
   2731                      IARG_REG_VALUE, tls_reg,
   2732                      IARG_REG_VALUE, REG_STACK_PTR,
   2733                      IARG_PTR, trace_info,
   2734                      IARG_END);
   2735     } else {
   2736       AFUNPTR handler = (AFUNPTR)(g_race_verifier_active ?
   2737                                   OnTraceVerify : OnTraceSerial);
   2738       INS_InsertCall(head, IPOINT_BEFORE,
   2739                      handler,
   2740                      IARG_THREAD_ID,
   2741                      IARG_REG_VALUE, REG_STACK_PTR,
   2742                      IARG_PTR, trace_info,
   2743                      IARG_REG_REFERENCE, tls_reg,
   2744                      IARG_END);
   2745     }
   2746   } else {
   2747     if (g_race_verifier_active) {
   2748       INS_InsertCall(head, IPOINT_BEFORE,
   2749                      (AFUNPTR)OnTraceNoMopsVerify,
   2750                      IARG_THREAD_ID,
   2751                      IARG_REG_VALUE, REG_STACK_PTR,
   2752                      IARG_REG_REFERENCE, tls_reg,
   2753                      IARG_END);
   2754     }
   2755   }
   2756 
   2757   // instrument the mops. We want to do it after we instrumented the head
   2758   // to maintain the right order of instrumentation callbacks (head first, then
   2759   // mops).
   2760   size_t i = 0;
   2761   if (n_mops) {
   2762     if (debug_ins) {
   2763       Printf("TRACE %p (%p); n_mops=%ld %s\n", trace_info,
   2764              TRACE_Address(trace),
   2765              trace_info->n_mops(),
   2766              PcToRtnName(trace_info->pc(), false).c_str());
   2767     }
   2768     for(BBL bbl = TRACE_BblHead(trace); BBL_Valid(bbl); bbl = BBL_Next(bbl)) {
   2769       InstrumentMopsInBBl(bbl, rtn, trace_info, instrument_pc, &i);
   2770     }
   2771   }
   2772 
   2773   // instrument the calls, do it after all other instrumentation.
   2774   if (!g_race_verifier_active) {
   2775     for(BBL bbl = TRACE_BblHead(trace); BBL_Valid(bbl); bbl = BBL_Next(bbl)) {
   2776       InstrumentCall(BBL_InsTail(bbl));
   2777     }
   2778   }
   2779 
   2780   CHECK(n_mops == i);
   2781 }
   2782 
   2783 
   2784 #define INSERT_FN_HELPER(point, name, rtn, to_insert, ...) \
   2785     RTN_Open(rtn); \
   2786     if (G_flags->verbosity >= 2) Printf("RTN: Inserting %-50s (%s) %s (%s) img: %s\n", \
   2787     #to_insert, #point, RTN_Name(rtn).c_str(), name, IMG_Name(img).c_str());\
   2788     RTN_InsertCall(rtn, point, (AFUNPTR)to_insert, IARG_THREAD_ID, \
   2789                    IARG_INST_PTR, __VA_ARGS__, IARG_END);\
   2790     RTN_Close(rtn); \
   2791 
   2792 #define INSERT_FN(point, name, to_insert, ...) \
   2793   while (RtnMatchesName(rtn_name, name)) {\
   2794     INSERT_FN_HELPER(point, name, rtn, to_insert, __VA_ARGS__); \
   2795     break;\
   2796   }\
   2797 
   2798 
   2799 #define INSERT_BEFORE_FN(name, to_insert, ...) \
   2800     INSERT_FN(IPOINT_BEFORE, name, to_insert, __VA_ARGS__)
   2801 
   2802 #define INSERT_BEFORE_1_SP(name, to_insert) \
   2803     INSERT_BEFORE_FN(name, to_insert, \
   2804                      IARG_REG_VALUE, REG_STACK_PTR, \
   2805                      IARG_FUNCARG_ENTRYPOINT_VALUE, 0)
   2806 
   2807 #define INSERT_BEFORE_2_SP(name, to_insert) \
   2808     INSERT_BEFORE_FN(name, to_insert, \
   2809                      IARG_REG_VALUE, REG_STACK_PTR, \
   2810                      IARG_FUNCARG_ENTRYPOINT_VALUE, 0, \
   2811                      IARG_FUNCARG_ENTRYPOINT_VALUE, 1)
   2812 
   2813 #define INSERT_BEFORE_0(name, to_insert) \
   2814     INSERT_BEFORE_FN(name, to_insert, IARG_END);
   2815 
   2816 #define INSERT_BEFORE_1(name, to_insert) \
   2817     INSERT_BEFORE_FN(name, to_insert, \
   2818                      IARG_FUNCARG_ENTRYPOINT_VALUE, 0)
   2819 
   2820 #define INSERT_BEFORE_2(name, to_insert) \
   2821     INSERT_BEFORE_FN(name, to_insert, \
   2822                      IARG_FUNCARG_ENTRYPOINT_VALUE, 0, \
   2823                      IARG_FUNCARG_ENTRYPOINT_VALUE, 1)
   2824 
   2825 #define INSERT_BEFORE_3(name, to_insert) \
   2826     INSERT_BEFORE_FN(name, to_insert, \
   2827                      IARG_FUNCARG_ENTRYPOINT_VALUE, 0, \
   2828                      IARG_FUNCARG_ENTRYPOINT_VALUE, 1, \
   2829                      IARG_FUNCARG_ENTRYPOINT_VALUE, 2)
   2830 
   2831 #define INSERT_BEFORE_4(name, to_insert) \
   2832     INSERT_BEFORE_FN(name, to_insert, \
   2833                      IARG_FUNCARG_ENTRYPOINT_VALUE, 0, \
   2834                      IARG_FUNCARG_ENTRYPOINT_VALUE, 1, \
   2835                      IARG_FUNCARG_ENTRYPOINT_VALUE, 2, \
   2836                      IARG_FUNCARG_ENTRYPOINT_VALUE, 3)
   2837 
   2838 #define INSERT_BEFORE_5(name, to_insert) \
   2839     INSERT_BEFORE_FN(name, to_insert, \
   2840                      IARG_FUNCARG_ENTRYPOINT_VALUE, 0, \
   2841                      IARG_FUNCARG_ENTRYPOINT_VALUE, 1, \
   2842                      IARG_FUNCARG_ENTRYPOINT_VALUE, 2, \
   2843                      IARG_FUNCARG_ENTRYPOINT_VALUE, 3, \
   2844                      IARG_FUNCARG_ENTRYPOINT_VALUE, 4)
   2845 
   2846 #define INSERT_BEFORE_6(name, to_insert) \
   2847     INSERT_BEFORE_FN(name, to_insert, \
   2848                      IARG_FUNCARG_ENTRYPOINT_VALUE, 0, \
   2849                      IARG_FUNCARG_ENTRYPOINT_VALUE, 1, \
   2850                      IARG_FUNCARG_ENTRYPOINT_VALUE, 2, \
   2851                      IARG_FUNCARG_ENTRYPOINT_VALUE, 3, \
   2852                      IARG_FUNCARG_ENTRYPOINT_VALUE, 4, \
   2853                      IARG_FUNCARG_ENTRYPOINT_VALUE, 5)
   2854 
   2855 #define INSERT_AFTER_FN(name, to_insert, ...) \
   2856     INSERT_FN(IPOINT_AFTER, name, to_insert, __VA_ARGS__)
   2857 
   2858 #define INSERT_AFTER_0(name, to_insert) \
   2859     INSERT_AFTER_FN(name, to_insert, IARG_END)
   2860 
   2861 #define INSERT_AFTER_1(name, to_insert) \
   2862     INSERT_AFTER_FN(name, to_insert, IARG_FUNCRET_EXITPOINT_VALUE)
   2863 
   2864 
   2865 #ifdef _MSC_VER
   2866 void WrapStdCallFunc1(RTN rtn, char *name, AFUNPTR replacement_func) {
   2867   if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
   2868     InformAboutFunctionWrap(rtn, name);
   2869     PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
   2870                                  CALLINGSTD_STDCALL,
   2871                                  "proto",
   2872                                  PIN_PARG(uintptr_t),
   2873                                  PIN_PARG_END());
   2874     RTN_ReplaceSignature(rtn,
   2875                          AFUNPTR(replacement_func),
   2876                          IARG_PROTOTYPE, proto,
   2877                          IARG_THREAD_ID,
   2878                          IARG_INST_PTR,
   2879                          IARG_CONTEXT,
   2880                          IARG_ORIG_FUNCPTR,
   2881                          IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
   2882                          IARG_END);
   2883     PROTO_Free(proto);
   2884   }
   2885 }
   2886 
   2887 void WrapStdCallFunc2(RTN rtn, char *name, AFUNPTR replacement_func) {
   2888   if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
   2889     InformAboutFunctionWrap(rtn, name);
   2890     PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
   2891                                  CALLINGSTD_STDCALL,
   2892                                  "proto",
   2893                                  PIN_PARG(uintptr_t),
   2894                                  PIN_PARG(uintptr_t),
   2895                                  PIN_PARG_END());
   2896     RTN_ReplaceSignature(rtn,
   2897                          AFUNPTR(replacement_func),
   2898                          IARG_PROTOTYPE, proto,
   2899                          IARG_THREAD_ID,
   2900                          IARG_INST_PTR,
   2901                          IARG_CONTEXT,
   2902                          IARG_ORIG_FUNCPTR,
   2903                          IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
   2904                          IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
   2905                          IARG_END);
   2906     PROTO_Free(proto);
   2907   }
   2908 }
   2909 
   2910 void WrapStdCallFunc3(RTN rtn, char *name, AFUNPTR replacement_func) {
   2911   if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
   2912     InformAboutFunctionWrap(rtn, name);
   2913     PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
   2914                                  CALLINGSTD_STDCALL,
   2915                                  "proto",
   2916                                  PIN_PARG(uintptr_t),
   2917                                  PIN_PARG(uintptr_t),
   2918                                  PIN_PARG(uintptr_t),
   2919                                  PIN_PARG_END());
   2920     RTN_ReplaceSignature(rtn,
   2921                          AFUNPTR(replacement_func),
   2922                          IARG_PROTOTYPE, proto,
   2923                          IARG_THREAD_ID,
   2924                          IARG_INST_PTR,
   2925                          IARG_CONTEXT,
   2926                          IARG_ORIG_FUNCPTR,
   2927                          IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
   2928                          IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
   2929                          IARG_FUNCARG_ENTRYPOINT_VALUE, 2,
   2930                          IARG_END);
   2931     PROTO_Free(proto);
   2932   }
   2933 }
   2934 
   2935 void WrapStdCallFunc4(RTN rtn, char *name, AFUNPTR replacement_func) {
   2936   if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
   2937     InformAboutFunctionWrap(rtn, name);
   2938     PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
   2939                                  CALLINGSTD_STDCALL,
   2940                                  "proto",
   2941                                  PIN_PARG(uintptr_t),
   2942                                  PIN_PARG(uintptr_t),
   2943                                  PIN_PARG(uintptr_t),
   2944                                  PIN_PARG(uintptr_t),
   2945                                  PIN_PARG_END());
   2946     RTN_ReplaceSignature(rtn,
   2947                          AFUNPTR(replacement_func),
   2948                          IARG_PROTOTYPE, proto,
   2949                          IARG_THREAD_ID,
   2950                          IARG_INST_PTR,
   2951                          IARG_CONTEXT,
   2952                          IARG_ORIG_FUNCPTR,
   2953                          IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
   2954                          IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
   2955                          IARG_FUNCARG_ENTRYPOINT_VALUE, 2,
   2956                          IARG_FUNCARG_ENTRYPOINT_VALUE, 3,
   2957                          IARG_END);
   2958     PROTO_Free(proto);
   2959   }
   2960 }
   2961 
   2962 void WrapStdCallFunc5(RTN rtn, char *name, AFUNPTR replacement_func) {
   2963   if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
   2964     InformAboutFunctionWrap(rtn, name);
   2965     PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
   2966                                  CALLINGSTD_STDCALL,
   2967                                  "proto",
   2968                                  PIN_PARG(uintptr_t),
   2969                                  PIN_PARG(uintptr_t),
   2970                                  PIN_PARG(uintptr_t),
   2971                                  PIN_PARG(uintptr_t),
   2972                                  PIN_PARG(uintptr_t),
   2973                                  PIN_PARG_END());
   2974     RTN_ReplaceSignature(rtn,
   2975                          AFUNPTR(replacement_func),
   2976                          IARG_PROTOTYPE, proto,
   2977                          IARG_THREAD_ID,
   2978                          IARG_INST_PTR,
   2979                          IARG_CONTEXT,
   2980                          IARG_ORIG_FUNCPTR,
   2981                          IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
   2982                          IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
   2983                          IARG_FUNCARG_ENTRYPOINT_VALUE, 2,
   2984                          IARG_FUNCARG_ENTRYPOINT_VALUE, 3,
   2985                          IARG_FUNCARG_ENTRYPOINT_VALUE, 4,
   2986                          IARG_END);
   2987     PROTO_Free(proto);
   2988   }
   2989 }
   2990 
   2991 void WrapStdCallFunc6(RTN rtn, char *name, AFUNPTR replacement_func) {
   2992   if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
   2993     InformAboutFunctionWrap(rtn, name);
   2994     PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
   2995                                  CALLINGSTD_STDCALL,
   2996                                  "proto",
   2997                                  PIN_PARG(uintptr_t),
   2998                                  PIN_PARG(uintptr_t),
   2999                                  PIN_PARG(uintptr_t),
   3000                                  PIN_PARG(uintptr_t),
   3001                                  PIN_PARG(uintptr_t),
   3002                                  PIN_PARG(uintptr_t),
   3003                                  PIN_PARG_END());
   3004     RTN_ReplaceSignature(rtn,
   3005                          AFUNPTR(replacement_func),
   3006                          IARG_PROTOTYPE, proto,
   3007                          IARG_THREAD_ID,
   3008                          IARG_INST_PTR,
   3009                          IARG_CONTEXT,
   3010                          IARG_ORIG_FUNCPTR,
   3011                          IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
   3012                          IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
   3013                          IARG_FUNCARG_ENTRYPOINT_VALUE, 2,
   3014                          IARG_FUNCARG_ENTRYPOINT_VALUE, 3,
   3015                          IARG_FUNCARG_ENTRYPOINT_VALUE, 4,
   3016                          IARG_FUNCARG_ENTRYPOINT_VALUE, 5,
   3017                          IARG_END);
   3018     PROTO_Free(proto);
   3019   }
   3020 }
   3021 
   3022 void WrapStdCallFunc7(RTN rtn, char *name, AFUNPTR replacement_func) {
   3023   if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
   3024     InformAboutFunctionWrap(rtn, name);
   3025     PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
   3026                                  CALLINGSTD_STDCALL,
   3027                                  "proto",
   3028                                  PIN_PARG(uintptr_t),
   3029                                  PIN_PARG(uintptr_t),
   3030                                  PIN_PARG(uintptr_t),
   3031                                  PIN_PARG(uintptr_t),
   3032                                  PIN_PARG(uintptr_t),
   3033                                  PIN_PARG(uintptr_t),
   3034                                  PIN_PARG(uintptr_t),
   3035                                  PIN_PARG_END());
   3036     RTN_ReplaceSignature(rtn,
   3037                          AFUNPTR(replacement_func),
   3038                          IARG_PROTOTYPE, proto,
   3039                          IARG_THREAD_ID,
   3040                          IARG_INST_PTR,
   3041                          IARG_CONTEXT,
   3042                          IARG_ORIG_FUNCPTR,
   3043                          IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
   3044                          IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
   3045                          IARG_FUNCARG_ENTRYPOINT_VALUE, 2,
   3046                          IARG_FUNCARG_ENTRYPOINT_VALUE, 3,
   3047                          IARG_FUNCARG_ENTRYPOINT_VALUE, 4,
   3048                          IARG_FUNCARG_ENTRYPOINT_VALUE, 5,
   3049                          IARG_FUNCARG_ENTRYPOINT_VALUE, 6,
   3050                          IARG_END);
   3051     PROTO_Free(proto);
   3052   }
   3053 }
   3054 
   3055 void WrapStdCallFunc8(RTN rtn, char *name, AFUNPTR replacement_func) {
   3056   if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
   3057     InformAboutFunctionWrap(rtn, name);
   3058     PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
   3059                                  CALLINGSTD_STDCALL,
   3060                                  "proto",
   3061                                  PIN_PARG(uintptr_t),
   3062                                  PIN_PARG(uintptr_t),
   3063                                  PIN_PARG(uintptr_t),
   3064                                  PIN_PARG(uintptr_t),
   3065                                  PIN_PARG(uintptr_t),
   3066                                  PIN_PARG(uintptr_t),
   3067                                  PIN_PARG(uintptr_t),
   3068                                  PIN_PARG(uintptr_t),
   3069                                  PIN_PARG_END());
   3070     RTN_ReplaceSignature(rtn,
   3071                          AFUNPTR(replacement_func),
   3072                          IARG_PROTOTYPE, proto,
   3073                          IARG_THREAD_ID,
   3074                          IARG_INST_PTR,
   3075                          IARG_CONTEXT,
   3076                          IARG_ORIG_FUNCPTR,
   3077                          IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
   3078                          IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
   3079                          IARG_FUNCARG_ENTRYPOINT_VALUE, 2,
   3080                          IARG_FUNCARG_ENTRYPOINT_VALUE, 3,
   3081                          IARG_FUNCARG_ENTRYPOINT_VALUE, 4,
   3082                          IARG_FUNCARG_ENTRYPOINT_VALUE, 5,
   3083                          IARG_FUNCARG_ENTRYPOINT_VALUE, 6,
   3084                          IARG_FUNCARG_ENTRYPOINT_VALUE, 7,
   3085                          IARG_END);
   3086     PROTO_Free(proto);
   3087   }
   3088 }
   3089 
   3090 void WrapStdCallFunc10(RTN rtn, char *name, AFUNPTR replacement_func) {
   3091   if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
   3092     InformAboutFunctionWrap(rtn, name);
   3093     PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
   3094                                  CALLINGSTD_STDCALL,
   3095                                  "proto",
   3096                                  PIN_PARG(uintptr_t),
   3097                                  PIN_PARG(uintptr_t),
   3098                                  PIN_PARG(uintptr_t),
   3099                                  PIN_PARG(uintptr_t),
   3100                                  PIN_PARG(uintptr_t),
   3101                                  PIN_PARG(uintptr_t),
   3102                                  PIN_PARG(uintptr_t),
   3103                                  PIN_PARG(uintptr_t),
   3104                                  PIN_PARG(uintptr_t),
   3105                                  PIN_PARG(uintptr_t),
   3106                                  PIN_PARG_END());
   3107     RTN_ReplaceSignature(rtn,
   3108                          AFUNPTR(replacement_func),
   3109                          IARG_PROTOTYPE, proto,
   3110                          IARG_THREAD_ID,
   3111                          IARG_INST_PTR,
   3112                          IARG_CONTEXT,
   3113                          IARG_ORIG_FUNCPTR,
   3114                          IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
   3115                          IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
   3116                          IARG_FUNCARG_ENTRYPOINT_VALUE, 2,
   3117                          IARG_FUNCARG_ENTRYPOINT_VALUE, 3,
   3118                          IARG_FUNCARG_ENTRYPOINT_VALUE, 4,
   3119                          IARG_FUNCARG_ENTRYPOINT_VALUE, 5,
   3120                          IARG_FUNCARG_ENTRYPOINT_VALUE, 6,
   3121                          IARG_FUNCARG_ENTRYPOINT_VALUE, 7,
   3122                          IARG_FUNCARG_ENTRYPOINT_VALUE, 8,
   3123                          IARG_FUNCARG_ENTRYPOINT_VALUE, 9,
   3124                          IARG_END);
   3125     PROTO_Free(proto);
   3126   }
   3127 }
   3128 
   3129 void WrapStdCallFunc11(RTN rtn, char *name, AFUNPTR replacement_func) {
   3130   if (RTN_Valid(rtn) && RtnMatchesName(RTN_Name(rtn), name)) {
   3131     InformAboutFunctionWrap(rtn, name);
   3132     PROTO proto = PROTO_Allocate(PIN_PARG(uintptr_t),
   3133                                  CALLINGSTD_STDCALL,
   3134                                  "proto",
   3135                                  PIN_PARG(uintptr_t),
   3136                                  PIN_PARG(uintptr_t),
   3137                                  PIN_PARG(uintptr_t),
   3138                                  PIN_PARG(uintptr_t),
   3139                                  PIN_PARG(uintptr_t),
   3140                                  PIN_PARG(uintptr_t),
   3141                                  PIN_PARG(uintptr_t),
   3142                                  PIN_PARG(uintptr_t),
   3143                                  PIN_PARG(uintptr_t),
   3144                                  PIN_PARG(uintptr_t),
   3145                                  PIN_PARG(uintptr_t),
   3146                                  PIN_PARG_END());
   3147     RTN_ReplaceSignature(rtn,
   3148                          AFUNPTR(replacement_func),
   3149                          IARG_PROTOTYPE, proto,
   3150                          IARG_THREAD_ID,
   3151                          IARG_INST_PTR,
   3152                          IARG_CONTEXT,
   3153                          IARG_ORIG_FUNCPTR,
   3154                          IARG_FUNCARG_ENTRYPOINT_VALUE, 0,
   3155                          IARG_FUNCARG_ENTRYPOINT_VALUE, 1,
   3156                          IARG_FUNCARG_ENTRYPOINT_VALUE, 2,
   3157                          IARG_FUNCARG_ENTRYPOINT_VALUE, 3,
   3158                          IARG_FUNCARG_ENTRYPOINT_VALUE, 4,
   3159                          IARG_FUNCARG_ENTRYPOINT_VALUE, 5,
   3160                          IARG_FUNCARG_ENTRYPOINT_VALUE, 6,
   3161                          IARG_FUNCARG_ENTRYPOINT_VALUE, 7,
   3162                          IARG_FUNCARG_ENTRYPOINT_VALUE, 8,
   3163                          IARG_FUNCARG_ENTRYPOINT_VALUE, 9,
   3164                          IARG_FUNCARG_ENTRYPOINT_VALUE, 10,
   3165                          IARG_END);
   3166     PROTO_Free(proto);
   3167   }
   3168 }
   3169 
   3170 #endif
   3171 
// Offers one routine |rtn| of image |img| to every known interceptor:
// allocation functions, threading/synchronization primitives (pthread on
// GCC builds, Win32/ntdll on MSVC builds), dynamic annotations, a few I/O
// calls, string/memory functions and C++ static-init guards.
// Nothing is done for images for which IgnoreImage() returns true.
//
// NOTE(review): the INSERT_*, WRAP*, WRAPSTD*, WrapFunc* and ReplaceFunc*
// helpers are defined elsewhere in this file; presumably each one acts only
// when the routine name matches the given name -- confirm against their
// definitions.
static void MaybeInstrumentOneRoutine(IMG img, RTN rtn) {
  if (IgnoreImage(img)) {
    return;
  }
  string rtn_name = RTN_Name(rtn);
  string img_name = IMG_Name(img);
  if (debug_wrap) {
    Printf("%s: %s %s pc=%p\n", __FUNCTION__, rtn_name.c_str(),
           img_name.c_str(), RTN_Address(rtn));
  }

  // malloc/free/etc
  // Allocation entry points.  GCC builds list the Itanium-ABI mangled names
  // of operator new / new[]; MSVC builds list the plain operator names.
  const char *malloc_names[] = {
    "malloc",
#if defined(__GNUC__)
    "_Znwm",                 // operator new(unsigned long)
    "_Znam",                 // operator new[](unsigned long)
    "_Znwj",                 // operator new(unsigned int)
    "_Znaj",                 // operator new[](unsigned int)
    "_ZnwmRKSt9nothrow_t",   // operator new(unsigned long, nothrow_t const&)
    "_ZnamRKSt9nothrow_t",   // operator new[](unsigned long, nothrow_t const&)
    "_ZnwjRKSt9nothrow_t",   // operator new(unsigned int, nothrow_t const&)
    "_ZnajRKSt9nothrow_t",   // operator new[](unsigned int, nothrow_t const&)
#endif  // __GNUC__
#if defined(_MSC_VER)
    "operator new",
    "operator new[]",
#endif  // _MSC_VER
  };

  // Deallocation entry points, mirroring malloc_names above.
  const char *free_names[] = {
    "free",
#if defined(__GNUC__)
    "_ZdaPv",                // operator delete[](void*)
    "_ZdlPv",                // operator delete(void*)
    "_ZdlPvRKSt9nothrow_t",  // operator delete(void*, nothrow_t const&)
    "_ZdaPvRKSt9nothrow_t",  // operator delete[](void*, nothrow_t const&)
#endif  // __GNUC__
#if defined(_MSC_VER)
    "operator delete",
    "operator delete[]",
#endif  // _MSC_VER
  };

  // Every allocation name shares the Before_malloc handler.
  for (size_t i = 0; i < TS_ARRAY_SIZE(malloc_names); i++) {
    const char *name = malloc_names[i];
    INSERT_BEFORE_1_SP(name, Before_malloc);
  }

  // Every deallocation name shares the Before_free handler.
  for (size_t i = 0; i < TS_ARRAY_SIZE(free_names); i++) {
    const char *name = free_names[i];
    INSERT_BEFORE_1_SP(name, Before_free);
  }

  INSERT_BEFORE_2_SP("calloc", Before_calloc);
  INSERT_BEFORE_2_SP("realloc", Before_realloc);

#if defined(__GNUC__)
  // POSIX-side interceptors (GCC builds only).
  WrapFunc6(img, rtn, "mmap", (AFUNPTR)WRAP_NAME(mmap));
  WrapFunc4(img, rtn, "munmap", (AFUNPTR)WRAP_NAME(munmap));

  WrapFunc4(img, rtn, "lockf", (AFUNPTR)WRAP_NAME(lockf));
  // pthread create/join
  WrapFunc4(img, rtn, "pthread_create", (AFUNPTR)WRAP_NAME(pthread_create));
  WrapFunc4(img, rtn, "pthread_join", (AFUNPTR)WRAP_NAME(pthread_join));
  WrapFunc4(img, rtn, "fwrite", (AFUNPTR)WRAP_NAME(fwrite));

  // Before_start_thread receives the stack pointer value at routine entry.
  INSERT_FN(IPOINT_BEFORE, "start_thread",
            Before_start_thread,
            IARG_REG_VALUE, REG_STACK_PTR, IARG_END);

  // pthread_cond_*
  INSERT_BEFORE_1("pthread_cond_signal", Before_pthread_cond_signal);
  WRAP4(pthread_cond_wait);
  WRAP4(pthread_cond_timedwait);

  // pthread_mutex_*
  INSERT_BEFORE_1("pthread_mutex_init", Before_pthread_mutex_init);
  INSERT_BEFORE_1("pthread_mutex_destroy", Before_pthread_mutex_destroy);
  INSERT_BEFORE_1("pthread_mutex_unlock", Before_pthread_unlock);


  // Remaining mutex entry points, spin locks and rwlock acquisitions.
  INSERT_BEFORE_1_SP("pthread_mutex_lock", Before_pthread_mutex_lock);
  WRAP4(pthread_mutex_trylock);
  WRAP4(pthread_spin_lock);
  WRAP4(pthread_spin_trylock);
  WRAP4(pthread_spin_init);
  WRAP4(pthread_spin_destroy);
  WRAP4(pthread_spin_unlock);
  WRAP4(pthread_rwlock_wrlock);
  WRAP4(pthread_rwlock_rdlock);
  WRAP4(pthread_rwlock_trywrlock);
  WRAP4(pthread_rwlock_tryrdlock);

  // pthread_rwlock_*
  // Note: rwlock unlock shares Before_pthread_unlock with mutex unlock.
  INSERT_BEFORE_1("pthread_rwlock_init", Before_pthread_rwlock_init);
  INSERT_BEFORE_1("pthread_rwlock_destroy", Before_pthread_rwlock_destroy);
  INSERT_BEFORE_1("pthread_rwlock_unlock", Before_pthread_unlock);

  // pthread_barrier_*
  WrapFunc4(img, rtn, "pthread_barrier_init",
            (AFUNPTR)WRAP_NAME(pthread_barrier_init));
  WrapFunc4(img, rtn, "pthread_barrier_wait",
            (AFUNPTR)WRAP_NAME(pthread_barrier_wait));

  // pthread_once
  WrapFunc4(img, rtn, "pthread_once", (AFUNPTR)WRAP_NAME(pthread_once));

  // sem_*
  INSERT_AFTER_1("sem_open", After_sem_open);
  INSERT_BEFORE_1("sem_post", Before_sem_post);
  WRAP4(sem_wait);
  WRAP4(sem_trywait);

  INSERT_BEFORE_0("epoll_ctl", Before_epoll_ctl);
  INSERT_AFTER_0("epoll_wait", After_epoll_wait);
#endif  // __GNUC__

#ifdef _MSC_VER
  // Windows-side interceptors (MSVC builds only).
  WrapStdCallFunc6(rtn, "CreateThread", (AFUNPTR)WRAP_NAME(CreateThread));
  WRAPSTD1(ResumeThread);

  // Before_BaseThreadInitThunk receives the stack pointer value at entry.
  INSERT_FN(IPOINT_BEFORE, "BaseThreadInitThunk",
            Before_BaseThreadInitThunk,
            IARG_REG_VALUE, REG_STACK_PTR, IARG_END);

  // ExitThread is handled by the same callback as RtlExitUserThread.
  INSERT_BEFORE_0("RtlExitUserThread", Before_RtlExitUserThread);
  INSERT_BEFORE_0("ExitThread", Before_RtlExitUserThread);

  // Critical sections, handles, events, semaphores.
  WRAPSTD1(RtlInitializeCriticalSection);
  WRAPSTD2(RtlInitializeCriticalSectionAndSpinCount);
  WRAPSTD3(RtlInitializeCriticalSectionEx);
  WRAPSTD1(RtlDeleteCriticalSection);
  WRAPSTD1(RtlEnterCriticalSection);
  WRAPSTD1(RtlTryEnterCriticalSection);
  WRAPSTD1(RtlLeaveCriticalSection);
  WRAPSTD7(DuplicateHandle);
  WRAPSTD1(SetEvent);
  WRAPSTD4(CreateSemaphoreA);
  WRAPSTD4(CreateSemaphoreW);
  WRAPSTD3(ReleaseSemaphore);

  // Interlocked singly-linked lists.
  WRAPSTD1(RtlInterlockedPopEntrySList);
  WRAPSTD2(RtlInterlockedPushEntrySList);

#if 1
  // Slim reader/writer locks and condition variables.
  WRAPSTD1(RtlAcquireSRWLockExclusive);
  WRAPSTD1(RtlAcquireSRWLockShared);
  WRAPSTD1(RtlTryAcquireSRWLockExclusive);
  WRAPSTD1(RtlTryAcquireSRWLockShared);
  WRAPSTD1(RtlReleaseSRWLockExclusive);
  WRAPSTD1(RtlReleaseSRWLockShared);
  WRAPSTD1(RtlInitializeSRWLock);
  // For some reason, RtlRunOnceInitialize appears to be aliased to
  // RtlInitializeSRWLock, so it is wrapped with the same wrapper.
  // NOTE(review): the earlier wording of this comment named
  // RtlInitializeSRWLock on both sides; the reading above is inferred from
  // the call below -- confirm.
  WrapStdCallFunc1(rtn, "RtlRunOnceInitialize",
                   (AFUNPTR)Wrap_RtlInitializeSRWLock);

  /* We haven't seen these syscalls used in the wild yet.
  WRAPSTD2(RtlUpdateClonedSRWLock);
  WRAPSTD1(RtlAcquireReleaseSRWLockExclusive);
  WRAPSTD1(RtlUpdateClonedCriticalSection);
  */

  WRAPSTD1(RtlWakeConditionVariable);
  WRAPSTD1(RtlWakeAllConditionVariable);
  WRAPSTD4(RtlSleepConditionVariableSRW);
  WRAPSTD3(RtlSleepConditionVariableCS);
#endif  // if 1

  // Thread pool work items and registered waits.
  WRAPSTD3(RtlQueueWorkItem);
  WRAPSTD6(RegisterWaitForSingleObject);
  WRAPSTD2(UnregisterWaitEx);

  WRAPSTD3(WaitForSingleObjectEx);
  WRAPSTD5(WaitForMultipleObjectsEx);

  // Windows memory allocation.
  WrapStdCallFunc4(rtn, "VirtualAlloc", (AFUNPTR)(WRAP_NAME(VirtualAlloc)));
  WrapStdCallFunc6(rtn, "ZwAllocateVirtualMemory", (AFUNPTR)(WRAP_NAME(ZwAllocateVirtualMemory)));
  WrapStdCallFunc2(rtn, "GlobalAlloc", (AFUNPTR)WRAP_NAME(GlobalAlloc));
//  WrapStdCallFunc3(rtn, "RtlAllocateHeap", (AFUNPTR) WRAP_NAME(AllocateHeap));
//  WrapStdCallFunc3(rtn, "HeapCreate", (AFUNPTR) WRAP_NAME(HeapCreate));
#endif  // _MSC_VER

  // Annotations.
  // The WTF-prefixed names are routed to the same handlers as the
  // unprefixed ones.
  INSERT_BEFORE_4("AnnotateBenignRace", On_AnnotateBenignRace);
  INSERT_BEFORE_5("AnnotateBenignRaceSized", On_AnnotateBenignRaceSized);
  INSERT_BEFORE_5("WTFAnnotateBenignRaceSized", On_AnnotateBenignRaceSized);
  INSERT_BEFORE_4("AnnotateExpectRace", On_AnnotateExpectRace);
  INSERT_BEFORE_2("AnnotateFlushExpectedRaces", On_AnnotateFlushExpectedRaces);
  INSERT_BEFORE_3("AnnotateTraceMemory", On_AnnotateTraceMemory);
  INSERT_BEFORE_4("AnnotateNewMemory", On_AnnotateNewMemory);
  INSERT_BEFORE_3("AnnotateNoOp", On_AnnotateNoOp);
  INSERT_BEFORE_2("AnnotateFlushState", On_AnnotateFlushState);

  // AnnotateCondVarSignalAll shares the handler of AnnotateCondVarSignal.
  INSERT_BEFORE_3("AnnotateCondVarWait", On_AnnotateCondVarWait);
  INSERT_BEFORE_3("AnnotateCondVarSignal", On_AnnotateCondVarSignal);
  INSERT_BEFORE_3("AnnotateCondVarSignalAll", On_AnnotateCondVarSignal);
  INSERT_BEFORE_3("AnnotateHappensBefore", On_AnnotateHappensBefore);
  INSERT_BEFORE_3("WTFAnnotateHappensBefore", On_AnnotateHappensBefore);
  INSERT_BEFORE_3("AnnotateHappensAfter", On_AnnotateHappensAfter);
  INSERT_BEFORE_3("WTFAnnotateHappensAfter", On_AnnotateHappensAfter);

  INSERT_BEFORE_3("AnnotateEnableRaceDetection", On_AnnotateEnableRaceDetection);
  INSERT_BEFORE_0("AnnotateIgnoreReadsBegin", On_AnnotateIgnoreReadsBegin);
  INSERT_BEFORE_0("AnnotateIgnoreReadsEnd", On_AnnotateIgnoreReadsEnd);
  INSERT_BEFORE_0("AnnotateIgnoreWritesBegin", On_AnnotateIgnoreWritesBegin);
  INSERT_BEFORE_0("AnnotateIgnoreWritesEnd", On_AnnotateIgnoreWritesEnd);
  INSERT_BEFORE_3("AnnotateThreadName", On_AnnotateThreadName);
  INSERT_BEFORE_4("AnnotatePublishMemoryRange", On_AnnotatePublishMemoryRange);
  INSERT_BEFORE_4("AnnotateUnpublishMemoryRange", On_AnnotateUnpublishMemoryRange);
  INSERT_BEFORE_3("AnnotateMutexIsUsedAsCondVar", On_AnnotateMutexIsUsedAsCondVar);
  INSERT_BEFORE_3("AnnotateMutexIsNotPHB", On_AnnotateMutexIsNotPhb);

  INSERT_BEFORE_3("AnnotatePCQCreate", On_AnnotatePCQCreate);
  INSERT_BEFORE_3("AnnotatePCQDestroy", On_AnnotatePCQDestroy);
  INSERT_BEFORE_3("AnnotatePCQPut", On_AnnotatePCQPut);
  INSERT_BEFORE_3("AnnotatePCQGet", On_AnnotatePCQGet);

  INSERT_BEFORE_3("AnnotateRWLockCreate", On_AnnotateRWLockCreate);
  INSERT_BEFORE_3("AnnotateRWLockDestroy", On_AnnotateRWLockDestroy);
  INSERT_BEFORE_4("AnnotateRWLockAcquired", On_AnnotateRWLockAcquired);
  INSERT_BEFORE_4("AnnotateRWLockReleased", On_AnnotateRWLockReleased);

  // ThreadSanitizerQuery
  WrapFunc4(img, rtn, "ThreadSanitizerQuery",
            (AFUNPTR)WRAP_NAME(ThreadSanitizerQuery));
  WrapFunc4(img, rtn, "RunningOnValgrind",
            (AFUNPTR)WRAP_NAME(RunningOnValgrind));

  // I/O
  // Handled in two groups: Before_SignallingIOCall is attached before
  // write/unlink/rmdir, After_WaitingIOCall after read/open-style calls.
  INSERT_BEFORE_0("write", Before_SignallingIOCall);
  INSERT_BEFORE_0("unlink", Before_SignallingIOCall);
  INSERT_BEFORE_0("rmdir", Before_SignallingIOCall);
//  INSERT_BEFORE_0("send", Before_SignallingIOCall);
  INSERT_AFTER_0("__read_nocancel", After_WaitingIOCall);
  INSERT_AFTER_0("fopen", After_WaitingIOCall);
  INSERT_AFTER_0("__fopen_internal", After_WaitingIOCall);
  INSERT_AFTER_0("open", After_WaitingIOCall);
  INSERT_AFTER_0("opendir", After_WaitingIOCall);
//  INSERT_AFTER_0("recv", After_WaitingIOCall);

  // strlen and friends.
  // These wrappers will generate memory access events.
  // So, if we don't want to get those events (e.g. memcpy inside
  // ld.so or ntdll.dll) we don't wrap them and the regular
  // ignore machinery will make sure we don't get the events.
  if (ThreadSanitizerWantToInstrumentSblock(RTN_Address(rtn))) {
    // index/rindex reuse the strchr/strrchr replacements.
    ReplaceFunc3(img, rtn, "memchr", (AFUNPTR)Replace_memchr);
    ReplaceFunc3(img, rtn, "strchr", (AFUNPTR)Replace_strchr);
    ReplaceFunc3(img, rtn, "index", (AFUNPTR)Replace_strchr);
    ReplaceFunc3(img, rtn, "strrchr", (AFUNPTR)Replace_strrchr);
    ReplaceFunc3(img, rtn, "rindex", (AFUNPTR)Replace_strrchr);
    ReplaceFunc3(img, rtn, "strlen", (AFUNPTR)Replace_strlen);
    ReplaceFunc3(img, rtn, "strcmp", (AFUNPTR)Replace_strcmp);
    ReplaceFunc3(img, rtn, "strncmp", (AFUNPTR)Replace_strncmp);
    ReplaceFunc3(img, rtn, "memcpy", (AFUNPTR)Replace_memcpy);
    ReplaceFunc3(img, rtn, "memcmp", (AFUNPTR)Replace_memcmp);
    ReplaceFunc3(img, rtn, "memmove", (AFUNPTR)Replace_memmove);
    ReplaceFunc3(img, rtn, "strcpy", (AFUNPTR)Replace_strcpy);
    ReplaceFunc3(img, rtn, "strncpy", (AFUNPTR)Replace_strncpy);
    ReplaceFunc3(img, rtn, "strcat", (AFUNPTR)Replace_strcat);
    ReplaceFunc3(img, rtn, "stpcpy", (AFUNPTR)Replace_stpcpy);
  }

  // __cxa_guard_acquire / __cxa_guard_release
  INSERT_BEFORE_1("__cxa_guard_acquire", Before_cxa_guard_acquire);
  INSERT_AFTER_1("__cxa_guard_acquire", After_cxa_guard_acquire);
  INSERT_AFTER_0("__cxa_guard_release", After_cxa_guard_release);

  // Process-exit hooks.
  INSERT_BEFORE_0("atexit", On_atexit);
  INSERT_BEFORE_0("exit", On_exit);
}
   3444 
   3445 // Pin calls this function every time a new img is loaded.
   3446 static void CallbackForIMG(IMG img, void *v) {
   3447   if (debug_wrap) {
   3448     Printf("Started CallbackForIMG %s\n", IMG_Name(img).c_str());
   3449   }
   3450 
   3451   string img_name = IMG_Name(img);
   3452   for (SEC sec = IMG_SecHead(img); SEC_Valid(sec); sec = SEC_Next(sec)) {
   3453     for (RTN rtn = SEC_RtnHead(sec); RTN_Valid(rtn); rtn = RTN_Next(rtn)) {
   3454       MaybeInstrumentOneRoutine(img, rtn);
   3455     }
   3456   }
   3457   // In DEBUG_MODE check that we have the debug symbols in the Windows guts.
   3458   // We should work w/o them too.
   3459   // TODO(timurrrr): I doubt the problem is the missing symbols.
   3460   // I have a strong gut feeling that this syscall was added
   3461   // in Vista but only used since Windows 7. We had its wrapper wrong
   3462   // (found on W7) but the Vista build was fine for months.
   3463   // Also, we wrap RtlReleaseSRWLock*, so our TSan assertions would have been
   3464   // broken if RtlTryAcquireSRWLock* wasn't wrapped - and we haven't see this.
   3465   if (DEBUG_MODE && img_name.find("ntdll.dll") != string::npos) {
   3466     if (g_wrapped_functions.count("RtlTryAcquireSRWLockExclusive") == 0) {
   3467       Printf("WARNING: Debug symbols for ntdll.dll not found.\n");
   3468     }
   3469   }
   3470 }
   3471 
   3472 // Returns:
   3473 // TRUE
   3474 // If user is interested to inject Pin (and tool) into child/exec-ed process
   3475 // FALSE
   3476 // If user is not interested to inject Pin (and tool) into child/exec-ed process
   3477 static BOOL CallbackForExec(CHILD_PROCESS childProcess, VOID *val) {
   3478   int argc = 0;
   3479   const CHAR *const * argv = NULL;
   3480   CHILD_PROCESS_GetCommandLine(childProcess, &argc, &argv);
   3481   CHECK(argc > 0);
   3482   CHECK(argv);
   3483   bool follow = G_flags->trace_children;
   3484   if (DEBUG_MODE) {
   3485     Printf("CallbackForExec: follow=%d: ", follow);
   3486     for (int i = 0; i < argc; i++) {
   3487       Printf("%s ", argv[i]);
   3488     }
   3489   }
   3490   Printf("\n");
   3491   return follow;
   3492 }
   3493 
   3494 //--------- Fini ---------- {{{1
   3495 static void CallbackForFini(INT32 code, void *v) {
   3496   DumpEvent(0, THR_END, 0, 0, 0, 0);
   3497   ThreadSanitizerFini();
   3498   if (g_race_verifier_active) {
   3499     RaceVerifierFini();
   3500   }
   3501   if (G_flags->show_stats) {
   3502     TraceInfo::PrintTraceProfile();
   3503   }
   3504   if (G_flags->error_exitcode && GetNumberOfFoundErrors() > 0) {
   3505     exit(G_flags->error_exitcode);
   3506   }
   3507 }
   3508 
   3509 //--------- Call Coverage ----------------- {{{1
   3510 // A simplistic call coverage tool.
   3511 // Outputs all pairs <call_site,call_target>.
   3512 
// Set of <call_site, call_target> address pairs.
typedef set<pair<uintptr_t, uintptr_t> > CallCoverageSet;
// All pairs registered so far; allocated lazily by
// CallCoverageCallbackForTRACE().
static CallCoverageSet *call_coverage_set;

// Maps a pc to the routine name obtained from PcToRtnName(); filled by
// symbolize_pc() and allocated together with call_coverage_set.
static map<uintptr_t, string> *function_names_map;
// Small caches consulted without holding the lock (see the "racy" notes at
// their uses): slot pc % size holds the last pc symbolized there ...
static uintptr_t symbolized_functions_cache[1023];
// ... and slot (from ^ to) % size holds the last pair registered there.
static pair<uintptr_t, uintptr_t> registered_pairs_cache[1023];
   3519 
   3520 static void symbolize_pc(uintptr_t pc) {
   3521   // Check a simple cache if we already symbolized this pc (racey).
   3522   size_t idx = pc % TS_ARRAY_SIZE(symbolized_functions_cache);
   3523   if (symbolized_functions_cache[idx] == pc) return;
   3524 
   3525   ScopedReentrantClientLock lock(__LINE__);
   3526   CHECK(function_names_map);
   3527   if (function_names_map->count(pc) == 0) {
   3528     (*function_names_map)[pc] = PcToRtnName(pc, false);
   3529   }
   3530   symbolized_functions_cache[idx] = pc;
   3531 }
   3532 
   3533 static void CallCoverageRegisterCall(uintptr_t from, uintptr_t to) {
   3534   symbolize_pc(from);
   3535   symbolize_pc(to);
   3536 
   3537   // Check if we already registered this pair (racey).
   3538   size_t idx = (from ^ to) % TS_ARRAY_SIZE(registered_pairs_cache);
   3539   if (registered_pairs_cache[idx] == make_pair(from,to)) return;
   3540 
   3541   ScopedReentrantClientLock lock(__LINE__);
   3542   call_coverage_set->insert(make_pair(from, to));
   3543   registered_pairs_cache[idx] = make_pair(from,to);
   3544 }
   3545 
   3546 static void CallCoverageCallbackForTRACE(TRACE trace, void *v) {
   3547   RTN rtn = TRACE_Rtn(trace);
   3548   if (RTN_Valid(rtn)) {
   3549     SEC sec = RTN_Sec(rtn);
   3550     IMG img = SEC_Img(sec);
   3551     string img_name = IMG_Name(img);
   3552     // Don't instrument system libraries.
   3553     if (img_name.find("/usr/") == 0) return;
   3554   }
   3555 
   3556   if (call_coverage_set == NULL) {
   3557     call_coverage_set = new CallCoverageSet;
   3558     function_names_map = new map<uintptr_t, string>;
   3559   }
   3560   for(BBL bbl = TRACE_BblHead(trace); BBL_Valid(bbl); bbl = BBL_Next(bbl)) {
   3561     INS ins = BBL_InsTail(bbl);
   3562     if (!INS_IsProcedureCall(ins) || INS_IsSyscall(ins)) continue;
   3563     if (INS_IsDirectBranchOrCall(ins)) {
   3564       // If <from, to> is know at instrumentation time, don't instrument.
   3565       ADDRINT to = INS_DirectBranchOrCallTargetAddress(ins);
   3566       ADDRINT from = INS_Address(ins);
   3567       CallCoverageRegisterCall(from, to);
   3568     } else {
   3569       // target is dynamic. Need to instrument.
   3570       INS_InsertCall(ins, IPOINT_BEFORE,
   3571                      (AFUNPTR)CallCoverageRegisterCall,
   3572                      IARG_INST_PTR,
   3573                      IARG_BRANCH_TARGET_ADDR,
   3574                      IARG_END);
   3575     }
   3576   }
   3577 }
   3578 
   3579 // Output all <from,to> pairs.
   3580 static void CallCoverageCallbackForFini(INT32 code, void *v) {
   3581   CHECK(call_coverage_set);
   3582   CHECK(function_names_map);
   3583   for (CallCoverageSet::iterator it = call_coverage_set->begin();
   3584        it != call_coverage_set->end(); ++it) {
   3585     string from_name = (*function_names_map)[it->first];
   3586     string to_name   = (*function_names_map)[it->second];
   3587     if (to_name == ".plt" || to_name == "") continue;
   3588     Printf("CallCoverage: %s => %s\n", from_name.c_str(), to_name.c_str());
   3589   }
   3590 }
   3591 
   3592 //--------- Main -------------------------- {{{1
// Tool entry point: initializes Pin, extracts the ThreadSanitizer flags from
// the command line, sets up the output stream, registers the Pin callbacks
// for the selected mode (dry run, call coverage, or regular race detection)
// and starts the program under Pin.
int main(INT32 argc, CHAR **argv) {
  PIN_Init(argc, argv);
  PIN_InitSymbols();
  // Default output; may be redirected below according to G_flags->log_file.
  G_out = stderr;

  // Init ThreadSanitizer.
  int first_param = 1;
  // skip until '-t something.so'.
  for (; first_param < argc && argv[first_param] != string("-t");
       first_param++) {
  }
  // Step over "-t" and the argument that follows it.
  first_param += 2;
  // Collect the remaining parameters up to "--"; the listed parameters are
  // dropped and not passed to the flag parser.
  // NOTE(review): "-short_name", "-slow_asserts" and "1" are presumably Pin
  // knobs and their values rather than ThreadSanitizer flags -- confirm.
  vector<string> args;
  for (; first_param < argc; first_param++) {
    string param = argv[first_param];
    if (param == "--") break;
    if (param == "-short_name") continue;
    if (param == "-slow_asserts") continue;
    if (param == "1") continue;
    args.push_back(param);
  }

  G_flags = new FLAGS;
  ThreadSanitizerParseFlags(&args);

  // With dry_run >= 2 the program is started before any callback is
  // registered.
  if (G_flags->dry_run >= 2) {
    PIN_StartProgram();
    return 0;
  }

  // Output selection: a socket if OpenSocketForWriting() yields one,
  // otherwise a regular file when log_file is non-empty.
  FILE *socket_output = OpenSocketForWriting(G_flags->log_file);
  if (socket_output) {
    G_out = socket_output;
  } else if (!G_flags->log_file.empty()) {
    // Replace %p with tool PID
    string fname = G_flags->log_file;
    char pid_str[100] = "";
    sprintf(pid_str, "%u", getpid());
    // Every occurrence of "%p" is substituted.
    while (fname.find("%p") != fname.npos)
      fname.replace(fname.find("%p"), 2, pid_str);

    G_out = fopen(fname.c_str(), "w");
    CHECK(G_out);
  }

  ThreadSanitizerInit();

  // Call-coverage mode registers only its own two callbacks.
  if (G_flags->call_coverage) {
    PIN_AddFiniFunction(CallCoverageCallbackForFini, 0);
    TRACE_AddInstrumentFunction(CallCoverageCallbackForTRACE, 0);
    PIN_StartProgram();
    return 0;
  }

  // Claim a tool register from Pin; it must be valid.
  tls_reg = PIN_ClaimToolRegister();
  CHECK(REG_valid(tls_reg));
#if _MSC_VER
  // Windows-only bookkeeping containers for thread pool callbacks and
  // wait objects.
  g_windows_thread_pool_calback_set = new unordered_set<uintptr_t>;
  g_windows_thread_pool_wait_object_map = new unordered_map<uintptr_t, uintptr_t>;
#endif

  // Set up PIN callbacks.
  PIN_AddThreadStartFunction(CallbackForThreadStart, 0);
  PIN_AddThreadFiniFunction(CallbackForThreadFini, 0);
  PIN_AddFiniFunction(CallbackForFini, 0);
  IMG_AddInstrumentFunction(CallbackForIMG, 0);
  TRACE_AddInstrumentFunction(CallbackForTRACE, 0);
  PIN_AddFollowChildProcessFunction(CallbackForExec, NULL);

  // Startup banner: tool version, Pin build number and the hybrid mode.
  Report("ThreadSanitizerPin r%s pin %d: %s\n",
         TS_VERSION, PIN_BUILD_NUMBER,
         G_flags->pure_happens_before ? "hybrid=no" : "hybrid=yes");
  if (DEBUG_MODE) {
    Report("INFO: Debug build\n");
  }

  // When the race verifier is active, initialize it and set global_ignore.
  if (g_race_verifier_active) {
    RaceVerifierInit(G_flags->race_verifier, G_flags->race_verifier_extra);
    global_ignore = true;
  }

  // Fire!
  // NOTE(review): per the Pin documentation PIN_StartProgram() does not
  // return, so the return below is presumably only there to satisfy the
  // compiler -- confirm.
  PIN_StartProgram();
  return 0;
}
   3678 
   3679 //--------- Questions about PIN -------------------------- {{{1
   3680 /* Questions about PIN:
   3681 
   3682   - Names (e.g. pthread_create (at) ... __pthread_mutex_unlock)
  - How to get the name of a global var by its address?
   3684   - How to get stack pointer at thread creation?
   3685   - How to get a stack trace (other than intercepting calls, entries, exits)
   3686   - assert with full stack trace?
   3687   */
   3688 // end. {{{1
   3689 // vim:shiftwidth=2:softtabstop=2:expandtab
   3690