      1 /* Copyright (c) 2008-2010, Google Inc.
      2  * All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions are
      6  * met:
      7  *
      8  *     * Redistributions of source code must retain the above copyright
      9  * notice, this list of conditions and the following disclaimer.
     10  *     * Neither the name of Google Inc. nor the names of its
     11  * contributors may be used to endorse or promote products derived from
     12  * this software without specific prior written permission.
     13  *
     14  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     15  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     16  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     17  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     18  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     19  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     20  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 // This file is part of ThreadSanitizer, a dynamic data race detector.
     28 // Author: Konstantin Serebryany.
     29 // Author: Timur Iskhodzhanov.
     30 
     31 // You can find the details on this tool at
     32 // http://code.google.com/p/data-race-test
     33 
     34 #include "thread_sanitizer.h"
     35 #include "common_util.h"
     36 #include "suppressions.h"
     37 #include "ignore.h"
     38 #include "ts_lock.h"
     39 #include "ts_atomic_int.h"
     40 #include "dense_multimap.h"
     41 #include <stdarg.h>
     42 // -------- Constants --------------- {{{1
     43 // Segment ID (SID)      is in range [1, kMaxSID-1]
     44 // Segment Set ID (SSID) is in range [-kMaxSID+1, -1]
     45 // This is not a compile-time constant, but it can only be changed at startup.
     46 int kMaxSID = (1 << 23);
     47 // Flush state after so many SIDs have been allocated. Set by command line flag.
     48 int kMaxSIDBeforeFlush;
     49 
     50 // Lock ID (LID)      is in range [1, kMaxLID-1]
     51 // Lock Set ID (LSID) is in range [-kMaxLID+1, -1]
     52 const int kMaxLID = (1 << 23);
     53 
     54 // This is not a compile-time constant, but it can be changed only at startup.
     55 int kSizeOfHistoryStackTrace = 10;
     56 
     57 // Maximal number of segments in a SegmentSet.
     58 // If you change this constant, you also need to change several places
     59 // in SegmentSet code.
     60 const int kMaxSegmentSetSize = 4;
     61 
     62 // -------- Globals --------------- {{{1
     63 
     64 // If true, ignore all accesses in all threads.
     65 bool global_ignore;
     66 
     67 bool g_so_far_only_one_thread = false;
     68 bool g_has_entered_main = false;
     69 bool g_has_exited_main = false;
     70 
     71 size_t g_last_flush_time;
     72 
     73 // Incremented on each Lock and Unlock. Used by LockHistory.
     74 uint32_t g_lock_era = 0;
     75 
     76 uintptr_t g_nacl_mem_start = (uintptr_t)-1;
     77 uintptr_t g_nacl_mem_end = (uintptr_t)-1;
     78 
     79 bool g_race_verifier_active = false;
     80 
     81 bool debug_expected_races = false;
     82 bool debug_benign_races = false;
     83 bool debug_malloc = false;
     84 bool debug_free = false;
     85 bool debug_thread = false;
     86 bool debug_ignore = false;
     87 bool debug_rtn = false;
     88 bool debug_lock = false;
     89 bool debug_wrap = false;
     90 bool debug_ins = false;
     91 bool debug_shadow_stack = false;
     92 bool debug_happens_before = false;
     93 bool debug_cache = false;
     94 bool debug_race_verifier = false;
     95 bool debug_atomic = false;
     96 
     97 #define PrintfIf(flag, ...) \
     98   do { if ((flag)) Printf(__VA_ARGS__); } while ((void)0, 0)
     99 
    100 // -------- TIL --------------- {{{1
    101 // ThreadSanitizer Internal lock (scoped).
    102 class TIL {
    103  public:
    104   TIL(TSLock *lock, int lock_site, bool need_locking = true) :
    105     lock_(lock),
    106     need_locking_(need_locking) {
    107     DCHECK(lock_);
    108     if (need_locking_ && (TS_SERIALIZED == 0)) {
    109       lock_->Lock();
    110       G_stats->lock_sites[lock_site]++;
    111     }
    112   }
    113   ~TIL() {
    114     if (need_locking_ && (TS_SERIALIZED == 0))
    115       lock_->Unlock();
    116   }
    117  private:
    118   TSLock *lock_;
    119   bool need_locking_;
    120 };
    121 
    122 static TSLock *ts_lock;
    123 static TSLock *ts_ignore_below_lock;
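// Usage sketch (illustrative only, kept in a comment): an event handler that
// must run under the internal lock would guard itself like this; the value 42
// is just an arbitrary lock-site id used for G_stats->lock_sites accounting.
//   void SomeEventHandler(TSanThread *thr) {
//     TIL til(ts_lock, 42);   // locks only when TS_SERIALIZED == 0
//     ...                     // ts_lock is released automatically in ~TIL()
//   }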
    124 
    125 #ifdef TS_LLVM
    126 void ThreadSanitizerLockAcquire() {
    127   ts_lock->Lock();
    128 }
    129 
    130 void ThreadSanitizerLockRelease() {
    131   ts_lock->Unlock();
    132 }
    133 #endif
    134 
    135 static INLINE void AssertTILHeld() {
    136   if (TS_SERIALIZED == 0 && DEBUG_MODE) {
    137     ts_lock->AssertHeld();
    138   }
    139 }
    140 
    141 // -------- Util ----------------------------- {{{1
    142 
    143 // Can't use ANNOTATE_UNPROTECTED_READ, it may get instrumented.
    144 template <class T>
    145 inline T INTERNAL_ANNOTATE_UNPROTECTED_READ(const volatile T &x) {
    146   ANNOTATE_IGNORE_READS_BEGIN();
    147   T res = x;
    148   ANNOTATE_IGNORE_READS_END();
    149   return res;
    150 }
    151 
    152 static string RemoveFilePrefix(string str) {
    153   for (size_t i = 0; i < G_flags->file_prefix_to_cut.size(); i++) {
    154     string prefix_to_cut = G_flags->file_prefix_to_cut[i];
    155     size_t pos = str.find(prefix_to_cut);
    156     if (pos != string::npos) {
    157       str = str.substr(pos + prefix_to_cut.size());
    158     }
    159   }
    160   if (str.find("./") == 0) {  // remove leading ./
    161     str = str.substr(2);
    162   }
    163   return str;
    164 }
    165 
    166 string PcToRtnNameAndFilePos(uintptr_t pc) {
    167   G_stats->pc_to_strings++;
    168   string img_name;
    169   string file_name;
    170   string rtn_name;
    171   int line_no = -1;
    172   PcToStrings(pc, G_flags->demangle, &img_name, &rtn_name,
    173               &file_name, &line_no);
    174   if (G_flags->demangle && !G_flags->full_stack_frames)
    175     rtn_name = NormalizeFunctionName(rtn_name);
    176   file_name = RemoveFilePrefix(file_name);
    177   if (file_name == "") {
    178     return rtn_name + " " + RemoveFilePrefix(img_name);
    179   }
    180   char buff[10];
    181   snprintf(buff, sizeof(buff), "%d", line_no);
    182   return rtn_name + " " + file_name + ":" + buff;
    183 }
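// Example of the resulting string (names are made up for illustration):
// "MyClass::Method src/foo.cc:123", or "MyClass::Method /lib/libfoo.so"
// when no file/line information is available for the PC.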
    184 
    185 // -------- ID ---------------------- {{{1
     186 // We wrap int32_t into the ID class and then derive the various ID types from it.
     187 // This is done in an attempt to make IDs type-safe, i.e. to make it
     188 // impossible to implicitly convert one ID type to another.
    189 class ID {
    190  public:
    191   typedef int32_t T;
    192   explicit ID(T id) : id_(id) {}
    193   ID(const ID &id) : id_(id.id_) {}
    194   INLINE bool operator ==  (const ID &id) const { return id_ == id.id_; }
    195   bool operator !=  (const ID &id) const { return id_ != id.id_; }
    196   bool operator <  (const ID &id) const { return id_ < id.id_; }
    197   bool operator >  (const ID &id) const { return id_ > id.id_; }
    198   bool operator >=  (const ID &id) const { return id_ >= id.id_; }
    199   bool operator <=  (const ID &id) const { return id_ <= id.id_; }
    200 
    201   bool IsValid() const { return id_ >= 0; }
    202 
    203   const ID &operator = (const ID &id) {
    204     this->id_ = id.id_;
    205     return *this;
    206   }
    207   T raw() const { return id_; }
    208 
    209  private:
    210   T id_;
    211 };
    212 
    213 // Thread ID.
    214 // id >= 0
    215 class TID: public ID {
    216  public:
    217   static const int32_t kInvalidTID;
    218 
    219   explicit TID(T id) : ID(id) {}
    220   TID() : ID(kInvalidTID) {}
    221   bool valid() const { return raw() >= 0; }
    222 };
    223 
    224 const int32_t TID::kInvalidTID = -1;
    225 
    226 // Segment ID.
    227 // id > 0 && id < kMaxSID
    228 class SID: public ID {
    229  public:
    230   explicit SID(T id) : ID(id) {}
    231   SID() : ID(0) {}
    232   bool valid() const { return raw() > 0 && raw() < kMaxSID; }
    233 };
    234 
    235 // Lock ID.
    236 // id > 0 && id < kMaxLID
    237 class LID: public ID {
    238  public:
    239   explicit LID(T id) : ID(id) {}
    240   LID() : ID(0) {}
    241   bool valid() const { return raw() > 0 && raw() < kMaxLID; }
    242 };
    243 
    244 // LockSet ID.
    245 // Empty lockset: id == 0
    246 // Singleton:     id > 0 (id == Lock's id)
    247 // Tuple:         id < 0
    248 class LSID: public ID {
    249  public:
    250   explicit LSID(T id) : ID(id) {}
    251   LSID() : ID(INT_MAX) {}
    252   bool valid() const {
    253     return raw() < kMaxLID && raw() > -(kMaxLID);
    254   }
    255   bool IsEmpty() const { return raw() == 0; }
    256   bool IsSingleton() const { return raw() > 0; }
    257   LID GetSingleton() const { return LID(raw()); }
    258 };
    259 
    260 // SegmentSet ID.
    261 // Empty SegmentSet: id == 0
    262 // Singleton:        id > 0 (id == Segment's id)
    263 // Tuple:            id < 0
    264 class SSID: public ID {
    265  public:
    266   explicit SSID(T id) : ID(id) {}
    267   explicit SSID(SID sid) : ID(sid.raw()) {}
    268   SSID(): ID(INT_MAX) {}
    269   bool valid() const {
    270     return raw() != 0 && raw() < kMaxSID && raw() > -kMaxSID;
    271   }
    272   bool IsValidOrEmpty() { return raw() < kMaxSID && raw() > -kMaxSID; }
    273   bool IsEmpty() const { return raw() == 0; }
    274   bool IsSingleton() const {return raw() > 0; }
    275   bool IsTuple() const {return raw() < 0; }
    276   SID  GetSingleton() const {
    277     DCHECK(IsSingleton());
    278     return SID(raw());
    279   }
    280   // TODO(timurrrr): need to start SegmentSetArray indices from 1
    281   // to avoid "int ???() { return -raw() - 1; }"
    282 };
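// Encoding example, following the conventions above (values are arbitrary):
// LSID(0) is the empty lock set, LSID(7) is the singleton {L7}, and LSID(-3)
// is a multi-lock set stored in LockSet's vector (LockSet::Get() maps -3 to
// index 2). SSIDs use the same scheme with segments instead of locks.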
    283 
    284 // -------- Colors ----------------------------- {{{1
    285 // Colors for ansi terminals and for html.
    286 const char *c_bold    = "";
    287 const char *c_red     = "";
    288 const char *c_green   = "";
    289 const char *c_magenta = "";
    290 const char *c_cyan    = "";
    291 const char *c_blue    = "";
    292 const char *c_yellow  = "";
    293 const char *c_default = "";
    294 
    295 
    296 // -------- Forward decls ------ {{{1
    297 static void ForgetAllStateAndStartOver(TSanThread *thr, const char *reason);
    298 static void FlushStateIfOutOfSegments(TSanThread *thr);
    299 static int32_t raw_tid(TSanThread *t);
    300 // -------- Simple Cache ------ {{{1
    301 #include "ts_simple_cache.h"
    302 // -------- PairCache & IntPairToIntCache ------ {{{1
    303 template <typename A, typename B, typename Ret,
    304          int kHtableSize, int kArraySize = 8>
    305 class PairCache {
    306  public:
    307   PairCache() {
    308     CHECK(kHtableSize >= 0);
    309     CHECK(sizeof(Entry) == sizeof(A) + sizeof(B) + sizeof(Ret));
    310     Flush();
    311   }
    312 
    313   void Flush() {
    314     memset(this, 0, sizeof(*this));
    315 
    316     // Change the first hashtable entry so it doesn't match (0,0) on Lookup.
    317     if (kHtableSize != 0)
    318       memset(&htable_[0], 1, sizeof(Entry));
    319 
    320     // Any Lookup should fail now.
    321     for (int i = 0; i < kHtableSize; i++) {
    322       Ret tmp;
    323       DCHECK(!Lookup(htable_[i].a, htable_[i].b, &tmp));
    324     }
    325     CHECK(array_pos_    == 0);
    326     CHECK(array_filled_ == false);
    327   }
    328 
    329   void Insert(A a, B b, Ret v) {
    330     // fill the hash table
    331     if (kHtableSize != 0) {
    332       uint32_t idx  = compute_idx(a, b);
    333       htable_[idx].Fill(a, b, v);
    334     }
    335 
    336     // fill the array
    337     Ret dummy;
    338     if (kArraySize != 0 && !ArrayLookup(a, b, &dummy)) {
    339       array_[array_pos_ % kArraySize].Fill(a, b, v);
    340       array_pos_ = (array_pos_ + 1) % kArraySize;
    341       if (array_pos_ > kArraySize)
     342       if (array_pos_ == 0)  // wrapped around: the whole array is now valid
    343     }
    344   }
    345 
    346   INLINE bool Lookup(A a, B b, Ret *v) {
    347     // check the array
    348     if (kArraySize != 0 && ArrayLookup(a, b, v)) {
    349       G_stats->ls_cache_fast++;
    350       return true;
    351     }
    352     // check the hash table.
    353     if (kHtableSize != 0) {
    354       uint32_t idx  = compute_idx(a, b);
    355       Entry & prev_e = htable_[idx];
    356       if (prev_e.Match(a, b)) {
    357         *v = prev_e.v;
    358         return true;
    359       }
    360     }
    361     return false;
    362   }
    363 
    364  private:
    365   struct Entry {
    366     A a;
    367     B b;
    368     Ret v;
    369     void Fill(A a, B b, Ret v) {
    370       this->a = a;
    371       this->b = b;
    372       this->v = v;
    373     }
    374     bool Match(A a, B b) const {
    375       return this->a == a && this->b == b;
    376     }
    377   };
    378 
    379   INLINE bool ArrayLookup(A a, B b, Ret *v) {
    380     for (int i = 0; i < (array_filled_ ? kArraySize : array_pos_); i++) {
    381       Entry & entry = array_[i];
    382       if (entry.Match(a, b)) {
    383         *v = entry.v;
    384         return true;
    385       }
    386     }
    387     return false;
    388   }
    389 
    390   uint32_t compute_idx(A a, B b) {
    391     if (kHtableSize == 0)
    392       return 0;
    393     else
    394       return combine2(a, b) % kHtableSize;
    395   }
    396 
    397   static uint32_t combine2(int a, int b) {
    398     return (a << 16) ^ b;
    399   }
    400 
    401   static uint32_t combine2(SSID a, SID b) {
    402     return combine2(a.raw(), b.raw());
    403   }
    404 
    405   Entry htable_[kHtableSize];
    406 
    407   Entry array_[kArraySize];
    408 
    409   // array_pos_    - next element to write to the array_ (mod kArraySize)
    410   // array_filled_ - set to true once we write the last element of the array
    411   int array_pos_;
    412   bool array_filled_;
    413 };
    414 
    415 template<int kHtableSize, int kArraySize = 8>
    416 class IntPairToIntCache
    417   : public PairCache<int, int, int, kHtableSize, kArraySize> {};
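// Usage sketch (illustrative only, kept in a comment):
//   static IntPairToIntCache<1021> cache;  // 1021: an arbitrary prime size
//   cache.Insert(/*a=*/1, /*b=*/2, /*v=*/42);
//   int v;
//   if (cache.Lookup(1, 2, &v)) { /* v == 42 */ }
// Note that this is a lossy cache, not a map: a later Insert may evict the
// entry from both the array and the hash table, so Lookup can miss.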
    418 
    419 
    420 
    421 // -------- FreeList --------------- {{{1
    422 class FreeList {
    423  public:
    424   FreeList(int obj_size, int chunk_size)
    425     : list_(0),
    426       obj_size_(obj_size),
    427       chunk_size_(chunk_size) {
     428     CHECK_GE(obj_size_, static_cast<int>(sizeof(List*)));
     429     CHECK((obj_size_ % sizeof(List*)) == 0);
    430     CHECK_GE(chunk_size_, 1);
    431   }
    432 
    433   void *Allocate() {
    434     if (!list_)
    435       AllocateNewChunk();
    436     CHECK(list_);
    437     List *head = list_;
    438     list_ = list_->next;
    439     return reinterpret_cast<void*>(head);
    440   }
    441 
    442   void Deallocate(void *ptr) {
    443     if (DEBUG_MODE) {
    444       memset(ptr, 0xac, obj_size_);
    445     }
    446     List *new_head = reinterpret_cast<List*>(ptr);
    447     new_head->next = list_;
    448     list_ = new_head;
    449   }
    450 
    451  private:
    452   void AllocateNewChunk() {
    453     CHECK(list_ == NULL);
    454     uint8_t *new_mem = new uint8_t[obj_size_ * chunk_size_];
    455     if (DEBUG_MODE) {
    456       memset(new_mem, 0xab, obj_size_ * chunk_size_);
    457     }
    458     for (int i = 0; i < chunk_size_; i++) {
    459       List *new_head = reinterpret_cast<List*>(new_mem + obj_size_ * i);
    460       new_head->next = list_;
    461       list_ = new_head;
    462     }
    463   }
    464   struct List {
    465     struct List *next;
    466   };
    467   List *list_;
    468 
    469 
    470   const int obj_size_;
    471   const int chunk_size_;
    472 };
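// Usage sketch (illustrative only; MyNode is a made-up type): objects are
// carved out of 1024-element chunks and recycled via the intrusive list.
//   FreeList fl(sizeof(MyNode), 1024);
//   void *p = fl.Allocate();
//   MyNode *n = new(p) MyNode();
//   n->~MyNode();
//   fl.Deallocate(p);   // memory goes back to the list; it is never freed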
    473 // -------- StackTrace -------------- {{{1
    474 class StackTraceFreeList {
    475  public:
    476   uintptr_t *GetNewMemForStackTrace(size_t capacity) {
    477     DCHECK(capacity <= (size_t)G_flags->num_callers);
    478     return reinterpret_cast<uintptr_t*>(free_lists_[capacity]->Allocate());
    479   }
    480 
    481   void TakeStackTraceBack(uintptr_t *mem, size_t capacity) {
    482     DCHECK(capacity <= (size_t)G_flags->num_callers);
    483     free_lists_[capacity]->Deallocate(mem);
    484   }
    485 
    486   StackTraceFreeList() {
    487     size_t n = G_flags->num_callers + 1;
    488     free_lists_ = new FreeList *[n];
    489     free_lists_[0] = NULL;
    490     for (size_t i = 1; i < n; i++) {
    491       free_lists_[i] = new FreeList((i+2) * sizeof(uintptr_t), 1024);
    492     }
    493   }
    494 
    495  private:
     496   FreeList **free_lists_;  // Array of G_flags->num_callers + 1 slots; index 0 is unused.
    497 };
    498 
    499 static StackTraceFreeList *g_stack_trace_free_list;
    500 
    501 class StackTrace {
    502  public:
    503   static StackTrace *CreateNewEmptyStackTrace(size_t size,
    504                                               size_t capacity = 0) {
    505     ScopedMallocCostCenter cc("StackTrace::CreateNewEmptyStackTrace()");
    506     DCHECK(g_stack_trace_free_list);
    507     DCHECK(size != 0);
    508     if (capacity == 0)
    509       capacity = size;
    510     uintptr_t *mem = g_stack_trace_free_list->GetNewMemForStackTrace(capacity);
    511     DCHECK(mem);
    512     StackTrace *res = new(mem) StackTrace(size, capacity);
    513     return res;
    514   }
    515 
    516   static void Delete(StackTrace *trace) {
    517     if (!trace) return;
    518     DCHECK(g_stack_trace_free_list);
    519     g_stack_trace_free_list->TakeStackTraceBack(
    520         reinterpret_cast<uintptr_t*>(trace), trace->capacity());
    521   }
    522 
    523   size_t size() const { return size_; }
    524   size_t capacity() const { return capacity_; }
    525 
    526   void set_size(size_t size) {
    527     CHECK(size <= capacity());
    528     size_ = size;
    529   }
    530 
    531 
    532   void Set(size_t i, uintptr_t pc) {
    533     arr_[i] = pc;
    534   }
    535 
    536   uintptr_t Get(size_t i) const {
    537     return arr_[i];
    538   }
    539 
     540   static bool CutStackBelowFunc(const string &func_name) {
    541     for (size_t i = 0; i < G_flags->cut_stack_below.size(); i++) {
    542       if (StringMatch(G_flags->cut_stack_below[i], func_name)) {
    543         return true;
    544       }
    545     }
    546     return false;
    547   }
    548 
    549   static string EmbeddedStackTraceToString(const uintptr_t *emb_trace, size_t n,
    550                                            const char *indent = "    ") {
    551     string res = "";
    552     const int kBuffSize = 10000;
    553     char *buff = new char [kBuffSize];
    554     for (size_t i = 0; i < n; i++) {
    555       if (!emb_trace[i]) break;
    556       string rtn_and_file = PcToRtnNameAndFilePos(emb_trace[i]);
    557       if (rtn_and_file.find("(below main) ") == 0 ||
    558           rtn_and_file.find("ThreadSanitizerStartThread ") == 0)
    559         break;
    560 
    561       if (i == 0) res += c_bold;
    562       if (G_flags->show_pc) {
    563         snprintf(buff, kBuffSize, "%s#%-2d %p: ",
    564                  indent, static_cast<int>(i),
    565                  reinterpret_cast<void*>(emb_trace[i]));
    566       } else {
    567         snprintf(buff, kBuffSize, "%s#%-2d ", indent, static_cast<int>(i));
    568       }
    569       res += buff;
    570 
    571       res += rtn_and_file;
    572       if (i == 0) res += c_default;
    573       res += "\n";
    574 
    575       // don't print after main ...
    576       if (rtn_and_file.find("main ") == 0)
    577         break;
    578       // ... and after some default functions (see ThreadSanitizerParseFlags())
    579       // and some more functions specified via command line flag.
    580       string rtn = NormalizeFunctionName(PcToRtnName(emb_trace[i], true));
    581       if (CutStackBelowFunc(rtn))
    582         break;
    583     }
    584     delete [] buff;
    585     return res;
    586   }
    587 
    588   string ToString(const char *indent = "    ") const {
    589     if (!this) return "NO STACK TRACE\n";
    590     if (size() == 0) return "EMPTY STACK TRACE\n";
    591     return EmbeddedStackTraceToString(arr_, size(), indent);
    592   }
    593 
    594   void PrintRaw() const {
    595     for (size_t i = 0; i < size(); i++) {
    596       Printf("%p ", arr_[i]);
    597     }
    598     Printf("\n");
    599   }
    600 
    601   static bool Equals(const StackTrace *t1, const StackTrace *t2) {
    602     if (t1->size_ != t2->size_) return false;
    603     for (size_t i = 0; i < t1->size_; i++) {
    604       if (t1->arr_[i] != t2->arr_[i]) return false;
    605     }
    606     return true;
    607   }
    608 
    609   struct Less {
    610     bool operator() (const StackTrace *t1, const StackTrace *t2) const {
    611       size_t size = min(t1->size_, t2->size_);
    612       for (size_t i = 0; i < size; i++) {
    613         if (t1->arr_[i] != t2->arr_[i]) {
    614           return (t1->arr_[i] < t2->arr_[i]);
    615         }
    616       }
    617       return t1->size_ < t2->size_;
    618     }
    619   };
    620 
    621  private:
    622   StackTrace(size_t size, size_t capacity)
    623     : size_(size),
    624       capacity_(capacity) {
    625   }
    626 
    627   ~StackTrace() {}
    628 
    629   size_t size_;
    630   size_t capacity_;
    631   uintptr_t arr_[];
    632 };
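// Usage sketch (illustrative only; pc0..pc2 stand for real program counters):
//   StackTrace *trace = StackTrace::CreateNewEmptyStackTrace(3);
//   trace->Set(0, pc0); trace->Set(1, pc1); trace->Set(2, pc2);
//   Printf("%s", trace->ToString().c_str());   // "#0 ...", "#1 ...", "#2 ..."
//   StackTrace::Delete(trace);  // returns the memory to g_stack_trace_free_list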
    633 
    634 
    635 
    636 // -------- Lock -------------------- {{{1
    637 const char *kLockAllocCC = "kLockAllocCC";
    638 class Lock {
    639  public:
    640 
    641   static Lock *Create(uintptr_t lock_addr) {
    642     ScopedMallocCostCenter cc("LockLookup");
    643 //    Printf("Lock::Create: %p\n", lock_addr);
    644     // Destroy(lock_addr);
    645 
    646     // CHECK(Lookup(lock_addr) == NULL);
    647     Lock *res = LookupOrCreate(lock_addr);
    648     res->rd_held_ = 0;
    649     res->wr_held_ = 0;
    650     res->is_pure_happens_before_ = G_flags->pure_happens_before;
    651     res->last_lock_site_ = NULL;
    652     return res;
    653   }
    654 
    655   static void Destroy(uintptr_t lock_addr) {
    656 //    Printf("Lock::Destroy: %p\n", lock_addr);
    657   //  map_.erase(lock_addr);
    658   }
    659 
    660   static NOINLINE Lock *LookupOrCreate(uintptr_t lock_addr) {
    661     ScopedMallocCostCenter cc("LockLookup");
    662     Lock **lock = &(*map_)[lock_addr];
    663     if (*lock == NULL) {
    664 //      Printf("Lock::LookupOrCreate: %p\n", lock_addr);
    665       ScopedMallocCostCenter cc_lock("new Lock");
    666       *lock = new Lock(lock_addr, map_->size());
    667     }
    668     return *lock;
    669   }
    670 
    671   static NOINLINE Lock *Lookup(uintptr_t lock_addr) {
    672     ScopedMallocCostCenter cc("LockLookup");
    673     Map::iterator it = map_->find(lock_addr);
    674     if (it == map_->end()) return NULL;
    675     return it->second;
    676   }
    677 
    678   int       rd_held()   const { return rd_held_; }
    679   int       wr_held()   const { return wr_held_; }
    680   uintptr_t lock_addr() const { return lock_addr_; }
    681   LID       lid()       const { return lid_; }
    682   bool is_pure_happens_before() const { return is_pure_happens_before_; }
    683 
    684   // When a lock is pure happens-before, we need to create hb arcs
    685   // between all Unlock/Lock pairs except RdUnlock/RdLock.
     686   // For that purpose we have two IDs on which we signal/wait.
    687   // One id is the lock_addr itself, the second id is derived
    688   // from lock_addr.
    689   uintptr_t wr_signal_addr() const { return lock_addr(); }
    690   uintptr_t rd_signal_addr() const { return lock_addr() + 1; }
    691 
    692 
    693   void set_is_pure_happens_before(bool x) { is_pure_happens_before_ = x; }
    694 
    695   void WrLock(TID tid, StackTrace *lock_site) {
    696     CHECK(!rd_held_);
    697     if (wr_held_ == 0) {
    698       thread_holding_me_in_write_mode_ = tid;
    699     } else {
    700       CHECK(thread_holding_me_in_write_mode_ == tid);
    701     }
    702     wr_held_++;
    703     StackTrace::Delete(last_lock_site_);
    704     last_lock_site_ = lock_site;
    705   }
    706 
    707   void WrUnlock() {
    708     CHECK(!rd_held_);
    709     CHECK(wr_held_ > 0);
    710     wr_held_--;
    711   }
    712 
    713   void RdLock(StackTrace *lock_site) {
    714     CHECK(!wr_held_);
    715     rd_held_++;
    716     StackTrace::Delete(last_lock_site_);
    717     last_lock_site_ = lock_site;
    718   }
    719 
    720   void RdUnlock() {
    721     CHECK(!wr_held_);
    722     CHECK(rd_held_);
    723     rd_held_--;
    724   }
    725 
    726   void set_name(const char *name) { name_ = name; }
    727   const char *name() const { return name_; }
    728 
    729   string ToString() const {
    730     string res;
    731     char buff[100];
    732     snprintf(buff, sizeof(buff), "L%d", lid_.raw());
    733     // do we need to print the address?
    734     // reinterpret_cast<void*>(lock_addr()));
    735     res = buff;
    736     if (name()) {
    737       res += string(" ") + name();
    738     }
    739     return res;
    740   }
    741 
    742   static Lock *LIDtoLock(LID lid) {
    743     // slow, but needed only for reports.
    744     for (Map::iterator it = map_->begin(); it != map_->end(); ++it) {
    745       Lock *l = it->second;
    746       if (l->lid_ == lid) {
    747         return l;
    748       }
    749     }
    750     return NULL;
    751   }
    752 
    753   static string ToString(LID lid) {
    754     Lock *lock = LIDtoLock(lid);
    755     CHECK(lock);
    756     return lock->ToString();
    757   }
    758 
    759   static void ReportLockWithOrWithoutContext(LID lid, bool with_context) {
    760     if (!with_context) {
    761       Report("   L%d\n", lid.raw());
    762       return;
    763     }
    764     Lock *lock = LIDtoLock(lid);
    765     CHECK(lock);
    766     if (lock->last_lock_site_) {
    767       Report("   %s (%p)\n%s",
    768              lock->ToString().c_str(),
    769              lock->lock_addr_,
    770              lock->last_lock_site_->ToString().c_str());
    771     } else {
    772       Report("   %s. This lock was probably destroyed"
    773                  " w/o calling Unlock()\n", lock->ToString().c_str());
    774     }
    775   }
    776 
    777   static void InitClassMembers() {
    778     map_ = new Lock::Map;
    779   }
    780 
    781  private:
    782   Lock(uintptr_t lock_addr, int32_t lid)
    783     : lock_addr_(lock_addr),
    784       lid_(lid),
    785       rd_held_(0),
    786       wr_held_(0),
    787       is_pure_happens_before_(G_flags->pure_happens_before),
    788       last_lock_site_(0),
    789       name_(NULL) {
    790   }
    791 
    792   // Data members
    793   uintptr_t lock_addr_;
    794   LID       lid_;
    795   int       rd_held_;
    796   int       wr_held_;
    797   bool      is_pure_happens_before_;
    798   StackTrace *last_lock_site_;
    799   const char *name_;
    800   TID       thread_holding_me_in_write_mode_;
    801 
    802   // Static members
    803   typedef map<uintptr_t, Lock*> Map;
    804   static Map *map_;
    805 };
    806 
    807 
    808 Lock::Map *Lock::map_;
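// Lifecycle sketch (illustrative only): roughly the sequence the lock/unlock
// event handlers drive for a write lock at address mutex_addr.
//   Lock *l = Lock::Create(mutex_addr);   // on mutex creation
//   l->WrLock(tid, lock_site_trace);      // on acquisition; l takes ownership of the trace
//   l->WrUnlock();                        // on release
//   Lock::Destroy(mutex_addr);            // on mutex destruction (currently a no-op)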
    809 
    810 // Returns a string like "L123,L234".
    811 static string SetOfLocksToString(const set<LID> &locks) {
    812   string res;
    813   for (set<LID>::const_iterator it = locks.begin();
    814        it != locks.end(); ++it) {
    815     LID lid = *it;
    816     char buff[100];
    817     snprintf(buff, sizeof(buff), "L%d", lid.raw());
    818     if (it != locks.begin())
    819       res += ", ";
    820     res += buff;
    821   }
    822   return res;
    823 }
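// E.g. for the set {L3, L7} the function returns "L3, L7".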
    824 
    825 // -------- FixedArray--------------- {{{1
    826 template <typename T, size_t SizeLimit = 1024>
    827 class FixedArray {
    828  public:
    829   explicit INLINE FixedArray(size_t array_size)
    830       : size_(array_size),
    831         array_((array_size <= SizeLimit
    832                 ? alloc_space_
    833                 : new T[array_size])) { }
    834 
    835   ~FixedArray() {
    836     if (array_ != alloc_space_) {
    837       delete[] array_;
    838     }
    839   }
    840 
    841   T* begin() { return array_; }
    842   T& operator[](int i)             { return array_[i]; }
    843 
    844  private:
    845   const size_t size_;
    846   T* array_;
    847   T alloc_space_[SizeLimit];
    848 };
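// Usage sketch (illustrative only): arrays of up to SizeLimit elements live
// on the stack; larger ones are heap-allocated and freed by the destructor.
//   FixedArray<int> arr(n);   // stack storage if n <= 1024, heap otherwise
//   for (int *p = arr.begin(); p != arr.begin() + n; ++p) *p = 0;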
    849 
    850 // -------- LockSet ----------------- {{{1
    851 class LockSet {
    852  public:
    853   NOINLINE static LSID Add(LSID lsid, Lock *lock) {
    854     ScopedMallocCostCenter cc("LockSetAdd");
    855     LID lid = lock->lid();
    856     if (lsid.IsEmpty()) {
    857       // adding to an empty lock set
    858       G_stats->ls_add_to_empty++;
    859       return LSID(lid.raw());
    860     }
    861     int cache_res;
    862     if (ls_add_cache_->Lookup(lsid.raw(), lid.raw(), &cache_res)) {
    863       G_stats->ls_add_cache_hit++;
    864       return LSID(cache_res);
    865     }
    866     LSID res;
    867     if (lsid.IsSingleton()) {
    868       LSSet set(lsid.GetSingleton(), lid);
    869       G_stats->ls_add_to_singleton++;
    870       res = ComputeId(set);
    871     } else {
    872       LSSet set(Get(lsid), lid);
    873       G_stats->ls_add_to_multi++;
    874       res = ComputeId(set);
    875     }
    876     ls_add_cache_->Insert(lsid.raw(), lid.raw(), res.raw());
    877     return res;
    878   }
    879 
    880   // If lock is present in lsid, set new_lsid to (lsid \ lock) and return true.
    881   // Otherwise set new_lsid to lsid and return false.
    882   NOINLINE static bool Remove(LSID lsid, Lock *lock, LSID *new_lsid) {
    883     *new_lsid = lsid;
    884     if (lsid.IsEmpty()) return false;
    885     LID lid = lock->lid();
    886 
    887     if (lsid.IsSingleton()) {
    888       // removing the only lock -> LSID(0)
    889       if (lsid.GetSingleton() != lid) return false;
    890       G_stats->ls_remove_from_singleton++;
    891       *new_lsid = LSID(0);
    892       return true;
    893     }
    894 
    895     int cache_res;
    896     if (ls_rem_cache_->Lookup(lsid.raw(), lid.raw(), &cache_res)) {
    897       G_stats->ls_rem_cache_hit++;
    898       *new_lsid = LSID(cache_res);
    899       return true;
    900     }
    901 
    902     LSSet &prev_set = Get(lsid);
    903     if (!prev_set.has(lid)) return false;
    904     LSSet set(prev_set, LSSet::REMOVE, lid);
    905     CHECK(set.size() == prev_set.size() - 1);
    906     G_stats->ls_remove_from_multi++;
    907     LSID res = ComputeId(set);
    908     ls_rem_cache_->Insert(lsid.raw(), lid.raw(), res.raw());
    909     *new_lsid = res;
    910     return true;
    911   }
    912 
    913   NOINLINE static bool IntersectionIsEmpty(LSID lsid1, LSID lsid2) {
    914     // at least one empty
    915     if (lsid1.IsEmpty() || lsid2.IsEmpty())
    916       return true;  // empty
    917 
    918     // both singletons
    919     if (lsid1.IsSingleton() && lsid2.IsSingleton()) {
    920       return lsid1 != lsid2;
    921     }
    922 
    923     // first is singleton, second is not
    924     if (lsid1.IsSingleton()) {
    925       const LSSet &set2 = Get(lsid2);
    926       return set2.has(LID(lsid1.raw())) == false;
    927     }
    928 
    929     // second is singleton, first is not
    930     if (lsid2.IsSingleton()) {
    931       const LSSet &set1 = Get(lsid1);
    932       return set1.has(LID(lsid2.raw())) == false;
    933     }
    934 
    935     // LockSets are equal and not empty
    936     if (lsid1 == lsid2)
    937       return false;
    938 
    939     // both are not singletons - slow path.
    940     bool ret = true,
    941          cache_hit = false;
    942     DCHECK(lsid2.raw() < 0);
    943     if (ls_intersection_cache_->Lookup(lsid1.raw(), -lsid2.raw(), &ret)) {
    944       if (!DEBUG_MODE)
    945         return ret;
    946       cache_hit = true;
    947     }
    948     const LSSet &set1 = Get(lsid1);
    949     const LSSet &set2 = Get(lsid2);
    950 
    951     FixedArray<LID> intersection(min(set1.size(), set2.size()));
    952     LID *end = set_intersection(set1.begin(), set1.end(),
    953                             set2.begin(), set2.end(),
    954                             intersection.begin());
    955     DCHECK(!cache_hit || (ret == (end == intersection.begin())));
    956     ret = (end == intersection.begin());
    957     ls_intersection_cache_->Insert(lsid1.raw(), -lsid2.raw(), ret);
    958     return ret;
    959   }
    960 
    961   static bool HasNonPhbLocks(LSID lsid) {
    962     if (lsid.IsEmpty())
    963       return false;
    964     if (lsid.IsSingleton())
    965       return !Lock::LIDtoLock(LID(lsid.raw()))->is_pure_happens_before();
    966 
    967     LSSet &set = Get(lsid);
    968     for (LSSet::const_iterator it = set.begin(); it != set.end(); ++it)
    969       if (!Lock::LIDtoLock(*it)->is_pure_happens_before())
    970         return true;
    971     return false;
    972   }
    973 
    974   static string ToString(LSID lsid) {
    975     if (lsid.IsEmpty()) {
    976       return "{}";
    977     } else if (lsid.IsSingleton()) {
    978       return "{" + Lock::ToString(lsid.GetSingleton()) + "}";
    979     }
    980     const LSSet &set = Get(lsid);
    981     string res = "{";
    982     for (LSSet::const_iterator it = set.begin(); it != set.end(); ++it) {
    983       if (it != set.begin()) res += ", ";
    984       res += Lock::ToString(*it);
    985     }
    986     res += "}";
    987     return res;
    988   }
    989 
    990   static void ReportLockSetWithContexts(LSID lsid,
    991                                         set<LID> *locks_reported,
    992                                         const char *descr) {
    993     if (lsid.IsEmpty()) return;
    994     Report("%s%s%s\n", c_green, descr, c_default);
    995     if (lsid.IsSingleton()) {
    996       LID lid = lsid.GetSingleton();
    997       Lock::ReportLockWithOrWithoutContext(lid,
    998                                            locks_reported->count(lid) == 0);
    999       locks_reported->insert(lid);
   1000     } else {
   1001       const LSSet &set = Get(lsid);
   1002       for (LSSet::const_iterator it = set.begin(); it != set.end(); ++it) {
   1003         LID lid = *it;
   1004         Lock::ReportLockWithOrWithoutContext(lid,
   1005                                      locks_reported->count(lid) == 0);
   1006         locks_reported->insert(lid);
   1007       }
   1008     }
   1009   }
   1010 
   1011   static void AddLocksToSet(LSID lsid, set<LID> *locks) {
   1012     if (lsid.IsEmpty()) return;
   1013     if (lsid.IsSingleton()) {
   1014       locks->insert(lsid.GetSingleton());
   1015     } else {
   1016       const LSSet &set = Get(lsid);
   1017       for (LSSet::const_iterator it = set.begin(); it != set.end(); ++it) {
   1018         locks->insert(*it);
   1019       }
   1020     }
   1021   }
   1022 
   1023 
   1024   static void InitClassMembers() {
   1025     map_ = new LockSet::Map;
   1026     vec_ = new LockSet::Vec;
   1027     ls_add_cache_ = new LSCache;
   1028     ls_rem_cache_ = new LSCache;
   1030     ls_intersection_cache_ = new LSIntersectionCache;
   1031   }
   1032 
   1033  private:
   1034   // No instances are allowed.
   1035   LockSet() { }
   1036 
   1037   typedef DenseMultimap<LID, 3> LSSet;
   1038 
   1039   static LSSet &Get(LSID lsid) {
   1040     ScopedMallocCostCenter cc(__FUNCTION__);
   1041     int idx = -lsid.raw() - 1;
   1042     DCHECK(idx >= 0);
   1043     DCHECK(idx < static_cast<int>(vec_->size()));
   1044     return (*vec_)[idx];
   1045   }
   1046 
   1047   static LSID ComputeId(const LSSet &set) {
   1048     CHECK(set.size() > 0);
   1049     if (set.size() == 1) {
    1050       // A singleton lock set has lsid == lid.
   1051       return LSID(set.begin()->raw());
   1052     }
   1053     DCHECK(map_);
   1054     DCHECK(vec_);
   1055     // multiple locks.
   1056     ScopedMallocCostCenter cc("LockSet::ComputeId");
   1057     int32_t *id = &(*map_)[set];
   1058     if (*id == 0) {
   1059       vec_->push_back(set);
   1060       *id = map_->size();
   1061       if      (set.size() == 2) G_stats->ls_size_2++;
   1062       else if (set.size() == 3) G_stats->ls_size_3++;
   1063       else if (set.size() == 4) G_stats->ls_size_4++;
   1064       else if (set.size() == 5) G_stats->ls_size_5++;
   1065       else                      G_stats->ls_size_other++;
   1066       if (*id >= 4096 && ((*id & (*id - 1)) == 0)) {
   1067         Report("INFO: %d LockSet IDs have been allocated "
   1068                "(2: %ld 3: %ld 4: %ld 5: %ld o: %ld)\n",
   1069                *id,
   1070                G_stats->ls_size_2, G_stats->ls_size_3,
   1071                G_stats->ls_size_4, G_stats->ls_size_5,
   1072                G_stats->ls_size_other
   1073                );
   1074       }
   1075     }
   1076     return LSID(-*id);
   1077   }
   1078 
   1079   typedef map<LSSet, int32_t> Map;
   1080   static Map *map_;
   1081 
   1082   static const char *kLockSetVecAllocCC;
   1083   typedef vector<LSSet> Vec;
   1084   static Vec *vec_;
   1085 
   1086 //  static const int kPrimeSizeOfLsCache = 307;
   1087 //  static const int kPrimeSizeOfLsCache = 499;
   1088   static const int kPrimeSizeOfLsCache = 1021;
   1089   typedef IntPairToIntCache<kPrimeSizeOfLsCache> LSCache;
   1090   static LSCache *ls_add_cache_;
   1091   static LSCache *ls_rem_cache_;
   1092   static LSCache *ls_int_cache_;
   1093   typedef IntPairToBoolCache<kPrimeSizeOfLsCache> LSIntersectionCache;
   1094   static LSIntersectionCache *ls_intersection_cache_;
   1095 };
   1096 
   1097 LockSet::Map *LockSet::map_;
   1098 LockSet::Vec *LockSet::vec_;
   1099 const char *LockSet::kLockSetVecAllocCC = "kLockSetVecAllocCC";
   1100 LockSet::LSCache *LockSet::ls_add_cache_;
   1101 LockSet::LSCache *LockSet::ls_rem_cache_;
   1102 LockSet::LSCache *LockSet::ls_int_cache_;
   1103 LockSet::LSIntersectionCache *LockSet::ls_intersection_cache_;
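// Usage sketch (illustrative only; lock_a/lock_b are made-up Lock pointers):
//   LSID ls = LSID(0);                // empty lock set
//   ls = LockSet::Add(ls, lock_a);    // singleton: ls.raw() == lock_a->lid().raw()
//   ls = LockSet::Add(ls, lock_b);    // tuple: ls.raw() < 0
//   LSID rest;
//   if (LockSet::Remove(ls, lock_a, &rest)) {
//     // rest is now the singleton {lock_b}
//   }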
   1104 
   1105 
   1106 static string TwoLockSetsToString(LSID rd_lockset, LSID wr_lockset) {
   1107   string res;
   1108   if (rd_lockset == wr_lockset) {
   1109     res = "L";
   1110     res += LockSet::ToString(wr_lockset);
   1111   } else {
   1112     res = "WR-L";
   1113     res += LockSet::ToString(wr_lockset);
   1114     res += "/RD-L";
   1115     res += LockSet::ToString(rd_lockset);
   1116   }
   1117   return res;
   1118 }
   1119 
   1120 
   1121 
   1122 
   1123 // -------- VTS ------------------ {{{1
   1124 class VTS {
   1125  public:
   1126   static size_t MemoryRequiredForOneVts(size_t size) {
   1127     return sizeof(VTS) + size * sizeof(TS);
   1128   }
   1129 
   1130   static size_t RoundUpSizeForEfficientUseOfFreeList(size_t size) {
   1131     if (size < 32) return size;
   1132     if (size < 64) return (size + 7) & ~7;
   1133     if (size < 128) return (size + 15) & ~15;
   1134     return (size + 31) & ~31;
   1135   }
   1136 
   1137   static VTS *Create(size_t size) {
   1138     DCHECK(size > 0);
   1139     void *mem;
   1140     size_t rounded_size = RoundUpSizeForEfficientUseOfFreeList(size);
   1141     DCHECK(size <= rounded_size);
   1142     if (rounded_size <= kNumberOfFreeLists) {
   1143       // Small chunk, use FreeList.
   1144       ScopedMallocCostCenter cc("VTS::Create (from free list)");
   1145       mem = free_lists_[rounded_size]->Allocate();
   1146       G_stats->vts_create_small++;
   1147     } else {
   1148       // Large chunk, use new/delete instead of FreeList.
   1149       ScopedMallocCostCenter cc("VTS::Create (from new[])");
   1150       mem = new int8_t[MemoryRequiredForOneVts(size)];
   1151       G_stats->vts_create_big++;
   1152     }
   1153     VTS *res = new(mem) VTS(size);
   1154     G_stats->vts_total_create += size;
   1155     return res;
   1156   }
   1157 
   1158   static void Unref(VTS *vts) {
   1159     if (!vts) return;
   1160     CHECK_GT(vts->ref_count_, 0);
   1161     if (AtomicDecrementRefcount(&vts->ref_count_) == 0) {
   1162       size_t size = vts->size_;  // can't use vts->size().
   1163       size_t rounded_size = RoundUpSizeForEfficientUseOfFreeList(size);
   1164       if (rounded_size <= kNumberOfFreeLists) {
   1165         free_lists_[rounded_size]->Deallocate(vts);
   1166         G_stats->vts_delete_small++;
   1167       } else {
   1168         G_stats->vts_delete_big++;
   1169         delete vts;
   1170       }
   1171       G_stats->vts_total_delete += rounded_size;
   1172     }
   1173   }
   1174 
   1175   static VTS *CreateSingleton(TID tid, int32_t clk = 1) {
   1176     VTS *res = Create(1);
   1177     res->arr_[0].tid = tid.raw();
   1178     res->arr_[0].clk = clk;
   1179     return res;
   1180   }
   1181 
   1182   VTS *Clone() {
   1183     G_stats->vts_clone++;
   1184     AtomicIncrementRefcount(&ref_count_);
   1185     return this;
   1186   }
   1187 
   1188   static VTS *CopyAndTick(const VTS *vts, TID id_to_tick) {
   1189     CHECK(vts->ref_count_);
   1190     VTS *res = Create(vts->size());
   1191     bool found = false;
   1192     for (size_t i = 0; i < res->size(); i++) {
   1193       res->arr_[i] = vts->arr_[i];
   1194       if (res->arr_[i].tid == id_to_tick.raw()) {
   1195         res->arr_[i].clk++;
   1196         found = true;
   1197       }
   1198     }
   1199     CHECK(found);
   1200     return res;
   1201   }
   1202 
   1203   static VTS *Join(const VTS *vts_a, const VTS *vts_b) {
   1204     CHECK(vts_a->ref_count_);
   1205     CHECK(vts_b->ref_count_);
   1206     FixedArray<TS> result_ts(vts_a->size() + vts_b->size());
   1207     TS *t = result_ts.begin();
   1208     const TS *a = &vts_a->arr_[0];
   1209     const TS *b = &vts_b->arr_[0];
   1210     const TS *a_max = a + vts_a->size();
   1211     const TS *b_max = b + vts_b->size();
   1212     while (a < a_max && b < b_max) {
   1213       if (a->tid < b->tid) {
   1214         *t = *a;
   1215         a++;
   1216         t++;
   1217       } else if (a->tid > b->tid) {
   1218         *t = *b;
   1219         b++;
   1220         t++;
   1221       } else {
   1222         if (a->clk >= b->clk) {
   1223           *t = *a;
   1224         } else {
   1225           *t = *b;
   1226         }
   1227         a++;
   1228         b++;
   1229         t++;
   1230       }
   1231     }
   1232     while (a < a_max) {
   1233       *t = *a;
   1234       a++;
   1235       t++;
   1236     }
   1237     while (b < b_max) {
   1238       *t = *b;
   1239       b++;
   1240       t++;
   1241     }
   1242 
   1243     VTS *res = VTS::Create(t - result_ts.begin());
   1244     for (size_t i = 0; i < res->size(); i++) {
   1245       res->arr_[i] = result_ts[i];
   1246     }
   1247     return res;
   1248   }
   1249 
   1250   int32_t clk(TID tid) const {
   1251     // TODO(dvyukov): this function is sub-optimal,
   1252     // we only need thread's own clock.
   1253     for (size_t i = 0; i < size_; i++) {
   1254       if (arr_[i].tid == tid.raw()) {
   1255         return arr_[i].clk;
   1256       }
   1257     }
   1258     return 0;
   1259   }
   1260 
   1261   static INLINE void FlushHBCache() {
   1262     hb_cache_->Flush();
   1263   }
   1264 
   1265   static INLINE bool HappensBeforeCached(const VTS *vts_a, const VTS *vts_b) {
   1266     bool res = false;
   1267     if (hb_cache_->Lookup(vts_a->uniq_id_, vts_b->uniq_id_, &res)) {
   1268       G_stats->n_vts_hb_cached++;
   1269       DCHECK(res == HappensBefore(vts_a, vts_b));
   1270       return res;
   1271     }
   1272     res = HappensBefore(vts_a, vts_b);
   1273     hb_cache_->Insert(vts_a->uniq_id_, vts_b->uniq_id_, res);
   1274     return res;
   1275   }
   1276 
   1277   // return true if vts_a happens-before vts_b.
   1278   static NOINLINE bool HappensBefore(const VTS *vts_a, const VTS *vts_b) {
   1279     CHECK(vts_a->ref_count_);
   1280     CHECK(vts_b->ref_count_);
   1281     G_stats->n_vts_hb++;
   1282     const TS *a = &vts_a->arr_[0];
   1283     const TS *b = &vts_b->arr_[0];
   1284     const TS *a_max = a + vts_a->size();
   1285     const TS *b_max = b + vts_b->size();
   1286     bool a_less_than_b = false;
   1287     while (a < a_max && b < b_max) {
   1288       if (a->tid < b->tid) {
   1289         // a->tid is not present in b.
   1290         return false;
   1291       } else if (a->tid > b->tid) {
   1292         // b->tid is not present in a.
   1293         a_less_than_b = true;
   1294         b++;
   1295       } else {
   1296         // this tid is present in both VTSs. Compare clocks.
   1297         if (a->clk > b->clk) return false;
   1298         if (a->clk < b->clk) a_less_than_b = true;
   1299         a++;
   1300         b++;
   1301       }
   1302     }
   1303     if (a < a_max) {
   1304       // Some tids are present in a and not in b
   1305       return false;
   1306     }
   1307     if (b < b_max) {
   1308       return true;
   1309     }
   1310     return a_less_than_b;
   1311   }
   1312 
   1313   size_t size() const {
   1314     DCHECK(ref_count_);
   1315     return size_;
   1316   }
   1317 
   1318   string ToString() const {
   1319     DCHECK(ref_count_);
   1320     string res = "[";
   1321     for (size_t i = 0; i < size(); i++) {
   1322       char buff[100];
   1323       snprintf(buff, sizeof(buff), "%d:%d;", arr_[i].tid, arr_[i].clk);
   1324       if (i) res += " ";
   1325       res += buff;
   1326     }
   1327     return res + "]";
   1328   }
   1329 
   1330   void print(const char *name) const {
   1331     string str = ToString();
   1332     Printf("%s: %s\n", name, str.c_str());
   1333   }
   1334 
   1335   static void TestHappensBefore() {
   1336     // TODO(kcc): need more tests here...
   1337     const char *test_vts[] = {
   1338       "[0:1;]",
   1339       "[0:4; 2:1;]",
   1340       "[0:4; 2:2; 4:1;]",
   1341       "[0:4; 3:2; 4:1;]",
   1342       "[0:4; 3:2; 4:2;]",
   1343       "[0:4; 3:3; 4:1;]",
   1344       NULL
   1345     };
   1346 
   1347     for (int i = 0; test_vts[i]; i++) {
   1348       const VTS *vts1 = Parse(test_vts[i]);
   1349       for (int j = 0; test_vts[j]; j++) {
   1350         const VTS *vts2 = Parse(test_vts[j]);
   1351         bool hb  = HappensBefore(vts1, vts2);
   1352         Printf("HB = %d\n   %s\n   %s\n", static_cast<int>(hb),
   1353                vts1->ToString().c_str(),
   1354                vts2->ToString().c_str());
   1355         delete vts2;
   1356       }
   1357       delete vts1;
   1358     }
   1359   }
   1360 
   1361   static void Test() {
   1362     Printf("VTS::test();\n");
   1363     VTS *v1 = CreateSingleton(TID(0));
   1364     VTS *v2 = CreateSingleton(TID(1));
   1365     VTS *v3 = CreateSingleton(TID(2));
   1366     VTS *v4 = CreateSingleton(TID(3));
   1367 
   1368     VTS *v12 = Join(v1, v2);
   1369     v12->print("v12");
   1370     VTS *v34 = Join(v3, v4);
   1371     v34->print("v34");
   1372 
   1373     VTS *x1 = Parse("[0:4; 3:6; 4:2;]");
   1374     CHECK(x1);
   1375     x1->print("x1");
   1376     TestHappensBefore();
   1377   }
   1378 
   1379   // Parse VTS string in the form "[0:4; 3:6; 4:2;]".
   1380   static VTS *Parse(const char *str) {
   1381 #if 1  // TODO(kcc): need sscanf in valgrind
   1382     return NULL;
   1383 #else
   1384     vector<TS> vec;
   1385     if (!str) return NULL;
   1386     if (str[0] != '[') return NULL;
   1387     str++;
   1388     int tid = 0, clk = 0;
   1389     int consumed = 0;
   1390     while (sscanf(str, "%d:%d;%n", &tid, &clk, &consumed) > 0) {
   1391       TS ts;
   1392       ts.tid = TID(tid);
   1393       ts.clk = clk;
   1394       vec.push_back(ts);
   1395       str += consumed;
   1396       // Printf("%d:%d\n", tid, clk);
   1397     }
   1398     if (*str != ']') return NULL;
   1399     VTS *res = Create(vec.size());
   1400     for (size_t i = 0; i < vec.size(); i++) {
   1401       res->arr_[i] = vec[i];
   1402     }
   1403     return res;
   1404 #endif
   1405   }
   1406 
   1407   static void InitClassMembers() {
   1408     hb_cache_ = new HBCache;
   1409     free_lists_ = new FreeList *[kNumberOfFreeLists+1];
   1410     free_lists_[0] = 0;
   1411     for (size_t  i = 1; i <= kNumberOfFreeLists; i++) {
   1412       free_lists_[i] = new FreeList(MemoryRequiredForOneVts(i),
   1413                                     (kNumberOfFreeLists * 4) / i);
   1414     }
   1415   }
   1416 
   1417   int32_t uniq_id() const { return uniq_id_; }
   1418 
   1419  private:
   1420   explicit VTS(size_t size)
   1421     : ref_count_(1),
   1422       size_(size) {
   1423     uniq_id_counter_++;
    1424     // If this counter ever overflows, we are in trouble; it would need to be 64-bit.
   1425     CHECK_GT(uniq_id_counter_, 0);
   1426     uniq_id_ = uniq_id_counter_;
   1427   }
   1428   ~VTS() {}
   1429 
   1430   struct TS {
   1431     int32_t tid;
   1432     int32_t clk;
   1433   };
   1434 
   1435 
   1436   // data members
   1437   int32_t ref_count_;
   1438   int32_t uniq_id_;
   1439   size_t size_;
   1440   TS     arr_[];  // array of size_ elements.
   1441 
   1442 
   1443   // static data members
   1444   static int32_t uniq_id_counter_;
   1445   static const int kCacheSize = 4999;  // Has to be prime.
   1446   typedef IntPairToBoolCache<kCacheSize> HBCache;
   1447   static HBCache *hb_cache_;
   1448 
   1449   static const size_t kNumberOfFreeLists = 512;  // Must be power of two.
   1450 //  static const size_t kNumberOfFreeLists = 64; // Must be power of two.
   1451   static FreeList **free_lists_;  // Array of kNumberOfFreeLists elements.
   1452 };
   1453 
   1454 int32_t VTS::uniq_id_counter_;
   1455 VTS::HBCache *VTS::hb_cache_;
   1456 FreeList **VTS::free_lists_;
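// Worked happens-before example (clock values taken from TestHappensBefore):
// with A = [0:1;] and B = [0:4; 2:1;], HappensBefore(A, B) is true, because
// A's only clock (0:1) does not exceed the matching clock in B (0:4).
// HappensBefore(B, A) is false, because 0:4 in B exceeds 0:1 in A and
// tid 2 is missing from A entirely.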
   1457 
   1458 
   1459 // This class is somewhat similar to VTS,
   1460 // but it's mutable, not reference counted and not sorted.
   1461 class VectorClock {
   1462  public:
   1463   VectorClock()
   1464       : size_(),
   1465         clock_()
   1466   {
   1467   }
   1468 
   1469   void reset() {
   1470     free(clock_);
   1471     size_ = 0;
   1472     clock_ = NULL;
   1473   }
   1474 
   1475   int32_t clock(TID tid) const {
   1476     for (size_t i = 0; i != size_; i += 1) {
   1477       if (clock_[i].tid == tid.raw()) {
   1478         return clock_[i].clk;
   1479       }
   1480     }
   1481     return 0;
   1482   }
   1483 
   1484   void update(TID tid, int32_t clk) {
   1485     for (size_t i = 0; i != size_; i += 1) {
   1486       if (clock_[i].tid == tid.raw()) {
   1487         clock_[i].clk = clk;
   1488         return;
   1489       }
   1490     }
   1491     size_ += 1;
   1492     clock_ = (TS*)realloc(clock_, size_ * sizeof(TS));
   1493     clock_[size_ - 1].tid = tid.raw();
   1494     clock_[size_ - 1].clk = clk;
   1495   }
   1496 
   1497  private:
   1498   struct TS {
   1499     int32_t tid;
   1500     int32_t clk;
   1501   };
   1502 
   1503   size_t    size_;
   1504   TS*       clock_;
   1505 };
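// Usage sketch (illustrative only):
//   VectorClock vc;
//   vc.update(TID(1), 5);          // adds the entry 1:5
//   vc.update(TID(1), 7);          // overwrites it in place: 1:7
//   CHECK(vc.clock(TID(1)) == 7);
//   CHECK(vc.clock(TID(2)) == 0);  // absent threads read as 0
//   vc.reset();                    // frees the storage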
   1506 
   1507 
   1508 // -------- Mask -------------------- {{{1
   1509 // A bit mask (32-bits on 32-bit arch and 64-bits on 64-bit arch).
   1510 class Mask {
   1511  public:
   1512   static const uintptr_t kOne = 1;
   1513   static const uintptr_t kNBits = sizeof(uintptr_t) * 8;
   1514   static const uintptr_t kNBitsLog = kNBits == 32 ? 5 : 6;
   1515 
   1516   Mask() : m_(0) {}
   1517   Mask(const Mask &m) : m_(m.m_) { }
   1518   explicit Mask(uintptr_t m) : m_(m) { }
   1519   INLINE bool Get(uintptr_t idx) const   { return m_ & (kOne << idx); }
   1520   INLINE void Set(uintptr_t idx)   { m_ |= kOne << idx; }
   1521   INLINE void Clear(uintptr_t idx) { m_ &= ~(kOne << idx); }
   1522   INLINE bool Empty() const {return m_ == 0; }
   1523 
   1524   // Clear bits in range [a,b) and return old [a,b) range.
   1525   INLINE Mask ClearRangeAndReturnOld(uintptr_t a, uintptr_t b) {
   1526     DCHECK(a < b);
   1527     DCHECK(b <= kNBits);
   1528     uintptr_t res;
   1529     uintptr_t n_bits_in_mask = (b - a);
   1530     if (n_bits_in_mask == kNBits) {
   1531       res = m_;
   1532       m_ = 0;
   1533     } else {
   1534       uintptr_t t = (kOne << n_bits_in_mask);
   1535       uintptr_t mask = (t - 1) << a;
   1536       res = m_ & mask;
   1537       m_ &= ~mask;
   1538     }
   1539     return Mask(res);
   1540   }
   1541 
   1542   INLINE void ClearRange(uintptr_t a, uintptr_t b) {
   1543     ClearRangeAndReturnOld(a, b);
   1544   }
   1545 
   1546   INLINE void SetRange(uintptr_t a, uintptr_t b) {
   1547     DCHECK(a < b);
   1548     DCHECK(b <= kNBits);
   1549     uintptr_t n_bits_in_mask = (b - a);
   1550     if (n_bits_in_mask == kNBits) {
   1551       m_ = ~0;
   1552     } else {
   1553       uintptr_t t = (kOne << n_bits_in_mask);
   1554       uintptr_t mask = (t - 1) << a;
   1555       m_ |= mask;
   1556     }
   1557   }
   1558 
   1559   INLINE uintptr_t GetRange(uintptr_t a, uintptr_t b) const {
   1560     // a bug was fixed here
   1561     DCHECK(a < b);
   1562     DCHECK(b <= kNBits);
   1563     uintptr_t n_bits_in_mask = (b - a);
   1564     if (n_bits_in_mask == kNBits) {
   1565       return m_;
   1566     } else {
   1567       uintptr_t t = (kOne << n_bits_in_mask);
   1568       uintptr_t mask = (t - 1) << a;
   1569       return m_ & mask;
   1570     }
   1571   }
   1572 
    1573   // Get the index of some set bit (assumes the mask is non-zero).
   1574   size_t GetSomeSetBit() {
   1575     DCHECK(m_);
   1576     size_t ret;
   1577 #ifdef __GNUC__
   1578     ret =  __builtin_ctzl(m_);
   1579 #elif defined(_MSC_VER)
   1580     unsigned long index;
   1581     DCHECK(sizeof(uintptr_t) == 4);
   1582     _BitScanReverse(&index, m_);
   1583     ret = index;
   1584 #else
   1585 # error "Unsupported"
   1586 #endif
   1587     DCHECK(this->Get(ret));
   1588     return ret;
   1589   }
   1590 
   1591   size_t PopCount() {
   1592 #ifdef VGO_linux
   1593     return __builtin_popcountl(m_);
   1594 #else
   1595     CHECK(0);
   1596     return 0;
   1597 #endif
   1598   }
   1599 
   1600   void Subtract(Mask m) { m_ &= ~m.m_; }
   1601   void Union(Mask m) { m_ |= m.m_; }
   1602 
   1603   static Mask Intersection(Mask m1, Mask m2) { return Mask(m1.m_ & m2.m_); }
   1604 
   1605 
   1606   void Clear() { m_ = 0; }
   1607 
   1608 
   1609   string ToString() const {
   1610     char buff[kNBits+1];
   1611     for (uintptr_t i = 0; i < kNBits; i++) {
   1612       buff[i] = Get(i) ? '1' : '0';
   1613     }
   1614     buff[kNBits] = 0;
   1615     return buff;
   1616   }
   1617 
   1618   static void Test() {
   1619     Mask m;
   1620     m.Set(2);
   1621     Printf("%s\n", m.ToString().c_str());
   1622     m.ClearRange(0, kNBits);
   1623     Printf("%s\n", m.ToString().c_str());
   1624   }
   1625 
   1626  private:
   1627   uintptr_t m_;
   1628 };
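// Usage sketch (illustrative only):
//   Mask m;
//   m.SetRange(2, 5);              // sets bits 2, 3 and 4
//   CHECK(m.Get(3));
//   Mask old = m.ClearRangeAndReturnOld(0, Mask::kNBits);
//   CHECK(m.Empty() && old.Get(4));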
   1629 
   1630 // -------- BitSet -------------------{{{1
   1631 // Poor man's sparse bit set.
   1632 class BitSet {
   1633  public:
   1634   // Add range [a,b). The range must lie within one line of Mask::kNBits bits.
   1635   void Add(uintptr_t a, uintptr_t b) {
   1636     uintptr_t line = a & ~(Mask::kNBits - 1);
   1637     DCHECK(a < b);
   1638     DCHECK(a - line < Mask::kNBits);
   1639     if (!(b - line <= Mask::kNBits)) {
   1640       Printf("XXXXX %p %p %p b-line=%ld size=%ld a-line=%ld\n", a, b, line,
   1641              b - line, b - a, a - line);
   1642       return;
   1643     }
   1644     DCHECK(b - line <= Mask::kNBits);
   1645     DCHECK(line == ((b - 1) & ~(Mask::kNBits - 1)));
   1646     Mask &mask = map_[line];
   1647     mask.SetRange(a - line, b - line);
   1648   }
   1649 
   1650   bool empty() { return map_.empty(); }
   1651 
   1652   size_t size() {
   1653     size_t res = 0;
   1654     for (Map::iterator it = map_.begin(); it != map_.end(); ++it) {
   1655       res += it->second.PopCount();
   1656     }
   1657     return res;
   1658   }
   1659 
   1660   string ToString() {
   1661     char buff[100];
   1662     string res;
   1663     int lines = 0;
   1664     snprintf(buff, sizeof(buff), " %ld lines %ld bits:",
   1665              (long)map_.size(), (long)size());
   1666     res += buff;
   1667     for (Map::iterator it = map_.begin(); it != map_.end(); ++it) {
   1668       Mask mask = it->second;
   1669       snprintf(buff, sizeof(buff), " l%d (%ld):", lines++, (long)mask.PopCount());
   1670       res += buff;
   1671       uintptr_t line = it->first;
   1672       bool is_in = false;
   1673       for (size_t i = 0; i < Mask::kNBits; i++) {
   1674         uintptr_t addr = line + i;
   1675         if (mask.Get(i)) {
   1676           if (!is_in) {
   1677             snprintf(buff, sizeof(buff), " [%lx,", (long)addr);
   1678             res += buff;
   1679             is_in = true;
   1680           }
   1681         } else {
   1682           if (is_in) {
   1683             snprintf(buff, sizeof(buff), "%lx);", (long)addr);
   1684             res += buff;
   1685             is_in = false;
   1686           }
   1687         }
   1688       }
   1689       if (is_in) {
   1690         snprintf(buff, sizeof(buff), "%lx);", (long)(line + Mask::kNBits));
   1691         res += buff;
   1692       }
   1693     }
   1694     return res;
   1695   }
   1696 
   1697   void Clear() { map_.clear(); }
   1698  private:
   1699   typedef map<uintptr_t, Mask> Map;
   1700   Map map_;
   1701 };
   1702 
   1703 // -------- Segment -------------------{{{1
   1704 class Segment {
   1705  public:
   1706   // for debugging...
   1707   static bool ProfileSeg(SID sid) {
   1708     // return (sid.raw() % (1 << 14)) == 0;
   1709     return false;
   1710   }
   1711 
   1712   // non-static methods
   1713 
   1714   VTS *vts() const { return vts_; }
   1715   TID tid() const { return TID(tid_); }
   1716   LSID  lsid(bool is_w) const { return lsid_[is_w]; }
   1717   uint32_t lock_era() const { return lock_era_; }
   1718 
   1719   // static methods
   1720 
   1721   static INLINE uintptr_t *embedded_stack_trace(SID sid) {
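          // Stack traces for segments are stored in chunked arrays: a SID maps to
          // chunk (sid / kChunkSizeForStacks) and slot (sid % kChunkSizeForStacks),
          // and each slot holds kSizeOfHistoryStackTrace uintptr_t entries.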
   1722     DCHECK(sid.valid());
   1723     DCHECK(kSizeOfHistoryStackTrace > 0);
   1724     size_t chunk_idx = (unsigned)sid.raw() / kChunkSizeForStacks;
   1725     size_t idx       = (unsigned)sid.raw() % kChunkSizeForStacks;
   1726     DCHECK(chunk_idx < n_stack_chunks_);
   1727     DCHECK(all_stacks_[chunk_idx] != NULL);
   1728     return &all_stacks_[chunk_idx][idx * kSizeOfHistoryStackTrace];
   1729   }
   1730 
   1731   static void ensure_space_for_stack_trace(SID sid) {
   1732     ScopedMallocCostCenter malloc_cc(__FUNCTION__);
   1733     DCHECK(sid.valid());
   1734     DCHECK(kSizeOfHistoryStackTrace > 0);
   1735     size_t chunk_idx = (unsigned)sid.raw() / kChunkSizeForStacks;
   1736     DCHECK(chunk_idx < n_stack_chunks_);
   1737     if (all_stacks_[chunk_idx])
   1738       return;
   1739     for (size_t i = 0; i <= chunk_idx; i++) {
   1740       if (all_stacks_[i]) continue;
   1741       all_stacks_[i] = new uintptr_t[
   1742           kChunkSizeForStacks * kSizeOfHistoryStackTrace];
   1743       // We don't clear this memory here; it will be cleared later, lazily.
   1744       // We also never delete it because it will be used until the very end.
   1745     }
   1746   }
   1747 
   1748   static string StackTraceString(SID sid) {
   1749     DCHECK(kSizeOfHistoryStackTrace > 0);
   1750     return StackTrace::EmbeddedStackTraceToString(
   1751         embedded_stack_trace(sid), kSizeOfHistoryStackTrace);
   1752   }
   1753 
   1754   // Allocate `n` fresh segments, put SIDs into `fresh_sids`.
   1755   static INLINE void AllocateFreshSegments(size_t n, SID *fresh_sids) {
   1756     ScopedMallocCostCenter malloc_cc(__FUNCTION__);
   1757     size_t i = 0;
   1758     size_t n_reusable = min(n, reusable_sids_->size());
   1759     // First, allocate from reusable_sids_.
   1760     for (; i < n_reusable; i++) {
   1761       G_stats->seg_reuse++;
   1762       DCHECK(!reusable_sids_->empty());
   1763       SID sid = reusable_sids_->back();
   1764       reusable_sids_->pop_back();
   1765       Segment *seg = GetInternal(sid);
   1766       DCHECK(!seg->seg_ref_count_);
   1767       DCHECK(!seg->vts());
   1768       DCHECK(!seg->tid().valid());
   1769       CHECK(sid.valid());
   1770       if (ProfileSeg(sid)) {
   1771        Printf("Segment: reused SID %d\n", sid.raw());
   1772       }
   1773       fresh_sids[i] = sid;
   1774     }
   1775     // Then, allocate the rest as new SIDs.
   1776     for (; i < n; i++) {
   1777       G_stats->seg_create++;
   1778       CHECK(n_segments_ < kMaxSID);
   1779       Segment *seg = GetSegmentByIndex(n_segments_);
   1780 
   1781       // This VTS may not be empty due to ForgetAllState().
   1782       VTS::Unref(seg->vts_);
   1783       seg->vts_ = 0;
   1784       seg->seg_ref_count_ = 0;
   1785 
   1786       if (ProfileSeg(SID(n_segments_))) {
   1787        Printf("Segment: allocated SID %d\n", n_segments_);
   1788       }
   1789 
   1790       SID sid = fresh_sids[i] = SID(n_segments_);
   1791       if (kSizeOfHistoryStackTrace > 0) {
   1792         ensure_space_for_stack_trace(sid);
   1793       }
   1794       n_segments_++;
   1795     }
   1796   }
   1797 
   1798   // Initialize the contents of the given segment.
   1799   static INLINE void SetupFreshSid(SID sid, TID tid, VTS *vts,
   1800                                    LSID rd_lockset, LSID wr_lockset) {
   1801     DCHECK(vts);
   1802     DCHECK(tid.valid());
   1803     DCHECK(sid.valid());
   1804     Segment *seg = GetInternal(sid);
   1805     DCHECK(seg);
   1806     DCHECK(seg->seg_ref_count_ == 0);
   1807     seg->seg_ref_count_ = 0;
   1808     seg->tid_ = tid;
   1809     seg->lsid_[0] = rd_lockset;
   1810     seg->lsid_[1] = wr_lockset;
   1811     seg->vts_ = vts;
   1812     seg->lock_era_ = g_lock_era;
   1813     if (kSizeOfHistoryStackTrace) {
   1814       embedded_stack_trace(sid)[0] = 0;
   1815     }
   1816   }
   1817 
   1818   static INLINE SID AddNewSegment(TID tid, VTS *vts,
   1819                            LSID rd_lockset, LSID wr_lockset) {
   1820     ScopedMallocCostCenter malloc_cc("Segment::AddNewSegment()");
   1821     SID sid;
   1822     AllocateFreshSegments(1, &sid);
   1823     SetupFreshSid(sid, tid, vts, rd_lockset, wr_lockset);
   1824     return sid;
   1825   }
   1826 
   1827   static bool Alive(SID sid) {
   1828     Segment *seg = GetInternal(sid);
   1829     return seg->vts() != NULL;
   1830   }
   1831 
   1832   static void AssertLive(SID sid, int line) {
   1833     if (DEBUG_MODE) {
   1834       if (!(sid.raw() < INTERNAL_ANNOTATE_UNPROTECTED_READ(n_segments_))) {
   1835         Printf("Segment::AssertLive: failed on sid=%d n_segments = %dline=%d\n",
   1836                sid.raw(), n_segments_, line);
   1837       }
   1838       Segment *seg = GetInternal(sid);
   1839       if (!seg->vts()) {
   1840         Printf("Segment::AssertLive: failed on sid=%d line=%d\n",
   1841                sid.raw(), line);
   1842       }
   1843       DCHECK(seg->vts());
   1844       DCHECK(seg->tid().valid());
   1845     }
   1846   }
   1847 
   1848   static INLINE Segment *Get(SID sid) {
   1849     AssertLive(sid, __LINE__);
   1850     Segment *res = GetInternal(sid);
   1851     DCHECK(res->vts());
   1852     DCHECK(res->tid().valid());
   1853     return res;
   1854   }
   1855 
   1856   static INLINE void RecycleOneFreshSid(SID sid) {
   1857     Segment *seg = GetInternal(sid);
   1858     seg->tid_ = TID();
   1859     seg->vts_ = NULL;
   1860     reusable_sids_->push_back(sid);
   1861     if (ProfileSeg(sid)) {
   1862       Printf("Segment: recycled SID %d\n", sid.raw());
   1863     }
   1864   }
   1865 
   1866   static bool RecycleOneSid(SID sid) {
   1867     ScopedMallocCostCenter malloc_cc("Segment::RecycleOneSid()");
   1868     Segment *seg = GetInternal(sid);
   1869     DCHECK(seg->seg_ref_count_ == 0);
   1870     DCHECK(sid.raw() < n_segments_);
   1871     if (!seg->vts()) return false;  // Already recycled.
   1872     VTS::Unref(seg->vts_);
   1873     RecycleOneFreshSid(sid);
   1874     return true;
   1875   }
   1876 
   1877   int32_t ref_count() const {
   1878     return INTERNAL_ANNOTATE_UNPROTECTED_READ(seg_ref_count_);
   1879   }
   1880 
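          // Reference counting: Ref() increments seg_ref_count_, Unref()
          // decrements it; when the count drops to zero the SID is recycled
          // (its VTS is unreferenced and the SID returns to reusable_sids_,
          // ready to be handed out again by AllocateFreshSegments()).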
   1881   static void INLINE Ref(SID sid, const char *where) {
   1882     Segment *seg = GetInternal(sid);
   1883     if (ProfileSeg(sid)) {
   1884       Printf("SegRef   : %d ref=%d %s; tid=%d\n", sid.raw(),
   1885              seg->seg_ref_count_, where, seg->tid().raw());
   1886     }
   1887     DCHECK(seg->seg_ref_count_ >= 0);
   1888     AtomicIncrementRefcount(&seg->seg_ref_count_);
   1889   }
   1890 
   1891   static INLINE intptr_t UnrefNoRecycle(SID sid, const char *where) {
   1892     Segment *seg = GetInternal(sid);
   1893     if (ProfileSeg(sid)) {
   1894       Printf("SegUnref : %d ref=%d %s\n", sid.raw(), seg->seg_ref_count_, where);
   1895     }
   1896     DCHECK(seg->seg_ref_count_ > 0);
   1897     return AtomicDecrementRefcount(&seg->seg_ref_count_);
   1898   }
   1899 
   1900   static void INLINE Unref(SID sid, const char *where) {
   1901     if (UnrefNoRecycle(sid, where) == 0) {
   1902       RecycleOneSid(sid);
   1903     }
   1904   }
   1905 
   1906 
   1907   static void ForgetAllState() {
   1908     n_segments_ = 1;
   1909     reusable_sids_->clear();
   1910     // vts_'es will be freed in AddNewSegment.
   1911   }
   1912 
   1913   static string ToString(SID sid) {
   1914     char buff[100];
   1915     snprintf(buff, sizeof(buff), "T%d/S%d", Get(sid)->tid().raw(), sid.raw());
   1916     return buff;
   1917   }
   1918 
   1919   static string ToStringTidOnly(SID sid) {
   1920     char buff[100];
   1921     snprintf(buff, sizeof(buff), "T%d", Get(sid)->tid().raw());
   1922     return buff;
   1923   }
   1924 
   1925   static string ToStringWithLocks(SID sid) {
   1926     char buff[100];
   1927     Segment *seg = Get(sid);
   1928     snprintf(buff, sizeof(buff), "T%d/S%d ", seg->tid().raw(), sid.raw());
   1929     string res = buff;
   1930     res += TwoLockSetsToString(seg->lsid(false), seg->lsid(true));
   1931     return res;
   1932   }
   1933 
   1934   static bool INLINE HappensBeforeOrSameThread(SID a, SID b) {
   1935     if (a == b) return true;
   1936     if (Get(a)->tid() == Get(b)->tid()) return true;
   1937     return HappensBefore(a, b);
   1938   }
   1939 
   1940   static bool INLINE HappensBefore(SID a, SID b) {
   1941     DCHECK(a != b);
   1942     G_stats->n_seg_hb++;
   1943     bool res = false;
   1944     const Segment *seg_a = Get(a);
   1945     const Segment *seg_b = Get(b);
   1946     DCHECK(seg_a->tid() != seg_b->tid());
   1947     const VTS *vts_a = seg_a->vts();
   1948     const VTS *vts_b = seg_b->vts();
   1949     res = VTS::HappensBeforeCached(vts_a, vts_b);
   1950 #if 0
   1951     if (DEBUG_MODE) {
   1952       Printf("HB = %d\n  %s\n  %s\n", res,
   1953            vts_a->ToString().c_str(), vts_b->ToString().c_str());
   1954     }
   1955 #endif
   1956     return res;
   1957   }
   1958 
   1959   static int32_t NumberOfSegments() { return n_segments_; }
   1960 
   1961   static void ShowSegmentStats() {
   1962     Printf("Segment::ShowSegmentStats:\n");
   1963     Printf("n_segments_: %d\n", n_segments_);
   1964     Printf("reusable_sids_: %ld\n", reusable_sids_->size());
   1965     map<int, int> ref_to_freq_map;
   1966     for (int i = 1; i < n_segments_; i++) {
   1967       Segment *seg = GetInternal(SID(i));
   1968       int32_t refcount = seg->seg_ref_count_;
   1969       if (refcount > 10) refcount = 10;
   1970       ref_to_freq_map[refcount]++;
   1971     }
   1972     for (map<int, int>::iterator it = ref_to_freq_map.begin();
   1973          it != ref_to_freq_map.end(); ++it) {
   1974       Printf("ref %d => freq %d\n", it->first, it->second);
   1975     }
   1976   }
   1977 
   1978   static void InitClassMembers() {
   1979     if (G_flags->keep_history == 0)
   1980       kSizeOfHistoryStackTrace = 0;
   1981     Report("INFO: Allocating %ldMb (%ld * %ldM) for Segments.\n",
   1982            (sizeof(Segment) * kMaxSID) >> 20,
   1983            sizeof(Segment), kMaxSID >> 20);
   1984     if (kSizeOfHistoryStackTrace) {
   1985       Report("INFO: Will allocate up to %ldMb for 'previous' stack traces.\n",
   1986              (kSizeOfHistoryStackTrace * sizeof(uintptr_t) * kMaxSID) >> 20);
   1987     }
   1988 
   1989     all_segments_  = new Segment[kMaxSID];
   1990     // Initialize all segments to zero.
   1991     memset(all_segments_, 0, kMaxSID * sizeof(Segment));
   1992     // Fill all_segments_[0] with garbage: SID 0 is never used as a valid segment.
   1993     memset(all_segments_, -1, sizeof(Segment));
   1994 
   1995     if (kSizeOfHistoryStackTrace > 0) {
   1996       n_stack_chunks_ = kMaxSID / kChunkSizeForStacks;
   1997       if (n_stack_chunks_ * kChunkSizeForStacks < (size_t)kMaxSID)
   1998         n_stack_chunks_++;
   1999       all_stacks_ = new uintptr_t*[n_stack_chunks_];
   2000       memset(all_stacks_, 0, sizeof(uintptr_t*) * n_stack_chunks_);
   2001     }
   2002     n_segments_    = 1;
   2003     reusable_sids_ = new vector<SID>;
   2004   }
   2005 
   2006  private:
   2007   static INLINE Segment *GetSegmentByIndex(int32_t index) {
   2008     return &all_segments_[index];
   2009   }
   2010   static INLINE Segment *GetInternal(SID sid) {
   2011     DCHECK(sid.valid());
   2012     DCHECK(sid.raw() < INTERNAL_ANNOTATE_UNPROTECTED_READ(n_segments_));
   2013     Segment *res = GetSegmentByIndex(sid.raw());
   2014     return res;
   2015   }
   2016 
   2017   // Data members.
   2018   int32_t seg_ref_count_;
   2019   LSID     lsid_[2];
   2020   TID      tid_;
   2021   uint32_t lock_era_;
   2022   VTS *vts_;
   2023 
   2024   // static class members.
   2025 
   2026   // One large array of segments. The size is set by the --max-sid flag
   2027   // and never changes. Once we are out of vacant segments, we flush the state.
   2028   static Segment *all_segments_;
   2029   // We store stack traces separately because their size is unknown
   2030   // at compile time and because they are needed less often.
   2031   // The stacks are stored as an array of chunks, instead of one array,
   2032   // so that for small tests we do not require too much RAM.
   2033   // We don't use vector<> or another resizable array to avoid expensive
   2034   // resizing.
   2035   enum { kChunkSizeForStacks = DEBUG_MODE ? 512 : 1 * 1024 * 1024 };
   2036   static uintptr_t **all_stacks_;
   2037   static size_t      n_stack_chunks_;
   2038 
   2039   static int32_t n_segments_;
   2040   static vector<SID> *reusable_sids_;
   2041 };
   2042 
   2043 Segment          *Segment::all_segments_;
   2044 uintptr_t       **Segment::all_stacks_;
   2045 size_t            Segment::n_stack_chunks_;
   2046 int32_t           Segment::n_segments_;
   2047 vector<SID>      *Segment::reusable_sids_;
   2048 
   2049 // -------- SegmentSet -------------- {{{1
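        // A SegmentSet is an ordered tuple of 2..kMaxSegmentSetSize segments that
        // are pairwise concurrent (neither happens-before the other) and belong to
        // different threads; its SIDs are kept sorted by TID.
        // A singleton set is encoded directly in the SSID (the SSID's raw value is
        // the SID itself); tuples are stored in vec_, interned via map_, and
        // addressed by negative SSIDs (index -ssid.raw()-1).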
   2050 class SegmentSet {
   2051  public:
   2052   static NOINLINE SSID AddSegmentToSS(SSID old_ssid, SID new_sid);
   2053   static NOINLINE SSID RemoveSegmentFromSS(SSID old_ssid, SID sid_to_remove);
   2054 
   2055   static INLINE SSID AddSegmentToTupleSS(SSID ssid, SID new_sid);
   2056   static INLINE SSID RemoveSegmentFromTupleSS(SSID old_ssid, SID sid_to_remove);
   2057 
   2058   SSID ComputeSSID() {
   2059     SSID res = map_->GetIdOrZero(this);
   2060     CHECK_NE(res.raw(), 0);
   2061     return res;
   2062   }
   2063 
   2064   int ref_count() const { return ref_count_; }
   2065 
   2066   static void AssertLive(SSID ssid, int line) {
   2067     DCHECK(ssid.valid());
   2068     if (DEBUG_MODE) {
   2069       if (ssid.IsSingleton()) {
   2070         Segment::AssertLive(ssid.GetSingleton(), line);
   2071       } else {
   2072         DCHECK(ssid.IsTuple());
   2073         int idx = -ssid.raw()-1;
   2074         DCHECK(idx < static_cast<int>(vec_->size()));
   2075         DCHECK(idx >= 0);
   2076         SegmentSet *res = (*vec_)[idx];
   2077         if (!res) {
   2078           Printf("SegmentSet::AssertLive failed at line %d (ssid=%d)\n",
   2079                  line, ssid.raw());
   2080           DCHECK(0);
   2081         }
   2082         DCHECK(res);
   2083         DCHECK(res->ref_count_ >= 0);
   2084         res->Validate(line);
   2085 
   2086       }
   2087     }
   2088   }
   2089 
   2090   static SegmentSet *Get(SSID ssid) {
   2091     DCHECK(ssid.valid());
   2092     DCHECK(!ssid.IsSingleton());
   2093     int idx = -ssid.raw()-1;
   2094     ANNOTATE_IGNORE_READS_BEGIN();
   2095     DCHECK(idx < static_cast<int>(vec_->size()) && idx >= 0);
   2096     ANNOTATE_IGNORE_READS_END();
   2097     SegmentSet *res = (*vec_)[idx];
   2098     DCHECK(res);
   2099     DCHECK(res->size() >= 2);
   2100     return res;
   2101   }
   2102 
   2103   void RecycleOneSegmentSet(SSID ssid) {
   2104     DCHECK(ref_count_ == 0);
   2105     DCHECK(ssid.valid());
   2106     DCHECK(!ssid.IsSingleton());
   2107     int idx = -ssid.raw()-1;
   2108     DCHECK(idx < static_cast<int>(vec_->size()) && idx >= 0);
   2109     CHECK((*vec_)[idx] == this);
   2110     // Printf("SegmentSet::RecycleOneSegmentSet: %d\n", ssid.raw());
   2111     //
   2112     // Recycle segments
   2113     for (int i = 0; i < kMaxSegmentSetSize; i++) {
   2114       SID sid = this->GetSID(i);
   2115       if (sid.raw() == 0) break;
   2116       Segment::Unref(sid, "SegmentSet::Recycle");
   2117     }
   2118     ref_count_ = -1;
   2119 
   2120     map_->Erase(this);
   2121     ready_to_be_reused_->push_back(ssid);
   2122     G_stats->ss_recycle++;
   2123   }
   2124 
   2125   static void INLINE Ref(SSID ssid, const char *where) {
   2126     AssertTILHeld(); // The reference counting logic below is not thread-safe
   2127     DCHECK(ssid.valid());
   2128     if (ssid.IsSingleton()) {
   2129       Segment::Ref(ssid.GetSingleton(), where);
   2130     } else {
   2131       SegmentSet *sset = Get(ssid);
   2132       // Printf("SSRef   : %d ref=%d %s\n", ssid.raw(), sset->ref_count_, where);
   2133       DCHECK(sset->ref_count_ >= 0);
   2134       sset->ref_count_++;
   2135     }
   2136   }
   2137 
   2138   static void INLINE Unref(SSID ssid, const char *where) {
   2139     AssertTILHeld(); // The reference counting logic below is not thread-safe
   2140     DCHECK(ssid.valid());
   2141     if (ssid.IsSingleton()) {
   2142       Segment::Unref(ssid.GetSingleton(), where);
   2143     } else {
   2144       SegmentSet *sset = Get(ssid);
   2145       // Printf("SSUnref : %d ref=%d %s\n", ssid.raw(), sset->ref_count_, where);
   2146       DCHECK(sset->ref_count_ > 0);
   2147       sset->ref_count_--;
   2148       if (sset->ref_count_ == 0) {
   2149         // We don't delete an unused SSID straight away, for performance
   2150         // reasons (to avoid flushing caches too often, and because the SSID
   2151         // may be reused again soon).
   2152         //
   2153         // Instead, we use two queues (deques):
   2154         //    ready_to_be_recycled_ and ready_to_be_reused_.
   2155         // The algorithm is as follows:
   2156         // 1) When refcount_ becomes zero, we push the SSID into
   2157         //    ready_to_be_recycled_.
   2158         // 2) When ready_to_be_recycled_ becomes too large, we call
   2159         //    FlushRecycleQueue().
   2160         //    In FlushRecycleQueue(), we pop the first half of
   2161         //    ready_to_be_recycled_ and for each popped SSID we do
   2162         //     * if "refcount_ > 0", do nothing (this SSID is in use again)
   2163         //     * otherwise, we recycle this SSID (delete its VTS, etc) and push
   2164         //       it into ready_to_be_reused_
   2165         // 3) When a new SegmentSet is about to be created, we re-use SSID from
   2166         //    ready_to_be_reused_ (if available)
   2167         ready_to_be_recycled_->push_back(ssid);
   2168         if (UNLIKELY(ready_to_be_recycled_->size() >
   2169                      2 * G_flags->segment_set_recycle_queue_size)) {
   2170           FlushRecycleQueue();
   2171         }
   2172       }
   2173     }
   2174   }
   2175 
   2176   static void FlushRecycleQueue() {
   2177     while (ready_to_be_recycled_->size() >
   2178         G_flags->segment_set_recycle_queue_size) {
   2179       SSID rec_ssid = ready_to_be_recycled_->front();
   2180       ready_to_be_recycled_->pop_front();
   2181       int idx = -rec_ssid.raw()-1;
   2182       SegmentSet *rec_ss = (*vec_)[idx];
   2183       DCHECK(rec_ss);
   2184       DCHECK(rec_ss == Get(rec_ssid));
   2185       // Check that this SSID hasn't been referenced again in the meantime.
   2186       if (rec_ss->ref_count_ == 0) {
   2187         rec_ss->RecycleOneSegmentSet(rec_ssid);
   2188       }
   2189     }
   2190 
   2191     // SSIDs will be reused soon - need to flush some caches.
   2192     FlushCaches();
   2193   }
   2194 
   2195   string ToString() const;
   2196   void Print() {
   2197     Printf("SS%d:%s\n", -ComputeSSID().raw(), ToString().c_str());
   2198   }
   2199 
   2200   static string ToString(SSID ssid) {
   2201     CHECK(ssid.IsValidOrEmpty());
   2202     if (ssid.IsSingleton()) {
   2203       return "{" +  Segment::ToStringTidOnly(SID(ssid.raw())) + "}";
   2204     } else if (ssid.IsEmpty()) {
   2205       return "{}";
   2206     } else {
   2207       AssertLive(ssid, __LINE__);
   2208       return Get(ssid)->ToString();
   2209     }
   2210   }
   2211 
   2212 
   2213   static string ToStringWithLocks(SSID ssid);
   2214 
   2215   static void FlushCaches() {
   2216     add_segment_cache_->Flush();
   2217     remove_segment_cache_->Flush();
   2218   }
   2219 
   2220   static void ForgetAllState() {
   2221     for (size_t i = 0; i < vec_->size(); i++) {
   2222       delete (*vec_)[i];
   2223     }
   2224     map_->Clear();
   2225     vec_->clear();
   2226     ready_to_be_reused_->clear();
   2227     ready_to_be_recycled_->clear();
   2228     FlushCaches();
   2229   }
   2230 
   2231 
   2232   static void Test();
   2233 
   2234   static int32_t Size(SSID ssid) {
   2235     if (ssid.IsEmpty()) return 0;
   2236     if (ssid.IsSingleton()) return 1;
   2237     return Get(ssid)->size();
   2238   }
   2239 
   2240   SID GetSID(int32_t i) const {
   2241     DCHECK(i >= 0 && i < kMaxSegmentSetSize);
   2242     DCHECK(i == 0 || sids_[i-1].raw() != 0);
   2243     return sids_[i];
   2244   }
   2245 
   2246   void SetSID(int32_t i, SID sid) {
   2247     DCHECK(i >= 0 && i < kMaxSegmentSetSize);
   2248     DCHECK(i == 0 || sids_[i-1].raw() != 0);
   2249     sids_[i] = sid;
   2250   }
   2251 
   2252   static SID GetSID(SSID ssid, int32_t i, int line) {
   2253     DCHECK(ssid.valid());
   2254     if (ssid.IsSingleton()) {
   2255       DCHECK(i == 0);
   2256       Segment::AssertLive(ssid.GetSingleton(), line);
   2257       return ssid.GetSingleton();
   2258     } else {
   2259       AssertLive(ssid, __LINE__);
   2260       SID sid = Get(ssid)->GetSID(i);
   2261       Segment::AssertLive(sid, line);
   2262       return sid;
   2263     }
   2264   }
   2265 
   2266   static bool INLINE Contains(SSID ssid, SID seg) {
   2267     if (LIKELY(ssid.IsSingleton())) {
   2268       return ssid.GetSingleton() == seg;
   2269     } else if (LIKELY(ssid.IsEmpty())) {
   2270       return false;
   2271     }
   2272 
   2273     SegmentSet *ss = Get(ssid);
   2274     for (int i = 0; i < kMaxSegmentSetSize; i++) {
   2275       SID sid = ss->GetSID(i);
   2276       if (sid.raw() == 0) break;
   2277       if (sid == seg)
   2278         return true;
   2279     }
   2280     return false;
   2281   }
   2282 
   2283   static Segment *GetSegmentForNonSingleton(SSID ssid, int32_t i, int line) {
   2284     return Segment::Get(GetSID(ssid, i, line));
   2285   }
   2286 
   2287   void NOINLINE Validate(int line) const;
   2288 
   2289   static size_t NumberOfSegmentSets() { return vec_->size(); }
   2290 
   2291 
   2292   static void InitClassMembers() {
   2293     map_    = new Map;
   2294     vec_    = new vector<SegmentSet *>;
   2295     ready_to_be_recycled_ = new deque<SSID>;
   2296     ready_to_be_reused_ = new deque<SSID>;
   2297     add_segment_cache_ = new SsidSidToSidCache;
   2298     remove_segment_cache_ = new SsidSidToSidCache;
   2299   }
   2300 
   2301  private:
   2302   SegmentSet()  // Private CTOR
   2303     : ref_count_(0) {
   2304     // sids_ are filled with zeroes due to SID default CTOR.
   2305     if (DEBUG_MODE) {
   2306       for (int i = 0; i < kMaxSegmentSetSize; i++)
   2307         CHECK_EQ(sids_[i].raw(), 0);
   2308     }
   2309   }
   2310 
   2311   int size() const {
   2312     for (int i = 0; i < kMaxSegmentSetSize; i++) {
   2313       if (sids_[i].raw() == 0) {
   2314         CHECK_GE(i, 2);
   2315         return i;
   2316       }
   2317     }
   2318     return kMaxSegmentSetSize;
   2319   }
   2320 
   2321   static INLINE SSID AllocateAndCopy(SegmentSet *ss) {
   2322     DCHECK(ss->ref_count_ == 0);
   2323     DCHECK(sizeof(int32_t) == sizeof(SID));
   2324     SSID res_ssid;
   2325     SegmentSet *res_ss = 0;
   2326 
   2327     if (!ready_to_be_reused_->empty()) {
   2328       res_ssid = ready_to_be_reused_->front();
   2329       ready_to_be_reused_->pop_front();
   2330       int idx = -res_ssid.raw()-1;
   2331       res_ss = (*vec_)[idx];
   2332       DCHECK(res_ss);
   2333       DCHECK(res_ss->ref_count_ == -1);
   2334       G_stats->ss_reuse++;
   2335       for (int i = 0; i < kMaxSegmentSetSize; i++) {
   2336         res_ss->sids_[i] = SID(0);
   2337       }
   2338     } else {
   2339       // create a new one
   2340       ScopedMallocCostCenter cc("SegmentSet::CreateNewSegmentSet");
   2341       G_stats->ss_create++;
   2342       res_ss = new SegmentSet;
   2343       vec_->push_back(res_ss);
   2344       res_ssid = SSID(-((int32_t)vec_->size()));
   2345       CHECK(res_ssid.valid());
   2346     }
   2347     DCHECK(res_ss);
   2348     res_ss->ref_count_ = 0;
   2349     for (int i = 0; i < kMaxSegmentSetSize; i++) {
   2350       SID sid = ss->GetSID(i);
   2351       if (sid.raw() == 0) break;
   2352       Segment::Ref(sid, "SegmentSet::FindExistingOrAlocateAndCopy");
   2353       res_ss->SetSID(i, sid);
   2354     }
   2355     DCHECK(res_ss == Get(res_ssid));
   2356     map_->Insert(res_ss, res_ssid);
   2357     return res_ssid;
   2358   }
   2359 
   2360   static NOINLINE SSID FindExistingOrAlocateAndCopy(SegmentSet *ss) {
   2361     if (DEBUG_MODE) {
   2362       int size = ss->size();
   2363       if (size == 2) G_stats->ss_size_2++;
   2364       if (size == 3) G_stats->ss_size_3++;
   2365       if (size == 4) G_stats->ss_size_4++;
   2366       if (size > 4) G_stats->ss_size_other++;
   2367     }
   2368 
   2369     // First, check if there is such set already.
   2370     SSID ssid = map_->GetIdOrZero(ss);
   2371     if (ssid.raw() != 0) {  // Found.
   2372       AssertLive(ssid, __LINE__);
   2373       G_stats->ss_find++;
   2374       return ssid;
   2375     }
   2376     // If no such set, create one.
   2377     return AllocateAndCopy(ss);
   2378   }
   2379 
   2380   static INLINE SSID DoubletonSSID(SID sid1, SID sid2) {
   2381     SegmentSet tmp;
   2382     tmp.SetSID(0, sid1);
   2383     tmp.SetSID(1, sid2);
   2384     return FindExistingOrAlocateAndCopy(&tmp);
   2385   }
   2386 
   2387   // testing only
   2388   static SegmentSet *AddSegmentToTupleSS(SegmentSet *ss, SID new_sid) {
   2389     SSID ssid = AddSegmentToTupleSS(ss->ComputeSSID(), new_sid);
   2390     AssertLive(ssid, __LINE__);
   2391     return Get(ssid);
   2392   }
   2393 
   2394   static SegmentSet *Doubleton(SID sid1, SID sid2) {
   2395     SSID ssid = DoubletonSSID(sid1, sid2);
   2396     AssertLive(ssid, __LINE__);
   2397     return Get(ssid);
   2398   }
   2399 
   2400   // static data members
   2401   struct Less {
   2402     INLINE bool operator() (const SegmentSet *ss1,
   2403                             const SegmentSet *ss2) const {
   2404       for (int i = 0; i < kMaxSegmentSetSize; i++) {
   2405         SID sid1 = ss1->sids_[i],
   2406             sid2 = ss2->sids_[i];
   2407         if (sid1 != sid2) return sid1 < sid2;
   2408       }
   2409       return false;
   2410     }
   2411   };
   2412 
   2413   struct SSEq {
   2414     INLINE bool operator() (const SegmentSet *ss1,
   2415                             const SegmentSet *ss2) const {
   2416       G_stats->sseq_calls++;
   2417 
   2418       for (int i = 0; i < kMaxSegmentSetSize; i++) {
   2419         SID sid1 = ss1->sids_[i],
   2420             sid2 = ss2->sids_[i];
   2421         if (sid1 != sid2) return false;
   2422       }
   2423       return true;
   2424     }
   2425   };
   2426 
   2427   struct SSHash {
   2428     INLINE size_t operator() (const SegmentSet *ss) const {
   2429       uintptr_t res = 0;
   2430       uint32_t* sids_array = (uint32_t*)ss->sids_;
   2431       // We must have an even number of SIDs.
   2432       DCHECK((kMaxSegmentSetSize % 2) == 0);
   2433 
   2434       G_stats->sshash_calls++;
   2435       // XOR all SIDs together, byte-swapping every other one.
   2436       for (int i = 0; i < kMaxSegmentSetSize; i += 2) {
   2437         uintptr_t t1 = sids_array[i];
   2438         uintptr_t t2 = sids_array[i+1];
   2439         if (t2) t2 = tsan_bswap(t2);
   2440         res = res ^ t1 ^ t2;
   2441       }
   2442       return res;
   2443     }
   2444   };
   2445 
   2446   struct SSTraits {
   2447     enum {
   2448       // These values are taken from the hash_compare defaults.
   2449       bucket_size = 4,  // Must be greater than zero.
   2450       min_buckets = 8,  // Must be power of 2.
   2451     };
   2452 
   2453     INLINE size_t operator()(const SegmentSet *ss) const {
   2454       SSHash sshash;
   2455       return sshash(ss);
   2456     }
   2457 
   2458     INLINE bool operator()(const SegmentSet *ss1, const SegmentSet *ss2) const {
   2459       Less less;
   2460       return less(ss1, ss2);
   2461     }
   2462   };
   2463 
   2464   template <class MapType>
   2465   static SSID GetIdOrZeroFromMap(MapType *map, SegmentSet *ss) {
   2466     typename MapType::iterator it = map->find(ss);
   2467     if (it == map->end())
   2468       return SSID(0);
   2469     return it->second;
   2470   }
   2471 
   2472   class Map {
   2473    public:
   2474     SSID GetIdOrZero(SegmentSet *ss) {
   2475       return GetIdOrZeroFromMap(&map_, ss);
   2476     }
   2477 
   2478     void Insert(SegmentSet *ss, SSID id) {
   2479       map_[ss] = id;
   2480     }
   2481 
   2482     void Erase(SegmentSet *ss) {
   2483       CHECK(map_.erase(ss));
   2484     }
   2485 
   2486     void Clear() {
   2487       map_.clear();
   2488     }
   2489 
   2490    private:
   2491     // TODO(timurrrr): consider making a custom hash_table.
   2492 #if defined(_MSC_VER)
   2493     typedef stdext::hash_map<SegmentSet*, SSID, SSTraits > MapType__;
   2494 #elif 1
   2495     typedef unordered_map<SegmentSet*, SSID, SSHash, SSEq > MapType__;
   2496 #else
   2497     // Old code, may be useful for debugging.
   2498     typedef map<SegmentSet*, SSID, Less > MapType__;
   2499 #endif
   2500     MapType__ map_;
   2501   };
   2502 
   2503 //  typedef map<SegmentSet*, SSID, Less> Map;
   2504 
   2505   static Map                  *map_;
   2506   // TODO(kcc): use vector<SegmentSet> instead.
   2507   static vector<SegmentSet *> *vec_;
   2508   static deque<SSID>         *ready_to_be_reused_;
   2509   static deque<SSID>         *ready_to_be_recycled_;
   2510 
   2511   typedef PairCache<SSID, SID, SSID, 1009, 1> SsidSidToSidCache;
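          // Memoize the results of AddSegmentToSS() and RemoveSegmentFromSS(),
          // keyed by (SSID, SID); flushed via FlushCaches() whenever SSIDs may be
          // recycled and reused.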
   2512   static SsidSidToSidCache    *add_segment_cache_;
   2513   static SsidSidToSidCache    *remove_segment_cache_;
   2514 
   2515   // sids_ contains up to kMaxSegmentSetSize SIDs.
   2516   // Contains zeros at the end if size < kMaxSegmentSetSize.
   2517   SID     sids_[kMaxSegmentSetSize];
   2518   int32_t ref_count_;
   2519 };
   2520 
   2521 SegmentSet::Map      *SegmentSet::map_;
   2522 vector<SegmentSet *> *SegmentSet::vec_;
   2523 deque<SSID>         *SegmentSet::ready_to_be_reused_;
   2524 deque<SSID>         *SegmentSet::ready_to_be_recycled_;
   2525 SegmentSet::SsidSidToSidCache    *SegmentSet::add_segment_cache_;
   2526 SegmentSet::SsidSidToSidCache    *SegmentSet::remove_segment_cache_;
   2527 
   2528 
   2529 
   2530 
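        // static
        // Returns a set containing those segments of "old_ssid" that neither
        // happen-before "sid_to_remove" nor belong to its thread; the result is
        // memoized in remove_segment_cache_.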
   2531 SSID SegmentSet::RemoveSegmentFromSS(SSID old_ssid, SID sid_to_remove) {
   2532   DCHECK(old_ssid.IsValidOrEmpty());
   2533   DCHECK(sid_to_remove.valid());
   2534   SSID res;
   2535   if (remove_segment_cache_->Lookup(old_ssid, sid_to_remove, &res)) {
   2536     return res;
   2537   }
   2538 
   2539   if (old_ssid.IsEmpty()) {
   2540     res = old_ssid;  // Nothing to remove.
   2541   } else if (LIKELY(old_ssid.IsSingleton())) {
   2542     SID sid = old_ssid.GetSingleton();
   2543     if (Segment::HappensBeforeOrSameThread(sid, sid_to_remove))
   2544       res = SSID(0);  // Empty.
   2545     else
   2546       res = old_ssid;
   2547   } else {
   2548     res = RemoveSegmentFromTupleSS(old_ssid, sid_to_remove);
   2549   }
   2550   remove_segment_cache_->Insert(old_ssid, sid_to_remove, res);
   2551   return res;
   2552 }
   2553 
   2554 
   2555 // static
   2556 //
   2557 // This method returns the SSID of a SegmentSet containing "new_sid" and all
   2558 // those segments from "old_ssid" that do not happen-before "new_sid".
   2559 //
   2560 // For details, see
   2561 // http://code.google.com/p/data-race-test/wiki/ThreadSanitizerAlgorithm#State_machine
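        //
        // Example: if "old_ssid" is the singleton {T1/S2} and "new_sid" is T2/S7
        // with no happens-before relation in either direction, the result is the
        // doubleton {T1/S2, T2/S7}; if S2 happens-before S7 (or they are in the
        // same thread), the result is just the singleton {T2/S7}.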
   2562 SSID SegmentSet::AddSegmentToSS(SSID old_ssid, SID new_sid) {
   2563   DCHECK(old_ssid.raw() == 0 || old_ssid.valid());
   2564   DCHECK(new_sid.valid());
   2565   Segment::AssertLive(new_sid, __LINE__);
   2566   SSID res;
   2567 
   2568   // These two TIDs will only be used if old_ssid.IsSingleton() == true.
   2569   TID old_tid;
   2570   TID new_tid;
   2571 
   2572   if (LIKELY(old_ssid.IsSingleton())) {
   2573     SID old_sid(old_ssid.raw());
   2574     DCHECK(old_sid.valid());
   2575     Segment::AssertLive(old_sid, __LINE__);
   2576 
   2577     if (UNLIKELY(old_sid == new_sid)) {
   2578       // The new segment equals the old one - nothing has changed.
   2579       return old_ssid;
   2580     }
   2581 
   2582     old_tid = Segment::Get(old_sid)->tid();
   2583     new_tid = Segment::Get(new_sid)->tid();
   2584     if (LIKELY(old_tid == new_tid)) {
   2585       // The new segment is in the same thread - just replace the SID.
   2586       return SSID(new_sid);
   2587     }
   2588 
   2589     if (Segment::HappensBefore(old_sid, new_sid)) {
   2590       // The new segment is in another thread, but old segment
   2591       // happens before the new one - just replace the SID.
   2592       return SSID(new_sid);
   2593     }
   2594 
   2595     DCHECK(!Segment::HappensBefore(new_sid, old_sid));
   2596     // The only other case is the Singleton->Doubleton transition, see below.
   2597   } else if (LIKELY(old_ssid.IsEmpty())) {
   2598     return SSID(new_sid);
   2599   }
   2600 
   2601   // Lookup the cache.
   2602   if (add_segment_cache_->Lookup(old_ssid, new_sid, &res)) {
   2603     SegmentSet::AssertLive(res, __LINE__);
   2604     return res;
   2605   }
   2606 
   2607   if (LIKELY(old_ssid.IsSingleton())) {
   2608     // Singleton->Doubleton transition.
   2609     // These two TIDs were initialized before cache lookup (see above).
   2610     DCHECK(old_tid.valid());
   2611     DCHECK(new_tid.valid());
   2612 
   2613     SID old_sid(old_ssid.raw());
   2614     DCHECK(old_sid.valid());
   2615 
   2616     DCHECK(!Segment::HappensBefore(new_sid, old_sid));
   2617     DCHECK(!Segment::HappensBefore(old_sid, new_sid));
   2618     res = (old_tid < new_tid
   2619       ? DoubletonSSID(old_sid, new_sid)
   2620       : DoubletonSSID(new_sid, old_sid));
   2621     SegmentSet::AssertLive(res, __LINE__);
   2622   } else {
   2623     res = AddSegmentToTupleSS(old_ssid, new_sid);
   2624     SegmentSet::AssertLive(res, __LINE__);
   2625   }
   2626 
   2627   // Put the result into cache.
   2628   add_segment_cache_->Insert(old_ssid, new_sid, res);
   2629 
   2630   return res;
   2631 }
   2632 
   2633 SSID SegmentSet::RemoveSegmentFromTupleSS(SSID ssid, SID sid_to_remove) {
   2634   DCHECK(ssid.IsTuple());
   2635   DCHECK(ssid.valid());
   2636   AssertLive(ssid, __LINE__);
   2637   SegmentSet *ss = Get(ssid);
   2638 
   2639   int32_t old_size = 0, new_size = 0;
   2640   SegmentSet tmp;
   2641   SID * tmp_sids = tmp.sids_;
   2642   CHECK(sizeof(int32_t) == sizeof(SID));
   2643 
   2644   for (int i = 0; i < kMaxSegmentSetSize; i++, old_size++) {
   2645     SID sid = ss->GetSID(i);
   2646     if (sid.raw() == 0) break;
   2647     DCHECK(sid.valid());
   2648     Segment::AssertLive(sid, __LINE__);
   2649     if (Segment::HappensBeforeOrSameThread(sid, sid_to_remove))
   2650       continue;  // Skip this segment from the result.
   2651     tmp_sids[new_size++] = sid;
   2652   }
   2653 
   2654   if (new_size == old_size) return ssid;
   2655   if (new_size == 0) return SSID(0);
   2656   if (new_size == 1) return SSID(tmp_sids[0]);
   2657 
   2658   if (DEBUG_MODE) tmp.Validate(__LINE__);
   2659 
   2660   SSID res = FindExistingOrAlocateAndCopy(&tmp);
   2661   if (DEBUG_MODE) Get(res)->Validate(__LINE__);
   2662   return res;
   2663 }
   2664 
   2665 //  static
   2666 SSID SegmentSet::AddSegmentToTupleSS(SSID ssid, SID new_sid) {
   2667   DCHECK(ssid.IsTuple());
   2668   DCHECK(ssid.valid());
   2669   AssertLive(ssid, __LINE__);
   2670   SegmentSet *ss = Get(ssid);
   2671 
   2672   Segment::AssertLive(new_sid, __LINE__);
   2673   const Segment *new_seg = Segment::Get(new_sid);
   2674   TID            new_tid = new_seg->tid();
   2675 
   2676   int32_t old_size = 0, new_size = 0;
   2677   SID tmp_sids[kMaxSegmentSetSize + 1];
   2678   CHECK(sizeof(int32_t) == sizeof(SID));
   2679   bool inserted_new_sid = false;
   2680   // Traverse all SIDs in the current SS; they are ordered by TID.
   2681   for (int i = 0; i < kMaxSegmentSetSize; i++, old_size++) {
   2682     SID sid = ss->GetSID(i);
   2683     if (sid.raw() == 0) break;
   2684     DCHECK(sid.valid());
   2685     Segment::AssertLive(sid, __LINE__);
   2686     const Segment *seg = Segment::Get(sid);
   2687     TID            tid = seg->tid();
   2688 
   2689     if (sid == new_sid) {
   2690       // we are trying to insert a sid which is already there.
   2691       // SS will not change.
   2692       return ssid;
   2693     }
   2694 
   2695     if (tid == new_tid) {
   2696       if (seg->vts() == new_seg->vts() &&
   2697           seg->lsid(true) == new_seg->lsid(true) &&
   2698           seg->lsid(false) == new_seg->lsid(false)) {
   2699         // Optimization: if a segment with the same VTS and LS
   2700         // as in the current is already inside SS, don't modify the SS.
   2701         // Improves performance with --keep-history >= 1.
   2702         return ssid;
   2703       }
   2704       // we have another segment from the same thread => replace it.
   2705       tmp_sids[new_size++] = new_sid;
   2706       inserted_new_sid = true;
   2707       continue;
   2708     }
   2709 
   2710     if (tid > new_tid && !inserted_new_sid) {
   2711       // there was no segment with this tid, put it now.
   2712       tmp_sids[new_size++] = new_sid;
   2713       inserted_new_sid = true;
   2714     }
   2715 
   2716     if (!Segment::HappensBefore(sid, new_sid)) {
   2717       DCHECK(!Segment::HappensBefore(new_sid, sid));
   2718       tmp_sids[new_size++] = sid;
   2719     }
   2720   }
   2721 
   2722   if (!inserted_new_sid) {
   2723     tmp_sids[new_size++] = new_sid;
   2724   }
   2725 
   2726   CHECK_GT(new_size, 0);
   2727   if (new_size == 1) {
   2728     return SSID(new_sid.raw());  // Singleton.
   2729   }
   2730 
   2731   if (new_size > kMaxSegmentSetSize) {
   2732     CHECK(new_size == kMaxSegmentSetSize + 1);
   2733     // we need to forget one segment. Which? The oldest one.
   2734     int seg_to_forget = 0;
   2735     Segment *oldest_segment = NULL;
   2736     for (int i = 0; i < new_size; i++) {
   2737       SID sid = tmp_sids[i];
   2738       if (sid == new_sid) continue;
   2739       Segment *s = Segment::Get(tmp_sids[i]);
   2740       if (oldest_segment == NULL ||
   2741           oldest_segment->vts()->uniq_id() > s->vts()->uniq_id()) {
   2742         oldest_segment = s;
   2743         seg_to_forget = i;
   2744       }
   2745     }
   2746     DCHECK(oldest_segment);
   2747 
   2748     // Printf("seg_to_forget: %d T%d\n", tmp_sids[seg_to_forget].raw(),
   2749     //        oldest_segment->tid().raw());
   2750     for (int i = seg_to_forget; i < new_size - 1; i++) {
   2751       tmp_sids[i] = tmp_sids[i+1];
   2752     }
   2753     new_size--;
   2754   }
   2755 
   2756   CHECK(new_size <= kMaxSegmentSetSize);
   2757   SegmentSet tmp;
   2758   for (int i = 0; i < new_size; i++)
   2759     tmp.sids_[i] = tmp_sids[i];  // TODO(timurrrr): avoid copying?
   2760   if (DEBUG_MODE) tmp.Validate(__LINE__);
   2761 
   2762   SSID res = FindExistingOrAlocateAndCopy(&tmp);
   2763   if (DEBUG_MODE) Get(res)->Validate(__LINE__);
   2764   return res;
   2765 }
   2766 
   2767 
   2768 
   2769 void NOINLINE SegmentSet::Validate(int line) const {
   2770   // This is expensive!
   2771   int my_size = size();
   2772   for (int i = 0; i < my_size; i++) {
   2773     SID sid1 = GetSID(i);
   2774     CHECK(sid1.valid());
   2775     Segment::AssertLive(sid1, __LINE__);
   2776 
   2777     for (int j = i + 1; j < my_size; j++) {
   2778       SID sid2 = GetSID(j);
   2779       CHECK(sid2.valid());
   2780       Segment::AssertLive(sid2, __LINE__);
   2781 
   2782       bool hb1 = Segment::HappensBefore(sid1, sid2);
   2783       bool hb2 = Segment::HappensBefore(sid2, sid1);
   2784       if (hb1 || hb2) {
   2785         Printf("BAD at line %d: %d %d %s %s\n   %s\n   %s\n",
   2786                line, static_cast<int>(hb1), static_cast<int>(hb2),
   2787                Segment::ToString(sid1).c_str(),
   2788                Segment::ToString(sid2).c_str(),
   2789                Segment::Get(sid1)->vts()->ToString().c_str(),
   2790                Segment::Get(sid2)->vts()->ToString().c_str());
   2791       }
   2792       CHECK(!Segment::HappensBefore(GetSID(i), GetSID(j)));
   2793       CHECK(!Segment::HappensBefore(GetSID(j), GetSID(i)));
   2794       CHECK(Segment::Get(sid1)->tid() < Segment::Get(sid2)->tid());
   2795     }
   2796   }
   2797 
   2798   for (int i = my_size; i < kMaxSegmentSetSize; i++) {
   2799     CHECK_EQ(sids_[i].raw(), 0);
   2800   }
   2801 }
   2802 
   2803 string SegmentSet::ToStringWithLocks(SSID ssid) {
   2804   if (ssid.IsEmpty()) return "";
   2805   string res = "";
   2806   for (int i = 0; i < Size(ssid); i++) {
   2807     SID sid = GetSID(ssid, i, __LINE__);
   2808     if (i) res += ", ";
   2809     res += Segment::ToStringWithLocks(sid);
   2810   }
   2811   return res;
   2812 }
   2813 
   2814 string SegmentSet::ToString() const {
   2815   Validate(__LINE__);
   2816   string res = "{";
   2817   for (int i = 0; i < size(); i++) {
   2818     SID sid = GetSID(i);
   2819     if (i) res += ", ";
   2820     CHECK(sid.valid());
   2821     Segment::AssertLive(sid, __LINE__);
   2822     res += Segment::ToStringTidOnly(sid).c_str();
   2823   }
   2824   res += "}";
   2825   return res;
   2826 }
   2827 
   2828 // static
   2829 void SegmentSet::Test() {
   2830   LSID ls(0);  // dummy
   2831   SID sid1 = Segment::AddNewSegment(TID(0), VTS::Parse("[0:2;]"), ls, ls);
   2832   SID sid2 = Segment::AddNewSegment(TID(1), VTS::Parse("[0:1; 1:1]"), ls, ls);
   2833   SID sid3 = Segment::AddNewSegment(TID(2), VTS::Parse("[0:1; 2:1]"), ls, ls);
   2834   SID sid4 = Segment::AddNewSegment(TID(3), VTS::Parse("[0:1; 3:1]"), ls, ls);
   2835   SID sid5 = Segment::AddNewSegment(TID(4), VTS::Parse("[0:3; 2:2; 3:2;]"),
   2836                                     ls, ls);
   2837   SID sid6 = Segment::AddNewSegment(TID(4), VTS::Parse("[0:3; 1:2; 2:2; 3:2;]"),
   2838                                     ls, ls);
   2839 
   2840 
   2841   // SS1:{T0/S1, T2/S3}
   2842   SegmentSet *d1 = SegmentSet::Doubleton(sid1, sid3);
   2843   d1->Print();
   2844   CHECK(SegmentSet::Doubleton(sid1, sid3) == d1);
   2845   // SS2:{T0/S1, T1/S2, T2/S3}
   2846   SegmentSet *d2 = SegmentSet::AddSegmentToTupleSS(d1, sid2);
   2847   CHECK(SegmentSet::AddSegmentToTupleSS(d1, sid2) == d2);
   2848   d2->Print();
   2849 
   2850   // SS3:{T0/S1, T2/S3, T3/S4}
   2851   SegmentSet *d3 = SegmentSet::AddSegmentToTupleSS(d1, sid4);
   2852   CHECK(SegmentSet::AddSegmentToTupleSS(d1, sid4) == d3);
   2853   d3->Print();
   2854 
   2855   // SS4:{T0/S1, T1/S2, T2/S3, T3/S4}
   2856   SegmentSet *d4 = SegmentSet::AddSegmentToTupleSS(d2, sid4);
   2857   CHECK(SegmentSet::AddSegmentToTupleSS(d2, sid4) == d4);
   2858   CHECK(SegmentSet::AddSegmentToTupleSS(d3, sid2) == d4);
   2859   d4->Print();
   2860 
   2861   // SS5:{T1/S2, T4/S5}
   2862   SegmentSet *d5 = SegmentSet::AddSegmentToTupleSS(d4, sid5);
   2863   d5->Print();
   2864 
   2865   SSID ssid6 = SegmentSet::AddSegmentToTupleSS(d4->ComputeSSID(), sid6);
   2866   CHECK(ssid6.IsSingleton());
   2867   Printf("%s\n", ToString(ssid6).c_str());
   2868   CHECK_EQ(sid6.raw(), 6);
   2869   CHECK_EQ(ssid6.raw(), 6);
   2870 }
   2871 
   2872 // -------- Shadow Value ------------ {{{1
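        // A ShadowValue describes the access state of a byte (or of an aligned
        // group of bytes, see CacheLine below): a pair of segment set IDs,
        // rd_ssid_ for the readers and wr_ssid_ for the writers. Both IDs being
        // zero means the location has not been accessed yet (IsNew()).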
   2873 class ShadowValue {
   2874  public:
   2875   ShadowValue() {
   2876     if (DEBUG_MODE) {
   2877       rd_ssid_ = 0xDEADBEEF;
   2878       wr_ssid_ = 0xDEADBEEF;
   2879     }
   2880   }
   2881 
   2882   void Clear() {
   2883     rd_ssid_ = 0;
   2884     wr_ssid_ = 0;
   2885   }
   2886 
   2887   INLINE bool IsNew() const { return rd_ssid_ == 0 && wr_ssid_ == 0; }
   2888   // new experimental state machine.
   2889   SSID rd_ssid() const { return SSID(rd_ssid_); }
   2890   SSID wr_ssid() const { return SSID(wr_ssid_); }
   2891   INLINE void set(SSID rd_ssid, SSID wr_ssid) {
   2892     rd_ssid_ = rd_ssid.raw();
   2893     wr_ssid_ = wr_ssid.raw();
   2894   }
   2895 
   2896   // comparison
   2897   INLINE bool operator == (const ShadowValue &sval) const {
   2898     return rd_ssid_ == sval.rd_ssid_ &&
   2899         wr_ssid_ == sval.wr_ssid_;
   2900   }
   2901   bool operator != (const ShadowValue &sval) const {
   2902     return !(*this == sval);
   2903   }
   2904   bool operator <  (const ShadowValue &sval) const {
   2905     if (rd_ssid_ < sval.rd_ssid_) return true;
   2906     if (rd_ssid_ == sval.rd_ssid_ && wr_ssid_ < sval.wr_ssid_) return true;
   2907     return false;
   2908   }
   2909 
   2910   void Ref(const char *where) {
   2911     if (!rd_ssid().IsEmpty()) {
   2912       DCHECK(rd_ssid().valid());
   2913       SegmentSet::Ref(rd_ssid(), where);
   2914     }
   2915     if (!wr_ssid().IsEmpty()) {
   2916       DCHECK(wr_ssid().valid());
   2917       SegmentSet::Ref(wr_ssid(), where);
   2918     }
   2919   }
   2920 
   2921   void Unref(const char *where) {
   2922     if (!rd_ssid().IsEmpty()) {
   2923       DCHECK(rd_ssid().valid());
   2924       SegmentSet::Unref(rd_ssid(), where);
   2925     }
   2926     if (!wr_ssid().IsEmpty()) {
   2927       DCHECK(wr_ssid().valid());
   2928       SegmentSet::Unref(wr_ssid(), where);
   2929     }
   2930   }
   2931 
   2932   string ToString() const {
   2933     char buff[1000];
   2934     if (IsNew()) {
   2935       return "{New}";
   2936     }
   2937     snprintf(buff, sizeof(buff), "R: %s; W: %s",
   2938             SegmentSet::ToStringWithLocks(rd_ssid()).c_str(),
   2939             SegmentSet::ToStringWithLocks(wr_ssid()).c_str());
   2940     return buff;
   2941   }
   2942 
   2943  private:
   2944   int32_t rd_ssid_;
   2945   int32_t wr_ssid_;
   2946 };
   2947 
   2948 // -------- CacheLine --------------- {{{1
   2949 // The CacheLine is a set of Mask::kNBits (32 or 64) Shadow Values.
   2950 // The shadow values in a cache line are grouped in subsets of 8 values.
   2951 // If a particular address of memory is always accessed by aligned 8-byte
   2952 // read/write instructions, only the shadow value corresponding to the
   2953 // first byte is set; the remaining shadow values are not used.
   2954 // Ditto for aligned 4- and 2-byte accesses.
   2955 // If memory was accessed as 8 bytes and is then accessed as 4 bytes
   2956 // (e.g. someone used a C union), we need to split the shadow value into two.
   2957 // If the memory was accessed as 4 bytes and is now accessed as 8 bytes,
   2958 // we need to try joining the shadow values.
   2959 //
   2960 // Hence the concept of granularity_mask (which is a string of 16 bits).
   2961 // 0000000000000000 -- no accesses were observed to these 8 bytes.
   2962 // 0000000000000001 -- all accesses were 8 bytes (aligned).
   2963 // 0000000000000110 -- all accesses were 4 bytes (aligned).
   2964 // 0000000001111000 -- all accesses were 2 bytes (aligned).
   2965 // 0111111110000000 -- all accesses were 1 byte.
   2966 // 0110000000100010 -- First 4 bytes were accessed by 4 byte insns,
   2967 //   next 2 bytes by 2 byte insns, last 2 bytes by 1 byte insns.
   2968 
   2969 
   2970 INLINE bool GranularityIs8(uintptr_t off, uint16_t gr) {
   2971   return gr & 1;
   2972 }
   2973 
   2974 INLINE bool GranularityIs4(uintptr_t off, uint16_t gr) {
   2975   uintptr_t off_within_8_bytes = (off >> 2) & 1;  // 0 or 1.
   2976   return ((gr >> (1 + off_within_8_bytes)) & 1);
   2977 }
   2978 
   2979 INLINE bool GranularityIs2(uintptr_t off, uint16_t gr) {
   2980   uintptr_t off_within_8_bytes = (off >> 1) & 3;  // 0, 1, 2, or 3
   2981   return ((gr >> (3 + off_within_8_bytes)) & 1);
   2982 }
   2983 
   2984 INLINE bool GranularityIs1(uintptr_t off, uint16_t gr) {
   2985   uintptr_t off_within_8_bytes = (off) & 7;       // 0, ..., 7
   2986   return ((gr >> (7 + off_within_8_bytes)) & 1);
   2987 }
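        // Example for the predicates above: gr == 0x0006 (0000000000000110) means
        // both 4-byte halves were accessed with 4-byte granularity, so
        // GranularityIs4(0, gr) and GranularityIs4(4, gr) are true while the
        // 8-, 2- and 1-byte predicates are all false.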
   2988 
   2989 class CacheLine {
   2990  public:
   2991   static const uintptr_t kLineSizeBits = Mask::kNBitsLog;  // Don't change this.
   2992   static const uintptr_t kLineSize = Mask::kNBits;
   2993 
   2994   static CacheLine *CreateNewCacheLine(uintptr_t tag) {
   2995     ScopedMallocCostCenter cc("CreateNewCacheLine");
   2996     void *mem = free_list_->Allocate();
   2997     DCHECK(mem);
   2998     return new (mem) CacheLine(tag);
   2999   }
   3000 
   3001   static void Delete(CacheLine *line) {
   3002     free_list_->Deallocate(line);
   3003   }
   3004 
   3005   const Mask &has_shadow_value() const { return has_shadow_value_;  }
   3006   Mask &traced() { return traced_; }
   3007   Mask &published() { return published_; }
   3008   Mask &racey()  { return racey_; }
   3009   uintptr_t tag() { return tag_; }
   3010 
   3011   void DebugTrace(uintptr_t off, const char *where_str, int where_int) {
   3012     (void)off;
   3013     (void)where_str;
   3014     (void)where_int;
   3015 #if 0
   3016     if (DEBUG_MODE && tag() == G_flags->trace_addr) {
   3017       uintptr_t off8 = off & ~7;
   3018       Printf("CacheLine %p, off=%ld off8=%ld gr=%d "
   3019              "has_sval: %d%d%d%d%d%d%d%d (%s:%d)\n",
   3020              tag(), off, off8,
   3021              granularity_[off/8],
   3022              has_shadow_value_.Get(off8 + 0),
   3023              has_shadow_value_.Get(off8 + 1),
   3024              has_shadow_value_.Get(off8 + 2),
   3025              has_shadow_value_.Get(off8 + 3),
   3026              has_shadow_value_.Get(off8 + 4),
   3027              has_shadow_value_.Get(off8 + 5),
   3028              has_shadow_value_.Get(off8 + 6),
   3029              has_shadow_value_.Get(off8 + 7),
   3030              where_str, where_int
   3031              );
   3032     }
   3033 #endif
   3034   }
   3035 
   3036   // Add a new shadow value to a place where there was no shadow value before.
   3037   ShadowValue *AddNewSvalAtOffset(uintptr_t off) {
   3038     DebugTrace(off, __FUNCTION__, __LINE__);
   3039     CHECK(!has_shadow_value().Get(off));
   3040     has_shadow_value_.Set(off);
   3041     published_.Clear(off);
   3042     ShadowValue *res = GetValuePointer(off);
   3043     res->Clear();
   3044     DebugTrace(off, __FUNCTION__, __LINE__);
   3045     return res;
   3046   }
   3047 
   3048   // Return true if this line has no useful information in it.
   3049   bool Empty() {
   3050     // The line has shadow values.
   3051     if (!has_shadow_value().Empty()) return false;
   3052     // If the line is traced, racey or published, we want to keep it.
   3053     if (!traced().Empty()) return false;
   3054     if (!racey().Empty()) return false;
   3055     if (!published().Empty()) return false;
   3056     return true;
   3057   }
   3058 
   3059   INLINE Mask ClearRangeAndReturnOldUsed(uintptr_t from, uintptr_t to) {
   3060     traced_.ClearRange(from, to);
   3061     published_.ClearRange(from, to);
   3062     racey_.ClearRange(from, to);
   3063     for (uintptr_t x = (from + 7) / 8; x < to / 8; x++) {
   3064       granularity_[x] = 0;
   3065     }
   3066     return has_shadow_value_.ClearRangeAndReturnOld(from, to);
   3067   }
   3068 
   3069   void Clear() {
   3070     has_shadow_value_.Clear();
   3071     traced_.Clear();
   3072     published_.Clear();
   3073     racey_.Clear();
   3074     for (size_t i = 0; i < TS_ARRAY_SIZE(granularity_); i++)
   3075       granularity_[i] = 0;
   3076   }
   3077 
   3078   ShadowValue *GetValuePointer(uintptr_t offset) {
   3079     DCHECK(offset < kLineSize);
   3080     return  &vals_[offset];
   3081   }
   3082   ShadowValue  GetValue(uintptr_t offset) { return *GetValuePointer(offset); }
   3083 
   3084   static uintptr_t ComputeOffset(uintptr_t a) {
   3085     return a & (kLineSize - 1);
   3086   }
   3087   static uintptr_t ComputeTag(uintptr_t a) {
   3088     return a & ~(kLineSize - 1);
   3089   }
   3090   static uintptr_t ComputeNextTag(uintptr_t a) {
   3091     return ComputeTag(a) + kLineSize;
   3092   }
   3093 
   3094   uint16_t *granularity_mask(uintptr_t off) {
   3095     DCHECK(off < kLineSize);
   3096     return &granularity_[off / 8];
   3097   }
   3098 
   3099   void Split_8_to_4(uintptr_t off) {
   3100     DebugTrace(off, __FUNCTION__, __LINE__);
   3101     uint16_t gr = *granularity_mask(off);
   3102     if (GranularityIs8(off, gr)) {
   3103       DCHECK(!GranularityIs4(off, gr));
   3104       DCHECK(!GranularityIs2(off, gr));
   3105       DCHECK(!GranularityIs1(off, gr));
   3106       uintptr_t off_8_aligned = off & ~7;
   3107       if (has_shadow_value_.Get(off_8_aligned)) {
   3108         ShadowValue sval = GetValue(off_8_aligned);
   3109         sval.Ref("Split_8_to_4");
   3110         DCHECK(!has_shadow_value_.Get(off_8_aligned + 4));
   3111         *AddNewSvalAtOffset(off_8_aligned + 4) = sval;
   3112       }
   3113       *granularity_mask(off) = gr = 3 << 1;
   3114       DCHECK(GranularityIs4(off, gr));
   3115       DebugTrace(off, __FUNCTION__, __LINE__);
   3116     }
   3117   }
   3118 
   3119   void Split_4_to_2(uintptr_t off) {
   3120     DebugTrace(off, __FUNCTION__, __LINE__);
   3121     uint16_t gr = *granularity_mask(off);
   3122     if (GranularityIs4(off, gr)) {
   3123       DCHECK(!GranularityIs8(off, gr));
   3124       DCHECK(!GranularityIs2(off, gr));
   3125       DCHECK(!GranularityIs1(off, gr));
   3126       uint16_t off_4_aligned = off & ~3;
   3127       if (has_shadow_value_.Get(off_4_aligned)) {
   3128         ShadowValue sval = GetValue(off_4_aligned);
   3129         sval.Ref("Split_4_to_2");
   3130         DCHECK(!has_shadow_value_.Get(off_4_aligned + 2));
   3131         *AddNewSvalAtOffset(off_4_aligned + 2) = sval;
   3132       }
   3133       // Clear this 4-granularity bit.
   3134       uintptr_t off_within_8_bytes = (off >> 2) & 1;  // 0 or 1.
   3135       gr &= ~(1 << (1 + off_within_8_bytes));
   3136       // Set two 2-granularity bits.
   3137       gr |= 3 << (3 + 2 * off_within_8_bytes);
   3138       *granularity_mask(off) = gr;
   3139       DebugTrace(off, __FUNCTION__, __LINE__);
   3140     }
   3141   }
   3142 
   3143   void Split_2_to_1(uintptr_t off) {
   3144     DebugTrace(off, __FUNCTION__, __LINE__);
   3145     uint16_t gr = *granularity_mask(off);
   3146     if (GranularityIs2(off, gr)) {
   3147       DCHECK(!GranularityIs8(off, gr));
   3148       DCHECK(!GranularityIs4(off, gr));
   3149       DCHECK(!GranularityIs1(off, gr));
   3150       uint16_t off_2_aligned = off & ~1;
   3151       if (has_shadow_value_.Get(off_2_aligned)) {
   3152         ShadowValue sval = GetValue(off_2_aligned);
   3153         sval.Ref("Split_2_to_1");
   3154         DCHECK(!has_shadow_value_.Get(off_2_aligned + 1));
   3155         *AddNewSvalAtOffset(off_2_aligned + 1) = sval;
   3156       }
   3157       // Clear this 2-granularity bit.
   3158       uintptr_t off_within_8_bytes = (off >> 1) & 3;  // 0, 1, 2, or 3
   3159       gr &= ~(1 << (3 + off_within_8_bytes));
   3160       // Set two 1-granularity bits.
   3161       gr |= 3 << (7 + 2 * off_within_8_bytes);
   3162       *granularity_mask(off) = gr;
   3163       DebugTrace(off, __FUNCTION__, __LINE__);
   3164     }
   3165   }
   3166 
   3167   void Join_1_to_2(uintptr_t off) {
   3168     DebugTrace(off, __FUNCTION__, __LINE__);
   3169     DCHECK((off & 1) == 0);
   3170     uint16_t gr = *granularity_mask(off);
   3171     if (GranularityIs1(off, gr)) {
   3172       DCHECK(GranularityIs1(off + 1, gr));
   3173       if (has_shadow_value_.Get(off) && has_shadow_value_.Get(off + 1)) {
   3174         if (GetValue(off) == GetValue(off + 1)) {
   3175           ShadowValue *sval_p = GetValuePointer(off + 1);
   3176           sval_p->Unref("Join_1_to_2");
   3177           sval_p->Clear();
   3178           has_shadow_value_.Clear(off + 1);
   3179           uintptr_t off_within_8_bytes = (off >> 1) & 3;  // 0, 1, 2, or 3
   3180           // Clear two 1-granularity bits.
   3181           gr &= ~(3 << (7 + 2 * off_within_8_bytes));
   3182           // Set one 2-granularity bit.
   3183           gr |= 1 << (3 + off_within_8_bytes);
   3184           *granularity_mask(off) = gr;
   3185           DebugTrace(off, __FUNCTION__, __LINE__);
   3186         }
   3187       }
   3188     }
   3189   }
   3190 
   3191   void Join_2_to_4(uintptr_t off) {
   3192     DebugTrace(off, __FUNCTION__, __LINE__);
   3193     DCHECK((off & 3) == 0);
   3194     uint16_t gr = *granularity_mask(off);
   3195     if (GranularityIs2(off, gr) && GranularityIs2(off + 2, gr)) {
   3196       if (has_shadow_value_.Get(off) && has_shadow_value_.Get(off + 2)) {
   3197         if (GetValue(off) == GetValue(off + 2)) {
   3198           ShadowValue *sval_p = GetValuePointer(off + 2);
   3199           sval_p->Unref("Join_2_to_4");
   3200           sval_p->Clear();
   3201           has_shadow_value_.Clear(off + 2);
   3202           uintptr_t off_within_8_bytes = (off >> 2) & 1;  // 0 or 1.
   3203           // Clear two 2-granularity bits.
   3204           gr &= ~(3 << (3 + 2 * off_within_8_bytes));
   3205           // Set one 4-granularity bit.
   3206           gr |= 1 << (1 + off_within_8_bytes);
   3207           *granularity_mask(off) = gr;
   3208           DebugTrace(off, __FUNCTION__, __LINE__);
   3209         }
   3210       }
   3211     }
   3212   }
   3213 
   3214   void Join_4_to_8(uintptr_t off) {
   3215     DebugTrace(off, __FUNCTION__, __LINE__);
   3216     DCHECK((off & 7) == 0);
   3217     uint16_t gr = *granularity_mask(off);
   3218     if (GranularityIs4(off, gr) && GranularityIs4(off + 4, gr)) {
   3219       if (has_shadow_value_.Get(off) && has_shadow_value_.Get(off + 4)) {
   3220         if (GetValue(off) == GetValue(off + 4)) {
   3221           ShadowValue *sval_p = GetValuePointer(off + 4);
   3222           sval_p->Unref("Join_4_to_8");
   3223           sval_p->Clear();
   3224           has_shadow_value_.Clear(off + 4);
   3225           *granularity_mask(off) = 1;
   3226           DebugTrace(off, __FUNCTION__, __LINE__);
   3227         }
   3228       }
   3229     }
   3230   }
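
  // Illustrative note (not part of the original source): the 16-bit
  // granularity_ mask for one 8-byte block, as implied by the Split_*/Join_*
  // methods above, appears to be laid out as:
  //   bit  0      -- the whole 8-byte block is covered by one shadow value,
  //   bits 1..2   -- the two 4-byte halves,
  //   bits 3..6   -- the four 2-byte quarters,
  //   bits 7..14  -- the eight individual bytes (bit 15 is unused).
  // For example, for an offset inside the first 8-byte block of a line:
  //   start:                       gr == 1                          // 8 bytes
  //   Split_8_to_4(off):           gr == (3 << 1)                   // 2 halves
  //   Split_4_to_2(off), off < 4:  gr == (1 << 2) | (3 << 3)
  //   Split_2_to_1(off), off < 2:  gr == (1 << 2) | (1 << 4) | (3 << 7)
  // The matching Join_* calls undo these steps once the adjacent shadow
  // values become equal again.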
   3231 
   3232   static void InitClassMembers() {
   3233     if (DEBUG_MODE) {
   3234       Printf("sizeof(CacheLine) = %ld\n", sizeof(CacheLine));
   3235     }
   3236     free_list_ = new FreeList(sizeof(CacheLine), 1024);
   3237   }
   3238 
   3239  private:
   3240   explicit CacheLine(uintptr_t tag) {
   3241     tag_ = tag;
   3242     Clear();
   3243   }
   3244   ~CacheLine() { }
   3245 
   3246   uintptr_t tag_;
   3247 
   3248   // data members
   3249   Mask has_shadow_value_;
   3250   Mask traced_;
   3251   Mask racey_;
   3252   Mask published_;
   3253   uint16_t granularity_[kLineSize / 8];
   3254   ShadowValue vals_[kLineSize];
   3255 
   3256   // static data members.
   3257   static FreeList *free_list_;
   3258 };
   3259 
   3260 FreeList *CacheLine::free_list_;
   3261 
   3262 // If range [a,b) fits into one line, return that line's tag.
   3263 // Else range [a,b) is broken into these ranges:
   3264 //   [a, line1_tag)
   3265 //   [line1_tag, line2_tag)
   3266 //   [line2_tag, b)
   3267 // and 0 is returned.
   3268 uintptr_t GetCacheLinesForRange(uintptr_t a, uintptr_t b,
   3269                                 uintptr_t *line1_tag, uintptr_t *line2_tag) {
   3270   uintptr_t a_tag = CacheLine::ComputeTag(a);
   3271   uintptr_t next_tag = CacheLine::ComputeNextTag(a);
   3272   if (b < next_tag) {
   3273     return a_tag;
   3274   }
   3275   *line1_tag = next_tag;
   3276   *line2_tag = CacheLine::ComputeTag(b);
   3277   return 0;
   3278 }
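
// Worked example (illustrative, not from the original source; assumes
// kLineSize == 64, i.e. tags are multiples of 0x40):
//   GetCacheLinesForRange(0x1010, 0x1030, &t1, &t2) == 0x1000  // one line
//   GetCacheLinesForRange(0x1010, 0x10B0, &t1, &t2) == 0, with t1 == 0x1040
//   and t2 == 0x1080, i.e. the range is processed as
//   [0x1010, 0x1040) + [0x1040, 0x1080) + [0x1080, 0x10B0).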
   3279 
   3280 
   3281 // -------- Cache ------------------ {{{1
   3282 class Cache {
   3283  public:
   3284   Cache() {
   3285     memset(lines_, 0, sizeof(lines_));
   3286     ANNOTATE_BENIGN_RACE_SIZED(lines_, sizeof(lines_),
   3287                                "Cache::lines_ accessed without a lock");
   3288   }
   3289 
   3290   INLINE static CacheLine *kLineIsLocked() {
   3291     return (CacheLine*)1;
   3292   }
   3293 
   3294   INLINE static bool LineIsNullOrLocked(CacheLine *line) {
   3295     return (uintptr_t)line <= 1;
   3296   }
   3297 
   3298   INLINE CacheLine *TidMagic(int32_t tid) {
   3299     return kLineIsLocked();
   3300   }
   3301 
   3302   // Try to get a CacheLine for exclusive use.
   3303   // May return NULL or kLineIsLocked.
   3304   INLINE CacheLine *TryAcquireLine(TSanThread *thr, uintptr_t a, int call_site) {
   3305     uintptr_t cli = ComputeCacheLineIndexInCache(a);
   3306     CacheLine **addr = &lines_[cli];
   3307     CacheLine *res = (CacheLine*)AtomicExchange(
   3308            (uintptr_t*)addr, (uintptr_t)kLineIsLocked());
   3309     if (DEBUG_MODE && debug_cache) {
   3310       uintptr_t tag = CacheLine::ComputeTag(a);
   3311       if (res && res != kLineIsLocked())
   3312         Printf("TryAcquire %p empty=%d tag=%lx cli=%lx site=%d\n",
   3313                res, res->Empty(), res->tag(), cli, call_site);
   3314       else
   3315         Printf("TryAcquire tag=%lx cli=%lx site=%d\n", tag, cli, call_site);
   3316     }
   3317     if (res) {
   3318       ANNOTATE_HAPPENS_AFTER((void*)cli);
   3319     }
   3320     return res;
   3321   }
   3322 
   3323   INLINE CacheLine *AcquireLine(TSanThread *thr, uintptr_t a, int call_site) {
   3324     CacheLine *line = NULL;
   3325     int iter = 0;
   3326     const int max_iter = 1 << 30;
   3327     for (;;) {
   3328       line = TryAcquireLine(thr, a, call_site);
   3329       if (line != kLineIsLocked())
   3330         break;
   3331       iter++;
   3332       if ((iter % (1 << 6)) == 0) {
   3333         YIELD();
   3334         G_stats->try_acquire_line_spin++;
   3335         if (DEBUG_MODE && debug_cache && ((iter & (iter - 1)) == 0)) {
   3336           Printf("T%d %s a=%p iter=%d\n", raw_tid(thr), __FUNCTION__, a, iter);
   3337         }
   3338       } else {
   3339         for (int active_spin = 0; active_spin != 10; active_spin += 1) {
   3340           PROCESSOR_YIELD();
   3341         }
   3342       }
   3343       if (DEBUG_MODE && debug_cache && iter == max_iter) {
   3344         Printf("Failed to acquire a cache line: T%d a=%p site=%d\n",
   3345                raw_tid(thr), a, call_site);
   3346         CHECK(iter < max_iter);
   3347       }
   3348     }
   3349     DCHECK(lines_[ComputeCacheLineIndexInCache(a)] == TidMagic(raw_tid(thr)));
   3350     return line;
   3351   }
   3352 
   3353   // Release a CacheLine from exclusive use.
   3354   INLINE void ReleaseLine(TSanThread *thr, uintptr_t a, CacheLine *line, int call_site) {
   3355     if (TS_SERIALIZED) return;
   3356     DCHECK(line != kLineIsLocked());
   3357     uintptr_t cli = ComputeCacheLineIndexInCache(a);
   3358     DCHECK(line == NULL ||
   3359            cli == ComputeCacheLineIndexInCache(line->tag()));
   3360     CacheLine **addr = &lines_[cli];
   3361     DCHECK(*addr == TidMagic(raw_tid(thr)));
   3362     ReleaseStore((uintptr_t*)addr, (uintptr_t)line);
   3363     ANNOTATE_HAPPENS_BEFORE((void*)cli);
   3364     if (DEBUG_MODE && debug_cache) {
   3365       uintptr_t tag = CacheLine::ComputeTag(a);
   3366       if (line)
   3367         Printf("Release %p empty=%d tag=%lx cli=%lx site=%d\n",
   3368                line, line->Empty(), line->tag(), cli, call_site);
   3369       else
   3370         Printf("Release tag=%lx cli=%lx site=%d\n", tag, cli, call_site);
   3371     }
   3372   }
   3373 
   3374   void AcquireAllLines(TSanThread *thr) {
   3375     CHECK(TS_SERIALIZED == 0);
   3376     for (size_t i = 0; i < (size_t)kNumLines; i++) {
   3377       uintptr_t tag = i << CacheLine::kLineSizeBits;
   3378       AcquireLine(thr, tag, __LINE__);
   3379       CHECK(lines_[i] == kLineIsLocked());
   3380     }
   3381   }
   3382 
   3383   // Get a CacheLine. This operation should be performed under a lock
   3384   // (whatever that is), but other threads may be acquiring the same line
   3385   // concurrently w/o a lock.
   3386   // Every call to GetLine() that returns a non-NULL line
   3387   // should be followed by a call to ReleaseLine().
   3388   INLINE CacheLine *GetLine(TSanThread *thr, uintptr_t a, bool create_new_if_need, int call_site) {
   3389     uintptr_t tag = CacheLine::ComputeTag(a);
   3390     DCHECK(tag <= a);
   3391     DCHECK(tag + CacheLine::kLineSize > a);
   3392     uintptr_t cli = ComputeCacheLineIndexInCache(a);
   3393     CacheLine *res = NULL;
   3394     CacheLine *line = NULL;
   3395 
   3396     if (create_new_if_need == false && lines_[cli] == 0) {
   3397       // There is no such line in the cache, nor should it be in the storage.
   3398       // Check that the storage indeed does not have this line.
   3399       // Such DCHECK is racey if tsan is multi-threaded.
   3400       DCHECK(TS_SERIALIZED == 0 || storage_.count(tag) == 0);
   3401       return NULL;
   3402     }
   3403 
   3404     if (TS_SERIALIZED) {
   3405       line = lines_[cli];
   3406     } else {
   3407       line = AcquireLine(thr, tag, call_site);
   3408     }
   3409 
   3410 
   3411     if (LIKELY(line && line->tag() == tag)) {
   3412       res = line;
   3413     } else {
   3414       res = WriteBackAndFetch(thr, line, tag, cli, create_new_if_need);
   3415       if (!res) {
   3416         ReleaseLine(thr, a, line, call_site);
   3417       }
   3418     }
   3419     if (DEBUG_MODE && debug_cache) {
   3420       if (res)
   3421         Printf("GetLine %p empty=%d tag=%lx\n", res, res->Empty(), res->tag());
   3422       else
   3423         Printf("GetLine res=NULL, line=%p tag=%lx cli=%lx\n", line, tag, cli);
   3424     }
   3425     return res;
   3426   }
   3427 
   3428   INLINE CacheLine *GetLineOrCreateNew(TSanThread *thr, uintptr_t a, int call_site) {
   3429     return GetLine(thr, a, true, call_site);
   3430   }
   3431   INLINE CacheLine *GetLineIfExists(TSanThread *thr, uintptr_t a, int call_site) {
   3432     return GetLine(thr, a, false, call_site);
   3433   }
   3434 
   3435   void ForgetAllState(TSanThread *thr) {
   3436     for (int i = 0; i < kNumLines; i++) {
   3437       if (TS_SERIALIZED == 0) CHECK(LineIsNullOrLocked(lines_[i]));
   3438       lines_[i] = NULL;
   3439     }
   3440     map<uintptr_t, Mask> racey_masks;
   3441     for (Map::iterator i = storage_.begin(); i != storage_.end(); ++i) {
   3442       CacheLine *line = i->second;
   3443       if (!line->racey().Empty()) {
   3444         racey_masks[line->tag()] = line->racey();
   3445       }
   3446       CacheLine::Delete(line);
   3447     }
   3448     storage_.clear();
   3449     // Restore the racey masks.
   3450     for (map<uintptr_t, Mask>::iterator it = racey_masks.begin();
   3451          it != racey_masks.end(); it++) {
   3452       CacheLine *line = GetLineOrCreateNew(thr, it->first, __LINE__);
   3453       line->racey() = it->second;
   3454       DCHECK(!line->racey().Empty());
   3455       ReleaseLine(thr, line->tag(), line, __LINE__);
   3456     }
   3457   }
   3458 
   3459   void PrintStorageStats() {
   3460     if (!G_flags->show_stats) return;
   3461     set<ShadowValue> all_svals;
   3462     map<size_t, int> sizes;
   3463     for (Map::iterator it = storage_.begin(); it != storage_.end(); ++it) {
   3464       CacheLine *line = it->second;
   3465       // uintptr_t cli = ComputeCacheLineIndexInCache(line->tag());
   3466       //if (lines_[cli] == line) {
   3467         // this line is in cache -- ignore it.
   3468       //  continue;
   3469       //}
   3470       set<ShadowValue> s;
   3471       for (uintptr_t i = 0; i < CacheLine::kLineSize; i++) {
   3472         if (line->has_shadow_value().Get(i)) {
   3473           ShadowValue sval = *(line->GetValuePointer(i));
   3474           s.insert(sval);
   3475           all_svals.insert(sval);
   3476         }
   3477       }
   3478       size_t size = s.size();
   3479       if (size > 10) size = 10;
   3480       sizes[size]++;
   3481     }
   3482     Printf("Storage sizes: %ld\n", storage_.size());
   3483     for (size_t size = 0; size <= CacheLine::kLineSize; size++) {
   3484       if (sizes[size]) {
   3485         Printf("  %ld => %d\n", size, sizes[size]);
   3486       }
   3487     }
   3488     Printf("Different svals: %ld\n", all_svals.size());
   3489     set <SSID> all_ssids;
   3490     for (set<ShadowValue>::iterator it = all_svals.begin(); it != all_svals.end(); ++it) {
   3491       ShadowValue sval = *it;
   3492       for (int i = 0; i < 2; i++) {
   3493         SSID ssid = i ? sval.rd_ssid() : sval.wr_ssid();
   3494         all_ssids.insert(ssid);
   3495       }
   3496     }
   3497     Printf("Different ssids: %ld\n", all_ssids.size());
   3498     set <SID> all_sids;
   3499     for (set<SSID>::iterator it = all_ssids.begin(); it != all_ssids.end(); ++it) {
   3500       int size = SegmentSet::Size(*it);
   3501       for (int i = 0; i < size; i++) {
   3502         SID sid = SegmentSet::GetSID(*it, i, __LINE__);
   3503         all_sids.insert(sid);
   3504       }
   3505     }
   3506     Printf("Different sids: %ld\n", all_sids.size());
   3507     for (int i = 1; i < Segment::NumberOfSegments(); i++) {
   3508       if (Segment::ProfileSeg(SID(i)) && all_sids.count(SID(i)) == 0) {
   3509         // Printf("Segment SID %d: missing in storage; ref=%d\n", i,
   3510         // Segment::Get(SID(i))->ref_count());
   3511       }
   3512     }
   3513   }
   3514 
   3515  private:
   3516   INLINE uintptr_t ComputeCacheLineIndexInCache(uintptr_t addr) {
   3517     return (addr >> CacheLine::kLineSizeBits) & (kNumLines - 1);
   3518   }
   3519 
   3520   NOINLINE CacheLine *WriteBackAndFetch(TSanThread *thr, CacheLine *old_line,
   3521                                         uintptr_t tag, uintptr_t cli,
   3522                                         bool create_new_if_need) {
   3523     ScopedMallocCostCenter cc("Cache::WriteBackAndFetch");
   3524     CacheLine *res;
   3525     size_t old_storage_size = storage_.size();
   3526     (void)old_storage_size;
   3527     CacheLine **line_for_this_tag = NULL;
   3528     if (create_new_if_need) {
   3529       line_for_this_tag = &storage_[tag];
   3530     } else {
   3531       Map::iterator it = storage_.find(tag);
   3532       if (it == storage_.end()) {
   3533         if (DEBUG_MODE && debug_cache) {
   3534           Printf("WriteBackAndFetch: old_line=%p tag=%lx cli=%ld\n",
   3535                  old_line, tag, cli);
   3536         }
   3537         return NULL;
   3538       }
   3539       line_for_this_tag = &(it->second);
   3540     }
   3541     CHECK(line_for_this_tag);
   3542     DCHECK(old_line != kLineIsLocked());
   3543     if (*line_for_this_tag == NULL) {
   3544       // creating a new cache line
   3545       CHECK(storage_.size() == old_storage_size + 1);
   3546       res = CacheLine::CreateNewCacheLine(tag);
   3547       if (DEBUG_MODE && debug_cache) {
   3548         Printf("%s %d new line %p cli=%lx\n", __FUNCTION__, __LINE__, res, cli);
   3549       }
   3550       *line_for_this_tag = res;
   3551       G_stats->cache_new_line++;
   3552     } else {
   3553       // taking an existing cache line from storage.
   3554       res = *line_for_this_tag;
   3555       if (DEBUG_MODE && debug_cache) {
   3556         Printf("%s %d exi line %p tag=%lx old=%p empty=%d cli=%lx\n",
   3557              __FUNCTION__, __LINE__, res, res->tag(), old_line,
   3558              res->Empty(), cli);
   3559       }
   3560       DCHECK(!res->Empty());
   3561       G_stats->cache_fetch++;
   3562     }
   3563 
   3564     if (TS_SERIALIZED) {
   3565       lines_[cli] = res;
   3566     } else {
   3567       DCHECK(lines_[cli] == TidMagic(raw_tid(thr)));
   3568     }
   3569 
   3570     if (old_line) {
   3571       if (DEBUG_MODE && debug_cache) {
   3572         Printf("%s %d old line %p empty=%d\n", __FUNCTION__, __LINE__,
   3573                old_line, old_line->Empty());
   3574       }
   3575       if (old_line->Empty()) {
   3576         storage_.erase(old_line->tag());
   3577         CacheLine::Delete(old_line);
   3578         G_stats->cache_delete_empty_line++;
   3579       } else {
   3580         if (debug_cache) {
   3581           DebugOnlyCheckCacheLineWhichWeReplace(old_line, res);
   3582         }
   3583       }
   3584     }
   3585     DCHECK(res->tag() == tag);
   3586 
   3587     if (G_stats->cache_max_storage_size < storage_.size()) {
   3588       G_stats->cache_max_storage_size = storage_.size();
   3589     }
   3590 
   3591     return res;
   3592   }
   3593 
   3594   void DebugOnlyCheckCacheLineWhichWeReplace(CacheLine *old_line,
   3595                                              CacheLine *new_line) {
   3596     static int c = 0;
   3597     c++;
   3598     if ((c % 1024) == 1) {
   3599       set<int64_t> s;
   3600       for (uintptr_t i = 0; i < CacheLine::kLineSize; i++) {
   3601         if (old_line->has_shadow_value().Get(i)) {
   3602           int64_t sval = *reinterpret_cast<int64_t*>(
   3603                             old_line->GetValuePointer(i));
   3604           s.insert(sval);
   3605         }
   3606       }
   3607       Printf("\n[%d] Cache Size=%ld %s different values: %ld\n", c,
   3608              storage_.size(), old_line->has_shadow_value().ToString().c_str(),
   3609              s.size());
   3610 
   3611       Printf("new line: %p %p\n", new_line->tag(), new_line->tag()
   3612              + CacheLine::kLineSize);
   3613       G_stats->PrintStatsForCache();
   3614     }
   3615   }
   3616 
   3617   static const int kNumLines = 1 << (DEBUG_MODE ? 14 : 21);
   3618   CacheLine *lines_[kNumLines];
   3619 
   3620   // tag => CacheLine
   3621   typedef unordered_map<uintptr_t, CacheLine*> Map;
   3622   Map storage_;
   3623 };
   3624 
   3625 static  Cache *G_cache;
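
// Illustrative usage sketch (not part of the original source): the intended
// acquire/use/release discipline around G_cache, following the contract
// documented above GetLine().  Here 'thr' and 'addr' stand for a TSanThread*
// and an application address that are assumed to be in scope.
//
//   CacheLine *line = G_cache->GetLineOrCreateNew(thr, addr, __LINE__);
//   uintptr_t off = CacheLine::ComputeOffset(addr);
//   if (!line->has_shadow_value().Get(off)) {
//     line->AddNewSvalAtOffset(off);  // installs a cleared ShadowValue at 'off'
//   }
//   ShadowValue *sval = line->GetValuePointer(off);
//   // ... read or update *sval ...
//   G_cache->ReleaseLine(thr, addr, line, __LINE__);  // no-op if TS_SERIALIZED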
   3626 
   3627 // -------- Published range -------------------- {{{1
   3628 struct PublishInfo {
   3629   uintptr_t tag;   // Tag of the cache line where the mem is published.
   3630   Mask      mask;  // The bits that are actually published.
   3631   VTS      *vts;   // The point where this range has been published.
   3632 };
   3633 
   3634 
   3635 typedef multimap<uintptr_t, PublishInfo> PublishInfoMap;
   3636 
   3637 // Maps a cache-line tag to the PublishInfo{tag, mask, vts} entries for that line.
   3638 static PublishInfoMap *g_publish_info_map;
   3639 
   3640 const int kDebugPublish = 0;
   3641 
   3642 // Get a VTS where 'a' has been published,
   3643 // return NULL if 'a' was not published.
   3644 static const VTS *GetPublisherVTS(uintptr_t a) {
   3645   uintptr_t tag = CacheLine::ComputeTag(a);
   3646   uintptr_t off = CacheLine::ComputeOffset(a);
   3647   typedef PublishInfoMap::iterator Iter;
   3648 
   3649   pair<Iter, Iter> eq_range = g_publish_info_map->equal_range(tag);
   3650   for (Iter it = eq_range.first; it != eq_range.second; ++it) {
   3651     PublishInfo &info = it->second;
   3652     DCHECK(info.tag == tag);
   3653     if (info.mask.Get(off)) {
   3654       G_stats->publish_get++;
   3655       // Printf("GetPublisherVTS: a=%p vts=%p\n", a, info.vts);
   3656       return info.vts;
   3657     }
   3658   }
   3659   Printf("GetPublisherVTS returned NULL: a=%p\n", a);
   3660   return NULL;
   3661 }
   3662 
   3663 static bool CheckSanityOfPublishedMemory(uintptr_t tag, int line) {
   3664   if (!DEBUG_MODE) return true;
   3665   if (kDebugPublish)
   3666     Printf("CheckSanityOfPublishedMemory: line=%d\n", line);
   3667   typedef PublishInfoMap::iterator Iter;
   3668   pair<Iter, Iter> eq_range = g_publish_info_map->equal_range(tag);
   3669   Mask union_of_masks(0);
   3670   // iterate over all entries for this tag
   3671   for (Iter it = eq_range.first; it != eq_range.second; ++it) {
   3672     PublishInfo &info = it->second;
   3673     CHECK(info.tag  == tag);
   3674     CHECK(it->first == tag);
   3675     CHECK(info.vts);
   3676     Mask mask(info.mask);
   3677     CHECK(!mask.Empty());  // Mask should not be empty,
   3678     // and should not intersect with other masks.
   3679     CHECK(Mask::Intersection(union_of_masks, mask).Empty());
   3680     union_of_masks.Union(mask);
   3681   }
   3682   return true;
   3683 }
   3684 
   3685 // Clear the publish attribute for the bytes from 'line' that are set in 'mask'
   3686 static void ClearPublishedAttribute(CacheLine *line, Mask mask) {
   3687   CHECK(CheckSanityOfPublishedMemory(line->tag(), __LINE__));
   3688   typedef PublishInfoMap::iterator Iter;
   3689   bool deleted_some = true;
   3690   if (kDebugPublish)
   3691     Printf(" ClearPublishedAttribute: %p %s\n",
   3692            line->tag(), mask.ToString().c_str());
   3693   while (deleted_some) {
   3694     deleted_some = false;
   3695     pair<Iter, Iter> eq_range = g_publish_info_map->equal_range(line->tag());
   3696     for (Iter it = eq_range.first; it != eq_range.second; ++it) {
   3697       PublishInfo &info = it->second;
   3698       DCHECK(info.tag == line->tag());
   3699       if (kDebugPublish)
   3700         Printf("?ClearPublishedAttribute: %p %s\n", line->tag(),
   3701                info.mask.ToString().c_str());
   3702       info.mask.Subtract(mask);
   3703       if (kDebugPublish)
   3704         Printf("+ClearPublishedAttribute: %p %s\n", line->tag(),
   3705                info.mask.ToString().c_str());
   3706       G_stats->publish_clear++;
   3707       if (info.mask.Empty()) {
   3708         VTS::Unref(info.vts);
   3709         g_publish_info_map->erase(it);
   3710         deleted_some = true;
   3711         break;
   3712       }
   3713     }
   3714   }
   3715   CHECK(CheckSanityOfPublishedMemory(line->tag(), __LINE__));
   3716 }
   3717 
   3718 // Publish range [a, b) in addr's CacheLine with vts.
   3719 static void PublishRangeInOneLine(TSanThread *thr, uintptr_t addr, uintptr_t a,
   3720                                   uintptr_t b, VTS *vts) {
   3721   ScopedMallocCostCenter cc("PublishRangeInOneLine");
   3722   DCHECK(b <= CacheLine::kLineSize);
   3723   DCHECK(a < b);
   3724   uintptr_t tag = CacheLine::ComputeTag(addr);
   3725   CHECK(CheckSanityOfPublishedMemory(tag, __LINE__));
   3726   CacheLine *line = G_cache->GetLineOrCreateNew(thr, tag, __LINE__);
   3727 
   3728   if (1 || line->published().GetRange(a, b)) {
   3729     Mask mask(0);
   3730     mask.SetRange(a, b);
   3731     // TODO(timurrrr): add warning for re-publishing.
   3732     ClearPublishedAttribute(line, mask);
   3733   }
   3734 
   3735   line->published().SetRange(a, b);
   3736   G_cache->ReleaseLine(thr, tag, line, __LINE__);
   3737 
   3738   PublishInfo pub_info;
   3739   pub_info.tag  = tag;
   3740   pub_info.mask.SetRange(a, b);
   3741   pub_info.vts  = vts->Clone();
   3742   g_publish_info_map->insert(make_pair(tag, pub_info));
   3743   G_stats->publish_set++;
   3744   if (kDebugPublish)
   3745     Printf("PublishRange   : [%p,%p) %p %s vts=%p\n",
   3746            a, b, tag, pub_info.mask.ToString().c_str(), vts);
   3747   CHECK(CheckSanityOfPublishedMemory(tag, __LINE__));
   3748 }
   3749 
   3750 // Publish memory range [a, b).
   3751 static void PublishRange(TSanThread *thr, uintptr_t a, uintptr_t b, VTS *vts) {
   3752   CHECK(a);
   3753   CHECK(a < b);
   3754   if (kDebugPublish)
   3755     Printf("PublishRange   : [%p,%p), size=%d, tag=%p\n",
   3756            a, b, (int)(b - a), CacheLine::ComputeTag(a));
   3757   uintptr_t line1_tag = 0, line2_tag = 0;
   3758   uintptr_t tag = GetCacheLinesForRange(a, b, &line1_tag, &line2_tag);
   3759   if (tag) {
   3760     PublishRangeInOneLine(thr, tag, a - tag, b - tag, vts);
   3761     return;
   3762   }
   3763   uintptr_t a_tag = CacheLine::ComputeTag(a);
   3764   PublishRangeInOneLine(thr, a, a - a_tag, CacheLine::kLineSize, vts);
   3765   for (uintptr_t tag_i = line1_tag; tag_i < line2_tag;
   3766        tag_i += CacheLine::kLineSize) {
   3767     PublishRangeInOneLine(thr, tag_i, 0, CacheLine::kLineSize, vts);
   3768   }
   3769   if (b > line2_tag) {
   3770     PublishRangeInOneLine(thr, line2_tag, 0, b - line2_tag, vts);
   3771   }
   3772 }
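
// Illustrative walk-through (not part of the original source; assumes
// kLineSize == 64): PublishRange(thr, 0x1010, 0x10B0, vts) does not fit in
// one line, so it becomes three PublishRangeInOneLine() calls:
//
//   PublishRangeInOneLine(thr, 0x1010, 0x10, 0x40, vts);  // tail of line 0x1000
//   PublishRangeInOneLine(thr, 0x1040, 0x00, 0x40, vts);  // whole line 0x1040
//   PublishRangeInOneLine(thr, 0x1080, 0x00, 0x30, vts);  // head of line 0x1080
//
// Each call sets the published() bits of that line for [a, b), after first
// clearing any previously published bits in the same byte range, and records
// a PublishInfo{tag, mask, vts->Clone()} entry in g_publish_info_map.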
   3773 
   3774 // -------- ThreadSanitizerReport -------------- {{{1
   3775 struct ThreadSanitizerReport {
   3776   // Types of reports.
   3777   enum ReportType {
   3778     DATA_RACE,
   3779     UNLOCK_FOREIGN,
   3780     UNLOCK_NONLOCKED,
   3781     INVALID_LOCK,
   3782     ATOMICITY_VIOLATION,
   3783   };
   3784 
   3785   // Common fields.
   3786   ReportType  type;
   3787   TID         tid;
   3788   StackTrace *stack_trace;
   3789 
   3790   const char *ReportName() const {
   3791     switch (type) {
   3792       case DATA_RACE:        return "Race";
   3793       case UNLOCK_FOREIGN:   return "UnlockForeign";
   3794       case UNLOCK_NONLOCKED: return "UnlockNonLocked";
   3795       case INVALID_LOCK:     return "InvalidLock";
   3796       case ATOMICITY_VIOLATION: return "AtomicityViolation";
   3797     }
   3798     CHECK(0);
   3799     return NULL;
   3800   }
   3801 
   3802   virtual ~ThreadSanitizerReport() {
   3803     StackTrace::Delete(stack_trace);
   3804   }
   3805 };
   3806 
   3807 static bool ThreadSanitizerPrintReport(ThreadSanitizerReport *report);
   3808 
   3809 // DATA_RACE.
   3810 struct ThreadSanitizerDataRaceReport : public ThreadSanitizerReport {
   3811   uintptr_t   racey_addr;
   3812   string      racey_addr_description;
   3813   uintptr_t   last_access_size;
   3814   TID         last_access_tid;
   3815   SID         last_access_sid;
   3816   bool        last_access_is_w;
   3817   LSID        last_acces_lsid[2];
   3818 
   3819   ShadowValue new_sval;
   3820   ShadowValue old_sval;
   3821 
   3822   bool        is_expected;
   3823   bool        racey_addr_was_published;
   3824 };
   3825 
   3826 // Report for bad unlock (UNLOCK_FOREIGN, UNLOCK_NONLOCKED).
   3827 struct ThreadSanitizerBadUnlockReport : public ThreadSanitizerReport {
   3828   LID lid;
   3829 };
   3830 
   3831 // Report for invalid lock addresses (INVALID_LOCK).
   3832 struct ThreadSanitizerInvalidLockReport : public ThreadSanitizerReport {
   3833   uintptr_t lock_addr;
   3834 };
   3835 
   3836 class AtomicityRegion;
   3837 
   3838 struct ThreadSanitizerAtomicityViolationReport : public ThreadSanitizerReport {
   3839   AtomicityRegion *r1, *r2, *r3;
   3840 };
   3841 
   3842 
   3843 // -------- LockHistory ------------- {{{1
   3844 // For each thread we store a limited amount of lock and unlock history.
   3845 // If there is a race report (in hybrid mode) we try to guess a lock
   3846 // which might have been used to pass the ownership of the object between
   3847 // threads.
   3848 //
   3849 // Thread1:                    Thread2:
   3850 // obj->UpdateMe();
   3851 // mu.Lock();
   3852 // flag = true;
   3853 // mu.Unlock(); // (*)
   3854 //                             mu.Lock();  // (**)
   3855 //                             bool f = flag;
   3856 //                             mu.Unlock();
   3857 //                             if (f)
   3858 //                                obj->UpdateMeAgain();
   3859 //
   3860 // For this code a hybrid detector may report a false race.
   3861 // LockHistory will find the lock mu and report it.
   3862 
   3863 struct LockHistory {
   3864  public:
   3865   // LockHistory which will track no more than `size` recent locks
   3866   // and the same number of unlocks.
   3867   LockHistory(size_t size): size_(size) { }
   3868 
   3869   // Record a Lock event.
   3870   void OnLock(LID lid) {
   3871     g_lock_era++;
   3872     Push(LockHistoryElement(lid, g_lock_era), &locks_);
   3873   }
   3874 
   3875   // Record an Unlock event.
   3876   void OnUnlock(LID lid) {
   3877     g_lock_era++;
   3878     Push(LockHistoryElement(lid, g_lock_era), &unlocks_);
   3879   }
   3880 
   3881   // Find locks such that:
   3882   // - A Lock happened in `l`.
   3883   // - An Unlock happened in `u`.
   3884   // - The Lock's era is not less than the Unlock's era.
   3885   // - Both eras are greater than or equal to min_lock_era.
   3886   static bool Intersect(const LockHistory &l, const LockHistory &u,
   3887                         int32_t min_lock_era, set<LID> *locks) {
   3888     const Queue &lq = l.locks_;
   3889     const Queue &uq = u.unlocks_;
   3890     for (size_t i = 0; i < lq.size(); i++) {
   3891       int32_t l_era = lq[i].lock_era;
   3892       if (l_era < min_lock_era) continue;
   3893       LID lid = lq[i].lid;
   3894       // We don't want to report pure happens-before locks since
   3895       // they already create h-b arcs.
   3896       if (Lock::LIDtoLock(lid)->is_pure_happens_before()) continue;
   3897       for (size_t j = 0; j < uq.size(); j++) {
   3898         int32_t u_era = uq[j].lock_era;
   3899         if (lid != uq[j].lid) continue;
   3900         // Report("LockHistory::Intersect: L%d %d %d %d\n", lid.raw(), min_lock_era, u_era, l_era);
   3901         if (u_era < min_lock_era)  continue;
   3902         if (u_era > l_era) continue;
   3903         locks->insert(lid);
   3904       }
   3905     }
   3906     return !locks->empty();
   3907   }
   3908 
   3909   void PrintLocks() const { Print(&locks_); }
   3910   void PrintUnlocks() const { Print(&unlocks_); }
   3911 
   3912  private:
   3913   struct LockHistoryElement {
   3914     LID lid;
   3915     uint32_t lock_era;
   3916     LockHistoryElement(LID l, uint32_t era)
   3917         : lid(l),
   3918         lock_era(era) {
   3919         }
   3920   };
   3921 
   3922   typedef deque<LockHistoryElement> Queue;
   3923 
   3924   void Push(LockHistoryElement e, Queue *q) {
   3925     CHECK(q->size() <= size_);
   3926     if (q->size() == size_)
   3927       q->pop_front();
   3928     q->push_back(e);
   3929   }
   3930 
   3931   void Print(const Queue *q) const {
   3932     set<LID> printed;
   3933     for (size_t i = 0; i < q->size(); i++) {
   3934       const LockHistoryElement &e = (*q)[i];
   3935       if (printed.count(e.lid)) continue;
   3936       Report("era %d: \n", e.lock_era);
   3937       Lock::ReportLockWithOrWithoutContext(e.lid, true);
   3938       printed.insert(e.lid);
   3939     }
   3940   }
   3941 
   3942   Queue locks_;
   3943   Queue unlocks_;
   3944   size_t size_;
   3945 };
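
// Illustrative sketch (not part of the original source): how the two
// histories in the example above are meant to intersect.  Suppose Thread1's
// mu.Unlock() at (*) was recorded with lock era 17 and Thread2's mu.Lock()
// at (**) with lock era 18.  Then
//
//   set<LID> locks;
//   LockHistory::Intersect(/*l=*/thread2_history, /*u=*/thread1_history,
//                          /*min_lock_era=*/10, &locks);
//
// returns true and inserts mu's LID into 'locks': the same LID has a Lock
// entry (era 18) in `l` and an Unlock entry (era 17) in `u`, the Unlock's era
// does not exceed the Lock's era, and both eras are >= min_lock_era.  (The
// variable names and the choice of which history plays `l` and which plays
// `u` at the real call site are hypothetical here; note that pure
// happens-before locks are skipped by Intersect.)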
   3946 
   3947 // -------- RecentSegmentsCache ------------- {{{1
   3948 // For each thread we store a limited number of recent segments with
   3949 // the same VTS and LS as the current segment.
   3950 // When a thread enters a new basic block, we can sometimes reuse a
   3951 // recent segment if it is the same one or is not used anymore (see Search()).
   3952 //
   3953 // We need to flush the cache when the current lockset changes, the current
   3954 // VTS changes, or we do ForgetAllState.
   3955 // TODO(timurrrr): probably we can cache segments with different LSes and
   3956 // compare their LS with the current LS.
   3957 struct RecentSegmentsCache {
   3958  public:
   3959   RecentSegmentsCache(int cache_size) : cache_size_(cache_size) {}
   3960   ~RecentSegmentsCache() { Clear(); }
   3961 
   3962   void Clear() {
   3963     ShortenQueue(0);
   3964   }
   3965 
   3966   void Push(SID sid) {
   3967     queue_.push_front(sid);
   3968     Segment::Ref(sid, "RecentSegmentsCache::ShortenQueue");
   3969     ShortenQueue(cache_size_);
   3970   }
   3971 
   3972   void ForgetAllState() {
   3973     queue_.clear();  // Don't unref - the segments are already dead.
   3974   }
   3975 
   3976   INLINE SID Search(CallStack *curr_stack,
   3977                     SID curr_sid, /*OUT*/ bool *needs_refill) {
   3978     // TODO(timurrrr): we can probably move the matched segment to the head
   3979     // of the queue.
   3980 
   3981     deque<SID>::iterator it = queue_.begin();
   3982     for (; it != queue_.end(); it++) {
   3983       SID sid = *it;
   3984       Segment::AssertLive(sid, __LINE__);
   3985       Segment *seg = Segment::Get(sid);
   3986 
   3987       if (seg->ref_count() == 1 + (sid == curr_sid)) {
   3988         // This segment is not used anywhere else,
   3989         // so just replace the stack trace in it.
   3990         // The refcount of an unused segment is equal to
   3991         // *) 1 if it is stored only in the cache,
   3992         // *) 2 if it is the current segment of the Thread.
   3993         *needs_refill = true;
   3994         return sid;
   3995       }
   3996 
   3997       // Check the three top entries of the call stack of the recent segment.
   3998       // If they match the current segment's stack, don't create a new segment.
   3999       // This can occasionally produce slightly inaccurate stack traces,
   4000       // but we don't really care that much.
   4001       if (kSizeOfHistoryStackTrace > 0) {
   4002         size_t n = curr_stack->size();
   4003         uintptr_t *emb_trace = Segment::embedded_stack_trace(sid);
   4004         if (*emb_trace &&  // This stack trace was filled
   4005            curr_stack->size() >= 3 &&
   4006            emb_trace[0] == (*curr_stack)[n-1] &&
   4007            emb_trace[1] == (*curr_stack)[n-2] &&
   4008            emb_trace[2] == (*curr_stack)[n-3]) {
   4009           *needs_refill = false;
   4010           return sid;
   4011         }
   4012       }
   4013     }
   4014 
   4015     return SID();
   4016   }
   4017 
   4018  private:
   4019   void ShortenQueue(size_t flush_to_length) {
   4020     while (queue_.size() > flush_to_length) {
   4021       SID sid = queue_.back();
   4022       Segment::Unref(sid, "RecentSegmentsCache::ShortenQueue");
   4023       queue_.pop_back();
   4024     }
   4025   }
   4026 
   4027   deque<SID> queue_;
   4028   size_t cache_size_;
   4029 };
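
// Illustrative note (not part of the original source): the reuse condition in
// RecentSegmentsCache::Search(),
//
//   seg->ref_count() == 1 + (sid == curr_sid)
//
// encodes the refcount invariant documented there: a segment referenced only
// by this cache has ref_count() == 1, and if it is additionally the thread's
// current segment (sid == curr_sid) the count is 2.  In either case nothing
// else references the segment, so its embedded stack trace can simply be
// refilled instead of allocating a new segment.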
   4030 
   4031 // -------- TraceInfo ------------------ {{{1
   4032 vector<TraceInfo*> *TraceInfo::g_all_traces;
   4033 
   4034 TraceInfo *TraceInfo::NewTraceInfo(size_t n_mops, uintptr_t pc) {
   4035   ScopedMallocCostCenter cc("TraceInfo::NewTraceInfo");
   4036   size_t mem_size = (sizeof(TraceInfo) + (n_mops - 1) * sizeof(MopInfo));
   4037   uint8_t *mem = new uint8_t[mem_size];
   4038   memset(mem, 0xab, mem_size);
   4039   TraceInfo *res = new (mem) TraceInfo;
   4040   res->n_mops_ = n_mops;
   4041   res->pc_ = ThreadSanitizerWantToCreateSegmentsOnSblockEntry(pc) ? pc : 0;
   4042   res->counter_ = 0;
   4043   if (g_all_traces == NULL) {
   4044     g_all_traces = new vector<TraceInfo*>;
   4045   }
   4046   res->literace_storage = NULL;
   4047   if (G_flags->literace_sampling != 0) {
   4048     ScopedMallocCostCenter cc("TraceInfo::NewTraceInfo::LiteRaceStorage");
   4049     size_t index_of_this_trace = g_all_traces->size();
   4050     if ((index_of_this_trace % kLiteRaceStorageSize) == 0) {
   4051       res->literace_storage = (LiteRaceStorage*)
   4052           new LiteRaceCounters [kLiteRaceStorageSize * kLiteRaceNumTids];
   4053       memset(res->literace_storage, 0, sizeof(LiteRaceStorage));
   4054     } else {
   4055       CHECK(index_of_this_trace > 0);
   4056       res->literace_storage = (*g_all_traces)[index_of_this_trace - 1]->literace_storage;
   4057       CHECK(res->literace_storage);
   4058     }
   4059     res->storage_index = index_of_this_trace % kLiteRaceStorageSize;
   4060   }
   4061   g_all_traces->push_back(res);
   4062   return res;
   4063 }
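
// Illustrative note (not part of the original source): LiteRace counter
// storage is shared between blocks of kLiteRaceStorageSize consecutive
// traces.  Only every kLiteRaceStorageSize-th trace allocates a fresh
// LiteRaceCounters[kLiteRaceStorageSize * kLiteRaceNumTids] array; the traces
// that follow reuse the previous trace's block and find their own slot via
// storage_index = index_of_this_trace % kLiteRaceStorageSize.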
   4064 
   4065 void TraceInfo::PrintTraceProfile() {
   4066   if (!G_flags->trace_profile) return;
   4067   if (!g_all_traces) return;
   4068   int64_t total_counter = 0;
   4069   multimap<size_t, TraceInfo*> traces;
   4070   for (size_t i = 0; i < g_all_traces->size(); i++) {
   4071     TraceInfo *trace = (*g_all_traces)[i];
   4072     traces.insert(make_pair(trace->counter(), trace));
   4073     total_counter += trace->counter();
   4074   }
   4075   if (total_counter == 0) return;
   4076   Printf("TraceProfile: %ld traces, %lld hits\n",
   4077          g_all_traces->size(), total_counter);
   4078   int i = 0;
   4079   for (multimap<size_t, TraceInfo*>::reverse_iterator it = traces.rbegin();
   4080        it != traces.rend(); ++it, i++) {
   4081     TraceInfo *trace = it->second;
   4082     int64_t c = it->first;
   4083     int64_t permile = (c * 1000) / total_counter;
   4084     CHECK(trace->n_mops() > 0);
   4085     uintptr_t pc = trace->GetMop(0)->pc();
   4086     CHECK(pc);
   4087     if (permile == 0 || i >= 20) break;
   4088     Printf("TR=%p pc: %p %p c=%lld (%lld/1000) n_mops=%ld %s\n",
   4089            trace, trace->pc(), pc, c,
   4090            permile, trace->n_mops(),
   4091            PcToRtnNameAndFilePos(pc).c_str());
   4092   }
   4093 }
   4094 
   4095 // -------- Atomicity --------------- {{{1
   4096 // An attempt to detect atomicity violations (aka high level races).
   4097 // Here we try to find a very restrictive pattern:
   4098 // Thread1                    Thread2
   4099 //   r1: {
   4100 //     mu.Lock();
   4101 //     code_r1();
   4102 //     mu.Unlock();
   4103 //   }
   4104 //   r2: {
   4105 //     mu.Lock();
   4106 //     code_r2();
   4107 //     mu.Unlock();
   4108 //   }
   4109 //                           r3: {
   4110 //                             mu.Lock();
   4111 //                             code_r3();
   4112 //                             mu.Unlock();
   4113 //                           }
   4114 // We have 3 regions of code such that
   4115 // - two of them are in one thread and the third is in another thread,
   4116 // - all 3 regions have the same lockset,
   4117 // - the distance between r1 and r2 is small,
   4118 // - there is no h-b arc between r2 and r3,
   4119 // - r1 and r2 have different stack traces,
   4120 //
   4121 // In this situation we report a 'Suspected atomicity violation'.
   4122 //
   4123 // Current status:
   4124 // this code detects atomicity violations on our two motivating examples
   4125 // (--gtest_filter=*Atomicity*  --gtest_also_run_disabled_tests) and does
   4126 // not produce an overwhelming number of false reports.
   4127 // However, this functionality is still raw and not tuned for performance.
   4128 
   4129 // TS_ATOMICITY is on in debug mode or if it was enabled at build time.
   4130 #ifndef TS_ATOMICITY
   4131 # define TS_ATOMICITY DEBUG_MODE
   4132 #endif
   4133 
   4134 
   4135 struct AtomicityRegion {
   4136   int lock_era;
   4137   TID tid;
   4138   VTS *vts;
   4139   StackTrace *stack_trace;
   4140   LSID lsid[2];
   4141   BitSet access_set[2];
   4142   bool used;
   4143   int n_mops_since_start;
   4144 
   4145   void Print() {
   4146     Report("T%d era=%d nmss=%ld AtomicityRegion:\n  rd: %s\n  wr: %s\n  %s\n%s",
   4147            tid.raw(),
   4148            lock_era,
   4149            n_mops_since_start,
   4150            access_set[0].ToString().c_str(),
   4151            access_set[1].ToString().c_str(),
   4152            TwoLockSetsToString(lsid[false], lsid[true]).c_str(),
   4153            stack_trace->ToString().c_str()
   4154           );
   4155   }
   4156 };
   4157 
   4158 bool SimilarLockSetForAtomicity(AtomicityRegion *r1, AtomicityRegion *r2) {
   4159   // Compare only reader locksets (in case one region took reader locks)
   4160   return ((r1->lsid[0] == r2->lsid[0]));
   4161 }
   4162 
   4163 static deque<AtomicityRegion *> *g_atomicity_regions;
   4164 static map<StackTrace *, int, StackTrace::Less> *reported_atomicity_stacks_;
   4165 const size_t kMaxAtomicityRegions = 8;
   4166 
   4167 static void HandleAtomicityRegion(AtomicityRegion *atomicity_region) {
   4168   if (!g_atomicity_regions) {
   4169     g_atomicity_regions = new deque<AtomicityRegion*>;
   4170     reported_atomicity_stacks_ = new map<StackTrace *, int, StackTrace::Less>;
   4171   }
   4172 
   4173   if (g_atomicity_regions->size() >= kMaxAtomicityRegions) {
   4174     AtomicityRegion *to_delete = g_atomicity_regions->back();
   4175     g_atomicity_regions->pop_back();
   4176     if (!to_delete->used) {
   4177       VTS::Unref(to_delete->vts);
   4178       StackTrace::Delete(to_delete->stack_trace);
   4179       delete to_delete;
   4180     }
   4181   }
   4182   g_atomicity_regions->push_front(atomicity_region);
   4183   size_t n = g_atomicity_regions->size();
   4184 
   4185   if (0) {
   4186     for (size_t i = 0; i < n; i++) {
   4187       AtomicityRegion *r = (*g_atomicity_regions)[i];
   4188       r->Print();
   4189     }
   4190   }
   4191 
   4192   AtomicityRegion *r3 = (*g_atomicity_regions)[0];
   4193   for (size_t i = 1; i < n; i++) {
   4194     AtomicityRegion *r2 = (*g_atomicity_regions)[i];
   4195     if (r2->tid     != r3->tid &&
   4196         SimilarLockSetForAtomicity(r2, r3) &&
   4197         !VTS::HappensBeforeCached(r2->vts, r3->vts)) {
   4198       for (size_t j = i + 1; j < n; j++) {
   4199         AtomicityRegion *r1 = (*g_atomicity_regions)[j];
   4200         if (r1->tid != r2->tid) continue;
   4201         CHECK(r2->lock_era > r1->lock_era);
   4202         if (r2->lock_era - r1->lock_era > 2) break;
   4203         if (!SimilarLockSetForAtomicity(r1, r2)) continue;
   4204         if (StackTrace::Equals(r1->stack_trace, r2->stack_trace)) continue;
   4205         if (!(r1->access_set[1].empty() &&
   4206               !r2->access_set[1].empty() &&
   4207               !r3->access_set[1].empty())) continue;
   4208         CHECK(r1->n_mops_since_start <= r2->n_mops_since_start);
   4209         if (r2->n_mops_since_start - r1->n_mops_since_start > 5) continue;
   4210         if ((*reported_atomicity_stacks_)[r1->stack_trace] > 0) continue;
   4211 
   4212         (*reported_atomicity_stacks_)[r1->stack_trace]++;
   4213         (*reported_atomicity_stacks_)[r2->stack_trace]++;
   4214         (*reported_atomicity_stacks_)[r3->stack_trace]++;
   4215         r1->used = r2->used = r3->used = true;
   4216         ThreadSanitizerAtomicityViolationReport *report =
   4217             new ThreadSanitizerAtomicityViolationReport;
   4218         report->type = ThreadSanitizerReport::ATOMICITY_VIOLATION;
   4219         report->tid = TID(0);
   4220         report->stack_trace = r1->stack_trace;
   4221         report->r1 = r1;
   4222         report->r2 = r2;
   4223         report->r3 = r3;
   4224         ThreadSanitizerPrintReport(report);
   4225         break;
   4226       }
   4227     }
   4228   }
   4229 }
   4230 
   4231 // -------- TSanThread ------------------ {{{1
   4232 struct TSanThread {
   4233  public:
   4234   ThreadLocalStats stats;
   4235 
   4236   TSanThread(TID tid, TID parent_tid, VTS *vts, StackTrace *creation_context,
   4237          CallStack *call_stack)
   4238     : is_running_(true),
   4239       tid_(tid),
   4240       sid_(0),
   4241       parent_tid_(parent_tid),
   4242       max_sp_(0),
   4243       min_sp_(0),
   4244       stack_size_for_ignore_(0),
   4245       fun_r_ignore_(0),
   4246       min_sp_for_ignore_(0),
   4247       n_mops_since_start_(0),
   4248       creation_context_(creation_context),
   4249       announced_(false),
   4250       rd_lockset_(0),
   4251       wr_lockset_(0),
   4252       expensive_bits_(0),
   4253       vts_at_exit_(NULL),
   4254       call_stack_(call_stack),
   4255       lock_history_(128),
   4256       recent_segments_cache_(G_flags->recent_segments_cache_size),
   4257       inside_atomic_op_(),
   4258       rand_state_((unsigned)(tid.raw() + (uintptr_t)vts
   4259                       + (uintptr_t)creation_context
   4260                       + (uintptr_t)call_stack)) {
   4261 
   4262     NewSegmentWithoutUnrefingOld("TSanThread Creation", vts);
   4263     ignore_depth_[0] = ignore_depth_[1] = 0;
   4264 
   4265     HandleRtnCall(0, 0, IGNORE_BELOW_RTN_UNKNOWN);
   4266     ignore_context_[0] = NULL;
   4267     ignore_context_[1] = NULL;
   4268     if (tid != TID(0) && parent_tid.valid()) {
   4269       CHECK(creation_context_);
   4270     }
   4271 
   4272     // Add myself to the array of threads.
   4273     CHECK(tid.raw() < G_flags->max_n_threads);
   4274     CHECK(all_threads_[tid.raw()] == NULL);
   4275     n_threads_ = max(n_threads_, tid.raw() + 1);
   4276     all_threads_[tid.raw()] = this;
   4277     dead_sids_.reserve(kMaxNumDeadSids);
   4278     fresh_sids_.reserve(kMaxNumFreshSids);
   4279     ComputeExpensiveBits();
   4280   }
   4281 
   4282   TID tid() const { return tid_; }
   4283   TID parent_tid() const { return parent_tid_; }
   4284 
   4285   void increment_n_mops_since_start() {
   4286     n_mops_since_start_++;
   4287   }
   4288 
   4289   // STACK
   4290   uintptr_t max_sp() const { return max_sp_; }
   4291   uintptr_t min_sp() const { return min_sp_; }
   4292 
   4293   unsigned random() {
   4294     return tsan_prng(&rand_state_);
   4295   }
   4296 
   4297   bool ShouldReportRaces() const {
   4298     return (inside_atomic_op_ == 0);
   4299   }
   4300 
   4301   void SetStack(uintptr_t stack_min, uintptr_t stack_max) {
   4302     CHECK(stack_min < stack_max);
   4303     // Stay sane. Expect the stack to be less than 64M.
   4304     CHECK(stack_max - stack_min <= 64 * 1024 * 1024);
   4305     min_sp_ = stack_min;
   4306     max_sp_ = stack_max;
   4307     if (G_flags->ignore_stack) {
   4308       min_sp_for_ignore_ = min_sp_;
   4309       stack_size_for_ignore_ = max_sp_ - min_sp_;
   4310     } else {
   4311       CHECK(min_sp_for_ignore_ == 0 &&
   4312             stack_size_for_ignore_ == 0);
   4313     }
   4314   }
   4315 
   4316   bool MemoryIsInStack(uintptr_t a) {
   4317     return a >= min_sp_ && a <= max_sp_;
   4318   }
   4319 
   4320   bool IgnoreMemoryIfInStack(uintptr_t a) {
   4321     return (a - min_sp_for_ignore_) < stack_size_for_ignore_;
   4322   }
   4323 
   4324 
   4325   bool Announce() {
   4326     if (announced_) return false;
   4327     announced_ = true;
   4328     if (tid_ == TID(0)) {
   4329       Report("INFO: T0 is program's main thread\n");
   4330     } else {
   4331       if (G_flags->announce_threads) {
   4332         Report("INFO: T%d has been created by T%d at this point: {{{\n%s}}}\n",
   4333                tid_.raw(), parent_tid_.raw(),
   4334                creation_context_->ToString().c_str());
   4335         TSanThread * parent = GetIfExists(parent_tid_);
   4336         CHECK(parent);
   4337         parent->Announce();
   4338       } else {
   4339         Report("INFO: T%d has been created by T%d. "
   4340                "Use --announce-threads to see the creation stack.\n",
   4341                tid_.raw(), parent_tid_.raw());
   4342       }
   4343     }
   4344     return true;
   4345   }
   4346 
   4347   string ThreadName() const {
   4348     char buff[100];
   4349     snprintf(buff, sizeof(buff), "T%d", tid().raw());
   4350     string res = buff;
   4351     if (thread_name_.length() > 0) {
   4352       res += " (";
   4353       res += thread_name_;
   4354       res += ")";
   4355     }
   4356     return res;
   4357   }
   4358 
   4359   bool is_running() const { return is_running_; }
   4360 
   4361   INLINE void ComputeExpensiveBits() {
   4362     bool has_expensive_flags = G_flags->trace_level > 0 ||
   4363         G_flags->show_stats > 1                      ||
   4364         G_flags->sample_events > 0;
   4365 
   4366     expensive_bits_ =
   4367         (ignore_depth_[0] != 0) |
   4368         ((ignore_depth_[1] != 0) << 1) |
   4369         ((has_expensive_flags == true) << 2);
   4370   }
   4371 
   4372   int expensive_bits() { return expensive_bits_; }
   4373   int ignore_reads() { return expensive_bits() & 1; }
   4374   int ignore_writes() { return (expensive_bits() >> 1) & 1; }
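
  // Illustrative note (not part of the original source): expensive_bits_
  // packs three flags recomputed by ComputeExpensiveBits():
  //   bit 0 -- ignore_depth_[0] != 0  (reads are currently ignored)
  //   bit 1 -- ignore_depth_[1] != 0  (writes are currently ignored)
  //   bit 2 -- has_expensive_flags    (tracing/stats/sampling is enabled)
  // so, e.g., expensive_bits() == 5 (binary 101) means "ignore reads, do not
  // ignore writes, and at least one expensive flag is on".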
   4375 
   4376   // ignore
   4377   INLINE void set_ignore_accesses(bool is_w, bool on) {
   4378     ignore_depth_[is_w] += on ? 1 : -1;
   4379     CHECK(ignore_depth_[is_w] >= 0);
   4380     ComputeExpensiveBits();
   4381     if (on && G_flags->save_ignore_context) {
   4382       StackTrace::Delete(ignore_context_[is_w]);
   4383       ignore_context_[is_w] = CreateStackTrace(0, 3);
   4384     }
   4385   }
   4386   INLINE void set_ignore_all_accesses(bool on) {
   4387     set_ignore_accesses(false, on);
   4388     set_ignore_accesses(true, on);
   4389   }
   4390 
   4391   StackTrace *GetLastIgnoreContext(bool is_w) {
   4392     return ignore_context_[is_w];
   4393   }
   4394 
   4395   SID sid() const {
   4396     return sid_;
   4397   }
   4398 
   4399   Segment *segment() const {
   4400     CHECK(sid().valid());
   4401     Segment::AssertLive(sid(), __LINE__);
   4402     return Segment::Get(sid());
   4403   }
   4404 
   4405   VTS *vts() const {
   4406     return segment()->vts();
   4407   }
   4408 
   4409   void set_thread_name(const char *name) {
   4410     thread_name_ = string(name);
   4411   }
   4412 
   4413   void HandleThreadEnd() {
   4414     CHECK(is_running_);
   4415     is_running_ = false;
   4416     CHECK(!vts_at_exit_);
   4417     vts_at_exit_ = vts()->Clone();
   4418     CHECK(vts_at_exit_);
   4419     FlushDeadSids();
   4420     ReleaseFreshSids();
   4421     call_stack_ = NULL;
   4422   }
   4423 
   4424   // Return the TID of the joined child and its VTS.
   4425   TID HandleThreadJoinAfter(VTS **vts_at_exit, TID joined_tid) {
   4426     CHECK(joined_tid.raw() > 0);
   4427     CHECK(GetIfExists(joined_tid) != NULL);
   4428     TSanThread* joined_thread  = TSanThread::Get(joined_tid);
   4429     // Sometimes the joined thread is not truly dead yet.
   4430     // In that case we just take the current vts.
   4431     if (joined_thread->is_running_)
   4432       *vts_at_exit = joined_thread->vts()->Clone();
   4433     else
   4434       *vts_at_exit = joined_thread->vts_at_exit_;
   4435 
   4436     if (*vts_at_exit == NULL) {
   4437       Printf("vts_at_exit==NULL; parent=%d, child=%d\n",
   4438              tid().raw(), joined_tid.raw());
   4439     }
   4440     CHECK(*vts_at_exit);
   4441     if (0)
   4442     Printf("T%d: vts_at_exit_: %s\n", joined_tid.raw(),
   4443            (*vts_at_exit)->ToString().c_str());
   4444     return joined_tid;
   4445   }
   4446 
   4447   static int NumberOfThreads() {
   4448     return INTERNAL_ANNOTATE_UNPROTECTED_READ(n_threads_);
   4449   }
   4450 
   4451   static TSanThread *GetIfExists(TID tid) {
   4452     if (tid.raw() < NumberOfThreads())
   4453       return Get(tid);
   4454     return NULL;
   4455   }
   4456 
   4457   static TSanThread *Get(TID tid) {
   4458     DCHECK(tid.raw() < NumberOfThreads());
   4459     return all_threads_[tid.raw()];
   4460   }
   4461 
   4462   void HandleAccessSet() {
   4463     BitSet *rd_set = lock_era_access_set(false);
   4464     BitSet *wr_set = lock_era_access_set(true);
   4465     if (rd_set->empty() && wr_set->empty()) return;
   4466     CHECK(G_flags->atomicity && !G_flags->pure_happens_before);
   4467     AtomicityRegion *atomicity_region = new AtomicityRegion;
   4468     atomicity_region->lock_era = g_lock_era;
   4469     atomicity_region->tid = tid();
   4470     atomicity_region->vts = vts()->Clone();
   4471     atomicity_region->lsid[0] = lsid(0);
   4472     atomicity_region->lsid[1] = lsid(1);
   4473     atomicity_region->access_set[0] = *rd_set;
   4474     atomicity_region->access_set[1] = *wr_set;
   4475     atomicity_region->stack_trace = CreateStackTrace();
   4476     atomicity_region->used = false;
   4477     atomicity_region->n_mops_since_start = this->n_mops_since_start_;
   4478     // atomicity_region->Print();
   4479     // Printf("----------- %s\n", __FUNCTION__);
   4480     // ReportStackTrace(0, 7);
   4481     HandleAtomicityRegion(atomicity_region);
   4482   }
   4483 
   4484   // Locks
   4485   void HandleLock(uintptr_t lock_addr, bool is_w_lock) {
   4486     Lock *lock = Lock::LookupOrCreate(lock_addr);
   4487 
   4488     if (debug_lock) {
   4489       Printf("T%d lid=%d %sLock   %p; %s\n",
   4490            tid_.raw(), lock->lid().raw(),
   4491            is_w_lock ? "Wr" : "Rd",
   4492            lock_addr,
   4493            LockSet::ToString(lsid(is_w_lock)).c_str());
   4494 
   4495       ReportStackTrace(0, 7);
   4496     }
   4497 
   4498     // NOTE: we assume that all locks can be acquired recursively.
   4499     // No warning about recursive locking will be issued.
   4500     if (is_w_lock) {
   4501       // Recursive locks are properly handled because LockSet is in fact a
   4502       // multiset.
   4503       wr_lockset_ = LockSet::Add(wr_lockset_, lock);
   4504       rd_lockset_ = LockSet::Add(rd_lockset_, lock);
   4505       lock->WrLock(tid_, CreateStackTrace());
   4506     } else {
   4507       if (lock->wr_held()) {
   4508         ReportStackTrace();
   4509       }
   4510       rd_lockset_ = LockSet::Add(rd_lockset_, lock);
   4511       lock->RdLock(CreateStackTrace());
   4512     }
   4513 
   4514     if (lock->is_pure_happens_before()) {
   4515       if (is_w_lock) {
   4516         HandleWait(lock->wr_signal_addr());
   4517       } else {
   4518         HandleWait(lock->rd_signal_addr());
   4519       }
   4520     }
   4521 
   4522     if (G_flags->suggest_happens_before_arcs) {
   4523       lock_history_.OnLock(lock->lid());
   4524     }
   4525     NewSegmentForLockingEvent();
   4526     lock_era_access_set_[0].Clear();
   4527     lock_era_access_set_[1].Clear();
   4528   }
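  // A minimal illustrative sketch (added comment, not original code): because
  // the lockset is a multiset, recursive acquisition is balanced correctly.
  // For a write lock L, roughly:
  //
  //   HandleLock(L, true);   // L appears once in the write lockset
  //   HandleLock(L, true);   // L appears twice
  //   HandleUnlock(L);       // one copy removed
  //   HandleUnlock(L);       // the lockset no longer contains L
  //
  // The per-call lockset contents above are an illustration of the multiset
  // semantics, not a dump of real state.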
   4529 
   4530   void HandleUnlock(uintptr_t lock_addr) {
   4531     HandleAccessSet();
   4532 
   4533     Lock *lock = Lock::Lookup(lock_addr);
   4534     // If the lock is not found, report an error.
   4535     if (lock == NULL) {
   4536       ThreadSanitizerInvalidLockReport *report =
   4537           new ThreadSanitizerInvalidLockReport;
   4538       report->type = ThreadSanitizerReport::INVALID_LOCK;
   4539       report->tid = tid();
   4540       report->lock_addr = lock_addr;
   4541       report->stack_trace = CreateStackTrace();
   4542       ThreadSanitizerPrintReport(report);
   4543       return;
   4544     }
   4545     bool is_w_lock = lock->wr_held();
   4546 
   4547     if (debug_lock) {
   4548       Printf("T%d lid=%d %sUnlock %p; %s\n",
   4549              tid_.raw(), lock->lid().raw(),
   4550              is_w_lock ? "Wr" : "Rd",
   4551              lock_addr,
   4552              LockSet::ToString(lsid(is_w_lock)).c_str());
   4553       ReportStackTrace(0, 7);
   4554     }
   4555 
   4556     if (lock->is_pure_happens_before()) {
    4557       // A reader unlock signals only the writer-lock address;
    4558       // a writer unlock signals both addresses.
   4559       if (is_w_lock) {
   4560         HandleSignal(lock->rd_signal_addr());
   4561       }
   4562       HandleSignal(lock->wr_signal_addr());
   4563     }
   4564 
   4565     if (!lock->wr_held() && !lock->rd_held()) {
   4566       ThreadSanitizerBadUnlockReport *report =
   4567           new ThreadSanitizerBadUnlockReport;
   4568       report->type = ThreadSanitizerReport::UNLOCK_NONLOCKED;
   4569       report->tid = tid();
   4570       report->lid = lock->lid();
   4571       report->stack_trace = CreateStackTrace();
   4572       ThreadSanitizerPrintReport(report);
   4573       return;
   4574     }
   4575 
   4576     bool removed = false;
   4577     if (is_w_lock) {
   4578       lock->WrUnlock();
   4579       removed =  LockSet::Remove(wr_lockset_, lock, &wr_lockset_)
   4580               && LockSet::Remove(rd_lockset_, lock, &rd_lockset_);
   4581     } else {
   4582       lock->RdUnlock();
   4583       removed = LockSet::Remove(rd_lockset_, lock, &rd_lockset_);
   4584     }
   4585 
   4586     if (!removed) {
   4587       ThreadSanitizerBadUnlockReport *report =
   4588           new ThreadSanitizerBadUnlockReport;
   4589       report->type = ThreadSanitizerReport::UNLOCK_FOREIGN;
   4590       report->tid = tid();
   4591       report->lid = lock->lid();
   4592       report->stack_trace = CreateStackTrace();
   4593       ThreadSanitizerPrintReport(report);
   4594     }
   4595 
   4596     if (G_flags->suggest_happens_before_arcs) {
   4597       lock_history_.OnUnlock(lock->lid());
   4598     }
   4599 
   4600     NewSegmentForLockingEvent();
   4601     lock_era_access_set_[0].Clear();
   4602     lock_era_access_set_[1].Clear();
   4603   }
   4604 
   4605   // Handles memory access with race reports suppressed.
   4606   void HandleAtomicMop(uintptr_t a,
   4607                        uintptr_t pc,
   4608                        tsan_atomic_op op,
   4609                        tsan_memory_order mo,
   4610                        size_t size);
   4611 
   4612   void HandleForgetSignaller(uintptr_t cv) {
   4613     SignallerMap::iterator it = signaller_map_->find(cv);
   4614     if (it != signaller_map_->end()) {
   4615       if (debug_happens_before) {
   4616         Printf("T%d: ForgetSignaller: %p:\n    %s\n", tid_.raw(), cv,
   4617             (it->second.vts)->ToString().c_str());
   4618         if (G_flags->debug_level >= 1) {
   4619           ReportStackTrace();
   4620         }
   4621       }
   4622       VTS::Unref(it->second.vts);
   4623       signaller_map_->erase(it);
   4624     }
   4625   }
   4626 
   4627   LSID lsid(bool is_w) {
   4628     return is_w ? wr_lockset_ : rd_lockset_;
   4629   }
   4630 
   4631   const LockHistory &lock_history() { return lock_history_; }
   4632 
   4633   // SIGNAL/WAIT events.
   4634   void HandleWait(uintptr_t cv) {
   4635 
   4636     SignallerMap::iterator it = signaller_map_->find(cv);
   4637     if (it != signaller_map_->end()) {
   4638       const VTS *signaller_vts = it->second.vts;
   4639       NewSegmentForWait(signaller_vts);
   4640     }
   4641 
   4642     if (debug_happens_before) {
   4643       Printf("T%d: Wait: %p:\n    %s %s\n", tid_.raw(),
   4644              cv,
   4645              vts()->ToString().c_str(),
   4646              Segment::ToString(sid()).c_str());
   4647       if (G_flags->debug_level >= 1) {
   4648         ReportStackTrace();
   4649       }
   4650     }
   4651   }
   4652 
   4653   void HandleSignal(uintptr_t cv) {
   4654     Signaller *signaller = &(*signaller_map_)[cv];
   4655     if (!signaller->vts) {
   4656       signaller->vts = vts()->Clone();
   4657     } else {
   4658       VTS *new_vts = VTS::Join(signaller->vts, vts());
   4659       VTS::Unref(signaller->vts);
   4660       signaller->vts = new_vts;
   4661     }
   4662     NewSegmentForSignal();
   4663     if (debug_happens_before) {
   4664       Printf("T%d: Signal: %p:\n    %s %s\n    %s\n", tid_.raw(), cv,
   4665              vts()->ToString().c_str(), Segment::ToString(sid()).c_str(),
   4666              (signaller->vts)->ToString().c_str());
   4667       if (G_flags->debug_level >= 1) {
   4668         ReportStackTrace();
   4669       }
   4670     }
   4671   }
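  // Illustrative sketch (added comment, not original code): HandleSignal
  // stores (a join of) the signalling thread's vector clock under the address
  // 'cv'; a later HandleWait on the same address joins that clock into the
  // waiting thread, which is what creates the happens-before arc. Roughly:
  //
  //   // T1:                         // T2:
  //   thr1->HandleSignal(cv);        thr2->HandleWait(cv);
  //   // signaller_map_[cv].vts      // thr2's VTS becomes
  //   //   now includes thr1's VTS   //   Join(old VTS, signaller VTS)
  //
  // Afterwards VTS::HappensBefore(signaller VTS, thr2->vts()) holds, so T1's
  // accesses before the signal no longer race with T2's accesses after the wait.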
   4672 
   4673   void INLINE NewSegmentWithoutUnrefingOld(const char *call_site,
   4674                                            VTS *new_vts) {
   4675     DCHECK(new_vts);
   4676     SID new_sid = Segment::AddNewSegment(tid(), new_vts,
   4677                                          rd_lockset_, wr_lockset_);
   4678     SID old_sid = sid();
   4679     if (old_sid.raw() != 0 && new_vts != vts()) {
   4680       // Flush the cache if VTS changed - the VTS won't repeat.
   4681       recent_segments_cache_.Clear();
   4682     }
   4683     sid_ = new_sid;
   4684     Segment::Ref(new_sid, "TSanThread::NewSegmentWithoutUnrefingOld");
   4685 
   4686     if (kSizeOfHistoryStackTrace > 0) {
   4687       FillEmbeddedStackTrace(Segment::embedded_stack_trace(sid()));
   4688     }
   4689     if (0)
   4690     Printf("2: %s T%d/S%d old_sid=%d NewSegment: %s\n", call_site,
   4691            tid().raw(), sid().raw(), old_sid.raw(),
   4692          vts()->ToString().c_str());
   4693   }
   4694 
   4695   void INLINE NewSegment(const char *call_site, VTS *new_vts) {
   4696     SID old_sid = sid();
   4697     NewSegmentWithoutUnrefingOld(call_site, new_vts);
   4698     Segment::Unref(old_sid, "TSanThread::NewSegment");
   4699   }
   4700 
   4701   void NewSegmentForLockingEvent() {
    4702     // Flush the cache since we can't reuse segments with a different lockset.
   4703     recent_segments_cache_.Clear();
   4704     NewSegment(__FUNCTION__, vts()->Clone());
   4705   }
   4706 
   4707   void NewSegmentForMallocEvent() {
    4708     // Flush the cache since we can't reuse segments with a different lockset.
   4709     recent_segments_cache_.Clear();
   4710     NewSegment(__FUNCTION__, vts()->Clone());
   4711   }
   4712 
   4713 
   4714   void SetTopPc(uintptr_t pc) {
   4715     if (pc) {
   4716       DCHECK(!call_stack_->empty());
   4717       call_stack_->back() = pc;
   4718     }
   4719   }
   4720 
   4721   void NOINLINE HandleSblockEnterSlowLocked() {
   4722     AssertTILHeld();
   4723     FlushStateIfOutOfSegments(this);
   4724     this->stats.history_creates_new_segment++;
   4725     VTS *new_vts = vts()->Clone();
   4726     NewSegment("HandleSblockEnter", new_vts);
   4727     recent_segments_cache_.Push(sid());
   4728     GetSomeFreshSids();  // fill the thread-local SID cache.
   4729   }
   4730 
   4731   INLINE bool HandleSblockEnter(uintptr_t pc, bool allow_slow_path) {
   4732     DCHECK(G_flags->keep_history);
   4733     if (!pc) return true;
   4734 
   4735     this->stats.events[SBLOCK_ENTER]++;
   4736 
   4737     SetTopPc(pc);
   4738 
   4739     bool refill_stack = false;
   4740     SID match = recent_segments_cache_.Search(call_stack_, sid(),
   4741                                               /*OUT*/&refill_stack);
   4742     DCHECK(kSizeOfHistoryStackTrace > 0);
   4743 
   4744     if (match.valid()) {
   4745       // This part is 100% thread-local, no need for locking.
   4746       if (sid_ != match) {
   4747         Segment::Ref(match, "TSanThread::HandleSblockEnter");
   4748         this->AddDeadSid(sid_, "TSanThread::HandleSblockEnter");
   4749         sid_ = match;
   4750       }
   4751       if (refill_stack) {
   4752         this->stats.history_reuses_segment++;
   4753         FillEmbeddedStackTrace(Segment::embedded_stack_trace(sid()));
   4754       } else {
   4755         this->stats.history_uses_same_segment++;
   4756       }
   4757     } else if (fresh_sids_.size() > 0) {
   4758       // We have a fresh ready-to-use segment in thread local cache.
   4759       SID fresh_sid = fresh_sids_.back();
   4760       fresh_sids_.pop_back();
   4761       Segment::SetupFreshSid(fresh_sid, tid(), vts()->Clone(),
   4762                              rd_lockset_, wr_lockset_);
   4763       this->AddDeadSid(sid_, "TSanThread::HandleSblockEnter-1");
   4764       Segment::Ref(fresh_sid, "TSanThread::HandleSblockEnter-1");
   4765       sid_ = fresh_sid;
   4766       recent_segments_cache_.Push(sid());
   4767       FillEmbeddedStackTrace(Segment::embedded_stack_trace(sid()));
   4768       this->stats.history_uses_preallocated_segment++;
   4769     } else {
   4770       if (!allow_slow_path) return false;
   4771       AssertTILHeld();
    4772       // No fresh SIDs available, have to grab a lock and get a few.
   4773       HandleSblockEnterSlowLocked();
   4774     }
   4775     return true;
   4776   }
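  // Summary of the three paths above (descriptive comment added for clarity):
  //  1. recent_segments_cache_ hit: reuse an existing segment, refilling its
  //     embedded stack trace if needed -- fully thread-local, no lock.
  //  2. fresh_sids_ non-empty: take a preallocated SID from the thread-local
  //     pool and set it up with the current VTS and locksets -- still no lock.
  //  3. otherwise: return false if the slow path is not allowed; else (with
  //     the TIL held) fall back to HandleSblockEnterSlowLocked().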
   4777 
   4778   void NewSegmentForWait(const VTS *signaller_vts) {
   4779     const VTS *current_vts   = vts();
   4780     if (0)
   4781     Printf("T%d NewSegmentForWait: \n  %s\n  %s\n", tid().raw(),
   4782            current_vts->ToString().c_str(),
   4783            signaller_vts->ToString().c_str());
   4784     // We don't want to create a happens-before arc if it will be redundant.
   4785     if (!VTS::HappensBeforeCached(signaller_vts, current_vts)) {
   4786       VTS *new_vts = VTS::Join(current_vts, signaller_vts);
   4787       NewSegment("NewSegmentForWait", new_vts);
   4788     }
   4789     DCHECK(VTS::HappensBeforeCached(signaller_vts, vts()));
   4790   }
   4791 
   4792   void NewSegmentForSignal() {
   4793     VTS *cur_vts = vts();
   4794     VTS *new_vts = VTS::CopyAndTick(cur_vts, tid());
   4795     NewSegment("NewSegmentForSignal", new_vts);
   4796   }
   4797 
   4798   // When creating a child thread, we need to know
    4799   // 1. Where the thread was created (ctx).
    4800   // 2. What the parent thread's vector clock was (vts).
   4801 
   4802   struct ThreadCreateInfo {
   4803     StackTrace *ctx;
   4804     VTS        *vts;
   4805   };
   4806 
   4807   static void StopIgnoringAccessesInT0BecauseNewThreadStarted() {
   4808     AssertTILHeld();
   4809     if (g_so_far_only_one_thread) {
   4810       g_so_far_only_one_thread = false;
   4811       Get(TID(0))->set_ignore_all_accesses(false);
   4812     }
   4813   }
   4814 
   4815   // This event comes before the child is created (e.g. just
   4816   // as we entered pthread_create).
   4817   void HandleThreadCreateBefore(TID parent_tid, uintptr_t pc) {
   4818     CHECK(parent_tid == tid());
   4819     StopIgnoringAccessesInT0BecauseNewThreadStarted();
   4820     // Store ctx and vts under TID(0).
   4821     ThreadCreateInfo info;
   4822     info.ctx = CreateStackTrace(pc);
   4823     info.vts = vts()->Clone();
   4824     CHECK(info.ctx && info.vts);
   4825     child_tid_to_create_info_[TID(0)] = info;
   4826     // Tick vts.
   4827     this->NewSegmentForSignal();
   4828 
   4829     if (debug_thread) {
   4830       Printf("T%d: THR_CREATE_BEFORE\n", parent_tid.raw());
   4831     }
   4832   }
   4833 
   4834   // This event comes when we are exiting the thread creation routine.
    4835   // It may appear before *or* after the THR_START event, at least with PIN.
   4836   void HandleThreadCreateAfter(TID parent_tid, TID child_tid) {
   4837     CHECK(parent_tid == tid());
   4838     // Place the info under child_tid if we did not use it yet.
   4839     if (child_tid_to_create_info_.count(TID(0))){
   4840       child_tid_to_create_info_[child_tid] = child_tid_to_create_info_[TID(0)];
   4841       child_tid_to_create_info_.erase(TID(0));
   4842     }
   4843 
   4844     if (debug_thread) {
   4845       Printf("T%d: THR_CREATE_AFTER %d\n", parent_tid.raw(), child_tid.raw());
   4846     }
   4847   }
   4848 
   4849   void HandleChildThreadStart(TID child_tid, VTS **vts, StackTrace **ctx) {
   4850     TSanThread *parent = this;
   4851     ThreadCreateInfo info;
   4852     if (child_tid_to_create_info_.count(child_tid)) {
    4853       // We have already seen THR_CREATE_AFTER, so the info is under child_tid.
   4854       info = child_tid_to_create_info_[child_tid];
   4855       child_tid_to_create_info_.erase(child_tid);
   4856       CHECK(info.ctx && info.vts);
   4857     } else if (child_tid_to_create_info_.count(TID(0))){
    4858       // We have not seen THR_CREATE_AFTER, but have already seen THR_CREATE_BEFORE.
   4859       info = child_tid_to_create_info_[TID(0)];
   4860       child_tid_to_create_info_.erase(TID(0));
   4861       CHECK(info.ctx && info.vts);
   4862     } else {
   4863       // We have not seen THR_CREATE_BEFORE/THR_CREATE_AFTER.
   4864       // If the tool is single-threaded (valgrind) these events are redundant.
   4865       info.ctx = parent->CreateStackTrace();
   4866       info.vts = parent->vts()->Clone();
   4867       parent->NewSegmentForSignal();
   4868     }
   4869     *ctx = info.ctx;
   4870     VTS *singleton = VTS::CreateSingleton(child_tid);
   4871     *vts = VTS::Join(singleton, info.vts);
   4872     VTS::Unref(singleton);
   4873     VTS::Unref(info.vts);
   4874 
   4875 
   4876     if (debug_thread) {
   4877       Printf("T%d: THR_START parent: T%d : %s %s\n", child_tid.raw(),
   4878              parent->tid().raw(),
   4879              parent->vts()->ToString().c_str(),
   4880              (*vts)->ToString().c_str());
   4881       if (G_flags->announce_threads) {
   4882         Printf("%s\n", (*ctx)->ToString().c_str());
   4883       }
   4884     }
   4885 
   4886     // Parent should have ticked its VTS so there should be no h-b.
   4887     DCHECK(!VTS::HappensBefore(parent->vts(), *vts));
   4888   }
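  // Illustrative example (added comment; the clock values are made up): the
  // child starts with the join of its own fresh singleton clock and the
  // parent's clock captured around pthread_create, e.g.
  //
  //   // parent T1 clock at create time: [T1:5]
  //   VTS *child_vts = VTS::Join(VTS::CreateSingleton(child_tid),  // [T2:1]
  //                              info.vts);                        // [T1:5]
  //   // child_vts ~= [T1:5, T2:1]
  //
  // Only the Join() structure mirrors the code above.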
   4889 
   4890   // Support for Cyclic Barrier, e.g. pthread_barrier_t.
   4891   // We need to create (barrier_count-1)^2 h-b arcs between
   4892   // threads blocking on a barrier. We should not create any h-b arcs
    4893   // for two calls to barrier_wait if the barrier was reset between them.
   4894   struct CyclicBarrierInfo {
   4895     // The value given to barrier_init.
   4896     uint32_t barrier_count;
   4897     // How many times we may block on this barrier before resetting.
   4898     int32_t calls_before_reset;
   4899     // How many times we entered the 'wait-before' and 'wait-after' handlers.
   4900     int32_t n_wait_before, n_wait_after;
   4901   };
   4902   // The following situation is possible:
   4903   // - N threads blocked on a barrier.
   4904   // - All N threads reached the barrier and we started getting 'wait-after'
   4905   //   events, but did not yet get all of them.
   4906   // - N threads blocked on the barrier again and we started getting
   4907   //   'wait-before' events from the next barrier epoch.
   4908   // - We continue getting 'wait-after' events from the previous epoch.
   4909   //
   4910   // We don't want to create h-b arcs between barrier events of different
   4911   // epochs, so we use 'barrier + (epoch % 4)' as an object on which we
    4912   // signal and wait (it is unlikely that more than 4 epochs are live at once).
   4913   enum { kNumberOfPossibleBarrierEpochsLiveAtOnce = 4 };
   4914   // Maps the barrier pointer to CyclicBarrierInfo.
   4915   typedef unordered_map<uintptr_t, CyclicBarrierInfo> CyclicBarrierMap;
   4916 
   4917   CyclicBarrierInfo &GetCyclicBarrierInfo(uintptr_t barrier) {
   4918     if (cyclic_barrier_map_ == NULL) {
   4919       cyclic_barrier_map_ = new CyclicBarrierMap;
   4920     }
   4921     return (*cyclic_barrier_map_)[barrier];
   4922   }
   4923 
   4924   void HandleBarrierInit(uintptr_t barrier, uint32_t n) {
   4925     CyclicBarrierInfo &info = GetCyclicBarrierInfo(barrier);
   4926     CHECK(n > 0);
   4927     memset(&info, 0, sizeof(CyclicBarrierInfo));
   4928     info.barrier_count = n;
   4929   }
   4930 
   4931   void HandleBarrierWaitBefore(uintptr_t barrier) {
   4932     CyclicBarrierInfo &info = GetCyclicBarrierInfo(barrier);
   4933 
   4934     CHECK(info.calls_before_reset >= 0);
   4935     int32_t epoch = info.n_wait_before / info.barrier_count;
   4936     epoch %= kNumberOfPossibleBarrierEpochsLiveAtOnce;
   4937     info.n_wait_before++;
   4938     if (info.calls_before_reset == 0) {
   4939       // We are blocking the first time after reset. Clear the VTS.
   4940       info.calls_before_reset = info.barrier_count;
   4941       Signaller &signaller = (*signaller_map_)[barrier + epoch];
   4942       VTS::Unref(signaller.vts);
   4943       signaller.vts = NULL;
   4944       if (debug_happens_before) {
   4945         Printf("T%d barrier %p (epoch %d) reset\n", tid().raw(),
   4946                barrier, epoch);
   4947       }
   4948     }
   4949     info.calls_before_reset--;
   4950     // Signal to all threads that blocked on this barrier.
   4951     if (debug_happens_before) {
   4952       Printf("T%d barrier %p (epoch %d) wait before\n", tid().raw(),
   4953              barrier, epoch);
   4954     }
   4955     HandleSignal(barrier + epoch);
   4956   }
   4957 
   4958   void HandleBarrierWaitAfter(uintptr_t barrier) {
   4959     CyclicBarrierInfo &info = GetCyclicBarrierInfo(barrier);
   4960     int32_t epoch = info.n_wait_after / info.barrier_count;
   4961     epoch %= kNumberOfPossibleBarrierEpochsLiveAtOnce;
   4962     info.n_wait_after++;
   4963     if (debug_happens_before) {
   4964       Printf("T%d barrier %p (epoch %d) wait after\n", tid().raw(),
   4965              barrier, epoch);
   4966     }
   4967     HandleWait(barrier + epoch);
   4968   }
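  // Worked example (added comment, illustrative only): for a barrier with
  // barrier_count == 2 and kNumberOfPossibleBarrierEpochsLiveAtOnce == 4, the
  // epoch used as the signal/wait address offset evolves as
  //
  //   n_wait_before: 0 1 | 2 3 | 4 5 | 6 7 | 8 9 ...
  //   epoch        : 0 0 | 1 1 | 2 2 | 3 3 | 0 0 ...
  //
  // i.e. epoch = (n_wait_before / barrier_count) % 4, so consecutive uses of
  // the same barrier signal/wait on different addresses (barrier + 0,
  // barrier + 1, ...) and no happens-before arcs leak between epochs.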
   4969 
   4970   // Call stack  -------------
   4971   void PopCallStack() {
   4972     CHECK(!call_stack_->empty());
   4973     call_stack_->pop_back();
   4974   }
   4975 
   4976   void HandleRtnCall(uintptr_t call_pc, uintptr_t target_pc,
   4977                      IGNORE_BELOW_RTN ignore_below) {
   4978     this->stats.events[RTN_CALL]++;
   4979     if (!call_stack_->empty() && call_pc) {
   4980       call_stack_->back() = call_pc;
   4981     }
   4982     call_stack_->push_back(target_pc);
   4983 
   4984     bool ignore = false;
   4985     if (ignore_below == IGNORE_BELOW_RTN_UNKNOWN) {
   4986       if (ignore_below_cache_.Lookup(target_pc, &ignore) == false) {
   4987         ignore = ThreadSanitizerIgnoreAccessesBelowFunction(target_pc);
   4988         ignore_below_cache_.Insert(target_pc, ignore);
   4989         G_stats->ignore_below_cache_miss++;
   4990       } else {
   4991         // Just in case, check the result of caching.
   4992         DCHECK(ignore ==
   4993                ThreadSanitizerIgnoreAccessesBelowFunction(target_pc));
   4994       }
   4995     } else {
   4996       DCHECK(ignore_below == IGNORE_BELOW_RTN_YES ||
   4997              ignore_below == IGNORE_BELOW_RTN_NO);
   4998       ignore = ignore_below == IGNORE_BELOW_RTN_YES;
   4999     }
   5000 
   5001     if (fun_r_ignore_) {
   5002       fun_r_ignore_++;
   5003     } else if (ignore) {
   5004       fun_r_ignore_ = 1;
   5005       set_ignore_all_accesses(true);
   5006     }
   5007   }
   5008 
   5009   void HandleRtnExit() {
   5010     this->stats.events[RTN_EXIT]++;
   5011     if (!call_stack_->empty()) {
   5012       call_stack_->pop_back();
   5013       if (fun_r_ignore_) {
   5014         if (--fun_r_ignore_ == 0) {
   5015           set_ignore_all_accesses(false);
   5016         }
   5017       }
   5018     }
   5019   }
   5020 
   5021   uintptr_t GetCallstackEntry(size_t offset_from_top) {
   5022     if (offset_from_top >= call_stack_->size()) return 0;
   5023     return (*call_stack_)[call_stack_->size() - offset_from_top - 1];
   5024   }
   5025 
   5026   string CallStackRtnName(size_t offset_from_top = 0) {
   5027     if (call_stack_->size() <= offset_from_top)
   5028       return "";
   5029     uintptr_t pc = (*call_stack_)[call_stack_->size() - offset_from_top - 1];
   5030     return PcToRtnName(pc, false);
   5031   }
   5032 
   5033   string CallStackToStringRtnOnly(int len) {
   5034     string res;
   5035     for (int i = 0; i < len; i++) {
   5036       if (i)
   5037         res += " ";
   5038       res += CallStackRtnName(i);
   5039     }
   5040     return res;
   5041   }
   5042 
   5043   uintptr_t CallStackTopPc() {
   5044     if (call_stack_->empty())
   5045       return 0;
   5046     return call_stack_->back();
   5047   }
   5048 
   5049   INLINE void FillEmbeddedStackTrace(uintptr_t *emb_trace) {
   5050     size_t size = min(call_stack_->size(), (size_t)kSizeOfHistoryStackTrace);
   5051     size_t idx = call_stack_->size() - 1;
   5052     uintptr_t *pcs = call_stack_->pcs();
   5053     for (size_t i = 0; i < size; i++, idx--) {
   5054       emb_trace[i] = pcs[idx];
   5055     }
   5056     if (size < (size_t) kSizeOfHistoryStackTrace) {
   5057       emb_trace[size] = 0;
   5058     }
   5059   }
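  // Illustrative example (added comment; addresses are made up): with
  // kSizeOfHistoryStackTrace == 10 and call_stack_->pcs() == {0x1, 0x2, 0x3}
  // (0x3 on top), the embedded trace is filled top-down and zero-terminated:
  //
  //   emb_trace == {0x3, 0x2, 0x1, 0, ...}
  //
  // Only the ordering and the terminating zero follow from the loop above.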
   5060 
   5061   INLINE void FillStackTrace(StackTrace *trace, size_t size) {
   5062     size_t idx = call_stack_->size() - 1;
   5063     uintptr_t *pcs = call_stack_->pcs();
   5064     for (size_t i = 0; i < size; i++, idx--) {
   5065       trace->Set(i, pcs[idx]);
   5066     }
   5067   }
   5068 
   5069   INLINE StackTrace *CreateStackTrace(uintptr_t pc = 0,
   5070                                       int max_len = -1,
   5071                                       int capacity = 0) {
   5072     if (!call_stack_->empty() && pc) {
   5073       call_stack_->back() = pc;
   5074     }
   5075     if (max_len <= 0) {
   5076       max_len = G_flags->num_callers;
   5077     }
   5078     int size = call_stack_->size();
   5079     if (size > max_len)
   5080       size = max_len;
   5081     StackTrace *res = StackTrace::CreateNewEmptyStackTrace(size, capacity);
   5082     FillStackTrace(res, size);
   5083     return res;
   5084   }
   5085 
   5086   void ReportStackTrace(uintptr_t pc = 0, int max_len = -1) {
   5087     StackTrace *trace = CreateStackTrace(pc, max_len);
   5088     Report("%s", trace->ToString().c_str());
   5089     StackTrace::Delete(trace);
   5090   }
   5091 
   5092   static void ForgetAllState() {
   5093     // G_flags->debug_level = 2;
   5094     for (int i = 0; i < TSanThread::NumberOfThreads(); i++) {
   5095       TSanThread *thr = Get(TID(i));
   5096       thr->recent_segments_cache_.ForgetAllState();
   5097       thr->sid_ = SID();  // Reset the old SID so we don't try to read its VTS.
   5098       VTS *singleton_vts = VTS::CreateSingleton(TID(i), 2);
   5099       if (thr->is_running()) {
   5100         thr->NewSegmentWithoutUnrefingOld("ForgetAllState", singleton_vts);
   5101       }
   5102       for (map<TID, ThreadCreateInfo>::iterator j =
   5103                thr->child_tid_to_create_info_.begin();
   5104            j != thr->child_tid_to_create_info_.end(); ++j) {
   5105         ThreadCreateInfo &info = j->second;
   5106         VTS::Unref(info.vts);
   5107         // The parent's VTS should neither happen-before nor equal the child's.
   5108         info.vts = VTS::CreateSingleton(TID(i), 1);
   5109       }
   5110       if (thr->vts_at_exit_) {
   5111         VTS::Unref(thr->vts_at_exit_);
   5112         thr->vts_at_exit_ = singleton_vts->Clone();
   5113       }
   5114       thr->dead_sids_.clear();
   5115       thr->fresh_sids_.clear();
   5116     }
   5117     signaller_map_->ClearAndDeleteElements();
   5118   }
   5119 
   5120   static void InitClassMembers() {
   5121     ScopedMallocCostCenter malloc_cc("InitClassMembers");
   5122     all_threads_        = new TSanThread*[G_flags->max_n_threads];
   5123     memset(all_threads_, 0, sizeof(TSanThread*) * G_flags->max_n_threads);
   5124     n_threads_          = 0;
   5125     signaller_map_      = new SignallerMap;
   5126   }
   5127 
   5128   BitSet *lock_era_access_set(int is_w) {
   5129     return &lock_era_access_set_[is_w];
   5130   }
   5131 
   5132   // --------- dead SIDs, fresh SIDs
    5133   // When running the fast path w/o a lock we need to recycle SIDs into a
    5134   // thread-local pool. HasRoomForDeadSids and AddDeadSid may be called w/o a lock.
   5135   // FlushDeadSids should be called under a lock.
   5136   // When creating a new segment on SBLOCK_ENTER, we need to get a fresh SID
   5137   // from somewhere. We keep a pile of fresh ready-to-use SIDs in
   5138   // a thread-local array.
   5139   enum { kMaxNumDeadSids = 64,
   5140          kMaxNumFreshSids = 256, };
   5141   INLINE void AddDeadSid(SID sid, const char *where) {
   5142     if (TS_SERIALIZED) {
   5143       Segment::Unref(sid, where);
   5144     } else {
   5145       if (Segment::UnrefNoRecycle(sid, where) == 0) {
   5146         dead_sids_.push_back(sid);
   5147       }
   5148     }
   5149   }
   5150 
   5151   INLINE void FlushDeadSids() {
   5152     if (TS_SERIALIZED) return;
   5153     size_t n = dead_sids_.size();
   5154     for (size_t i = 0; i < n; i++) {
   5155       SID sid = dead_sids_[i];
   5156       Segment::AssertLive(sid, __LINE__);
   5157       DCHECK(Segment::Get(sid)->ref_count() == 0);
   5158       Segment::RecycleOneSid(sid);
   5159     }
   5160     dead_sids_.clear();
   5161   }
   5162 
   5163   INLINE bool HasRoomForDeadSids() const {
   5164     return TS_SERIALIZED ? false :
   5165         dead_sids_.size() < kMaxNumDeadSids - 2;
   5166   }
   5167 
   5168   void GetSomeFreshSids() {
   5169     size_t cur_size = fresh_sids_.size();
   5170     DCHECK(cur_size <= kMaxNumFreshSids);
   5171     if (cur_size > kMaxNumFreshSids / 2) {
   5172       // We already have quite a few fresh SIDs, do nothing.
   5173       return;
   5174     }
   5175     DCHECK(fresh_sids_.capacity() >= kMaxNumFreshSids);
   5176     size_t n_requested_sids = kMaxNumFreshSids - cur_size;
   5177     fresh_sids_.resize(kMaxNumFreshSids);
   5178     Segment::AllocateFreshSegments(n_requested_sids, &fresh_sids_[cur_size]);
   5179   }
   5180 
   5181   void ReleaseFreshSids() {
   5182     for (size_t i = 0; i < fresh_sids_.size(); i++) {
   5183       Segment::RecycleOneFreshSid(fresh_sids_[i]);
   5184     }
   5185     fresh_sids_.clear();
   5186   }
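  // Sketch of the SID recycling protocol implemented above (a descriptive
  // summary, not new behaviour):
  //
  //   // fast path, no lock:
  //   AddDeadSid(sid, where);   // SIDs whose refcount hit 0 pile up in dead_sids_
  //   // later, under the lock:
  //   FlushDeadSids();          // recycle the dead SIDs for real
  //   GetSomeFreshSids();       // top fresh_sids_ back up (to kMaxNumFreshSids)
  //
  // In TS_SERIALIZED builds the dead-SID pool is bypassed and Segment::Unref()
  // recycles immediately.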
   5187 
   5188  private:
   5189   bool is_running_;
   5190   string thread_name_;
   5191 
   5192   TID    tid_;         // This thread's tid.
   5193   SID    sid_;         // Current segment ID.
   5194   TID    parent_tid_;  // Parent's tid.
   5195   bool   thread_local_copy_of_g_has_expensive_flags_;
   5196   uintptr_t  max_sp_;
   5197   uintptr_t  min_sp_;
   5198   uintptr_t  stack_size_for_ignore_;
   5199   uintptr_t  fun_r_ignore_;  // > 0 if we are inside a fun_r-ed function.
   5200   uintptr_t  min_sp_for_ignore_;
   5201   uintptr_t  n_mops_since_start_;
   5202   StackTrace *creation_context_;
   5203   bool      announced_;
   5204 
   5205   LSID   rd_lockset_;
   5206   LSID   wr_lockset_;
   5207 
   5208   // These bits should be read in the hottest loop, so we combine them all
   5209   // together.
   5210   // bit 1 -- ignore reads.
   5211   // bit 2 -- ignore writes.
   5212   // bit 3 -- have expensive flags
   5213   int expensive_bits_;
   5214   int ignore_depth_[2];
   5215   StackTrace *ignore_context_[2];
   5216 
   5217   VTS *vts_at_exit_;
   5218 
   5219   CallStack *call_stack_;
   5220 
   5221   vector<SID> dead_sids_;
   5222   vector<SID> fresh_sids_;
   5223 
   5224   PtrToBoolCache<251> ignore_below_cache_;
   5225 
   5226   LockHistory lock_history_;
   5227   BitSet lock_era_access_set_[2];
   5228   RecentSegmentsCache recent_segments_cache_;
   5229 
   5230   map<TID, ThreadCreateInfo> child_tid_to_create_info_;
   5231 
   5232   // This var is used to suppress race reports
   5233   // when handling atomic memory accesses.
    5234   // That is, an atomic memory access can't race with another atomic access;
    5235   // however, plain memory accesses can race with atomic memory accesses.
   5236   int inside_atomic_op_;
   5237 
   5238   prng_t rand_state_;
   5239 
   5240   struct Signaller {
   5241     VTS *vts;
   5242   };
   5243 
   5244   class SignallerMap: public unordered_map<uintptr_t, Signaller> {
   5245     public:
   5246      void ClearAndDeleteElements() {
   5247        for (iterator it = begin(); it != end(); ++it) {
   5248          VTS::Unref(it->second.vts);
   5249        }
   5250        clear();
   5251      }
   5252   };
   5253 
   5254   // All threads. The main thread has tid 0.
   5255   static TSanThread **all_threads_;
   5256   static int      n_threads_;
   5257 
   5258   // signaller address -> VTS
   5259   static SignallerMap *signaller_map_;
   5260   static CyclicBarrierMap *cyclic_barrier_map_;
   5261 };
   5262 
   5263 INLINE static int32_t raw_tid(TSanThread *t) {
   5264   return t->tid().raw();
   5265 }
   5266 
   5267 // TSanThread:: static members
   5268 TSanThread                    **TSanThread::all_threads_;
   5269 int                         TSanThread::n_threads_;
   5270 TSanThread::SignallerMap       *TSanThread::signaller_map_;
   5271 TSanThread::CyclicBarrierMap   *TSanThread::cyclic_barrier_map_;
   5272 
   5273 
   5274 // -------- TsanAtomicCore ------------------ {{{1
   5275 
   5276 // Responsible for handling of atomic memory accesses.
   5277 class TsanAtomicCore {
   5278  public:
   5279   TsanAtomicCore();
   5280 
   5281   void HandleWrite(TSanThread* thr,
   5282                    uintptr_t a,
   5283                    uint64_t v,
   5284                    uint64_t prev,
   5285                    bool is_acquire,
   5286                    bool is_release,
   5287                    bool is_rmw);
   5288 
   5289   uint64_t HandleRead(TSanThread* thr,
   5290                       uintptr_t a,
   5291                       uint64_t v,
   5292                       bool is_acquire);
   5293 
   5294   void ClearMemoryState(uintptr_t a, uintptr_t b);
   5295 
   5296  private:
   5297   // Represents one value in modification history
   5298   // of an atomic variable.
   5299   struct AtomicHistoryEntry {
   5300     // Actual value.
    5301     // (atomics larger than uint64_t are not supported as of now)
   5302     uint64_t val;
   5303     // ID of a thread that did the modification.
   5304     TID tid;
   5305     // The thread's clock during the modification.
   5306     int32_t clk;
   5307     // Vector clock that is acquired by a thread
   5308     // that loads the value.
   5309     // Similar to Signaller::vts.
   5310     VTS* vts;
   5311   };
   5312 
   5313   // Descriptor of an atomic variable.
   5314   struct Atomic {
   5315     // Number of stored entries in the modification order of the variable.
    5316     // This represents a trade-off between space and modelling precision.
    5317     // 4 values should generally be enough.
   5318     static int32_t const kHistSize = 4;
   5319     // Current position in the modification order.
   5320     int32_t hist_pos;
   5321     // Modification history organized as a circular buffer.
   5322     // That is, old values are discarded.
   5323     AtomicHistoryEntry hist [kHistSize];
    5324     // It's basically a tid->hist_pos map that tracks which threads
    5325     // have seen which values. It's required to meet the following requirement:
    5326     // even relaxed loads must not be reordered within a single thread.
   5327     VectorClock last_seen;
   5328 
   5329     Atomic();
   5330     void reset(bool init = false);
   5331   };
   5332 
   5333   typedef map<uintptr_t, Atomic> AtomicMap;
   5334   AtomicMap atomic_map_;
   5335 
   5336   void AtomicFixHist(Atomic* atomic,
   5337                      uint64_t prev);
   5338 
   5339   TsanAtomicCore(TsanAtomicCore const&);
   5340   void operator=(TsanAtomicCore const&);
   5341 };
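// Note on the modification history above (added comment; the indexing shown is
// an assumption about how hist_pos is used, not a quote from the code): with
// kHistSize == 4 the history acts as a circular buffer, so a store would
// conceptually do something like
//
//   AtomicHistoryEntry &e = atomic->hist[atomic->hist_pos % Atomic::kHistSize];
//   ...  // overwrite the oldest entry with the new value/tid/clk/vts
//   atomic->hist_pos++;
//
// keeping only the last 4 values of the modification order; older values are
// deliberately discarded (the space vs. precision trade-off mentioned above).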
   5342 
   5343 
   5344 static TsanAtomicCore* g_atomicCore;
   5345 
   5346 
   5347 // -------- Clear Memory State ------------------ {{{1
   5348 static void INLINE UnrefSegmentsInMemoryRange(uintptr_t a, uintptr_t b,
   5349                                                 Mask mask, CacheLine *line) {
   5350   while (!mask.Empty()) {
   5351     uintptr_t x = mask.GetSomeSetBit();
   5352     DCHECK(mask.Get(x));
   5353     mask.Clear(x);
   5354     line->GetValuePointer(x)->Unref("Detector::UnrefSegmentsInMemoryRange");
   5355   }
   5356 }
   5357 
   5358 void INLINE ClearMemoryStateInOneLine(TSanThread *thr, uintptr_t addr,
   5359                                       uintptr_t beg, uintptr_t end) {
   5360   AssertTILHeld();
   5361   CacheLine *line = G_cache->GetLineIfExists(thr, addr, __LINE__);
   5362   // CacheLine *line = G_cache->GetLineOrCreateNew(addr, __LINE__);
   5363   if (line) {
   5364     DCHECK(beg < CacheLine::kLineSize);
   5365     DCHECK(end <= CacheLine::kLineSize);
   5366     DCHECK(beg < end);
   5367     Mask published = line->published();
   5368     if (UNLIKELY(!published.Empty())) {
   5369       Mask mask(published.GetRange(beg, end));
   5370       ClearPublishedAttribute(line, mask);
   5371     }
   5372     Mask old_used = line->ClearRangeAndReturnOldUsed(beg, end);
   5373     UnrefSegmentsInMemoryRange(beg, end, old_used, line);
   5374     G_cache->ReleaseLine(thr, addr, line, __LINE__);
   5375   }
   5376 }
   5377 
   5378 // clear memory state for [a,b)
   5379 void NOINLINE ClearMemoryState(TSanThread *thr, uintptr_t a, uintptr_t b) {
   5380   if (a == b) return;
   5381   CHECK(a < b);
   5382   uintptr_t line1_tag = 0, line2_tag = 0;
   5383   uintptr_t single_line_tag = GetCacheLinesForRange(a, b,
   5384                                                     &line1_tag, &line2_tag);
   5385   if (single_line_tag) {
   5386     ClearMemoryStateInOneLine(thr, a, a - single_line_tag,
   5387                               b - single_line_tag);
   5388     return;
   5389   }
   5390 
   5391   uintptr_t a_tag = CacheLine::ComputeTag(a);
   5392   ClearMemoryStateInOneLine(thr, a, a - a_tag, CacheLine::kLineSize);
   5393 
   5394   for (uintptr_t tag_i = line1_tag; tag_i < line2_tag;
   5395        tag_i += CacheLine::kLineSize) {
   5396     ClearMemoryStateInOneLine(thr, tag_i, 0, CacheLine::kLineSize);
   5397   }
   5398 
   5399   if (b > line2_tag) {
   5400     ClearMemoryStateInOneLine(thr, line2_tag, 0, b - line2_tag);
   5401   }
   5402 
   5403   if (DEBUG_MODE && G_flags->debug_level >= 2) {
   5404     // Check that we've cleared it. Slow!
   5405     for (uintptr_t x = a; x < b; x++) {
   5406       uintptr_t off = CacheLine::ComputeOffset(x);
   5407       (void)off;
   5408       CacheLine *line = G_cache->GetLineOrCreateNew(thr, x, __LINE__);
   5409       CHECK(!line->has_shadow_value().Get(off));
   5410       G_cache->ReleaseLine(thr, x, line, __LINE__);
   5411     }
   5412   }
   5413 
   5414   g_atomicCore->ClearMemoryState(a, b);
   5415 }
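// Worked example (added comment, illustrative only; assumes
// CacheLine::kLineSize == 64 and that GetCacheLinesForRange() returns the
// first full-line tag after 'a' and the tag of b's line): clearing
// [a, b) = [0x1030, 0x10d0) spans four lines and is split as in the code above:
//
//   ClearMemoryStateInOneLine(thr, 0x1030, 0x30, 0x40);  // tail of line 0x1000
//   ClearMemoryStateInOneLine(thr, 0x1040, 0x00, 0x40);  // full line 0x1040
//   ClearMemoryStateInOneLine(thr, 0x1080, 0x00, 0x40);  // full line 0x1080
//   ClearMemoryStateInOneLine(thr, 0x10c0, 0x00, 0x10);  // head of line 0x10c0
//
// A range that fits in a single line takes the single_line_tag shortcut instead.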
   5416 
   5417 // -------- PCQ --------------------- {{{1
   5418 struct PCQ {
   5419   uintptr_t pcq_addr;
   5420   deque<VTS*> putters;
   5421 };
   5422 
   5423 typedef map<uintptr_t, PCQ> PCQMap;
   5424 static PCQMap *g_pcq_map;
   5425 
   5426 // -------- Heap info ---------------------- {{{1
   5427 #include "ts_heap_info.h"
   5428 // Information about heap memory.
   5429 
   5430 struct HeapInfo {
   5431   uintptr_t   ptr;
   5432   uintptr_t   size;
   5433   SID         sid;
   5434   HeapInfo() : ptr(0), size(0), sid(0) { }
   5435 
   5436   Segment *seg() { return Segment::Get(sid); }
   5437   TID tid() { return seg()->tid(); }
   5438   string StackTraceString() { return Segment::StackTraceString(sid); }
   5439 };
   5440 
   5441 static HeapMap<HeapInfo> *G_heap_map;
   5442 
   5443 struct ThreadStackInfo {
   5444   uintptr_t   ptr;
   5445   uintptr_t   size;
   5446   ThreadStackInfo() : ptr(0), size(0) { }
   5447 };
   5448 
   5449 static HeapMap<ThreadStackInfo> *G_thread_stack_map;
   5450 
   5451 // -------- Forget all state -------- {{{1
   5452 // We need to forget all state and start over because we've
   5453 // run out of some resources (most likely, segment IDs).
   5454 static void ForgetAllStateAndStartOver(TSanThread *thr, const char *reason) {
   5455   // This is done under the main lock.
   5456   AssertTILHeld();
   5457   size_t start_time = g_last_flush_time = TimeInMilliSeconds();
   5458   Report("T%d INFO: %s. Flushing state.\n", raw_tid(thr), reason);
   5459 
   5460   if (TS_SERIALIZED == 0) {
   5461     // We own the lock, but we also must acquire all cache lines
   5462     // so that the fast-path (unlocked) code does not execute while
   5463     // we are flushing.
   5464     G_cache->AcquireAllLines(thr);
   5465   }
   5466 
   5467 
   5468   if (0) {
   5469     Report("INFO: Thread Sanitizer will now forget all history.\n");
   5470     Report("INFO: This is experimental, and may fail!\n");
   5471     if (G_flags->keep_history > 0) {
   5472       Report("INFO: Consider re-running with --keep_history=0\n");
   5473     }
   5474     if (G_flags->show_stats) {
   5475         G_stats->PrintStats();
   5476     }
   5477   }
   5478 
   5479   G_stats->n_forgets++;
   5480 
   5481   Segment::ForgetAllState();
   5482   SegmentSet::ForgetAllState();
   5483   TSanThread::ForgetAllState();
   5484   VTS::FlushHBCache();
   5485 
   5486   G_heap_map->Clear();
   5487 
   5488   g_publish_info_map->clear();
   5489 
   5490   for (PCQMap::iterator it = g_pcq_map->begin(); it != g_pcq_map->end(); ++it) {
   5491     PCQ &pcq = it->second;
   5492     for (deque<VTS*>::iterator it2 = pcq.putters.begin();
   5493          it2 != pcq.putters.end(); ++it2) {
   5494       VTS::Unref(*it2);
   5495       *it2 = VTS::CreateSingleton(TID(0), 1);
   5496     }
   5497   }
   5498 
   5499   // Must be the last one to flush as it effectively releases the
    5500   // cache lines and enables fast-path code to run in other threads.
   5501   G_cache->ForgetAllState(thr);
   5502 
   5503   size_t stop_time = TimeInMilliSeconds();
   5504   if (DEBUG_MODE || (stop_time - start_time > 0)) {
   5505     Report("T%d INFO: Flush took %ld ms\n", raw_tid(thr),
   5506            stop_time - start_time);
   5507   }
   5508 }
   5509 
   5510 static INLINE void FlushStateIfOutOfSegments(TSanThread *thr) {
   5511   if (Segment::NumberOfSegments() > kMaxSIDBeforeFlush) {
   5512     // too few sids left -- flush state.
   5513     if (DEBUG_MODE) {
   5514       G_cache->PrintStorageStats();
   5515       Segment::ShowSegmentStats();
   5516     }
   5517     ForgetAllStateAndStartOver(thr, "run out of segment IDs");
   5518   }
   5519 }
   5520 
   5521 // -------- Expected Race ---------------------- {{{1
   5522 typedef  HeapMap<ExpectedRace> ExpectedRacesMap;
   5523 static ExpectedRacesMap *G_expected_races_map;
   5524 static bool g_expecting_races;
   5525 static int g_found_races_since_EXPECT_RACE_BEGIN;
   5526 
   5527 ExpectedRace* ThreadSanitizerFindExpectedRace(uintptr_t addr) {
   5528   return G_expected_races_map->GetInfo(addr);
   5529 }
   5530 
   5531 // -------- Suppressions ----------------------- {{{1
   5532 static const char default_suppressions[] =
   5533 // TODO(kcc): as it gets bigger, move it into a separate object file.
   5534 "# We need to have some default suppressions, but we don't want to    \n"
    5535 "# keep them in a separate text file, so we keep them in the code.    \n"
   5536 
   5537 #ifdef VGO_darwin
   5538 "{                                                                    \n"
   5539 "   dyld tries to unlock an invalid mutex when adding/removing image. \n"
   5540 "   ThreadSanitizer:InvalidLock                                       \n"
   5541 "   fun:pthread_mutex_unlock                                          \n"
   5542 "   fun:_dyld_register_func_for_*_image                               \n"
   5543 "}                                                                    \n"
   5544 
   5545 "{                                                                      \n"
   5546 "  Benign reports in __NSOperationInternal when using workqueue threads \n"
   5547 "  ThreadSanitizer:Race                                                 \n"
   5548 "  fun:__+[__NSOperationInternal _observeValueForKeyPath:ofObject:changeKind:oldValue:newValue:indexes:context:]_block_invoke_*\n"
   5549 "  fun:_dispatch_call_block_and_release                                 \n"
   5550 "}                                                                      \n"
   5551 
   5552 "{                                                                    \n"
   5553 "  Benign race in GCD when using workqueue threads.                   \n"
   5554 "  ThreadSanitizer:Race                                               \n"
   5555 "  fun:____startOperations_block_invoke_*                             \n"
   5556 "  ...                                                                \n"
   5557 "  fun:_dispatch_call_block_and_release                               \n"
   5558 "}                                                                    \n"
   5559 
   5560 "{                                                                    \n"
   5561 "  Benign race in NSOQSchedule when using workqueue threads.          \n"
   5562 "  ThreadSanitizer:Race                                               \n"
   5563 "  fun:__doStart*                                                     \n"
   5564 "  ...                                                                \n"
   5565 "  fun:_dispatch_call_block_and_release                               \n"
   5566 "}                                                                    \n"
   5567 
   5568 
   5569 #endif
   5570 
   5571 #ifndef _MSC_VER
   5572 "{                                                                   \n"
   5573 "  False reports on std::string internals. See TSan issue #40.       \n"
   5574 "  ThreadSanitizer:Race                                              \n"
   5575 "  ...                                                               \n"
   5576 "  fun:*~basic_string*                                               \n"
   5577 "}                                                                   \n"
   5578 
   5579 "{                                                                   \n"
   5580 "  False reports on std::string internals. See TSan issue #40.       \n"
   5581 "  ThreadSanitizer:Race                                              \n"
   5582 "  ...                                                               \n"
   5583 "  fun:*basic_string*_M_destroy                                      \n"
   5584 "}                                                                   \n"
   5585 
   5586 #else
   5587 "{                                                                   \n"
   5588 "  False lock report inside ntdll.dll                                \n"
   5589 "  ThreadSanitizer:InvalidLock                                       \n"
   5590 "  fun:*                                                             \n"
   5591 "  obj:*ntdll.dll                                                    \n"
   5592 "}                                                                   \n"
   5593 
   5594 "{                                                                   \n"
   5595 "  False report due to lack of debug symbols in ntdll.dll  (a)       \n"
   5596 "  ThreadSanitizer:InvalidLock                                       \n"
   5597 "  fun:*SRWLock*                                                     \n"
   5598 "}                                                                   \n"
   5599 
   5600 "{                                                                   \n"
   5601 "  False report due to lack of debug symbols in ntdll.dll  (b)       \n"
   5602 "  ThreadSanitizer:UnlockForeign                                     \n"
   5603 "  fun:*SRWLock*                                                     \n"
   5604 "}                                                                   \n"
   5605 
   5606 "{                                                                   \n"
   5607 "  False report due to lack of debug symbols in ntdll.dll  (c)       \n"
   5608 "  ThreadSanitizer:UnlockNonLocked                                   \n"
   5609 "  fun:*SRWLock*                                                     \n"
   5610 "}                                                                   \n"
   5611 
   5612 "{                                                                   \n"
   5613 "  False reports on std::string internals (2). See TSan issue #40.   \n"
   5614 "  ThreadSanitizer:Race                                              \n"
   5615 "  ...                                                               \n"
   5616 "  fun:*basic_string*scalar deleting destructor*                     \n"
   5617 "}                                                                   \n"
   5618 #endif
   5619 
   5620 #ifdef TS_PIN
   5621 "{                                                                   \n"
   5622 "  Suppression for issue 54 (PIN lacks support for IFUNC)            \n"
   5623 "  ThreadSanitizer:Race                                              \n"
   5624 "  ...                                                               \n"
   5625 "  fun:*NegativeTests_Strlen::Worker*                                \n"
   5626 "}                                                                   \n"
   5627 #endif
   5628 
   5629 ;
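// An example of a user-supplied suppression in the same format (illustrative
// only; "my_racey_helper" is a made-up function name):
//
//   {
//     A benign race in my application
//     ThreadSanitizer:Race
//     ...
//     fun:my_racey_helper
//   }
//
// Such entries are read from the files listed in G_flags->suppressions (see
// the ReportStorage constructor below).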
   5630 
   5631 // -------- Report Storage --------------------- {{{1
   5632 class ReportStorage {
   5633  public:
   5634 
   5635   ReportStorage()
   5636    : n_reports(0),
   5637      n_race_reports(0),
   5638      program_finished_(0),
   5639      unwind_cb_(0) {
   5640     if (G_flags->generate_suppressions) {
   5641       Report("INFO: generate_suppressions = true\n");
   5642     }
   5643     // Read default suppressions
   5644     int n = suppressions_.ReadFromString(default_suppressions);
   5645     if (n == -1) {
   5646       Report("Error reading default suppressions at line %d: %s\n",
   5647           suppressions_.GetErrorLineNo(),
   5648           suppressions_.GetErrorString().c_str());
   5649       exit(1);
   5650     }
   5651 
   5652     // Read user-supplied suppressions.
   5653     for (size_t i = 0; i < G_flags->suppressions.size(); i++) {
   5654       const string &supp_path = G_flags->suppressions[i];
   5655       Report("INFO: reading suppressions file %s\n", supp_path.c_str());
   5656       int n = suppressions_.ReadFromString(ReadFileToString(supp_path, true));
   5657       if (n == -1) {
   5658         Report("Error at line %d: %s\n",
   5659             suppressions_.GetErrorLineNo(),
   5660             suppressions_.GetErrorString().c_str());
   5661         exit(1);
   5662       }
   5663       Report("INFO: %6d suppression(s) read from file %s\n",
   5664              n, supp_path.c_str());
   5665     }
   5666   }
   5667 
   5668   bool NOINLINE AddReport(TSanThread *thr, uintptr_t pc, bool is_w, uintptr_t addr,
   5669                           int size,
   5670                           ShadowValue old_sval, ShadowValue new_sval,
   5671                           bool is_published) {
   5672     {
   5673       // Check this isn't a "_ZNSs4_Rep20_S_empty_rep_storageE" report.
   5674       uintptr_t offset;
   5675       string symbol_descr;
   5676       if (GetNameAndOffsetOfGlobalObject(addr, &symbol_descr, &offset)) {
   5677         if (StringMatch("*empty_rep_storage*", symbol_descr))
   5678           return false;
   5679         if (StringMatch("_IO_stdfile_*_lock", symbol_descr))
   5680           return false;
   5681         if (StringMatch("_IO_*_stdout_", symbol_descr))
   5682           return false;
   5683         if (StringMatch("_IO_*_stderr_", symbol_descr))
   5684           return false;
   5685       }
   5686     }
   5687 
   5688     bool is_expected = false;
   5689     ExpectedRace *expected_race = G_expected_races_map->GetInfo(addr);
   5690     if (debug_expected_races) {
   5691       Printf("Checking expected race for %lx; exp_race=%p\n",
   5692              addr, expected_race);
   5693       if (expected_race) {
   5694         Printf("  FOUND\n");
   5695       }
   5696     }
   5697 
   5698     if (expected_race) {
   5699       if (G_flags->nacl_untrusted != expected_race->is_nacl_untrusted) {
   5700         Report("WARNING: this race is only expected in NaCl %strusted mode\n",
   5701             expected_race->is_nacl_untrusted ? "un" : "");
   5702       } else {
   5703         is_expected = true;
   5704         expected_race->count++;
   5705       }
   5706     }
   5707 
   5708     if (g_expecting_races) {
   5709       is_expected = true;
   5710       g_found_races_since_EXPECT_RACE_BEGIN++;
   5711     }
   5712 
   5713     if (is_expected && !G_flags->show_expected_races) return false;
   5714 
   5715     StackTrace *stack_trace = thr->CreateStackTrace(pc);
   5716     if (unwind_cb_) {
   5717       int const maxcnt = 256;
   5718       uintptr_t cur_stack [maxcnt];
   5719       int cnt = unwind_cb_(cur_stack, maxcnt, pc);
   5720       if (cnt > 0 && cnt <= maxcnt) {
   5721         cnt = min<int>(cnt, stack_trace->capacity());
   5722         stack_trace->set_size(cnt);
   5723         for (int i = 0; i < cnt; i++)
   5724           stack_trace->Set(i, cur_stack[i]);
   5725       }
   5726     }
   5727     int n_reports_for_this_context = reported_stacks_[stack_trace]++;
   5728 
   5729     if (n_reports_for_this_context > 0) {
   5730       // we already reported a race here.
   5731       StackTrace::Delete(stack_trace);
   5732       return false;
   5733     }
   5734 
   5735 
   5736     ThreadSanitizerDataRaceReport *race_report =
   5737         new ThreadSanitizerDataRaceReport;
   5738 
   5739     race_report->type = ThreadSanitizerReport::DATA_RACE;
   5740     race_report->new_sval = new_sval;
   5741     race_report->old_sval = old_sval;
   5742     race_report->is_expected = is_expected;
   5743     race_report->last_access_is_w = is_w;
   5744     race_report->racey_addr = addr;
   5745     race_report->racey_addr_description = DescribeMemory(addr);
   5746     race_report->last_access_tid = thr->tid();
   5747     race_report->last_access_sid = thr->sid();
   5748     race_report->last_access_size = size;
   5749     race_report->stack_trace = stack_trace;
   5750     race_report->racey_addr_was_published = is_published;
   5751     race_report->last_acces_lsid[false] = thr->lsid(false);
   5752     race_report->last_acces_lsid[true] = thr->lsid(true);
   5753 
   5754     Segment *seg = Segment::Get(thr->sid());
   5755     (void)seg;
   5756     CHECK(thr->lsid(false) == seg->lsid(false));
   5757     CHECK(thr->lsid(true) == seg->lsid(true));
   5758 
   5759     return ThreadSanitizerPrintReport(race_report);
   5760   }
   5761 
   5762   void AnnounceThreadsInSegmentSet(SSID ssid) {
   5763     if (ssid.IsEmpty()) return;
   5764     for (int s = 0; s < SegmentSet::Size(ssid); s++) {
   5765       Segment *seg = SegmentSet::GetSegmentForNonSingleton(ssid, s, __LINE__);
   5766       TSanThread::Get(seg->tid())->Announce();
   5767     }
   5768   }
   5769 
   5770 
   5771 
   5772   void PrintConcurrentSegmentSet(SSID ssid, TID tid, SID sid,
   5773                                  LSID lsid, bool is_w,
   5774                                  const char *descr, set<LID> *locks,
   5775                                  set<SID>* concurrent_sids) {
   5776     if (ssid.IsEmpty()) return;
   5777     bool printed_header = false;
   5778     TSanThread *thr1 = TSanThread::Get(tid);
   5779     for (int s = 0; s < SegmentSet::Size(ssid); s++) {
   5780       SID concurrent_sid = SegmentSet::GetSID(ssid, s, __LINE__);
   5781       Segment *seg = Segment::Get(concurrent_sid);
   5782       if (Segment::HappensBeforeOrSameThread(concurrent_sid, sid)) continue;
   5783       if (!LockSet::IntersectionIsEmpty(lsid, seg->lsid(is_w))) continue;
   5784       if (concurrent_sids) {
   5785         concurrent_sids->insert(concurrent_sid);
   5786       }
   5787       TSanThread *thr2 = TSanThread::Get(seg->tid());
   5788       if (!printed_header) {
   5789         Report("  %sConcurrent %s happened at (OR AFTER) these points:%s\n",
   5790                c_magenta, descr, c_default);
   5791         printed_header = true;
   5792       }
   5793 
   5794       Report("   %s (%s):\n",
   5795              thr2->ThreadName().c_str(),
   5796              TwoLockSetsToString(seg->lsid(false),
   5797                                  seg->lsid(true)).c_str());
   5798       if (G_flags->show_states) {
   5799         Report("   S%d\n", concurrent_sid.raw());
   5800       }
   5801       LockSet::AddLocksToSet(seg->lsid(false), locks);
   5802       LockSet::AddLocksToSet(seg->lsid(true), locks);
   5803       Report("%s", Segment::StackTraceString(concurrent_sid).c_str());
   5804       if (!G_flags->pure_happens_before &&
   5805           G_flags->suggest_happens_before_arcs) {
   5806         set<LID> message_locks;
   5807         // Report("Locks in T%d\n", thr1->tid().raw());
   5808         // thr1->lock_history().PrintLocks();
   5809         // Report("Unlocks in T%d\n", thr2->tid().raw());
   5810         // thr2->lock_history().PrintUnlocks();
   5811         if (LockHistory::Intersect(thr1->lock_history(), thr2->lock_history(),
   5812                                    seg->lock_era(), &message_locks)) {
   5813           Report("   Note: these locks were recently released by T%d"
   5814                  " and later acquired by T%d: {%s}\n"
   5815                  "   See http://code.google.com/p/data-race-test/wiki/"
   5816                  "PureHappensBeforeVsHybrid\n",
   5817                  thr2->tid().raw(),
   5818                  thr1->tid().raw(),
   5819                  SetOfLocksToString(message_locks).c_str());
   5820           locks->insert(message_locks.begin(), message_locks.end());
   5821         }
   5822       }
   5823     }
   5824   }
   5825 
   5826   void SetProgramFinished() {
   5827     CHECK(!program_finished_);
   5828     program_finished_ = true;
   5829   }
   5830 
   5831   string RaceInfoString(uintptr_t pc, set<SID>& concurrent_sids) {
   5832     string s;
   5833     char buf[100];
   5834     snprintf(buf, 100, "Race verifier data: %p", (void*)pc);
   5835     s += buf;
   5836     for (set<SID>::iterator it = concurrent_sids.begin();
   5837          it != concurrent_sids.end(); ++it) {
   5838       // Take the first pc of the concurrent stack trace.
   5839       uintptr_t concurrent_pc = *Segment::embedded_stack_trace(*it);
   5840       snprintf(buf, 100, ",%p", (void*)concurrent_pc);
   5841       s += buf;
   5842     }
   5843     s += "\n";
   5844     return s;
   5845   }
   5846 
   5847   void PrintRaceReport(ThreadSanitizerDataRaceReport *race) {
   5848     bool short_report = program_finished_;
   5849     if (!short_report) {
   5850       AnnounceThreadsInSegmentSet(race->new_sval.rd_ssid());
   5851       AnnounceThreadsInSegmentSet(race->new_sval.wr_ssid());
   5852     }
   5853     bool is_w = race->last_access_is_w;
   5854     TID     tid = race->last_access_tid;
   5855     TSanThread *thr = TSanThread::Get(tid);
   5856     SID     sid = race->last_access_sid;
   5857     LSID    lsid = race->last_acces_lsid[is_w];
   5858     set<LID> all_locks;
   5859 
   5860     n_race_reports++;
   5861     if (G_flags->html) {
   5862       Report("<b id=race%d>Race report #%d; </b>"
   5863              "<a href=\"#race%d\">Next;</a>  "
   5864              "<a href=\"#race%d\">Prev;</a>\n",
   5865              n_race_reports, n_race_reports,
   5866              n_race_reports+1, n_race_reports-1);
   5867     }
   5868 
   5869 
   5870     // Note the {{{ and }}}. These are for vim folds.
   5871     Report("%sWARNING: %s data race during %s of size %d at %p: {{{%s\n",
   5872            c_red,
   5873            race->is_expected ? "Expected" : "Possible",
   5874            is_w ? "write" : "read",
   5875            race->last_access_size,
   5876            race->racey_addr,
   5877            c_default);
   5878     if (!short_report) {
   5879       LockSet::AddLocksToSet(race->last_acces_lsid[false], &all_locks);
   5880       LockSet::AddLocksToSet(race->last_acces_lsid[true], &all_locks);
   5881       Report("   %s (%s):\n",
   5882              thr->ThreadName().c_str(),
   5883              TwoLockSetsToString(race->last_acces_lsid[false],
   5884                                  race->last_acces_lsid[true]).c_str());
   5885     }
   5886 
   5887     CHECK(race->stack_trace);
   5888     Report("%s", race->stack_trace->ToString().c_str());
   5889     if (short_report) {
   5890       Report(" See the full version of this report above.\n");
   5891       Report("}%s\n", "}}");
   5892       return;
   5893     }
   5894     // Report(" sid=%d; vts=%s\n", thr->sid().raw(),
   5895     //       thr->vts()->ToString().c_str());
   5896     if (G_flags->show_states) {
   5897       Report(" old state: %s\n", race->old_sval.ToString().c_str());
   5898       Report(" new state: %s\n", race->new_sval.ToString().c_str());
   5899     }
   5900     set<SID> concurrent_sids;
   5901     if (G_flags->keep_history) {
   5902       PrintConcurrentSegmentSet(race->new_sval.wr_ssid(),
   5903                                 tid, sid, lsid, true, "write(s)", &all_locks,
   5904                                 &concurrent_sids);
   5905       if (is_w) {
   5906         PrintConcurrentSegmentSet(race->new_sval.rd_ssid(),
   5907                                   tid, sid, lsid, false, "read(s)", &all_locks,
   5908                                   &concurrent_sids);
   5909       }
   5910     } else {
   5911       Report("  %sAccess history is disabled. "
   5912              "Consider running with --keep-history=1 for better reports.%s\n",
   5913              c_cyan, c_default);
   5914     }
   5915 
   5916     if (race->racey_addr_was_published) {
   5917       Report(" This memory was published\n");
   5918     }
   5919     if (race->racey_addr_description.size() > 0) {
   5920       Report("%s", race->racey_addr_description.c_str());
   5921     }
   5922     if (race->is_expected) {
   5923       ExpectedRace *expected_race =
   5924           G_expected_races_map->GetInfo(race->racey_addr);
   5925       if (expected_race) {
   5926         CHECK(expected_race->description);
   5927         Report(" Description: \"%s\"\n", expected_race->description);
   5928       }
   5929     }
   5930     set<LID>  locks_reported;
   5931 
   5932     if (!all_locks.empty()) {
   5933       Report("  %sLocks involved in this report "
   5934              "(reporting last lock sites):%s {%s}\n",
   5935              c_green, c_default,
   5936              SetOfLocksToString(all_locks).c_str());
   5937 
   5938       for (set<LID>::iterator it = all_locks.begin();
   5939            it != all_locks.end(); ++it) {
   5940         LID lid = *it;
   5941         Lock::ReportLockWithOrWithoutContext(lid, true);
   5942       }
   5943     }
   5944 
   5945     string raceInfoString = RaceInfoString(race->stack_trace->Get(0),
   5946         concurrent_sids);
   5947     Report("   %s", raceInfoString.c_str());
   5948     Report("}}}\n");
   5949   }
   5950 
   5951   bool PrintReport(ThreadSanitizerReport *report) {
   5952     CHECK(report);
   5953     // Check if we have a suppression.
   5954     vector<string> funcs_mangled;
   5955     vector<string> funcs_demangled;
   5956     vector<string> objects;
   5957 
   5958     CHECK(!g_race_verifier_active);
   5959     CHECK(report->stack_trace);
   5960     CHECK(report->stack_trace->size());
   5961     for (size_t i = 0; i < report->stack_trace->size(); i++) {
   5962       uintptr_t pc = report->stack_trace->Get(i);
   5963       string img, rtn, file;
   5964       int line;
   5965       PcToStrings(pc, false, &img, &rtn, &file, &line);
   5966       if (rtn == "(below main)" || rtn == "ThreadSanitizerStartThread")
   5967         break;
   5968 
   5969       funcs_mangled.push_back(rtn);
   5970       funcs_demangled.push_back(NormalizeFunctionName(PcToRtnName(pc, true)));
   5971       objects.push_back(img);
   5972 
   5973       if (rtn == "main")
   5974         break;
   5975     }
   5976     string suppression_name;
   5977     if (suppressions_.StackTraceSuppressed("ThreadSanitizer",
   5978                                            report->ReportName(),
   5979                                            funcs_mangled,
   5980                                            funcs_demangled,
   5981                                            objects,
   5982                                            &suppression_name)) {
   5983       used_suppressions_[suppression_name]++;
   5984       return false;
   5985     }
   5986 
   5987     // Actually print it.
   5988     if (report->type == ThreadSanitizerReport::UNLOCK_FOREIGN) {
   5989       ThreadSanitizerBadUnlockReport *bad_unlock =
   5990           reinterpret_cast<ThreadSanitizerBadUnlockReport*>(report);
   5991       Report("WARNING: Lock %s was released by thread T%d"
   5992              " which did not acquire this lock: {{{\n%s}}}\n",
   5993              Lock::ToString(bad_unlock->lid).c_str(),
   5994              bad_unlock->tid.raw(),
   5995              bad_unlock->stack_trace->ToString().c_str());
   5996     } else if (report->type == ThreadSanitizerReport::UNLOCK_NONLOCKED) {
   5997       ThreadSanitizerBadUnlockReport *bad_unlock =
   5998           reinterpret_cast<ThreadSanitizerBadUnlockReport*>(report);
   5999       Report("WARNING: Unlocking a non-locked lock %s in thread T%d: "
   6000              "{{{\n%s}}}\n",
   6001              Lock::ToString(bad_unlock->lid).c_str(),
   6002              bad_unlock->tid.raw(),
   6003              bad_unlock->stack_trace->ToString().c_str());
   6004     } else if (report->type == ThreadSanitizerReport::INVALID_LOCK) {
   6005       ThreadSanitizerInvalidLockReport *invalid_lock =
   6006           reinterpret_cast<ThreadSanitizerInvalidLockReport*>(report);
   6007       Report("WARNING: accessing an invalid lock %p in thread T%d: "
   6008              "{{{\n%s}}}\n",
   6009              invalid_lock->lock_addr,
   6010              invalid_lock->tid.raw(),
   6011              invalid_lock->stack_trace->ToString().c_str());
   6012     } else if (report->type == ThreadSanitizerReport::ATOMICITY_VIOLATION) {
   6013       ThreadSanitizerAtomicityViolationReport *av =
   6014           reinterpret_cast<ThreadSanitizerAtomicityViolationReport*>(report);
   6015       Report("WARNING: Suspected atomicity violation {{{\n");
   6016       av->r1->Print();
   6017       av->r2->Print();
   6018       av->r3->Print();
   6019       Report("}}}\n");
   6020 
   6021     } else {
   6022       CHECK(report->type == ThreadSanitizerReport::DATA_RACE);
   6023       ThreadSanitizerDataRaceReport *race =
   6024           reinterpret_cast<ThreadSanitizerDataRaceReport*>(report);
   6025       PrintRaceReport(race);
   6026     }
   6027 
   6028     n_reports++;
   6029     SetNumberOfFoundErrors(n_reports);
   6030     if (!G_flags->summary_file.empty()) {
   6031       char buff[100];
   6032       snprintf(buff, sizeof(buff),
   6033                "ThreadSanitizer: %d warning(s) reported\n", n_reports);
   6034       // We overwrite the contents of this file with the new summary now,
   6035       // rather than at program end, so that the summary is already on disk
   6036       // even if we crash later.
   6037       OpenFileWriteStringAndClose(G_flags->summary_file, buff);
   6038     }
   6039 
   6040     // Generate a suppression.
   6041     if (G_flags->generate_suppressions) {
   6042       string supp = "{\n";
   6043       supp += "  <Put your suppression name here>\n";
   6044       supp += string("  ThreadSanitizer:") + report->ReportName() + "\n";
   6045       for (size_t i = 0; i < funcs_mangled.size(); i++) {
   6046         const string &func = funcs_demangled[i];
   6047         if (func.size() == 0 || func == "(no symbols") {
   6048           supp += "  obj:" + objects[i] + "\n";
   6049         } else {
   6050           supp += "  fun:" + funcs_demangled[i] + "\n";
   6051         }
   6052         if (StackTrace::CutStackBelowFunc(funcs_demangled[i])) {
   6053           break;
   6054         }
   6055       }
   6056       supp += "}";
   6057       Printf("------- suppression -------\n%s\n------- end suppression -------\n",
   6058              supp.c_str());
   6059     }
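            // For illustration only (function and object names below are made up):
            // for a report whose top frames are foo() in libfoo.so called from
            // bar(), the text printed above would look like
            //   {
            //     <Put your suppression name here>
            //     ThreadSanitizer:<report name>
            //     fun:foo
            //     fun:bar
            //   }
            // and a frame with no symbol information would appear as
            // "obj:libfoo.so" instead of a "fun:" line.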
   6060 
   6061     return true;
   6062   }
   6063 
   6064   void PrintUsedSuppression() {
   6065     for (map<string, int>::iterator it = used_suppressions_.begin();
   6066          it != used_suppressions_.end(); ++it) {
   6067       Report("used_suppression: %d %s\n", it->second, it->first.c_str());
   6068     }
   6069   }
   6070 
   6071   void PrintSummary() {
   6072     Report("ThreadSanitizer summary: reported %d warning(s) (%d race(s))\n",
   6073            n_reports, n_race_reports);
   6074   }
   6075 
   6076 
   6077   string DescribeMemory(uintptr_t a) {
   6078     const int kBufLen = 1023;
   6079     char buff[kBufLen+1];
   6080 
   6081     // Is this stack?
   6082     for (int i = 0; i < TSanThread::NumberOfThreads(); i++) {
   6083       TSanThread *t = TSanThread::Get(TID(i));
   6084       if (!t || !t->is_running()) continue;
   6085       if (t->MemoryIsInStack(a)) {
   6086         snprintf(buff, sizeof(buff),
   6087                  "  %sLocation %p is %ld bytes inside T%d's stack [%p,%p]%s\n",
   6088                  c_blue,
   6089                  reinterpret_cast<void*>(a),
   6090                  static_cast<long>(t->max_sp() - a),
   6091                  i,
   6092                  reinterpret_cast<void*>(t->min_sp()),
   6093                  reinterpret_cast<void*>(t->max_sp()),
   6094                  c_default
   6095                 );
   6096         return buff;
   6097       }
   6098     }
   6099 
   6100     HeapInfo *heap_info = G_heap_map->GetInfo(a);
   6101     if (heap_info) {
   6102       snprintf(buff, sizeof(buff),
   6103              "  %sLocation %p is %ld bytes inside a block starting at %p"
   6104              " of size %ld allocated by T%d from heap:%s\n",
   6105              c_blue,
   6106              reinterpret_cast<void*>(a),
   6107              static_cast<long>(a - heap_info->ptr),
   6108              reinterpret_cast<void*>(heap_info->ptr),
   6109              static_cast<long>(heap_info->size),
   6110              heap_info->tid().raw(), c_default);
   6111       return string(buff) + heap_info->StackTraceString().c_str();
   6112     }
   6113 
   6114 
   6115     // Is it a global object?
   6116     uintptr_t offset;
   6117     string symbol_descr;
   6118     if (GetNameAndOffsetOfGlobalObject(a, &symbol_descr, &offset)) {
   6119       snprintf(buff, sizeof(buff),
   6120               "  %sAddress %p is %d bytes inside data symbol \"",
   6121               c_blue, reinterpret_cast<void*>(a), static_cast<int>(offset));
   6122       return buff + symbol_descr + "\"" + c_default + "\n";
   6123     }
   6124 
   6125     if (G_flags->debug_level >= 2) {
   6126       string res;
   6127       // Is this near stack?
   6128       for (int i = 0; i < TSanThread::NumberOfThreads(); i++) {
   6129         TSanThread *t = TSanThread::Get(TID(i));
                if (!t) continue;
   6130         const uintptr_t kMaxStackDiff = 1024 * 16;
   6131         uintptr_t diff1 = a - t->max_sp();
   6132         uintptr_t diff2 = t->min_sp() - a;
   6133         if (diff1 < kMaxStackDiff ||
   6134             diff2 < kMaxStackDiff ||
   6135             t->MemoryIsInStack(a)) {
   6136           uintptr_t diff = t->MemoryIsInStack(a) ? 0 :
   6137               (diff1 < kMaxStackDiff ? diff1 : diff2);
   6138           snprintf(buff, sizeof(buff),
   6139                    "  %sLocation %p is within %d bytes outside T%d's stack [%p,%p]%s\n",
   6140                    c_blue,
   6141                    reinterpret_cast<void*>(a),
   6142                    static_cast<int>(diff),
   6143                    i,
   6144                    reinterpret_cast<void*>(t->min_sp()),
   6145                    reinterpret_cast<void*>(t->max_sp()),
   6146                    c_default
   6147                   );
   6148           res += buff;
   6149         }
   6150       }
   6151       if (res.size() > 0) {
   6152         return res +
   6153             "  This report _may_ indicate that valgrind incorrectly "
   6154             "computed the stack boundaries\n";
   6155       }
   6156     }
   6157 
   6158     return "";
   6159   }
   6160 
   6161   void SetUnwindCallback(ThreadSanitizerUnwindCallback cb) {
   6162     unwind_cb_ = cb;
   6163   }
   6164 
   6165  private:
   6166   map<StackTrace *, int, StackTrace::Less> reported_stacks_;
   6167   int n_reports;
   6168   int n_race_reports;
   6169   bool program_finished_;
   6170   Suppressions suppressions_;
   6171   map<string, int> used_suppressions_;
   6172   ThreadSanitizerUnwindCallback unwind_cb_;
   6173 };
   6174 
   6175 // -------- Event Sampling ---------------- {{{1
   6176 // This class samples (profiles) events.
   6177 // Instances of this class should all be static.
   6178 class EventSampler {
   6179  public:
   6180 
   6181   // Sample one event
   6182   void Sample(TSanThread *thr, const char *event_name, bool need_locking) {
   6183     CHECK_NE(G_flags->sample_events, 0);
   6184     (counter_)++;
   6185     if ((counter_ & ((1 << G_flags->sample_events) - 1)) != 0)
   6186       return;
   6187 
   6188     TIL til(ts_lock, 8, need_locking);
   6189     string pos = thr->CallStackToStringRtnOnly(G_flags->sample_events_depth);
   6190     (*samples_)[event_name][pos]++;
   6191     total_samples_++;
   6192     if (total_samples_ >= print_after_this_number_of_samples_) {
   6193       print_after_this_number_of_samples_ +=
   6194           print_after_this_number_of_samples_ / 2;
   6195       ShowSamples();
   6196     }
   6197   }
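          // Rate of the sampling above: the mask check keeps only every
          // 2^G_flags->sample_events-th event, so e.g. a value of 10 lets roughly
          // one event in 1024 past the early return in Sample().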
   6198 
   6199   // Show existing samples
   6200   static void ShowSamples() {
   6201     if (G_flags->sample_events == 0) return;
   6202     Printf("ShowSamples: (all samples: %lld)\n", total_samples_);
   6203     for (SampleMapMap::iterator it1 = samples_->begin();
   6204          it1 != samples_->end(); ++it1) {
   6205       string name = it1->first;
   6206       SampleMap &m = it1->second;
   6207       int total = 0;
   6208       for (SampleMap::iterator it2 = m.begin(); it2 != m.end(); it2++) {
   6209         total += it2->second;
   6210       }
   6211 
   6212       map<int, string> reverted_map;
   6213       for (SampleMap::iterator it2 = m.begin(); it2 != m.end(); it2++) {
   6214         int n_samples = it2->second;
   6215         if (n_samples * 1000 < total) continue;
   6216         reverted_map[n_samples] = it2->first;
   6217       }
   6218       Printf("%s: total samples %'d (~%'lld events)\n", name.c_str(),
   6219              total,
   6220              (int64_t)total << G_flags->sample_events);
   6221       for (map<int, string>::iterator it = reverted_map.begin();
   6222            it != reverted_map.end(); ++it) {
   6223         Printf("%s: %d samples (~%d%%) %s\n", name.c_str(), it->first,
   6224                (it->first * 100) / total, it->second.c_str());
   6225       }
   6226       Printf("\n");
   6227     }
   6228   }
   6229 
   6230   static void InitClassMembers() {
   6231     samples_ = new SampleMapMap;
   6232     total_samples_ = 0;
   6233     print_after_this_number_of_samples_ = 1000;
   6234   }
   6235 
   6236  private:
   6237   int counter_;
   6238 
   6239   typedef map<string, int> SampleMap;
   6240   typedef map<string, SampleMap> SampleMapMap;
   6241   static SampleMapMap *samples_;
   6242   static int64_t total_samples_;
   6243   static int64_t print_after_this_number_of_samples_;
   6244 };
   6245 
   6246 EventSampler::SampleMapMap *EventSampler::samples_;
   6247 int64_t EventSampler::total_samples_;
   6248 int64_t EventSampler::print_after_this_number_of_samples_;
   6249 
   6250 // -------- Detector ---------------------- {{{1
   6251 // Collection of event handlers.
   6252 class Detector {
   6253  public:
   6254   void INLINE HandleTraceLoop(TSanThread *thr, uintptr_t pc,
   6255                               MopInfo *mops,
   6256                               uintptr_t *tleb, size_t n,
   6257                               int expensive_bits, bool need_locking) {
   6258     bool has_expensive_flags = (expensive_bits & 4) != 0;
   6259     size_t i = 0;
   6260     uintptr_t sblock_pc = pc;
   6261     size_t n_locks = 0;
   6262     do {
   6263       uintptr_t addr = tleb[i];
   6264       if (addr == 0) continue;  // This mop was not executed.
   6265       MopInfo *mop = &mops[i];
   6266       tleb[i] = 0;  // we've consumed this mop, clear it.
   6267       DCHECK(mop->size() != 0);
   6268       DCHECK(mop->pc() != 0);
   6269       if ((expensive_bits & 1) && mop->is_write() == false) continue;
   6270       if ((expensive_bits & 2) && mop->is_write() == true) continue;
   6271       n_locks += HandleMemoryAccessInternal(thr, &sblock_pc, addr, mop,
   6272                                  has_expensive_flags,
   6273                                  need_locking);
   6274     } while (++i < n);
   6275     if (has_expensive_flags) {
   6276       const size_t mop_stat_size = TS_ARRAY_SIZE(thr->stats.mops_per_trace);
   6277       thr->stats.mops_per_trace[min(n, mop_stat_size - 1)]++;
   6278       const size_t stat_size = TS_ARRAY_SIZE(thr->stats.locks_per_trace);
   6279       thr->stats.locks_per_trace[min(n_locks, stat_size - 1)]++;
   6280     }
   6281   }
   6282 
   6283 #ifdef _MSC_VER
   6284   NOINLINE
   6285   // With MSVC, INLINE would cause the compilation to be insanely slow.
   6286 #else
   6287   INLINE
   6288 #endif
   6289   void HandleTrace(TSanThread *thr, MopInfo *mops, size_t n, uintptr_t pc,
   6290                    uintptr_t *tleb, bool need_locking) {
   6291     DCHECK(n);
   6292     // 0 bit - ignore reads, 1 bit -- ignore writes,
   6293     // 2 bit - has_expensive_flags.
   6294     int expensive_bits = thr->expensive_bits();
   6295 
   6296     if (expensive_bits == 0) {
   6297       HandleTraceLoop(thr, pc, mops, tleb, n, 0, need_locking);
   6298     } else {
   6299       if ((expensive_bits & 3) == 3) {
   6300         // everything is ignored, just clear the tleb.
   6301         for (size_t i = 0; i < n; i++) tleb[i] = 0;
   6302       } else {
   6303         HandleTraceLoop(thr, pc, mops, tleb, n, expensive_bits, need_locking);
   6304       }
   6305     }
   6306     // At the end, the tleb must be cleared.
   6307     for (size_t i = 0; i < n; i++) DCHECK(tleb[i] == 0);
   6308   }
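          // How the expensive_bits value is consumed above, for orientation:
          // bit 0 set makes HandleTraceLoop() skip reads, bit 1 set makes it skip
          // writes, and the value 3 (both set) short-circuits to just clearing the
          // TLEB; bit 2 essentially switches on the extra per-thread statistics.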
   6309 
   6310   // Special case of a trace with just one mop and no sblock.
   6311   void INLINE HandleMemoryAccess(TSanThread *thr, uintptr_t pc,
   6312                                  uintptr_t addr, uintptr_t size,
   6313                                  bool is_w, bool need_locking) {
   6314     CHECK(size);
   6315     MopInfo mop(pc, size, is_w, false);
   6316     HandleTrace(thr, &mop, 1, 0/*no sblock*/, &addr, need_locking);
   6317   }
   6318 
   6319   void ShowUnfreedHeap() {
   6320       // Check for memory that was never free()-d
   6321       // (useful when debugging free() interceptors, not for leak detection).
   6322     if (DEBUG_MODE && G_flags->debug_level >= 1) {
   6323       for (HeapMap<HeapInfo>::iterator it = G_heap_map->begin();
   6324            it != G_heap_map->end(); ++it) {
   6325         HeapInfo &info = it->second;
   6326         Printf("Not free()-ed memory: %p [%p, %p)\n%s\n",
   6327                info.size, info.ptr, info.ptr + info.size,
   6328                info.StackTraceString().c_str());
   6329       }
   6330     }
   6331   }
   6332 
   6333   void FlushExpectedRaces(bool print_summary) {
   6334     // Report("ThreadSanitizerValgrind: done\n");
   6335     // check if we found all expected races (for unit tests only).
   6336     static int total_missing = 0;
   6337     int this_flush_missing = 0;
   6338     for (ExpectedRacesMap::iterator it = G_expected_races_map->begin();
   6339          it != G_expected_races_map->end(); ++it) {
   6340       ExpectedRace race = it->second;
   6341       if (debug_expected_races) {
   6342         Printf("Checking if expected race fired: %p\n", race.ptr);
   6343       }
   6344       if (race.count == 0 &&
   6345           !(g_race_verifier_active && !race.is_verifiable) &&
   6346           (G_flags->nacl_untrusted == race.is_nacl_untrusted)) {
   6347         ++this_flush_missing;
   6348         Printf("Missing an expected race on %p: %s (annotated at %s)\n",
   6349                it->first,
   6350                race.description,
   6351                PcToRtnNameAndFilePos(race.pc).c_str());
   6352       }
   6353     }
   6354 
   6355     if (this_flush_missing) {
   6356       int n_errs = GetNumberOfFoundErrors();
   6357       SetNumberOfFoundErrors(n_errs + this_flush_missing);
   6358       total_missing += this_flush_missing;
   6359     }
   6360     G_expected_races_map->Clear();
   6361 
   6362     if (print_summary && total_missing > 0)
   6363       Report("WARNING: %d expected race(s) NOT detected!\n", total_missing);
   6364   }
   6365 
   6366   void HandleProgramEnd() {
   6367     FlushExpectedRaces(true);
   6368     // ShowUnfreedHeap();
   6369     EventSampler::ShowSamples();
   6370     ShowStats();
   6371     TraceInfo::PrintTraceProfile();
   6372     ShowProcSelfStatus();
   6373     reports_.PrintUsedSuppression();
   6374     reports_.PrintSummary();
   6375     // Report("ThreadSanitizerValgrind: exiting\n");
   6376   }
   6377 
   6378   void FlushIfOutOfMem(TSanThread *thr) {
   6379     static int max_vm_size;
   6380     static int soft_limit;
   6381     const int hard_limit = G_flags->max_mem_in_mb;
   6382     const int minimal_soft_limit = (hard_limit * 13) / 16;
   6383     const int print_info_limit   = (hard_limit * 12) / 16;
   6384 
   6385     CHECK(hard_limit > 0);
   6386 
   6387     int vm_size_in_mb = GetVmSizeInMb();
   6388     if (max_vm_size < vm_size_in_mb) {
   6389       max_vm_size = vm_size_in_mb;
   6390       if (max_vm_size > print_info_limit) {
   6391         Report("INFO: ThreadSanitizer's VmSize: %dM\n", (int)max_vm_size);
   6392       }
   6393     }
   6394 
   6395     if (soft_limit == 0) {
   6396       soft_limit = minimal_soft_limit;
   6397     }
   6398 
   6399     if (vm_size_in_mb > soft_limit) {
   6400       ForgetAllStateAndStartOver(thr,
   6401           "ThreadSanitizer is running close to its memory limit");
   6402       soft_limit = vm_size_in_mb + 1;
   6403     }
   6404   }
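          // Worked example for the thresholds above: with a hard limit
          // (G_flags->max_mem_in_mb) of 1024, the VmSize message starts being
          // printed above 12/16 of it (768 MB), the first flush fires once VmSize
          // exceeds 13/16 (832 MB), and after a flush the soft limit is moved to
          // just above whatever VmSize was observed at that point.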
   6405 
   6406   // Force state flushing.
   6407   void FlushState(TID tid) {
   6408     ForgetAllStateAndStartOver(TSanThread::Get(tid),
   6409                                "State flushing requested by client");
   6410   }
   6411 
   6412   void FlushIfNeeded(TSanThread *thr) {
   6413     // Are we out of segment IDs?
   6414 #ifdef TS_VALGRIND  // GetVmSizeInMb() works only under Valgrind anyway.
   6415     static int counter;
   6416     counter++;  // ATTENTION: don't do this in multi-threaded code -- too slow.
   6417     CHECK(TS_SERIALIZED == 1);
   6418 
   6419     // Are we out of memory?
   6420     if (G_flags->max_mem_in_mb > 0) {
   6421       const int kFreq = 1024 * 32;
   6422       if ((counter % kFreq) == 0) {  // Don't do it too often.
   6423         // TODO(kcc): find a way to check memory limit more frequently.
   6424         TIL til(ts_lock, 7);
   6425         AssertTILHeld();
   6426         FlushIfOutOfMem(thr);
   6427       }
   6428     }
   6429 #if 0
   6430     if ((counter % (1024 * 1024 * 64)) == 0 ||
   6431         counter == (1024 * 1024)) {
   6432       // ShowStats();
   6433       EventSampler::ShowSamples();
   6434       TraceInfo::PrintTraceProfile();
   6435     }
   6436 #endif
   6437 #endif
   6438 
   6439 #if 0  // do we still need it? Hope not..
   6440     size_t flush_period = G_flags->flush_period * 1000;  // milliseconds.
   6441     if (flush_period && (counter % (1024 * 4)) == 0) {
   6442       size_t cur_time = TimeInMilliSeconds();
   6443       if (cur_time - g_last_flush_time  > flush_period) {
   6444         TIL til(ts_lock, 7);
   6445         ForgetAllStateAndStartOver(
   6446           "Doing periodic flush (period is set by --flush_period=n_seconds)");
   6447       }
   6448     }
   6449 #endif
   6450   }
   6451 
   6452   void HandleRtnCall(TID tid, uintptr_t call_pc, uintptr_t target_pc,
   6453                      IGNORE_BELOW_RTN ignore_below) {
   6454     TSanThread *thr = TSanThread::Get(tid);
   6455     thr->HandleRtnCall(call_pc, target_pc, ignore_below);
   6456     FlushIfNeeded(thr);
   6457   }
   6458 
   6459   void INLINE HandleOneEvent(Event *e) {
   6460     ScopedMallocCostCenter malloc_cc("HandleOneEvent");
   6461 
   6462     DCHECK(e);
   6463     EventType type = e->type();
   6464     DCHECK(type != NOOP);
   6465     TSanThread *thr = NULL;
   6466     if (type != THR_START) {
   6467       thr = TSanThread::Get(TID(e->tid()));
   6468       DCHECK(thr);
   6469       thr->SetTopPc(e->pc());
   6470       thr->stats.events[type]++;
   6471     }
   6472 
   6473     switch (type) {
   6474       case READ:
   6475         HandleMemoryAccess(thr, e->pc(), e->a(), e->info(), false, true);
   6476         return;
   6477       case WRITE:
   6478         HandleMemoryAccess(thr, e->pc(), e->a(), e->info(), true, true);
   6479         return;
   6480       case RTN_CALL:
   6481         HandleRtnCall(TID(e->tid()), e->pc(), e->a(),
   6482                       IGNORE_BELOW_RTN_UNKNOWN);
   6483         return;
   6484       case RTN_EXIT:
   6485         thr->HandleRtnExit();
   6486         return;
   6487       default: break;
   6488     }
   6489 
   6490     // Everything else is under a lock.
   6491     TIL til(ts_lock, 0);
   6492     AssertTILHeld();
   6493 
   6494 
   6495     if (UNLIKELY(type == THR_START)) {
   6496         HandleThreadStart(TID(e->tid()), TID(e->info()), (CallStack*)e->pc());
   6497         TSanThread::Get(TID(e->tid()))->stats.events[type]++;
   6498         return;
   6499     }
   6500 
   6501     FlushStateIfOutOfSegments(thr);
   6502 
   6503     // Since we have the lock, get some fresh SIDs.
   6504     thr->GetSomeFreshSids();
   6505 
   6506     switch (type) {
   6507       case THR_START   : CHECK(0); break;
   6509       case SBLOCK_ENTER:
   6510         if (thr->ignore_reads() && thr->ignore_writes()) break;
   6511         thr->HandleSblockEnter(e->pc(), /*allow_slow_path=*/true);
   6512         break;
   6513       case THR_CREATE_BEFORE:
   6514         thr->HandleThreadCreateBefore(TID(e->tid()), e->pc());
   6515         break;
   6516       case THR_CREATE_AFTER:
   6517         thr->HandleThreadCreateAfter(TID(e->tid()), TID(e->info()));
   6518         break;
   6519       case THR_FIRST_INSN:
   6520         HandleThreadFirstInsn(TID(e->tid()));
   6521         break;
   6522       case THR_JOIN_AFTER     : HandleThreadJoinAfter(e);   break;
   6523       case THR_STACK_TOP      : HandleThreadStackTop(e); break;
   6524 
   6525       case THR_END     : HandleThreadEnd(TID(e->tid()));     break;
   6526       case MALLOC      : HandleMalloc(e, false);     break;
   6527       case FREE        : HandleFree(e);         break;
   6528       case MMAP        : HandleMalloc(e, true);      break;  // same as MALLOC
   6529       case MUNMAP      : HandleMunmap(e);     break;
   6530 
   6531 
   6532       case WRITER_LOCK : thr->HandleLock(e->a(), true);     break;
   6533       case READER_LOCK : thr->HandleLock(e->a(), false);    break;
   6534       case UNLOCK      : thr->HandleUnlock(e->a());       break;
   6535       case UNLOCK_OR_INIT : HandleUnlockOrInit(e); break;
   6536 
   6537       case LOCK_CREATE:
   6538       case LOCK_DESTROY: HandleLockCreateOrDestroy(e); break;
   6539 
   6540       case SIGNAL      : thr->HandleSignal(e->a());  break;
   6541       case WAIT        : thr->HandleWait(e->a());   break;
   6542 
   6543       case CYCLIC_BARRIER_INIT:
   6544         thr->HandleBarrierInit(e->a(), e->info());
   6545         break;
   6546       case CYCLIC_BARRIER_WAIT_BEFORE  :
   6547         thr->HandleBarrierWaitBefore(e->a());
   6548         break;
   6549       case CYCLIC_BARRIER_WAIT_AFTER  :
   6550         thr->HandleBarrierWaitAfter(e->a());
   6551         break;
   6552 
   6553       case PCQ_CREATE   : HandlePcqCreate(e);   break;
   6554       case PCQ_DESTROY  : HandlePcqDestroy(e);  break;
   6555       case PCQ_PUT      : HandlePcqPut(e);      break;
   6556       case PCQ_GET      : HandlePcqGet(e);      break;
   6557 
   6558 
   6559       case EXPECT_RACE :
   6560         HandleExpectRace(e->a(), (const char*)e->pc(), TID(e->tid()));
   6561         break;
   6562       case BENIGN_RACE :
   6563         HandleBenignRace(e->a(), e->info(),
   6564                          (const char*)e->pc(), TID(e->tid()));
   6565         break;
   6566       case FLUSH_EXPECTED_RACES:
   6567         FlushExpectedRaces(false);
   6568         break;
   6569       case EXPECT_RACE_BEGIN:
   6570         CHECK(g_expecting_races == false);
   6571         g_expecting_races = true;
   6572         g_found_races_since_EXPECT_RACE_BEGIN = 0;
   6573         break;
   6574       case EXPECT_RACE_END:
   6575         CHECK(g_expecting_races == true);
   6576         g_expecting_races = false;
   6577         if (g_found_races_since_EXPECT_RACE_BEGIN == 0) {
   6578           int n_errs = GetNumberOfFoundErrors();
   6579           SetNumberOfFoundErrors(n_errs + 1);
   6580           Printf("WARNING: expected race not found.\n");
   6581         }
   6582         break;
   6583 
   6584       case HB_LOCK     : HandleHBLock(e);       break;
   6585       case NON_HB_LOCK : HandleNonHBLock(e);    break;
   6586 
   6587       case IGNORE_READS_BEG:  HandleIgnore(e, false, true);  break;
   6588       case IGNORE_READS_END:  HandleIgnore(e, false, false); break;
   6589       case IGNORE_WRITES_BEG: HandleIgnore(e, true, true);   break;
   6590       case IGNORE_WRITES_END: HandleIgnore(e, true, false);  break;
   6591 
   6592       case SET_THREAD_NAME:
   6593         thr->set_thread_name((const char*)e->a());
   6594         break;
   6595       case SET_LOCK_NAME: {
   6596           uintptr_t lock_addr = e->a();
   6597           const char *name = reinterpret_cast<const char *>(e->info());
   6598           Lock *lock = Lock::LookupOrCreate(lock_addr);
   6599           lock->set_name(name);
   6600         }
   6601         break;
   6602 
   6603       case PUBLISH_RANGE : HandlePublishRange(e); break;
   6604       case UNPUBLISH_RANGE :
   6605         Report("WARNING: ANNOTATE_UNPUBLISH_MEMORY_RANGE is deprecated\n");
   6606         break;
   6607 
   6608       case TRACE_MEM   : HandleTraceMem(e);   break;
   6609       case STACK_TRACE : HandleStackTrace(e); break;
   6610       case NOOP        : CHECK(0);           break;  // can't happen.
   6611       case VERBOSITY   : e->Print(); G_flags->verbosity = e->info(); break;
   6612       case FLUSH_STATE : FlushState(TID(e->tid()));       break;
   6613       default                 : CHECK(0);    break;
   6614     }
   6615   }
   6616 
   6617  private:
   6618   void ShowProcSelfStatus() {
   6619     if (G_flags->show_proc_self_status) {
   6620       string str = ReadFileToString("/proc/self/status", false);
   6621       if (!str.empty()) {
   6622         Printf("%s", str.c_str());
   6623       }
   6624     }
   6625   }
   6626 
   6627   void ShowStats() {
   6628     if (G_flags->show_stats) {
   6629       G_stats->PrintStats();
   6630       G_cache->PrintStorageStats();
   6631     }
   6632   }
   6633 
   6634   // PCQ_CREATE, PCQ_DESTROY, PCQ_PUT, PCQ_GET
   6635   void HandlePcqCreate(Event *e) {
   6636     if (G_flags->verbosity >= 2) {
   6637       e->Print();
   6638     }
   6639     PCQ pcq;
   6640     pcq.pcq_addr = e->a();
   6641     CHECK(!g_pcq_map->count(e->a()));
   6642     (*g_pcq_map)[e->a()] = pcq;
   6643   }
   6644   void HandlePcqDestroy(Event *e) {
   6645     if (G_flags->verbosity >= 2) {
   6646       e->Print();
   6647     }
   6648     CHECK(g_pcq_map->count(e->a()));
   6649     g_pcq_map->erase(e->a());
   6650   }
   6651   void HandlePcqPut(Event *e) {
   6652     if (G_flags->verbosity >= 2) {
   6653       e->Print();
   6654     }
   6655     PCQ &pcq = (*g_pcq_map)[e->a()];
   6656     CHECK(pcq.pcq_addr == e->a());
   6657     TSanThread *thread = TSanThread::Get(TID(e->tid()));
   6658     VTS *vts = thread->segment()->vts()->Clone();
   6659     pcq.putters.push_back(vts);
   6660     thread->NewSegmentForSignal();
   6661   }
   6662   void HandlePcqGet(Event *e) {
   6663     if (G_flags->verbosity >= 2) {
   6664       e->Print();
   6665     }
   6666     PCQ &pcq = (*g_pcq_map)[e->a()];
   6667     CHECK(pcq.pcq_addr == e->a());
   6668     CHECK(!pcq.putters.empty());
   6669     VTS *putter = pcq.putters.front();
   6670     pcq.putters.pop_front();
   6671     CHECK(putter);
   6672     TSanThread *thread = TSanThread::Get(TID(e->tid()));
   6673     thread->NewSegmentForWait(putter);
   6674     VTS::Unref(putter);
   6675   }
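          // The two handlers above turn a producer-consumer queue into a
          // happens-before arc: PCQ_PUT clones the putter's VTS and enqueues it,
          // and the matching (FIFO) PCQ_GET waits on that clone, so everything the
          // producer did before the put is ordered before everything the consumer
          // does after the get. For example (illustrative code, using the usual
          // dynamic_annotations macros):
          //   T1: item->x = 1; ANNOTATE_PCQ_PUT(&q); q.push(item);
          //   T2: item = q.pop(); ANNOTATE_PCQ_GET(&q); use item->x;
          // does not report a race on item->x.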
   6676 
   6677   // PUBLISH_RANGE
   6678   void HandlePublishRange(Event *e) {
   6679     if (G_flags->verbosity >= 2) {
   6680       e->Print();
   6681     }
   6682     static int reported_deprecation;
   6683     reported_deprecation++;
   6684     if (reported_deprecation < 20) {
   6685       Report("WARNING: ANNOTATE_PUBLISH_MEMORY_RANGE is deprecated and will not"
   6686              " be supported in future versions of ThreadSanitizer.\n");
   6687     }
   6688 
   6689     uintptr_t mem = e->a();
   6690     uintptr_t size = e->info();
   6691 
   6692     TID tid(e->tid());
   6693     TSanThread *thread = TSanThread::Get(tid);
   6694     VTS *vts = thread->segment()->vts();
   6695     PublishRange(thread, mem, mem + size, vts);
   6696 
   6697     thread->NewSegmentForSignal();
   6698     // Printf("Publish: [%p, %p)\n", mem, mem+size);
   6699   }
   6700 
   6701   void HandleIgnore(Event *e, bool is_w, bool on) {
   6702     if (G_flags->verbosity >= 2) {
   6703       e->Print();
   6704     }
   6705     TSanThread *thread = TSanThread::Get(TID(e->tid()));
   6706     thread->set_ignore_accesses(is_w, on);
   6707   }
   6708 
   6709   // BENIGN_RACE
   6710   void HandleBenignRace(uintptr_t ptr, uintptr_t size,
   6711                         const char *descr, TID tid) {
   6712     TSanThread *thr = TSanThread::Get(tid);
   6713     if (debug_benign_races) {
   6714       Printf("T%d: BENIGN_RACE: ptr=%p size=%ld descr='%s'\n",
   6715              tid.raw(), ptr, size, descr);
   6716     }
   6717     // Simply set all 'racey' bits in the shadow state of [ptr, ptr+size).
   6718     for (uintptr_t p = ptr; p < ptr + size; p++) {
   6719       CacheLine *line = G_cache->GetLineOrCreateNew(thr, p, __LINE__);
   6720       CHECK(line);
   6721       line->racey().Set(CacheLine::ComputeOffset(p));
   6722       G_cache->ReleaseLine(thr, p, line, __LINE__);
   6723     }
   6724   }
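          // Effect of the loop above: every byte of [ptr, ptr+size) gets its
          // 'racey' bit set, and the reporting path skips addresses whose racey
          // bit is already set, so races on this range are silently suppressed.
          // This is the machinery behind benign-race annotations such as
          // ANNOTATE_BENIGN_RACE_SIZED (annotation name given for orientation;
          // the mapping from annotation to BENIGN_RACE event lives elsewhere).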
   6725 
   6726   // EXPECT_RACE
   6727   void HandleExpectRace(uintptr_t ptr, const char *descr, TID tid) {
   6728     ExpectedRace expected_race;
   6729     expected_race.ptr = ptr;
   6730     expected_race.size = 1;
   6731     expected_race.count = 0;
   6732     expected_race.is_verifiable = !descr ||
   6733         (string(descr).find("UNVERIFIABLE") == string::npos);
   6734     expected_race.is_nacl_untrusted = !descr ||
   6735         (string(descr).find("NACL_UNTRUSTED") != string::npos);
   6736     // copy descr (may not have strdup)
   6737     CHECK(descr);
   6738     size_t descr_len = strlen(descr);
   6739     char *d = new char [descr_len + 1];
   6740     memcpy(d, descr, descr_len);
   6741     d[descr_len] = 0;
   6742     expected_race.description = d;
   6743 
   6744     TSanThread *thread = TSanThread::Get(tid);
   6745     expected_race.pc = thread->GetCallstackEntry(1);
   6746     G_expected_races_map->InsertInfo(ptr, expected_race);
   6747 
   6748     // Flush 'racey' flag for the address
   6749     CacheLine *cache_line = G_cache->GetLineIfExists(thread, ptr, __LINE__);
   6750     if (cache_line != NULL) {
   6751       uintptr_t offset = CacheLine::ComputeOffset(ptr);
   6752       cache_line->racey().ClearRange(offset, offset + 1);
   6753       G_cache->ReleaseLine(thread, ptr, cache_line, __LINE__);
   6754     }
   6755 
   6756     if (debug_expected_races) {
   6757       Printf("T%d: EXPECT_RACE: ptr=%p descr='%s'\n", tid.raw(), ptr, descr);
   6758       thread->ReportStackTrace(ptr);
   6759       int i = 0;
   6760       for (ExpectedRacesMap::iterator it = G_expected_races_map->begin();
   6761            it != G_expected_races_map->end(); ++it) {
   6762         ExpectedRace &x = it->second;
   6763         Printf("  [%d] %p [0x%lx]\n", i, &x, x.ptr);
   6764         i++;
   6765       }
   6766     }
   6767   }
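          // Keywords recognized above: a description containing "UNVERIFIABLE"
          // marks the expected race as not checkable by the race verifier, and
          // "NACL_UNTRUSTED" ties it to the nacl_untrusted mode that
          // FlushExpectedRaces() compares against. E.g. a (made-up) description
          // "race on counter UNVERIFIABLE" is not counted as a missing race while
          // the race verifier is active.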
   6768 
   6769   void HandleStackTrace(Event *e) {
   6770     TSanThread *thread = TSanThread::Get(TID(e->tid()));
   6771     e->Print();
   6772     thread->ReportStackTrace();
   6773   }
   6774 
   6775   // HB_LOCK
   6776   void HandleHBLock(Event *e) {
   6777     if (G_flags->verbosity >= 2) {
   6778       e->Print();
   6779     }
   6780     Lock *lock = Lock::LookupOrCreate(e->a());
   6781     CHECK(lock);
   6782     lock->set_is_pure_happens_before(true);
   6783   }
   6784 
   6785   // NON_HB_LOCK
   6786   void HandleNonHBLock(Event *e) {
   6787     if (G_flags->verbosity >= 2) {
   6788       e->Print();
   6789     }
   6790     Lock *lock = Lock::LookupOrCreate(e->a());
   6791     CHECK(lock);
   6792     lock->set_is_pure_happens_before(false);
   6793   }
   6794 
   6795   // UNLOCK_OR_INIT
   6796   // This is a hack to handle posix pthread_spin_unlock which is sometimes
   6797   // the same symbol as pthread_spin_init. We need to handle unlock as init
   6798   // if the lock was not seen before or if it is currently unlocked.
   6799   // TODO(kcc): is there a way to distinguish pthread_spin_init
   6800   // and pthread_spin_unlock?
   6801   void HandleUnlockOrInit(Event *e) {
   6802     TSanThread *thread = TSanThread::Get(TID(e->tid()));
   6803     if (G_flags->verbosity >= 2) {
   6804       e->Print();
   6805       thread->ReportStackTrace();
   6806     }
   6807     uintptr_t lock_addr = e->a();
   6808     Lock *lock = Lock::Lookup(lock_addr);
   6809     if (lock && lock->wr_held()) {
   6810       // We know this lock and it is locked. Just unlock it.
   6811       thread->HandleUnlock(lock_addr);
   6812     } else {
   6813       // Never seen this lock or it is currently unlocked. Init it.
   6814       Lock::Create(lock_addr);
   6815     }
   6816   }
   6817 
   6818   void HandleLockCreateOrDestroy(Event *e) {
   6819     TSanThread *thread = TSanThread::Get(TID(e->tid()));
   6820     uintptr_t lock_addr = e->a();
   6821     if (debug_lock) {
   6822       e->Print();
   6823     }
   6824     if (e->type() == LOCK_CREATE) {
   6825       Lock::Create(lock_addr);
   6826     } else {
   6827       CHECK(e->type() == LOCK_DESTROY);
   6828       // A locked pthread_mutex_t cannot be destroyed, but other lock types can.
   6829       // When destroying a lock, we must unlock it.
   6830       // If a program has a bug where someone attempts to unlock
   6831       // a destroyed lock, we are likely to fail an assert.
   6832       //
   6833       // We do not unlock-on-destroy after main() has exited.
   6834       // This is because global Mutex objects may be destructed while threads
   6835       // holding them are still running. Urgh...
   6836       Lock *lock = Lock::Lookup(lock_addr);
   6837       // If the lock is not found, report an error.
   6838       if (lock == NULL) {
   6839         ThreadSanitizerInvalidLockReport *report =
   6840             new ThreadSanitizerInvalidLockReport;
   6841         report->type = ThreadSanitizerReport::INVALID_LOCK;
   6842         report->tid = TID(e->tid());
   6843         report->lock_addr = lock_addr;
   6844         report->stack_trace = thread->CreateStackTrace();
   6845         ThreadSanitizerPrintReport(report);
   6846         return;
   6847       }
   6848       if (lock->wr_held() || lock->rd_held()) {
   6849         if (G_flags->unlock_on_mutex_destroy && !g_has_exited_main) {
   6850           thread->HandleUnlock(lock_addr);
   6851         }
   6852       }
   6853       thread->HandleForgetSignaller(lock_addr);
   6854       Lock::Destroy(lock_addr);
   6855     }
   6856   }
   6857 
   6858   void HandleTraceMem(Event *e) {
   6859     if (G_flags->trace_level == 0) return;
   6860     TID tid(e->tid());
   6861     TSanThread *thr = TSanThread::Get(TID(e->tid()));
   6862     uintptr_t a = e->a();
   6863     CacheLine *line = G_cache->GetLineOrCreateNew(thr, a, __LINE__);
   6864     uintptr_t offset = CacheLine::ComputeOffset(a);
   6865     line->traced().Set(offset);
   6866     G_cache->ReleaseLine(thr, a, line, __LINE__);
   6867     if (G_flags->verbosity >= 2) e->Print();
   6868   }
   6869 
   6870   INLINE void RefAndUnrefTwoSegSetPairsIfDifferent(SSID new_ssid1,
   6871                                                    SSID old_ssid1,
   6872                                                    SSID new_ssid2,
   6873                                                    SSID old_ssid2) {
   6874     bool recycle_1 = new_ssid1 != old_ssid1,
   6875          recycle_2 = new_ssid2 != old_ssid2;
   6876     if (recycle_1 && !new_ssid1.IsEmpty()) {
   6877       SegmentSet::Ref(new_ssid1, "RefAndUnrefTwoSegSetPairsIfDifferent");
   6878     }
   6879 
   6880     if (recycle_2 && !new_ssid2.IsEmpty()) {
   6881       SegmentSet::Ref(new_ssid2, "RefAndUnrefTwoSegSetPairsIfDifferent");
   6882     }
   6883 
   6884     if (recycle_1 && !old_ssid1.IsEmpty()) {
   6885       SegmentSet::Unref(old_ssid1, "RefAndUnrefTwoSegSetPairsIfDifferent");
   6886     }
   6887 
   6888     if (recycle_2 && !old_ssid2.IsEmpty()) {
   6889       SegmentSet::Unref(old_ssid2, "RefAndUnrefTwoSegSetPairsIfDifferent");
   6890     }
   6891   }
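          // Note on ordering in the helper above: both new segment sets are
          // Ref()-ed before either old set is Unref()-ed, presumably so that a set
          // shared between the old and new pairs never sees its reference count
          // transiently drop to zero and get recycled while still needed.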
   6892 
   6893 
   6894   // return true if the current pair of read/write segment sets
   6895   // describes a race.
   6896   bool NOINLINE CheckIfRace(SSID rd_ssid, SSID wr_ssid) {
   6897     int wr_ss_size = SegmentSet::Size(wr_ssid);
   6898     int rd_ss_size = SegmentSet::Size(rd_ssid);
   6899 
   6900     DCHECK(wr_ss_size >= 2 || (wr_ss_size >= 1 && rd_ss_size >= 1));
   6901 
   6902     // check all write-write pairs
   6903     for (int w1 = 0; w1 < wr_ss_size; w1++) {
   6904       SID w1_sid = SegmentSet::GetSID(wr_ssid, w1, __LINE__);
   6905       Segment *w1_seg = Segment::Get(w1_sid);
   6906       LSID w1_ls = w1_seg->lsid(true);
   6907       for (int w2 = w1 + 1; w2 < wr_ss_size; w2++) {
   6908         DCHECK(wr_ssid.IsTuple());
   6909         SegmentSet *ss = SegmentSet::Get(wr_ssid);
   6910         LSID w2_ls = Segment::Get(ss->GetSID(w2))->lsid(true);
   6911         if (LockSet::IntersectionIsEmpty(w1_ls, w2_ls)) {
   6912           return true;
   6913         } else {
   6914           // May happen only if the locks in the intersection are hybrid locks.
   6915           DCHECK(LockSet::HasNonPhbLocks(w1_ls) &&
   6916                  LockSet::HasNonPhbLocks(w2_ls));
   6917         }
   6918       }
   6919       // check all write-read pairs
   6920       for (int r = 0; r < rd_ss_size; r++) {
   6921         SID r_sid = SegmentSet::GetSID(rd_ssid, r, __LINE__);
   6922         Segment *r_seg = Segment::Get(r_sid);
   6923         LSID r_ls = r_seg->lsid(false);
   6924         if (Segment::HappensBeforeOrSameThread(w1_sid, r_sid))
   6925           continue;
   6926         if (LockSet::IntersectionIsEmpty(w1_ls, r_ls)) {
   6927           return true;
   6928         } else {
   6929           // May happen only if the locks in the intersection are hybrid locks.
   6930           DCHECK(LockSet::HasNonPhbLocks(w1_ls) &&
   6931                  LockSet::HasNonPhbLocks(r_ls));
   6932         }
   6933       }
   6934     }
   6935     return false;
   6936   }
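          // Example for the check above, with made-up names: if the write segment
          // set is {S1, S2} where S1 was executed under lockset {L1} and S2 under
          // {L2}, the intersection is empty and this is reported as a race; if
          // both held L1 the intersection is non-empty, which (per the DCHECK and
          // the comments above) can only still occur here when the common locks
          // are hybrid, non-pure-happens-before locks.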
   6937 
   6938   // New experimental state machine.
   6939   // Set *res to the new state.
   6940   // Return true if the new state is race.
   6941   bool INLINE MemoryStateMachine(ShadowValue old_sval, TSanThread *thr,
   6942                                  bool is_w, ShadowValue *res) {
   6943     ShadowValue new_sval;
   6944     SID cur_sid = thr->sid();
   6945     DCHECK(cur_sid.valid());
   6946 
   6947     if (UNLIKELY(old_sval.IsNew())) {
   6948       // We see this memory for the first time.
   6949       DCHECK(cur_sid.valid());
   6950       if (is_w) {
   6951         new_sval.set(SSID(0), SSID(cur_sid));
   6952       } else {
   6953         new_sval.set(SSID(cur_sid), SSID(0));
   6954       }
   6955       *res = new_sval;
   6956       return false;
   6957     }
   6958 
   6959     SSID old_rd_ssid = old_sval.rd_ssid();
   6960     SSID old_wr_ssid = old_sval.wr_ssid();
   6961     SSID new_rd_ssid(0);
   6962     SSID new_wr_ssid(0);
   6963     if (is_w) {
   6964       new_rd_ssid = SegmentSet::RemoveSegmentFromSS(old_rd_ssid, cur_sid);
   6965       new_wr_ssid = SegmentSet::AddSegmentToSS(old_wr_ssid, cur_sid);
   6966     } else {
   6967       if (SegmentSet::Contains(old_wr_ssid, cur_sid)) {
   6968         // cur_sid is already in old_wr_ssid, no change to SSrd is required.
   6969         new_rd_ssid = old_rd_ssid;
   6970       } else {
   6971         new_rd_ssid = SegmentSet::AddSegmentToSS(old_rd_ssid, cur_sid);
   6972       }
   6973       new_wr_ssid = old_wr_ssid;
   6974     }
   6975 
   6976     if (UNLIKELY(G_flags->sample_events > 0)) {
   6977       if (new_rd_ssid.IsTuple() || new_wr_ssid.IsTuple()) {
   6978         static EventSampler sampler;
   6979         sampler.Sample(thr, "HasTupleSS", false);
   6980       }
   6981     }
   6982 
   6983 
   6984     new_sval.set(new_rd_ssid, new_wr_ssid);
   6985     *res = new_sval;
   6986     if (new_sval == old_sval)
   6987       return false;
   6988 
   6989     if (new_wr_ssid.IsTuple() ||
   6990         (!new_wr_ssid.IsEmpty() && !new_rd_ssid.IsEmpty())) {
   6991       return CheckIfRace(new_rd_ssid, new_wr_ssid);
   6992     }
   6993     return false;
   6994   }
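          // Walked example of the transitions above (segment names are
          // illustrative): the first-ever write in segment S1 produces
          // {rd=0, wr={S1}}; a later read in segment S2 of another thread produces
          // {rd={S2}, wr={S1}}, and since both sets are now non-empty the function
          // defers to CheckIfRace(), which reports a race unless S1 happens-before
          // S2 or the two segments share a suitable lock.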
   6995 
   6996 
   6997   // Fast path implementation for the case when we stay in the same thread.
   6998   // In this case we don't need to call HappensBefore(), deal with
   6999   // Tuple segment sets and check for race.
   7000   // If this function returns true, the ShadowValue *new_sval is updated
   7001   // in the same way as MemoryStateMachine() would have done it. Just faster.
   7002   INLINE bool MemoryStateMachineSameThread(bool is_w, ShadowValue old_sval,
   7003                                            TSanThread *thr,
   7004                                            ShadowValue *new_sval) {
   7005 #define MSM_STAT(i) do { if (DEBUG_MODE) \
   7006   thr->stats.msm_branch_count[i]++; } while ((void)0, 0)
   7007     SSID rd_ssid = old_sval.rd_ssid();
   7008     SSID wr_ssid = old_sval.wr_ssid();
   7009     SID cur_sid = thr->sid();
   7010     TID tid = thr->tid();
   7011     if (rd_ssid.IsEmpty()) {
   7012       if (wr_ssid.IsSingleton()) {
   7013         // *** CASE 01 ***: rd_ssid == 0, wr_ssid == singleton
   7014         SID wr_sid = wr_ssid.GetSingleton();
   7015         if (wr_sid == cur_sid) {  // --- w/r: {0, cur} => {0, cur}
   7016           MSM_STAT(1);
   7017           // no op
   7018           return true;
   7019         }
   7020         if (tid == Segment::Get(wr_sid)->tid()) {
   7021           // same thread, but the segments are different.
   7022           DCHECK(cur_sid != wr_sid);
   7023           if (is_w) {    // -------------- w: {0, wr} => {0, cur}
   7024             MSM_STAT(2);
   7025             new_sval->set(SSID(0), SSID(cur_sid));
   7026             thr->AddDeadSid(wr_sid, "FastPath01");
   7027           } else {       // -------------- r: {0, wr} => {cur, wr}
   7028             MSM_STAT(3);
   7029             new_sval->set(SSID(cur_sid), wr_ssid);
   7030           }
   7031           Segment::Ref(cur_sid, "FastPath01");
   7032           return true;
   7033         }
   7034       } else if (wr_ssid.IsEmpty()) {
   7035         // *** CASE 00 ***: rd_ssid == 0, wr_ssid == 0
   7036         if (is_w) {      // -------------- w: {0, 0} => {0, cur}
   7037           MSM_STAT(4);
   7038           new_sval->set(SSID(0), SSID(cur_sid));
   7039         } else {         // -------------- r: {0, 0} => {cur, 0}
   7040           MSM_STAT(5);
   7041           new_sval->set(SSID(cur_sid), SSID(0));
   7042         }
   7043         Segment::Ref(cur_sid, "FastPath00");
   7044         return true;
   7045       }
   7046     } else if (rd_ssid.IsSingleton()) {
   7047       SID rd_sid = rd_ssid.GetSingleton();
   7048       if (wr_ssid.IsEmpty()) {
   7049         // *** CASE 10 ***: rd_ssid == singleton, wr_ssid == 0
   7050         if (rd_sid == cur_sid) {
   7051           // same segment.
   7052           if (is_w) {    // -------------- w: {cur, 0} => {0, cur}
   7053             MSM_STAT(6);
   7054             new_sval->set(SSID(0), SSID(cur_sid));
   7055           } else {       // -------------- r: {cur, 0} => {cur, 0}
   7056             MSM_STAT(7);
   7057             // no op
   7058           }
   7059           return true;
   7060         }
   7061         if (tid == Segment::Get(rd_sid)->tid()) {
   7062           // same thread, but the segments are different.
   7063           DCHECK(cur_sid != rd_sid);
   7064           if (is_w) {  // -------------- w: {rd, 0} => {0, cur}
   7065             MSM_STAT(8);
   7066             new_sval->set(SSID(0), SSID(cur_sid));
   7067           } else {     // -------------- r: {rd, 0} => {cur, 0}
   7068             MSM_STAT(9);
   7069             new_sval->set(SSID(cur_sid), SSID(0));
   7070           }
   7071           Segment::Ref(cur_sid, "FastPath10");
   7072           thr->AddDeadSid(rd_sid, "FastPath10");
   7073           return true;
   7074         }
   7075       } else if (wr_ssid.IsSingleton()){
   7076         // *** CASE 11 ***: rd_ssid == singleton, wr_ssid == singleton
   7077         DCHECK(rd_ssid.IsSingleton());
   7078         SID wr_sid = wr_ssid.GetSingleton();
   7079         DCHECK(wr_sid != rd_sid);  // By definition of ShadowValue.
   7080         if (cur_sid == rd_sid) {
   7081           if (tid == Segment::Get(wr_sid)->tid()) {
   7082             if (is_w) {  // -------------- w: {cur, wr} => {0, cur}
   7083               MSM_STAT(10);
   7084               new_sval->set(SSID(0), SSID(cur_sid));
   7085               thr->AddDeadSid(wr_sid, "FastPath11");
   7086             } else {     // -------------- r: {cur, wr} => {cur, wr}
   7087               MSM_STAT(11);
   7088               // no op
   7089             }
   7090             return true;
   7091           }
   7092         } else if (cur_sid == wr_sid){
   7093           if (tid == Segment::Get(rd_sid)->tid()) {
   7094             if (is_w) {  // -------------- w: {rd, cur} => {rd, cur}
   7095               MSM_STAT(12);
   7096               // no op
   7097             } else {     // -------------- r: {rd, cur} => {0, cur}
   7098               MSM_STAT(13);
   7099               new_sval->set(SSID(0), SSID(cur_sid));
   7100               thr->AddDeadSid(rd_sid, "FastPath11");
   7101             }
   7102             return true;
   7103           }
   7104         } else if (tid == Segment::Get(rd_sid)->tid() &&
   7105                    tid == Segment::Get(wr_sid)->tid()) {
   7106           if (is_w) {    // -------------- w: {rd, wr} => {0, cur}
   7107             MSM_STAT(14);
   7108             new_sval->set(SSID(0), SSID(cur_sid));
   7109             thr->AddDeadSid(wr_sid, "FastPath11");
   7110           } else {       // -------------- r: {rd, wr} => {cur, wr}
   7111             MSM_STAT(15);
   7112             new_sval->set(SSID(cur_sid), wr_ssid);
   7113           }
   7114           thr->AddDeadSid(rd_sid, "FastPath11");
   7115           Segment::Ref(cur_sid, "FastPath11");
   7116           return true;
   7117         }
   7118       }
   7119     }
   7120     MSM_STAT(0);
   7121     return false;
   7122 #undef MSM_STAT
   7123   }
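          // Fast-path illustration for the cases above: two consecutive writes by
          // the same thread hit CASE 00 and then CASE 01 -- {0,0} => {0,cur}
          // followed by {0,wr} => {0,cur} -- touching only reference counts, with
          // no happens-before or lockset work; any access involving another
          // thread's segment (or a tuple segment set) returns false and falls back
          // to the full MemoryStateMachine().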
   7124 
   7125   // return false if we were not able to complete the task (fast_path_only).
   7126   INLINE bool HandleMemoryAccessHelper(bool is_w,
   7127                                        CacheLine *cache_line,
   7128                                        uintptr_t addr,
   7129                                        uintptr_t size,
   7130                                        uintptr_t pc,
   7131                                        TSanThread *thr,
   7132                                        bool fast_path_only) {
   7133     DCHECK((addr & (size - 1)) == 0);  // size-aligned.
   7134     uintptr_t offset = CacheLine::ComputeOffset(addr);
   7135 
   7136     ShadowValue old_sval;
   7137     ShadowValue *sval_p = NULL;
   7138 
   7139     if (UNLIKELY(!cache_line->has_shadow_value().Get(offset))) {
   7140       sval_p = cache_line->AddNewSvalAtOffset(offset);
   7141       DCHECK(sval_p->IsNew());
   7142     } else {
   7143       sval_p = cache_line->GetValuePointer(offset);
   7144     }
   7145     old_sval = *sval_p;
   7146 
   7147     bool res = false;
   7148     bool fast_path_ok = MemoryStateMachineSameThread(
   7149         is_w, old_sval, thr, sval_p);
   7150     if (fast_path_ok) {
   7151       res = true;
   7152     } else if (fast_path_only) {
   7153       res = false;
   7154     } else {
   7155       bool is_published = cache_line->published().Get(offset);
   7156       // We check only the first bit for publishing, oh well.
   7157       if (UNLIKELY(is_published)) {
   7158         const VTS *signaller_vts = GetPublisherVTS(addr);
   7159         CHECK(signaller_vts);
   7160         thr->NewSegmentForWait(signaller_vts);
   7161       }
   7162 
   7163       bool is_race = MemoryStateMachine(old_sval, thr, is_w, sval_p);
   7164 
   7165       // Check for race.
   7166       if (UNLIKELY(is_race)) {
   7167         if (thr->ShouldReportRaces()) {
   7168           if (G_flags->report_races && !cache_line->racey().Get(offset)) {
   7169             reports_.AddReport(thr, pc, is_w, addr, size,
   7170                                old_sval, *sval_p, is_published);
   7171           }
   7172           cache_line->racey().SetRange(offset, offset + size);
   7173         }
   7174       }
   7175 
   7176       // Ref/Unref segments
   7177       RefAndUnrefTwoSegSetPairsIfDifferent(sval_p->rd_ssid(),
   7178                                            old_sval.rd_ssid(),
   7179                                            sval_p->wr_ssid(),
   7180                                            old_sval.wr_ssid());
   7181       res = true;
   7182     }
   7183 
   7184 
   7185     if (DEBUG_MODE && !fast_path_only) {
   7186       // check that the SSIDs/SIDs in the new sval have sane ref counters.
   7187       CHECK(!sval_p->wr_ssid().IsEmpty() || !sval_p->rd_ssid().IsEmpty());
   7188       for (int i = 0; i < 2; i++) {
   7189         SSID ssid = i ? sval_p->rd_ssid() : sval_p->wr_ssid();
   7190         if (ssid.IsEmpty()) continue;
   7191         if (ssid.IsSingleton()) {
   7192           // singleton segment should have ref count > 0.
   7193           SID sid = ssid.GetSingleton();
   7194           Segment *seg = Segment::Get(sid);
   7195           (void)seg;
   7196           CHECK(seg->ref_count() > 0);
   7197           if (sid == thr->sid()) {
   7198             // if this is the current seg, ref count should be > 1.
   7199             CHECK(seg->ref_count() > 1);
   7200           }
   7201         } else {
   7202           SegmentSet *sset = SegmentSet::Get(ssid);
   7203           (void)sset;
   7204           CHECK(sset->ref_count() > 0);
   7205         }
   7206       }
   7207     }
   7208     return res;
   7209   }
   7210 
   7211 
   7212   // return false if we were not able to complete the task (fast_path_only).
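           // Dispatches on the access size and alignment: if the cache line
           // already has the matching granularity, the access is handled with
           // a single call; otherwise (slow path only) the granularity mask is
           // split/joined and the access is handled chunk by chunk. Unaligned
           // or odd-sized accesses are handled as a series of 1-byte accesses
           // within the current cache line.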
   7213   INLINE bool HandleAccessGranularityAndExecuteHelper(
   7214       CacheLine *cache_line,
   7215       TSanThread *thr, uintptr_t addr, MopInfo *mop,
   7216       bool has_expensive_flags, bool fast_path_only) {
   7217     size_t size = mop->size();
   7218     uintptr_t pc = mop->pc();
   7219     bool is_w = mop->is_write();
   7220     uintptr_t a = addr;
   7221     uintptr_t b = 0;
   7222     uintptr_t off = CacheLine::ComputeOffset(a);
   7223 
   7224     uint16_t *granularity_mask = cache_line->granularity_mask(off);
   7225     uint16_t gr = *granularity_mask;
   7226 
    7227     // Can't do split/join on the fast path, because it involves segment set
   7228     // reference count manipulation that is not thread-safe.
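             // The 16-bit granularity mask describes how the 8-byte-aligned
             // chunk at 'off' is currently split (see the bit patterns below):
             //   bit  0      -- one  8-byte granule,
             //   bits 1..2   -- two  4-byte granules,
             //   bits 3..6   -- four 2-byte granules,
             //   bits 7..14  -- eight 1-byte granules.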
   7229 
   7230     if        (size == 8 && (off & 7) == 0) {
   7231       if (!gr) {
   7232         *granularity_mask = gr = 1;  // 0000000000000001
   7233       }
   7234       if (GranularityIs8(off, gr)) {
   7235         if (has_expensive_flags) thr->stats.n_fast_access8++;
   7236         cache_line->DebugTrace(off, __FUNCTION__, __LINE__);
   7237         goto one_call;
   7238       } else {
   7239         if (fast_path_only) return false;
   7240         if (has_expensive_flags) thr->stats.n_slow_access8++;
   7241         cache_line->Join_1_to_2(off);
   7242         cache_line->Join_1_to_2(off + 2);
   7243         cache_line->Join_1_to_2(off + 4);
   7244         cache_line->Join_1_to_2(off + 6);
   7245         cache_line->Join_2_to_4(off);
   7246         cache_line->Join_2_to_4(off + 4);
   7247         cache_line->Join_4_to_8(off);
   7248         goto slow_path;
   7249       }
   7250     } else if (size == 4 && (off & 3) == 0) {
   7251       if (!gr) {
   7252         *granularity_mask = gr = 3 << 1;  // 0000000000000110
   7253       }
   7254       if (GranularityIs4(off, gr)) {
   7255         if (has_expensive_flags) thr->stats.n_fast_access4++;
   7256         cache_line->DebugTrace(off, __FUNCTION__, __LINE__);
   7257         goto one_call;
   7258       } else {
   7259         if (fast_path_only) return false;
   7260         if (has_expensive_flags) thr->stats.n_slow_access4++;
   7261         cache_line->Split_8_to_4(off);
   7262         cache_line->Join_1_to_2(off);
   7263         cache_line->Join_1_to_2(off + 2);
   7264         cache_line->Join_2_to_4(off);
   7265         goto slow_path;
   7266       }
   7267     } else if (size == 2 && (off & 1) == 0) {
   7268       if (!gr) {
   7269         *granularity_mask = gr = 15 << 3;  // 0000000001111000
   7270       }
   7271       if (GranularityIs2(off, gr)) {
   7272         if (has_expensive_flags) thr->stats.n_fast_access2++;
   7273         cache_line->DebugTrace(off, __FUNCTION__, __LINE__);
   7274         goto one_call;
   7275       } else {
   7276         if (fast_path_only) return false;
   7277         if (has_expensive_flags) thr->stats.n_slow_access2++;
   7278         cache_line->Split_8_to_4(off);
   7279         cache_line->Split_4_to_2(off);
   7280         cache_line->Join_1_to_2(off);
   7281         goto slow_path;
   7282       }
   7283     } else if (size == 1) {
   7284       if (!gr) {
   7285         *granularity_mask = gr = 255 << 7;  // 0111111110000000
   7286       }
   7287       if (GranularityIs1(off, gr)) {
   7288         if (has_expensive_flags) thr->stats.n_fast_access1++;
   7289         cache_line->DebugTrace(off, __FUNCTION__, __LINE__);
   7290         goto one_call;
   7291       } else {
   7292         if (fast_path_only) return false;
   7293         if (has_expensive_flags) thr->stats.n_slow_access1++;
   7294         cache_line->Split_8_to_4(off);
   7295         cache_line->Split_4_to_2(off);
   7296         cache_line->Split_2_to_1(off);
   7297         goto slow_path;
   7298       }
   7299     } else {
   7300       if (fast_path_only) return false;
   7301       if (has_expensive_flags) thr->stats.n_very_slow_access++;
   7302       // Very slow: size is not 1,2,4,8 or address is unaligned.
   7303       // Handle this access as a series of 1-byte accesses, but only
   7304       // inside the current cache line.
   7305       // TODO(kcc): do we want to handle the next cache line as well?
   7306       b = a + mop->size();
   7307       uintptr_t max_x = min(b, CacheLine::ComputeNextTag(a));
   7308       for (uintptr_t x = a; x < max_x; x++) {
   7309         off = CacheLine::ComputeOffset(x);
   7310         DCHECK(CacheLine::ComputeTag(x) == cache_line->tag());
   7311         uint16_t *granularity_mask = cache_line->granularity_mask(off);
   7312         if (!*granularity_mask) {
   7313           *granularity_mask = 1;
   7314         }
   7315         cache_line->DebugTrace(off, __FUNCTION__, __LINE__);
   7316         cache_line->Split_8_to_4(off);
   7317         cache_line->Split_4_to_2(off);
   7318         cache_line->Split_2_to_1(off);
   7319         if (!HandleMemoryAccessHelper(is_w, cache_line, x, 1, pc, thr, false))
   7320           return false;
   7321       }
   7322       return true;
   7323     }
   7324 
   7325 slow_path:
   7326     if (fast_path_only) return false;
   7327     DCHECK(cache_line);
   7328     DCHECK(size == 1 || size == 2 || size == 4 || size == 8);
   7329     DCHECK((addr & (size - 1)) == 0);  // size-aligned.
   7330     gr = *granularity_mask;
   7331     CHECK(gr);
   7332     // size is one of 1, 2, 4, 8; address is size-aligned, but the granularity
   7333     // is different.
   7334     b = a + mop->size();
   7335     for (uintptr_t x = a; x < b;) {
   7336       if (has_expensive_flags) thr->stats.n_access_slow_iter++;
   7337       off = CacheLine::ComputeOffset(x);
   7338       cache_line->DebugTrace(off, __FUNCTION__, __LINE__);
   7339       size_t s = 0;
   7340       // How many bytes are we going to access?
   7341       if     (GranularityIs8(off, gr)) s = 8;
   7342       else if(GranularityIs4(off, gr)) s = 4;
   7343       else if(GranularityIs2(off, gr)) s = 2;
   7344       else                             s = 1;
   7345       if (!HandleMemoryAccessHelper(is_w, cache_line, x, s, pc, thr, false))
   7346         return false;
   7347       x += s;
   7348     }
   7349     return true;
   7350 one_call:
   7351     return HandleMemoryAccessHelper(is_w, cache_line, addr, size, pc,
   7352                                     thr, fast_path_only);
   7353   }
   7354 
   7355   INLINE bool IsTraced(CacheLine *cache_line, uintptr_t addr,
   7356                        bool has_expensive_flags) {
   7357     if (!has_expensive_flags) return false;
   7358     if (G_flags->trace_level == 0) return false;
   7359     DCHECK(cache_line);
   7360     uintptr_t off = CacheLine::ComputeOffset(addr);
   7361     if (cache_line->traced().Get(off)) {
   7362       return true;
   7363     } else if (addr == G_flags->trace_addr) {
   7364       return true;
   7365     }
   7366     return false;
   7367   }
   7368 
   7369   void DoTrace(TSanThread *thr, uintptr_t addr, MopInfo *mop, bool need_locking) {
   7370     size_t size = mop->size();
   7371     uintptr_t pc = mop->pc();
   7372     TIL til(ts_lock, 1, need_locking);
   7373     for (uintptr_t x = addr; x < addr + size; x++) {
   7374       uintptr_t off = CacheLine::ComputeOffset(x);
   7375       CacheLine *cache_line = G_cache->GetLineOrCreateNew(thr,
   7376                                                           x, __LINE__);
   7377       ShadowValue *sval_p = cache_line->GetValuePointer(off);
   7378       if (cache_line->has_shadow_value().Get(off) != 0) {
   7379         bool is_published = cache_line->published().Get(off);
   7380         Printf("TRACE: T%d/S%d %s[%d] addr=%p sval: %s%s; line=%p P=%s\n",
   7381                raw_tid(thr), thr->sid().raw(), mop->is_write() ? "wr" : "rd",
   7382                size, addr, sval_p->ToString().c_str(),
   7383                is_published ? " P" : "",
   7384                cache_line,
   7385                cache_line->published().Empty() ?
   7386                "0" : cache_line->published().ToString().c_str());
   7387         thr->ReportStackTrace(pc);
   7388       }
   7389       G_cache->ReleaseLine(thr, x, cache_line, __LINE__);
   7390     }
   7391   }
   7392 
   7393 
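           // Fully handles one access under the global lock: flushes the dead
           // SIDs, grabs fresh SIDs (in the non-serialized build), acquires the
           // cache line and runs the full granularity handling, then optionally
           // traces and samples the access.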
   7394 #if TS_SERIALIZED == 1
   7395   INLINE  // TODO(kcc): this can also be made NOINLINE later.
   7396 #else
   7397   NOINLINE
   7398 #endif
   7399   void HandleMemoryAccessSlowLocked(TSanThread *thr,
   7400                                     uintptr_t addr,
   7401                                     MopInfo *mop,
   7402                                     bool has_expensive_flags,
   7403                                     bool need_locking) {
   7404     AssertTILHeld();
   7405     DCHECK(thr->lsid(false) == thr->segment()->lsid(false));
   7406     DCHECK(thr->lsid(true) == thr->segment()->lsid(true));
   7407     thr->FlushDeadSids();
   7408     if (TS_SERIALIZED == 0) {
    7409       // In the serialized version this function is the hot spot, so we grab
    7410       // fresh SIDs only in the non-serialized variant.
   7411       thr->GetSomeFreshSids();
   7412     }
   7413     CacheLine *cache_line = G_cache->GetLineOrCreateNew(thr, addr, __LINE__);
   7414     HandleAccessGranularityAndExecuteHelper(cache_line, thr, addr,
   7415                                             mop, has_expensive_flags,
   7416                                             /*fast_path_only=*/false);
   7417     bool tracing = IsTraced(cache_line, addr, has_expensive_flags);
   7418     G_cache->ReleaseLine(thr, addr, cache_line, __LINE__);
   7419     cache_line = NULL;  // just in case.
   7420 
   7421     if (has_expensive_flags) {
   7422       if (tracing) {
   7423         DoTrace(thr, addr, mop, /*need_locking=*/false);
   7424       }
   7425       if (G_flags->sample_events > 0) {
   7426         const char *type = "SampleMemoryAccess";
   7427         static EventSampler sampler;
   7428         sampler.Sample(thr, type, false);
   7429       }
   7430     }
   7431   }
   7432 
   7433   INLINE bool HandleMemoryAccessInternal(TSanThread *thr,
   7434                                          uintptr_t *sblock_pc,
   7435                                          uintptr_t addr,
   7436                                          MopInfo *mop,
   7437                                          bool has_expensive_flags,
   7438                                          bool need_locking) {
   7439 #   define INC_STAT(stat) \
   7440         do { if (has_expensive_flags) (stat)++; } while ((void)0, 0)
   7441     if (TS_ATOMICITY && G_flags->atomicity) {
   7442       HandleMemoryAccessForAtomicityViolationDetector(thr, addr, mop);
   7443       return false;
   7444     }
   7445     DCHECK(mop->size() > 0);
   7446     DCHECK(thr->is_running());
   7447     DCHECK(!thr->ignore_reads() || !thr->ignore_writes());
   7448 
    7449     // We neither check for nor ignore stack accesses at the moment.
    7450     // On unoptimized binaries this would give a ~10% speedup if ignore_stack==true,
    7451     // but with --ignore_stack==false it would cost a few extra instructions.
    7452     // On optimized binaries ignoring the stack gains almost nothing.
   7453     // if (thr->IgnoreMemoryIfInStack(addr)) return;
   7454 
   7455     CacheLine *cache_line = NULL;
   7456     INC_STAT(thr->stats.memory_access_sizes[mop->size() <= 16 ? mop->size() : 17 ]);
   7457     INC_STAT(thr->stats.events[mop->is_write() ? WRITE : READ]);
   7458     if (has_expensive_flags) {
   7459       thr->stats.access_to_first_1g += (addr >> 30) == 0;
   7460       thr->stats.access_to_first_2g += (addr >> 31) == 0;
   7461       thr->stats.access_to_first_4g += ((uint64_t)addr >> 32) == 0;
   7462     }
   7463 
   7464     int locked_access_case = 0;
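             // Which case we hit before falling back to the locked (slow) path:
             //   1 - the fast-path state machine could not handle the access,
             //   2 - SblockEnter needed the slow path,
             //   3 - the acquired line has a wrong tag,
             //   4 - the cache slot was empty,
             //   5 - the line is locked by another thread,
             //   6 - no room for dead SIDs,
             //   7 - locking was not requested (need_locking == false).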
   7465 
   7466     if (need_locking) {
   7467       // The fast (unlocked) path.
   7468       if (thr->HasRoomForDeadSids()) {
   7469         // Acquire a line w/o locks.
   7470         cache_line = G_cache->TryAcquireLine(thr, addr, __LINE__);
   7471         if (!Cache::LineIsNullOrLocked(cache_line)) {
    7472           // The line is neither empty nor locked -- check the tag.
   7473           if (cache_line->tag() == CacheLine::ComputeTag(addr)) {
   7474             // The line is ours and non-empty -- fire the fast path.
   7475             if (thr->HandleSblockEnter(*sblock_pc, /*allow_slow_path=*/false)) {
   7476               *sblock_pc = 0;  // don't do SblockEnter any more.
   7477               bool res = HandleAccessGranularityAndExecuteHelper(
   7478                   cache_line, thr, addr,
   7479                   mop, has_expensive_flags,
   7480                   /*fast_path_only=*/true);
   7481               bool traced = IsTraced(cache_line, addr, has_expensive_flags);
   7482               // release the line.
   7483               G_cache->ReleaseLine(thr, addr, cache_line, __LINE__);
   7484               if (res && has_expensive_flags && traced) {
   7485                 DoTrace(thr, addr, mop, /*need_locking=*/true);
   7486               }
   7487               if (res) {
   7488                 INC_STAT(thr->stats.unlocked_access_ok);
    7489                 // fast path succeeded, we are done.
   7490                 return false;
   7491               } else {
   7492                 locked_access_case = 1;
   7493               }
   7494             } else {
   7495               // we were not able to handle SblockEnter.
   7496               G_cache->ReleaseLine(thr, addr, cache_line, __LINE__);
   7497               locked_access_case = 2;
   7498             }
   7499           } else {
   7500             locked_access_case = 3;
   7501             // The line has a wrong tag.
   7502             G_cache->ReleaseLine(thr, addr, cache_line, __LINE__);
   7503           }
   7504         } else if (cache_line == NULL) {
   7505           locked_access_case = 4;
   7506           // We grabbed the cache slot but it is empty, release it.
   7507           G_cache->ReleaseLine(thr, addr, cache_line, __LINE__);
   7508         } else {
   7509           locked_access_case = 5;
   7510         }
   7511       } else {
   7512         locked_access_case = 6;
   7513       }
   7514     } else {
   7515       locked_access_case = 7;
   7516     }
   7517 
   7518     if (need_locking) {
   7519       INC_STAT(thr->stats.locked_access[locked_access_case]);
   7520     }
   7521 
   7522     // Everything below goes under a lock.
   7523     TIL til(ts_lock, 2, need_locking);
   7524     thr->HandleSblockEnter(*sblock_pc, /*allow_slow_path=*/true);
   7525     *sblock_pc = 0;  // don't do SblockEnter any more.
   7526     HandleMemoryAccessSlowLocked(thr, addr, mop,
   7527                                  has_expensive_flags,
   7528                                  need_locking);
   7529     return true;
   7530 #undef INC_STAT
   7531   }
   7532 
   7533 
   7534   void HandleMemoryAccessForAtomicityViolationDetector(TSanThread *thr,
   7535                                                        uintptr_t addr,
   7536                                                        MopInfo *mop) {
   7537     CHECK(G_flags->atomicity);
   7538     TID tid = thr->tid();
   7539     if (thr->MemoryIsInStack(addr)) return;
   7540 
   7541     LSID wr_lsid = thr->lsid(0);
   7542     LSID rd_lsid = thr->lsid(1);
   7543     if (wr_lsid.raw() == 0 && rd_lsid.raw() == 0) {
   7544       thr->increment_n_mops_since_start();
   7545       return;
   7546     }
   7547     // uint64_t combined_lsid = wr_lsid.raw();
   7548     // combined_lsid = (combined_lsid << 32) | rd_lsid.raw();
   7549     // if (combined_lsid == 0) return;
   7550 
   7551 //    Printf("Era=%d T%d %s a=%p pc=%p in_stack=%d %s\n", g_lock_era,
   7552 //           tid.raw(), is_w ? "W" : "R", addr, pc, thr->MemoryIsInStack(addr),
   7553 //           PcToRtnNameAndFilePos(pc).c_str());
   7554 
   7555     BitSet *range_set = thr->lock_era_access_set(mop->is_write());
   7556     // Printf("era %d T%d access under lock pc=%p addr=%p size=%p w=%d\n",
   7557     //        g_lock_era, tid.raw(), pc, addr, size, is_w);
   7558     range_set->Add(addr, addr + mop->size());
   7559     // Printf("   %s\n", range_set->ToString().c_str());
   7560   }
   7561 
   7562 
   7563   // MALLOC
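           // Starts a new segment for the allocating thread, clears the shadow
           // state for [a, a+size) and records the block in G_heap_map (and,
           // for mmap, also in G_thread_stack_map, since the region may later
           // be used as a thread stack).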
   7564   void HandleMalloc(Event *e, bool is_mmap) {
   7565     ScopedMallocCostCenter cc("HandleMalloc");
   7566     TID tid(e->tid());
   7567     uintptr_t a = e->a();
   7568     uintptr_t size = e->info();
   7569 
   7570 
   7571     if (a == 0)
   7572       return;
   7573 
   7574     #if defined(__GNUC__) && __WORDSIZE == 64
   7575     // If we are allocating a huge piece of memory,
   7576     // don't handle it because it is too slow.
   7577     // TODO(kcc): this is a workaround for NaCl. May need to fix it cleaner.
   7578     const uint64_t G84 = (1ULL << 32) * 21; // 84G.
   7579     if (size >= G84) {
   7580       return;
   7581     }
   7582     #endif
   7583     TSanThread *thr = TSanThread::Get(tid);
   7584     thr->NewSegmentForMallocEvent();
   7585     uintptr_t b = a + size;
   7586     CHECK(a <= b);
   7587     ClearMemoryState(thr, a, b);
   7588     // update heap_map
   7589     HeapInfo info;
   7590     info.ptr  = a;
   7591     info.size = size;
   7592     info.sid  = thr->sid();
   7593     Segment::Ref(info.sid, __FUNCTION__);
   7594     if (debug_malloc) {
   7595       Printf("T%d MALLOC: %p [%p %p) %s %s\n%s\n",
   7596              tid.raw(), size, a, a+size,
   7597              Segment::ToString(thr->sid()).c_str(),
   7598              thr->segment()->vts()->ToString().c_str(),
   7599              info.StackTraceString().c_str());
   7600     }
   7601 
   7602     // CHECK(!G_heap_map->count(a));  // we may have two calls
   7603                                       //  to AnnotateNewMemory.
   7604     G_heap_map->InsertInfo(a, info);
   7605 
   7606     if (is_mmap) {
    7607       // Mmap may be used for a thread stack, so we should keep the mmap info
    7608       // when the state is flushed.
   7609       ThreadStackInfo ts_info;
   7610       ts_info.ptr = a;
   7611       ts_info.size = size;
   7612       G_thread_stack_map->InsertInfo(a, ts_info);
   7613     }
   7614   }
   7615 
   7616   void ImitateWriteOnFree(TSanThread *thr, uintptr_t a, uintptr_t size, uintptr_t pc) {
    7617     // Handle the memory deletion as a write, but don't touch all
    7618     // the memory if there is too much of it; limit to the first 2K (kMaxWriteSizeOnFree).
   7619     if (size && G_flags->free_is_write && !global_ignore) {
   7620       const uintptr_t kMaxWriteSizeOnFree = 2048;
   7621       uintptr_t write_size = min(kMaxWriteSizeOnFree, size);
   7622       uintptr_t step = sizeof(uintptr_t);
   7623       // We simulate 4- or 8-byte accesses to make analysis faster.
   7624       for (uintptr_t i = 0; i < write_size; i += step) {
   7625         uintptr_t this_size = write_size - i >= step ? step : write_size - i;
   7626         HandleMemoryAccess(thr, pc, a + i, this_size,
   7627                            /*is_w=*/true, /*need_locking*/false);
   7628       }
   7629     }
   7630   }
   7631 
   7632   // FREE
   7633   void HandleFree(Event *e) {
   7634     TID tid(e->tid());
   7635     TSanThread *thr = TSanThread::Get(tid);
   7636     uintptr_t a = e->a();
   7637     if (debug_free) {
   7638       e->Print();
   7639       thr->ReportStackTrace(e->pc());
   7640     }
   7641     if (a == 0)
   7642       return;
   7643     HeapInfo *info = G_heap_map->GetInfo(a);
   7644     if (!info || info->ptr != a)
   7645       return;
   7646     uintptr_t size = info->size;
   7647     uintptr_t pc = e->pc();
   7648     ImitateWriteOnFree(thr, a, size, pc);
   7649     // update G_heap_map
   7650     CHECK(info->ptr == a);
   7651     Segment::Unref(info->sid, __FUNCTION__);
   7652 
   7653     ClearMemoryState(thr, a, a + size);
   7654     G_heap_map->EraseInfo(a);
   7655 
    7656     // We imitate a Write event again, in case there is a use-after-free later.
   7657     // We also need to create a new sblock so that the previous stack trace
   7658     // has free() in it.
   7659     if (G_flags->keep_history && G_flags->free_is_write) {
   7660       thr->HandleSblockEnter(pc, /*allow_slow_path*/true);
   7661     }
   7662     ImitateWriteOnFree(thr, a, size, pc);
   7663   }
   7664 
   7665   void HandleMunmap(Event *e) {
   7666     // TODO(glider): at the moment we handle only munmap()s of single mmap()ed
   7667     // regions. The correct implementation should handle arbitrary munmap()s
   7668     // that may carve the existing mappings or split them into two parts.
   7669     // It should also be possible to munmap() several mappings at a time.
   7670     uintptr_t a = e->a();
   7671     if (a == 0)
   7672       return;
   7673     HeapInfo *h_info = G_heap_map->GetInfo(a);
   7674     uintptr_t size = e->info();
   7675     if (h_info && h_info->ptr == a && h_info->size == size) {
   7676       // TODO(glider): we may want to handle memory deletion and call
   7677       // Segment::Unref for all the unmapped memory.
   7678       Segment::Unref(h_info->sid, __FUNCTION__);
   7679       G_heap_map->EraseRange(a, a + size);
   7680     }
   7681 
   7682     ThreadStackInfo *ts_info = G_thread_stack_map->GetInfo(a);
   7683     if (ts_info && ts_info->ptr == a && ts_info->size == size)
   7684       G_thread_stack_map->EraseRange(a, a + size);
   7685   }
   7686 
   7687   void HandleThreadStart(TID child_tid, TID parent_tid, CallStack *call_stack) {
   7688     // Printf("HandleThreadStart: tid=%d parent_tid=%d pc=%lx pid=%d\n",
   7689     //         child_tid.raw(), parent_tid.raw(), pc, getpid());
   7690     VTS *vts = NULL;
   7691     StackTrace *creation_context = NULL;
   7692     if (child_tid == TID(0)) {
   7693       // main thread, we are done.
   7694       vts = VTS::CreateSingleton(child_tid);
   7695     } else if (!parent_tid.valid()) {
   7696       TSanThread::StopIgnoringAccessesInT0BecauseNewThreadStarted();
   7697       Report("INFO: creating thread T%d w/o a parent\n", child_tid.raw());
   7698       vts = VTS::CreateSingleton(child_tid);
   7699     } else {
   7700       TSanThread::StopIgnoringAccessesInT0BecauseNewThreadStarted();
   7701       TSanThread *parent = TSanThread::Get(parent_tid);
   7702       CHECK(parent);
   7703       parent->HandleChildThreadStart(child_tid, &vts, &creation_context);
   7704     }
   7705 
   7706     if (!call_stack) {
   7707       call_stack = new CallStack();
   7708     }
   7709     TSanThread *new_thread = new TSanThread(child_tid, parent_tid,
   7710                                     vts, creation_context, call_stack);
   7711     CHECK(new_thread == TSanThread::Get(child_tid));
   7712     if (child_tid == TID(0)) {
   7713       new_thread->set_ignore_all_accesses(true); // until a new thread comes.
   7714     }
   7715   }
   7716 
   7717   // Executes before the first instruction of the thread but after the thread
   7718   // has been set up (e.g. the stack is in place).
   7719   void HandleThreadFirstInsn(TID tid) {
   7720     // TODO(kcc): get rid of this once we find out how to get the T0's stack.
   7721     if (tid == TID(0)) {
   7722       uintptr_t stack_min(0), stack_max(0);
   7723       GetThreadStack(tid.raw(), &stack_min, &stack_max);
   7724       TSanThread *thr = TSanThread::Get(tid);
   7725       thr->SetStack(stack_min, stack_max);
   7726       ClearMemoryState(thr, thr->min_sp(), thr->max_sp());
   7727     }
   7728   }
   7729 
   7730   // THR_STACK_TOP
   7731   void HandleThreadStackTop(Event *e) {
   7732     TID tid(e->tid());
   7733     TSanThread *thr = TSanThread::Get(tid);
    7734     // The stack grows downwards, so 'sp' here is the stack top (highest address).
   7735     uintptr_t sp = e->a();
   7736     uintptr_t sp_min = 0, sp_max = 0;
   7737     uintptr_t stack_size_if_known = e->info();
   7738     ThreadStackInfo *stack_info;
   7739     if (stack_size_if_known) {
   7740       sp_min = sp - stack_size_if_known;
   7741       sp_max = sp;
   7742     } else if (NULL != (stack_info = G_thread_stack_map->GetInfo(sp))) {
   7743       if (debug_thread) {
   7744         Printf("T%d %s: %p\n%s\n", e->tid(), __FUNCTION__,  sp,
   7745              reports_.DescribeMemory(sp).c_str());
   7746       }
   7747       sp_min = stack_info->ptr;
   7748       sp_max = stack_info->ptr + stack_info->size;
   7749     }
   7750     if (debug_thread) {
   7751       Printf("T%d SP: %p [%p %p), size=%ldK\n",
   7752              e->tid(), sp, sp_min, sp_max, (sp_max - sp_min) >> 10);
   7753     }
   7754     if (sp_min < sp_max) {
   7755       CHECK((sp_max - sp_min) >= 8 * 1024); // stay sane.
   7756       CHECK((sp_max - sp_min) < 128 * 1024 * 1024); // stay sane.
   7757       ClearMemoryState(thr, sp_min, sp_max);
   7758       thr->SetStack(sp_min, sp_max);
   7759     }
   7760   }
   7761 
   7762   // THR_END
   7763   void HandleThreadEnd(TID tid) {
   7764     TSanThread *thr = TSanThread::Get(tid);
   7765     // Add the thread-local stats to global stats.
   7766     G_stats->Add(thr->stats);
   7767     thr->stats.Clear();
   7768 
   7769     // Printf("HandleThreadEnd: %d\n", tid.raw());
   7770     if (tid != TID(0)) {
   7771       TSanThread *child = TSanThread::Get(tid);
   7772       child->HandleThreadEnd();
   7773 
   7774 
   7775       if (debug_thread) {
   7776         Printf("T%d:  THR_END     : %s %s\n", tid.raw(),
   7777                Segment::ToString(child->sid()).c_str(),
   7778                child->vts()->ToString().c_str());
   7779       }
   7780       ClearMemoryState(thr, child->min_sp(), child->max_sp());
   7781     } else {
   7782       reports_.SetProgramFinished();
   7783     }
   7784 
   7785 
   7786     if (g_so_far_only_one_thread == false
   7787         && (thr->ignore_reads() || thr->ignore_writes())) {
   7788       Report("WARNING: T%d ended while at least one 'ignore' bit is set: "
   7789              "ignore_wr=%d ignore_rd=%d\n", tid.raw(),
   7790              thr->ignore_reads(), thr->ignore_writes());
   7791       for (int i = 0; i < 2; i++) {
   7792         StackTrace *context = thr->GetLastIgnoreContext(i);
   7793         if (context) {
   7794           Report("Last ignore_%s call was here: \n%s\n", i ? "wr" : "rd",
   7795                  context->ToString().c_str());
   7796         }
   7797       }
   7798       if (G_flags->save_ignore_context == false) {
   7799         Report("Rerun with --save_ignore_context to see where "
   7800                "IGNORE_END is missing\n");
   7801       }
   7802     }
   7803     ShowProcSelfStatus();
   7804   }
   7805 
   7806   // THR_JOIN_AFTER
   7807   void HandleThreadJoinAfter(Event *e) {
   7808     TID tid(e->tid());
   7809     TSanThread *parent_thr = TSanThread::Get(tid);
   7810     VTS *vts_at_exit = NULL;
   7811     TID child_tid = parent_thr->HandleThreadJoinAfter(&vts_at_exit, TID(e->a()));
   7812     CHECK(vts_at_exit);
   7813     CHECK(parent_thr->sid().valid());
   7814     Segment::AssertLive(parent_thr->sid(),  __LINE__);
   7815     parent_thr->NewSegmentForWait(vts_at_exit);
   7816     if (debug_thread) {
   7817       Printf("T%d:  THR_JOIN_AFTER T%d  : %s\n", tid.raw(),
   7818              child_tid.raw(), parent_thr->vts()->ToString().c_str());
   7819     }
   7820   }
   7821 
   7822  public:
   7823   // TODO(kcc): merge this into Detector class. (?)
   7824   ReportStorage reports_;
   7825 
   7826   void SetUnwindCallback(ThreadSanitizerUnwindCallback cb) {
   7827     reports_.SetUnwindCallback(cb);
   7828   }
   7829 };
   7830 
   7831 static Detector        *G_detector;
   7832 
   7833 
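         // Handles one atomic memory operation: fences are ignored, anything
         // other than a plain load is treated as a store, and the access is
         // passed to the detector as a single-mop trace. For memory orders
         // other than "natomic" the inside_atomic_op_ counter is incremented
         // around the call.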
   7834 void TSanThread::HandleAtomicMop(uintptr_t a,
   7835                              uintptr_t pc,
   7836                              tsan_atomic_op op,
   7837                              tsan_memory_order mo,
   7838                              size_t size) {
   7839   if (op == tsan_atomic_op_fence)
   7840     return;
   7841   bool const is_store = (op != tsan_atomic_op_load);
   7842   CHECK(inside_atomic_op_ >= 0);
   7843   if (mo != tsan_memory_order_natomic)
   7844     inside_atomic_op_ += 1;
   7845   MopInfo mop (pc, size, is_store, true);
   7846   G_detector->HandleTrace(this, &mop, 1, pc, &a, false);
   7847   if (mo != tsan_memory_order_natomic)
   7848     inside_atomic_op_ -= 1;
   7849   CHECK(inside_atomic_op_ >= 0);
   7850 }
   7851 
   7852 
   7853 // -------- Flags ------------------------- {{{1
   7854 const char *usage_str =
   7855 "Usage:\n"
   7856 "  %s [options] program_to_test [program's options]\n"
   7857 "See %s for details\n";
   7858 
   7859 void ThreadSanitizerPrintUsage() {
   7860   Printf(usage_str, G_flags->tsan_program_name.c_str(),
   7861          G_flags->tsan_url.c_str());
   7862 }
   7863 
   7864 static void ReportUnknownFlagAndExit(const string &str) {
   7865   Printf("Unknown flag or flag value: %s\n", str.c_str());
   7866   ThreadSanitizerPrintUsage();
   7867   exit(1);
   7868 }
   7869 
   7870 // if arg and flag match, return true
   7871 // and set 'val' to the substring of arg after '='.
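         // E.g. flag "pure_happens_before" matches both "--pure_happens_before=yes"
         // and "--pure-happens-before=yes", with *val set to "yes".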
   7872 static bool FlagNameMatch(const string &arg, const string &flag, string *val) {
   7873   string f = string("--") + flag;
   7874   if (arg.size() < f.size()) return false;
   7875   for (size_t i = 0; i < f.size(); i++) {
   7876     // '-' must match '-'
   7877     // '_' may match '_' or '-'
   7878     if (f[i] == '_') {
   7879       if (arg[i] != '-' && arg[i] != '_') return false;
   7880     } else {
   7881       if (f[i] != arg[i]) return false;
   7882     }
   7883   }
   7884   if (arg.size() == f.size()) {
   7885     *val = "";
   7886     return true;
   7887   }
   7888   if (arg[f.size()] != '=') return false;
   7889   *val = arg.substr(f.size() + 1);
   7890   return true;
   7891 }
   7892 
   7893 static int FindBoolFlag(const char *name, bool default_val,
   7894                   vector<string> *args, bool *retval) {
   7895   int res = 0;
   7896   *retval = default_val;
   7897   bool cont = false;
   7898   do {
   7899     cont = false;
   7900     vector<string>::iterator it = args->begin();
   7901     for (; it != args->end(); ++it) {
   7902       string &str = *it;
   7903       string flag_value;
   7904       if (!FlagNameMatch(str, name, &flag_value)) continue;
   7905 
   7906       if (flag_value == "")            *retval = true;
   7907       else if (flag_value == "1")     *retval = true;
   7908       else if (flag_value == "true")  *retval = true;
   7909       else if (flag_value == "yes")   *retval = true;
   7910       else if (flag_value == "0")     *retval = false;
   7911       else if (flag_value == "false") *retval = false;
   7912       else if (flag_value == "no")    *retval = false;
   7913       else
   7914         ReportUnknownFlagAndExit(str);
   7915       res++;
   7916       if (G_flags->verbosity >= 1) {
   7917         Printf("%40s => %s\n", name, *retval ? "true" : "false");
   7918       }
   7919       break;
   7920     }
   7921     if (it != args->end()) {
   7922       cont = true;
   7923       args->erase(it);
   7924     }
   7925   } while (cont);
   7926   return res;
   7927 }
   7928 
   7929 static void FindIntFlag(const char *name, intptr_t default_val,
   7930                  vector<string> *args, intptr_t *retval) {
   7931   *retval = default_val;
   7932   bool cont = false;
   7933   do {
   7934     cont = false;
   7935     vector<string>::iterator it = args->begin();
   7936     for (; it != args->end(); ++it) {
   7937       string &str = *it;
   7938       string flag_value;
   7939       if (!FlagNameMatch(str, name, &flag_value)) continue;
   7940       char *end_ptr;
   7941       const char *beg_ptr = flag_value.c_str();
   7942       intptr_t int_val = my_strtol(beg_ptr, &end_ptr, 0);
   7943       if (flag_value.empty() || beg_ptr + flag_value.size() != end_ptr)
   7944         ReportUnknownFlagAndExit(str);
   7945       *retval = int_val;
   7946       if (G_flags->verbosity >= 1) {
   7947         Printf("%40s => %ld\n", name, *retval);
   7948       }
   7949       break;
   7950     }
   7951     if (it != args->end()) {
   7952       cont = true;
   7953       args->erase(it);
   7954     }
   7955   } while (cont);
   7956 }
   7957 
   7958 static void FindUIntFlag(const char *name, intptr_t default_val,
   7959                  vector<string> *args, uintptr_t *retval) {
   7960   intptr_t signed_int;
   7961   FindIntFlag(name, default_val, args, &signed_int);
   7962   CHECK_GE(signed_int, 0);
   7963   *retval = signed_int;
   7964 }
   7965 
   7966 void FindStringFlag(const char *name, vector<string> *args,
   7967                     vector<string> *retval) {
   7968   bool cont = false;
   7969   do {
   7970     cont = false;
   7971     vector<string>::iterator it = args->begin();
   7972     for (; it != args->end(); ++it) {
   7973       string &str = *it;
   7974       string flag_value;
   7975       if (!FlagNameMatch(str, name, &flag_value)) continue;
   7976       retval->push_back(flag_value);
   7977       if (G_flags->verbosity >= 1) {
   7978         Printf("%40s => %s\n", name, flag_value.c_str());
   7979       }
   7980       break;
   7981     }
   7982     if (it != args->end()) {
   7983       cont = true;
   7984       args->erase(it);
   7985     }
   7986   } while (cont);
   7987 }
   7988 
   7989 void FindStringFlag(const char *name, vector<string> *args,
   7990                     string *retval) {
   7991   vector<string> tmp;
   7992   FindStringFlag(name, args, &tmp);
   7993   if (tmp.size() > 0) {
   7994     *retval = tmp.back();
   7995   }
   7996 }
   7997 
   7998 static size_t GetMemoryLimitInMbFromProcSelfLimits() {
   7999 #ifdef VGO_linux
   8000   // Parse the memory limit section of /proc/self/limits.
   8001   string proc_self_limits = ReadFileToString("/proc/self/limits", false);
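           // The relevant line typically looks like
           //   "Max address space        4294967296   4294967296   bytes";
           // we take the soft limit (in bytes) and shift it down to megabytes.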
   8002   const char *max_addr_space = "Max address space";
   8003   size_t pos = proc_self_limits.find(max_addr_space);
   8004   if (pos == string::npos) return 0;
   8005   pos += strlen(max_addr_space);
   8006   while (proc_self_limits[pos] == ' ') pos++;
   8007   if (proc_self_limits[pos] == 'u')
   8008     return 0;  // 'unlimited'.
   8009   char *end;
   8010   size_t result = my_strtol(proc_self_limits.c_str() + pos, &end, 0);
   8011   result >>= 20;
   8012   return result;
   8013 #else
   8014   return 0;
   8015 #endif
   8016 }
   8017 
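         // Returns the memory limit in Mb: the minimum of the 32-bit Valgrind
         // limit (3G), the soft limit from /proc/self/limits and the
         // VALGRIND_MEMORY_LIMIT_IN_MB environment variable; 0 means no limit
         // is known.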
   8018 static size_t GetMemoryLimitInMb() {
   8019   size_t ret = -1;  // Maximum possible value.
   8020 #if defined(VGO_linux) && __WORDSIZE == 32
   8021   // Valgrind doesn't support more than 3G per process on 32-bit Linux.
   8022   ret = 3 * 1024;
   8023 #endif
   8024 
   8025   // Try /proc/self/limits.
   8026   size_t from_proc_self = GetMemoryLimitInMbFromProcSelfLimits();
   8027   if (from_proc_self && ret > from_proc_self) {
   8028     ret = from_proc_self;
   8029   }
   8030   // Try env.
   8031   const char *from_env_str =
   8032     (const char*)getenv("VALGRIND_MEMORY_LIMIT_IN_MB");
   8033   if (from_env_str) {
   8034     char *end;
   8035     size_t from_env_value = (size_t)my_strtol(from_env_str, &end, 0);
   8036     if (ret > from_env_value)
   8037       ret = from_env_value;
   8038   }
   8039   if (ret == (size_t)-1)
   8040     return 0;
   8041   return ret;
   8042 }
   8043 
   8044 bool PhaseDebugIsOn(const char *phase_name) {
   8045   CHECK(G_flags);
   8046   for (size_t i = 0; i < G_flags->debug_phase.size(); i++) {
   8047     if (G_flags->debug_phase[i] == phase_name)
   8048       return true;
   8049   }
   8050   return false;
   8051 }
   8052 
   8053 void ThreadSanitizerParseFlags(vector<string> *args) {
   8054 #ifdef TS_OFFLINE
   8055   string input_type_tmp;
   8056   FindStringFlag("input_type", args, &input_type_tmp);
   8057   if (input_type_tmp.size() > 0) {
   8058     G_flags->input_type = input_type_tmp;
   8059   } else {
   8060     G_flags->input_type = "str";
   8061   }
   8062 #endif
   8063 
   8064   // Check this first.
   8065   FindIntFlag("v", 0, args, &G_flags->verbosity);
   8066 
   8067   FindBoolFlag("ignore_stack", false, args, &G_flags->ignore_stack);
   8068   FindIntFlag("keep_history", 1, args, &G_flags->keep_history);
   8069   FindUIntFlag("segment_set_recycle_queue_size", DEBUG_MODE ? 10 : 10000, args,
   8070                &G_flags->segment_set_recycle_queue_size);
   8071   FindUIntFlag("recent_segments_cache_size", 10, args,
   8072                &G_flags->recent_segments_cache_size);
   8073 
   8074   bool fast_mode = false;
   8075   FindBoolFlag("fast_mode", false, args, &fast_mode);
   8076   if (fast_mode) {
   8077     Printf("INFO: --fast-mode is deprecated\n");
   8078   }
   8079   bool ignore_in_dtor = false;
   8080   FindBoolFlag("ignore_in_dtor", false, args, &ignore_in_dtor);
   8081   if (ignore_in_dtor) {
   8082     Printf("INFO: --ignore-in-dtor is deprecated\n");
   8083   }
   8084 
   8085   int has_phb = FindBoolFlag("pure_happens_before", true, args,
   8086                               &G_flags->pure_happens_before);
   8087   bool hybrid = false;
   8088   int has_hyb = FindBoolFlag("hybrid", false, args, &hybrid);
   8089   if (has_hyb && has_phb) {
   8090     Printf("INFO: --hybrid and --pure-happens-before"
    8091            " are mutually exclusive; ignoring the --hybrid switch\n");
   8092   } else if (has_hyb && !has_phb) {
   8093     G_flags->pure_happens_before = !hybrid;
   8094   }
   8095 
   8096   FindBoolFlag("show_expected_races", false, args,
   8097                &G_flags->show_expected_races);
   8098   FindBoolFlag("demangle", true, args, &G_flags->demangle);
   8099 
   8100   FindBoolFlag("announce_threads", false, args, &G_flags->announce_threads);
   8101   FindBoolFlag("full_output", false, args, &G_flags->full_output);
   8102   FindBoolFlag("show_states", false, args, &G_flags->show_states);
   8103   FindBoolFlag("show_proc_self_status", false, args,
   8104                &G_flags->show_proc_self_status);
   8105   FindBoolFlag("show_valgrind_context", false, args,
   8106                &G_flags->show_valgrind_context);
   8107   FindBoolFlag("suggest_happens_before_arcs", true, args,
   8108                &G_flags->suggest_happens_before_arcs);
   8109   FindBoolFlag("show_pc", false, args, &G_flags->show_pc);
   8110   FindBoolFlag("full_stack_frames", false, args, &G_flags->full_stack_frames);
   8111   FindBoolFlag("free_is_write", true, args, &G_flags->free_is_write);
   8112   FindBoolFlag("exit_after_main", false, args, &G_flags->exit_after_main);
   8113 
   8114   FindIntFlag("show_stats", 0, args, &G_flags->show_stats);
   8115   FindBoolFlag("trace_profile", false, args, &G_flags->trace_profile);
   8116   FindBoolFlag("color", false, args, &G_flags->color);
   8117   FindBoolFlag("html", false, args, &G_flags->html);
   8118 #ifdef TS_OFFLINE
   8119   bool show_pid_default = false;
   8120 #else
   8121   bool show_pid_default = true;
   8122 #endif
   8123   FindBoolFlag("show_pid", show_pid_default, args, &G_flags->show_pid);
   8124   FindBoolFlag("save_ignore_context", DEBUG_MODE ? true : false, args,
   8125                &G_flags->save_ignore_context);
   8126 
   8127   FindIntFlag("dry_run", 0, args, &G_flags->dry_run);
   8128   FindBoolFlag("report_races", true, args, &G_flags->report_races);
   8129   FindIntFlag("locking_scheme", 1, args, &G_flags->locking_scheme);
   8130   FindBoolFlag("unlock_on_mutex_destroy", true, args,
   8131                &G_flags->unlock_on_mutex_destroy);
   8132 
   8133   FindIntFlag("sample_events", 0, args, &G_flags->sample_events);
   8134   FindIntFlag("sample_events_depth", 2, args, &G_flags->sample_events_depth);
   8135 
   8136   FindIntFlag("debug_level", 1, args, &G_flags->debug_level);
   8137   FindStringFlag("debug_phase", args, &G_flags->debug_phase);
   8138   FindIntFlag("trace_level", 0, args, &G_flags->trace_level);
   8139 
   8140   FindIntFlag("literace_sampling", 0, args, &G_flags->literace_sampling);
   8141   FindIntFlag("sampling", 0, args, &G_flags->literace_sampling);
   8142   CHECK(G_flags->literace_sampling < 32);
   8143   CHECK(G_flags->literace_sampling >= 0);
   8144   FindBoolFlag("start_with_global_ignore_on", false, args,
   8145                &G_flags->start_with_global_ignore_on);
   8146 
   8147   FindStringFlag("fullpath_after", args, &G_flags->file_prefix_to_cut);
   8148   FindStringFlag("file_prefix_to_cut", args, &G_flags->file_prefix_to_cut);
   8149   for (size_t i = 0; i < G_flags->file_prefix_to_cut.size(); i++) {
   8150     G_flags->file_prefix_to_cut[i] =
   8151         ConvertToPlatformIndependentPath(G_flags->file_prefix_to_cut[i]);
   8152   }
   8153 
   8154   FindStringFlag("ignore", args, &G_flags->ignore);
   8155   FindStringFlag("whitelist", args, &G_flags->whitelist);
   8156   FindBoolFlag("ignore_unknown_pcs", false, args, &G_flags->ignore_unknown_pcs);
   8157 
   8158   FindBoolFlag("thread_coverage", false, args, &G_flags->thread_coverage);
   8159 
   8160   FindBoolFlag("atomicity", false, args, &G_flags->atomicity);
   8161   if (G_flags->atomicity) {
   8162     // When doing atomicity violation checking we should not
   8163     // create h-b arcs between Unlocks and Locks.
   8164     G_flags->pure_happens_before = false;
   8165   }
   8166 
   8167   FindBoolFlag("call_coverage", false, args, &G_flags->call_coverage);
   8168   FindStringFlag("dump_events", args, &G_flags->dump_events);
   8169   FindBoolFlag("symbolize", true, args, &G_flags->symbolize);
   8170 
   8171   FindIntFlag("trace_addr", 0, args,
   8172               reinterpret_cast<intptr_t*>(&G_flags->trace_addr));
   8173 
   8174   FindIntFlag("max_mem_in_mb", 0, args, &G_flags->max_mem_in_mb);
   8175   FindBoolFlag("offline", false, args, &G_flags->offline);
   8176   FindBoolFlag("attach_mode", false, args, &G_flags->attach_mode);
   8177   if (G_flags->max_mem_in_mb == 0) {
   8178     G_flags->max_mem_in_mb = GetMemoryLimitInMb();
   8179   }
   8180 
   8181   vector<string> summary_file_tmp;
   8182   FindStringFlag("summary_file", args, &summary_file_tmp);
   8183   if (summary_file_tmp.size() > 0) {
   8184     G_flags->summary_file = summary_file_tmp.back();
   8185   }
   8186 
   8187   vector<string> log_file_tmp;
   8188   FindStringFlag("log_file", args, &log_file_tmp);
   8189   if (log_file_tmp.size() > 0) {
   8190     G_flags->log_file = log_file_tmp.back();
   8191   }
   8192 
   8193   G_flags->tsan_program_name = "valgrind --tool=tsan";
   8194   FindStringFlag("tsan_program_name", args, &G_flags->tsan_program_name);
   8195 
   8196   G_flags->tsan_url = "http://code.google.com/p/data-race-test";
   8197   FindStringFlag("tsan_url", args, &G_flags->tsan_url);
   8198 
   8199   FindStringFlag("suppressions", args, &G_flags->suppressions);
   8200   FindBoolFlag("gen_suppressions", false, args,
   8201                &G_flags->generate_suppressions);
   8202 
   8203   FindIntFlag("error_exitcode", 0, args, &G_flags->error_exitcode);
   8204   FindIntFlag("flush_period", 0, args, &G_flags->flush_period);
   8205   FindBoolFlag("trace_children", false, args, &G_flags->trace_children);
   8206 
   8207   FindIntFlag("max_sid", kMaxSID, args, &G_flags->max_sid);
   8208   kMaxSID = G_flags->max_sid;
   8209   if (kMaxSID <= 100000) {
    8210     Printf("Error: max-sid must be greater than 100000. Exiting\n");
   8211     exit(1);
   8212   }
   8213   FindIntFlag("max_sid_before_flush", (kMaxSID * 15) / 16, args,
   8214               &G_flags->max_sid_before_flush);
   8215   kMaxSIDBeforeFlush = G_flags->max_sid_before_flush;
   8216 
   8217   FindIntFlag("num_callers_in_history", kSizeOfHistoryStackTrace, args,
   8218               &G_flags->num_callers_in_history);
   8219   kSizeOfHistoryStackTrace = G_flags->num_callers_in_history;
   8220 
    8221   // By default, cut stack traces below the following functions.
   8222   G_flags->cut_stack_below.push_back("TSanThread*ThreadBody*");
   8223   G_flags->cut_stack_below.push_back("ThreadSanitizerStartThread");
   8224   G_flags->cut_stack_below.push_back("start_thread");
   8225   G_flags->cut_stack_below.push_back("BaseThreadInitThunk");
   8226   FindStringFlag("cut_stack_below", args, &G_flags->cut_stack_below);
   8227 
   8228   FindIntFlag("num_callers", 16, args, &G_flags->num_callers);
   8229 
   8230   G_flags->max_n_threads        = 100000;
   8231 
   8232   if (G_flags->full_output) {
   8233     G_flags->announce_threads = true;
   8234     G_flags->show_pc = true;
   8235     G_flags->full_stack_frames = true;
   8236     G_flags->show_states = true;
   8237     G_flags->file_prefix_to_cut.clear();
   8238   }
   8239 
   8240   FindIntFlag("race_verifier_sleep_ms", 100, args,
   8241       &G_flags->race_verifier_sleep_ms);
   8242   FindStringFlag("race_verifier", args, &G_flags->race_verifier);
   8243   FindStringFlag("race_verifier_extra", args, &G_flags->race_verifier_extra);
   8244   g_race_verifier_active =
   8245       !(G_flags->race_verifier.empty() && G_flags->race_verifier_extra.empty());
   8246   if (g_race_verifier_active) {
   8247     Printf("INFO: ThreadSanitizer running in Race Verifier mode.\n");
   8248   }
   8249 
   8250   FindBoolFlag("nacl_untrusted", false, args, &G_flags->nacl_untrusted);
   8251   FindBoolFlag("threaded_analysis", false, args, &G_flags->threaded_analysis);
   8252 
   8253   FindBoolFlag("sched_shake", false, args, &G_flags->sched_shake);
   8254   FindBoolFlag("api_ambush", false, args, &G_flags->api_ambush);
   8255 
   8256   FindBoolFlag("enable_atomic", false, args, &G_flags->enable_atomic);
   8257 
   8258   if (!args->empty()) {
   8259     ReportUnknownFlagAndExit(args->front());
   8260   }
   8261 
   8262   debug_expected_races = PhaseDebugIsOn("expected_races");
   8263   debug_benign_races = PhaseDebugIsOn("benign_races");
   8264   debug_malloc = PhaseDebugIsOn("malloc");
   8265   debug_free = PhaseDebugIsOn("free");
   8266   debug_thread = PhaseDebugIsOn("thread");
   8267   debug_ignore = PhaseDebugIsOn("ignore");
   8268   debug_rtn = PhaseDebugIsOn("rtn");
   8269   debug_lock = PhaseDebugIsOn("lock");
   8270   debug_wrap = PhaseDebugIsOn("wrap");
   8271   debug_ins = PhaseDebugIsOn("ins");
   8272   debug_shadow_stack = PhaseDebugIsOn("shadow_stack");
   8273   debug_happens_before = PhaseDebugIsOn("happens_before");
   8274   debug_cache = PhaseDebugIsOn("cache");
   8275   debug_race_verifier = PhaseDebugIsOn("race_verifier");
   8276   debug_atomic = PhaseDebugIsOn("atomic");
   8277 }
   8278 
   8279 // -------- ThreadSanitizer ------------------ {{{1
   8280 
   8281 // Setup the list of functions/images/files to ignore.
   8282 static void SetupIgnore() {
   8283   g_ignore_lists = new IgnoreLists;
   8284   g_white_lists = new IgnoreLists;
   8285 
   8286   // Add some major ignore entries so that tsan remains sane
   8287   // even w/o any ignore file. First - for all platforms.
   8288   g_ignore_lists->ignores.push_back(IgnoreFun("ThreadSanitizerStartThread"));
   8289   g_ignore_lists->ignores.push_back(IgnoreFun("exit"));
   8290   g_ignore_lists->ignores.push_back(IgnoreFun("longjmp"));
   8291 
   8292   // Dangerous: recursively ignoring vfprintf hides races on printf arguments.
   8293   // See PrintfTests in unittest/racecheck_unittest.cc
   8294   // TODO(eugenis): Do something about this.
   8295   // http://code.google.com/p/data-race-test/issues/detail?id=53
   8296   g_ignore_lists->ignores_r.push_back(IgnoreFun("vfprintf"));
   8297 
   8298   // do not create segments in our Replace_* functions
   8299   g_ignore_lists->ignores_hist.push_back(IgnoreFun("Replace_memcpy"));
   8300   g_ignore_lists->ignores_hist.push_back(IgnoreFun("Replace_memchr"));
   8301   g_ignore_lists->ignores_hist.push_back(IgnoreFun("Replace_strcpy"));
   8302   g_ignore_lists->ignores_hist.push_back(IgnoreFun("Replace_strchr"));
   8303   g_ignore_lists->ignores_hist.push_back(IgnoreFun("Replace_strchrnul"));
   8304   g_ignore_lists->ignores_hist.push_back(IgnoreFun("Replace_strrchr"));
   8305   g_ignore_lists->ignores_hist.push_back(IgnoreFun("Replace_strlen"));
   8306   g_ignore_lists->ignores_hist.push_back(IgnoreFun("Replace_strcmp"));
   8307 
   8308   // Ignore everything in our own file.
   8309   g_ignore_lists->ignores.push_back(IgnoreFile("*ts_valgrind_intercepts.c"));
   8310 
   8311 #ifndef _MSC_VER
   8312   // POSIX ignores
   8313   g_ignore_lists->ignores.push_back(IgnoreObj("*/libpthread*"));
   8314   g_ignore_lists->ignores.push_back(IgnoreObj("*/ld-2*.so"));
   8315   g_ignore_lists->ignores.push_back(IgnoreFun("pthread_create"));
   8316   g_ignore_lists->ignores.push_back(IgnoreFun("pthread_create@*"));
   8317   g_ignore_lists->ignores.push_back(IgnoreFun("pthread_create_WRK"));
   8318   g_ignore_lists->ignores.push_back(IgnoreFun("__cxa_*"));
   8319   g_ignore_lists->ignores.push_back(
   8320       IgnoreFun("*__gnu_cxx*__exchange_and_add*"));
   8321   g_ignore_lists->ignores.push_back(IgnoreFun("__lll_mutex_*"));
   8322   g_ignore_lists->ignores.push_back(IgnoreFun("__lll_*lock_*"));
   8323   g_ignore_lists->ignores.push_back(IgnoreFun("__fprintf_chk"));
   8324   g_ignore_lists->ignores.push_back(IgnoreFun("_IO_file_xsputn*"));
   8325   // fflush internals
   8326   g_ignore_lists->ignores.push_back(IgnoreFun("_IO_adjust_column"));
   8327   g_ignore_lists->ignores.push_back(IgnoreFun("_IO_flush_all_lockp"));
   8328 
   8329   g_ignore_lists->ignores.push_back(IgnoreFun("__sigsetjmp"));
   8330   g_ignore_lists->ignores.push_back(IgnoreFun("__sigjmp_save"));
   8331   g_ignore_lists->ignores.push_back(IgnoreFun("_setjmp"));
   8332   g_ignore_lists->ignores.push_back(IgnoreFun("_longjmp_unwind"));
   8333 
   8334   g_ignore_lists->ignores.push_back(IgnoreFun("__mktime_internal"));
   8335 
   8336   // http://code.google.com/p/data-race-test/issues/detail?id=40
   8337   g_ignore_lists->ignores_r.push_back(IgnoreFun("_ZNSsD1Ev"));
   8338 
   8339   g_ignore_lists->ignores_r.push_back(IgnoreFun("gaih_inet"));
   8340   g_ignore_lists->ignores_r.push_back(IgnoreFun("getaddrinfo"));
   8341   g_ignore_lists->ignores_r.push_back(IgnoreFun("gethostbyname2_r"));
   8342 
   8343   #ifdef VGO_darwin
   8344     // Mac-only ignores
   8345     g_ignore_lists->ignores.push_back(IgnoreObj("/usr/lib/dyld"));
   8346     g_ignore_lists->ignores.push_back(IgnoreObj("/usr/lib/libobjc.A.dylib"));
   8347     g_ignore_lists->ignores.push_back(IgnoreObj("*/libSystem.*.dylib"));
   8348     g_ignore_lists->ignores_r.push_back(IgnoreFun("__CFDoExternRefOperation"));
   8349     g_ignore_lists->ignores_r.push_back(IgnoreFun("_CFAutoreleasePoolPop"));
   8350     g_ignore_lists->ignores_r.push_back(IgnoreFun("_CFAutoreleasePoolPush"));
   8351     g_ignore_lists->ignores_r.push_back(IgnoreFun("OSAtomicAdd32"));
   8352     g_ignore_lists->ignores_r.push_back(IgnoreTriple("_dispatch_Block_copy",
   8353                                             "/usr/lib/libSystem.B.dylib", "*"));
   8354 
   8355     // pthread_lib_{enter,exit} shouldn't give us any reports since they
    8356     // have IGNORE_ALL_ACCESSES_BEGIN/END, but they still produce reports...
   8357     g_ignore_lists->ignores_r.push_back(IgnoreFun("pthread_lib_enter"));
   8358     g_ignore_lists->ignores_r.push_back(IgnoreFun("pthread_lib_exit"));
   8359   #endif
   8360 #else
   8361   // Windows-only ignores
   8362   g_ignore_lists->ignores.push_back(IgnoreObj("*ole32.dll"));
   8363   g_ignore_lists->ignores.push_back(IgnoreObj("*OLEAUT32.dll"));
   8364   g_ignore_lists->ignores.push_back(IgnoreObj("*MSCTF.dll"));
   8365   g_ignore_lists->ignores.push_back(IgnoreObj("*ntdll.dll"));
   8366   g_ignore_lists->ignores.push_back(IgnoreObj("*mswsock.dll"));
   8367   g_ignore_lists->ignores.push_back(IgnoreObj("*WS2_32.dll"));
   8368   g_ignore_lists->ignores.push_back(IgnoreObj("*msvcrt.dll"));
   8369   g_ignore_lists->ignores.push_back(IgnoreObj("*kernel32.dll"));
   8370   g_ignore_lists->ignores.push_back(IgnoreObj("*ADVAPI32.DLL"));
   8371 
   8372   g_ignore_lists->ignores.push_back(IgnoreFun("_EH_epilog3"));
   8373   g_ignore_lists->ignores.push_back(IgnoreFun("_EH_prolog3_catch"));
   8374   g_ignore_lists->ignores.push_back(IgnoreFun("unnamedImageEntryPoint"));
   8375   g_ignore_lists->ignores.push_back(IgnoreFun("_Mtxunlock"));
   8376   g_ignore_lists->ignores.push_back(IgnoreFun("IsNLSDefinedString"));
   8377 
   8378   g_ignore_lists->ignores_r.push_back(IgnoreFun("RtlDestroyQueryDebugBuffer"));
   8379   g_ignore_lists->ignores_r.push_back(IgnoreFun("BCryptGenerateSymmetricKey"));
   8380   g_ignore_lists->ignores_r.push_back(IgnoreFun("SHGetItemFromDataObject"));
   8381 
   8382   // http://code.google.com/p/data-race-test/issues/detail?id=53
   8383   g_ignore_lists->ignores_r.push_back(IgnoreFun("_stbuf"));
   8384   g_ignore_lists->ignores_r.push_back(IgnoreFun("_getptd"));
   8385 
   8386   // TODO(timurrrr): Add support for FLS (fiber-local-storage)
   8387   // http://code.google.com/p/data-race-test/issues/detail?id=55
   8388   g_ignore_lists->ignores_r.push_back(IgnoreFun("_freefls"));
   8389 #endif
   8390 
   8391 #ifdef ANDROID
   8392   // Android does not have a libpthread; pthread_* functions live in libc.
   8393   // We have to ignore them one-by-one.
   8394   g_ignore_lists->ignores.push_back(IgnoreFun("pthread_*"));
   8395   g_ignore_lists->ignores.push_back(IgnoreFun("__init_tls"));
   8396 #endif
   8397 
   8398   // Now read the ignore/whitelist files.
   8399   for (size_t i = 0; i < G_flags->ignore.size(); i++) {
   8400     string file_name = G_flags->ignore[i];
   8401     Report("INFO: Reading ignore file: %s\n", file_name.c_str());
   8402     string str = ReadFileToString(file_name, true);
   8403     ReadIgnoresFromString(str, g_ignore_lists);
   8404   }
   8405   for (size_t i = 0; i < G_flags->whitelist.size(); i++) {
   8406     string file_name = G_flags->whitelist[i];
   8407     Report("INFO: Reading whitelist file: %s\n", file_name.c_str());
   8408     string str = ReadFileToString(file_name, true);
   8409     ReadIgnoresFromString(str, g_white_lists);
   8410   }
   8411 }
   8412 
   8413 void ThreadSanitizerSetUnwindCallback(ThreadSanitizerUnwindCallback cb) {
   8414   G_detector->SetUnwindCallback(cb);
   8415 }
   8416 
   8417 void ThreadSanitizerNaclUntrustedRegion(uintptr_t mem_start, uintptr_t mem_end) {
   8418   g_nacl_mem_start = mem_start;
   8419   g_nacl_mem_end = mem_end;
   8420 }
   8421 
   8422 bool AddrIsInNaclUntrustedRegion(uintptr_t addr) {
   8423   return addr >= g_nacl_mem_start && addr < g_nacl_mem_end;
   8424 }
   8425 
   8426 bool ThreadSanitizerIgnoreForNacl(uintptr_t addr) {
   8427   // Ignore trusted addresses if tracing untrusted code, and ignore untrusted
   8428   // addresses otherwise.
   8429   return G_flags->nacl_untrusted != AddrIsInNaclUntrustedRegion(addr);
   8430 }
   8431 
   8432 bool ThreadSanitizerWantToInstrumentSblock(uintptr_t pc) {
   8433   string img_name, rtn_name, file_name;
   8434   int line_no;
   8435   G_stats->pc_to_strings++;
   8436   PcToStrings(pc, false, &img_name, &rtn_name, &file_name, &line_no);
   8437 
   8438   if (g_white_lists->ignores.size() > 0) {
   8439     bool in_white_list = TripleVectorMatchKnown(g_white_lists->ignores,
   8440                                                 rtn_name, img_name, file_name);
   8441     if (in_white_list) {
   8442       if (debug_ignore) {
   8443         Report("INFO: Whitelisted rtn: %s\n", rtn_name.c_str());
   8444       }
   8445     } else {
   8446       return false;
   8447     }
   8448   }
   8449 
   8450   if (G_flags->ignore_unknown_pcs && rtn_name == "(no symbols)") {
   8451     if (debug_ignore) {
   8452       Report("INFO: not instrumenting unknown function at %p\n", pc);
   8453     }
   8454     return false;
   8455   }
   8456 
   8457   bool ignore = TripleVectorMatchKnown(g_ignore_lists->ignores,
   8458                                        rtn_name, img_name, file_name) ||
   8459                 TripleVectorMatchKnown(g_ignore_lists->ignores_r,
   8460                                        rtn_name, img_name, file_name);
   8461   if (debug_ignore) {
   8462     Printf("%s: pc=%p file_name=%s img_name=%s rtn_name=%s ret=%d\n",
   8463            __FUNCTION__, pc, file_name.c_str(), img_name.c_str(),
   8464            rtn_name.c_str(), !ignore);
   8465   }
   8466   bool nacl_ignore = ThreadSanitizerIgnoreForNacl(pc);
   8467   return !(ignore || nacl_ignore);
   8468 }
   8469 
   8470 bool ThreadSanitizerWantToCreateSegmentsOnSblockEntry(uintptr_t pc) {
   8471   string rtn_name;
   8472   rtn_name = PcToRtnName(pc, false);
   8473   if (G_flags->keep_history == 0)
   8474     return false;
   8475   return !(TripleVectorMatchKnown(g_ignore_lists->ignores_hist,
   8476                                   rtn_name, "", ""));
   8477 }
   8478 
   8479 // Returns true if function at "pc" is marked as "fun_r" in the ignore file.
   8480 bool NOINLINE ThreadSanitizerIgnoreAccessesBelowFunction(uintptr_t pc) {
   8481   ScopedMallocCostCenter cc(__FUNCTION__);
   8482   typedef unordered_map<uintptr_t, bool> Cache;
   8483   static Cache *cache = NULL;
   8484   {
   8485     TIL ignore_below_lock(ts_ignore_below_lock, 18);
   8486     if (!cache)
   8487       cache = new Cache;
   8488 
   8489     // Fast path - check if we already know the answer.
   8490     Cache::iterator i = cache->find(pc);
   8491     if (i != cache->end())
   8492       return i->second;
   8493   }
   8494 
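           // Slow path: symbolize without holding ts_ignore_below_lock, presumably
           // because PcToRtnName() may be slow and may take other locks.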
   8495   string rtn_name = PcToRtnName(pc, false);
   8496   bool ret =
   8497       TripleVectorMatchKnown(g_ignore_lists->ignores_r, rtn_name, "", "");
   8498 
   8499   if (DEBUG_MODE) {
   8500     // Heavy test for NormalizeFunctionName: test on all possible inputs in
   8501     // debug mode. TODO(timurrrr): Remove when tested.
   8502     NormalizeFunctionName(PcToRtnName(pc, true));
   8503   }
   8504 
   8505   // Grab the lock again
   8506   TIL ignore_below_lock(ts_ignore_below_lock, 19);
   8507   if (ret && debug_ignore) {
   8508     Report("INFO: ignoring all accesses below the function '%s' (%p)\n",
   8509            PcToRtnNameAndFilePos(pc).c_str(), pc);
   8510   }
   8511   return ((*cache)[pc] = ret);
   8512 }
   8513 
   8514 // We intercept a user function with this name
   8515 // and answer the user query with a non-NULL string.
   8516 extern "C" const char *ThreadSanitizerQuery(const char *query) {
   8517   const char *ret = "0";
   8518   string str(query);
   8519   if (str == "pure_happens_before" && G_flags->pure_happens_before == true) {
   8520     ret = "1";
   8521   }
   8522   if (str == "hybrid_full" &&
   8523       G_flags->pure_happens_before == false) {
   8524     ret = "1";
   8525   }
   8526   if (str == "race_verifier" && g_race_verifier_active == true) {
   8527     ret = "1";
   8528   }
   8529   if (DEBUG_MODE && G_flags->debug_level >= 2) {
   8530     Printf("ThreadSanitizerQuery(\"%s\") = \"%s\"\n", query, ret);
   8531   }
   8532   if (str == "trace-level=0") {
   8533     Report("INFO: trace-level=0\n");
   8534     G_flags->trace_level = 0;
   8535     debug_happens_before = false;
   8536   }
   8537   if (str == "trace-level=1") {
   8538     Report("INFO: trace-level=1\n");
   8539     G_flags->trace_level = 1;
   8540     debug_happens_before = true;
   8541   }
   8542   return ret;
   8543 }
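         
         // Illustrative sketch (not from the original source): a program that wants to
         // query the tool defines this function itself, so the code links and runs
         // without the tool, and relies on the interception above when under it:
         //
         //   extern "C" const char *ThreadSanitizerQuery(const char *query) {
         //     return "0";  // default answer when not running under ThreadSanitizer
         //   }
         //   ...
         //   bool hybrid = !strcmp(ThreadSanitizerQuery("hybrid_full"), "1");
         //
         // The recognized queries are exactly the ones handled above.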
   8544 
   8545 extern void ThreadSanitizerInit() {
   8546   ScopedMallocCostCenter cc("ThreadSanitizerInit");
   8547   ts_lock = new TSLock;
   8548   ts_ignore_below_lock = new TSLock;
   8549   g_so_far_only_one_thread = true;
   8550   ANNOTATE_BENIGN_RACE(&g_so_far_only_one_thread, "real benign race");
   8551   CHECK_EQ(sizeof(ShadowValue), 8);
   8552   CHECK(G_flags);
   8553   G_stats        = new Stats;
   8554   SetupIgnore();
   8555 
   8556   G_detector     = new Detector;
   8557   G_cache        = new Cache;
   8558   G_expected_races_map = new ExpectedRacesMap;
   8559   G_heap_map           = new HeapMap<HeapInfo>;
   8560   G_thread_stack_map   = new HeapMap<ThreadStackInfo>;
   8561   {
   8562     ScopedMallocCostCenter cc1("Segment::InitClassMembers");
   8563     Segment::InitClassMembers();
   8564   }
   8565   SegmentSet::InitClassMembers();
   8566   CacheLine::InitClassMembers();
   8567   TSanThread::InitClassMembers();
   8568   Lock::InitClassMembers();
   8569   LockSet::InitClassMembers();
   8570   EventSampler::InitClassMembers();
   8571   VTS::InitClassMembers();
   8572   // TODO(timurrrr): make sure *::InitClassMembers() are called only once for
   8573   // each class
   8574   g_publish_info_map = new PublishInfoMap;
   8575   g_stack_trace_free_list = new StackTraceFreeList;
   8576   g_pcq_map = new PCQMap;
   8577   g_atomicCore = new TsanAtomicCore();
   8578 
   8579 
   8580   if (G_flags->html) {
   8581     c_bold    = "<font ><b>";
   8582     c_red     = "<font color=red><b>";
   8583     c_green   = "<font color=green><b>";
   8584     c_magenta = "<font color=magenta><b>";
   8585     c_cyan    = "<font color=cyan><b>";
    8586     c_blue    = "<font color=blue><b>";
   8587     c_yellow  = "<font color=yellow><b>";
   8588     c_default = "</b></font>";
   8589   } else if (G_flags->color) {
   8590     // Enable ANSI colors.
   8591     c_bold    = "\033[1m";
   8592     c_red     = "\033[31m";
   8593     c_green   = "\033[32m";
   8594     c_yellow  = "\033[33m";
   8595     c_blue    = "\033[34m";
   8596     c_magenta = "\033[35m";
   8597     c_cyan    = "\033[36m";
   8598     c_default = "\033[0m";
   8599   }
   8600 
   8601   if (G_flags->verbosity >= 1) {
   8602     Report("INFO: Started pid %d\n",  getpid());
   8603   }
   8604   if (G_flags->start_with_global_ignore_on) {
   8605     global_ignore = true;
   8606     Report("INFO: STARTING WITH GLOBAL IGNORE ON\n");
   8607   }
   8608   ANNOTATE_BENIGN_RACE(&g_lock_era,
   8609                        "g_lock_era may be incremented in a racey way");
   8610 }
   8611 
   8612 extern void ThreadSanitizerFini() {
   8613   G_detector->HandleProgramEnd();
   8614 }
   8615 
   8616 extern void ThreadSanitizerDumpAllStacks() {
   8617   // first, print running threads.
   8618   for (int i = 0; i < TSanThread::NumberOfThreads(); i++) {
   8619     TSanThread *t = TSanThread::Get(TID(i));
   8620     if (!t || !t->is_running()) continue;
   8621     Report("T%d\n", i);
   8622     t->ReportStackTrace();
   8623   }
    8624   // now print all dead threads.
   8625   for (int i = 0; i < TSanThread::NumberOfThreads(); i++) {
   8626     TSanThread *t = TSanThread::Get(TID(i));
   8627     if (!t || t->is_running()) continue;
   8628     Report("T%d (not running)\n", i);
   8629     t->ReportStackTrace();
   8630   }
   8631 }
   8632 
   8633 
   8634 extern void ThreadSanitizerHandleOneEvent(Event *e) {
    8635   // The lock is taken inside on some paths.
   8636   G_detector->HandleOneEvent(e);
   8637 }
   8638 
   8639 TSanThread *ThreadSanitizerGetThreadByTid(int32_t tid) {
   8640   return TSanThread::Get(TID(tid));
   8641 }
   8642 
   8643 extern NOINLINE void ThreadSanitizerHandleTrace(int32_t tid, TraceInfo *trace_info,
   8644                                        uintptr_t *tleb) {
   8645   ThreadSanitizerHandleTrace(TSanThread::Get(TID(tid)), trace_info, tleb);
   8646 }
   8647 extern NOINLINE void ThreadSanitizerHandleTrace(TSanThread *thr, TraceInfo *trace_info,
   8648                                                 uintptr_t *tleb) {
   8649   DCHECK(thr);
   8650   // The lock is taken inside on the slow path.
   8651   G_detector->HandleTrace(thr,
   8652                           trace_info->mops(),
   8653                           trace_info->n_mops(),
   8654                           trace_info->pc(),
   8655                           tleb, /*need_locking=*/true);
   8656 }
   8657 
   8658 extern NOINLINE void ThreadSanitizerHandleOneMemoryAccess(TSanThread *thr,
   8659                                                           MopInfo mop,
   8660                                                           uintptr_t addr) {
   8661   DCHECK(thr);
   8662   G_detector->HandleTrace(thr,
   8663                           &mop,
   8664                           1,
   8665                           mop.create_sblock() ? mop.pc() : 0,
   8666                           &addr, /*need_locking=*/true);
   8667 }
   8668 
   8669 void NOINLINE ThreadSanitizerHandleRtnCall(int32_t tid, uintptr_t call_pc,
   8670                                          uintptr_t target_pc,
   8671                                          IGNORE_BELOW_RTN ignore_below) {
    8672   // This does locking on a cold path. The hot path is thread-local.
   8673   G_detector->HandleRtnCall(TID(tid), call_pc, target_pc, ignore_below);
   8674 
   8675   if (G_flags->sample_events) {
   8676     static EventSampler sampler;
   8677     TSanThread *thr = TSanThread::Get(TID(tid));
   8678     sampler.Sample(thr, "RTN_CALL", true);
   8679   }
   8680 }
   8681 void NOINLINE ThreadSanitizerHandleRtnExit(int32_t tid) {
   8682   // This is a thread-local operation, no need for locking.
   8683   TSanThread::Get(TID(tid))->HandleRtnExit();
   8684 }
   8685 
   8686 static bool ThreadSanitizerPrintReport(ThreadSanitizerReport *report) {
   8687   return G_detector->reports_.PrintReport(report);
   8688 }
   8689 
   8690 
   8691 // -------- TsanAtomicImplementation ------------------ {{{1
   8692 
   8693 // Atomic operation handler.
    8694 // The idea of atomic handling is as follows.
   8695 // * First, we handle it as normal memory access,
   8696 //     however with race reporting suppressed. That is, we won't produce any
   8697 //     race reports during atomic access, but we can produce race reports
   8698 //     later during normal memory accesses that race with the access.
   8699 // * Then, we do the actual atomic memory access.
   8700 //     It's executed in an atomic fashion, because there can be simultaneous
   8701 //     atomic accesses from non-instrumented code (FUTEX_OP is a notable
   8702 //     example).
    8703 // * Finally, we update the simulated memory model state according to
    8704 //     the access type and the associated memory order, as follows.
   8705 //     For writes and RMWs we create a new entry in the modification order
   8706 //     of the variable. For reads we scan the modification order starting
   8707 //     from the latest entry and going back in time, during the scan we decide
   8708 //     what entry the read returns. A separate VTS (happens-before edges)
   8709 //     is associated with each entry in the modification order, so that a load
   8710 //     acquires memory visibility from the exact release-sequence associated
   8711 //     with the loaded value.
   8712 // For details of memory modelling refer to sections 1.10 and 29
   8713 //     of C++0x standard:
   8714 // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2011/n3242.pdf
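         //
         // Worked example (illustrative, not part of the original source): suppose the
         // modification order of an atomic variable contains writes W1(val=1) and then
         // W2(val=2), and the reading thread has no happens-before relation with either
         // write. HandleRead() may then return either 1 or 2; but once this thread has
         // (randomly) observed W2, the recorded "last_seen" position prevents it from
         // later observing the older W1, which models the C++0x coherence rules for a
         // single atomic variable.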
   8715 uint64_t ThreadSanitizerHandleAtomicOp(int32_t tid,
   8716                                        uintptr_t pc,
   8717                                        tsan_atomic_op op,
   8718                                        tsan_memory_order mo,
   8719                                        tsan_memory_order fail_mo,
   8720                                        size_t size,
   8721                                        void volatile* a,
   8722                                        uint64_t v,
   8723                                        uint64_t cmp) {
   8724   if (G_flags->enable_atomic == false) {
   8725     uint64_t newv = 0;
   8726     uint64_t prev = 0;
   8727     return tsan_atomic_do_op(op, mo, fail_mo, size, a, v, cmp, &newv, &prev);
   8728   } else {
   8729     uint64_t rv = 0;
   8730     TSanThread* thr = TSanThread::Get(TID(tid));
   8731     // Just a verification of the parameters.
   8732     tsan_atomic_verify(op, mo, fail_mo, size, a);
   8733 
   8734     {
   8735       TIL til(ts_lock, 0);
   8736       uint64_t newv = 0;
   8737       uint64_t prev = 0;
    8738       // Handle it as a plain mop; race reports are temporarily suppressed.
   8739       thr->HandleAtomicMop((uintptr_t)a, pc, op, mo, size);
   8740       // Do the actual atomic operation. It's executed in an atomic fashion,
   8741       // because there can be simultaneous atomic accesses
   8742       // from non-instrumented code.
   8743       rv = tsan_atomic_do_op(op, mo, fail_mo, size, a, v, cmp, &newv, &prev);
   8744 
   8745       PrintfIf(debug_atomic, "rv=%llu, newv=%llu, prev=%llu\n",
   8746                (unsigned long long)rv,
   8747                (unsigned long long)newv,
   8748                (unsigned long long)prev);
   8749 
   8750       if (op != tsan_atomic_op_fence) {
   8751         if (op == tsan_atomic_op_load) {
    8752           // For reads it replaces the return value with a randomly chosen
    8753           // value from the visible sequence of side effects in the
    8754           // modification order of the variable.
   8755           rv = g_atomicCore->HandleRead(thr, (uintptr_t)a, rv,
   8756                                         tsan_atomic_is_acquire(mo));
   8757         } else if ((op == tsan_atomic_op_compare_exchange_weak
   8758             || op == tsan_atomic_op_compare_exchange_strong)
   8759             && cmp != rv) {
    8760           // A failed compare_exchange is handled as a read, because, well,
    8761           // it is indeed just a read (at least logically).
   8762           g_atomicCore->HandleRead(thr, (uintptr_t)a, rv,
   8763                                    tsan_atomic_is_acquire(fail_mo));
   8764         } else {
    8765           // For writes and RMW operations it updates the modification
    8766           // order of the atomic variable.
   8767           g_atomicCore->HandleWrite(thr, (uintptr_t)a, newv, prev,
   8768                                     tsan_atomic_is_acquire(mo),
   8769                                     tsan_atomic_is_release(mo),
   8770                                     tsan_atomic_is_rmw(op));
   8771         }
   8772       }
   8773     }
   8774 
   8775     PrintfIf(debug_atomic, "ATOMIC: %s-%s %p (%llu,%llu)=%llu\n",
   8776              tsan_atomic_to_str(op),
   8777              tsan_atomic_to_str(mo),
   8778              a, (unsigned long long)v, (unsigned long long)cmp,
   8779              (unsigned long long)rv);
   8780 
   8781     return rv;
   8782   }
   8783 }
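         
         // Hypothetical sketch (not part of the original source): an instrumentation
         // shim for an 8-byte atomic load could forward to the handler above roughly as
         //
         //   uint64_t v = ThreadSanitizerHandleAtomicOp(tid, pc, tsan_atomic_op_load,
         //                                              mo, mo, 8, addr, 0, 0);
         //
         // where `mo` is the tsan_memory_order supplied by the call site; for a plain
         // load the value and compare arguments are presumably ignored, hence the zeros.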
   8784 
   8785 
   8786 TsanAtomicCore::TsanAtomicCore() {
   8787 }
   8788 
   8789 
   8790 void TsanAtomicCore::HandleWrite(TSanThread* thr,
   8791                                  uintptr_t a,
   8792                                  uint64_t v,
   8793                                  uint64_t prev,
   8794                                  bool const is_acquire,
   8795                                  bool const is_release,
   8796                                  bool const is_rmw) {
   8797   PrintfIf(debug_atomic, "HIST(%p): store acquire=%u, release=%u, rmw=%u\n",
   8798            (void*)a, is_acquire, is_release, is_rmw);
   8799   Atomic* atomic = &atomic_map_[a];
   8800   // Fix modification history if there were untracked accesses.
   8801   AtomicFixHist(atomic, prev);
   8802   AtomicHistoryEntry& hprv = atomic->hist
   8803       [(atomic->hist_pos - 1) % Atomic::kHistSize];
   8804   AtomicHistoryEntry& hist = atomic->hist
   8805       [atomic->hist_pos % Atomic::kHistSize];
    8806   // Fill in a new entry in the modification history.
   8807   hist.val = v;
   8808   hist.tid = thr->tid();
   8809   hist.clk = thr->vts()->clk(thr->tid());
   8810   if (hist.vts != 0) {
   8811     VTS::Unref(hist.vts);
   8812     hist.vts = 0;
   8813   }
   8814   atomic->hist_pos += 1;
   8815 
   8816   // Update VTS according to memory access type and memory ordering.
   8817   if (is_rmw) {
   8818     if (is_release) {
   8819       if (hprv.vts != 0) {
   8820         hist.vts = VTS::Join(hprv.vts, thr->vts());
   8821       } else {
   8822         hist.vts = thr->vts()->Clone();
   8823       }
   8824     } else if (hprv.vts != 0) {
   8825       hist.vts = hprv.vts->Clone();
   8826     }
   8827     if (is_acquire && hprv.vts != 0) {
   8828       thr->NewSegmentForWait(hprv.vts);
   8829     }
   8830   } else {
   8831     DCHECK(is_acquire == false);
   8832     if (is_release) {
   8833       hist.vts = thr->vts()->Clone();
   8834     }
   8835   }
   8836 
    8837   // Update the thread's VTS if it's a release memory access.
   8838   if (is_release) {
   8839     thr->NewSegmentForSignal();
   8840     if (debug_happens_before) {
   8841       Printf("T%d: Signal: %p:\n    %s %s\n    %s\n",
   8842              thr->tid().raw(), a,
   8843              thr->vts()->ToString().c_str(),
   8844              Segment::ToString(thr->sid()).c_str(),
   8845              hist.vts->ToString().c_str());
   8846       if (G_flags->debug_level >= 1) {
   8847         thr->ReportStackTrace();
   8848       }
   8849     }
   8850   }
   8851 }
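         
         // Illustrative note (not from the original source): the is_rmw branch above is
         // what models release sequences. If T1 does a store-release and T2 then does a
         // fetch_add (RMW) on the same variable, the RMW's history entry carries the
         // previous entry's VTS forward (joined with T2's own VTS if the RMW is itself
         // a release), so a later load-acquire that reads the RMW's value still
         // acquires T1's happens-before edges.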
   8852 
   8853 
   8854 uint64_t TsanAtomicCore::HandleRead(TSanThread* thr,
   8855                                     uintptr_t a,
   8856                                     uint64_t v,
   8857                                     bool is_acquire) {
   8858   PrintfIf(debug_atomic, "HIST(%p): {\n", (void*)a);
   8859 
   8860   Atomic* atomic = &atomic_map_[a];
   8861   // Fix modification history if there were untracked accesses.
   8862   AtomicFixHist(atomic, v);
   8863   AtomicHistoryEntry* hist0 = 0;
   8864   int32_t seen_seq = 0;
   8865   int32_t const seen_seq0 = atomic->last_seen.clock(thr->tid());
   8866   // Scan modification order of the variable from the latest entry
    8867   // back in time. For each side effect (write) we determine whether we
    8868   // have to yield its value or whether we can go further back in time.
   8869   for (int32_t i = 0; i != Atomic::kHistSize; i += 1) {
   8870     int32_t const idx = (atomic->hist_pos - i - 1);
   8871     CHECK(idx >= 0);
   8872     AtomicHistoryEntry& hist = atomic->hist[idx % Atomic::kHistSize];
   8873     PrintfIf(debug_atomic, "HIST(%p):   #%u (tid=%u, clk=%u,"
   8874            " val=%llu) vts=%u\n",
   8875            (void*)a, (unsigned)i, (unsigned)hist.tid.raw(),
   8876            (unsigned)hist.clk, (unsigned long long)hist.val,
   8877            (unsigned)thr->vts()->clk(hist.tid));
   8878     if (hist.tid.raw() == TID::kInvalidTID) {
   8879       // We hit an uninialized entry, that is, it's an access to an unitialized
   8880       // variable (potentially due to "race").
   8881       // Unfortunately, it should not happen as of now.
   8882       // TODO(dvyukov): how can we detect and report unitialized atomic reads?.
   8883       // .
   8884       hist0 = 0;
   8885       break;
   8886     } else if (i == Atomic::kHistSize - 1) {
   8887       // It's the last entry so we have to return it
   8888       // because we have to return something.
   8889       PrintfIf(debug_atomic, "HIST(%p):   replaced: last\n", (void*)a);
   8890       hist0 = &hist;
   8891       break;
   8892     } else if (seen_seq0 >= idx) {
    8893       // The thread has already seen this entry, so we have to return
    8894       // at least this one.
   8895       PrintfIf(debug_atomic, "HIST(%p):   replaced: stability\n", (void*)a);
   8896       hist0 = &hist;
   8897       break;
   8898     } else if (thr->vts()->clk(hist.tid) >= hist.clk) {
   8899       // The write happened-before the read, so we have to return it.
   8900       PrintfIf(debug_atomic, "HIST(%p):   replaced: ordering\n", (void*)a);
   8901       hist0 = &hist;
   8902       break;
   8903     } else if (thr->random() % 2) {
    8904       // We are not obliged to return this entry, but we randomly chose to.
   8905       PrintfIf(debug_atomic, "HIST(%p):   replaced: coherence\n", (void*)a);
   8906       seen_seq = idx;
   8907       hist0 = &hist;
   8908       break;
   8909     } else {
   8910       // Move on to the next (older) entry.
   8911       PrintfIf(debug_atomic, "HIST(%p):   can be replaced but not\n", (void*)a);
   8912     }
   8913   }
   8914 
   8915   if (hist0 != 0) {
   8916     v = hist0->val;
    8917     // Acquire memory visibility is needed.
   8918     if (is_acquire) {
   8919       if (hist0->vts != 0) {
   8920         thr->NewSegmentForWait(hist0->vts);
   8921       }
   8922 
   8923       if (debug_happens_before) {
   8924         Printf("T%d: Wait: %p:\n    %s %s\n",
   8925                thr->tid().raw(), a,
   8926                thr->vts()->ToString().c_str(),
   8927                Segment::ToString(thr->sid()).c_str());
   8928         if (G_flags->debug_level >= 1) {
   8929           thr->ReportStackTrace();
   8930         }
   8931       }
   8932     }
   8933     if (seen_seq != 0) {
   8934       // Mark the entry as seen so we won't return any older entry later.
   8935       atomic->last_seen.update(thr->tid(), seen_seq);
   8936     }
   8937   } else {
   8938     CHECK("should never happen as of now" == 0);
    8939     PrintfIf(debug_atomic, "HIST(%p): UNINITIALIZED LOAD\n", (void*)a);
   8940     v = thr->random();
   8941   }
   8942   PrintfIf(debug_atomic, "HIST(%p): } -> %llu\n",
   8943       (void*)a, (unsigned long long)v);
   8944   return v;
   8945 }
   8946 
   8947 
   8948 void TsanAtomicCore::ClearMemoryState(uintptr_t a, uintptr_t b) {
   8949   DCHECK(a <= b);
   8950   DCHECK(G_flags->enable_atomic || atomic_map_.empty());
   8951   AtomicMap::iterator begin (atomic_map_.lower_bound(a));
   8952   AtomicMap::iterator pos (begin);
   8953   for (; pos != atomic_map_.end() && pos->first <= b; ++pos) {
   8954     pos->second.reset();
   8955   }
   8956   atomic_map_.erase(begin, pos);
   8957 }
   8958 
   8959 
   8960 void TsanAtomicCore::AtomicFixHist(Atomic* atomic, uint64_t prev) {
   8961   AtomicHistoryEntry& hprv = atomic->hist
   8962       [(atomic->hist_pos - 1) % Atomic::kHistSize];
    8963   // In case we missed an atomic access (that is, an access from
    8964   // non-instrumented code), reset the whole history and initialize it
    8965   // with a single entry that happened "before world creation".
   8966   if (prev != hprv.val) {
   8967     PrintfIf(debug_atomic, "HIST RESET\n");
   8968     atomic->reset();
   8969     AtomicHistoryEntry& hist = atomic->hist
   8970         [atomic->hist_pos % Atomic::kHistSize];
   8971     hist.val = prev;
   8972     hist.tid = TID(0);
   8973     hist.clk = 0;
   8974     atomic->hist_pos += 1;
   8975   }
   8976 }
   8977 
   8978 
   8979 TsanAtomicCore::Atomic::Atomic() {
   8980   reset(true);
   8981 }
   8982 
   8983 
   8984 void TsanAtomicCore::Atomic::reset(bool init) {
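           // hist_pos starts just past the history size so that expressions like
           // (hist_pos - 1) % Atomic::kHistSize and the backward scan in HandleRead()
           // (which CHECKs idx >= 0) never go negative.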
   8985   hist_pos = sizeof(hist)/sizeof(hist[0]) + 1;
   8986   for (size_t i = 0; i != sizeof(hist)/sizeof(hist[0]); i += 1) {
   8987     hist[i].val = 0xBCEBC041;
   8988     hist[i].tid = TID(TID::kInvalidTID);
   8989     hist[i].clk = -1;
   8990     if (init == false && hist[i].vts != 0)
   8991       VTS::Unref(hist[i].vts);
   8992     hist[i].vts = 0;
   8993   }
   8994   last_seen.reset();
   8995 }
   8996 
   8997 
   8998 // -------- TODO -------------------------- {{{1
   8999 // - Support configurable aliases for function names (is it doable in valgrind)?
   9000 // - Correctly support atomic operations (not just ignore).
   9001 // - Handle INC as just one write
   9002 //   - same for memset, etc
   9003 // - Implement correct handling of memory accesses with different sizes.
   9004 // - Do not create HB arcs between RdUnlock and RdLock
   9005 // - Compress cache lines
    9006 // - Optimize the case where a thread signals twice in a row on the same
   9007 //   address.
   9008 // - Fix --ignore-in-dtor if --demangle=no.
   9009 // - Use cpplint (http://code.google.com/p/google-styleguide)
   9010 // - Get rid of annoying casts in printfs.
   9011 // - Compress stack traces (64-bit only. may save up to 36 bytes per segment).
   9012 // end. {{{1
   9013 // vim:shiftwidth=2:softtabstop=2:expandtab:tw=80
   9014