/* Copyright (c) 2008-2010, Google Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

// This file is part of ThreadSanitizer, a dynamic data race detector.
// Author: Konstantin Serebryany.
// Author: Timur Iskhodzhanov.

// You can find the details on this tool at
// http://code.google.com/p/data-race-test

#include "thread_sanitizer.h"
#include "common_util.h"
#include "suppressions.h"
#include "ignore.h"
#include "ts_lock.h"
#include "dense_multimap.h"
#include <stdarg.h>
// -------- Constants --------------- {{{1
// Segment ID (SID)      is in range [1, kMaxSID-1]
// Segment Set ID (SSID) is in range [-kMaxSID+1, -1]
// This is not a compile-time constant, but it can only be changed at startup.
int kMaxSID = (1 << 23);
// Flush state after this many SIDs have been allocated; set by a command line flag.
int kMaxSIDBeforeFlush;

// Lock ID (LID)      is in range [1, kMaxLID-1]
// Lock Set ID (LSID) is in range [-kMaxLID+1, -1]
const int kMaxLID = (1 << 23);

// This is not a compile-time constant, but it can be changed only at startup.
int kSizeOfHistoryStackTrace = 10;

// Maximal number of segments in a SegmentSet.
// If you change this constant, you also need to change several places
// in SegmentSet code.
const int kMaxSegmentSetSize = 4;

// -------- Globals --------------- {{{1

// If true, ignore all accesses in all threads.
bool global_ignore;

bool g_so_far_only_one_thread = false;
bool g_has_entered_main = false;
bool g_has_exited_main = false;

size_t g_last_flush_time;

// Incremented on each Lock and Unlock. Used by LockHistory.
uint32_t g_lock_era = 0;

uintptr_t g_nacl_mem_start = (uintptr_t)-1;
uintptr_t g_nacl_mem_end = (uintptr_t)-1;

bool g_race_verifier_active = false;

bool debug_expected_races = false;
bool debug_benign_races = false;
bool debug_malloc = false;
bool debug_free = false;
bool debug_thread = false;
bool debug_ignore = false;
bool debug_rtn = false;
bool debug_lock = false;
bool debug_wrap = false;
bool debug_ins = false;
bool debug_shadow_stack = false;
bool debug_happens_before = false;
bool debug_cache = false;
bool debug_race_verifier = false;

// -------- TIL --------------- {{{1
// ThreadSanitizer Internal lock (scoped).
class TIL {
 public:
  TIL(TSLock *lock, int lock_site, bool need_locking = true) :
    lock_(lock),
    need_locking_(need_locking) {
    DCHECK(lock_);
    if (need_locking_ && (TS_SERIALIZED == 0)) {
      lock_->Lock();
      G_stats->lock_sites[lock_site]++;
    }
  }
  ~TIL() {
    if (need_locking_ && (TS_SERIALIZED == 0))
      lock_->Unlock();
  }
 private:
  TSLock *lock_;
  bool need_locking_;
};
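
// A minimal usage sketch (illustrative only, not part of the tool): TIL is a
// scoped guard around the internal lock 'ts_lock' declared below; the
// function name and lock-site index 0 here are hypothetical.
//
//   void HandleSomeEvent() {
//     TIL til(ts_lock, /*lock_site=*/0);  // locks only if TS_SERIALIZED == 0
//     // ... mutate global detector state ...
//   }  // the destructor releases ts_lock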

static TSLock *ts_lock;
static TSLock *ts_ignore_below_lock;

#ifdef TS_LLVM
void ThreadSanitizerLockAcquire() {
  ts_lock->Lock();
}

void ThreadSanitizerLockRelease() {
  ts_lock->Unlock();
}
#endif

static INLINE void AssertTILHeld() {
  if (TS_SERIALIZED == 0 && DEBUG_MODE) {
    ts_lock->AssertHeld();
  }
}

// -------- Util ----------------------------- {{{1

// Can't use ANNOTATE_UNPROTECTED_READ, it may get instrumented.
template <class T>
inline T INTERNAL_ANNOTATE_UNPROTECTED_READ(const volatile T &x) {
  ANNOTATE_IGNORE_READS_BEGIN();
  T res = x;
  ANNOTATE_IGNORE_READS_END();
  return res;
}

static string RemoveFilePrefix(string str) {
  for (size_t i = 0; i < G_flags->file_prefix_to_cut.size(); i++) {
    string prefix_to_cut = G_flags->file_prefix_to_cut[i];
    size_t pos = str.find(prefix_to_cut);
    if (pos != string::npos) {
      str = str.substr(pos + prefix_to_cut.size());
    }
  }
  if (str.find("./") == 0) {  // remove leading ./
    str = str.substr(2);
  }
  return str;
}

string PcToRtnNameAndFilePos(uintptr_t pc) {
  G_stats->pc_to_strings++;
  string img_name;
  string file_name;
  string rtn_name;
  int line_no = -1;
  PcToStrings(pc, G_flags->demangle, &img_name, &rtn_name,
              &file_name, &line_no);
  if (G_flags->demangle && !G_flags->full_stack_frames)
    rtn_name = NormalizeFunctionName(rtn_name);
  file_name = RemoveFilePrefix(file_name);
  if (file_name == "") {
    return rtn_name + " " + RemoveFilePrefix(img_name);
  }
  char buff[10];
  snprintf(buff, sizeof(buff), "%d", line_no);
  return rtn_name + " " + file_name + ":" + buff;
}

// -------- ID ---------------------- {{{1
// We wrap int32_t into an ID class and then derive the various ID types
// from it. This is done in an attempt to make IDs type-safe, i.e.
// to make it impossible to implicitly cast one ID type to another.
class ID {
 public:
  typedef int32_t T;
  explicit ID(T id) : id_(id) {}
  ID(const ID &id) : id_(id.id_) {}
  INLINE bool operator ==  (const ID &id) const { return id_ == id.id_; }
  bool operator !=  (const ID &id) const { return id_ != id.id_; }
  bool operator <  (const ID &id) const { return id_ < id.id_; }
  bool operator >  (const ID &id) const { return id_ > id.id_; }
  bool operator >=  (const ID &id) const { return id_ >= id.id_; }
  bool operator <=  (const ID &id) const { return id_ <= id.id_; }

  bool IsValid() const { return id_ >= 0; }

  const ID &operator = (const ID &id) {
    this->id_ = id.id_;
    return *this;
  }
  T raw() const { return id_; }

 private:
  T id_;
};
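
// An illustrative sketch (hypothetical, not used below) of the type safety
// this buys: the constructors are explicit and the subclasses that follow
// are unrelated types, so the mistakes we care about fail to compile.
//
//   void OnThread(TID tid);  // hypothetical function taking a thread ID
//   SID sid(42);
//   TID tid(42);             // OK: explicit construction from a raw int.
//   // TID tid2 = 42;        // Would not compile: constructor is explicit.
//   // OnThread(sid);        // Would not compile: an SID is not a TID.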

// Thread ID.
// id >= 0
class TID: public ID {
 public:
  static const int32_t kInvalidTID;

  explicit TID(T id) : ID(id) {}
  TID() : ID(kInvalidTID) {}
  bool valid() const { return raw() >= 0; }
};

const int32_t TID::kInvalidTID = -1;

// Segment ID.
// id > 0 && id < kMaxSID
class SID: public ID {
 public:
  explicit SID(T id) : ID(id) {}
  SID() : ID(0) {}
  bool valid() const { return raw() > 0 && raw() < kMaxSID; }
};

// Lock ID.
// id > 0 && id < kMaxLID
class LID: public ID {
 public:
  explicit LID(T id) : ID(id) {}
  LID() : ID(0) {}
  bool valid() const { return raw() > 0 && raw() < kMaxLID; }
};

// LockSet ID.
// Empty lockset: id == 0
// Singleton:     id > 0 (id == Lock's id)
// Tuple:         id < 0
class LSID: public ID {
 public:
  explicit LSID(T id) : ID(id) {}
  LSID() : ID(INT_MAX) {}
  bool valid() const {
    return raw() < kMaxLID && raw() > -(kMaxLID);
  }
  bool IsEmpty() const { return raw() == 0; }
  bool IsSingleton() const { return raw() > 0; }
  LID GetSingleton() const { return LID(raw()); }
};

// SegmentSet ID.
// Empty SegmentSet: id == 0
// Singleton:        id > 0 (id == Segment's id)
// Tuple:            id < 0
class SSID: public ID {
 public:
  explicit SSID(T id) : ID(id) {}
  explicit SSID(SID sid) : ID(sid.raw()) {}
  SSID(): ID(INT_MAX) {}
  bool valid() const {
    return raw() != 0 && raw() < kMaxSID && raw() > -kMaxSID;
  }
  bool IsValidOrEmpty() { return raw() < kMaxSID && raw() > -kMaxSID; }
  bool IsEmpty() const { return raw() == 0; }
  bool IsSingleton() const {return raw() > 0; }
  bool IsTuple() const {return raw() < 0; }
  SID  GetSingleton() const {
    DCHECK(IsSingleton());
    return SID(raw());
  }
  // TODO(timurrrr): need to start SegmentSetArray indices from 1
  // to avoid "int ???() { return -raw() - 1; }"
};
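
// A worked example of the signed encoding above (illustrative only): the
// sign of the raw value distinguishes "nothing", "exactly one element"
// (stored inline, no set object needed), and "a real set".
//
//   LSID(0)     // empty lock set
//   LSID(7)     // singleton: just lock L7; GetSingleton() == LID(7)
//   LSID(-3)    // tuple: entry -(-3) - 1 == 2 in the table of lock sets
//
// SSIDs follow the same scheme with segments instead of locks.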

// -------- Colors ----------------------------- {{{1
// Colors for ansi terminals and for html.
const char *c_bold    = "";
const char *c_red     = "";
const char *c_green   = "";
const char *c_magenta = "";
const char *c_cyan    = "";
const char *c_blue    = "";
const char *c_yellow  = "";
const char *c_default = "";


// -------- Forward decls ------ {{{1
static void ForgetAllStateAndStartOver(Thread *thr, const char *reason);
static void FlushStateIfOutOfSegments(Thread *thr);
static int32_t raw_tid(Thread *t);
// -------- Simple Cache ------ {{{1
#include "ts_simple_cache.h"
// -------- PairCache & IntPairToIntCache ------ {{{1
template <typename A, typename B, typename Ret,
         int kHtableSize, int kArraySize = 8>
class PairCache {
 public:
  PairCache() {
    CHECK(kHtableSize >= 0);
    CHECK(sizeof(Entry) == sizeof(A) + sizeof(B) + sizeof(Ret));
    Flush();
  }

  void Flush() {
    memset(this, 0, sizeof(*this));

    // Change the first hashtable entry so it doesn't match (0,0) on Lookup.
    if (kHtableSize != 0)
      memset(&htable_[0], 1, sizeof(Entry));

    // Any Lookup should fail now.
    for (int i = 0; i < kHtableSize; i++) {
      Ret tmp;
      DCHECK(!Lookup(htable_[i].a, htable_[i].b, &tmp));
    }
    CHECK(array_pos_    == 0);
    CHECK(array_filled_ == false);
  }

  void Insert(A a, B b, Ret v) {
    // fill the hash table
    if (kHtableSize != 0) {
      uint32_t idx  = compute_idx(a, b);
      htable_[idx].Fill(a, b, v);
    }

    // fill the array
    Ret dummy;
    if (kArraySize != 0 && !ArrayLookup(a, b, &dummy)) {
      array_[array_pos_ % kArraySize].Fill(a, b, v);
      array_pos_ = (array_pos_ + 1) % kArraySize;
      if (array_pos_ == 0)  // we just wrapped around: every slot is in use
        array_filled_ = true;
    }
  }

  INLINE bool Lookup(A a, B b, Ret *v) {
    // check the array
    if (kArraySize != 0 && ArrayLookup(a, b, v)) {
      G_stats->ls_cache_fast++;
      return true;
    }
    // check the hash table.
    if (kHtableSize != 0) {
      uint32_t idx  = compute_idx(a, b);
      Entry & prev_e = htable_[idx];
      if (prev_e.Match(a, b)) {
        *v = prev_e.v;
        return true;
      }
    }
    return false;
  }

 private:
  struct Entry {
    A a;
    B b;
    Ret v;
    void Fill(A a, B b, Ret v) {
      this->a = a;
      this->b = b;
      this->v = v;
    }
    bool Match(A a, B b) const {
      return this->a == a && this->b == b;
    }
  };

  INLINE bool ArrayLookup(A a, B b, Ret *v) {
    for (int i = 0; i < (array_filled_ ? kArraySize : array_pos_); i++) {
      Entry & entry = array_[i];
      if (entry.Match(a, b)) {
        *v = entry.v;
        return true;
      }
    }
    return false;
  }

  uint32_t compute_idx(A a, B b) {
    if (kHtableSize == 0)
      return 0;
    else
      return combine2(a, b) % kHtableSize;
  }

  static uint32_t combine2(int a, int b) {
    return (a << 16) ^ b;
  }

  static uint32_t combine2(SSID a, SID b) {
    return combine2(a.raw(), b.raw());
  }

  Entry htable_[kHtableSize];

  Entry array_[kArraySize];

  // array_pos_    - next element to write to the array_ (mod kArraySize)
  // array_filled_ - set to true once we write the last element of the array
  int array_pos_;
  bool array_filled_;
};

template<int kHtableSize, int kArraySize = 8>
class IntPairToIntCache
  : public PairCache<int, int, int, kHtableSize, kArraySize> {};
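
// A minimal usage sketch (illustrative only; 1021 is just an example prime
// table size): Insert() may evict an old pair on a hash collision, so a
// Lookup() miss simply means "recompute the value and insert it again".
//
//   static IntPairToIntCache<1021> *cache = new IntPairToIntCache<1021>;
//   cache->Insert(5, 7, 42);
//   int v;
//   if (cache->Lookup(5, 7, &v)) {
//     CHECK(v == 42);  // cached result for the pair (5, 7)
//   }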



// -------- FreeList --------------- {{{1
class FreeList {
 public:
  FreeList(int obj_size, int chunk_size)
    : list_(0),
      obj_size_(obj_size),
      chunk_size_(chunk_size) {
    // Each free object must be able to hold a List* link.
    CHECK_GE(obj_size_, static_cast<int>(sizeof(void*)));
    CHECK((obj_size_ % sizeof(void*)) == 0);
    CHECK_GE(chunk_size_, 1);
  }

  void *Allocate() {
    if (!list_)
      AllocateNewChunk();
    CHECK(list_);
    List *head = list_;
    list_ = list_->next;
    return reinterpret_cast<void*>(head);
  }

  void Deallocate(void *ptr) {
    if (DEBUG_MODE) {
      memset(ptr, 0xac, obj_size_);
    }
    List *new_head = reinterpret_cast<List*>(ptr);
    new_head->next = list_;
    list_ = new_head;
  }

 private:
  void AllocateNewChunk() {
    CHECK(list_ == NULL);
    uint8_t *new_mem = new uint8_t[obj_size_ * chunk_size_];
    if (DEBUG_MODE) {
      memset(new_mem, 0xab, obj_size_ * chunk_size_);
    }
    for (int i = 0; i < chunk_size_; i++) {
      List *new_head = reinterpret_cast<List*>(new_mem + obj_size_ * i);
      new_head->next = list_;
      list_ = new_head;
    }
  }
  struct List {
    struct List *next;
  };
  List *list_;


  const int obj_size_;
  const int chunk_size_;
};
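
// A minimal usage sketch (illustrative only): blocks of a fixed size are
// carved out of chunk_size-object chunks and recycled through an intrusive
// list; the chunks themselves are never returned to the system.
//
//   static FreeList *blocks = new FreeList(/*obj_size=*/32,
//                                          /*chunk_size=*/1024);
//   void *p = blocks->Allocate();   // pops a 32-byte block (or refills)
//   // ... use the block ...
//   blocks->Deallocate(p);          // pushes it back onto the list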
// -------- StackTrace -------------- {{{1
class StackTraceFreeList {
 public:
  uintptr_t *GetNewMemForStackTrace(size_t capacity) {
    DCHECK(capacity <= (size_t)G_flags->num_callers);
    return reinterpret_cast<uintptr_t*>(free_lists_[capacity]->Allocate());
  }

  void TakeStackTraceBack(uintptr_t *mem, size_t capacity) {
    DCHECK(capacity <= (size_t)G_flags->num_callers);
    free_lists_[capacity]->Deallocate(mem);
  }

  StackTraceFreeList() {
    size_t n = G_flags->num_callers + 1;
    free_lists_ = new FreeList *[n];
    free_lists_[0] = NULL;
    for (size_t i = 1; i < n; i++) {
      free_lists_[i] = new FreeList((i+2) * sizeof(uintptr_t), 1024);
    }
  }

 private:
  FreeList **free_lists_;  // Array of G_flags->num_callers + 1 lists; index 0 is unused.
};

static StackTraceFreeList *g_stack_trace_free_list;

class StackTrace {
 public:
  static StackTrace *CreateNewEmptyStackTrace(size_t size,
                                              size_t capacity = 0) {
    ScopedMallocCostCenter cc("StackTrace::CreateNewEmptyStackTrace()");
    DCHECK(g_stack_trace_free_list);
    DCHECK(size != 0);
    if (capacity == 0)
      capacity = size;
    uintptr_t *mem = g_stack_trace_free_list->GetNewMemForStackTrace(capacity);
    DCHECK(mem);
    StackTrace *res = new(mem) StackTrace(size, capacity);
    return res;
  }

  static void Delete(StackTrace *trace) {
    if (!trace) return;
    DCHECK(g_stack_trace_free_list);
    g_stack_trace_free_list->TakeStackTraceBack(
        reinterpret_cast<uintptr_t*>(trace), trace->capacity());
  }

  size_t size() const { return size_; }
  size_t capacity() const { return capacity_; }

  void set_size(size_t size) {
    CHECK(size <= capacity());
    size_ = size;
  }


  void Set(size_t i, uintptr_t pc) {
    arr_[i] = pc;
  }

  uintptr_t Get(size_t i) const {
    return arr_[i];
  }

  static bool CutStackBelowFunc(const string func_name) {
    for (size_t i = 0; i < G_flags->cut_stack_below.size(); i++) {
      if (StringMatch(G_flags->cut_stack_below[i], func_name)) {
        return true;
      }
    }
    return false;
  }

  static string EmbeddedStackTraceToString(const uintptr_t *emb_trace, size_t n,
                                           const char *indent = "    ") {
    string res = "";
    const int kBuffSize = 10000;
    char *buff = new char [kBuffSize];
    for (size_t i = 0; i < n; i++) {
      if (!emb_trace[i]) break;
      string rtn_and_file = PcToRtnNameAndFilePos(emb_trace[i]);
      if (rtn_and_file.find("(below main) ") == 0 ||
          rtn_and_file.find("ThreadSanitizerStartThread ") == 0)
        break;

      if (i == 0) res += c_bold;
      if (G_flags->show_pc) {
        snprintf(buff, kBuffSize, "%s#%-2d %p: ",
                 indent, static_cast<int>(i),
                 reinterpret_cast<void*>(emb_trace[i]));
      } else {
        snprintf(buff, kBuffSize, "%s#%-2d ", indent, static_cast<int>(i));
      }
      res += buff;

      res += rtn_and_file;
      if (i == 0) res += c_default;
      res += "\n";

      // don't print after main ...
      if (rtn_and_file.find("main ") == 0)
        break;
      // ... and after some default functions (see ThreadSanitizerParseFlags())
      // and some more functions specified via command line flag.
      string rtn = NormalizeFunctionName(PcToRtnName(emb_trace[i], true));
      if (CutStackBelowFunc(rtn))
        break;
    }
    delete [] buff;
    return res;
  }

  string ToString(const char *indent = "    ") const {
    if (!this) return "NO STACK TRACE\n";
    if (size() == 0) return "EMPTY STACK TRACE\n";
    return EmbeddedStackTraceToString(arr_, size(), indent);
  }

  void PrintRaw() const {
    for (size_t i = 0; i < size(); i++) {
      Printf("%p ", arr_[i]);
    }
    Printf("\n");
  }

  static bool Equals(const StackTrace *t1, const StackTrace *t2) {
    if (t1->size_ != t2->size_) return false;
    for (size_t i = 0; i < t1->size_; i++) {
      if (t1->arr_[i] != t2->arr_[i]) return false;
    }
    return true;
  }

  struct Less {
    bool operator() (const StackTrace *t1, const StackTrace *t2) const {
      size_t size = min(t1->size_, t2->size_);
      for (size_t i = 0; i < size; i++) {
        if (t1->arr_[i] != t2->arr_[i]) {
          return (t1->arr_[i] < t2->arr_[i]);
        }
      }
      return t1->size_ < t2->size_;
    }
  };

 private:
  StackTrace(size_t size, size_t capacity)
    : size_(size),
      capacity_(capacity) {
  }

  ~StackTrace() {}

  size_t size_;
  size_t capacity_;
  uintptr_t arr_[];
};
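
// A minimal usage sketch (illustrative only; the PC values are made up):
// traces are placement-new'd on memory owned by g_stack_trace_free_list,
// so they must be released with StackTrace::Delete(), not operator delete.
//
//   StackTrace *trace = StackTrace::CreateNewEmptyStackTrace(2);
//   trace->Set(0, 0x400123);
//   trace->Set(1, 0x400456);
//   Printf("%s", trace->ToString().c_str());
//   StackTrace::Delete(trace);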



// -------- Lock -------------------- {{{1
const char *kLockAllocCC = "kLockAllocCC";
class Lock {
 public:

  static Lock *Create(uintptr_t lock_addr) {
    ScopedMallocCostCenter cc("LockLookup");
//    Printf("Lock::Create: %p\n", lock_addr);
    // Destroy(lock_addr);

    // CHECK(Lookup(lock_addr) == NULL);
    Lock *res = LookupOrCreate(lock_addr);
    res->rd_held_ = 0;
    res->wr_held_ = 0;
    res->is_pure_happens_before_ = G_flags->pure_happens_before;
    res->last_lock_site_ = NULL;
    return res;
  }

  static void Destroy(uintptr_t lock_addr) {
//    Printf("Lock::Destroy: %p\n", lock_addr);
  //  map_.erase(lock_addr);
  }

  static NOINLINE Lock *LookupOrCreate(uintptr_t lock_addr) {
    ScopedMallocCostCenter cc("LockLookup");
    Lock **lock = &(*map_)[lock_addr];
    if (*lock == NULL) {
//      Printf("Lock::LookupOrCreate: %p\n", lock_addr);
      ScopedMallocCostCenter cc_lock("new Lock");
      *lock = new Lock(lock_addr, map_->size());
    }
    return *lock;
  }

  static NOINLINE Lock *Lookup(uintptr_t lock_addr) {
    ScopedMallocCostCenter cc("LockLookup");
    Map::iterator it = map_->find(lock_addr);
    if (it == map_->end()) return NULL;
    return it->second;
  }

  int       rd_held()   const { return rd_held_; }
  int       wr_held()   const { return wr_held_; }
  uintptr_t lock_addr() const { return lock_addr_; }
  LID       lid()       const { return lid_; }
  bool is_pure_happens_before() const { return is_pure_happens_before_; }

  // When a lock is pure happens-before, we need to create hb arcs
  // between all Unlock/Lock pairs except RdUnlock/RdLock.
  // For that purpose we have two IDs on which we signal/wait.
  // One id is the lock_addr itself, the second id is derived
  // from lock_addr.
  uintptr_t wr_signal_addr() const { return lock_addr(); }
  uintptr_t rd_signal_addr() const { return lock_addr() + 1; }


  void set_is_pure_happens_before(bool x) { is_pure_happens_before_ = x; }

  void WrLock(TID tid, StackTrace *lock_site) {
    CHECK(!rd_held_);
    if (wr_held_ == 0) {
      thread_holding_me_in_write_mode_ = tid;
    } else {
      CHECK(thread_holding_me_in_write_mode_ == tid);
    }
    wr_held_++;
    StackTrace::Delete(last_lock_site_);
    last_lock_site_ = lock_site;
  }

  void WrUnlock() {
    CHECK(!rd_held_);
    CHECK(wr_held_ > 0);
    wr_held_--;
  }

  void RdLock(StackTrace *lock_site) {
    CHECK(!wr_held_);
    rd_held_++;
    StackTrace::Delete(last_lock_site_);
    last_lock_site_ = lock_site;
  }

  void RdUnlock() {
    CHECK(!wr_held_);
    CHECK(rd_held_);
    rd_held_--;
  }

  void set_name(const char *name) { name_ = name; }
  const char *name() const { return name_; }

  string ToString() const {
    string res;
    char buff[100];
    snprintf(buff, sizeof(buff), "L%d", lid_.raw());
    // do we need to print the address?
    // reinterpret_cast<void*>(lock_addr()));
    res = buff;
    if (name()) {
      res += string(" ") + name();
    }
    return res;
  }

  static Lock *LIDtoLock(LID lid) {
    // slow, but needed only for reports.
    for (Map::iterator it = map_->begin(); it != map_->end(); ++it) {
      Lock *l = it->second;
      if (l->lid_ == lid) {
        return l;
      }
    }
    return NULL;
  }

  static string ToString(LID lid) {
    Lock *lock = LIDtoLock(lid);
    CHECK(lock);
    return lock->ToString();
  }

  static void ReportLockWithOrWithoutContext(LID lid, bool with_context) {
    if (!with_context) {
      Report("   L%d\n", lid.raw());
      return;
    }
    Lock *lock = LIDtoLock(lid);
    CHECK(lock);
    if (lock->last_lock_site_) {
      Report("   %s (%p)\n%s",
             lock->ToString().c_str(),
             lock->lock_addr_,
             lock->last_lock_site_->ToString().c_str());
    } else {
      Report("   %s. This lock was probably destroyed"
                 " w/o calling Unlock()\n", lock->ToString().c_str());
    }
  }

  static void InitClassMembers() {
    map_ = new Lock::Map;
  }

 private:
  Lock(uintptr_t lock_addr, int32_t lid)
    : lock_addr_(lock_addr),
      lid_(lid),
      rd_held_(0),
      wr_held_(0),
      is_pure_happens_before_(G_flags->pure_happens_before),
      last_lock_site_(0),
      name_(NULL) {
  }

  // Data members
  uintptr_t lock_addr_;
  LID       lid_;
  int       rd_held_;
  int       wr_held_;
  bool      is_pure_happens_before_;
  StackTrace *last_lock_site_;
  const char *name_;
  TID       thread_holding_me_in_write_mode_;

  // Static members
  typedef map<uintptr_t, Lock*> Map;
  static Map *map_;
};


Lock::Map *Lock::map_;

// Returns a string like "L123, L234".
static string SetOfLocksToString(const set<LID> &locks) {
  string res;
  for (set<LID>::const_iterator it = locks.begin();
       it != locks.end(); ++it) {
    LID lid = *it;
    char buff[100];
    snprintf(buff, sizeof(buff), "L%d", lid.raw());
    if (it != locks.begin())
      res += ", ";
    res += buff;
  }
  return res;
}

// -------- FixedArray --------------- {{{1
template <typename T, size_t SizeLimit = 1024>
class FixedArray {
 public:
  explicit INLINE FixedArray(size_t array_size)
      : size_(array_size),
        array_((array_size <= SizeLimit
                ? alloc_space_
                : new T[array_size])) { }

  ~FixedArray() {
    if (array_ != alloc_space_) {
      delete[] array_;
    }
  }

  T* begin() { return array_; }
  T& operator[](int i)             { return array_[i]; }

 private:
  const size_t size_;
  T* array_;
  T alloc_space_[SizeLimit];
};
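
// A minimal usage sketch (illustrative only): sizes up to SizeLimit live in
// the in-object buffer, so the common case costs no heap allocation; only
// larger arrays fall back to new[].
//
//   FixedArray<LID> tmp(16);   // 16 <= 1024, stays in the in-object buffer
//   tmp[0] = LID(1);
//   LID *begin = tmp.begin();  // usable with STL algorithms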

// -------- LockSet ----------------- {{{1
class LockSet {
 public:
  NOINLINE static LSID Add(LSID lsid, Lock *lock) {
    ScopedMallocCostCenter cc("LockSetAdd");
    LID lid = lock->lid();
    if (lsid.IsEmpty()) {
      // adding to an empty lock set
      G_stats->ls_add_to_empty++;
      return LSID(lid.raw());
    }
    int cache_res;
    if (ls_add_cache_->Lookup(lsid.raw(), lid.raw(), &cache_res)) {
      G_stats->ls_add_cache_hit++;
      return LSID(cache_res);
    }
    LSID res;
    if (lsid.IsSingleton()) {
      LSSet set(lsid.GetSingleton(), lid);
      G_stats->ls_add_to_singleton++;
      res = ComputeId(set);
    } else {
      LSSet set(Get(lsid), lid);
      G_stats->ls_add_to_multi++;
      res = ComputeId(set);
    }
    ls_add_cache_->Insert(lsid.raw(), lid.raw(), res.raw());
    return res;
  }

  // If lock is present in lsid, set new_lsid to (lsid \ lock) and return true.
  // Otherwise set new_lsid to lsid and return false.
  NOINLINE static bool Remove(LSID lsid, Lock *lock, LSID *new_lsid) {
    *new_lsid = lsid;
    if (lsid.IsEmpty()) return false;
    LID lid = lock->lid();

    if (lsid.IsSingleton()) {
      // removing the only lock -> LSID(0)
      if (lsid.GetSingleton() != lid) return false;
      G_stats->ls_remove_from_singleton++;
      *new_lsid = LSID(0);
      return true;
    }

    int cache_res;
    if (ls_rem_cache_->Lookup(lsid.raw(), lid.raw(), &cache_res)) {
      G_stats->ls_rem_cache_hit++;
      *new_lsid = LSID(cache_res);
      return true;
    }

    LSSet &prev_set = Get(lsid);
    if (!prev_set.has(lid)) return false;
    LSSet set(prev_set, LSSet::REMOVE, lid);
    CHECK(set.size() == prev_set.size() - 1);
    G_stats->ls_remove_from_multi++;
    LSID res = ComputeId(set);
    ls_rem_cache_->Insert(lsid.raw(), lid.raw(), res.raw());
    *new_lsid = res;
    return true;
  }

  NOINLINE static bool IntersectionIsEmpty(LSID lsid1, LSID lsid2) {
    // at least one empty
    if (lsid1.IsEmpty() || lsid2.IsEmpty())
      return true;  // empty

    // both singletons
    if (lsid1.IsSingleton() && lsid2.IsSingleton()) {
      return lsid1 != lsid2;
    }

    // first is singleton, second is not
    if (lsid1.IsSingleton()) {
      const LSSet &set2 = Get(lsid2);
      return set2.has(LID(lsid1.raw())) == false;
    }

    // second is singleton, first is not
    if (lsid2.IsSingleton()) {
      const LSSet &set1 = Get(lsid1);
      return set1.has(LID(lsid2.raw())) == false;
    }

    // LockSets are equal and not empty
    if (lsid1 == lsid2)
      return false;

    // both are not singletons - slow path.
    bool ret = true,
         cache_hit = false;
    DCHECK(lsid2.raw() < 0);
    if (ls_intersection_cache_->Lookup(lsid1.raw(), -lsid2.raw(), &ret)) {
      if (!DEBUG_MODE)
        return ret;
      cache_hit = true;
    }
    const LSSet &set1 = Get(lsid1);
    const LSSet &set2 = Get(lsid2);

    FixedArray<LID> intersection(min(set1.size(), set2.size()));
    LID *end = std::set_intersection(set1.begin(), set1.end(),
                            set2.begin(), set2.end(),
                            intersection.begin());
    DCHECK(!cache_hit || (ret == (end == intersection.begin())));
    ret = (end == intersection.begin());
    ls_intersection_cache_->Insert(lsid1.raw(), -lsid2.raw(), ret);
    return ret;
  }

  static bool HasNonPhbLocks(LSID lsid) {
    if (lsid.IsEmpty())
      return false;
    if (lsid.IsSingleton())
      return !Lock::LIDtoLock(LID(lsid.raw()))->is_pure_happens_before();

    LSSet &set = Get(lsid);
    for (LSSet::const_iterator it = set.begin(); it != set.end(); ++it)
      if (!Lock::LIDtoLock(*it)->is_pure_happens_before())
        return true;
    return false;
  }

  static string ToString(LSID lsid) {
    if (lsid.IsEmpty()) {
      return "{}";
    } else if (lsid.IsSingleton()) {
      return "{" + Lock::ToString(lsid.GetSingleton()) + "}";
    }
    const LSSet &set = Get(lsid);
    string res = "{";
    for (LSSet::const_iterator it = set.begin(); it != set.end(); ++it) {
      if (it != set.begin()) res += ", ";
      res += Lock::ToString(*it);
    }
    res += "}";
    return res;
  }

  static void ReportLockSetWithContexts(LSID lsid,
                                        set<LID> *locks_reported,
                                        const char *descr) {
    if (lsid.IsEmpty()) return;
    Report("%s%s%s\n", c_green, descr, c_default);
    if (lsid.IsSingleton()) {
      LID lid = lsid.GetSingleton();
      Lock::ReportLockWithOrWithoutContext(lid,
                                           locks_reported->count(lid) == 0);
      locks_reported->insert(lid);
    } else {
      const LSSet &set = Get(lsid);
      for (LSSet::const_iterator it = set.begin(); it != set.end(); ++it) {
        LID lid = *it;
        Lock::ReportLockWithOrWithoutContext(lid,
                                     locks_reported->count(lid) == 0);
        locks_reported->insert(lid);
      }
    }
  }

  static void AddLocksToSet(LSID lsid, set<LID> *locks) {
    if (lsid.IsEmpty()) return;
    if (lsid.IsSingleton()) {
      locks->insert(lsid.GetSingleton());
    } else {
      const LSSet &set = Get(lsid);
      for (LSSet::const_iterator it = set.begin(); it != set.end(); ++it) {
        locks->insert(*it);
      }
    }
  }


  static void InitClassMembers() {
    map_ = new LockSet::Map;
    vec_ = new LockSet::Vec;
    ls_add_cache_ = new LSCache;
    ls_rem_cache_ = new LSCache;
    ls_int_cache_ = new LSCache;
    ls_intersection_cache_ = new LSIntersectionCache;
  }

 private:
  // No instances are allowed.
  LockSet() { }

  typedef DenseMultimap<LID, 3> LSSet;

  static LSSet &Get(LSID lsid) {
    ScopedMallocCostCenter cc(__FUNCTION__);
    int idx = -lsid.raw() - 1;
    DCHECK(idx >= 0);
    DCHECK(idx < static_cast<int>(vec_->size()));
    return (*vec_)[idx];
  }

  static LSID ComputeId(const LSSet &set) {
    CHECK(set.size() > 0);
    if (set.size() == 1) {
      // A singleton lock set has lsid == lid.
      return LSID(set.begin()->raw());
    }
    DCHECK(map_);
    DCHECK(vec_);
    // multiple locks.
    ScopedMallocCostCenter cc("LockSet::ComputeId");
    int32_t *id = &(*map_)[set];
    if (*id == 0) {
      vec_->push_back(set);
      *id = map_->size();
      if      (set.size() == 2) G_stats->ls_size_2++;
      else if (set.size() == 3) G_stats->ls_size_3++;
      else if (set.size() == 4) G_stats->ls_size_4++;
      else if (set.size() == 5) G_stats->ls_size_5++;
      else                      G_stats->ls_size_other++;
      if (*id >= 4096 && ((*id & (*id - 1)) == 0)) {
        Report("INFO: %d LockSet IDs have been allocated "
               "(2: %ld 3: %ld 4: %ld 5: %ld o: %ld)\n",
               *id,
               G_stats->ls_size_2, G_stats->ls_size_3,
               G_stats->ls_size_4, G_stats->ls_size_5,
               G_stats->ls_size_other
               );
      }
    }
    return LSID(-*id);
  }

  typedef map<LSSet, int32_t> Map;
  static Map *map_;

  static const char *kLockSetVecAllocCC;
  typedef vector<LSSet> Vec;
  static Vec *vec_;

//  static const int kPrimeSizeOfLsCache = 307;
//  static const int kPrimeSizeOfLsCache = 499;
  static const int kPrimeSizeOfLsCache = 1021;
  typedef IntPairToIntCache<kPrimeSizeOfLsCache> LSCache;
  static LSCache *ls_add_cache_;
  static LSCache *ls_rem_cache_;
  static LSCache *ls_int_cache_;
  typedef IntPairToBoolCache<kPrimeSizeOfLsCache> LSIntersectionCache;
  static LSIntersectionCache *ls_intersection_cache_;
};

LockSet::Map *LockSet::map_;
LockSet::Vec *LockSet::vec_;
const char *LockSet::kLockSetVecAllocCC = "kLockSetVecAllocCC";
LockSet::LSCache *LockSet::ls_add_cache_;
LockSet::LSCache *LockSet::ls_rem_cache_;
LockSet::LSCache *LockSet::ls_int_cache_;
LockSet::LSIntersectionCache *LockSet::ls_intersection_cache_;


static string TwoLockSetsToString(LSID rd_lockset, LSID wr_lockset) {
  string res;
  if (rd_lockset == wr_lockset) {
    res = "L";
    res += LockSet::ToString(wr_lockset);
  } else {
    res = "WR-L";
    res += LockSet::ToString(wr_lockset);
    res += "/RD-L";
    res += LockSet::ToString(rd_lockset);
  }
  return res;
}
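
// A worked example of the lockset transitions above (illustrative only;
// 'lock_a' and 'lock_b' stand for pointers obtained from Lock::Create):
//
//   LSID ls(0);                      // {}       - empty
//   ls = LockSet::Add(ls, lock_a);   // {La}     - singleton, raw() > 0
//   ls = LockSet::Add(ls, lock_b);   // {La, Lb} - tuple, raw() < 0
//   LSID rest;
//   if (LockSet::Remove(ls, lock_a, &rest)) {
//     // rest == {Lb}; IntersectionIsEmpty(ls, rest) is false since
//     // both sets still contain Lb.
//   }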




// -------- VTS ------------------ {{{1
class VTS {
 public:
  static size_t MemoryRequiredForOneVts(size_t size) {
    return sizeof(VTS) + size * sizeof(TS);
  }

  static size_t RoundUpSizeForEfficientUseOfFreeList(size_t size) {
    if (size < 32) return size;
    if (size < 64) return (size + 7) & ~7;
    if (size < 128) return (size + 15) & ~15;
    return (size + 31) & ~31;
  }

  static VTS *Create(size_t size) {
    DCHECK(size > 0);
    void *mem;
    size_t rounded_size = RoundUpSizeForEfficientUseOfFreeList(size);
    DCHECK(size <= rounded_size);
    if (rounded_size <= kNumberOfFreeLists) {
      // Small chunk, use FreeList.
      ScopedMallocCostCenter cc("VTS::Create (from free list)");
      mem = free_lists_[rounded_size]->Allocate();
      G_stats->vts_create_small++;
    } else {
      // Large chunk, use new/delete instead of FreeList.
      ScopedMallocCostCenter cc("VTS::Create (from new[])");
      mem = new int8_t[MemoryRequiredForOneVts(size)];
      G_stats->vts_create_big++;
    }
    VTS *res = new(mem) VTS(size);
    G_stats->vts_total_create += size;
    return res;
  }

  static void Unref(VTS *vts) {
    if (!vts) return;
    CHECK_GT(vts->ref_count_, 0);
    if (AtomicDecrementRefcount(&vts->ref_count_) == 0) {
      size_t size = vts->size_;  // can't use vts->size().
      size_t rounded_size = RoundUpSizeForEfficientUseOfFreeList(size);
      if (rounded_size <= kNumberOfFreeLists) {
        free_lists_[rounded_size]->Deallocate(vts);
        G_stats->vts_delete_small++;
      } else {
        G_stats->vts_delete_big++;
        delete vts;
      }
      G_stats->vts_total_delete += rounded_size;
    }
  }

  static VTS *CreateSingleton(TID tid, int32_t clk = 1) {
    VTS *res = Create(1);
    res->arr_[0].tid = tid.raw();
    res->arr_[0].clk = clk;
    return res;
  }

  VTS *Clone() {
    G_stats->vts_clone++;
    AtomicIncrementRefcount(&ref_count_);
    return this;
  }

  static VTS *CopyAndTick(const VTS *vts, TID id_to_tick) {
    CHECK(vts->ref_count_);
    VTS *res = Create(vts->size());
    bool found = false;
    for (size_t i = 0; i < res->size(); i++) {
      res->arr_[i] = vts->arr_[i];
      if (res->arr_[i].tid == id_to_tick.raw()) {
        res->arr_[i].clk++;
        found = true;
      }
    }
    CHECK(found);
    return res;
  }

  static VTS *Join(const VTS *vts_a, const VTS *vts_b) {
    CHECK(vts_a->ref_count_);
    CHECK(vts_b->ref_count_);
    FixedArray<TS> result_ts(vts_a->size() + vts_b->size());
    TS *t = result_ts.begin();
    const TS *a = &vts_a->arr_[0];
    const TS *b = &vts_b->arr_[0];
    const TS *a_max = a + vts_a->size();
    const TS *b_max = b + vts_b->size();
    while (a < a_max && b < b_max) {
      if (a->tid < b->tid) {
        *t = *a;
        a++;
        t++;
      } else if (a->tid > b->tid) {
        *t = *b;
        b++;
        t++;
      } else {
        if (a->clk >= b->clk) {
          *t = *a;
        } else {
          *t = *b;
        }
        a++;
        b++;
        t++;
      }
    }
    while (a < a_max) {
      *t = *a;
      a++;
      t++;
    }
    while (b < b_max) {
      *t = *b;
      b++;
      t++;
    }

    VTS *res = VTS::Create(t - result_ts.begin());
    for (size_t i = 0; i < res->size(); i++) {
      res->arr_[i] = result_ts[i];
    }
    return res;
  }

  static INLINE void FlushHBCache() {
    hb_cache_->Flush();
  }

  static INLINE bool HappensBeforeCached(const VTS *vts_a, const VTS *vts_b) {
    bool res = false;
    if (hb_cache_->Lookup(vts_a->uniq_id_, vts_b->uniq_id_, &res)) {
      G_stats->n_vts_hb_cached++;
      DCHECK(res == HappensBefore(vts_a, vts_b));
      return res;
    }
    res = HappensBefore(vts_a, vts_b);
    hb_cache_->Insert(vts_a->uniq_id_, vts_b->uniq_id_, res);
    return res;
  }

  // return true if vts_a happens-before vts_b.
  static NOINLINE bool HappensBefore(const VTS *vts_a, const VTS *vts_b) {
    CHECK(vts_a->ref_count_);
    CHECK(vts_b->ref_count_);
    G_stats->n_vts_hb++;
    const TS *a = &vts_a->arr_[0];
    const TS *b = &vts_b->arr_[0];
    const TS *a_max = a + vts_a->size();
    const TS *b_max = b + vts_b->size();
    bool a_less_than_b = false;
    while (a < a_max && b < b_max) {
      if (a->tid < b->tid) {
        // a->tid is not present in b.
        return false;
      } else if (a->tid > b->tid) {
        // b->tid is not present in a.
        a_less_than_b = true;
        b++;
      } else {
        // this tid is present in both VTSs. Compare clocks.
        if (a->clk > b->clk) return false;
        if (a->clk < b->clk) a_less_than_b = true;
        a++;
        b++;
      }
    }
    if (a < a_max) {
      // Some tids are present in a and not in b
      return false;
    }
    if (b < b_max) {
      return true;
    }
    return a_less_than_b;
  }

  size_t size() const {
    DCHECK(ref_count_);
    return size_;
  }

  string ToString() const {
    DCHECK(ref_count_);
    string res = "[";
    for (size_t i = 0; i < size(); i++) {
      char buff[100];
      snprintf(buff, sizeof(buff), "%d:%d;", arr_[i].tid, arr_[i].clk);
      if (i) res += " ";
      res += buff;
    }
    return res + "]";
  }

  void print(const char *name) const {
    string str = ToString();
    Printf("%s: %s\n", name, str.c_str());
  }

  static void TestHappensBefore() {
    // TODO(kcc): need more tests here...
    const char *test_vts[] = {
      "[0:1;]",
      "[0:4; 2:1;]",
      "[0:4; 2:2; 4:1;]",
      "[0:4; 3:2; 4:1;]",
      "[0:4; 3:2; 4:2;]",
      "[0:4; 3:3; 4:1;]",
      NULL
    };

    for (int i = 0; test_vts[i]; i++) {
      const VTS *vts1 = Parse(test_vts[i]);
      for (int j = 0; test_vts[j]; j++) {
        const VTS *vts2 = Parse(test_vts[j]);
        bool hb  = HappensBefore(vts1, vts2);
        Printf("HB = %d\n   %s\n   %s\n", static_cast<int>(hb),
               vts1->ToString().c_str(),
               vts2->ToString().c_str());
        delete vts2;
      }
      delete vts1;
    }
  }

  static void Test() {
    Printf("VTS::test();\n");
    VTS *v1 = CreateSingleton(TID(0));
    VTS *v2 = CreateSingleton(TID(1));
    VTS *v3 = CreateSingleton(TID(2));
    VTS *v4 = CreateSingleton(TID(3));

    VTS *v12 = Join(v1, v2);
    v12->print("v12");
    VTS *v34 = Join(v3, v4);
    v34->print("v34");

    VTS *x1 = Parse("[0:4; 3:6; 4:2;]");
    CHECK(x1);
    x1->print("x1");
    TestHappensBefore();
  }

  // Parse VTS string in the form "[0:4; 3:6; 4:2;]".
  static VTS *Parse(const char *str) {
#if 1  // TODO(kcc): need sscanf in valgrind
    return NULL;
#else
    vector<TS> vec;
    if (!str) return NULL;
    if (str[0] != '[') return NULL;
    str++;
    int tid = 0, clk = 0;
    int consumed = 0;
    while (sscanf(str, "%d:%d;%n", &tid, &clk, &consumed) > 0) {
      TS ts;
      ts.tid = tid;
      ts.clk = clk;
      vec.push_back(ts);
      str += consumed;
      // Printf("%d:%d\n", tid, clk);
    }
    if (*str != ']') return NULL;
    VTS *res = Create(vec.size());
    for (size_t i = 0; i < vec.size(); i++) {
      res->arr_[i] = vec[i];
    }
    return res;
#endif
  }

  static void InitClassMembers() {
    hb_cache_ = new HBCache;
    free_lists_ = new FreeList *[kNumberOfFreeLists+1];
    free_lists_[0] = 0;
    for (size_t  i = 1; i <= kNumberOfFreeLists; i++) {
      free_lists_[i] = new FreeList(MemoryRequiredForOneVts(i),
                                    (kNumberOfFreeLists * 4) / i);
    }
  }

  int32_t uniq_id() const { return uniq_id_; }

 private:
  explicit VTS(size_t size)
    : ref_count_(1),
      size_(size) {
    uniq_id_counter_++;
    // If this counter overflows we are in trouble; we'd need 64 bits...
    CHECK_GT(uniq_id_counter_, 0);
    uniq_id_ = uniq_id_counter_;
  }
  ~VTS() {}

  struct TS {
    int32_t tid;
    int32_t clk;
  };


  // data members
  int32_t ref_count_;
  int32_t uniq_id_;
  size_t size_;
  TS     arr_[];  // array of size_ elements.


  // static data members
  static int32_t uniq_id_counter_;
  static const int kCacheSize = 4999;  // Has to be prime.
  typedef IntPairToBoolCache<kCacheSize> HBCache;
  static HBCache *hb_cache_;

  static const size_t kNumberOfFreeLists = 512;  // Must be power of two.
//  static const size_t kNumberOfFreeLists = 64; // Must be power of two.
  static FreeList **free_lists_;  // Array of kNumberOfFreeLists + 1 elements; index 0 is unused.
};

int32_t VTS::uniq_id_counter_;
VTS::HBCache *VTS::hb_cache_;
FreeList **VTS::free_lists_;
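
// Worked examples of the happens-before relation above (illustrative only),
// written in the "[tid:clk; ...]" notation used by VTS::ToString():
//
//   HappensBefore([0:4; 2:1;], [0:4; 2:2; 4:1;]) == true
//     // no clock decreases and the right side only gains entries
//   HappensBefore([0:4; 2:2;], [0:4; 2:1;]) == false
//     // thread 2's clock goes backwards
//   HappensBefore([0:4; 3:2;], [0:4; 2:9;]) == false
//     // thread 3 is missing from the right-hand VTS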



// -------- Mask -------------------- {{{1
// A bit mask (32-bits on 32-bit arch and 64-bits on 64-bit arch).
class Mask {
 public:
  static const uintptr_t kOne = 1;
  static const uintptr_t kNBits = sizeof(uintptr_t) * 8;
  static const uintptr_t kNBitsLog = kNBits == 32 ? 5 : 6;

  Mask() : m_(0) {}
  Mask(const Mask &m) : m_(m.m_) { }
  explicit Mask(uintptr_t m) : m_(m) { }
  INLINE bool Get(uintptr_t idx) const   { return m_ & (kOne << idx); }
  INLINE void Set(uintptr_t idx)   { m_ |= kOne << idx; }
  INLINE void Clear(uintptr_t idx) { m_ &= ~(kOne << idx); }
  INLINE bool Empty() const {return m_ == 0; }

  // Clear bits in range [a,b) and return old [a,b) range.
  INLINE Mask ClearRangeAndReturnOld(uintptr_t a, uintptr_t b) {
    DCHECK(a < b);
    DCHECK(b <= kNBits);
    uintptr_t res;
    uintptr_t n_bits_in_mask = (b - a);
    if (n_bits_in_mask == kNBits) {
      res = m_;
      m_ = 0;
    } else {
      uintptr_t t = (kOne << n_bits_in_mask);
      uintptr_t mask = (t - 1) << a;
      res = m_ & mask;
      m_ &= ~mask;
    }
    return Mask(res);
  }

  INLINE void ClearRange(uintptr_t a, uintptr_t b) {
    ClearRangeAndReturnOld(a, b);
  }

  INLINE void SetRange(uintptr_t a, uintptr_t b) {
    DCHECK(a < b);
    DCHECK(b <= kNBits);
    uintptr_t n_bits_in_mask = (b - a);
    if (n_bits_in_mask == kNBits) {
      m_ = ~0;
    } else {
      uintptr_t t = (kOne << n_bits_in_mask);
      uintptr_t mask = (t - 1) << a;
      m_ |= mask;
    }
  }

  INLINE uintptr_t GetRange(uintptr_t a, uintptr_t b) const {
    // a bug was fixed here
    DCHECK(a < b);
    DCHECK(b <= kNBits);
    uintptr_t n_bits_in_mask = (b - a);
    if (n_bits_in_mask == kNBits) {
      return m_;
    } else {
      uintptr_t t = (kOne << n_bits_in_mask);
      uintptr_t mask = (t - 1) << a;
      return m_ & mask;
    }
  }

  // Get the index of some set bit (assumes the mask is non-zero).
  size_t GetSomeSetBit() {
    DCHECK(m_);
    size_t ret;
#ifdef __GNUC__
    ret = __builtin_ctzl(m_);
#elif defined(_MSC_VER)
    unsigned long index;
    DCHECK(sizeof(uintptr_t) == 4);
    _BitScanReverse(&index, m_);
    ret = index;
#else
# error "Unsupported"
#endif
    DCHECK(this->Get(ret));
    return ret;
  }

  size_t PopCount() {
#ifdef VGO_linux
    return __builtin_popcountl(m_);
#else
    CHECK(0);
    return 0;
#endif
  }

  void Subtract(Mask m) { m_ &= ~m.m_; }
  void Union(Mask m) { m_ |= m.m_; }

  static Mask Intersection(Mask m1, Mask m2) { return Mask(m1.m_ & m2.m_); }


  void Clear() { m_ = 0; }


  string ToString() const {
    char buff[kNBits+1];
    for (uintptr_t i = 0; i < kNBits; i++) {
      buff[i] = Get(i) ? '1' : '0';
    }
    buff[kNBits] = 0;
    return buff;
  }

  static void Test() {
    Mask m;
    m.Set(2);
    Printf("%s\n", m.ToString().c_str());
    m.ClearRange(0, kNBits);
    Printf("%s\n", m.ToString().c_str());
  }

 private:
  uintptr_t m_;
};
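
// A worked example of the range operations above (illustrative only; just
// the low byte is shown, the real mask is 32 or 64 bits wide):
//
//   Mask m;
//   m.SetRange(2, 5);                   // bits 2..4 -> 0b00011100
//   CHECK(m.Get(3));
//   CHECK(m.GetRange(0, 4) == 0x0c);    // bits 2 and 3 fall inside [0,4)
//   Mask old = m.ClearRangeAndReturnOld(0, 4);
//   CHECK(old.Get(2) && old.Get(3));    // the bits that were cleared
//   CHECK(m.Get(4) && !m.Get(3));       // only bit 4 is still set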

// -------- BitSet -------------------{{{1
// Poor man's sparse bit set.
class BitSet {
 public:
  // Add range [a,b). The range should be within one line (kNBitsLog).
  void Add(uintptr_t a, uintptr_t b) {
    uintptr_t line = a & ~(Mask::kNBits - 1);
    DCHECK(a < b);
    DCHECK(a - line < Mask::kNBits);
    if (!(b - line <= Mask::kNBits)) {
      Printf("XXXXX %p %p %p b-line=%ld size=%ld a-line=%ld\n", a, b, line,
             b - line, b - a, a - line);
      return;
    }
    DCHECK(b - line <= Mask::kNBits);
    DCHECK(line == ((b - 1) & ~(Mask::kNBits - 1)));
    Mask &mask = map_[line];
    mask.SetRange(a - line, b - line);
  }

  bool empty() { return map_.empty(); }

  size_t size() {
    size_t res = 0;
    for (Map::iterator it = map_.begin(); it != map_.end(); ++it) {
      res += it->second.PopCount();
    }
    return res;
  }

  string ToString() {
    char buff[100];
    string res;
    int lines = 0;
    snprintf(buff, sizeof(buff), " %ld lines %ld bits:",
             (long)map_.size(), (long)size());
    res += buff;
    for (Map::iterator it = map_.begin(); it != map_.end(); ++it) {
      Mask mask = it->second;
      snprintf(buff, sizeof(buff), " l%d (%ld):", lines++, (long)mask.PopCount());
      res += buff;
      uintptr_t line = it->first;
      bool is_in = false;
      for (size_t i = 0; i < Mask::kNBits; i++) {
        uintptr_t addr = line + i;
        if (mask.Get(i)) {
          if (!is_in) {
            snprintf(buff, sizeof(buff), " [%lx,", (long)addr);
            res += buff;
            is_in = true;
          }
        } else {
          if (is_in) {
            snprintf(buff, sizeof(buff), "%lx);", (long)addr);
            res += buff;
            is_in = false;
          }
        }
      }
      if (is_in) {
        snprintf(buff, sizeof(buff), "%lx);", (long)(line + Mask::kNBits));
        res += buff;
      }
    }
    return res;
  }

  void Clear() { map_.clear(); }
 private:
  typedef map<uintptr_t, Mask> Map;
  Map map_;
};
   1638 
   1639 // -------- Segment -------------------{{{1
   1640 class Segment {
   1641  public:
   1642   // for debugging...
   1643   static bool ProfileSeg(SID sid) {
   1644     // return (sid.raw() % (1 << 14)) == 0;
   1645     return false;
   1646   }
   1647 
   1648   // non-static methods
   1649 
   1650   VTS *vts() const { return vts_; }
   1651   TID tid() const { return TID(tid_); }
   1652   LSID  lsid(bool is_w) const { return lsid_[is_w]; }
   1653   uint32_t lock_era() const { return lock_era_; }
   1654 
   1655   // static methods
   1656 
   1657   static INLINE uintptr_t *embedded_stack_trace(SID sid) {
   1658     DCHECK(sid.valid());
   1659     DCHECK(kSizeOfHistoryStackTrace > 0);
   1660     size_t chunk_idx = (unsigned)sid.raw() / kChunkSizeForStacks;
   1661     size_t idx       = (unsigned)sid.raw() % kChunkSizeForStacks;
   1662     DCHECK(chunk_idx < n_stack_chunks_);
   1663     DCHECK(all_stacks_[chunk_idx] != NULL);
   1664     return &all_stacks_[chunk_idx][idx * kSizeOfHistoryStackTrace];
   1665   }
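
           // A sketch of the chunk arithmetic above (using the DEBUG_MODE value
           // kChunkSizeForStacks == 512): for sid.raw() == 1300,
           //   chunk_idx = 1300 / 512 == 2,  idx = 1300 % 512 == 276,
           // so the trace occupies kSizeOfHistoryStackTrace words starting at
           // &all_stacks_[2][276 * kSizeOfHistoryStackTrace].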
   1666 
   1667   static void ensure_space_for_stack_trace(SID sid) {
   1668     ScopedMallocCostCenter malloc_cc(__FUNCTION__);
   1669     DCHECK(sid.valid());
   1670     DCHECK(kSizeOfHistoryStackTrace > 0);
   1671     size_t chunk_idx = (unsigned)sid.raw() / kChunkSizeForStacks;
   1672     DCHECK(chunk_idx < n_stack_chunks_);
   1673     if (all_stacks_[chunk_idx])
   1674       return;
   1675     for (size_t i = 0; i <= chunk_idx; i++) {
   1676       if (all_stacks_[i]) continue;
   1677       all_stacks_[i] = new uintptr_t[
   1678           kChunkSizeForStacks * kSizeOfHistoryStackTrace];
    1679       // We don't clear this memory here; it will be cleared later, lazily.
   1680       // We also never delete it because it will be used until the very end.
   1681     }
   1682   }
   1683 
   1684   static string StackTraceString(SID sid) {
   1685     DCHECK(kSizeOfHistoryStackTrace > 0);
   1686     return StackTrace::EmbeddedStackTraceToString(
   1687         embedded_stack_trace(sid), kSizeOfHistoryStackTrace);
   1688   }
   1689 
   1690   // Allocate `n` fresh segments, put SIDs into `fresh_sids`.
   1691   static INLINE void AllocateFreshSegments(size_t n, SID *fresh_sids) {
   1692     ScopedMallocCostCenter malloc_cc(__FUNCTION__);
   1693     size_t i = 0;
   1694     size_t n_reusable = min(n, reusable_sids_->size());
   1695     // First, allocate from reusable_sids_.
   1696     for (; i < n_reusable; i++) {
   1697       G_stats->seg_reuse++;
   1698       DCHECK(!reusable_sids_->empty());
   1699       SID sid = reusable_sids_->back();
   1700       reusable_sids_->pop_back();
   1701       Segment *seg = GetInternal(sid);
   1702       DCHECK(!seg->seg_ref_count_);
   1703       DCHECK(!seg->vts());
   1704       DCHECK(!seg->tid().valid());
   1705       CHECK(sid.valid());
   1706       if (ProfileSeg(sid)) {
   1707        Printf("Segment: reused SID %d\n", sid.raw());
   1708       }
   1709       fresh_sids[i] = sid;
   1710     }
    1711     // Allocate the rest as brand-new SIDs.
   1712     for (; i < n; i++) {
   1713       G_stats->seg_create++;
   1714       CHECK(n_segments_ < kMaxSID);
   1715       Segment *seg = GetSegmentByIndex(n_segments_);
   1716 
   1717       // This VTS may not be empty due to ForgetAllState().
   1718       VTS::Unref(seg->vts_);
   1719       seg->vts_ = 0;
   1720       seg->seg_ref_count_ = 0;
   1721 
   1722       if (ProfileSeg(SID(n_segments_))) {
   1723        Printf("Segment: allocated SID %d\n", n_segments_);
   1724       }
   1725 
   1726       SID sid = fresh_sids[i] = SID(n_segments_);
   1727       if (kSizeOfHistoryStackTrace > 0) {
   1728         ensure_space_for_stack_trace(sid);
   1729       }
   1730       n_segments_++;
   1731     }
   1732   }
   1733 
   1734   // Initialize the contents of the given segment.
   1735   static INLINE void SetupFreshSid(SID sid, TID tid, VTS *vts,
   1736                                    LSID rd_lockset, LSID wr_lockset) {
   1737     DCHECK(vts);
   1738     DCHECK(tid.valid());
   1739     DCHECK(sid.valid());
   1740     Segment *seg = GetInternal(sid);
   1741     DCHECK(seg);
   1742     DCHECK(seg->seg_ref_count_ == 0);
   1743     seg->seg_ref_count_ = 0;
   1744     seg->tid_ = tid;
   1745     seg->lsid_[0] = rd_lockset;
   1746     seg->lsid_[1] = wr_lockset;
   1747     seg->vts_ = vts;
   1748     seg->lock_era_ = g_lock_era;
   1749     if (kSizeOfHistoryStackTrace) {
   1750       embedded_stack_trace(sid)[0] = 0;
   1751     }
   1752   }
   1753 
   1754   static INLINE SID AddNewSegment(TID tid, VTS *vts,
   1755                            LSID rd_lockset, LSID wr_lockset) {
   1756     ScopedMallocCostCenter malloc_cc("Segment::AddNewSegment()");
   1757     SID sid;
   1758     AllocateFreshSegments(1, &sid);
   1759     SetupFreshSid(sid, tid, vts, rd_lockset, wr_lockset);
   1760     return sid;
   1761   }
   1762 
   1763   static bool Alive(SID sid) {
   1764     Segment *seg = GetInternal(sid);
   1765     return seg->vts() != NULL;
   1766   }
   1767 
   1768   static void AssertLive(SID sid, int line) {
   1769     if (DEBUG_MODE) {
   1770       if (!(sid.raw() < INTERNAL_ANNOTATE_UNPROTECTED_READ(n_segments_))) {
   1771         Printf("Segment::AssertLive: failed on sid=%d n_segments = %dline=%d\n",
   1772                sid.raw(), n_segments_, line);
   1773       }
   1774       Segment *seg = GetInternal(sid);
   1775       if (!seg->vts()) {
   1776         Printf("Segment::AssertLive: failed on sid=%d line=%d\n",
   1777                sid.raw(), line);
   1778       }
   1779       DCHECK(seg->vts());
   1780       DCHECK(seg->tid().valid());
   1781     }
   1782   }
   1783 
   1784   static INLINE Segment *Get(SID sid) {
   1785     AssertLive(sid, __LINE__);
   1786     Segment *res = GetInternal(sid);
   1787     DCHECK(res->vts());
   1788     DCHECK(res->tid().valid());
   1789     return res;
   1790   }
   1791 
   1792   static INLINE void RecycleOneFreshSid(SID sid) {
   1793     Segment *seg = GetInternal(sid);
   1794     seg->tid_ = TID();
   1795     seg->vts_ = NULL;
   1796     reusable_sids_->push_back(sid);
   1797     if (ProfileSeg(sid)) {
   1798       Printf("Segment: recycled SID %d\n", sid.raw());
   1799     }
   1800   }
   1801 
   1802   static bool RecycleOneSid(SID sid) {
   1803     ScopedMallocCostCenter malloc_cc("Segment::RecycleOneSid()");
   1804     Segment *seg = GetInternal(sid);
   1805     DCHECK(seg->seg_ref_count_ == 0);
   1806     DCHECK(sid.raw() < n_segments_);
   1807     if (!seg->vts()) return false;  // Already recycled.
   1808     VTS::Unref(seg->vts_);
   1809     RecycleOneFreshSid(sid);
   1810     return true;
   1811   }
   1812 
   1813   int32_t ref_count() const {
   1814     return INTERNAL_ANNOTATE_UNPROTECTED_READ(seg_ref_count_);
   1815   }
   1816 
   1817   static void INLINE Ref(SID sid, const char *where) {
   1818     Segment *seg = GetInternal(sid);
   1819     if (ProfileSeg(sid)) {
   1820       Printf("SegRef   : %d ref=%d %s; tid=%d\n", sid.raw(),
   1821              seg->seg_ref_count_, where, seg->tid().raw());
   1822     }
   1823     DCHECK(seg->seg_ref_count_ >= 0);
   1824     AtomicIncrementRefcount(&seg->seg_ref_count_);
   1825   }
   1826 
   1827   static INLINE intptr_t UnrefNoRecycle(SID sid, const char *where) {
   1828     Segment *seg = GetInternal(sid);
   1829     if (ProfileSeg(sid)) {
   1830       Printf("SegUnref : %d ref=%d %s\n", sid.raw(), seg->seg_ref_count_, where);
   1831     }
   1832     DCHECK(seg->seg_ref_count_ > 0);
   1833     return AtomicDecrementRefcount(&seg->seg_ref_count_);
   1834   }
   1835 
   1836   static void INLINE Unref(SID sid, const char *where) {
   1837     if (UnrefNoRecycle(sid, where) == 0) {
   1838       RecycleOneSid(sid);
   1839     }
   1840   }
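
           // A typical SID life cycle (an illustrative sketch, not code from
           // this file):
           //   SID sid = Segment::AddNewSegment(tid, vts, rd_ls, wr_ls);
           //   Segment::Ref(sid, "owner");    // seg_ref_count_: 0 -> 1.
           //   Segment::Unref(sid, "owner");  // 1 -> 0: RecycleOneSid() unrefs
           //                                  // vts_ and pushes sid onto
           //                                  // reusable_sids_, to be reused by
           //                                  // AllocateFreshSegments().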
   1841 
   1842 
   1843   static void ForgetAllState() {
   1844     n_segments_ = 1;
   1845     reusable_sids_->clear();
    1846     // The stale vts_ fields will be unref'ed lazily in AllocateFreshSegments().
   1847   }
   1848 
   1849   static string ToString(SID sid) {
   1850     char buff[100];
   1851     snprintf(buff, sizeof(buff), "T%d/S%d", Get(sid)->tid().raw(), sid.raw());
   1852     return buff;
   1853   }
   1854 
   1855   static string ToStringTidOnly(SID sid) {
   1856     char buff[100];
   1857     snprintf(buff, sizeof(buff), "T%d", Get(sid)->tid().raw());
   1858     return buff;
   1859   }
   1860 
   1861   static string ToStringWithLocks(SID sid) {
   1862     char buff[100];
   1863     Segment *seg = Get(sid);
   1864     snprintf(buff, sizeof(buff), "T%d/S%d ", seg->tid().raw(), sid.raw());
   1865     string res = buff;
   1866     res += TwoLockSetsToString(seg->lsid(false), seg->lsid(true));
   1867     return res;
   1868   }
   1869 
   1870   static bool INLINE HappensBeforeOrSameThread(SID a, SID b) {
   1871     if (a == b) return true;
   1872     if (Get(a)->tid() == Get(b)->tid()) return true;
   1873     return HappensBefore(a, b);
   1874   }
   1875 
   1876   static bool INLINE HappensBefore(SID a, SID b) {
   1877     DCHECK(a != b);
   1878     G_stats->n_seg_hb++;
   1879     bool res = false;
   1880     const Segment *seg_a = Get(a);
   1881     const Segment *seg_b = Get(b);
   1882     DCHECK(seg_a->tid() != seg_b->tid());
   1883     const VTS *vts_a = seg_a->vts();
   1884     const VTS *vts_b = seg_b->vts();
   1885     res = VTS::HappensBeforeCached(vts_a, vts_b);
   1886     if (0 && DEBUG_MODE) {
   1887       Printf("HB = %d\n  %s\n  %s\n", res,
   1888            vts_a->ToString().c_str(), vts_b->ToString().c_str());
   1889     }
   1890     return res;
   1891   }
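
           // Example, in the VTS notation used by SegmentSet::Test() below and
           // assuming the usual vector-clock ordering: if a's VTS is [0:1;] and
           // b's VTS is [0:2; 1:1;], every clock of a is <= the matching clock
           // of b, so HappensBefore(a, b) holds while HappensBefore(b, a) does
           // not.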
   1892 
   1893   static int32_t NumberOfSegments() { return n_segments_; }
   1894 
   1895   static void ShowSegmentStats() {
   1896     Printf("Segment::ShowSegmentStats:\n");
   1897     Printf("n_segments_: %d\n", n_segments_);
   1898     Printf("reusable_sids_: %ld\n", reusable_sids_->size());
   1899     map<int, int> ref_to_freq_map;
   1900     for (int i = 1; i < n_segments_; i++) {
   1901       Segment *seg = GetInternal(SID(i));
   1902       int32_t refcount = seg->seg_ref_count_;
   1903       if (refcount > 10) refcount = 10;
   1904       ref_to_freq_map[refcount]++;
   1905     }
   1906     for (map<int, int>::iterator it = ref_to_freq_map.begin();
   1907          it != ref_to_freq_map.end(); ++it) {
   1908       Printf("ref %d => freq %d\n", it->first, it->second);
   1909     }
   1910   }
   1911 
   1912   static void InitClassMembers() {
   1913     if (G_flags->keep_history == 0)
   1914       kSizeOfHistoryStackTrace = 0;
   1915     Report("INFO: Allocating %ldMb (%ld * %ldM) for Segments.\n",
   1916            (sizeof(Segment) * kMaxSID) >> 20,
   1917            sizeof(Segment), kMaxSID >> 20);
   1918     if (kSizeOfHistoryStackTrace) {
   1919       Report("INFO: Will allocate up to %ldMb for 'previous' stack traces.\n",
   1920              (kSizeOfHistoryStackTrace * sizeof(uintptr_t) * kMaxSID) >> 20);
   1921     }
   1922 
   1923     all_segments_  = new Segment[kMaxSID];
    1924     // Initialize all segments to 0.
   1925     memset(all_segments_, 0, kMaxSID * sizeof(Segment));
    1926     // Initialize all_segments_[0] with garbage (SID 0 is never used).
   1927     memset(all_segments_, -1, sizeof(Segment));
   1928 
   1929     if (kSizeOfHistoryStackTrace > 0) {
   1930       n_stack_chunks_ = kMaxSID / kChunkSizeForStacks;
   1931       if (n_stack_chunks_ * kChunkSizeForStacks < (size_t)kMaxSID)
   1932         n_stack_chunks_++;
   1933       all_stacks_ = new uintptr_t*[n_stack_chunks_];
   1934       memset(all_stacks_, 0, sizeof(uintptr_t*) * n_stack_chunks_);
   1935     }
   1936     n_segments_    = 1;
   1937     reusable_sids_ = new vector<SID>;
   1938   }
   1939 
   1940  private:
   1941   static INLINE Segment *GetSegmentByIndex(int32_t index) {
   1942     return &all_segments_[index];
   1943   }
   1944   static INLINE Segment *GetInternal(SID sid) {
   1945     DCHECK(sid.valid());
   1946     DCHECK(sid.raw() < INTERNAL_ANNOTATE_UNPROTECTED_READ(n_segments_));
   1947     Segment *res = GetSegmentByIndex(sid.raw());
   1948     return res;
   1949   }
   1950 
   1951   // Data members.
   1952   int32_t seg_ref_count_;
   1953   LSID     lsid_[2];
   1954   TID      tid_;
   1955   uint32_t lock_era_;
   1956   VTS *vts_;
   1957 
   1958   // static class members.
   1959 
    1960   // One large array of segments. The size is set by a command-line flag (--max-sid)
   1961   // and never changes. Once we are out of vacant segments, we flush the state.
   1962   static Segment *all_segments_;
   1963   // We store stack traces separately because their size is unknown
   1964   // at compile time and because they are needed less often.
   1965   // The stacks are stored as an array of chunks, instead of one array,
   1966   // so that for small tests we do not require too much RAM.
   1967   // We don't use vector<> or another resizable array to avoid expensive
   1968   // resizing.
   1969   enum { kChunkSizeForStacks = DEBUG_MODE ? 512 : 1 * 1024 * 1024 };
   1970   static uintptr_t **all_stacks_;
   1971   static size_t      n_stack_chunks_;
   1972 
   1973   static int32_t n_segments_;
   1974   static vector<SID> *reusable_sids_;
   1975 };
   1976 
   1977 Segment          *Segment::all_segments_;
   1978 uintptr_t       **Segment::all_stacks_;
   1979 size_t            Segment::n_stack_chunks_;
   1980 int32_t           Segment::n_segments_;
   1981 vector<SID>      *Segment::reusable_sids_;
   1982 
   1983 // -------- SegmentSet -------------- {{{1
   1984 class SegmentSet {
   1985  public:
   1986   static NOINLINE SSID AddSegmentToSS(SSID old_ssid, SID new_sid);
   1987   static NOINLINE SSID RemoveSegmentFromSS(SSID old_ssid, SID sid_to_remove);
   1988 
   1989   static INLINE SSID AddSegmentToTupleSS(SSID ssid, SID new_sid);
   1990   static INLINE SSID RemoveSegmentFromTupleSS(SSID old_ssid, SID sid_to_remove);
   1991 
   1992   SSID ComputeSSID() {
   1993     SSID res = map_->GetIdOrZero(this);
   1994     CHECK_NE(res.raw(), 0);
   1995     return res;
   1996   }
   1997 
   1998   int ref_count() const { return ref_count_; }
   1999 
   2000   static void AssertLive(SSID ssid, int line) {
   2001     DCHECK(ssid.valid());
   2002     if (DEBUG_MODE) {
   2003       if (ssid.IsSingleton()) {
   2004         Segment::AssertLive(ssid.GetSingleton(), line);
   2005       } else {
   2006         DCHECK(ssid.IsTuple());
   2007         int idx = -ssid.raw()-1;
   2008         DCHECK(idx < static_cast<int>(vec_->size()));
   2009         DCHECK(idx >= 0);
    2010         SegmentSet *res = (*vec_)[idx];
    2011         // Check for NULL before dereferencing res.
    2012         if (!res) {
    2013           Printf("SegmentSet::AssertLive failed at line %d (ssid=%d)\n",
    2014                  line, ssid.raw());
    2015           DCHECK(0);
    2016         }
    2017         DCHECK(res->ref_count_ >= 0);
    2018         res->Validate(line);
    2019 
   2020       }
   2021     }
   2022   }
   2023 
   2024   static SegmentSet *Get(SSID ssid) {
   2025     DCHECK(ssid.valid());
   2026     DCHECK(!ssid.IsSingleton());
   2027     int idx = -ssid.raw()-1;
   2028     ANNOTATE_IGNORE_READS_BEGIN();
   2029     DCHECK(idx < static_cast<int>(vec_->size()) && idx >= 0);
   2030     ANNOTATE_IGNORE_READS_END();
   2031     SegmentSet *res = (*vec_)[idx];
   2032     DCHECK(res);
   2033     DCHECK(res->size() >= 2);
   2034     return res;
   2035   }
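
           // The SSID encoding relied upon above (illustrative values): a
           // positive raw SSID is a singleton and equals the SID itself, while
           // a negative raw SSID denotes a tuple stored in vec_ at -raw - 1:
           //   SSID(5)  -> singleton segment S5;
           //   SSID(-3) -> tuple (*vec_)[2], holding 2..kMaxSegmentSetSize SIDs.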
   2036 
   2037   void RecycleOneSegmentSet(SSID ssid) {
   2038     DCHECK(ref_count_ == 0);
   2039     DCHECK(ssid.valid());
   2040     DCHECK(!ssid.IsSingleton());
   2041     int idx = -ssid.raw()-1;
   2042     DCHECK(idx < static_cast<int>(vec_->size()) && idx >= 0);
   2043     CHECK((*vec_)[idx] == this);
   2044     // Printf("SegmentSet::RecycleOneSegmentSet: %d\n", ssid.raw());
   2045     //
   2046     // Recycle segments
   2047     for (int i = 0; i < kMaxSegmentSetSize; i++) {
   2048       SID sid = this->GetSID(i);
   2049       if (sid.raw() == 0) break;
   2050       Segment::Unref(sid, "SegmentSet::Recycle");
   2051     }
   2052     ref_count_ = -1;
   2053 
   2054     map_->Erase(this);
   2055     ready_to_be_reused_->push_back(ssid);
   2056     G_stats->ss_recycle++;
   2057   }
   2058 
   2059   static void INLINE Ref(SSID ssid, const char *where) {
   2060     DCHECK(ssid.valid());
   2061     if (ssid.IsSingleton()) {
   2062       Segment::Ref(ssid.GetSingleton(), where);
   2063     } else {
   2064       SegmentSet *sset = Get(ssid);
   2065       // Printf("SSRef   : %d ref=%d %s\n", ssid.raw(), sset->ref_count_, where);
   2066       DCHECK(sset->ref_count_ >= 0);
   2067       sset->ref_count_++;
   2068     }
   2069   }
   2070 
   2071   static void INLINE Unref(SSID ssid, const char *where) {
   2072     DCHECK(ssid.valid());
   2073     if (ssid.IsSingleton()) {
   2074       Segment::Unref(ssid.GetSingleton(), where);
   2075     } else {
   2076       SegmentSet *sset = Get(ssid);
   2077       // Printf("SSUnref : %d ref=%d %s\n", ssid.raw(), sset->ref_count_, where);
   2078       DCHECK(sset->ref_count_ > 0);
   2079       sset->ref_count_--;
   2080       if (sset->ref_count_ == 0) {
    2081         // We don't delete an unused SSID straight away, for performance
    2082         // reasons (to avoid flushing caches too often, and because the SSID
    2083         // may be reused again soon).
   2084         //
   2085         // Instead, we use two queues (deques):
   2086         //    ready_to_be_recycled_ and ready_to_be_reused_.
    2087         // The algorithm is as follows:
   2088         // 1) When refcount_ becomes zero, we push the SSID into
   2089         //    ready_to_be_recycled_.
   2090         // 2) When ready_to_be_recycled_ becomes too large, we call
   2091         //    FlushRecycleQueue().
   2092         //    In FlushRecycleQueue(), we pop the first half of
   2093         //    ready_to_be_recycled_ and for each popped SSID we do
   2094         //     * if "refcount_ > 0", do nothing (this SSID is in use again)
   2095         //     * otherwise, we recycle this SSID (delete its VTS, etc) and push
   2096         //       it into ready_to_be_reused_
   2097         // 3) When a new SegmentSet is about to be created, we re-use SSID from
   2098         //    ready_to_be_reused_ (if available)
   2099         ready_to_be_recycled_->push_back(ssid);
   2100         if (UNLIKELY(ready_to_be_recycled_->size() >
   2101                      2 * G_flags->segment_set_recycle_queue_size)) {
   2102           FlushRecycleQueue();
   2103         }
   2104       }
   2105     }
   2106   }
   2107 
   2108   static void FlushRecycleQueue() {
   2109     while (ready_to_be_recycled_->size() >
   2110         G_flags->segment_set_recycle_queue_size) {
   2111       SSID rec_ssid = ready_to_be_recycled_->front();
   2112       ready_to_be_recycled_->pop_front();
   2113       int idx = -rec_ssid.raw()-1;
   2114       SegmentSet *rec_ss = (*vec_)[idx];
   2115       DCHECK(rec_ss);
   2116       DCHECK(rec_ss == Get(rec_ssid));
    2117       // Check that this SSID hasn't been referenced again in the meantime.
   2118       if (rec_ss->ref_count_ == 0) {
   2119         rec_ss->RecycleOneSegmentSet(rec_ssid);
   2120       }
   2121     }
   2122 
   2123     // SSIDs will be reused soon - need to flush some caches.
   2124     FlushCaches();
   2125   }
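
           // An illustrative timeline of the two queues (assuming
           // G_flags->segment_set_recycle_queue_size == 2):
           //   SS7 hits refcount 0      -> ready_to_be_recycled_: [7]
           //   more SSIDs hit 0         -> the queue grows past 2 * 2, so
           //   FlushRecycleQueue() pops entries until the size is <= 2:
           //     SS7 was Ref()-ed again -> left alone (it is alive again);
           //     SS9 is still at 0      -> recycled; ready_to_be_reused_: [9]
           //   The next AllocateAndCopy() pops 9 and reuses its slot in vec_.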
   2126 
   2127   string ToString() const;
   2128   void Print() {
   2129     Printf("SS%d:%s\n", -ComputeSSID().raw(), ToString().c_str());
   2130   }
   2131 
   2132   static string ToString(SSID ssid) {
   2133     CHECK(ssid.IsValidOrEmpty());
   2134     if (ssid.IsSingleton()) {
   2135       return "{" +  Segment::ToStringTidOnly(SID(ssid.raw())) + "}";
   2136     } else if (ssid.IsEmpty()) {
   2137       return "{}";
   2138     } else {
   2139       AssertLive(ssid, __LINE__);
   2140       return Get(ssid)->ToString();
   2141     }
   2142   }
   2143 
   2144 
   2145   static string ToStringWithLocks(SSID ssid);
   2146 
   2147   static void FlushCaches() {
   2148     add_segment_cache_->Flush();
   2149     remove_segment_cache_->Flush();
   2150   }
   2151 
   2152   static void ForgetAllState() {
   2153     for (size_t i = 0; i < vec_->size(); i++) {
   2154       delete (*vec_)[i];
   2155     }
   2156     map_->Clear();
   2157     vec_->clear();
   2158     ready_to_be_reused_->clear();
   2159     ready_to_be_recycled_->clear();
   2160     FlushCaches();
   2161   }
   2162 
   2163 
   2164   static void Test();
   2165 
   2166   static int32_t Size(SSID ssid) {
   2167     if (ssid.IsEmpty()) return 0;
   2168     if (ssid.IsSingleton()) return 1;
   2169     return Get(ssid)->size();
   2170   }
   2171 
   2172   SID GetSID(int32_t i) const {
   2173     DCHECK(i >= 0 && i < kMaxSegmentSetSize);
   2174     DCHECK(i == 0 || sids_[i-1].raw() != 0);
   2175     return sids_[i];
   2176   }
   2177 
   2178   void SetSID(int32_t i, SID sid) {
   2179     DCHECK(i >= 0 && i < kMaxSegmentSetSize);
   2180     DCHECK(i == 0 || sids_[i-1].raw() != 0);
   2181     sids_[i] = sid;
   2182   }
   2183 
   2184   static SID GetSID(SSID ssid, int32_t i, int line) {
   2185     DCHECK(ssid.valid());
   2186     if (ssid.IsSingleton()) {
   2187       DCHECK(i == 0);
   2188       Segment::AssertLive(ssid.GetSingleton(), line);
   2189       return ssid.GetSingleton();
   2190     } else {
   2191       AssertLive(ssid, __LINE__);
   2192       SID sid = Get(ssid)->GetSID(i);
   2193       Segment::AssertLive(sid, line);
   2194       return sid;
   2195     }
   2196   }
   2197 
   2198   static bool INLINE Contains(SSID ssid, SID seg) {
   2199     if (LIKELY(ssid.IsSingleton())) {
   2200       return ssid.GetSingleton() == seg;
   2201     } else if (LIKELY(ssid.IsEmpty())) {
   2202       return false;
   2203     }
   2204 
   2205     SegmentSet *ss = Get(ssid);
   2206     for (int i = 0; i < kMaxSegmentSetSize; i++) {
   2207       SID sid = ss->GetSID(i);
   2208       if (sid.raw() == 0) break;
   2209       if (sid == seg)
   2210         return true;
   2211     }
   2212     return false;
   2213   }
   2214 
   2215   static Segment *GetSegmentForNonSingleton(SSID ssid, int32_t i, int line) {
   2216     return Segment::Get(GetSID(ssid, i, line));
   2217   }
   2218 
   2219   void NOINLINE Validate(int line) const;
   2220 
   2221   static size_t NumberOfSegmentSets() { return vec_->size(); }
   2222 
   2223 
   2224   static void InitClassMembers() {
   2225     map_    = new Map;
   2226     vec_    = new vector<SegmentSet *>;
   2227     ready_to_be_recycled_ = new deque<SSID>;
   2228     ready_to_be_reused_ = new deque<SSID>;
   2229     add_segment_cache_ = new SsidSidToSidCache;
   2230     remove_segment_cache_ = new SsidSidToSidCache;
   2231   }
   2232 
   2233  private:
   2234   SegmentSet()  // Private CTOR
   2235     : ref_count_(0) {
   2236     // sids_ are filled with zeroes due to SID default CTOR.
   2237     if (DEBUG_MODE) {
   2238       for (int i = 0; i < kMaxSegmentSetSize; i++)
   2239         CHECK_EQ(sids_[i].raw(), 0);
   2240     }
   2241   }
   2242 
   2243   int size() const {
   2244     for (int i = 0; i < kMaxSegmentSetSize; i++) {
   2245       if (sids_[i].raw() == 0) {
   2246         CHECK_GE(i, 2);
   2247         return i;
   2248       }
   2249     }
   2250     return kMaxSegmentSetSize;
   2251   }
   2252 
   2253   static INLINE SSID AllocateAndCopy(SegmentSet *ss) {
   2254     DCHECK(ss->ref_count_ == 0);
   2255     DCHECK(sizeof(int32_t) == sizeof(SID));
   2256     SSID res_ssid;
   2257     SegmentSet *res_ss = 0;
   2258 
   2259     if (!ready_to_be_reused_->empty()) {
   2260       res_ssid = ready_to_be_reused_->front();
   2261       ready_to_be_reused_->pop_front();
   2262       int idx = -res_ssid.raw()-1;
   2263       res_ss = (*vec_)[idx];
   2264       DCHECK(res_ss);
   2265       DCHECK(res_ss->ref_count_ == -1);
   2266       G_stats->ss_reuse++;
   2267       for (int i = 0; i < kMaxSegmentSetSize; i++) {
   2268         res_ss->sids_[i] = SID(0);
   2269       }
   2270     } else {
   2271       // create a new one
   2272       ScopedMallocCostCenter cc("SegmentSet::CreateNewSegmentSet");
   2273       G_stats->ss_create++;
   2274       res_ss = new SegmentSet;
   2275       vec_->push_back(res_ss);
   2276       res_ssid = SSID(-((int32_t)vec_->size()));
   2277       CHECK(res_ssid.valid());
   2278     }
   2279     DCHECK(res_ss);
   2280     res_ss->ref_count_ = 0;
   2281     for (int i = 0; i < kMaxSegmentSetSize; i++) {
   2282       SID sid = ss->GetSID(i);
   2283       if (sid.raw() == 0) break;
   2284       Segment::Ref(sid, "SegmentSet::FindExistingOrAlocateAndCopy");
   2285       res_ss->SetSID(i, sid);
   2286     }
   2287     DCHECK(res_ss == Get(res_ssid));
   2288     map_->Insert(res_ss, res_ssid);
   2289     return res_ssid;
   2290   }
   2291 
   2292   static NOINLINE SSID FindExistingOrAlocateAndCopy(SegmentSet *ss) {
   2293     if (DEBUG_MODE) {
   2294       int size = ss->size();
   2295       if (size == 2) G_stats->ss_size_2++;
   2296       if (size == 3) G_stats->ss_size_3++;
   2297       if (size == 4) G_stats->ss_size_4++;
   2298       if (size > 4) G_stats->ss_size_other++;
   2299     }
   2300 
    2301     // First, check if such a set already exists.
   2302     SSID ssid = map_->GetIdOrZero(ss);
   2303     if (ssid.raw() != 0) {  // Found.
   2304       AssertLive(ssid, __LINE__);
   2305       G_stats->ss_find++;
   2306       return ssid;
   2307     }
   2308     // If no such set, create one.
   2309     return AllocateAndCopy(ss);
   2310   }
   2311 
   2312   static INLINE SSID DoubletonSSID(SID sid1, SID sid2) {
   2313     SegmentSet tmp;
   2314     tmp.SetSID(0, sid1);
   2315     tmp.SetSID(1, sid2);
   2316     return FindExistingOrAlocateAndCopy(&tmp);
   2317   }
   2318 
   2319   // testing only
   2320   static SegmentSet *AddSegmentToTupleSS(SegmentSet *ss, SID new_sid) {
   2321     SSID ssid = AddSegmentToTupleSS(ss->ComputeSSID(), new_sid);
   2322     AssertLive(ssid, __LINE__);
   2323     return Get(ssid);
   2324   }
   2325 
   2326   static SegmentSet *Doubleton(SID sid1, SID sid2) {
   2327     SSID ssid = DoubletonSSID(sid1, sid2);
   2328     AssertLive(ssid, __LINE__);
   2329     return Get(ssid);
   2330   }
   2331 
   2332   // static data members
   2333   struct Less {
   2334     INLINE bool operator() (const SegmentSet *ss1,
   2335                             const SegmentSet *ss2) const {
   2336       for (int i = 0; i < kMaxSegmentSetSize; i++) {
   2337         SID sid1 = ss1->sids_[i],
   2338             sid2 = ss2->sids_[i];
   2339         if (sid1 != sid2) return sid1 < sid2;
   2340       }
   2341       return false;
   2342     }
   2343   };
   2344 
   2345   struct SSEq {
   2346     INLINE bool operator() (const SegmentSet *ss1,
   2347                             const SegmentSet *ss2) const {
   2348       G_stats->sseq_calls++;
   2349 
   2350       for (int i = 0; i < kMaxSegmentSetSize; i++) {
   2351         SID sid1 = ss1->sids_[i],
   2352             sid2 = ss2->sids_[i];
   2353         if (sid1 != sid2) return false;
   2354       }
   2355       return true;
   2356     }
   2357   };
   2358 
   2359   struct SSHash {
   2360     INLINE size_t operator() (const SegmentSet *ss) const {
   2361       uintptr_t res = 0;
   2362       uint32_t* sids_array = (uint32_t*)ss->sids_;
    2363       // We must have an even number of SIDs.
   2364       DCHECK((kMaxSegmentSetSize % 2) == 0);
   2365 
   2366       G_stats->sshash_calls++;
    2367       // XOR all SIDs together, byte-swapping every second one.
   2368       for (int i = 0; i < kMaxSegmentSetSize; i += 2) {
   2369         uintptr_t t1 = sids_array[i];
   2370         uintptr_t t2 = sids_array[i+1];
   2371         if (t2) t2 = tsan_bswap(t2);
   2372         res = res ^ t1 ^ t2;
   2373       }
   2374       return res;
   2375     }
   2376   };
   2377 
   2378   struct SSTraits {
   2379     enum {
   2380       // These values are taken from the hash_compare defaults.
   2381       bucket_size = 4,  // Must be greater than zero.
   2382       min_buckets = 8,  // Must be power of 2.
   2383     };
   2384 
   2385     INLINE size_t operator()(const SegmentSet *ss) const {
   2386       SSHash sshash;
   2387       return sshash(ss);
   2388     }
   2389 
   2390     INLINE bool operator()(const SegmentSet *ss1, const SegmentSet *ss2) const {
   2391       Less less;
   2392       return less(ss1, ss2);
   2393     }
   2394   };
   2395 
   2396   template <class MapType>
   2397   static SSID GetIdOrZeroFromMap(MapType *map, SegmentSet *ss) {
   2398     typename MapType::iterator it = map->find(ss);
   2399     if (it == map->end())
   2400       return SSID(0);
   2401     return it->second;
   2402   }
   2403 
   2404   class Map {
   2405    public:
   2406     SSID GetIdOrZero(SegmentSet *ss) {
   2407       return GetIdOrZeroFromMap(&map_, ss);
   2408     }
   2409 
   2410     void Insert(SegmentSet *ss, SSID id) {
   2411       map_[ss] = id;
   2412     }
   2413 
   2414     void Erase(SegmentSet *ss) {
   2415       CHECK(map_.erase(ss));
   2416     }
   2417 
   2418     void Clear() {
   2419       map_.clear();
   2420     }
   2421 
   2422    private:
   2423     // TODO(timurrrr): consider making a custom hash_table.
   2424 #if defined(_MSC_VER)
   2425     typedef stdext::hash_map<SegmentSet*, SSID, SSTraits > MapType__;
   2426 #elif 1
   2427     typedef unordered_map<SegmentSet*, SSID, SSHash, SSEq > MapType__;
   2428 #else
   2429     // Old code, may be useful for debugging.
   2430     typedef map<SegmentSet*, SSID, Less > MapType__;
   2431 #endif
   2432     MapType__ map_;
   2433   };
   2434 
   2435 //  typedef map<SegmentSet*, SSID, Less> Map;
   2436 
   2437   static Map                  *map_;
   2438   // TODO(kcc): use vector<SegmentSet> instead.
   2439   static vector<SegmentSet *> *vec_;
   2440   static deque<SSID>         *ready_to_be_reused_;
   2441   static deque<SSID>         *ready_to_be_recycled_;
   2442 
   2443   typedef PairCache<SSID, SID, SSID, 1009, 1> SsidSidToSidCache;
   2444   static SsidSidToSidCache    *add_segment_cache_;
   2445   static SsidSidToSidCache    *remove_segment_cache_;
   2446 
   2447   // sids_ contains up to kMaxSegmentSetSize SIDs.
   2448   // Contains zeros at the end if size < kMaxSegmentSetSize.
   2449   SID     sids_[kMaxSegmentSetSize];
   2450   int32_t ref_count_;
   2451 };
   2452 
   2453 SegmentSet::Map      *SegmentSet::map_;
   2454 vector<SegmentSet *> *SegmentSet::vec_;
   2455 deque<SSID>         *SegmentSet::ready_to_be_reused_;
   2456 deque<SSID>         *SegmentSet::ready_to_be_recycled_;
   2457 SegmentSet::SsidSidToSidCache    *SegmentSet::add_segment_cache_;
   2458 SegmentSet::SsidSidToSidCache    *SegmentSet::remove_segment_cache_;
   2459 
   2460 
   2461 
   2462 
   2463 SSID SegmentSet::RemoveSegmentFromSS(SSID old_ssid, SID sid_to_remove) {
   2464   DCHECK(old_ssid.IsValidOrEmpty());
   2465   DCHECK(sid_to_remove.valid());
   2466   SSID res;
   2467   if (remove_segment_cache_->Lookup(old_ssid, sid_to_remove, &res)) {
   2468     return res;
   2469   }
   2470 
   2471   if (old_ssid.IsEmpty()) {
   2472     res = old_ssid;  // Nothing to remove.
   2473   } else if (LIKELY(old_ssid.IsSingleton())) {
   2474     SID sid = old_ssid.GetSingleton();
   2475     if (Segment::HappensBeforeOrSameThread(sid, sid_to_remove))
   2476       res = SSID(0);  // Empty.
   2477     else
   2478       res = old_ssid;
   2479   } else {
   2480     res = RemoveSegmentFromTupleSS(old_ssid, sid_to_remove);
   2481   }
   2482   remove_segment_cache_->Insert(old_ssid, sid_to_remove, res);
   2483   return res;
   2484 }
   2485 
   2486 
   2487 // static
   2488 //
   2489 // This method returns a SSID of a SegmentSet containing "new_sid" and all those
   2490 // segments from "old_ssid" which do not happen-before "new_sid".
   2491 //
   2492 // For details, see
   2493 // http://code.google.com/p/data-race-test/wiki/ThreadSanitizerAlgorithm#State_machine
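         //
         // Example (illustrative): let old_ssid be the singleton {T0/S1} and
         // new_sid be T1/S2 from another thread.
         //  * If S1 happens-before S2, S1 is dropped: the result is {T1/S2}.
         //  * If neither happens-before the other, the result is the doubleton
         //    {T0/S1, T1/S2} (ordered by TID), found or created via
         //    DoubletonSSID().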
   2494 SSID SegmentSet::AddSegmentToSS(SSID old_ssid, SID new_sid) {
   2495   DCHECK(old_ssid.raw() == 0 || old_ssid.valid());
   2496   DCHECK(new_sid.valid());
   2497   Segment::AssertLive(new_sid, __LINE__);
   2498   SSID res;
   2499 
   2500   // These two TIDs will only be used if old_ssid.IsSingleton() == true.
   2501   TID old_tid;
   2502   TID new_tid;
   2503 
   2504   if (LIKELY(old_ssid.IsSingleton())) {
   2505     SID old_sid(old_ssid.raw());
   2506     DCHECK(old_sid.valid());
   2507     Segment::AssertLive(old_sid, __LINE__);
   2508 
   2509     if (UNLIKELY(old_sid == new_sid)) {
   2510       // The new segment equals the old one - nothing has changed.
   2511       return old_ssid;
   2512     }
   2513 
   2514     old_tid = Segment::Get(old_sid)->tid();
   2515     new_tid = Segment::Get(new_sid)->tid();
   2516     if (LIKELY(old_tid == new_tid)) {
   2517       // The new segment is in the same thread - just replace the SID.
   2518       return SSID(new_sid);
   2519     }
   2520 
   2521     if (Segment::HappensBefore(old_sid, new_sid)) {
   2522       // The new segment is in another thread, but old segment
   2523       // happens before the new one - just replace the SID.
   2524       return SSID(new_sid);
   2525     }
   2526 
   2527     DCHECK(!Segment::HappensBefore(new_sid, old_sid));
    2528     // The only other case is the Singleton->Doubleton transition, see below.
   2529   } else if (LIKELY(old_ssid.IsEmpty())) {
   2530     return SSID(new_sid);
   2531   }
   2532 
   2533   // Lookup the cache.
   2534   if (add_segment_cache_->Lookup(old_ssid, new_sid, &res)) {
   2535     SegmentSet::AssertLive(res, __LINE__);
   2536     return res;
   2537   }
   2538 
   2539   if (LIKELY(old_ssid.IsSingleton())) {
    2540     // Singleton->Doubleton transition.
   2541     // These two TIDs were initialized before cache lookup (see above).
   2542     DCHECK(old_tid.valid());
   2543     DCHECK(new_tid.valid());
   2544 
   2545     SID old_sid(old_ssid.raw());
   2546     DCHECK(old_sid.valid());
   2547 
   2548     DCHECK(!Segment::HappensBefore(new_sid, old_sid));
   2549     DCHECK(!Segment::HappensBefore(old_sid, new_sid));
   2550     res = (old_tid < new_tid
   2551       ? DoubletonSSID(old_sid, new_sid)
   2552       : DoubletonSSID(new_sid, old_sid));
   2553     SegmentSet::AssertLive(res, __LINE__);
   2554   } else {
   2555     res = AddSegmentToTupleSS(old_ssid, new_sid);
   2556     SegmentSet::AssertLive(res, __LINE__);
   2557   }
   2558 
   2559   // Put the result into cache.
   2560   add_segment_cache_->Insert(old_ssid, new_sid, res);
   2561 
   2562   return res;
   2563 }
   2564 
   2565 SSID SegmentSet::RemoveSegmentFromTupleSS(SSID ssid, SID sid_to_remove) {
   2566   DCHECK(ssid.IsTuple());
   2567   DCHECK(ssid.valid());
   2568   AssertLive(ssid, __LINE__);
   2569   SegmentSet *ss = Get(ssid);
   2570 
   2571   int32_t old_size = 0, new_size = 0;
   2572   SegmentSet tmp;
   2573   SID * tmp_sids = tmp.sids_;
   2574   CHECK(sizeof(int32_t) == sizeof(SID));
   2575 
   2576   for (int i = 0; i < kMaxSegmentSetSize; i++, old_size++) {
   2577     SID sid = ss->GetSID(i);
   2578     if (sid.raw() == 0) break;
   2579     DCHECK(sid.valid());
   2580     Segment::AssertLive(sid, __LINE__);
   2581     if (Segment::HappensBeforeOrSameThread(sid, sid_to_remove))
   2582       continue;  // Skip this segment from the result.
   2583     tmp_sids[new_size++] = sid;
   2584   }
   2585 
   2586   if (new_size == old_size) return ssid;
   2587   if (new_size == 0) return SSID(0);
   2588   if (new_size == 1) return SSID(tmp_sids[0]);
   2589 
   2590   if (DEBUG_MODE) tmp.Validate(__LINE__);
   2591 
   2592   SSID res = FindExistingOrAlocateAndCopy(&tmp);
   2593   if (DEBUG_MODE) Get(res)->Validate(__LINE__);
   2594   return res;
   2595 }
   2596 
   2597 //  static
   2598 SSID SegmentSet::AddSegmentToTupleSS(SSID ssid, SID new_sid) {
   2599   DCHECK(ssid.IsTuple());
   2600   DCHECK(ssid.valid());
   2601   AssertLive(ssid, __LINE__);
   2602   SegmentSet *ss = Get(ssid);
   2603 
   2604   Segment::AssertLive(new_sid, __LINE__);
   2605   const Segment *new_seg = Segment::Get(new_sid);
   2606   TID            new_tid = new_seg->tid();
   2607 
   2608   int32_t old_size = 0, new_size = 0;
   2609   SID tmp_sids[kMaxSegmentSetSize + 1];
   2610   CHECK(sizeof(int32_t) == sizeof(SID));
   2611   bool inserted_new_sid = false;
    2612   // Traverse all SIDs in the current SS; TIDs are ordered.
   2613   for (int i = 0; i < kMaxSegmentSetSize; i++, old_size++) {
   2614     SID sid = ss->GetSID(i);
   2615     if (sid.raw() == 0) break;
   2616     DCHECK(sid.valid());
   2617     Segment::AssertLive(sid, __LINE__);
   2618     const Segment *seg = Segment::Get(sid);
   2619     TID            tid = seg->tid();
   2620 
   2621     if (sid == new_sid) {
   2622       // we are trying to insert a sid which is already there.
   2623       // SS will not change.
   2624       return ssid;
   2625     }
   2626 
   2627     if (tid == new_tid) {
   2628       if (seg->vts() == new_seg->vts() &&
   2629           seg->lsid(true) == new_seg->lsid(true) &&
   2630           seg->lsid(false) == new_seg->lsid(false)) {
   2631         // Optimization: if a segment with the same VTS and LS
    2632         // as the current one is already inside the SS, don't modify the SS.
   2633         // Improves performance with --keep-history >= 1.
   2634         return ssid;
   2635       }
   2636       // we have another segment from the same thread => replace it.
   2637       tmp_sids[new_size++] = new_sid;
   2638       inserted_new_sid = true;
   2639       continue;
   2640     }
   2641 
   2642     if (tid > new_tid && !inserted_new_sid) {
    2643       // There was no segment with tid == new_tid yet; insert new_sid now.
   2644       tmp_sids[new_size++] = new_sid;
   2645       inserted_new_sid = true;
   2646     }
   2647 
   2648     if (!Segment::HappensBefore(sid, new_sid)) {
   2649       DCHECK(!Segment::HappensBefore(new_sid, sid));
   2650       tmp_sids[new_size++] = sid;
   2651     }
   2652   }
   2653 
   2654   if (!inserted_new_sid) {
   2655     tmp_sids[new_size++] = new_sid;
   2656   }
   2657 
   2658   CHECK_GT(new_size, 0);
   2659   if (new_size == 1) {
   2660     return SSID(new_sid.raw());  // Singleton.
   2661   }
   2662 
   2663   if (new_size > kMaxSegmentSetSize) {
   2664     CHECK(new_size == kMaxSegmentSetSize + 1);
   2665     // we need to forget one segment. Which? The oldest one.
   2666     int seg_to_forget = 0;
   2667     Segment *oldest_segment = NULL;
   2668     for (int i = 0; i < new_size; i++) {
   2669       SID sid = tmp_sids[i];
   2670       if (sid == new_sid) continue;
   2671       Segment *s = Segment::Get(tmp_sids[i]);
   2672       if (oldest_segment == NULL ||
   2673           oldest_segment->vts()->uniq_id() > s->vts()->uniq_id()) {
   2674         oldest_segment = s;
   2675         seg_to_forget = i;
   2676       }
   2677     }
   2678     DCHECK(oldest_segment);
   2679 
   2680     // Printf("seg_to_forget: %d T%d\n", tmp_sids[seg_to_forget].raw(),
   2681     //        oldest_segment->tid().raw());
   2682     for (int i = seg_to_forget; i < new_size - 1; i++) {
   2683       tmp_sids[i] = tmp_sids[i+1];
   2684     }
   2685     new_size--;
   2686   }
   2687 
   2688   CHECK(new_size <= kMaxSegmentSetSize);
   2689   SegmentSet tmp;
   2690   for (int i = 0; i < new_size; i++)
   2691     tmp.sids_[i] = tmp_sids[i];  // TODO(timurrrr): avoid copying?
   2692   if (DEBUG_MODE) tmp.Validate(__LINE__);
   2693 
   2694   SSID res = FindExistingOrAlocateAndCopy(&tmp);
   2695   if (DEBUG_MODE) Get(res)->Validate(__LINE__);
   2696   return res;
   2697 }
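
         // A worked example for AddSegmentToTupleSS (it mirrors SegmentSet::Test()
         // below): ss == {T0/S1, T2/S3}, new_sid == T1/S2, and no happens-before
         // relations hold. The TID-ordered scan keeps S1 (T0 < T1), inserts S2
         // upon meeting T2 > T1, then keeps S3, yielding {T0/S1, T1/S2, T2/S3}.
         // Had S1 happened-before S2, S1 would have been dropped instead.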
   2698 
   2699 
   2700 
   2701 void NOINLINE SegmentSet::Validate(int line) const {
   2702   // This is expensive!
   2703   int my_size = size();
   2704   for (int i = 0; i < my_size; i++) {
   2705     SID sid1 = GetSID(i);
   2706     CHECK(sid1.valid());
   2707     Segment::AssertLive(sid1, __LINE__);
   2708 
   2709     for (int j = i + 1; j < my_size; j++) {
   2710       SID sid2 = GetSID(j);
   2711       CHECK(sid2.valid());
   2712       Segment::AssertLive(sid2, __LINE__);
   2713 
   2714       bool hb1 = Segment::HappensBefore(sid1, sid2);
   2715       bool hb2 = Segment::HappensBefore(sid2, sid1);
   2716       if (hb1 || hb2) {
   2717         Printf("BAD at line %d: %d %d %s %s\n   %s\n   %s\n",
   2718                line, static_cast<int>(hb1), static_cast<int>(hb2),
   2719                Segment::ToString(sid1).c_str(),
   2720                Segment::ToString(sid2).c_str(),
   2721                Segment::Get(sid1)->vts()->ToString().c_str(),
   2722                Segment::Get(sid2)->vts()->ToString().c_str());
   2723       }
   2724       CHECK(!Segment::HappensBefore(GetSID(i), GetSID(j)));
   2725       CHECK(!Segment::HappensBefore(GetSID(j), GetSID(i)));
   2726       CHECK(Segment::Get(sid1)->tid() < Segment::Get(sid2)->tid());
   2727     }
   2728   }
   2729 
   2730   for (int i = my_size; i < kMaxSegmentSetSize; i++) {
   2731     CHECK_EQ(sids_[i].raw(), 0);
   2732   }
   2733 }
   2734 
   2735 string SegmentSet::ToStringWithLocks(SSID ssid) {
   2736   if (ssid.IsEmpty()) return "";
   2737   string res = "";
   2738   for (int i = 0; i < Size(ssid); i++) {
   2739     SID sid = GetSID(ssid, i, __LINE__);
   2740     if (i) res += ", ";
   2741     res += Segment::ToStringWithLocks(sid);
   2742   }
   2743   return res;
   2744 }
   2745 
   2746 string SegmentSet::ToString() const {
   2747   Validate(__LINE__);
   2748   string res = "{";
   2749   for (int i = 0; i < size(); i++) {
   2750     SID sid = GetSID(i);
   2751     if (i) res += ", ";
   2752     CHECK(sid.valid());
   2753     Segment::AssertLive(sid, __LINE__);
   2754     res += Segment::ToStringTidOnly(sid).c_str();
   2755   }
   2756   res += "}";
   2757   return res;
   2758 }
   2759 
   2760 // static
   2761 void SegmentSet::Test() {
   2762   LSID ls(0);  // dummy
   2763   SID sid1 = Segment::AddNewSegment(TID(0), VTS::Parse("[0:2;]"), ls, ls);
   2764   SID sid2 = Segment::AddNewSegment(TID(1), VTS::Parse("[0:1; 1:1]"), ls, ls);
   2765   SID sid3 = Segment::AddNewSegment(TID(2), VTS::Parse("[0:1; 2:1]"), ls, ls);
   2766   SID sid4 = Segment::AddNewSegment(TID(3), VTS::Parse("[0:1; 3:1]"), ls, ls);
   2767   SID sid5 = Segment::AddNewSegment(TID(4), VTS::Parse("[0:3; 2:2; 3:2;]"),
   2768                                     ls, ls);
   2769   SID sid6 = Segment::AddNewSegment(TID(4), VTS::Parse("[0:3; 1:2; 2:2; 3:2;]"),
   2770                                     ls, ls);
   2771 
   2772 
   2773   // SS1:{T0/S1, T2/S3}
   2774   SegmentSet *d1 = SegmentSet::Doubleton(sid1, sid3);
   2775   d1->Print();
   2776   CHECK(SegmentSet::Doubleton(sid1, sid3) == d1);
   2777   // SS2:{T0/S1, T1/S2, T2/S3}
   2778   SegmentSet *d2 = SegmentSet::AddSegmentToTupleSS(d1, sid2);
   2779   CHECK(SegmentSet::AddSegmentToTupleSS(d1, sid2) == d2);
   2780   d2->Print();
   2781 
   2782   // SS3:{T0/S1, T2/S3, T3/S4}
   2783   SegmentSet *d3 = SegmentSet::AddSegmentToTupleSS(d1, sid4);
   2784   CHECK(SegmentSet::AddSegmentToTupleSS(d1, sid4) == d3);
   2785   d3->Print();
   2786 
   2787   // SS4:{T0/S1, T1/S2, T2/S3, T3/S4}
   2788   SegmentSet *d4 = SegmentSet::AddSegmentToTupleSS(d2, sid4);
   2789   CHECK(SegmentSet::AddSegmentToTupleSS(d2, sid4) == d4);
   2790   CHECK(SegmentSet::AddSegmentToTupleSS(d3, sid2) == d4);
   2791   d4->Print();
   2792 
   2793   // SS5:{T1/S2, T4/S5}
   2794   SegmentSet *d5 = SegmentSet::AddSegmentToTupleSS(d4, sid5);
   2795   d5->Print();
   2796 
   2797   SSID ssid6 = SegmentSet::AddSegmentToTupleSS(d4->ComputeSSID(), sid6);
   2798   CHECK(ssid6.IsSingleton());
   2799   Printf("%s\n", ToString(ssid6).c_str());
   2800   CHECK_EQ(sid6.raw(), 6);
   2801   CHECK_EQ(ssid6.raw(), 6);
   2802 }
   2803 
   2804 // -------- Shadow Value ------------ {{{1
   2805 class ShadowValue {
   2806  public:
   2807   ShadowValue() {
   2808     if (DEBUG_MODE) {
   2809       rd_ssid_ = 0xDEADBEEF;
   2810       wr_ssid_ = 0xDEADBEEF;
   2811     }
   2812   }
   2813 
   2814   void Clear() {
   2815     rd_ssid_ = 0;
   2816     wr_ssid_ = 0;
   2817   }
   2818 
   2819   INLINE bool IsNew() const { return rd_ssid_ == 0 && wr_ssid_ == 0; }
   2820   // new experimental state machine.
   2821   SSID rd_ssid() const { return SSID(rd_ssid_); }
   2822   SSID wr_ssid() const { return SSID(wr_ssid_); }
   2823   INLINE void set(SSID rd_ssid, SSID wr_ssid) {
   2824     rd_ssid_ = rd_ssid.raw();
   2825     wr_ssid_ = wr_ssid.raw();
   2826   }
   2827 
   2828   // comparison
   2829   INLINE bool operator == (const ShadowValue &sval) const {
   2830     return rd_ssid_ == sval.rd_ssid_ &&
   2831         wr_ssid_ == sval.wr_ssid_;
   2832   }
   2833   bool operator != (const ShadowValue &sval) const {
   2834     return !(*this == sval);
   2835   }
   2836   bool operator <  (const ShadowValue &sval) const {
   2837     if (rd_ssid_ < sval.rd_ssid_) return true;
   2838     if (rd_ssid_ == sval.rd_ssid_ && wr_ssid_ < sval.wr_ssid_) return true;
   2839     return false;
   2840   }
   2841 
   2842   void Ref(const char *where) {
   2843     if (!rd_ssid().IsEmpty()) {
   2844       DCHECK(rd_ssid().valid());
   2845       SegmentSet::Ref(rd_ssid(), where);
   2846     }
   2847     if (!wr_ssid().IsEmpty()) {
   2848       DCHECK(wr_ssid().valid());
   2849       SegmentSet::Ref(wr_ssid(), where);
   2850     }
   2851   }
   2852 
   2853   void Unref(const char *where) {
   2854     if (!rd_ssid().IsEmpty()) {
   2855       DCHECK(rd_ssid().valid());
   2856       SegmentSet::Unref(rd_ssid(), where);
   2857     }
   2858     if (!wr_ssid().IsEmpty()) {
   2859       DCHECK(wr_ssid().valid());
   2860       SegmentSet::Unref(wr_ssid(), where);
   2861     }
   2862   }
   2863 
   2864   string ToString() const {
   2865     char buff[1000];
   2866     if (IsNew()) {
   2867       return "{New}";
   2868     }
   2869     snprintf(buff, sizeof(buff), "R: %s; W: %s",
   2870             SegmentSet::ToStringWithLocks(rd_ssid()).c_str(),
   2871             SegmentSet::ToStringWithLocks(wr_ssid()).c_str());
   2872     return buff;
   2873   }
   2874 
   2875  private:
   2876   int32_t rd_ssid_;
   2877   int32_t wr_ssid_;
   2878 };
   2879 
   2880 // -------- CacheLine --------------- {{{1
   2881 // The CacheLine is a set of Mask::kNBits (32 or 64) Shadow Values.
   2882 // The shadow values in a cache line are grouped in subsets of 8 values.
   2883 // If a particular address of memory is always accessed by aligned 8-byte
    2884 // read/write instructions, only the shadow value corresponding to the
    2885 // first byte is set; the remaining shadow values are not used.
    2886 // Ditto for aligned 4- and 2-byte accesses.
    2887 // If memory was accessed as 8 bytes and is then accessed as 4 bytes
    2888 // (e.g. someone used a C union), we need to split the shadow value into two.
   2889 // If the memory was accessed as 4 bytes and is now accessed as 8 bytes,
   2890 // we need to try joining the shadow values.
   2891 //
   2892 // Hence the concept of granularity_mask (which is a string of 16 bits).
   2893 // 0000000000000000 -- no accesses were observed to these 8 bytes.
   2894 // 0000000000000001 -- all accesses were 8 bytes (aligned).
   2895 // 0000000000000110 -- all accesses were 4 bytes (aligned).
   2896 // 0000000001111000 -- all accesses were 2 bytes (aligned).
   2897 // 0111111110000000 -- all accesses were 1 byte.
   2898 // 0110000000100010 -- First 4 bytes were accessed by 4 byte insns,
   2899 //   next 2 bytes by 2 byte insns, last 2 bytes by 1 byte insns.
   2900 
   2901 
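         // The bit layout behind the Granularity* predicates below: bit 0
         // covers the whole 8-byte word, bits [1,3) the two 4-byte halves,
         // bits [3,7) the four 2-byte quarters, and bits [7,15) the eight
         // individual bytes. In the last example above (0110000000100010),
         // bit 1 marks bytes [0,4) as 4-byte, bit 5 marks bytes [4,6) as
         // 2-byte, and bits 13-14 mark bytes 6 and 7 as 1-byte accesses.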
   2902 INLINE bool GranularityIs8(uintptr_t off, uint16_t gr) {
   2903   return gr & 1;
   2904 }
   2905 
   2906 INLINE bool GranularityIs4(uintptr_t off, uint16_t gr) {
   2907   uintptr_t off_within_8_bytes = (off >> 2) & 1;  // 0 or 1.
   2908   return ((gr >> (1 + off_within_8_bytes)) & 1);
   2909 }
   2910 
   2911 INLINE bool GranularityIs2(uintptr_t off, uint16_t gr) {
   2912   uintptr_t off_within_8_bytes = (off >> 1) & 3;  // 0, 1, 2, or 3
   2913   return ((gr >> (3 + off_within_8_bytes)) & 1);
   2914 }
   2915 
   2916 INLINE bool GranularityIs1(uintptr_t off, uint16_t gr) {
   2917   uintptr_t off_within_8_bytes = (off) & 7;       // 0, ..., 7
   2918   return ((gr >> (7 + off_within_8_bytes)) & 1);
   2919 }
   2920 
   2921 class CacheLine {
   2922  public:
   2923   static const uintptr_t kLineSizeBits = Mask::kNBitsLog;  // Don't change this.
   2924   static const uintptr_t kLineSize = Mask::kNBits;
   2925 
   2926   static CacheLine *CreateNewCacheLine(uintptr_t tag) {
   2927     ScopedMallocCostCenter cc("CreateNewCacheLine");
   2928     void *mem = free_list_->Allocate();
   2929     DCHECK(mem);
   2930     return new (mem) CacheLine(tag);
   2931   }
   2932 
   2933   static void Delete(CacheLine *line) {
   2934     free_list_->Deallocate(line);
   2935   }
   2936 
   2937   const Mask &has_shadow_value() const { return has_shadow_value_;  }
   2938   Mask &traced() { return traced_; }
   2939   Mask &published() { return published_; }
   2940   Mask &racey()  { return racey_; }
   2941   uintptr_t tag() { return tag_; }
   2942 
   2943   void DebugTrace(uintptr_t off, const char *where_str, int where_int) {
   2944     if (DEBUG_MODE && tag() == G_flags->trace_addr) {
   2945       uintptr_t off8 = off & ~7;
   2946       Printf("CacheLine %p, off=%ld off8=%ld gr=%d "
   2947              "has_sval: %d%d%d%d%d%d%d%d (%s:%d)\n",
   2948              tag(), off, off8,
   2949              granularity_[off/8],
   2950              has_shadow_value_.Get(off8 + 0),
   2951              has_shadow_value_.Get(off8 + 1),
   2952              has_shadow_value_.Get(off8 + 2),
   2953              has_shadow_value_.Get(off8 + 3),
   2954              has_shadow_value_.Get(off8 + 4),
   2955              has_shadow_value_.Get(off8 + 5),
   2956              has_shadow_value_.Get(off8 + 6),
   2957              has_shadow_value_.Get(off8 + 7),
   2958              where_str, where_int
   2959              );
   2960     }
   2961   }
   2962 
   2963   // Add a new shadow value to a place where there was no shadow value before.
   2964   ShadowValue *AddNewSvalAtOffset(uintptr_t off) {
   2965     DebugTrace(off, __FUNCTION__, __LINE__);
   2966     CHECK(!has_shadow_value().Get(off));
   2967     has_shadow_value_.Set(off);
   2968     published_.Clear(off);
   2969     ShadowValue *res = GetValuePointer(off);
   2970     res->Clear();
   2971     DebugTrace(off, __FUNCTION__, __LINE__);
   2972     return res;
   2973   }
   2974 
   2975   // Return true if this line has no useful information in it.
   2976   bool Empty() {
    2977     // If the line has shadow values, we must keep it.
   2978     if (!has_shadow_value().Empty()) return false;
   2979     // If the line is traced, racey or published, we want to keep it.
   2980     if (!traced().Empty()) return false;
   2981     if (!racey().Empty()) return false;
   2982     if (!published().Empty()) return false;
   2983     return true;
   2984   }
   2985 
   2986   INLINE Mask ClearRangeAndReturnOldUsed(uintptr_t from, uintptr_t to) {
   2987     traced_.ClearRange(from, to);
   2988     published_.ClearRange(from, to);
   2989     racey_.ClearRange(from, to);
   2990     for (uintptr_t x = (from + 7) / 8; x < to / 8; x++) {
   2991       granularity_[x] = 0;
   2992     }
   2993     return has_shadow_value_.ClearRangeAndReturnOld(from, to);
   2994   }
   2995 
   2996   void Clear() {
   2997     has_shadow_value_.Clear();
   2998     traced_.Clear();
   2999     published_.Clear();
   3000     racey_.Clear();
   3001     for (size_t i = 0; i < TS_ARRAY_SIZE(granularity_); i++)
   3002       granularity_[i] = 0;
   3003   }
   3004 
   3005   ShadowValue *GetValuePointer(uintptr_t offset) {
   3006     DCHECK(offset < kLineSize);
   3007     return  &vals_[offset];
   3008   }
   3009   ShadowValue  GetValue(uintptr_t offset) { return *GetValuePointer(offset); }
   3010 
   3011   static uintptr_t ComputeOffset(uintptr_t a) {
   3012     return a & (kLineSize - 1);
   3013   }
   3014   static uintptr_t ComputeTag(uintptr_t a) {
   3015     return a & ~(kLineSize - 1);
   3016   }
   3017   static uintptr_t ComputeNextTag(uintptr_t a) {
   3018     return ComputeTag(a) + kLineSize;
   3019   }
   3020 
   3021   uint16_t *granularity_mask(uintptr_t off) {
   3022     DCHECK(off < kLineSize);
   3023     return &granularity_[off / 8];
   3024   }
   3025 
   3026   void Split_8_to_4(uintptr_t off) {
   3027     DebugTrace(off, __FUNCTION__, __LINE__);
   3028     uint16_t gr = *granularity_mask(off);
   3029     if (GranularityIs8(off, gr)) {
   3030       DCHECK(!GranularityIs4(off, gr));
   3031       DCHECK(!GranularityIs2(off, gr));
   3032       DCHECK(!GranularityIs1(off, gr));
   3033       uintptr_t off_8_aligned = off & ~7;
   3034       if (has_shadow_value_.Get(off_8_aligned)) {
   3035         ShadowValue sval = GetValue(off_8_aligned);
   3036         sval.Ref("Split_8_to_4");
   3037         DCHECK(!has_shadow_value_.Get(off_8_aligned + 4));
   3038         *AddNewSvalAtOffset(off_8_aligned + 4) = sval;
   3039       }
   3040       *granularity_mask(off) = gr = 3 << 1;
   3041       DCHECK(GranularityIs4(off, gr));
   3042       DebugTrace(off, __FUNCTION__, __LINE__);
   3043     }
   3044   }
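
           // A sketch of the split (the granularity values follow from the
           // predicates above): if the word was tracked as one 8-byte access,
           // gr == 0000000000000001. Split_8_to_4() copies the shadow value to
           // off8 + 4 (taking an extra reference) and rewrites gr to
           // 3 << 1 == 0000000000000110, so each 4-byte half now has its own
           // shadow value.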
   3045 
   3046   void Split_4_to_2(uintptr_t off) {
   3047     DebugTrace(off, __FUNCTION__, __LINE__);
   3048     uint16_t gr = *granularity_mask(off);
   3049     if (GranularityIs4(off, gr)) {
   3050       DCHECK(!GranularityIs8(off, gr));
   3051       DCHECK(!GranularityIs2(off, gr));
   3052       DCHECK(!GranularityIs1(off, gr));
   3053       uint16_t off_4_aligned = off & ~3;
   3054       if (has_shadow_value_.Get(off_4_aligned)) {
   3055         ShadowValue sval = GetValue(off_4_aligned);
   3056         sval.Ref("Split_4_to_2");
   3057         DCHECK(!has_shadow_value_.Get(off_4_aligned + 2));
   3058         *AddNewSvalAtOffset(off_4_aligned + 2) = sval;
   3059       }
   3060       // Clear this 4-granularity bit.
   3061       uintptr_t off_within_8_bytes = (off >> 2) & 1;  // 0 or 1.
   3062       gr &= ~(1 << (1 + off_within_8_bytes));
   3063       // Set two 2-granularity bits.
   3064       gr |= 3 << (3 + 2 * off_within_8_bytes);
   3065       *granularity_mask(off) = gr;
   3066       DebugTrace(off, __FUNCTION__, __LINE__);
   3067     }
   3068   }
   3069 
   3070   void Split_2_to_1(uintptr_t off) {
   3071     DebugTrace(off, __FUNCTION__, __LINE__);
   3072     uint16_t gr = *granularity_mask(off);
   3073     if (GranularityIs2(off, gr)) {
   3074       DCHECK(!GranularityIs8(off, gr));
   3075       DCHECK(!GranularityIs4(off, gr));
   3076       DCHECK(!GranularityIs1(off, gr));
   3077       uint16_t off_2_aligned = off & ~1;
   3078       if (has_shadow_value_.Get(off_2_aligned)) {
   3079         ShadowValue sval = GetValue(off_2_aligned);
   3080         sval.Ref("Split_2_to_1");
   3081         DCHECK(!has_shadow_value_.Get(off_2_aligned + 1));
   3082         *AddNewSvalAtOffset(off_2_aligned + 1) = sval;
   3083       }
   3084       // Clear this 2-granularity bit.
   3085       uintptr_t off_within_8_bytes = (off >> 1) & 3;  // 0, 1, 2, or 3
   3086       gr &= ~(1 << (3 + off_within_8_bytes));
   3087       // Set two 1-granularity bits.
   3088       gr |= 3 << (7 + 2 * off_within_8_bytes);
   3089       *granularity_mask(off) = gr;
   3090       DebugTrace(off, __FUNCTION__, __LINE__);
   3091     }
   3092   }
   3093 
   3094   void Join_1_to_2(uintptr_t off) {
   3095     DebugTrace(off, __FUNCTION__, __LINE__);
   3096     DCHECK((off & 1) == 0);
   3097     uint16_t gr = *granularity_mask(off);
   3098     if (GranularityIs1(off, gr)) {
   3099       DCHECK(GranularityIs1(off + 1, gr));
   3100       if (has_shadow_value_.Get(off) && has_shadow_value_.Get(off + 1)) {
   3101         if (GetValue(off) == GetValue(off + 1)) {
   3102           ShadowValue *sval_p = GetValuePointer(off + 1);
   3103           sval_p->Unref("Join_1_to_2");
   3104           sval_p->Clear();
   3105           has_shadow_value_.Clear(off + 1);
   3106           uintptr_t off_within_8_bytes = (off >> 1) & 3;  // 0, 1, 2, or 3
   3107           // Clear two 1-granularity bits.
   3108           gr &= ~(3 << (7 + 2 * off_within_8_bytes));
   3109           // Set one 2-granularity bit.
   3110           gr |= 1 << (3 + off_within_8_bytes);
   3111           *granularity_mask(off) = gr;
   3112           DebugTrace(off, __FUNCTION__, __LINE__);
   3113         }
   3114       }
   3115     }
   3116   }
   3117 
   3118   void Join_2_to_4(uintptr_t off) {
   3119     DebugTrace(off, __FUNCTION__, __LINE__);
   3120     DCHECK((off & 3) == 0);
   3121     uint16_t gr = *granularity_mask(off);
   3122     if (GranularityIs2(off, gr) && GranularityIs2(off + 2, gr)) {
   3123       if (has_shadow_value_.Get(off) && has_shadow_value_.Get(off + 2)) {
   3124         if (GetValue(off) == GetValue(off + 2)) {
   3125           ShadowValue *sval_p = GetValuePointer(off + 2);
   3126           sval_p->Unref("Join_2_to_4");
   3127           sval_p->Clear();
   3128           has_shadow_value_.Clear(off + 2);
   3129           uintptr_t off_within_8_bytes = (off >> 2) & 1;  // 0 or 1.
   3130           // Clear two 2-granularity bits.
   3131           gr &= ~(3 << (3 + 2 * off_within_8_bytes));
   3132           // Set one 4-granularity bit.
   3133           gr |= 1 << (1 + off_within_8_bytes);
   3134           *granularity_mask(off) = gr;
   3135           DebugTrace(off, __FUNCTION__, __LINE__);
   3136         }
   3137       }
   3138     }
   3139   }
   3140 
   3141   void Join_4_to_8(uintptr_t off) {
   3142     DebugTrace(off, __FUNCTION__, __LINE__);
   3143     DCHECK((off & 7) == 0);
   3144     uint16_t gr = *granularity_mask(off);
   3145     if (GranularityIs4(off, gr) && GranularityIs4(off + 4, gr)) {
   3146       if (has_shadow_value_.Get(off) && has_shadow_value_.Get(off + 4)) {
   3147         if (GetValue(off) == GetValue(off + 4)) {
   3148           ShadowValue *sval_p = GetValuePointer(off + 4);
   3149           sval_p->Unref("Join_4_to_8");
   3150           sval_p->Clear();
   3151           has_shadow_value_.Clear(off + 4);
   3152           *granularity_mask(off) = 1;
   3153           DebugTrace(off, __FUNCTION__, __LINE__);
   3154         }
   3155       }
   3156     }
   3157   }
   3158 
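          // Layout of the 16-bit granularity mask for one 8-byte span,
          // as implied by the Split_*/Join_* code above:
          //   bit  0     - the span is one 8-byte value,
          //   bits 1..2  - two 4-byte values,
          //   bits 3..6  - four 2-byte values,
          //   bits 7..14 - eight 1-byte values (bit 15 appears unused).
          // Worked example: a span tracked as one 8-byte value has gr == 1
          // (see Join_4_to_8). After Split_8_to_4() gr == (3 << 1) == 0x06;
          // a further Split_4_to_2() on the upper half clears bit 2 and sets
          // bits 5..6, giving gr == 0x62.
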
   3159   static void InitClassMembers() {
   3160     if (DEBUG_MODE) {
   3161       Printf("sizeof(CacheLine) = %ld\n", sizeof(CacheLine));
   3162     }
   3163     free_list_ = new FreeList(sizeof(CacheLine), 1024);
   3164   }
   3165 
   3166  private:
   3167   explicit CacheLine(uintptr_t tag) {
   3168     tag_ = tag;
   3169     Clear();
   3170   }
   3171   ~CacheLine() { }
   3172 
   3173   uintptr_t tag_;
   3174 
   3175   // data members
   3176   Mask has_shadow_value_;
   3177   Mask traced_;
   3178   Mask racey_;
   3179   Mask published_;
   3180   uint16_t granularity_[kLineSize / 8];
   3181   ShadowValue vals_[kLineSize];
   3182 
   3183   // static data members.
   3184   static FreeList *free_list_;
   3185 };
   3186 
   3187 FreeList *CacheLine::free_list_;
   3188 
   3189 // If the range [a,b) fits into one cache line, return that line's tag.
   3190 // Otherwise the range [a,b) is broken into these ranges:
   3191 //   [a, line1_tag)
   3192 //   [line1_tag, line2_tag)
   3193 //   [line2_tag, b)
   3194 // and 0 is returned.
   3195 uintptr_t GetCacheLinesForRange(uintptr_t a, uintptr_t b,
   3196                                 uintptr_t *line1_tag, uintptr_t *line2_tag) {
   3197   uintptr_t a_tag = CacheLine::ComputeTag(a);
   3198   uintptr_t next_tag = CacheLine::ComputeNextTag(a);
   3199   if (b < next_tag) {
   3200     return a_tag;
   3201   }
   3202   *line1_tag = next_tag;
   3203   *line2_tag = CacheLine::ComputeTag(b);
   3204   return 0;
   3205 }
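
        // Worked example (illustrative; assumes CacheLine::kLineSize == 64):
        // for a = 0x1030, b = 0x10b0 the range spans three lines, so
        // line1_tag = 0x1040, line2_tag = 0x1080 and 0 is returned; the caller
        // must then handle [0x1030, 0x1040), [0x1040, 0x1080) and
        // [0x1080, 0x10b0) separately. For a = 0x1030, b = 0x1038 the whole
        // range fits in one line and the tag 0x1000 is returned.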
   3206 
   3207 
   3208 
   3209 // -------- DirectMapCacheForRange -------------- {{{1
   3210 // Fast cache which stores cache lines for memory in range [kMin, kMax).
   3211 // The simplest way to force a program to allocate its memory in the first 2G
   3212 // is to set MALLOC_MMAP_MAX_=0 (works with the regular malloc on Linux).
   3213 
   3214 #ifdef TS_DIRECT_MAP
   3215 
   3216 template<size_t kMin, size_t kMax>
   3217 class DirectMapCacheForRange {
   3218  public:
   3219   DirectMapCacheForRange() {
   3220     Report("INFO: Allocating %ldMb for fast cache\n", sizeof(*this) >> 20);
   3221     memset(cache_, 0, sizeof(cache_));
   3222   }
   3223 
   3224   INLINE bool AddressIsInRange(uintptr_t a) {
   3225     return a >= kMin && a < kMax;
   3226   }
   3227 
   3228   INLINE CacheLine *GetLine(uintptr_t a, bool create_new_if_need) {
   3229     CHECK(AddressIsInRange(a));
   3230     uintptr_t cli = (a - kMin) >> CacheLine::kLineSizeBits;
   3231     CHECK(cli < kCacheSize);
   3232     CacheLine **cache_line_p = &cache_[cli];
   3233     if (*cache_line_p == NULL) {
   3234       if (create_new_if_need == false) return NULL;
   3235       AssertTILHeld();
   3236       uintptr_t tag = CacheLine::ComputeTag(a);
   3237       *cache_line_p = CacheLine::CreateNewCacheLine(tag);
   3238     }
   3239     DCHECK(*cache_line_p);
   3240     return *cache_line_p;
   3241   }
   3242  private:
   3243   enum { kRangeSize = kMax - kMin };
   3244   enum { kCacheSize = kRangeSize / CacheLine::kLineSize };
   3245   CacheLine *cache_[kCacheSize];
   3246 };
   3247 
   3248 #else
   3249 
   3250 template<size_t kMin, size_t kMax>
   3251 class DirectMapCacheForRange {
   3252  public:
   3253   INLINE bool AddressIsInRange(uintptr_t a) {
   3254     return false;
   3255   }
   3256 
   3257   INLINE CacheLine *GetLine(uintptr_t a, bool create_new_if_need) {
   3258     CHECK(AddressIsInRange(a));
   3259     return NULL;
   3260   }
   3261 };
   3262 
   3263 #endif
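
        // Illustrative sizing for the instantiation used below,
        // DirectMapCacheForRange<0, (1<<30)>, assuming kLineSizeBits == 6:
        // the table has (1<<30) >> 6 == 16M slots, one per 64-byte line, so an
        // address a maps directly to slot (a - kMin) >> 6 with no hashing and
        // hence no collisions or evictions; with 8-byte pointers the table
        // itself takes 128Mb (cf. the INFO report in the constructor).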
   3264 
   3265 // -------- Cache ------------------ {{{1
   3266 class Cache {
   3267  public:
   3268   Cache() {
   3269     memset(lines_, 0, sizeof(lines_));
   3270     ANNOTATE_BENIGN_RACE_SIZED(lines_, sizeof(lines_),
   3271                                "Cache::lines_ accessed without a lock");
   3272   }
   3273 
   3274   INLINE static CacheLine *kLineIsLocked() {
   3275     return (CacheLine*)1;
   3276   }
   3277 
   3278   INLINE static bool LineIsNullOrLocked(CacheLine *line) {
   3279     return (uintptr_t)line <= 1;
   3280   }
   3281 
   3282   INLINE CacheLine *TidMagic(int32_t tid) {
   3283     return kLineIsLocked();
   3284   }
   3285 
   3286   INLINE bool IsInDirectCache(uintptr_t a) {
   3287     return direct_cache_.AddressIsInRange(a);
   3288   }
   3289 
   3290   // Try to get a CacheLine for exclusive use.
   3291   // May return NULL or kLineIsLocked.
   3292   INLINE CacheLine *TryAcquireLine(Thread *thr, uintptr_t a, int call_site) {
   3293     if (IsInDirectCache(a)) {
   3294       return direct_cache_.GetLine(a, false);
   3295     }
   3296     uintptr_t cli = ComputeCacheLineIndexInCache(a);
   3297     CacheLine **addr = &lines_[cli];
   3298     CacheLine *res = (CacheLine*)AtomicExchange(
   3299            (uintptr_t*)addr, (uintptr_t)kLineIsLocked());
   3300     if (DEBUG_MODE && debug_cache) {
   3301       uintptr_t tag = CacheLine::ComputeTag(a);
   3302       if (res && res != kLineIsLocked())
   3303         Printf("TryAcquire %p empty=%d tag=%lx cli=%lx site=%d\n",
   3304                res, res->Empty(), res->tag(), cli, call_site);
   3305       else
   3306         Printf("TryAcquire tag=%lx cli=%d site=%d\n", tag, cli, call_site);
   3307     }
   3308     if (res) {
   3309       ANNOTATE_HAPPENS_AFTER((void*)cli);
   3310     }
   3311     return res;
   3312   }
   3313 
   3314   INLINE CacheLine *AcquireLine(Thread *thr, uintptr_t a, int call_site) {
   3315     CHECK(!IsInDirectCache(a));
   3316     CacheLine *line = NULL;
   3317     int iter = 0;
   3318     const int max_iter = 1 << 30;
   3319     do {
   3320       line = TryAcquireLine(thr, a, call_site);
   3321       iter++;
   3322       if ((iter % (1 << 12)) == 0) {
   3323         YIELD();
   3324         G_stats->try_acquire_line_spin++;
   3325         if (((iter & (iter - 1)) == 0)) {
   3326           Printf("T%d %s a=%p iter=%d\n", raw_tid(thr), __FUNCTION__, a, iter);
   3327         }
   3328       }
   3329       if (iter == max_iter) {
   3330         Printf("Failed to acquire a cache line: T%d a=%p site=%d\n",
   3331                raw_tid(thr), a, call_site);
   3332         CHECK(iter < max_iter);
   3333       }
   3334     } while (line == kLineIsLocked());
   3335     DCHECK(lines_[ComputeCacheLineIndexInCache(a)] == TidMagic(raw_tid(thr)));
   3336     return line;
   3337   }
   3338 
   3339   // Release a CacheLine from exclusive use.
   3340   INLINE void ReleaseLine(Thread *thr, uintptr_t a, CacheLine *line, int call_site) {
   3341     if (TS_SERIALIZED) return;
   3342     if (IsInDirectCache(a)) return;
   3343     DCHECK(line != kLineIsLocked());
   3344     uintptr_t cli = ComputeCacheLineIndexInCache(a);
   3345     DCHECK(line == NULL ||
   3346            cli == ComputeCacheLineIndexInCache(line->tag()));
   3347     CacheLine **addr = &lines_[cli];
   3348     DCHECK(*addr == TidMagic(raw_tid(thr)));
   3349     ReleaseStore((uintptr_t*)addr, (uintptr_t)line);
   3350     ANNOTATE_HAPPENS_BEFORE((void*)cli);
   3351     if (DEBUG_MODE && debug_cache) {
   3352       uintptr_t tag = CacheLine::ComputeTag(a);
   3353       if (line)
   3354         Printf("Release %p empty=%d tag=%lx cli=%lx site=%d\n",
   3355                line, line->Empty(), line->tag(), cli, call_site);
   3356       else
   3357         Printf("Release tag=%lx cli=%d site=%d\n", tag, cli, call_site);
   3358     }
   3359   }
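
          // How the per-line locking above works (in the non-serialized mode):
          // lines_[cli] doubles as a spinlock. TryAcquireLine() atomically
          // swaps in the sentinel kLineIsLocked() and interprets the previous
          // value:
          //   NULL            - the slot was empty; the caller owns the slot,
          //   kLineIsLocked() - another thread holds the slot; retry,
          //   anything else   - a real line, now owned exclusively.
          // ReleaseLine() publishes the line pointer back with a release store.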
   3360 
   3361   void AcquireAllLines(Thread *thr) {
   3362     CHECK(TS_SERIALIZED == 0);
   3363     for (size_t i = 0; i < (size_t)kNumLines; i++) {
   3364       uintptr_t tag = i << CacheLine::kLineSizeBits;
   3365       AcquireLine(thr, tag, __LINE__);
   3366       CHECK(lines_[i] == kLineIsLocked());
   3367     }
   3368   }
   3369 
   3370   // Get a CacheLine. This operation should be performed under a lock
   3371   // (whatever that is), but other threads may be acquiring the same line
   3372   // concurrently w/o a lock.
   3373   // Every call to GetLine() that returns a non-NULL line
   3374   // must be followed by a call to ReleaseLine().
   3375   INLINE CacheLine *GetLine(Thread *thr, uintptr_t a, bool create_new_if_need, int call_site) {
   3376     uintptr_t tag = CacheLine::ComputeTag(a);
   3377     DCHECK(tag <= a);
   3378     DCHECK(tag + CacheLine::kLineSize > a);
   3379     uintptr_t cli = ComputeCacheLineIndexInCache(a);
   3380     CacheLine *res = NULL;
   3381     CacheLine *line = NULL;
   3382 
   3383     if (IsInDirectCache(a)) {
   3384       return direct_cache_.GetLine(a, create_new_if_need);
   3385     }
   3386 
   3387     if (create_new_if_need == false && lines_[cli] == 0) {
   3388       // There is no such line in the cache, nor should it be in the storage.
   3389       // Check that the storage indeed does not have this line.
   3390       // Such a DCHECK is racey if tsan is multi-threaded, hence the TS_SERIALIZED guard.
   3391       DCHECK(TS_SERIALIZED == 0 || storage_.count(tag) == 0);
   3392       return NULL;
   3393     }
   3394 
   3395     if (TS_SERIALIZED) {
   3396       line = lines_[cli];
   3397     } else {
   3398       line = AcquireLine(thr, tag, call_site);
   3399     }
   3400 
   3401 
   3402     if (LIKELY(line && line->tag() == tag)) {
   3403       res = line;
   3404     } else {
   3405       res = WriteBackAndFetch(thr, line, tag, cli, create_new_if_need);
   3406       if (!res) {
   3407         ReleaseLine(thr, a, line, call_site);
   3408       }
   3409     }
   3410     if (DEBUG_MODE && debug_cache) {
   3411       if (res)
   3412         Printf("GetLine %p empty=%d tag=%lx\n", res, res->Empty(), res->tag());
   3413       else
   3414         Printf("GetLine res=NULL, line=%p tag=%lx cli=%lx\n", line, tag, cli);
   3415     }
   3416     return res;
   3417   }
   3418 
   3419   INLINE CacheLine *GetLineOrCreateNew(Thread *thr, uintptr_t a, int call_site) {
   3420     return GetLine(thr, a, true, call_site);
   3421   }
   3422   INLINE CacheLine *GetLineIfExists(Thread *thr, uintptr_t a, int call_site) {
   3423     return GetLine(thr, a, false, call_site);
   3424   }
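
          // Typical usage pattern (an illustrative sketch; `thr` and `addr`
          // stand for a valid Thread* and an address of interest):
          //   CacheLine *line = G_cache->GetLineIfExists(thr, addr, __LINE__);
          //   if (line) {
          //     uintptr_t off = CacheLine::ComputeOffset(addr);
          //     bool present = line->has_shadow_value().Get(off);
          //     ...  // Use the line.
          //     G_cache->ReleaseLine(thr, addr, line, __LINE__);  // Mandatory.
          //   }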
   3425 
   3426   void ForgetAllState(Thread *thr) {
   3427     for (int i = 0; i < kNumLines; i++) {
   3428       if (TS_SERIALIZED == 0) CHECK(LineIsNullOrLocked(lines_[i]));
   3429       lines_[i] = NULL;
   3430     }
   3431     map<uintptr_t, Mask> racey_masks;
   3432     for (Map::iterator i = storage_.begin(); i != storage_.end(); ++i) {
   3433       CacheLine *line = i->second;
   3434       if (!line->racey().Empty()) {
   3435         racey_masks[line->tag()] = line->racey();
   3436       }
   3437       CacheLine::Delete(line);
   3438     }
   3439     storage_.clear();
   3440     // Restore the racey masks.
   3441     for (map<uintptr_t, Mask>::iterator it = racey_masks.begin();
   3442          it != racey_masks.end(); it++) {
   3443       CacheLine *line = GetLineOrCreateNew(thr, it->first, __LINE__);
   3444       line->racey() = it->second;
   3445       DCHECK(!line->racey().Empty());
   3446       ReleaseLine(thr, line->tag(), line, __LINE__);
   3447     }
   3448   }
   3449 
   3450   void PrintStorageStats() {
   3451     if (!G_flags->show_stats) return;
   3452     set<ShadowValue> all_svals;
   3453     map<size_t, int> sizes;
   3454     for (Map::iterator it = storage_.begin(); it != storage_.end(); ++it) {
   3455       CacheLine *line = it->second;
   3456       // uintptr_t cli = ComputeCacheLineIndexInCache(line->tag());
   3457       //if (lines_[cli] == line) {
   3458         // this line is in cache -- ignore it.
   3459       //  continue;
   3460       //}
   3461       set<ShadowValue> s;
   3462       for (uintptr_t i = 0; i < CacheLine::kLineSize; i++) {
   3463         if (line->has_shadow_value().Get(i)) {
   3464           ShadowValue sval = *(line->GetValuePointer(i));
   3465           s.insert(sval);
   3466           all_svals.insert(sval);
   3467         }
   3468       }
   3469       size_t size = s.size();
   3470       if (size > 10) size = 10;
   3471       sizes[size]++;
   3472     }
   3473     Printf("Storage sizes: %ld\n", storage_.size());
   3474     for (size_t size = 0; size <= CacheLine::kLineSize; size++) {
   3475       if (sizes[size]) {
   3476         Printf("  %ld => %d\n", size, sizes[size]);
   3477       }
   3478     }
   3479     Printf("Different svals: %ld\n", all_svals.size());
   3480     set <SSID> all_ssids;
   3481     for (set<ShadowValue>::iterator it = all_svals.begin(); it != all_svals.end(); ++it) {
   3482       ShadowValue sval = *it;
   3483       for (int i = 0; i < 2; i++) {
   3484         SSID ssid = i ? sval.rd_ssid() : sval.wr_ssid();
   3485         all_ssids.insert(ssid);
   3486       }
   3487     }
   3488     Printf("Different ssids: %ld\n", all_ssids.size());
   3489     set <SID> all_sids;
   3490     for (set<SSID>::iterator it = all_ssids.begin(); it != all_ssids.end(); ++it) {
   3491       int size = SegmentSet::Size(*it);
   3492       for (int i = 0; i < size; i++) {
   3493         SID sid = SegmentSet::GetSID(*it, i, __LINE__);
   3494         all_sids.insert(sid);
   3495       }
   3496     }
   3497     Printf("Different sids: %ld\n", all_sids.size());
   3498     for (int i = 1; i < Segment::NumberOfSegments(); i++) {
   3499       if (Segment::ProfileSeg(SID(i)) && all_sids.count(SID(i)) == 0) {
   3500         // Printf("Segment SID %d: missing in storage; ref=%d\n", i,
   3501         // Segment::Get(SID(i))->ref_count());
   3502       }
   3503     }
   3504   }
   3505 
   3506  private:
   3507   INLINE uintptr_t ComputeCacheLineIndexInCache(uintptr_t addr) {
   3508     return (addr >> CacheLine::kLineSizeBits) & (kNumLines - 1);
   3509   }
   3510 
   3511   NOINLINE CacheLine *WriteBackAndFetch(Thread *thr, CacheLine *old_line,
   3512                                         uintptr_t tag, uintptr_t cli,
   3513                                         bool create_new_if_need) {
   3514     ScopedMallocCostCenter cc("Cache::WriteBackAndFetch");
   3515     CacheLine *res;
   3516     size_t old_storage_size = storage_.size();
   3517     CacheLine **line_for_this_tag = NULL;
   3518     if (create_new_if_need) {
   3519       line_for_this_tag = &storage_[tag];
   3520     } else {
   3521       Map::iterator it = storage_.find(tag);
   3522       if (it == storage_.end()) {
   3523         if (DEBUG_MODE && debug_cache) {
   3524           Printf("WriteBackAndFetch: old_line=%ld tag=%lx cli=%ld\n",
   3525                  old_line, tag, cli);
   3526         }
   3527         return NULL;
   3528       }
   3529       line_for_this_tag = &(it->second);
   3530     }
   3531     CHECK(line_for_this_tag);
   3532     DCHECK(old_line != kLineIsLocked());
   3533     if (*line_for_this_tag == NULL) {
   3534       // creating a new cache line
   3535       CHECK(storage_.size() == old_storage_size + 1);
   3536       res = CacheLine::CreateNewCacheLine(tag);
   3537       if (DEBUG_MODE && debug_cache) {
   3538         Printf("%s %d new line %p cli=%lx\n", __FUNCTION__, __LINE__, res, cli);
   3539       }
   3540       *line_for_this_tag = res;
   3541       G_stats->cache_new_line++;
   3542     } else {
   3543       // taking an existing cache line from storage.
   3544       res = *line_for_this_tag;
   3545       if (DEBUG_MODE && debug_cache) {
   3546         Printf("%s %d exi line %p tag=%lx old=%p empty=%d cli=%lx\n",
   3547              __FUNCTION__, __LINE__, res, res->tag(), old_line,
   3548              res->Empty(), cli);
   3549       }
   3550       DCHECK(!res->Empty());
   3551       G_stats->cache_fetch++;
   3552     }
   3553 
   3554     if (TS_SERIALIZED) {
   3555       lines_[cli] = res;
   3556     } else {
   3557       DCHECK(lines_[cli] == TidMagic(raw_tid(thr)));
   3558     }
   3559 
   3560     if (old_line) {
   3561       if (DEBUG_MODE && debug_cache) {
   3562         Printf("%s %d old line %p empty=%d\n", __FUNCTION__, __LINE__,
   3563                old_line, old_line->Empty());
   3564       }
   3565       if (old_line->Empty()) {
   3566         storage_.erase(old_line->tag());
   3567         CacheLine::Delete(old_line);
   3568         G_stats->cache_delete_empty_line++;
   3569       } else {
   3570         if (debug_cache) {
   3571           DebugOnlyCheckCacheLineWhichWeReplace(old_line, res);
   3572         }
   3573       }
   3574     }
   3575     DCHECK(res->tag() == tag);
   3576 
   3577     if (G_stats->cache_max_storage_size < storage_.size()) {
   3578       G_stats->cache_max_storage_size = storage_.size();
   3579       // if ((storage_.size() % (1024 * 64)) == 0) {
   3580       //  PrintStorageStats();
   3581       // }
   3582     }
   3583 
   3584     return res;
   3585   }
   3586 
   3587   void DebugOnlyCheckCacheLineWhichWeReplace(CacheLine *old_line,
   3588                                              CacheLine *new_line) {
   3589     static int c = 0;
   3590     c++;
   3591     if ((c % 1024) == 1) {
   3592       set<int64_t> s;
   3593       for (uintptr_t i = 0; i < CacheLine::kLineSize; i++) {
   3594         if (old_line->has_shadow_value().Get(i)) {
   3595           int64_t sval = *reinterpret_cast<int64_t*>(
   3596                             old_line->GetValuePointer(i));
   3597           // Printf("%p ", sval);
   3598           s.insert(sval);
   3599         }
   3600       }
   3601       Printf("\n[%d] Cache Size=%ld %s different values: %ld\n", c,
   3602              storage_.size(), old_line->has_shadow_value().ToString().c_str(),
   3603              s.size());
   3604 
   3605       Printf("new line: %p %p\n", new_line->tag(), new_line->tag()
   3606              + CacheLine::kLineSize);
   3607       G_stats->PrintStatsForCache();
   3608     }
   3609   }
   3610 
   3611   static const int kNumLines = 1 << (DEBUG_MODE ? 14 : 21);
   3612   CacheLine *lines_[kNumLines];
   3613 
   3614   // tag => CacheLine
   3615   typedef unordered_map<uintptr_t, CacheLine*> Map;
   3616   Map storage_;
   3617 
   3618   DirectMapCacheForRange<0, (1<<30)> direct_cache_;
   3619 };
   3620 
   3621 static Cache *G_cache;
   3622 
   3623 // -------- Published range -------------------- {{{1
   3624 struct PublishInfo {
   3625   uintptr_t tag;   // Tag of the cache line where the mem is published.
   3626   Mask      mask;  // The bits that are actually published.
   3627   VTS      *vts;   // The point where this range has been published.
   3628 };
   3629 
   3630 
   3631 typedef multimap<uintptr_t, PublishInfo> PublishInfoMap;
   3632 
   3633 // Maps a cache line tag to all PublishInfo{tag, mask, vts} entries for that line.
   3634 static PublishInfoMap *g_publish_info_map;
   3635 
   3636 const int kDebugPublish = 0;
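
        // Illustrative example of the map contents: publishing the byte range
        // [tag+8, tag+16) of the line with tag `tag` adds one multimap entry
        //   key: tag, value: PublishInfo{tag, mask with bits 8..15 set,
        //                                a clone of the publishing VTS}.
        // Disjoint published sub-ranges of the same line coexist as separate
        // entries under the same key (see CheckSanityOfPublishedMemory below).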
   3637 
   3638 // Get a VTS where 'a' has been published,
   3639 // return NULL if 'a' was not published.
   3640 static const VTS *GetPublisherVTS(uintptr_t a) {
   3641   uintptr_t tag = CacheLine::ComputeTag(a);
   3642   uintptr_t off = CacheLine::ComputeOffset(a);
   3643   typedef PublishInfoMap::iterator Iter;
   3644 
   3645   pair<Iter, Iter> eq_range = g_publish_info_map->equal_range(tag);
   3646   for (Iter it = eq_range.first; it != eq_range.second; ++it) {
   3647     PublishInfo &info = it->second;
   3648     DCHECK(info.tag == tag);
   3649     if (info.mask.Get(off)) {
   3650       G_stats->publish_get++;
   3651       // Printf("GetPublisherVTS: a=%p vts=%p\n", a, info.vts);
   3652       return info.vts;
   3653     }
   3654   }
   3655   Printf("GetPublisherVTS returned NULL: a=%p\n", a);
   3656   return NULL;
   3657 }
   3658 
   3659 static bool CheckSanityOfPublishedMemory(uintptr_t tag, int line) {
   3660   if (!DEBUG_MODE) return true;
   3661   if (kDebugPublish)
   3662     Printf("CheckSanityOfPublishedMemory: line=%d\n", line);
   3663   typedef PublishInfoMap::iterator Iter;
   3664   pair<Iter, Iter> eq_range = g_publish_info_map->equal_range(tag);
   3665   Mask union_of_masks(0);
   3666   // iterate over all entries for this tag
   3667   for (Iter it = eq_range.first; it != eq_range.second; ++it) {
   3668     PublishInfo &info = it->second;
   3669     CHECK(info.tag  == tag);
   3670     CHECK(it->first == tag);
   3671     CHECK(info.vts);
   3672     Mask mask(info.mask);
   3673     CHECK(!mask.Empty());  // Mask should not be empty.
   3674     // And should not intersect with other masks.
   3675     CHECK(Mask::Intersection(union_of_masks, mask).Empty());
   3676     union_of_masks.Union(mask);
   3677   }
   3678   return true;
   3679 }
   3680 
   3681 // Clear the publish attribute for the bytes from 'line' that are set in 'mask'
   3682 static void ClearPublishedAttribute(CacheLine *line, Mask mask) {
   3683   CHECK(CheckSanityOfPublishedMemory(line->tag(), __LINE__));
   3684   typedef PublishInfoMap::iterator Iter;
   3685   bool deleted_some = true;
   3686   if (kDebugPublish)
   3687     Printf(" ClearPublishedAttribute: %p %s\n",
   3688            line->tag(), mask.ToString().c_str());
   3689   while (deleted_some) {
   3690     deleted_some = false;
   3691     pair<Iter, Iter> eq_range = g_publish_info_map->equal_range(line->tag());
   3692     for (Iter it = eq_range.first; it != eq_range.second; ++it) {
   3693       PublishInfo &info = it->second;
   3694       DCHECK(info.tag == line->tag());
   3695       if (kDebugPublish)
   3696         Printf("?ClearPublishedAttribute: %p %s\n", line->tag(),
   3697                info.mask.ToString().c_str());
   3698       info.mask.Subtract(mask);
   3699       if (kDebugPublish)
   3700         Printf("+ClearPublishedAttribute: %p %s\n", line->tag(),
   3701                info.mask.ToString().c_str());
   3702       G_stats->publish_clear++;
   3703       if (info.mask.Empty()) {
   3704         VTS::Unref(info.vts);
   3705         g_publish_info_map->erase(it);
   3706         deleted_some = true;
   3707         break;
   3708       }
   3709     }
   3710   }
   3711   CHECK(CheckSanityOfPublishedMemory(line->tag(), __LINE__));
   3712 }
   3713 
   3714 // Publish range [a, b) in addr's CacheLine with vts.
   3715 static void PublishRangeInOneLine(Thread *thr, uintptr_t addr, uintptr_t a,
   3716                                   uintptr_t b, VTS *vts) {
   3717   ScopedMallocCostCenter cc("PublishRangeInOneLine");
   3718   DCHECK(b <= CacheLine::kLineSize);
   3719   DCHECK(a < b);
   3720   uintptr_t tag = CacheLine::ComputeTag(addr);
   3721   CHECK(CheckSanityOfPublishedMemory(tag, __LINE__));
   3722   CacheLine *line = G_cache->GetLineOrCreateNew(thr, tag, __LINE__);
   3723 
   3724   if (1 || line->published().GetRange(a, b)) {
   3725     Mask mask(0);
   3726     mask.SetRange(a, b);
   3727     // TODO(timurrrr): add warning for re-publishing.
   3728     ClearPublishedAttribute(line, mask);
   3729   }
   3730 
   3731   line->published().SetRange(a, b);
   3732   G_cache->ReleaseLine(thr, tag, line, __LINE__);
   3733 
   3734   PublishInfo pub_info;
   3735   pub_info.tag  = tag;
   3736   pub_info.mask.SetRange(a, b);
   3737   pub_info.vts  = vts->Clone();
   3738   g_publish_info_map->insert(make_pair(tag, pub_info));
   3739   G_stats->publish_set++;
   3740   if (kDebugPublish)
   3741     Printf("PublishRange   : [%p,%p) %p %s vts=%p\n",
   3742            a, b, tag, pub_info.mask.ToString().c_str(), vts);
   3743   CHECK(CheckSanityOfPublishedMemory(tag, __LINE__));
   3744 }
   3745 
   3746 // Publish memory range [a, b).
   3747 static void PublishRange(Thread *thr, uintptr_t a, uintptr_t b, VTS *vts) {
   3748   CHECK(a);
   3749   CHECK(a < b);
   3750   if (kDebugPublish)
   3751     Printf("PublishRange   : [%p,%p), size=%d, tag=%p\n",
   3752            a, b, (int)(b - a), CacheLine::ComputeTag(a));
   3753   uintptr_t line1_tag = 0, line2_tag = 0;
   3754   uintptr_t tag = GetCacheLinesForRange(a, b, &line1_tag, &line2_tag);
   3755   if (tag) {
   3756     PublishRangeInOneLine(thr, tag, a - tag, b - tag, vts);
   3757     return;
   3758   }
   3759   uintptr_t a_tag = CacheLine::ComputeTag(a);
   3760   PublishRangeInOneLine(thr, a, a - a_tag, CacheLine::kLineSize, vts);
   3761   for (uintptr_t tag_i = line1_tag; tag_i < line2_tag;
   3762        tag_i += CacheLine::kLineSize) {
   3763     PublishRangeInOneLine(thr, tag_i, 0, CacheLine::kLineSize, vts);
   3764   }
   3765   if (b > line2_tag) {
   3766     PublishRangeInOneLine(thr, line2_tag, 0, b - line2_tag, vts);
   3767   }
   3768 }
   3769 
   3770 // -------- Clear Memory State ------------------ {{{1
   3771 static void INLINE UnrefSegmentsInMemoryRange(uintptr_t a, uintptr_t b,
   3772                                                 Mask mask, CacheLine *line) {
   3773   while (!mask.Empty()) {
   3774     uintptr_t x = mask.GetSomeSetBit();
   3775     DCHECK(mask.Get(x));
   3776     mask.Clear(x);
   3777     line->GetValuePointer(x)->Unref("Detector::UnrefSegmentsInMemoryRange");
   3778   }
   3779 }
   3780 
   3781 void INLINE ClearMemoryStateInOneLine(Thread *thr, uintptr_t addr,
   3782                                       uintptr_t beg, uintptr_t end) {
   3783   AssertTILHeld();
   3784   CacheLine *line = G_cache->GetLineIfExists(thr, addr, __LINE__);
   3785   // CacheLine *line = G_cache->GetLineOrCreateNew(addr, __LINE__);
   3786   if (line) {
   3787     DCHECK(beg < CacheLine::kLineSize);
   3788     DCHECK(end <= CacheLine::kLineSize);
   3789     DCHECK(beg < end);
   3790     Mask published = line->published();
   3791     if (UNLIKELY(!published.Empty())) {
   3792       Mask mask(published.GetRange(beg, end));
   3793       ClearPublishedAttribute(line, mask);
   3794     }
   3795     Mask old_used = line->ClearRangeAndReturnOldUsed(beg, end);
   3796     UnrefSegmentsInMemoryRange(beg, end, old_used, line);
   3797     G_cache->ReleaseLine(thr, addr, line, __LINE__);
   3798   }
   3799 }
   3800 
   3801 // Clear memory state for [a,b).
   3802 void NOINLINE ClearMemoryState(Thread *thr, uintptr_t a, uintptr_t b) {
   3803   if (a == b) return;
   3804   CHECK(a < b);
   3805   uintptr_t line1_tag = 0, line2_tag = 0;
   3806   uintptr_t single_line_tag = GetCacheLinesForRange(a, b,
   3807                                                     &line1_tag, &line2_tag);
   3808   if (single_line_tag) {
   3809     ClearMemoryStateInOneLine(thr, a, a - single_line_tag,
   3810                               b - single_line_tag);
   3811     return;
   3812   }
   3813 
   3814   uintptr_t a_tag = CacheLine::ComputeTag(a);
   3815   ClearMemoryStateInOneLine(thr, a, a - a_tag, CacheLine::kLineSize);
   3816 
   3817   for (uintptr_t tag_i = line1_tag; tag_i < line2_tag;
   3818        tag_i += CacheLine::kLineSize) {
   3819     ClearMemoryStateInOneLine(thr, tag_i, 0, CacheLine::kLineSize);
   3820   }
   3821 
   3822   if (b > line2_tag) {
   3823     ClearMemoryStateInOneLine(thr, line2_tag, 0, b - line2_tag);
   3824   }
   3825 
   3826   if (DEBUG_MODE && G_flags->debug_level >= 2) {
   3827     // Check that we've cleared it. Slow!
   3828     for (uintptr_t x = a; x < b; x++) {
   3829       uintptr_t off = CacheLine::ComputeOffset(x);
   3830       CacheLine *line = G_cache->GetLineOrCreateNew(thr, x, __LINE__);
   3831       CHECK(!line->has_shadow_value().Get(off));
   3832       G_cache->ReleaseLine(thr, x, line, __LINE__);
   3833     }
   3834   }
   3835 }
   3836 
   3837 // -------- ThreadSanitizerReport -------------- {{{1
   3838 struct ThreadSanitizerReport {
   3839   // Types of reports.
   3840   enum ReportType {
   3841     DATA_RACE,
   3842     UNLOCK_FOREIGN,
   3843     UNLOCK_NONLOCKED,
   3844     INVALID_LOCK,
   3845     ATOMICITY_VIOLATION,
   3846   };
   3847 
   3848   // Common fields.
   3849   ReportType  type;
   3850   TID         tid;
   3851   StackTrace *stack_trace;
   3852 
   3853   const char *ReportName() const {
   3854     switch (type) {
   3855       case DATA_RACE:        return "Race";
   3856       case UNLOCK_FOREIGN:   return "UnlockForeign";
   3857       case UNLOCK_NONLOCKED: return "UnlockNonLocked";
   3858       case INVALID_LOCK:     return "InvalidLock";
   3859       case ATOMICITY_VIOLATION: return "AtomicityViolation";
   3860     }
   3861     CHECK(0);
   3862     return NULL;
   3863   }
   3864 
   3865   virtual ~ThreadSanitizerReport() {
   3866     StackTrace::Delete(stack_trace);
   3867   }
   3868 };
   3869 
   3870 static bool ThreadSanitizerPrintReport(ThreadSanitizerReport *report);
   3871 
   3872 // DATA_RACE.
   3873 struct ThreadSanitizerDataRaceReport : public ThreadSanitizerReport {
   3874   uintptr_t   racey_addr;
   3875   string      racey_addr_description;
   3876   uintptr_t   last_access_size;
   3877   TID         last_access_tid;
   3878   SID         last_access_sid;
   3879   bool        last_access_is_w;
   3880   LSID        last_acces_lsid[2];
   3881 
   3882   ShadowValue new_sval;
   3883   ShadowValue old_sval;
   3884 
   3885   bool        is_expected;
   3886   bool        racey_addr_was_published;
   3887 };
   3888 
   3889 // Report for bad unlock (UNLOCK_FOREIGN, UNLOCK_NONLOCKED).
   3890 struct ThreadSanitizerBadUnlockReport : public ThreadSanitizerReport {
   3891   LID lid;
   3892 };
   3893 
   3894 // Report for invalid lock addresses (INVALID_LOCK).
   3895 struct ThreadSanitizerInvalidLockReport : public ThreadSanitizerReport {
   3896   uintptr_t lock_addr;
   3897 };
   3898 
   3899 class AtomicityRegion;
   3900 
   3901 struct ThreadSanitizerAtomicityViolationReport : public ThreadSanitizerReport {
   3902   AtomicityRegion *r1, *r2, *r3;
   3903 };
   3904 
   3905 
   3906 // -------- LockHistory ------------- {{{1
   3907 // For each thread we store a limited amount of history of locks and unlocks.
   3908 // If there is a race report (in hybrid mode) we try to guess a lock
   3909 // which might have been used to pass the ownership of the object between
   3910 // threads.
   3911 //
   3912 // Thread1:                    Thread2:
   3913 // obj->UpdateMe();
   3914 // mu.Lock();
   3915 // flag = true;
   3916 // mu.Unlock(); // (*)
   3917 //                             mu.Lock();  // (**)
   3918 //                             bool f = flag;
   3919 //                             mu.Unlock();
   3920 //                             if (f)
   3921 //                                obj->UpdateMeAgain();
   3922 //
   3923 // For this code a hybrid detector may report a false race.
   3924 // LockHistory will find the lock mu and report it.
   3925 
   3926 struct LockHistory {
   3927  public:
   3928   // LockHistory which will track no more than `size` recent locks
   3929   // and the same number of unlocks.
   3930   LockHistory(size_t size): size_(size) { }
   3931 
   3932   // Record a Lock event.
   3933   void OnLock(LID lid) {
   3934     g_lock_era++;
   3935     Push(LockHistoryElement(lid, g_lock_era), &locks_);
   3936   }
   3937 
   3938   // Record an Unlock event.
   3939   void OnUnlock(LID lid) {
   3940     g_lock_era++;
   3941     Push(LockHistoryElement(lid, g_lock_era), &unlocks_);
   3942   }
   3943 
   3944   // Find locks such that:
   3945   // - A Lock happened in `l`.
   3946   // - An Unlock happened in `u`.
   3947   // - The Lock's era is greater than the Unlock's era.
   3948   // - Both eras are greater than or equal to min_lock_era.
   3949   static bool Intersect(const LockHistory &l, const LockHistory &u,
   3950                         int32_t min_lock_era, set<LID> *locks) {
   3951     const Queue &lq = l.locks_;
   3952     const Queue &uq = u.unlocks_;
   3953     for (size_t i = 0; i < lq.size(); i++) {
   3954       int32_t l_era = lq[i].lock_era;
   3955       if (l_era < min_lock_era) continue;
   3956       LID lid = lq[i].lid;
   3957       // We don't want to report pure happens-before locks since
   3958       // they already create h-b arcs.
   3959       if (Lock::LIDtoLock(lid)->is_pure_happens_before()) continue;
   3960       for (size_t j = 0; j < uq.size(); j++) {
   3961         int32_t u_era = uq[j].lock_era;
   3962         if (lid != uq[j].lid) continue;
   3963         // Report("LockHistory::Intersect: L%d %d %d %d\n", lid.raw(), min_lock_era, u_era, l_era);
   3964         if (u_era < min_lock_era)  continue;
   3965         if (u_era > l_era) continue;
   3966         locks->insert(lid);
   3967       }
   3968     }
   3969     return !locks->empty();
   3970   }
   3971 
   3972   void PrintLocks() const { Print(&locks_); }
   3973   void PrintUnlocks() const { Print(&unlocks_); }
   3974 
   3975  private:
   3976   struct LockHistoryElement {
   3977     LID lid;
   3978     uint32_t lock_era;
   3979     LockHistoryElement(LID l, uint32_t era)
   3980         : lid(l),
   3981         lock_era(era) {
   3982         }
   3983   };
   3984 
   3985   typedef deque<LockHistoryElement> Queue;
   3986 
   3987   void Push(LockHistoryElement e, Queue *q) {
   3988     CHECK(q->size() <= size_);
   3989     if (q->size() == size_)
   3990       q->pop_front();
   3991     q->push_back(e);
   3992   }
   3993 
   3994   void Print(const Queue *q) const {
   3995     set<LID> printed;
   3996     for (size_t i = 0; i < q->size(); i++) {
   3997       const LockHistoryElement &e = (*q)[i];
   3998       if (printed.count(e.lid)) continue;
   3999       Report("era %d: \n", e.lock_era);
   4000       Lock::ReportLockWithOrWithoutContext(e.lid, true);
   4001       printed.insert(e.lid);
   4002     }
   4003   }
   4004 
   4005   Queue locks_;
   4006   Queue unlocks_;
   4007   size_t size_;
   4008 };
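
        // Illustrative use of Intersect() with the example above (the eras are
        // hypothetical): if Thread1's history recorded Unlock(mu) at era 10
        // (the line marked (*)) and Thread2's history recorded Lock(mu) at
        // era 12 (the line marked (**)), then for min_lock_era <= 10
        //   LockHistory::Intersect(t2_history, t1_history, min_lock_era, &locks)
        // inserts mu's LID into `locks`: the Lock era (12) is greater than the
        // Unlock era (10) and both are >= min_lock_era.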
   4009 
   4010 // -------- RecentSegmentsCache ------------- {{{1
   4011 // For each thread we store a limited amount of recent segments with
   4012 // the same VTS and LS as the current segment.
   4013 // When a thread enters a new basic block, we can sometimes reuse a
   4014 // recent segment if it is the same one or not used anymore (see Search()).
   4015 //
   4016 // We need to flush the cache when the current lockset changes, when the
   4017 // current VTS changes, or when we do ForgetAllState.
   4018 // TODO(timurrrr): probably we can cache segments with different LSes and
   4019 // compare their LS with the current LS.
   4020 struct RecentSegmentsCache {
   4021  public:
   4022   RecentSegmentsCache(int cache_size) : cache_size_(cache_size) {}
   4023   ~RecentSegmentsCache() { Clear(); }
   4024 
   4025   void Clear() {
   4026     ShortenQueue(0);
   4027   }
   4028 
   4029   void Push(SID sid) {
   4030     queue_.push_front(sid);
   4031     Segment::Ref(sid, "RecentSegmentsCache::ShortenQueue");
   4032     ShortenQueue(cache_size_);
   4033   }
   4034 
   4035   void ForgetAllState() {
   4036     queue_.clear();  // Don't unref - the segments are already dead.
   4037   }
   4038 
   4039   INLINE SID Search(CallStack *curr_stack,
   4040                     SID curr_sid, /*OUT*/ bool *needs_refill) {
   4041     // TODO(timurrrr): we can probably move the matched segment to the head
   4042     // of the queue.
   4043 
   4044     deque<SID>::iterator it = queue_.begin();
   4045     for (; it != queue_.end(); it++) {
   4046       SID sid = *it;
   4047       Segment::AssertLive(sid, __LINE__);
   4048       Segment *seg = Segment::Get(sid);
   4049 
   4050       if (seg->ref_count() == 1 + (sid == curr_sid)) {
   4051         // This segment is not used anywhere else,
   4052         // so we can simply replace its stack trace.
   4053         // The refcount of an unused segment is equal to
   4054         // *) 1 if it is stored only in the cache,
   4055         // *) 2 if it is the current segment of the Thread.
   4056         *needs_refill = true;
   4057         return sid;
   4058       }
   4059 
   4060       // Check the three top entries of the call stack of the recent segment.
   4061       // If they match the current segment's stack, don't create a new segment.
   4062       // This may occasionally produce slightly wrong stack traces,
   4063       // but we don't really care that much.
   4064       if (kSizeOfHistoryStackTrace > 0) {
   4065         size_t n = curr_stack->size();
   4066         uintptr_t *emb_trace = Segment::embedded_stack_trace(sid);
   4067         if (*emb_trace &&  // This stack trace was filled
   4068            curr_stack->size() >= 3 &&
   4069            emb_trace[0] == (*curr_stack)[n-1] &&
   4070            emb_trace[1] == (*curr_stack)[n-2] &&
   4071            emb_trace[2] == (*curr_stack)[n-3]) {
   4072           *needs_refill = false;
   4073           return sid;
   4074         }
   4075       }
   4076     }
   4077 
   4078     return SID();
   4079   }
   4080 
   4081  private:
   4082   void ShortenQueue(size_t flush_to_length) {
   4083     while (queue_.size() > flush_to_length) {
   4084       SID sid = queue_.back();
   4085       Segment::Unref(sid, "RecentSegmentsCache::ShortenQueue");
   4086       queue_.pop_back();
   4087     }
   4088   }
   4089 
   4090   deque<SID> queue_;
   4091   size_t cache_size_;
   4092 };
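
        // Illustrative use (an approximate sketch; the surrounding logic lives
        // in Thread): on a basic-block entry a thread does roughly
        //   bool needs_refill;
        //   SID sid = recent_segments_cache_.Search(call_stack, curr_sid,
        //                                           &needs_refill);
        // and, if `sid` is valid, reuses that segment (refilling its embedded
        // stack trace when needs_refill is true); an invalid SID means a fresh
        // segment must be created and Push()ed into the cache.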
   4093 
   4094 // -------- TraceInfo ------------------ {{{1
   4095 vector<TraceInfo*> *TraceInfo::g_all_traces;
   4096 
   4097 TraceInfo *TraceInfo::NewTraceInfo(size_t n_mops, uintptr_t pc) {
   4098   ScopedMallocCostCenter cc("TraceInfo::NewTraceInfo");
   4099   size_t mem_size = (sizeof(TraceInfo) + (n_mops - 1) * sizeof(MopInfo));
   4100   uint8_t *mem = new uint8_t[mem_size];
   4101   memset(mem, 0xab, mem_size);
   4102   TraceInfo *res = new (mem) TraceInfo;
   4103   res->n_mops_ = n_mops;
   4104   res->pc_ = ThreadSanitizerWantToCreateSegmentsOnSblockEntry(pc) ? pc : 0;
   4105   res->counter_ = 0;
   4106   if (g_all_traces == NULL) {
   4107     g_all_traces = new vector<TraceInfo*>;
   4108   }
   4109   res->literace_storage = NULL;
   4110   if (G_flags->literace_sampling != 0) {
   4111     ScopedMallocCostCenter cc("TraceInfo::NewTraceInfo::LiteRaceStorage");
   4112     size_t index_of_this_trace = g_all_traces->size();
   4113     if ((index_of_this_trace % kLiteRaceStorageSize) == 0) {
   4114       res->literace_storage = (LiteRaceStorage*)
   4115           new LiteRaceCounters [kLiteRaceStorageSize * kLiteRaceNumTids];
   4116       memset(res->literace_storage, 0, sizeof(LiteRaceStorage));
   4117     } else {
   4118       CHECK(index_of_this_trace > 0);
   4119       res->literace_storage = (*g_all_traces)[index_of_this_trace - 1]->literace_storage;
   4120       CHECK(res->literace_storage);
   4121     }
   4122     res->storage_index = index_of_this_trace % kLiteRaceStorageSize;
   4123   }
   4124   g_all_traces->push_back(res);
   4125   return res;
   4126 }
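
        // The LiteRace storage sharing above, illustrated: with
        // kLiteRaceStorageSize == N, traces 0..N-1 share the counter block
        // allocated for trace 0 (using storage_index 0..N-1 respectively),
        // traces N..2N-1 share the next block, and so on; e.g. the trace with
        // index N+2 inherits the block of trace N+1 (and transitively of
        // trace N) and gets storage_index == 2.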
   4127 
   4128 void TraceInfo::PrintTraceProfile() {
   4129   if (!G_flags->trace_profile) return;
   4130   if (!g_all_traces) return;
   4131   int64_t total_counter = 0;
   4132   multimap<size_t, TraceInfo*> traces;
   4133   for (size_t i = 0; i < g_all_traces->size(); i++) {
   4134     TraceInfo *trace = (*g_all_traces)[i];
   4135     traces.insert(make_pair(trace->counter(), trace));
   4136     total_counter += trace->counter();
   4137   }
   4138   if (total_counter == 0) return;
   4139   Printf("TraceProfile: %ld traces, %lld hits\n",
   4140          g_all_traces->size(), total_counter);
   4141   int i = 0;
   4142   for (multimap<size_t, TraceInfo*>::reverse_iterator it = traces.rbegin();
   4143        it != traces.rend(); ++it, i++) {
   4144     TraceInfo *trace = it->second;
   4145     int64_t c = it->first;
   4146     int64_t permile = (c * 1000) / total_counter;
   4147     CHECK(trace->n_mops() > 0);
   4148     uintptr_t pc = trace->GetMop(0)->pc();
   4149     CHECK(pc);
   4150     if (permile == 0 || i >= 20) break;
   4151     Printf("TR=%p pc: %p %p c=%lld (%lld/1000) n_mops=%ld %s\n",
   4152            trace, trace->pc(), pc, c,
   4153            permile, trace->n_mops(),
   4154            PcToRtnNameAndFilePos(pc).c_str());
   4155   }
   4156 }
   4157 
   4158 // -------- Atomicity --------------- {{{1
   4159 // An attempt to detect atomicity violations (aka high level races).
   4160 // Here we try to find a very restrictive pattern:
   4161 // Thread1                    Thread2
   4162 //   r1: {
   4163 //     mu.Lock();
   4164 //     code_r1();
   4165 //     mu.Unlock();
   4166 //   }
   4167 //   r2: {
   4168 //     mu.Lock();
   4169 //     code_r2();
   4170 //     mu.Unlock();
   4171 //   }
   4172 //                           r3: {
   4173 //                             mu.Lock();
   4174 //                             code_r3();
   4175 //                             mu.Unlock();
   4176 //                           }
   4177 // We have 3 regions of code such that
   4178 // - two of them are in one thread and 3-rd in another thread.
   4179 // - all 3 regions have the same lockset,
   4180 // - the distance between r1 and r2 is small,
   4181 // - there is no h-b arc between r2 and r3,
   4182 // - r1 and r2 have different stack traces,
   4183 //
   4184 // In this situation we report a 'Suspected atomicity violation'.
   4185 //
   4186 // Current status:
   4187 // this code detects atomicity violations on our two motivating examples
   4188 // (--gtest_filter=*Atomicity*  --gtest_also_run_disabled_tests) and does
   4189 // not overwhelm with false reports.
   4190 // However, this functionality is still raw and not tuned for performance.
   4191 
   4192 // TS_ATOMICITY is on in debug mode or if we enabled it at the build time.
   4193 #ifndef TS_ATOMICITY
   4194 # define TS_ATOMICITY DEBUG_MODE
   4195 #endif
   4196 
   4197 
   4198 struct AtomicityRegion {
   4199   int lock_era;
   4200   TID tid;
   4201   VTS *vts;
   4202   StackTrace *stack_trace;
   4203   LSID lsid[2];
   4204   BitSet access_set[2];
   4205   bool used;
   4206   int n_mops_since_start;
   4207 
   4208   void Print() {
   4209     Report("T%d era=%d nmss=%ld AtomicityRegion:\n  rd: %s\n  wr: %s\n  %s\n%s",
   4210            tid.raw(),
   4211            lock_era,
   4212            n_mops_since_start,
   4213            access_set[0].ToString().c_str(),
   4214            access_set[1].ToString().c_str(),
   4215            TwoLockSetsToString(lsid[false], lsid[true]).c_str(),
   4216            stack_trace->ToString().c_str()
   4217           );
   4218   }
   4219 };
   4220 
   4221 bool SimilarLockSetForAtomicity(AtomicityRegion *r1, AtomicityRegion *r2) {
   4222   // Compare only reader locksets (in case one region took reader locks)
   4223   return ((r1->lsid[0] == r2->lsid[0]));
   4224 }
   4225 
   4226 static deque<AtomicityRegion *> *g_atomicity_regions;
   4227 static map<StackTrace *, int, StackTrace::Less> *reported_atomicity_stacks_;
   4228 const size_t kMaxAtomicityRegions = 8;
   4229 
   4230 static void HandleAtomicityRegion(AtomicityRegion *atomicity_region) {
   4231   if (!g_atomicity_regions) {
   4232     g_atomicity_regions = new deque<AtomicityRegion*>;
   4233     reported_atomicity_stacks_ = new map<StackTrace *, int, StackTrace::Less>;
   4234   }
   4235 
   4236   if (g_atomicity_regions->size() >= kMaxAtomicityRegions) {
   4237     AtomicityRegion *to_delete = g_atomicity_regions->back();
   4238     g_atomicity_regions->pop_back();
   4239     if (!to_delete->used) {
   4240       VTS::Unref(to_delete->vts);
   4241       StackTrace::Delete(to_delete->stack_trace);
   4242       delete to_delete;
   4243     }
   4244   }
   4245   g_atomicity_regions->push_front(atomicity_region);
   4246   size_t n = g_atomicity_regions->size();
   4247 
   4248   if (0) {
   4249     for (size_t i = 0; i < n; i++) {
   4250       AtomicityRegion *r = (*g_atomicity_regions)[i];
   4251       r->Print();
   4252     }
   4253   }
   4254 
   4255   AtomicityRegion *r3 = (*g_atomicity_regions)[0];
   4256   for (size_t i = 1; i < n; i++) {
   4257     AtomicityRegion *r2 = (*g_atomicity_regions)[i];
   4258     if (r2->tid     != r3->tid &&
   4259         SimilarLockSetForAtomicity(r2, r3) &&
   4260         !VTS::HappensBeforeCached(r2->vts, r3->vts)) {
   4261       for (size_t j = i + 1; j < n; j++) {
   4262         AtomicityRegion *r1 = (*g_atomicity_regions)[j];
   4263         if (r1->tid != r2->tid) continue;
   4264         CHECK(r2->lock_era > r1->lock_era);
   4265         if (r2->lock_era - r1->lock_era > 2) break;
   4266         if (!SimilarLockSetForAtomicity(r1, r2)) continue;
   4267         if (StackTrace::Equals(r1->stack_trace, r2->stack_trace)) continue;
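                // Proceed only if r1 performed no writes while r2 and r3 both did.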
   4268         if (!(r1->access_set[1].empty() &&
   4269               !r2->access_set[1].empty() &&
   4270               !r3->access_set[1].empty())) continue;
   4271         CHECK(r1->n_mops_since_start <= r2->n_mops_since_start);
   4272         if (r2->n_mops_since_start - r1->n_mops_since_start > 5) continue;
   4273         if ((*reported_atomicity_stacks_)[r1->stack_trace] > 0) continue;
   4274 
   4275         (*reported_atomicity_stacks_)[r1->stack_trace]++;
   4276         (*reported_atomicity_stacks_)[r2->stack_trace]++;
   4277         (*reported_atomicity_stacks_)[r3->stack_trace]++;
   4278         r1->used = r2->used = r3->used = true;
   4279         ThreadSanitizerAtomicityViolationReport *report =
   4280             new ThreadSanitizerAtomicityViolationReport;
   4281         report->type = ThreadSanitizerReport::ATOMICITY_VIOLATION;
   4282         report->tid = TID(0);
   4283         report->stack_trace = r1->stack_trace;
   4284         report->r1 = r1;
   4285         report->r2 = r2;
   4286         report->r3 = r3;
   4287         ThreadSanitizerPrintReport(report);
   4288         break;
   4289       }
   4290     }
   4291   }
   4292 }
   4293 
   4294 // -------- Thread ------------------ {{{1
   4295 struct Thread {
   4296  public:
   4297   ThreadLocalStats stats;
   4298 
   4299   Thread(TID tid, TID parent_tid, VTS *vts, StackTrace *creation_context,
   4300          CallStack *call_stack)
   4301     : is_running_(true),
   4302       tid_(tid),
   4303       sid_(0),
   4304       parent_tid_(parent_tid),
   4305       max_sp_(0),
   4306       min_sp_(0),
   4307       stack_size_for_ignore_(0),
   4308       fun_r_ignore_(0),
   4309       min_sp_for_ignore_(0),
   4310       n_mops_since_start_(0),
   4311       creation_context_(creation_context),
   4312       announced_(false),
   4313       rd_lockset_(0),
   4314       wr_lockset_(0),
   4315       expensive_bits_(0),
   4316       vts_at_exit_(NULL),
   4317       call_stack_(call_stack),
   4318       lock_history_(128),
   4319       recent_segments_cache_(G_flags->recent_segments_cache_size) {
   4320 
   4321     NewSegmentWithoutUnrefingOld("Thread Creation", vts);
   4322     ignore_depth_[0] = ignore_depth_[1] = 0;
   4323 
   4324     HandleRtnCall(0, 0, IGNORE_BELOW_RTN_UNKNOWN);
   4325     ignore_context_[0] = NULL;
   4326     ignore_context_[1] = NULL;
   4327     if (tid != TID(0) && parent_tid.valid()) {
   4328       CHECK(creation_context_);
   4329     }
   4330 
   4331     // Add myself to the array of threads.
   4332     CHECK(tid.raw() < G_flags->max_n_threads);
   4333     CHECK(all_threads_[tid.raw()] == NULL);
   4334     n_threads_ = max(n_threads_, tid.raw() + 1);
   4335     all_threads_[tid.raw()] = this;
   4336     dead_sids_.reserve(kMaxNumDeadSids);
   4337     fresh_sids_.reserve(kMaxNumFreshSids);
   4338     ComputeExpensiveBits();
   4339   }
   4340 
   4341   TID tid() const { return tid_; }
   4342   TID parent_tid() const { return parent_tid_; }
   4343 
   4344   void increment_n_mops_since_start() {
   4345     n_mops_since_start_++;
   4346   }
   4347 
   4348   // STACK
   4349   uintptr_t max_sp() const { return max_sp_; }
   4350   uintptr_t min_sp() const { return min_sp_; }
   4351 
   4352   void SetStack(uintptr_t stack_min, uintptr_t stack_max) {
   4353     CHECK(stack_min < stack_max);
   4354     // Stay sane. Expect the stack to be smaller than 64M.
   4355     CHECK(stack_max - stack_min <= 64 * 1024 * 1024);
   4356     min_sp_ = stack_min;
   4357     max_sp_ = stack_max;
   4358     if (G_flags->ignore_stack) {
   4359       min_sp_for_ignore_ = min_sp_;
   4360       stack_size_for_ignore_ = max_sp_ - min_sp_;
   4361     } else {
   4362       CHECK(min_sp_for_ignore_ == 0 &&
   4363             stack_size_for_ignore_ == 0);
   4364     }
   4365   }
   4366 
   4367   bool MemoryIsInStack(uintptr_t a) {
   4368     return a >= min_sp_ && a <= max_sp_;
   4369   }
   4370 
   4371   bool IgnoreMemoryIfInStack(uintptr_t a) {
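            // A single unsigned comparison implements the range check
            // min_sp_for_ignore_ <= a < min_sp_for_ignore_ + stack_size_for_ignore_:
            // if a < min_sp_for_ignore_, the subtraction wraps to a huge value.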
   4372     return (a - min_sp_for_ignore_) < stack_size_for_ignore_;
   4373   }
   4374 
   4375 
   4376   bool Announce() {
   4377     if (announced_) return false;
   4378     announced_ = true;
   4379     if (tid_ == TID(0)) {
   4380       Report("INFO: T0 is program's main thread\n");
   4381     } else {
   4382       if (G_flags->announce_threads) {
   4383         Report("INFO: T%d has been created by T%d at this point: {{{\n%s}}}\n",
   4384                tid_.raw(), parent_tid_.raw(),
   4385                creation_context_->ToString().c_str());
   4386         Thread * parent = GetIfExists(parent_tid_);
   4387         CHECK(parent);
   4388         parent->Announce();
   4389       } else {
   4390         Report("INFO: T%d has been created by T%d. "
   4391                "Use --announce-threads to see the creation stack.\n",
   4392                tid_.raw(), parent_tid_.raw());
   4393       }
   4394     }
   4395     return true;
   4396   }
   4397 
   4398   string ThreadName() const {
   4399     char buff[100];
   4400     snprintf(buff, sizeof(buff), "T%d", tid().raw());
   4401     string res = buff;
   4402     if (thread_name_.length() > 0) {
   4403       res += " (";
   4404       res += thread_name_;
   4405       res += ")";
   4406     }
   4407     return res;
   4408   }
   4409 
   4410   bool is_running() const { return is_running_; }
   4411 
   4412   INLINE void ComputeExpensiveBits() {
   4413     bool has_expensive_flags = G_flags->trace_level > 0 ||
   4414         G_flags->show_stats > 1                      ||
   4415         G_flags->sample_events > 0;
   4416 
   4417     expensive_bits_ =
   4418         (ignore_depth_[0] != 0) |
   4419         ((ignore_depth_[1] != 0) << 1) |
   4420         ((has_expensive_flags == true) << 2);
   4421   }
   4422 
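          // Layout of expensive_bits_ (read by the accessors below):
          //   bit 0 - reads are currently ignored,
          //   bit 1 - writes are currently ignored,
          //   bit 2 - some expensive flag (tracing, stats or sampling) is on.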
   4423   int expensive_bits() { return expensive_bits_; }
   4424   int ignore_reads() { return expensive_bits() & 1; }
   4425   int ignore_writes() { return (expensive_bits() >> 1) & 1; }
   4426 
   4427   // ignore
   4428   INLINE void set_ignore_accesses(bool is_w, bool on) {
   4429     ignore_depth_[is_w] += on ? 1 : -1;
   4430     CHECK(ignore_depth_[is_w] >= 0);
   4431     ComputeExpensiveBits();
   4432     if (on && G_flags->save_ignore_context) {
   4433       StackTrace::Delete(ignore_context_[is_w]);
   4434       ignore_context_[is_w] = CreateStackTrace(0, 3);
   4435     }
   4436   }
   4437   INLINE void set_ignore_all_accesses(bool on) {
   4438     set_ignore_accesses(false, on);
   4439     set_ignore_accesses(true, on);
   4440   }
   4441 
   4442   StackTrace *GetLastIgnoreContext(bool is_w) {
   4443     return ignore_context_[is_w];
   4444   }
   4445 
   4446   SID sid() const {
   4447     return sid_;
   4448   }
   4449 
   4450   Segment *segment() const {
   4451     CHECK(sid().valid());
   4452     Segment::AssertLive(sid(), __LINE__);
   4453     return Segment::Get(sid());
   4454   }
   4455 
   4456   VTS *vts() const {
   4457     return segment()->vts();
   4458   }
   4459 
   4460   void set_thread_name(const char *name) {
   4461     thread_name_ = string(name);
   4462   }
   4463 
   4464   void HandleThreadEnd() {
   4465     CHECK(is_running_);
   4466     is_running_ = false;
   4467     CHECK(!vts_at_exit_);
   4468     vts_at_exit_ = vts()->Clone();
   4469     CHECK(vts_at_exit_);
   4470     FlushDeadSids();
   4471     ReleaseFreshSids();
   4472   }
   4473 
   4474   // Return the TID of the joined child and its VTS.
   4475   TID HandleThreadJoinAfter(VTS **vts_at_exit, TID joined_tid) {
   4476     CHECK(joined_tid.raw() > 0);
   4477     CHECK(GetIfExists(joined_tid) != NULL);
   4478     Thread* joined_thread  = Thread::Get(joined_tid);
   4479     // Sometimes the joined thread is not truly dead yet.
   4480     // In that case we just take the current vts.
   4481     if (joined_thread->is_running_)
   4482       *vts_at_exit = joined_thread->vts()->Clone();
   4483     else
   4484       *vts_at_exit = joined_thread->vts_at_exit_;
   4485 
   4486     if (*vts_at_exit == NULL) {
   4487       Printf("vts_at_exit==NULL; parent=%d, child=%d\n",
   4488              tid().raw(), joined_tid.raw());
   4489     }
   4490     CHECK(*vts_at_exit);
   4491     if (0)
   4492     Printf("T%d: vts_at_exit_: %s\n", joined_tid.raw(),
   4493            (*vts_at_exit)->ToString().c_str());
   4494     return joined_tid;
   4495   }
   4496 
   4497   static int NumberOfThreads() {
   4498     return INTERNAL_ANNOTATE_UNPROTECTED_READ(n_threads_);
   4499   }
   4500 
   4501   static Thread *GetIfExists(TID tid) {
   4502     if (tid.raw() < NumberOfThreads())
   4503       return Get(tid);
   4504     return NULL;
   4505   }
   4506 
   4507   static Thread *Get(TID tid) {
   4508     DCHECK(tid.raw() < NumberOfThreads());
   4509     return all_threads_[tid.raw()];
   4510   }
   4511 
   4512   void HandleAccessSet() {
   4513     BitSet *rd_set = lock_era_access_set(false);
   4514     BitSet *wr_set = lock_era_access_set(true);
   4515     if (rd_set->empty() && wr_set->empty()) return;
   4516     CHECK(G_flags->atomicity && !G_flags->pure_happens_before);
   4517     AtomicityRegion *atomicity_region = new AtomicityRegion;
   4518     atomicity_region->lock_era = g_lock_era;
   4519     atomicity_region->tid = tid();
   4520     atomicity_region->vts = vts()->Clone();
   4521     atomicity_region->lsid[0] = lsid(0);
   4522     atomicity_region->lsid[1] = lsid(1);
   4523     atomicity_region->access_set[0] = *rd_set;
   4524     atomicity_region->access_set[1] = *wr_set;
   4525     atomicity_region->stack_trace = CreateStackTrace();
   4526     atomicity_region->used = false;
   4527     atomicity_region->n_mops_since_start = this->n_mops_since_start_;
   4528     // atomicity_region->Print();
   4529     // Printf("----------- %s\n", __FUNCTION__);
   4530     // ReportStackTrace(0, 7);
   4531     HandleAtomicityRegion(atomicity_region);
   4532   }
   4533 
   4534   // Locks
   4535   void HandleLock(uintptr_t lock_addr, bool is_w_lock) {
   4536     Lock *lock = Lock::LookupOrCreate(lock_addr);
   4537 
   4538     if (debug_lock) {
   4539       Printf("T%d lid=%d %sLock   %p; %s\n",
   4540            tid_.raw(), lock->lid().raw(),
   4541            is_w_lock ? "Wr" : "Rd",
   4542            lock_addr,
   4543            LockSet::ToString(lsid(is_w_lock)).c_str());
   4544 
   4545       ReportStackTrace(0, 7);
   4546     }
   4547 
    4548     // NOTE: we assume that all locks can be acquired recursively.
   4549     // No warning about recursive locking will be issued.
   4550     if (is_w_lock) {
   4551       // Recursive locks are properly handled because LockSet is in fact a
   4552       // multiset.
   4553       wr_lockset_ = LockSet::Add(wr_lockset_, lock);
   4554       rd_lockset_ = LockSet::Add(rd_lockset_, lock);
   4555       lock->WrLock(tid_, CreateStackTrace());
   4556     } else {
   4557       if (lock->wr_held()) {
   4558         ReportStackTrace();
   4559       }
   4560       rd_lockset_ = LockSet::Add(rd_lockset_, lock);
   4561       lock->RdLock(CreateStackTrace());
   4562     }
   4563 
   4564     if (lock->is_pure_happens_before()) {
   4565       if (is_w_lock) {
   4566         HandleWait(lock->wr_signal_addr());
   4567       } else {
   4568         HandleWait(lock->rd_signal_addr());
   4569       }
   4570     }
   4571 
   4572     if (G_flags->suggest_happens_before_arcs) {
   4573       lock_history_.OnLock(lock->lid());
   4574     }
   4575     NewSegmentForLockingEvent();
   4576     lock_era_access_set_[0].Clear();
   4577     lock_era_access_set_[1].Clear();
   4578   }
   4579 
   4580   void HandleUnlock(uintptr_t lock_addr) {
   4581     HandleAccessSet();
   4582 
   4583     Lock *lock = Lock::Lookup(lock_addr);
   4584     // If the lock is not found, report an error.
   4585     if (lock == NULL) {
   4586       ThreadSanitizerInvalidLockReport *report =
   4587           new ThreadSanitizerInvalidLockReport;
   4588       report->type = ThreadSanitizerReport::INVALID_LOCK;
   4589       report->tid = tid();
   4590       report->lock_addr = lock_addr;
   4591       report->stack_trace = CreateStackTrace();
   4592       ThreadSanitizerPrintReport(report);
   4593       return;
   4594     }
   4595     bool is_w_lock = lock->wr_held();
   4596 
   4597     if (debug_lock) {
   4598       Printf("T%d lid=%d %sUnlock %p; %s\n",
   4599              tid_.raw(), lock->lid().raw(),
   4600              is_w_lock ? "Wr" : "Rd",
   4601              lock_addr,
   4602              LockSet::ToString(lsid(is_w_lock)).c_str());
   4603       ReportStackTrace(0, 7);
   4604     }
   4605 
   4606     if (lock->is_pure_happens_before()) {
    4607       // A reader unlock signals only to the writer lock address;
    4608       // a writer unlock signals to both addresses.
   4609       if (is_w_lock) {
   4610         HandleSignal(lock->rd_signal_addr());
   4611       }
   4612       HandleSignal(lock->wr_signal_addr());
   4613     }
   4614 
   4615     if (!lock->wr_held() && !lock->rd_held()) {
   4616       ThreadSanitizerBadUnlockReport *report =
   4617           new ThreadSanitizerBadUnlockReport;
   4618       report->type = ThreadSanitizerReport::UNLOCK_NONLOCKED;
   4619       report->tid = tid();
   4620       report->lid = lock->lid();
   4621       report->stack_trace = CreateStackTrace();
   4622       ThreadSanitizerPrintReport(report);
   4623       return;
   4624     }
   4625 
   4626     bool removed = false;
   4627     if (is_w_lock) {
   4628       lock->WrUnlock();
   4629       removed =  LockSet::Remove(wr_lockset_, lock, &wr_lockset_)
   4630               && LockSet::Remove(rd_lockset_, lock, &rd_lockset_);
   4631     } else {
   4632       lock->RdUnlock();
   4633       removed = LockSet::Remove(rd_lockset_, lock, &rd_lockset_);
   4634     }
   4635 
   4636     if (!removed) {
   4637       ThreadSanitizerBadUnlockReport *report =
   4638           new ThreadSanitizerBadUnlockReport;
   4639       report->type = ThreadSanitizerReport::UNLOCK_FOREIGN;
   4640       report->tid = tid();
   4641       report->lid = lock->lid();
   4642       report->stack_trace = CreateStackTrace();
   4643       ThreadSanitizerPrintReport(report);
   4644     }
   4645 
   4646     if (G_flags->suggest_happens_before_arcs) {
   4647       lock_history_.OnUnlock(lock->lid());
   4648     }
   4649 
   4650     NewSegmentForLockingEvent();
   4651     lock_era_access_set_[0].Clear();
   4652     lock_era_access_set_[1].Clear();
   4653   }
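           // Example of the reports above: if T1 write-locks M and T2 then
           // calls unlock on M, the lock itself is released (wr_held() was
           // true), but LockSet::Remove fails on T2's locksets, producing an
           // UNLOCK_FOREIGN report. Unlocking an M that nobody holds produces
           // UNLOCK_NONLOCKED instead.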
   4654 
   4655   void HandleForgetSignaller(uintptr_t cv) {
   4656     SignallerMap::iterator it = signaller_map_->find(cv);
   4657     if (it != signaller_map_->end()) {
   4658       if (debug_happens_before) {
   4659         Printf("T%d: ForgetSignaller: %p:\n    %s\n", tid_.raw(), cv,
   4660             (it->second.vts)->ToString().c_str());
   4661         if (G_flags->debug_level >= 1) {
   4662           ReportStackTrace();
   4663         }
   4664       }
   4665       VTS::Unref(it->second.vts);
   4666       signaller_map_->erase(it);
   4667     }
   4668   }
   4669 
   4670   LSID lsid(bool is_w) {
   4671     return is_w ? wr_lockset_ : rd_lockset_;
   4672   }
   4673 
   4674   const LockHistory &lock_history() { return lock_history_; }
   4675 
   4676   // SIGNAL/WAIT events.
   4677   void HandleWait(uintptr_t cv) {
   4678 
   4679     SignallerMap::iterator it = signaller_map_->find(cv);
   4680     if (it != signaller_map_->end()) {
   4681       const VTS *signaller_vts = it->second.vts;
   4682       NewSegmentForWait(signaller_vts);
   4683     }
   4684 
   4685     if (debug_happens_before) {
   4686       Printf("T%d: Wait: %p:\n    %s %s\n", tid_.raw(),
   4687              cv,
   4688              vts()->ToString().c_str(),
   4689              Segment::ToString(sid()).c_str());
   4690       if (G_flags->debug_level >= 1) {
   4691         ReportStackTrace();
   4692       }
   4693     }
   4694   }
   4695 
   4696 
   4697   void HandleSignal(uintptr_t cv) {
   4698     Signaller *signaller = &(*signaller_map_)[cv];
   4699     if (!signaller->vts) {
   4700       signaller->vts = vts()->Clone();
   4701     } else {
   4702       VTS *new_vts = VTS::Join(signaller->vts, vts());
   4703       VTS::Unref(signaller->vts);
   4704       signaller->vts = new_vts;
   4705     }
   4706     NewSegmentForSignal();
   4707     if (debug_happens_before) {
   4708       Printf("T%d: Signal: %p:\n    %s %s\n    %s\n", tid_.raw(), cv,
   4709              vts()->ToString().c_str(), Segment::ToString(sid()).c_str(),
   4710              (signaller->vts)->ToString().c_str());
   4711       if (G_flags->debug_level >= 1) {
   4712         ReportStackTrace();
   4713       }
   4714     }
   4715   }
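           // Example: if T1 calls HandleSignal(cv) and T2 later calls
           // HandleWait(cv), T2 finds cv in signaller_map_ and joins the
           // stored VTS into its own (via NewSegmentForWait), creating the
           // happens-before arc T1 -> T2. Repeated signals on the same cv
           // accumulate via VTS::Join, so a wait synchronizes with all
           // previous signallers of that address.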
   4716 
   4717   void INLINE NewSegmentWithoutUnrefingOld(const char *call_site,
   4718                                            VTS *new_vts) {
   4719     DCHECK(new_vts);
   4720     SID new_sid = Segment::AddNewSegment(tid(), new_vts,
   4721                                          rd_lockset_, wr_lockset_);
   4722     SID old_sid = sid();
   4723     if (old_sid.raw() != 0 && new_vts != vts()) {
   4724       // Flush the cache if VTS changed - the VTS won't repeat.
   4725       recent_segments_cache_.Clear();
   4726     }
   4727     sid_ = new_sid;
   4728     Segment::Ref(new_sid, "Thread::NewSegmentWithoutUnrefingOld");
   4729 
   4730     if (kSizeOfHistoryStackTrace > 0) {
   4731       FillEmbeddedStackTrace(Segment::embedded_stack_trace(sid()));
   4732     }
   4733     if (0)
   4734     Printf("2: %s T%d/S%d old_sid=%d NewSegment: %s\n", call_site,
   4735            tid().raw(), sid().raw(), old_sid.raw(),
   4736          vts()->ToString().c_str());
   4737   }
   4738 
   4739   void INLINE NewSegment(const char *call_site, VTS *new_vts) {
   4740     SID old_sid = sid();
   4741     NewSegmentWithoutUnrefingOld(call_site, new_vts);
   4742     Segment::Unref(old_sid, "Thread::NewSegment");
   4743   }
   4744 
   4745   void NewSegmentForLockingEvent() {
   4746     // Flush the cache since we can't reuse segments with different lockset.
   4747     recent_segments_cache_.Clear();
   4748     NewSegment(__FUNCTION__, vts()->Clone());
   4749   }
   4750 
   4751   void NewSegmentForMallocEvent() {
   4752     // Flush the cache since we can't reuse segments with different lockset.
   4753     recent_segments_cache_.Clear();
   4754     NewSegment(__FUNCTION__, vts()->Clone());
   4755   }
   4756 
   4757 
   4758   void SetTopPc(uintptr_t pc) {
   4759     if (pc) {
   4760       DCHECK(!call_stack_->empty());
   4761       call_stack_->back() = pc;
   4762     }
   4763   }
   4764 
   4765   void NOINLINE HandleSblockEnterSlowLocked() {
   4766     AssertTILHeld();
   4767     FlushStateIfOutOfSegments(this);
   4768     this->stats.history_creates_new_segment++;
   4769     VTS *new_vts = vts()->Clone();
   4770     NewSegment("HandleSblockEnter", new_vts);
   4771     recent_segments_cache_.Push(sid());
   4772     GetSomeFreshSids();  // fill the thread-local SID cache.
   4773   }
   4774 
   4775   INLINE bool HandleSblockEnter(uintptr_t pc, bool allow_slow_path) {
   4776     DCHECK(G_flags->keep_history);
   4777     if (!pc) return true;
   4778 
   4779     this->stats.events[SBLOCK_ENTER]++;
   4780 
   4781     SetTopPc(pc);
   4782 
   4783     bool refill_stack = false;
   4784     SID match = recent_segments_cache_.Search(call_stack_, sid(),
   4785                                               /*OUT*/&refill_stack);
   4786     DCHECK(kSizeOfHistoryStackTrace > 0);
   4787 
   4788     if (match.valid()) {
   4789       // This part is 100% thread-local, no need for locking.
   4790       if (sid_ != match) {
   4791         Segment::Ref(match, "Thread::HandleSblockEnter");
   4792         this->AddDeadSid(sid_, "Thread::HandleSblockEnter");
   4793         sid_ = match;
   4794       }
   4795       if (refill_stack) {
   4796         this->stats.history_reuses_segment++;
   4797         FillEmbeddedStackTrace(Segment::embedded_stack_trace(sid()));
   4798       } else {
   4799         this->stats.history_uses_same_segment++;
   4800       }
   4801     } else if (fresh_sids_.size() > 0) {
   4802       // We have a fresh ready-to-use segment in thread local cache.
   4803       SID fresh_sid = fresh_sids_.back();
   4804       fresh_sids_.pop_back();
   4805       Segment::SetupFreshSid(fresh_sid, tid(), vts()->Clone(),
   4806                              rd_lockset_, wr_lockset_);
   4807       this->AddDeadSid(sid_, "Thread::HandleSblockEnter-1");
   4808       Segment::Ref(fresh_sid, "Thread::HandleSblockEnter-1");
   4809       sid_ = fresh_sid;
   4810       recent_segments_cache_.Push(sid());
   4811       FillEmbeddedStackTrace(Segment::embedded_stack_trace(sid()));
   4812       this->stats.history_uses_preallocated_segment++;
   4813     } else {
   4814       if (!allow_slow_path) return false;
   4815       AssertTILHeld();
    4816       // No fresh SIDs available, have to grab a lock and get a few.
   4817       HandleSblockEnterSlowLocked();
   4818     }
   4819     return true;
   4820   }
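           // To summarize, HandleSblockEnter has three paths, fastest first:
           // 1. reuse a segment from recent_segments_cache_ (same stack and
           //    locksets), possibly refilling its embedded stack trace;
           // 2. take a preallocated SID from the thread-local fresh_sids_
           //    pool and set it up without taking the global lock;
           // 3. fall back to HandleSblockEnterSlowLocked() under the lock,
           //    which may flush state and refills the fresh SID pool.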
   4821 
   4822   void NewSegmentForWait(const VTS *signaller_vts) {
   4823     const VTS *current_vts   = vts();
   4824     if (0)
   4825     Printf("T%d NewSegmentForWait: \n  %s\n  %s\n", tid().raw(),
   4826            current_vts->ToString().c_str(),
   4827            signaller_vts->ToString().c_str());
   4828     // We don't want to create a happens-before arc if it will be redundant.
   4829     if (!VTS::HappensBeforeCached(signaller_vts, current_vts)) {
   4830       VTS *new_vts = VTS::Join(current_vts, signaller_vts);
   4831       NewSegment("NewSegmentForWait", new_vts);
   4832     }
   4833     DCHECK(VTS::HappensBeforeCached(signaller_vts, vts()));
   4834   }
   4835 
   4836   void NewSegmentForSignal() {
   4837     VTS *cur_vts = vts();
   4838     VTS *new_vts = VTS::CopyAndTick(cur_vts, tid());
   4839     NewSegment("NewSegmentForSignal", new_vts);
   4840   }
   4841 
    4842   // When creating a child thread, we need to know:
    4843   // 1. where the thread was created (ctx);
    4844   // 2. what the vector clock of the parent thread was (vts).
   4845 
   4846   struct ThreadCreateInfo {
   4847     StackTrace *ctx;
   4848     VTS        *vts;
   4849   };
   4850 
   4851   static void StopIgnoringAccessesInT0BecauseNewThreadStarted() {
   4852     AssertTILHeld();
   4853     if (g_so_far_only_one_thread) {
   4854       g_so_far_only_one_thread = false;
   4855       Get(TID(0))->set_ignore_all_accesses(false);
   4856     }
   4857   }
   4858 
   4859   // This event comes before the child is created (e.g. just
   4860   // as we entered pthread_create).
   4861   void HandleThreadCreateBefore(TID parent_tid, uintptr_t pc) {
   4862     CHECK(parent_tid == tid());
   4863     StopIgnoringAccessesInT0BecauseNewThreadStarted();
   4864     // Store ctx and vts under TID(0).
   4865     ThreadCreateInfo info;
   4866     info.ctx = CreateStackTrace(pc);
   4867     info.vts = vts()->Clone();
   4868     CHECK(info.ctx && info.vts);
   4869     child_tid_to_create_info_[TID(0)] = info;
   4870     // Tick vts.
   4871     this->NewSegmentForSignal();
   4872 
   4873     if (debug_thread) {
   4874       Printf("T%d: THR_CREATE_BEFORE\n", parent_tid.raw());
   4875     }
   4876   }
   4877 
   4878   // This event comes when we are exiting the thread creation routine.
    4879   // It may appear before *or* after the THR_START event, at least with PIN.
   4880   void HandleThreadCreateAfter(TID parent_tid, TID child_tid) {
   4881     CHECK(parent_tid == tid());
   4882     // Place the info under child_tid if we did not use it yet.
   4883     if (child_tid_to_create_info_.count(TID(0))){
   4884       child_tid_to_create_info_[child_tid] = child_tid_to_create_info_[TID(0)];
   4885       child_tid_to_create_info_.erase(TID(0));
   4886     }
   4887 
   4888     if (debug_thread) {
   4889       Printf("T%d: THR_CREATE_AFTER %d\n", parent_tid.raw(), child_tid.raw());
   4890     }
   4891   }
   4892 
   4893   void HandleChildThreadStart(TID child_tid, VTS **vts, StackTrace **ctx) {
   4894     Thread *parent = this;
   4895     ThreadCreateInfo info;
   4896     if (child_tid_to_create_info_.count(child_tid)) {
    4897       // We have already seen THR_CREATE_AFTER, so the info is under child_tid.
   4898       info = child_tid_to_create_info_[child_tid];
   4899       child_tid_to_create_info_.erase(child_tid);
   4900       CHECK(info.ctx && info.vts);
   4901     } else if (child_tid_to_create_info_.count(TID(0))){
    4902       // We have not seen THR_CREATE_AFTER but have seen THR_CREATE_BEFORE.
   4903       info = child_tid_to_create_info_[TID(0)];
   4904       child_tid_to_create_info_.erase(TID(0));
   4905       CHECK(info.ctx && info.vts);
   4906     } else {
   4907       // We have not seen THR_CREATE_BEFORE/THR_CREATE_AFTER.
   4908       // If the tool is single-threaded (valgrind) these events are redundant.
   4909       info.ctx = parent->CreateStackTrace();
   4910       info.vts = parent->vts()->Clone();
   4911       parent->NewSegmentForSignal();
   4912     }
   4913     *ctx = info.ctx;
   4914     VTS *singleton = VTS::CreateSingleton(child_tid);
   4915     *vts = VTS::Join(singleton, info.vts);
   4916     VTS::Unref(singleton);
   4917     VTS::Unref(info.vts);
   4918 
   4919 
   4920     if (debug_thread) {
   4921       Printf("T%d: THR_START parent: T%d : %s %s\n", child_tid.raw(),
   4922              parent->tid().raw(),
   4923              parent->vts()->ToString().c_str(),
   4924              (*vts)->ToString().c_str());
   4925       if (G_flags->announce_threads) {
   4926         Printf("%s\n", (*ctx)->ToString().c_str());
   4927       }
   4928     }
   4929 
   4930     // Parent should have ticked its VTS so there should be no h-b.
   4931     DCHECK(!VTS::HappensBefore(parent->vts(), *vts));
   4932   }
   4933 
   4934   // Support for Cyclic Barrier, e.g. pthread_barrier_t.
   4935   // We need to create (barrier_count-1)^2 h-b arcs between
   4936   // threads blocking on a barrier. We should not create any h-b arcs
    4937   // for two calls to barrier_wait if the barrier was reset between them.
   4938   struct CyclicBarrierInfo {
   4939     // The value given to barrier_init.
   4940     uint32_t barrier_count;
   4941     // How many times we may block on this barrier before resetting.
   4942     int32_t calls_before_reset;
   4943     // How many times we entered the 'wait-before' and 'wait-after' handlers.
   4944     int32_t n_wait_before, n_wait_after;
   4945   };
   4946   // The following situation is possible:
   4947   // - N threads blocked on a barrier.
   4948   // - All N threads reached the barrier and we started getting 'wait-after'
   4949   //   events, but did not yet get all of them.
   4950   // - N threads blocked on the barrier again and we started getting
   4951   //   'wait-before' events from the next barrier epoch.
   4952   // - We continue getting 'wait-after' events from the previous epoch.
   4953   //
   4954   // We don't want to create h-b arcs between barrier events of different
   4955   // epochs, so we use 'barrier + (epoch % 4)' as an object on which we
    4956   // signal and wait (it is unlikely that more than 4 epochs are live at once).
   4957   enum { kNumberOfPossibleBarrierEpochsLiveAtOnce = 4 };
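           // Worked example: with barrier_count == 3, the first three
           // wait-before events belong to epoch 0 and signal on the address
           // 'barrier + 0'; the matching wait-after events wait on that same
           // address. The next three events use 'barrier + 1', and after
           // epoch 3 the addresses wrap around to 'barrier + 0' again.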
   4958   // Maps the barrier pointer to CyclicBarrierInfo.
   4959   typedef unordered_map<uintptr_t, CyclicBarrierInfo> CyclicBarrierMap;
   4960 
   4961   CyclicBarrierInfo &GetCyclicBarrierInfo(uintptr_t barrier) {
   4962     if (cyclic_barrier_map_ == NULL) {
   4963       cyclic_barrier_map_ = new CyclicBarrierMap;
   4964     }
   4965     return (*cyclic_barrier_map_)[barrier];
   4966   }
   4967 
   4968   void HandleBarrierInit(uintptr_t barrier, uint32_t n) {
   4969     CyclicBarrierInfo &info = GetCyclicBarrierInfo(barrier);
   4970     CHECK(n > 0);
   4971     memset(&info, 0, sizeof(CyclicBarrierInfo));
   4972     info.barrier_count = n;
   4973   }
   4974 
   4975   void HandleBarrierWaitBefore(uintptr_t barrier) {
   4976     CyclicBarrierInfo &info = GetCyclicBarrierInfo(barrier);
   4977 
   4978     CHECK(info.calls_before_reset >= 0);
   4979     int32_t epoch = info.n_wait_before / info.barrier_count;
   4980     epoch %= kNumberOfPossibleBarrierEpochsLiveAtOnce;
   4981     info.n_wait_before++;
   4982     if (info.calls_before_reset == 0) {
   4983       // We are blocking the first time after reset. Clear the VTS.
   4984       info.calls_before_reset = info.barrier_count;
   4985       Signaller &signaller = (*signaller_map_)[barrier + epoch];
   4986       VTS::Unref(signaller.vts);
   4987       signaller.vts = NULL;
   4988       if (debug_happens_before) {
   4989         Printf("T%d barrier %p (epoch %d) reset\n", tid().raw(),
   4990                barrier, epoch);
   4991       }
   4992     }
   4993     info.calls_before_reset--;
   4994     // Signal to all threads that blocked on this barrier.
   4995     if (debug_happens_before) {
   4996       Printf("T%d barrier %p (epoch %d) wait before\n", tid().raw(),
   4997              barrier, epoch);
   4998     }
   4999     HandleSignal(barrier + epoch);
   5000   }
   5001 
   5002   void HandleBarrierWaitAfter(uintptr_t barrier) {
   5003     CyclicBarrierInfo &info = GetCyclicBarrierInfo(barrier);
   5004     int32_t epoch = info.n_wait_after / info.barrier_count;
   5005     epoch %= kNumberOfPossibleBarrierEpochsLiveAtOnce;
   5006     info.n_wait_after++;
   5007     if (debug_happens_before) {
   5008       Printf("T%d barrier %p (epoch %d) wait after\n", tid().raw(),
   5009              barrier, epoch);
   5010     }
   5011     HandleWait(barrier + epoch);
   5012   }
   5013 
   5014   // Call stack  -------------
   5015   void PopCallStack() {
   5016     CHECK(!call_stack_->empty());
   5017     call_stack_->pop_back();
   5018   }
   5019 
   5020   void HandleRtnCall(uintptr_t call_pc, uintptr_t target_pc,
   5021                      IGNORE_BELOW_RTN ignore_below) {
   5022     this->stats.events[RTN_CALL]++;
   5023     if (!call_stack_->empty() && call_pc) {
   5024       call_stack_->back() = call_pc;
   5025     }
   5026     call_stack_->push_back(target_pc);
   5027 
   5028     bool ignore = false;
   5029     if (ignore_below == IGNORE_BELOW_RTN_UNKNOWN) {
   5030       if (ignore_below_cache_.Lookup(target_pc, &ignore) == false) {
   5031         ignore = ThreadSanitizerIgnoreAccessesBelowFunction(target_pc);
   5032         ignore_below_cache_.Insert(target_pc, ignore);
   5033         G_stats->ignore_below_cache_miss++;
   5034       } else {
   5035         // Just in case, check the result of caching.
   5036         DCHECK(ignore ==
   5037                ThreadSanitizerIgnoreAccessesBelowFunction(target_pc));
   5038       }
   5039     } else {
   5040       DCHECK(ignore_below == IGNORE_BELOW_RTN_YES ||
   5041              ignore_below == IGNORE_BELOW_RTN_NO);
   5042       ignore = ignore_below == IGNORE_BELOW_RTN_YES;
   5043     }
   5044 
   5045     if (fun_r_ignore_) {
   5046       fun_r_ignore_++;
   5047     } else if (ignore) {
   5048       fun_r_ignore_ = 1;
   5049       set_ignore_all_accesses(true);
   5050     }
   5051   }
   5052 
   5053   void HandleRtnExit() {
   5054     this->stats.events[RTN_EXIT]++;
   5055     if (!call_stack_->empty()) {
   5056       call_stack_->pop_back();
   5057       if (fun_r_ignore_) {
   5058         if (--fun_r_ignore_ == 0) {
   5059           set_ignore_all_accesses(false);
   5060         }
   5061       }
   5062     }
   5063   }
   5064 
   5065   uintptr_t GetCallstackEntry(size_t offset_from_top) {
   5066     if (offset_from_top >= call_stack_->size()) return 0;
   5067     return (*call_stack_)[call_stack_->size() - offset_from_top - 1];
   5068   }
   5069 
   5070   string CallStackRtnName(size_t offset_from_top = 0) {
   5071     if (call_stack_->size() <= offset_from_top)
   5072       return "";
   5073     uintptr_t pc = (*call_stack_)[call_stack_->size() - offset_from_top - 1];
   5074     return PcToRtnName(pc, false);
   5075   }
   5076 
   5077   string CallStackToStringRtnOnly(int len) {
   5078     string res;
   5079     for (int i = 0; i < len; i++) {
   5080       if (i)
   5081         res += " ";
   5082       res += CallStackRtnName(i);
   5083     }
   5084     return res;
   5085   }
   5086 
   5087   uintptr_t CallStackTopPc() {
   5088     if (call_stack_->empty())
   5089       return 0;
   5090     return call_stack_->back();
   5091   }
   5092 
   5093   INLINE void FillEmbeddedStackTrace(uintptr_t *emb_trace) {
   5094     size_t size = min(call_stack_->size(), (size_t)kSizeOfHistoryStackTrace);
   5095     size_t idx = call_stack_->size() - 1;
   5096     uintptr_t *pcs = call_stack_->pcs();
   5097     for (size_t i = 0; i < size; i++, idx--) {
   5098       emb_trace[i] = pcs[idx];
   5099     }
   5100     if (size < (size_t) kSizeOfHistoryStackTrace) {
   5101       emb_trace[size] = 0;
   5102     }
   5103   }
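           // Example: if call_stack_ holds {pc1, pc2, pc3} (pc3 is the
           // innermost frame) and kSizeOfHistoryStackTrace == 10, emb_trace
           // becomes {pc3, pc2, pc1, 0, ...}: newest frame first and
           // zero-terminated since the stack is shorter than the buffer.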
   5104 
   5105   INLINE void FillStackTrace(StackTrace *trace, size_t size) {
   5106     size_t idx = call_stack_->size() - 1;
   5107     uintptr_t *pcs = call_stack_->pcs();
   5108     for (size_t i = 0; i < size; i++, idx--) {
   5109       trace->Set(i, pcs[idx]);
   5110     }
   5111   }
   5112 
   5113   INLINE StackTrace *CreateStackTrace(uintptr_t pc = 0,
   5114                                       int max_len = -1,
   5115                                       int capacity = 0) {
   5116     if (!call_stack_->empty() && pc) {
   5117       call_stack_->back() = pc;
   5118     }
   5119     if (max_len <= 0) {
   5120       max_len = G_flags->num_callers;
   5121     }
   5122     int size = call_stack_->size();
   5123     if (size > max_len)
   5124       size = max_len;
   5125     StackTrace *res = StackTrace::CreateNewEmptyStackTrace(size, capacity);
   5126     FillStackTrace(res, size);
   5127     return res;
   5128   }
   5129 
   5130   void ReportStackTrace(uintptr_t pc = 0, int max_len = -1) {
   5131     StackTrace *trace = CreateStackTrace(pc, max_len);
   5132     Report("%s", trace->ToString().c_str());
   5133     StackTrace::Delete(trace);
   5134   }
   5135 
   5136   static void ForgetAllState() {
   5137     // G_flags->debug_level = 2;
   5138     for (int i = 0; i < Thread::NumberOfThreads(); i++) {
   5139       Thread *thr = Get(TID(i));
   5140       if (!thr->is_running()) continue;
   5141       thr->child_tid_to_create_info_.clear();
   5142       thr->recent_segments_cache_.ForgetAllState();
   5143       thr->sid_ = SID();  // Reset the old SID so we don't try to read its VTS.
   5144       VTS *singleton_vts = VTS::CreateSingleton(TID(i), 2);
   5145       thr->NewSegmentWithoutUnrefingOld("ForgetAllState", singleton_vts);
   5146       if (thr->vts_at_exit_) {
   5147         VTS::Unref(thr->vts_at_exit_);
   5148         thr->vts_at_exit_ = singleton_vts->Clone();
   5149       }
   5150       thr->dead_sids_.clear();
   5151       thr->fresh_sids_.clear();
   5152     }
   5153     signaller_map_->ClearAndDeleteElements();
   5154   }
   5155 
   5156   static void InitClassMembers() {
   5157     ScopedMallocCostCenter malloc_cc("InitClassMembers");
   5158     all_threads_        = new Thread*[G_flags->max_n_threads];
   5159     memset(all_threads_, 0, sizeof(Thread*) * G_flags->max_n_threads);
   5160     n_threads_          = 0;
   5161     signaller_map_      = new SignallerMap;
   5162   }
   5163 
   5164   BitSet *lock_era_access_set(int is_w) {
   5165     return &lock_era_access_set_[is_w];
   5166   }
   5167 
   5168   // --------- dead SIDs, fresh SIDs
    5169   // When running the fast path w/o a lock we need to recycle SIDs to a thread-local
   5170   // pool. HasRoomForDeadSids and AddDeadSid may be called w/o a lock.
   5171   // FlushDeadSids should be called under a lock.
   5172   // When creating a new segment on SBLOCK_ENTER, we need to get a fresh SID
   5173   // from somewhere. We keep a pile of fresh ready-to-use SIDs in
   5174   // a thread-local array.
   5175   enum { kMaxNumDeadSids = 64,
   5176          kMaxNumFreshSids = 256, };
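           // A sketch of the intended protocol: the unlocked fast path calls
           // AddDeadSid() while HasRoomForDeadSids() returns true; once the
           // thread holds the main lock (e.g. in HandleSblockEnterSlowLocked)
           // it calls FlushDeadSids() to recycle dead segments and
           // GetSomeFreshSids() to refill the thread-local pool.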
   5177   INLINE void AddDeadSid(SID sid, const char *where) {
   5178     if (TS_SERIALIZED) {
   5179       Segment::Unref(sid, where);
   5180     } else {
   5181       if (Segment::UnrefNoRecycle(sid, where) == 0) {
   5182         dead_sids_.push_back(sid);
   5183       }
   5184     }
   5185   }
   5186 
   5187   INLINE void FlushDeadSids() {
   5188     if (TS_SERIALIZED) return;
   5189     size_t n = dead_sids_.size();
   5190     for (size_t i = 0; i < n; i++) {
   5191       SID sid = dead_sids_[i];
   5192       Segment::AssertLive(sid, __LINE__);
   5193       DCHECK(Segment::Get(sid)->ref_count() == 0);
   5194       Segment::RecycleOneSid(sid);
   5195     }
   5196     dead_sids_.clear();
   5197   }
   5198 
   5199   INLINE bool HasRoomForDeadSids() const {
   5200     return TS_SERIALIZED ? false :
   5201         dead_sids_.size() < kMaxNumDeadSids - 2;
   5202   }
   5203 
   5204   void GetSomeFreshSids() {
   5205     size_t cur_size = fresh_sids_.size();
   5206     DCHECK(cur_size <= kMaxNumFreshSids);
   5207     if (cur_size > kMaxNumFreshSids / 2) {
   5208       // We already have quite a few fresh SIDs, do nothing.
   5209       return;
   5210     }
   5211     DCHECK(fresh_sids_.capacity() >= kMaxNumFreshSids);
   5212     size_t n_requested_sids = kMaxNumFreshSids - cur_size;
   5213     fresh_sids_.resize(kMaxNumFreshSids);
   5214     Segment::AllocateFreshSegments(n_requested_sids, &fresh_sids_[cur_size]);
   5215   }
   5216 
   5217   void ReleaseFreshSids() {
   5218     for (size_t i = 0; i < fresh_sids_.size(); i++) {
   5219       Segment::RecycleOneFreshSid(fresh_sids_[i]);
   5220     }
   5221     fresh_sids_.clear();
   5222   }
   5223 
   5224  private:
   5225   bool is_running_;
   5226   string thread_name_;
   5227 
   5228   TID    tid_;         // This thread's tid.
   5229   SID    sid_;         // Current segment ID.
   5230   TID    parent_tid_;  // Parent's tid.
   5231   bool   thread_local_copy_of_g_has_expensive_flags_;
   5232   uintptr_t  max_sp_;
   5233   uintptr_t  min_sp_;
   5234   uintptr_t  stack_size_for_ignore_;
   5235   uintptr_t  fun_r_ignore_;  // > 0 if we are inside a fun_r-ed function.
   5236   uintptr_t  min_sp_for_ignore_;
   5237   uintptr_t  n_mops_since_start_;
   5238   StackTrace *creation_context_;
   5239   bool      announced_;
   5240 
   5241   LSID   rd_lockset_;
   5242   LSID   wr_lockset_;
   5243 
   5244   // These bits should be read in the hottest loop, so we combine them all
   5245   // together.
    5246   // bit 0 -- ignore reads.
    5247   // bit 1 -- ignore writes.
    5248   // bit 2 -- have expensive flags.
   5249   int expensive_bits_;
   5250   int ignore_depth_[2];
   5251   StackTrace *ignore_context_[2];
   5252 
   5253   VTS *vts_at_exit_;
   5254 
   5255   CallStack *call_stack_;
   5256 
   5257   vector<SID> dead_sids_;
   5258   vector<SID> fresh_sids_;
   5259 
   5260   PtrToBoolCache<251> ignore_below_cache_;
   5261 
   5262   LockHistory lock_history_;
   5263   BitSet lock_era_access_set_[2];
   5264   RecentSegmentsCache recent_segments_cache_;
   5265 
   5266   map<TID, ThreadCreateInfo> child_tid_to_create_info_;
   5267 
   5268   struct Signaller {
   5269     VTS *vts;
   5270   };
   5271 
   5272   class SignallerMap: public unordered_map<uintptr_t, Signaller> {
   5273     public:
   5274      void ClearAndDeleteElements() {
   5275        for (iterator it = begin(); it != end(); ++it) {
   5276          VTS::Unref(it->second.vts);
   5277        }
   5278        clear();
   5279      }
   5280   };
   5281 
   5282   // All threads. The main thread has tid 0.
   5283   static Thread **all_threads_;
   5284   static int      n_threads_;
   5285 
   5286   // signaller address -> VTS
   5287   static SignallerMap *signaller_map_;
   5288   static CyclicBarrierMap *cyclic_barrier_map_;
   5289 };
   5290 
   5291 INLINE static int32_t raw_tid(Thread *t) {
   5292   return t->tid().raw();
   5293 }
   5294 
   5295 // Thread:: static members
   5296 Thread                    **Thread::all_threads_;
   5297 int                         Thread::n_threads_;
   5298 Thread::SignallerMap       *Thread::signaller_map_;
   5299 Thread::CyclicBarrierMap   *Thread::cyclic_barrier_map_;
   5300 
   5301 
   5302 // -------- PCQ --------------------- {{{1
   5303 struct PCQ {
   5304   uintptr_t pcq_addr;
   5305   deque<VTS*> putters;
   5306 };
   5307 
   5308 typedef map<uintptr_t, PCQ> PCQMap;
   5309 static PCQMap *g_pcq_map;
   5310 
   5311 // -------- Heap info ---------------------- {{{1
   5312 #include "ts_heap_info.h"
   5313 // Information about heap memory.
   5314 
   5315 struct HeapInfo {
   5316   uintptr_t   ptr;
   5317   uintptr_t   size;
   5318   SID         sid;
   5319   HeapInfo() : ptr(0), size(0), sid(0) { }
   5320 
   5321   Segment *seg() { return Segment::Get(sid); }
   5322   TID tid() { return seg()->tid(); }
   5323   string StackTraceString() { return Segment::StackTraceString(sid); }
   5324 };
   5325 
   5326 static HeapMap<HeapInfo> *G_heap_map;
   5327 
   5328 struct ThreadStackInfo {
   5329   uintptr_t   ptr;
   5330   uintptr_t   size;
   5331   ThreadStackInfo() : ptr(0), size(0) { }
   5332 };
   5333 
   5334 static HeapMap<ThreadStackInfo> *G_thread_stack_map;
   5335 
   5336 // -------- Forget all state -------- {{{1
   5337 // We need to forget all state and start over because we've
   5338 // run out of some resources (most likely, segment IDs).
   5339 static void ForgetAllStateAndStartOver(Thread *thr, const char *reason) {
   5340   // This is done under the main lock.
   5341   AssertTILHeld();
   5342   size_t start_time = g_last_flush_time = TimeInMilliSeconds();
   5343   Report("T%d INFO: %s. Flushing state.\n", raw_tid(thr), reason);
   5344 
   5345   if (TS_SERIALIZED == 0) {
   5346     // We own the lock, but we also must acquire all cache lines
   5347     // so that the fast-path (unlocked) code does not execute while
   5348     // we are flushing.
   5349     G_cache->AcquireAllLines(thr);
   5350   }
   5351 
   5352 
   5353   if (0) {
   5354     Report("INFO: Thread Sanitizer will now forget all history.\n");
   5355     Report("INFO: This is experimental, and may fail!\n");
   5356     if (G_flags->keep_history > 0) {
   5357       Report("INFO: Consider re-running with --keep_history=0\n");
   5358     }
   5359     if (G_flags->show_stats) {
   5360         G_stats->PrintStats();
   5361     }
   5362   }
   5363 
   5364   G_stats->n_forgets++;
   5365 
   5366   Segment::ForgetAllState();
   5367   SegmentSet::ForgetAllState();
   5368   Thread::ForgetAllState();
   5369   VTS::FlushHBCache();
   5370 
   5371   G_heap_map->Clear();
   5372 
   5373   g_publish_info_map->clear();
   5374 
   5375   for (PCQMap::iterator it = g_pcq_map->begin(); it != g_pcq_map->end(); ++it) {
   5376     PCQ &pcq = it->second;
   5377     for (deque<VTS*>::iterator it2 = pcq.putters.begin();
   5378          it2 != pcq.putters.end(); ++it2) {
   5379       VTS::Unref(*it2);
   5380       *it2 = VTS::CreateSingleton(TID(0), 1);
   5381     }
   5382   }
   5383 
   5384   // Must be the last one to flush as it effectively releases the
    5385   // cache lines and enables fast path code to run in other threads.
   5386   G_cache->ForgetAllState(thr);
   5387 
   5388   size_t stop_time = TimeInMilliSeconds();
   5389   if (DEBUG_MODE || (stop_time - start_time > 0)) {
   5390     Report("T%d INFO: Flush took %ld ms\n", raw_tid(thr),
   5391            stop_time - start_time);
   5392   }
   5393 }
   5394 
   5395 static INLINE void FlushStateIfOutOfSegments(Thread *thr) {
   5396   if (Segment::NumberOfSegments() > kMaxSIDBeforeFlush) {
    5397     // Too few SIDs left -- flush the state.
   5398     if (DEBUG_MODE) {
   5399       G_cache->PrintStorageStats();
   5400       Segment::ShowSegmentStats();
   5401     }
    5402     ForgetAllStateAndStartOver(thr, "ran out of segment IDs");
   5403   }
   5404 }
   5405 
   5406 // -------- Expected Race ---------------------- {{{1
   5407 typedef  HeapMap<ExpectedRace> ExpectedRacesMap;
   5408 static ExpectedRacesMap *G_expected_races_map;
   5409 static bool g_expecting_races;
   5410 static int g_found_races_since_EXPECT_RACE_BEGIN;
   5411 
   5412 ExpectedRace* ThreadSanitizerFindExpectedRace(uintptr_t addr) {
   5413   return G_expected_races_map->GetInfo(addr);
   5414 }
   5415 
   5416 // -------- Suppressions ----------------------- {{{1
   5417 static const char default_suppressions[] =
   5418 // TODO(kcc): as it gets bigger, move it into a separate object file.
   5419 "# We need to have some default suppressions, but we don't want to    \n"
    5420 "# keep them in a separate text file, so we keep them in the code.    \n"
   5421 
   5422 #ifdef VGO_darwin
   5423 "{                                                                    \n"
   5424 "   dyld tries to unlock an invalid mutex when adding/removing image. \n"
   5425 "   ThreadSanitizer:InvalidLock                                       \n"
   5426 "   fun:pthread_mutex_unlock                                          \n"
   5427 "   fun:_dyld_register_func_for_*_image                               \n"
   5428 "}                                                                    \n"
   5429 
   5430 "{                                                                      \n"
   5431 "  Benign reports in __NSOperationInternal when using workqueue threads \n"
   5432 "  ThreadSanitizer:Race                                                 \n"
   5433 "  fun:__+[__NSOperationInternal _observeValueForKeyPath:ofObject:changeKind:oldValue:newValue:indexes:context:]_block_invoke_*\n"
   5434 "  fun:_dispatch_call_block_and_release                                 \n"
   5435 "}                                                                      \n"
   5436 
   5437 "{                                                                    \n"
   5438 "  Benign race in GCD when using workqueue threads.                   \n"
   5439 "  ThreadSanitizer:Race                                               \n"
   5440 "  fun:____startOperations_block_invoke_*                             \n"
   5441 "  ...                                                                \n"
   5442 "  fun:_dispatch_call_block_and_release                               \n"
   5443 "}                                                                    \n"
   5444 
   5445 "{                                                                    \n"
   5446 "  Benign race in NSOQSchedule when using workqueue threads.          \n"
   5447 "  ThreadSanitizer:Race                                               \n"
   5448 "  fun:__doStart*                                                     \n"
   5449 "  ...                                                                \n"
   5450 "  fun:_dispatch_call_block_and_release                               \n"
   5451 "}                                                                    \n"
   5452 
   5453 
   5454 #endif
   5455 
   5456 #ifndef _MSC_VER
   5457 "{                                                                   \n"
   5458 "  False reports on std::string internals. See TSan issue #40.       \n"
   5459 "  ThreadSanitizer:Race                                              \n"
   5460 "  ...                                                               \n"
   5461 "  fun:*~basic_string*                                               \n"
   5462 "}                                                                   \n"
   5463 
   5464 #else
   5465 "{                                                                   \n"
   5466 "  False lock report inside ntdll.dll                                \n"
   5467 "  ThreadSanitizer:InvalidLock                                       \n"
   5468 "  fun:*                                                             \n"
   5469 "  obj:*ntdll.dll                                                    \n"
   5470 "}                                                                   \n"
   5471 
   5472 "{                                                                   \n"
   5473 "  False report due to lack of debug symbols in ntdll.dll  (a)       \n"
   5474 "  ThreadSanitizer:InvalidLock                                       \n"
   5475 "  fun:*SRWLock*                                                     \n"
   5476 "}                                                                   \n"
   5477 
   5478 "{                                                                   \n"
   5479 "  False report due to lack of debug symbols in ntdll.dll  (b)       \n"
   5480 "  ThreadSanitizer:UnlockForeign                                     \n"
   5481 "  fun:*SRWLock*                                                     \n"
   5482 "}                                                                   \n"
   5483 
   5484 "{                                                                   \n"
   5485 "  False report due to lack of debug symbols in ntdll.dll  (c)       \n"
   5486 "  ThreadSanitizer:UnlockNonLocked                                   \n"
   5487 "  fun:*SRWLock*                                                     \n"
   5488 "}                                                                   \n"
   5489 
   5490 "{                                                                   \n"
   5491 "  False reports on std::string internals (2). See TSan issue #40.   \n"
   5492 "  ThreadSanitizer:Race                                              \n"
   5493 "  ...                                                               \n"
   5494 "  fun:*basic_string*scalar deleting destructor*                     \n"
   5495 "}                                                                   \n"
   5496 #endif
   5497 
   5498 #ifdef TS_PIN
   5499 "{                                                                   \n"
   5500 "  Suppression for issue 54 (PIN lacks support for IFUNC)            \n"
   5501 "  ThreadSanitizer:Race                                              \n"
   5502 "  ...                                                               \n"
   5503 "  fun:*NegativeTests_Strlen::Worker*                                \n"
   5504 "}                                                                   \n"
   5505 #endif
   5506 
   5507 ;
   5508 
   5509 // -------- Report Storage --------------------- {{{1
   5510 class ReportStorage {
   5511  public:
   5512 
   5513   ReportStorage()
   5514    : n_reports(0),
   5515      n_race_reports(0),
   5516      program_finished_(0) {
   5517     if (G_flags->generate_suppressions) {
   5518       Report("INFO: generate_suppressions = true\n");
   5519     }
   5520     // Read default suppressions
   5521     int n = suppressions_.ReadFromString(default_suppressions);
   5522     if (n == -1) {
   5523       Report("Error reading default suppressions at line %d: %s\n",
   5524           suppressions_.GetErrorLineNo(),
   5525           suppressions_.GetErrorString().c_str());
   5526       exit(1);
   5527     }
   5528 
   5529     // Read user-supplied suppressions.
   5530     for (size_t i = 0; i < G_flags->suppressions.size(); i++) {
   5531       const string &supp_path = G_flags->suppressions[i];
   5532       Report("INFO: reading suppressions file %s\n", supp_path.c_str());
   5533       int n = suppressions_.ReadFromString(ReadFileToString(supp_path, true));
   5534       if (n == -1) {
   5535         Report("Error at line %d: %s\n",
   5536             suppressions_.GetErrorLineNo(),
   5537             suppressions_.GetErrorString().c_str());
   5538         exit(1);
   5539       }
   5540       Report("INFO: %6d suppression(s) read from file %s\n",
   5541              n, supp_path.c_str());
   5542     }
   5543   }
   5544 
   5545   bool NOINLINE AddReport(Thread *thr, uintptr_t pc, bool is_w, uintptr_t addr,
   5546                           int size,
   5547                           ShadowValue old_sval, ShadowValue new_sval,
   5548                           bool is_published) {
   5549     {
   5550       // Check this isn't a "_ZNSs4_Rep20_S_empty_rep_storageE" report.
   5551       uintptr_t offset;
   5552       string symbol_descr;
   5553       if (GetNameAndOffsetOfGlobalObject(addr, &symbol_descr, &offset)) {
   5554         if (StringMatch("*empty_rep_storage*", symbol_descr))
   5555           return false;
   5556         if (StringMatch("_IO_stdfile_*_lock", symbol_descr))
   5557           return false;
   5558         if (StringMatch("_IO_*_stdout_", symbol_descr))
   5559           return false;
   5560         if (StringMatch("_IO_*_stderr_", symbol_descr))
   5561           return false;
   5562       }
   5563     }
   5564 
   5565     bool is_expected = false;
   5566     ExpectedRace *expected_race = G_expected_races_map->GetInfo(addr);
   5567     if (debug_expected_races) {
   5568       Printf("Checking expected race for %lx; exp_race=%p\n",
   5569              addr, expected_race);
   5570       if (expected_race) {
   5571         Printf("  ptr=0x%lx size=0x%lx end=0x%lx\n",
   5572                expected_race->ptr, expected_race->size,
   5573                expected_race->ptr + expected_race->size);
   5574       }
   5575     }
   5576 
   5577     if (expected_race) {
   5578       if (G_flags->nacl_untrusted != expected_race->is_nacl_untrusted) {
   5579         Report("WARNING: this race is only expected in NaCl %strusted mode\n",
   5580             expected_race->is_nacl_untrusted ? "un" : "");
   5581       } else {
   5582         is_expected = true;
   5583         expected_race->count++;
   5584       }
   5585     }
   5586 
   5587     if (g_expecting_races) {
   5588       is_expected = true;
   5589       g_found_races_since_EXPECT_RACE_BEGIN++;
   5590     }
   5591 
   5592     if (is_expected && !G_flags->show_expected_races) return false;
   5593 
   5594     StackTrace *stack_trace = thr->CreateStackTrace(pc);
   5595     int n_reports_for_this_context = reported_stacks_[stack_trace]++;
   5596 
   5597     if (n_reports_for_this_context > 0) {
   5598       // we already reported a race here.
   5599       StackTrace::Delete(stack_trace);
   5600       return false;
   5601     }
   5602 
   5603 
   5604     ThreadSanitizerDataRaceReport *race_report =
   5605         new ThreadSanitizerDataRaceReport;
   5606 
   5607     race_report->type = ThreadSanitizerReport::DATA_RACE;
   5608     race_report->new_sval = new_sval;
   5609     race_report->old_sval = old_sval;
   5610     race_report->is_expected = is_expected;
   5611     race_report->last_access_is_w = is_w;
   5612     race_report->racey_addr = addr;
   5613     race_report->racey_addr_description = DescribeMemory(addr);
   5614     race_report->last_access_tid = thr->tid();
   5615     race_report->last_access_sid = thr->sid();
   5616     race_report->last_access_size = size;
   5617     race_report->stack_trace = stack_trace;
   5618     race_report->racey_addr_was_published = is_published;
   5619     race_report->last_acces_lsid[false] = thr->lsid(false);
   5620     race_report->last_acces_lsid[true] = thr->lsid(true);
   5621 
   5622     Segment *seg = Segment::Get(thr->sid());
   5623     CHECK(thr->lsid(false) == seg->lsid(false));
   5624     CHECK(thr->lsid(true) == seg->lsid(true));
   5625 
   5626     return ThreadSanitizerPrintReport(race_report);
   5627   }
   5628 
   5629   void AnnounceThreadsInSegmentSet(SSID ssid) {
   5630     if (ssid.IsEmpty()) return;
   5631     for (int s = 0; s < SegmentSet::Size(ssid); s++) {
   5632       Segment *seg = SegmentSet::GetSegmentForNonSingleton(ssid, s, __LINE__);
   5633       Thread::Get(seg->tid())->Announce();
   5634     }
   5635   }
   5636 
   5637 
   5638 
   5639   void PrintConcurrentSegmentSet(SSID ssid, TID tid, SID sid,
   5640                                  LSID lsid, bool is_w,
   5641                                  const char *descr, set<LID> *locks,
   5642                                  set<SID>* concurrent_sids) {
   5643     if (ssid.IsEmpty()) return;
   5644     bool printed_header = false;
   5645     Thread *thr1 = Thread::Get(tid);
   5646     for (int s = 0; s < SegmentSet::Size(ssid); s++) {
   5647       SID concurrent_sid = SegmentSet::GetSID(ssid, s, __LINE__);
   5648       Segment *seg = Segment::Get(concurrent_sid);
   5649       if (Segment::HappensBeforeOrSameThread(concurrent_sid, sid)) continue;
   5650       if (!LockSet::IntersectionIsEmpty(lsid, seg->lsid(is_w))) continue;
   5651       if (concurrent_sids) {
   5652         concurrent_sids->insert(concurrent_sid);
   5653       }
   5654       Thread *thr2 = Thread::Get(seg->tid());
   5655       if (!printed_header) {
   5656         Report("  %sConcurrent %s happened at (OR AFTER) these points:%s\n",
   5657                c_magenta, descr, c_default);
   5658         printed_header = true;
   5659       }
   5660 
   5661       Report("   %s (%s):\n",
   5662              thr2->ThreadName().c_str(),
   5663              TwoLockSetsToString(seg->lsid(false),
   5664                                  seg->lsid(true)).c_str());
   5665       if (G_flags->show_states) {
   5666         Report("   S%d\n", concurrent_sid.raw());
   5667       }
   5668       LockSet::AddLocksToSet(seg->lsid(false), locks);
   5669       LockSet::AddLocksToSet(seg->lsid(true), locks);
   5670       Report("%s", Segment::StackTraceString(concurrent_sid).c_str());
   5671       if (!G_flags->pure_happens_before &&
   5672           G_flags->suggest_happens_before_arcs) {
   5673         set<LID> message_locks;
   5674         // Report("Locks in T%d\n", thr1->tid().raw());
   5675         // thr1->lock_history().PrintLocks();
   5676         // Report("Unlocks in T%d\n", thr2->tid().raw());
   5677         // thr2->lock_history().PrintUnlocks();
   5678         if (LockHistory::Intersect(thr1->lock_history(), thr2->lock_history(),
   5679                                    seg->lock_era(), &message_locks)) {
   5680           Report("   Note: these locks were recently released by T%d"
   5681                  " and later acquired by T%d: {%s}\n"
   5682                  "   See http://code.google.com/p/data-race-test/wiki/"
   5683                  "PureHappensBeforeVsHybrid\n",
   5684                  thr2->tid().raw(),
   5685                  thr1->tid().raw(),
   5686                  SetOfLocksToString(message_locks).c_str());
   5687           locks->insert(message_locks.begin(), message_locks.end());
   5688         }
   5689       }
   5690     }
   5691   }
   5692 
   5693   void SetProgramFinished() {
   5694     CHECK(!program_finished_);
   5695     program_finished_ = true;
   5696   }
   5697 
   5698   string RaceInfoString(uintptr_t pc, set<SID>& concurrent_sids) {
   5699     string s;
   5700     char buf[100];
   5701     snprintf(buf, 100, "Race verifier data: %p", (void*)pc);
   5702     s += buf;
   5703     for (set<SID>::iterator it = concurrent_sids.begin();
   5704          it != concurrent_sids.end(); ++it) {
   5705       // Take the first pc of the concurrent stack trace.
   5706       uintptr_t concurrent_pc = *Segment::embedded_stack_trace(*it);
   5707       snprintf(buf, 100, ",%p", (void*)concurrent_pc);
   5708       s += buf;
   5709     }
   5710     s += "\n";
   5711     return s;
   5712   }
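           // Example output (hypothetical addresses):
           //   Race verifier data: 0x400123,0x400456,0x400789
           // i.e. the racy pc followed by the first pc of each concurrent
           // segment's embedded stack trace.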
   5713 
   5714   void PrintRaceReport(ThreadSanitizerDataRaceReport *race) {
   5715     bool short_report = program_finished_;
   5716     if (!short_report) {
   5717       AnnounceThreadsInSegmentSet(race->new_sval.rd_ssid());
   5718       AnnounceThreadsInSegmentSet(race->new_sval.wr_ssid());
   5719     }
   5720     bool is_w = race->last_access_is_w;
   5721     TID     tid = race->last_access_tid;
   5722     Thread *thr = Thread::Get(tid);
   5723     SID     sid = race->last_access_sid;
   5724     LSID    lsid = race->last_acces_lsid[is_w];
   5725     set<LID> all_locks;
   5726 
   5727     n_race_reports++;
   5728     if (G_flags->html) {
   5729       Report("<b id=race%d>Race report #%d; </b>"
   5730              "<a href=\"#race%d\">Next;</a>  "
   5731              "<a href=\"#race%d\">Prev;</a>\n",
   5732              n_race_reports, n_race_reports,
   5733              n_race_reports+1, n_race_reports-1);
   5734     }
   5735 
   5736 
   5737     // Note the {{{ and }}}. These are for vim folds.
   5738     Report("%sWARNING: %s data race during %s of size %d at %p: {{{%s\n",
   5739            c_red,
   5740            race->is_expected ? "Expected" : "Possible",
   5741            is_w ? "write" : "read",
   5742            race->last_access_size,
   5743            race->racey_addr,
   5744            c_default);
   5745     if (!short_report) {
   5746       LockSet::AddLocksToSet(race->last_acces_lsid[false], &all_locks);
   5747       LockSet::AddLocksToSet(race->last_acces_lsid[true], &all_locks);
   5748       Report("   %s (%s):\n",
   5749              thr->ThreadName().c_str(),
   5750              TwoLockSetsToString(race->last_acces_lsid[false],
   5751                                  race->last_acces_lsid[true]).c_str());
   5752     }
   5753 
   5754     CHECK(race->stack_trace);
   5755     Report("%s", race->stack_trace->ToString().c_str());
   5756     if (short_report) {
   5757       Report(" See the full version of this report above.\n");
   5758       Report("}%s\n", "}}");
   5759       return;
   5760     }
   5761     // Report(" sid=%d; vts=%s\n", thr->sid().raw(),
   5762     //       thr->vts()->ToString().c_str());
   5763     if (G_flags->show_states) {
   5764       Report(" old state: %s\n", race->old_sval.ToString().c_str());
   5765       Report(" new state: %s\n", race->new_sval.ToString().c_str());
   5766     }
   5767     set<SID> concurrent_sids;
   5768     if (G_flags->keep_history) {
   5769       PrintConcurrentSegmentSet(race->new_sval.wr_ssid(),
   5770                                 tid, sid, lsid, true, "write(s)", &all_locks,
   5771                                 &concurrent_sids);
   5772       if (is_w) {
   5773         PrintConcurrentSegmentSet(race->new_sval.rd_ssid(),
   5774                                   tid, sid, lsid, false, "read(s)", &all_locks,
   5775                                   &concurrent_sids);
   5776       }
   5777     } else {
   5778       Report("  %sAccess history is disabled. "
    5779              "Consider running with --keep_history=1 for better reports.%s\n",
   5780              c_cyan, c_default);
   5781     }
   5782 
   5783     if (race->racey_addr_was_published) {
   5784       Report(" This memory was published\n");
   5785     }
   5786     if (race->racey_addr_description.size() > 0) {
   5787       Report("%s", race->racey_addr_description.c_str());
   5788     }
   5789     if (race->is_expected) {
   5790       ExpectedRace *expected_race =
   5791           G_expected_races_map->GetInfo(race->racey_addr);
   5792       if (expected_race) {
   5793         CHECK(expected_race->description);
   5794         Report(" Description: \"%s\"\n", expected_race->description);
   5795       }
   5796     }
   5797     set<LID>  locks_reported;
   5798 
   5799     if (!all_locks.empty()) {
   5800       Report("  %sLocks involved in this report "
   5801              "(reporting last lock sites):%s {%s}\n",
   5802              c_green, c_default,
   5803              SetOfLocksToString(all_locks).c_str());
   5804 
   5805       for (set<LID>::iterator it = all_locks.begin();
   5806            it != all_locks.end(); ++it) {
   5807         LID lid = *it;
   5808         Lock::ReportLockWithOrWithoutContext(lid, true);
   5809       }
   5810     }
   5811 
   5812     string raceInfoString = RaceInfoString(race->stack_trace->Get(0),
   5813         concurrent_sids);
   5814     Report("   %s", raceInfoString.c_str());
   5815     Report("}}}\n");
   5816   }
   5817 
   5818   bool PrintReport(ThreadSanitizerReport *report) {
   5819     CHECK(report);
   5820     // Check if we have a suppression.
   5821     vector<string> funcs_mangled;
   5822     vector<string> funcs_demangled;
   5823     vector<string> objects;
   5824 
   5825     CHECK(!g_race_verifier_active);
   5826     CHECK(report->stack_trace);
   5827     CHECK(report->stack_trace->size());
   5828     for (size_t i = 0; i < report->stack_trace->size(); i++) {
   5829       uintptr_t pc = report->stack_trace->Get(i);
   5830       string img, rtn, file;
   5831       int line;
   5832       PcToStrings(pc, false, &img, &rtn, &file, &line);
   5833       if (rtn == "(below main)" || rtn == "ThreadSanitizerStartThread")
   5834         break;
   5835 
   5836       funcs_mangled.push_back(rtn);
   5837       funcs_demangled.push_back(NormalizeFunctionName(PcToRtnName(pc, true)));
   5838       objects.push_back(img);
   5839 
   5840       if (rtn == "main")
   5841         break;
   5842     }
   5843     string suppression_name;
   5844     if (suppressions_.StackTraceSuppressed("ThreadSanitizer",
   5845                                            report->ReportName(),
   5846                                            funcs_mangled,
   5847                                            funcs_demangled,
   5848                                            objects,
   5849                                            &suppression_name)) {
   5850       used_suppressions_[suppression_name]++;
   5851       return false;
   5852     }
   5853 
   5854     // Actually print it.
   5855     if (report->type == ThreadSanitizerReport::UNLOCK_FOREIGN) {
   5856       ThreadSanitizerBadUnlockReport *bad_unlock =
   5857           reinterpret_cast<ThreadSanitizerBadUnlockReport*>(report);
   5858       Report("WARNING: Lock %s was released by thread T%d"
   5859              " which did not acquire this lock: {{{\n%s}}}\n",
   5860              Lock::ToString(bad_unlock->lid).c_str(),
   5861              bad_unlock->tid.raw(),
   5862              bad_unlock->stack_trace->ToString().c_str());
   5863     } else if (report->type == ThreadSanitizerReport::UNLOCK_NONLOCKED) {
   5864       ThreadSanitizerBadUnlockReport *bad_unlock =
   5865           reinterpret_cast<ThreadSanitizerBadUnlockReport*>(report);
   5866       Report("WARNING: Unlocking a non-locked lock %s in thread T%d: "
   5867              "{{{\n%s}}}\n",
   5868              Lock::ToString(bad_unlock->lid).c_str(),
   5869              bad_unlock->tid.raw(),
   5870              bad_unlock->stack_trace->ToString().c_str());
   5871     } else if (report->type == ThreadSanitizerReport::INVALID_LOCK) {
   5872       ThreadSanitizerInvalidLockReport *invalid_lock =
   5873           reinterpret_cast<ThreadSanitizerInvalidLockReport*>(report);
   5874       Report("WARNING: accessing an invalid lock %p in thread T%d: "
   5875              "{{{\n%s}}}\n",
   5876              invalid_lock->lock_addr,
   5877              invalid_lock->tid.raw(),
   5878              invalid_lock->stack_trace->ToString().c_str());
   5879     } else if (report->type == ThreadSanitizerReport::ATOMICITY_VIOLATION) {
   5880       ThreadSanitizerAtomicityViolationReport *av =
   5881           reinterpret_cast<ThreadSanitizerAtomicityViolationReport*>(report);
   5882       Report("WARNING: Suspected atomicity violation {{{\n");
   5883       av->r1->Print();
   5884       av->r2->Print();
   5885       av->r3->Print();
   5886       Report("}}}\n");
   5887 
   5888     } else {
   5889       CHECK(report->type == ThreadSanitizerReport::DATA_RACE);
   5890       ThreadSanitizerDataRaceReport *race =
   5891           reinterpret_cast<ThreadSanitizerDataRaceReport*>(report);
   5892       PrintRaceReport(race);
   5893     }
   5894 
   5895     n_reports++;
   5896     SetNumberOfFoundErrors(n_reports);
   5897     if (!G_flags->summary_file.empty()) {
   5898       char buff[100];
   5899       snprintf(buff, sizeof(buff),
   5900                "ThreadSanitizer: %d warning(s) reported\n", n_reports);
    5901       // We overwrite the contents of this file with the new summary
    5902       // on every report (rather than once at the end) so that the
    5903       // summary survives even if we crash later.
   5904       OpenFileWriteStringAndClose(G_flags->summary_file, buff);
   5905     }
   5906 
   5907     // Generate a suppression.
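             // The emitted suppression has the following shape (the function
             // and object names below are illustrative, not real output):
             //   {
             //     <Put your suppression name here>
             //     ThreadSanitizer:<ReportName()>
             //     fun:MyFunction
             //     obj:/path/to/object
             //   }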
   5908     if (G_flags->generate_suppressions) {
   5909       string supp = "{\n";
   5910       supp += "  <Put your suppression name here>\n";
   5911       supp += string("  ThreadSanitizer:") + report->ReportName() + "\n";
   5912       for (size_t i = 0; i < funcs_mangled.size(); i++) {
   5913         const string &func = funcs_demangled[i];
   5914         if (func.size() == 0 || func == "(no symbols") {
   5915           supp += "  obj:" + objects[i] + "\n";
   5916         } else {
   5917           supp += "  fun:" + funcs_demangled[i] + "\n";
   5918         }
   5919         if (StackTrace::CutStackBelowFunc(funcs_demangled[i])) {
   5920           break;
   5921         }
   5922       }
   5923       supp += "}";
   5924       Printf("------- suppression -------\n%s\n------- end suppression -------\n",
   5925              supp.c_str());
   5926     }
   5927 
   5928     return true;
   5929   }
   5930 
   5931   void PrintUsedSuppression() {
   5932     for (map<string, int>::iterator it = used_suppressions_.begin();
   5933          it != used_suppressions_.end(); ++it) {
   5934       Report("used_suppression: %d %s\n", it->second, it->first.c_str());
   5935     }
   5936   }
   5937 
   5938   void PrintSummary() {
   5939     Report("ThreadSanitizer summary: reported %d warning(s) (%d race(s))\n",
   5940            n_reports, n_race_reports);
   5941   }
   5942 
   5943 
   5944   string DescribeMemory(uintptr_t a) {
   5945     const int kBufLen = 1023;
   5946     char buff[kBufLen+1];
   5947 
   5948     // Is this stack?
   5949     for (int i = 0; i < Thread::NumberOfThreads(); i++) {
   5950       Thread *t = Thread::Get(TID(i));
   5951       if (!t || !t->is_running()) continue;
   5952       if (t->MemoryIsInStack(a)) {
   5953         snprintf(buff, sizeof(buff),
   5954                  "  %sLocation %p is %ld bytes inside T%d's stack [%p,%p]%s\n",
   5955                  c_blue,
   5956                  reinterpret_cast<void*>(a),
   5957                  static_cast<long>(t->max_sp() - a),
   5958                  i,
   5959                  reinterpret_cast<void*>(t->min_sp()),
   5960                  reinterpret_cast<void*>(t->max_sp()),
   5961                  c_default
   5962                 );
   5963         return buff;
   5964       }
   5965     }
   5966 
   5967     HeapInfo *heap_info = G_heap_map->GetInfo(a);
   5968     if (heap_info) {
   5969       snprintf(buff, sizeof(buff),
   5970              "  %sLocation %p is %ld bytes inside a block starting at %p"
   5971              " of size %ld allocated by T%d from heap:%s\n",
   5972              c_blue,
   5973              reinterpret_cast<void*>(a),
   5974              static_cast<long>(a - heap_info->ptr),
   5975              reinterpret_cast<void*>(heap_info->ptr),
   5976              static_cast<long>(heap_info->size),
   5977              heap_info->tid().raw(), c_default);
    5978       return string(buff) + heap_info->StackTraceString();
   5979     }
   5980 
   5981 
   5982     // Is it a global object?
   5983     uintptr_t offset;
   5984     string symbol_descr;
   5985     if (GetNameAndOffsetOfGlobalObject(a, &symbol_descr, &offset)) {
   5986       snprintf(buff, sizeof(buff),
   5987               "  %sAddress %p is %d bytes inside data symbol \"",
   5988               c_blue, reinterpret_cast<void*>(a), static_cast<int>(offset));
   5989       return buff + symbol_descr + "\"" + c_default + "\n";
   5990     }
   5991 
   5992     if (G_flags->debug_level >= 2) {
   5993       string res;
   5994       // Is this near stack?
    5995       for (int i = 0; i < Thread::NumberOfThreads(); i++) {
    5996         Thread *t = Thread::Get(TID(i));
                 if (!t || !t->is_running()) continue;  // same guard as the loop above.
   5997         const uintptr_t kMaxStackDiff = 1024 * 16;
   5998         uintptr_t diff1 = a - t->max_sp();
   5999         uintptr_t diff2 = t->min_sp() - a;
   6000         if (diff1 < kMaxStackDiff ||
   6001             diff2 < kMaxStackDiff ||
   6002             t->MemoryIsInStack(a)) {
   6003           uintptr_t diff = t->MemoryIsInStack(a) ? 0 :
   6004               (diff1 < kMaxStackDiff ? diff1 : diff2);
   6005           snprintf(buff, sizeof(buff),
   6006                    "  %sLocation %p is within %d bytes outside T%d's stack [%p,%p]%s\n",
   6007                    c_blue,
   6008                    reinterpret_cast<void*>(a),
   6009                    static_cast<int>(diff),
   6010                    i,
   6011                    reinterpret_cast<void*>(t->min_sp()),
   6012                    reinterpret_cast<void*>(t->max_sp()),
   6013                    c_default
   6014                   );
   6015           res += buff;
   6016         }
   6017       }
   6018       if (res.size() > 0) {
   6019         return res +
   6020             "  This report _may_ indicate that valgrind incorrectly "
   6021             "computed the stack boundaries\n";
   6022       }
   6023     }
   6024 
   6025     return "";
   6026   }
   6027 
   6028  private:
   6029   map<StackTrace *, int, StackTrace::Less> reported_stacks_;
   6030   int n_reports;
   6031   int n_race_reports;
   6032   bool program_finished_;
   6033   Suppressions suppressions_;
   6034   map<string, int> used_suppressions_;
   6035 };
   6036 
   6037 // -------- Event Sampling ---------------- {{{1
   6038 // This class samples (profiles) events.
   6039 // Instances of this class should all be static.
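         // Typical usage (this exact pattern appears in MemoryStateMachine
         // below):
         //   static EventSampler sampler;
         //   sampler.Sample(thr, "HasTupleSS", false);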
   6040 class EventSampler {
   6041  public:
   6042 
   6043   // Sample one event
   6044   void Sample(Thread *thr, const char *event_name, bool need_locking) {
   6045     CHECK_NE(G_flags->sample_events, 0);
    6046     counter_++;
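             // Keep only 1 in every 2^sample_events events: proceed only when
             // the low G_flags->sample_events bits of the counter are zero.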
   6047     if ((counter_ & ((1 << G_flags->sample_events) - 1)) != 0)
   6048       return;
   6049 
   6050     TIL til(ts_lock, 8, need_locking);
   6051     string pos = thr->CallStackToStringRtnOnly(G_flags->sample_events_depth);
   6052     (*samples_)[event_name][pos]++;
   6053     total_samples_++;
   6054     if (total_samples_ >= print_after_this_number_of_samples_) {
   6055       print_after_this_number_of_samples_ +=
   6056           print_after_this_number_of_samples_ / 2;
   6057       ShowSamples();
   6058     }
   6059   }
   6060 
   6061   // Show existing samples
   6062   static void ShowSamples() {
   6063     if (G_flags->sample_events == 0) return;
   6064     Printf("ShowSamples: (all samples: %lld)\n", total_samples_);
   6065     for (SampleMapMap::iterator it1 = samples_->begin();
   6066          it1 != samples_->end(); ++it1) {
   6067       string name = it1->first;
   6068       SampleMap &m = it1->second;
   6069       int total = 0;
   6070       for (SampleMap::iterator it2 = m.begin(); it2 != m.end(); it2++) {
   6071         total += it2->second;
   6072       }
   6073 
    6074       map<int, string> inverted_map;
    6075       for (SampleMap::iterator it2 = m.begin(); it2 != m.end(); it2++) {
    6076         int n_samples = it2->second;
    6077         if (n_samples * 1000 < total) continue;  // skip entries below 0.1%.
    6078         inverted_map[n_samples] = it2->first;
    6079       }
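               // Each sample stands for roughly 2^sample_events events, hence
               // the shift in the estimate below.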
   6080       Printf("%s: total samples %'d (~%'lld events)\n", name.c_str(),
   6081              total,
   6082              (int64_t)total << G_flags->sample_events);
    6083       for (map<int, string>::iterator it = inverted_map.begin();
    6084            it != inverted_map.end(); ++it) {
   6085         Printf("%s: %d samples (~%d%%) %s\n", name.c_str(), it->first,
   6086                (it->first * 100) / total, it->second.c_str());
   6087       }
   6088       Printf("\n");
   6089     }
   6090   }
   6091 
   6092   static void InitClassMembers() {
   6093     samples_ = new SampleMapMap;
   6094     total_samples_ = 0;
   6095     print_after_this_number_of_samples_ = 1000;
   6096   }
   6097 
   6098  private:
   6099   int counter_;
   6100 
   6101   typedef map<string, int> SampleMap;
   6102   typedef map<string, SampleMap> SampleMapMap;
   6103   static SampleMapMap *samples_;
   6104   static int64_t total_samples_;
   6105   static int64_t print_after_this_number_of_samples_;
   6106 };
   6107 
   6108 EventSampler::SampleMapMap *EventSampler::samples_;
   6109 int64_t EventSampler::total_samples_;
   6110 int64_t EventSampler::print_after_this_number_of_samples_;
   6111 
   6112 // -------- Detector ---------------------- {{{1
   6113 // Collection of event handlers.
   6114 class Detector {
   6115  public:
   6116   void INLINE HandleTraceLoop(Thread *thr, uintptr_t pc,
   6117                               MopInfo *mops,
   6118                               uintptr_t *tleb, size_t n,
   6119                               int expensive_bits, bool need_locking) {
   6120     bool has_expensive_flags = (expensive_bits & 4) != 0;
   6121     size_t i = 0;
   6122     uintptr_t sblock_pc = pc;
   6123     size_t n_locks = 0;
   6124     do {
   6125       uintptr_t addr = tleb[i];
   6126       if (addr == 0) continue;  // This mop was not executed.
   6127       MopInfo *mop = &mops[i];
   6128       tleb[i] = 0;  // we've consumed this mop, clear it.
   6129       DCHECK(mop->size() != 0);
   6130       DCHECK(mop->pc() != 0);
    6131       if ((expensive_bits & 1) && !mop->is_write()) continue;
    6132       if ((expensive_bits & 2) && mop->is_write()) continue;
    6133       n_locks += HandleMemoryAccessInternal(thr, &sblock_pc, addr, mop,
    6134                                             has_expensive_flags,
    6135                                             need_locking);
   6136     } while (++i < n);
   6137     if (has_expensive_flags) {
   6138       const size_t mop_stat_size = TS_ARRAY_SIZE(thr->stats.mops_per_trace);
   6139       thr->stats.mops_per_trace[min(n, mop_stat_size - 1)]++;
   6140       const size_t stat_size = TS_ARRAY_SIZE(thr->stats.locks_per_trace);
   6141       thr->stats.locks_per_trace[min(n_locks, stat_size - 1)]++;
   6142     }
   6143   }
   6144 
   6145 #ifdef _MSC_VER
   6146   NOINLINE
   6147   // With MSVC, INLINE would cause the compilation to be insanely slow.
   6148 #else
   6149   INLINE
   6150 #endif
   6151   void HandleTrace(Thread *thr, MopInfo *mops, size_t n, uintptr_t pc,
   6152                    uintptr_t *tleb, bool need_locking) {
   6153     DCHECK(n);
    6154     // bit 0 -- ignore reads, bit 1 -- ignore writes,
    6155     // bit 2 -- has_expensive_flags.
   6156     int expensive_bits = thr->expensive_bits();
   6157 
   6158     if (expensive_bits == 0) {
   6159       HandleTraceLoop(thr, pc, mops, tleb, n, 0, need_locking);
   6160     } else {
   6161       if ((expensive_bits & 3) == 3) {
   6162         // everything is ignored, just clear the tleb.
   6163         for (size_t i = 0; i < n; i++) tleb[i] = 0;
   6164       } else {
   6165         HandleTraceLoop(thr, pc, mops, tleb, n, expensive_bits, need_locking);
   6166       }
   6167     }
   6168     // At the end, the tleb must be cleared.
   6169     for (size_t i = 0; i < n; i++) DCHECK(tleb[i] == 0);
   6170   }
   6171 
   6172   // Special case of a trace with just one mop and no sblock.
   6173   void INLINE HandleMemoryAccess(Thread *thr, uintptr_t pc,
   6174                                  uintptr_t addr, uintptr_t size,
   6175                                  bool is_w, bool need_locking) {
   6176     CHECK(size);
   6177     MopInfo mop(pc, size, is_w, false);
   6178     HandleTrace(thr, &mop, 1, 0/*no sblock*/, &addr, need_locking);
   6179   }
   6180 
   6181   void ShowUnfreedHeap() {
    6182     // Check for memory that was never free()-ed
    6183     // (for debugging free() interceptors, not for leak detection).
   6184     if (DEBUG_MODE && G_flags->debug_level >= 1) {
   6185       for (HeapMap<HeapInfo>::iterator it = G_heap_map->begin();
   6186            it != G_heap_map->end(); ++it) {
   6187         HeapInfo &info = it->second;
    6188         Printf("Not free()-ed memory of size %ld: [%p, %p)\n%s\n",
    6189                static_cast<long>(info.size), info.ptr, info.ptr + info.size,
    6190                info.StackTraceString().c_str());
   6191       }
   6192     }
   6193   }
   6194 
   6195   void FlushExpectedRaces(bool print_summary) {
   6196     // Report("ThreadSanitizerValgrind: done\n");
   6197     // check if we found all expected races (for unit tests only).
   6198     static int total_missing = 0;
   6199     int this_flush_missing = 0;
   6200     for (ExpectedRacesMap::iterator it = G_expected_races_map->begin();
   6201          it != G_expected_races_map->end(); ++it) {
   6202       ExpectedRace race = it->second;
   6203       if (debug_expected_races) {
   6204         Printf("Checking if expected race fired: %p\n", race.ptr);
   6205       }
   6206       if (race.count == 0 &&
   6207           !(g_race_verifier_active && !race.is_verifiable) &&
   6208           (G_flags->nacl_untrusted == race.is_nacl_untrusted)) {
   6209         ++this_flush_missing;
   6210         Printf("Missing an expected race on %p: %s (annotated at %s)\n",
   6211                it->first,
   6212                race.description,
   6213                PcToRtnNameAndFilePos(race.pc).c_str());
   6214       }
   6215     }
   6216 
   6217     if (this_flush_missing) {
   6218       int n_errs = GetNumberOfFoundErrors();
   6219       SetNumberOfFoundErrors(n_errs + this_flush_missing);
   6220       total_missing += this_flush_missing;
   6221     }
   6222     G_expected_races_map->Clear();
   6223 
   6224     if (print_summary && total_missing > 0)
   6225       Report("WARNING: %d expected race(s) NOT detected!\n", total_missing);
   6226   }
   6227 
   6228   void HandleProgramEnd() {
   6229     FlushExpectedRaces(true);
   6230     // ShowUnfreedHeap();
   6231     EventSampler::ShowSamples();
   6232     ShowStats();
   6233     TraceInfo::PrintTraceProfile();
   6234     ShowProcSelfStatus();
   6235     reports_.PrintUsedSuppression();
   6236     reports_.PrintSummary();
   6237     // Report("ThreadSanitizerValgrind: exiting\n");
   6238   }
   6239 
   6240   void FlushIfOutOfMem(Thread *thr) {
   6241     static int max_vm_size;
   6242     static int soft_limit;
   6243     const int hard_limit = G_flags->max_mem_in_mb;
   6244     const int minimal_soft_limit = (hard_limit * 13) / 16;
   6245     const int print_info_limit   = (hard_limit * 12) / 16;
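             // E.g. with a hard limit of 1600M: VmSize gets reported above
             // 1200M and the first flush is triggered above 1300M.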
   6246 
   6247     CHECK(hard_limit > 0);
   6248 
   6249     int vm_size_in_mb = GetVmSizeInMb();
   6250     if (max_vm_size < vm_size_in_mb) {
   6251       max_vm_size = vm_size_in_mb;
   6252       if (max_vm_size > print_info_limit) {
   6253         Report("INFO: ThreadSanitizer's VmSize: %dM\n", (int)max_vm_size);
   6254       }
   6255     }
   6256 
   6257     if (soft_limit == 0) {
   6258       soft_limit = minimal_soft_limit;
   6259     }
   6260 
   6261     if (vm_size_in_mb > soft_limit) {
   6262       ForgetAllStateAndStartOver(thr,
   6263           "ThreadSanitizer is running close to its memory limit");
   6264       soft_limit = vm_size_in_mb + 1;
   6265     }
   6266   }
   6267 
   6268   // Force state flushing.
   6269   void FlushState(TID tid) {
   6270     ForgetAllStateAndStartOver(Thread::Get(tid),
   6271                                "State flushing requested by client");
   6272   }
   6273 
   6274   void FlushIfNeeded(Thread *thr) {
   6275     // Are we out of segment IDs?
    6276 #ifdef TS_VALGRIND  // GetVmSizeInMb() works only with valgrind anyway.
   6277     static int counter;
   6278     counter++;  // ATTENTION: don't do this in multi-threaded code -- too slow.
   6279     CHECK(TS_SERIALIZED == 1);
   6280 
   6281     // Are we out of memory?
   6282     if (G_flags->max_mem_in_mb > 0) {
    6283       const int kFreq = 1024 * 32;
   6284       if ((counter % kFreq) == 0) {  // Don't do it too often.
   6285         // TODO(kcc): find a way to check memory limit more frequently.
   6286         TIL til(ts_lock, 7);
   6287         AssertTILHeld();
   6288         FlushIfOutOfMem(thr);
   6289       }
   6290     }
   6291 #if 0
   6292     if ((counter % (1024 * 1024 * 64)) == 0 ||
   6293         counter == (1024 * 1024)) {
   6294       // ShowStats();
   6295       EventSampler::ShowSamples();
   6296       TraceInfo::PrintTraceProfile();
   6297     }
   6298 #endif
   6299 #endif
   6300 
    6301 #if 0  // Do we still need it? Hope not.
   6302     size_t flush_period = G_flags->flush_period * 1000;  // milliseconds.
   6303     if (flush_period && (counter % (1024 * 4)) == 0) {
   6304       size_t cur_time = TimeInMilliSeconds();
   6305       if (cur_time - g_last_flush_time  > flush_period) {
   6306         TIL til(ts_lock, 7);
   6307         ForgetAllStateAndStartOver(
   6308           "Doing periodic flush (period is set by --flush_period=n_seconds)");
   6309       }
   6310     }
   6311 #endif
   6312   }
   6313 
   6314   void HandleRtnCall(TID tid, uintptr_t call_pc, uintptr_t target_pc,
   6315                      IGNORE_BELOW_RTN ignore_below) {
   6316     Thread *thr = Thread::Get(tid);
   6317     thr->HandleRtnCall(call_pc, target_pc, ignore_below);
   6318     FlushIfNeeded(thr);
   6319   }
   6320 
   6321   void INLINE HandleOneEvent(Event *e) {
   6322     ScopedMallocCostCenter malloc_cc("HandleOneEvent");
   6323 
   6324     DCHECK(e);
   6325     EventType type = e->type();
   6326     DCHECK(type != NOOP);
   6327     Thread *thr = NULL;
   6328     if (type != THR_START) {
   6329       thr = Thread::Get(TID(e->tid()));
   6330       DCHECK(thr);
   6331       thr->SetTopPc(e->pc());
   6332       thr->stats.events[type]++;
   6333     }
   6334 
   6335     switch (type) {
   6336       case READ:
   6337         HandleMemoryAccess(thr, e->pc(), e->a(), e->info(), false, true);
   6338         return;
   6339       case WRITE:
   6340         HandleMemoryAccess(thr, e->pc(), e->a(), e->info(), true, true);
   6341         return;
   6342       case RTN_CALL:
   6343         HandleRtnCall(TID(e->tid()), e->pc(), e->a(),
   6344                       IGNORE_BELOW_RTN_UNKNOWN);
   6345         return;
   6346       case RTN_EXIT:
   6347         thr->HandleRtnExit();
   6348         return;
   6349       default: break;
   6350     }
   6351 
   6352     // Everything else is under a lock.
   6353     TIL til(ts_lock, 0);
   6354     AssertTILHeld();
   6355 
   6356 
   6357     if (UNLIKELY(type == THR_START)) {
   6358         HandleThreadStart(TID(e->tid()), TID(e->info()), (CallStack*)e->pc());
   6359         Thread::Get(TID(e->tid()))->stats.events[type]++;
   6360         return;
   6361     }
   6362 
   6363     FlushStateIfOutOfSegments(thr);
   6364 
   6365     // Since we have the lock, get some fresh SIDs.
   6366     thr->GetSomeFreshSids();
   6367 
   6368     switch (type) {
    6369       case THR_START   : CHECK(0); break;
   6371       case SBLOCK_ENTER:
   6372         if (thr->ignore_reads() && thr->ignore_writes()) break;
   6373         thr->HandleSblockEnter(e->pc(), /*allow_slow_path=*/true);
   6374         break;
   6375       case THR_CREATE_BEFORE:
   6376         thr->HandleThreadCreateBefore(TID(e->tid()), e->pc());
   6377         break;
   6378       case THR_CREATE_AFTER:
   6379         thr->HandleThreadCreateAfter(TID(e->tid()), TID(e->info()));
   6380         break;
   6381       case THR_FIRST_INSN:
   6382         HandleThreadFirstInsn(TID(e->tid()));
   6383         break;
   6384       case THR_JOIN_AFTER     : HandleThreadJoinAfter(e);   break;
   6385       case THR_STACK_TOP      : HandleThreadStackTop(e); break;
   6386 
   6387       case THR_END     : HandleThreadEnd(TID(e->tid()));     break;
   6388       case MALLOC      : HandleMalloc(e, false);     break;
   6389       case FREE        : HandleFree(e);         break;
   6390       case MMAP        : HandleMalloc(e, true);      break;  // same as MALLOC
   6391       case MUNMAP      : HandleMunmap(e);     break;
   6392 
   6393 
   6394       case WRITER_LOCK : thr->HandleLock(e->a(), true);     break;
   6395       case READER_LOCK : thr->HandleLock(e->a(), false);    break;
   6396       case UNLOCK      : thr->HandleUnlock(e->a());       break;
   6397       case UNLOCK_OR_INIT : HandleUnlockOrInit(e); break;
   6398 
   6399       case LOCK_CREATE:
   6400       case LOCK_DESTROY: HandleLockCreateOrDestroy(e); break;
   6401 
   6402       case SIGNAL      : thr->HandleSignal(e->a());  break;
   6403       case WAIT        : thr->HandleWait(e->a());   break;
   6404 
   6405       case CYCLIC_BARRIER_INIT:
   6406         thr->HandleBarrierInit(e->a(), e->info());
   6407         break;
   6408       case CYCLIC_BARRIER_WAIT_BEFORE  :
   6409         thr->HandleBarrierWaitBefore(e->a());
   6410         break;
   6411       case CYCLIC_BARRIER_WAIT_AFTER  :
   6412         thr->HandleBarrierWaitAfter(e->a());
   6413         break;
   6414 
   6415       case PCQ_CREATE   : HandlePcqCreate(e);   break;
   6416       case PCQ_DESTROY  : HandlePcqDestroy(e);  break;
   6417       case PCQ_PUT      : HandlePcqPut(e);      break;
   6418       case PCQ_GET      : HandlePcqGet(e);      break;
   6419 
   6420 
   6421       case EXPECT_RACE :
   6422         HandleExpectRace(e->a(), e->info(),
   6423                          (const char*)e->pc(), TID(e->tid()));
   6424         break;
   6425       case BENIGN_RACE :
   6426         HandleBenignRace(e->a(), e->info(),
   6427                          (const char*)e->pc(), TID(e->tid()));
   6428         break;
   6429       case FLUSH_EXPECTED_RACES:
   6430         FlushExpectedRaces(false);
   6431         break;
   6432       case EXPECT_RACE_BEGIN:
   6433         CHECK(g_expecting_races == false);
   6434         g_expecting_races = true;
   6435         g_found_races_since_EXPECT_RACE_BEGIN = 0;
   6436         break;
   6437       case EXPECT_RACE_END:
   6438         CHECK(g_expecting_races == true);
   6439         g_expecting_races = false;
   6440         if (g_found_races_since_EXPECT_RACE_BEGIN == 0) {
   6441           int n_errs = GetNumberOfFoundErrors();
   6442           SetNumberOfFoundErrors(n_errs + 1);
   6443           Printf("WARNING: expected race not found.\n");
   6444         }
   6445         break;
   6446 
   6447       case HB_LOCK     : HandleHBLock(e);       break;
   6448       case NON_HB_LOCK : HandleNonHBLock(e);    break;
   6449 
   6450       case IGNORE_READS_BEG:  HandleIgnore(e, false, true);  break;
   6451       case IGNORE_READS_END:  HandleIgnore(e, false, false); break;
   6452       case IGNORE_WRITES_BEG: HandleIgnore(e, true, true);   break;
   6453       case IGNORE_WRITES_END: HandleIgnore(e, true, false);  break;
   6454 
   6455       case SET_THREAD_NAME:
   6456         thr->set_thread_name((const char*)e->a());
   6457         break;
   6458       case SET_LOCK_NAME: {
   6459           uintptr_t lock_addr = e->a();
   6460           const char *name = reinterpret_cast<const char *>(e->info());
   6461           Lock *lock = Lock::LookupOrCreate(lock_addr);
   6462           lock->set_name(name);
   6463         }
   6464         break;
   6465 
   6466       case PUBLISH_RANGE : HandlePublishRange(e); break;
   6467       case UNPUBLISH_RANGE :
   6468         Report("WARNING: ANNOTATE_UNPUBLISH_MEMORY_RANGE is deprecated\n");
   6469         break;
   6470 
   6471       case TRACE_MEM   : HandleTraceMem(e);   break;
   6472       case STACK_TRACE : HandleStackTrace(e); break;
   6473       case NOOP        : CHECK(0);           break;  // can't happen.
   6474       case VERBOSITY   : e->Print(); G_flags->verbosity = e->info(); break;
   6475       case FLUSH_STATE : FlushState(TID(e->tid()));       break;
   6476       default                 : CHECK(0);    break;
   6477     }
   6478   }
   6479 
   6480  private:
   6481   void ShowProcSelfStatus() {
   6482     if (G_flags->show_proc_self_status) {
   6483       string str = ReadFileToString("/proc/self/status", false);
   6484       if (!str.empty()) {
   6485         Printf("%s", str.c_str());
   6486       }
   6487     }
   6488   }
   6489 
   6490   void ShowStats() {
   6491     if (G_flags->show_stats) {
   6492       G_stats->PrintStats();
   6493       G_cache->PrintStorageStats();
   6494     }
   6495   }
   6496 
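           // Producer-consumer queue annotations: PCQ_PUT records a clone of
           // the putter's vector time (like a signal) and PCQ_GET consumes the
           // oldest recorded time (like a wait), so each Get happens-after the
           // matching Put.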
   6497   // PCQ_CREATE, PCQ_DESTROY, PCQ_PUT, PCQ_GET
   6498   void HandlePcqCreate(Event *e) {
   6499     if (G_flags->verbosity >= 2) {
   6500       e->Print();
   6501     }
   6502     PCQ pcq;
   6503     pcq.pcq_addr = e->a();
   6504     CHECK(!g_pcq_map->count(e->a()));
   6505     (*g_pcq_map)[e->a()] = pcq;
   6506   }
   6507   void HandlePcqDestroy(Event *e) {
   6508     if (G_flags->verbosity >= 2) {
   6509       e->Print();
   6510     }
   6511     CHECK(g_pcq_map->count(e->a()));
   6512     g_pcq_map->erase(e->a());
   6513   }
   6514   void HandlePcqPut(Event *e) {
   6515     if (G_flags->verbosity >= 2) {
   6516       e->Print();
   6517     }
   6518     PCQ &pcq = (*g_pcq_map)[e->a()];
   6519     CHECK(pcq.pcq_addr == e->a());
   6520     Thread *thread = Thread::Get(TID(e->tid()));
   6521     VTS *vts = thread->segment()->vts()->Clone();
   6522     pcq.putters.push_back(vts);
   6523     thread->NewSegmentForSignal();
   6524   }
   6525   void HandlePcqGet(Event *e) {
   6526     if (G_flags->verbosity >= 2) {
   6527       e->Print();
   6528     }
   6529     PCQ &pcq = (*g_pcq_map)[e->a()];
   6530     CHECK(pcq.pcq_addr == e->a());
   6531     CHECK(!pcq.putters.empty());
   6532     VTS *putter = pcq.putters.front();
   6533     pcq.putters.pop_front();
   6534     CHECK(putter);
   6535     Thread *thread = Thread::Get(TID(e->tid()));
   6536     thread->NewSegmentForWait(putter);
   6537     VTS::Unref(putter);
   6538   }
   6539 
   6540   // PUBLISH_RANGE
   6541   void HandlePublishRange(Event *e) {
   6542     if (G_flags->verbosity >= 2) {
   6543       e->Print();
   6544     }
   6545     static int reported_deprecation;
   6546     reported_deprecation++;
   6547     if (reported_deprecation < 20) {
   6548       Report("WARNING: ANNOTATE_PUBLISH_MEMORY_RANGE is deprecated and will not"
   6549              " be supported in future versions of ThreadSanitizer.\n");
   6550     }
   6551 
   6552     uintptr_t mem = e->a();
   6553     uintptr_t size = e->info();
   6554 
   6555     TID tid(e->tid());
   6556     Thread *thread = Thread::Get(tid);
   6557     VTS *vts = thread->segment()->vts();
   6558     PublishRange(thread, mem, mem + size, vts);
   6559 
   6560     thread->NewSegmentForSignal();
   6561     // Printf("Publish: [%p, %p)\n", mem, mem+size);
   6562   }
   6563 
   6564   void HandleIgnore(Event *e, bool is_w, bool on) {
   6565     if (G_flags->verbosity >= 2) {
   6566       e->Print();
   6567     }
   6568     Thread *thread = Thread::Get(TID(e->tid()));
   6569     thread->set_ignore_accesses(is_w, on);
   6570   }
   6571 
   6572   // BENIGN_RACE
   6573   void HandleBenignRace(uintptr_t ptr, uintptr_t size,
   6574                         const char *descr, TID tid) {
   6575     Thread *thr = Thread::Get(tid);
   6576     if (debug_benign_races) {
   6577       Printf("T%d: BENIGN_RACE: ptr=%p size=%ld descr='%s'\n",
   6578              tid.raw(), ptr, size, descr);
   6579     }
   6580     // Simply set all 'racey' bits in the shadow state of [ptr, ptr+size).
   6581     for (uintptr_t p = ptr; p < ptr + size; p++) {
   6582       CacheLine *line = G_cache->GetLineOrCreateNew(thr, p, __LINE__);
   6583       CHECK(line);
   6584       line->racey().Set(CacheLine::ComputeOffset(p));
   6585       G_cache->ReleaseLine(thr, p, line, __LINE__);
   6586     }
   6587   }
   6588 
   6589   // EXPECT_RACE
   6590   void HandleExpectRace(uintptr_t ptr, uintptr_t size,
   6591                         const char *descr, TID tid) {
   6592     ExpectedRace expected_race;
   6593     expected_race.ptr = ptr;
   6594     expected_race.size = size;
   6595     expected_race.count = 0;
   6596     expected_race.is_verifiable = !descr ||
   6597         (string(descr).find("UNVERIFIABLE") == string::npos);
   6598     expected_race.is_nacl_untrusted = !descr ||
   6599         (string(descr).find("NACL_UNTRUSTED") != string::npos);
    6600     // Copy descr manually (strdup may not be available).
   6601     CHECK(descr);
   6602     size_t descr_len = strlen(descr);
   6603     char *d = new char [descr_len + 1];
   6604     memcpy(d, descr, descr_len);
   6605     d[descr_len] = 0;
   6606     expected_race.description = d;
   6607 
   6608     Thread *thread = Thread::Get(tid);
   6609     expected_race.pc = thread->GetCallstackEntry(1);
   6610     G_expected_races_map->InsertInfo(ptr, expected_race);
   6611     if (debug_expected_races) {
   6612       Printf("T%d: EXPECT_RACE: ptr=%p size=%ld descr='%s'\n",
   6613              tid.raw(), ptr, size, descr);
   6614       thread->ReportStackTrace(ptr);
   6615       int i = 0;
   6616       for (ExpectedRacesMap::iterator it = G_expected_races_map->begin();
   6617            it != G_expected_races_map->end(); ++it) {
   6618         ExpectedRace &x = it->second;
   6619         Printf("  [%d] %p [0x%lx,0x%lx) size=0x%lx\n",
   6620                i, &x, x.ptr, x.ptr + x.size, x.size);
   6621         i++;
   6622       }
   6623     }
   6624   }
   6625 
   6626   void HandleStackTrace(Event *e) {
   6627     Thread *thread = Thread::Get(TID(e->tid()));
   6628     e->Print();
   6629     thread->ReportStackTrace();
   6630   }
   6631 
   6632   // HB_LOCK
   6633   void HandleHBLock(Event *e) {
   6634     if (G_flags->verbosity >= 2) {
   6635       e->Print();
   6636     }
   6637     Lock *lock = Lock::LookupOrCreate(e->a());
   6638     CHECK(lock);
   6639     lock->set_is_pure_happens_before(true);
   6640   }
   6641 
   6642   // NON_HB_LOCK
   6643   void HandleNonHBLock(Event *e) {
   6644     if (G_flags->verbosity >= 2) {
   6645       e->Print();
   6646     }
   6647     Lock *lock = Lock::LookupOrCreate(e->a());
   6648     CHECK(lock);
   6649     lock->set_is_pure_happens_before(false);
   6650   }
   6651 
   6652   // UNLOCK_OR_INIT
    6653   // This is a hack to handle POSIX pthread_spin_unlock, which is sometimes
   6654   // the same symbol as pthread_spin_init. We need to handle unlock as init
   6655   // if the lock was not seen before or if it is currently unlocked.
   6656   // TODO(kcc): is there a way to distinguish pthread_spin_init
   6657   // and pthread_spin_unlock?
   6658   void HandleUnlockOrInit(Event *e) {
   6659     Thread *thread = Thread::Get(TID(e->tid()));
   6660     if (G_flags->verbosity >= 2) {
   6661       e->Print();
   6662       thread->ReportStackTrace();
   6663     }
   6664     uintptr_t lock_addr = e->a();
   6665     Lock *lock = Lock::Lookup(lock_addr);
   6666     if (lock && lock->wr_held()) {
   6667       // We know this lock and it is locked. Just unlock it.
   6668       thread->HandleUnlock(lock_addr);
   6669     } else {
   6670       // Never seen this lock or it is currently unlocked. Init it.
   6671       Lock::Create(lock_addr);
   6672     }
   6673   }
   6674 
   6675   void HandleLockCreateOrDestroy(Event *e) {
   6676     Thread *thread = Thread::Get(TID(e->tid()));
   6677     uintptr_t lock_addr = e->a();
   6678     if (debug_lock) {
   6679       e->Print();
   6680     }
   6681     if (e->type() == LOCK_CREATE) {
   6682       Lock::Create(lock_addr);
   6683     } else {
   6684       CHECK(e->type() == LOCK_DESTROY);
    6685       // A locked pthread_mutex_t cannot be destroyed, but other lock types can.
    6686       // When destroying a lock, we must unlock it.
    6687       // If a program has a bug where someone attempts to unlock
    6688       // a destroyed lock, we are likely to fail in an assert.
    6689       //
    6690       // We do not unlock-on-destroy after main() has exited.
    6691       // This is because global Mutex objects may be destructed while threads
    6692       // holding them are still running. Urgh...
   6693       Lock *lock = Lock::Lookup(lock_addr);
   6694       // If the lock is not found, report an error.
   6695       if (lock == NULL) {
   6696         ThreadSanitizerInvalidLockReport *report =
   6697             new ThreadSanitizerInvalidLockReport;
   6698         report->type = ThreadSanitizerReport::INVALID_LOCK;
   6699         report->tid = TID(e->tid());
   6700         report->lock_addr = lock_addr;
   6701         report->stack_trace = thread->CreateStackTrace();
   6702         ThreadSanitizerPrintReport(report);
   6703         return;
   6704       }
   6705       if (lock->wr_held() || lock->rd_held()) {
   6706         if (G_flags->unlock_on_mutex_destroy && !g_has_exited_main) {
   6707           thread->HandleUnlock(lock_addr);
   6708         }
   6709       }
   6710       thread->HandleForgetSignaller(lock_addr);
   6711       Lock::Destroy(lock_addr);
   6712     }
   6713   }
   6714 
   6715   void HandleTraceMem(Event *e) {
   6716     if (G_flags->trace_level == 0) return;
    6717     TID tid(e->tid());
    6718     Thread *thr = Thread::Get(tid);
   6719     uintptr_t a = e->a();
   6720     CacheLine *line = G_cache->GetLineOrCreateNew(thr, a, __LINE__);
   6721     uintptr_t offset = CacheLine::ComputeOffset(a);
   6722     line->traced().Set(offset);
   6723     G_cache->ReleaseLine(thr, a, line, __LINE__);
   6724     if (G_flags->verbosity >= 2) e->Print();
   6725   }
   6726 
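           // Adjust segment-set ref-counts after a shadow value update. The
           // new sets are referenced before the old ones are released, so a
           // set that appears on both sides never transiently reaches a zero
           // ref-count.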
   6727   INLINE void RefAndUnrefTwoSegSetPairsIfDifferent(SSID new_ssid1,
   6728                                                    SSID old_ssid1,
   6729                                                    SSID new_ssid2,
   6730                                                    SSID old_ssid2) {
   6731     bool recycle_1 = new_ssid1 != old_ssid1,
   6732          recycle_2 = new_ssid2 != old_ssid2;
   6733     if (recycle_1 && !new_ssid1.IsEmpty()) {
   6734       SegmentSet::Ref(new_ssid1, "RefAndUnrefTwoSegSetPairsIfDifferent");
   6735     }
   6736 
   6737     if (recycle_2 && !new_ssid2.IsEmpty()) {
   6738       SegmentSet::Ref(new_ssid2, "RefAndUnrefTwoSegSetPairsIfDifferent");
   6739     }
   6740 
   6741     if (recycle_1 && !old_ssid1.IsEmpty()) {
   6742       SegmentSet::Unref(old_ssid1, "RefAndUnrefTwoSegSetPairsIfDifferent");
   6743     }
   6744 
   6745     if (recycle_2 && !old_ssid2.IsEmpty()) {
   6746       SegmentSet::Unref(old_ssid2, "RefAndUnrefTwoSegSetPairsIfDifferent");
   6747     }
   6748   }
   6749 
   6750 
    6751   // Return true if the current pair of read/write segment sets
    6752   // describes a race.
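           // Note: write-write pairs are checked only for a common lock; this
           // relies on a SegmentSet keeping only segments that are not ordered
           // by happens-before. Write-read pairs span two different sets, so
           // they also need the explicit happens-before check.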
   6753   bool NOINLINE CheckIfRace(SSID rd_ssid, SSID wr_ssid) {
   6754     int wr_ss_size = SegmentSet::Size(wr_ssid);
   6755     int rd_ss_size = SegmentSet::Size(rd_ssid);
   6756 
   6757     DCHECK(wr_ss_size >= 2 || (wr_ss_size >= 1 && rd_ss_size >= 1));
   6758 
   6759     // check all write-write pairs
   6760     for (int w1 = 0; w1 < wr_ss_size; w1++) {
   6761       SID w1_sid = SegmentSet::GetSID(wr_ssid, w1, __LINE__);
   6762       Segment *w1_seg = Segment::Get(w1_sid);
   6763       LSID w1_ls = w1_seg->lsid(true);
   6764       for (int w2 = w1 + 1; w2 < wr_ss_size; w2++) {
   6765         DCHECK(wr_ssid.IsTuple());
   6766         SegmentSet *ss = SegmentSet::Get(wr_ssid);
   6767         LSID w2_ls = Segment::Get(ss->GetSID(w2))->lsid(true);
   6768         if (LockSet::IntersectionIsEmpty(w1_ls, w2_ls)) {
   6769           return true;
   6770         } else {
   6771           // May happen only if the locks in the intersection are hybrid locks.
   6772           DCHECK(LockSet::HasNonPhbLocks(w1_ls) &&
   6773                  LockSet::HasNonPhbLocks(w2_ls));
   6774         }
   6775       }
   6776       // check all write-read pairs
   6777       for (int r = 0; r < rd_ss_size; r++) {
   6778         SID r_sid = SegmentSet::GetSID(rd_ssid, r, __LINE__);
   6779         Segment *r_seg = Segment::Get(r_sid);
   6780         LSID r_ls = r_seg->lsid(false);
   6781         if (Segment::HappensBeforeOrSameThread(w1_sid, r_sid))
   6782           continue;
   6783         if (LockSet::IntersectionIsEmpty(w1_ls, r_ls)) {
   6784           return true;
   6785         } else {
   6786           // May happen only if the locks in the intersection are hybrid locks.
   6787           DCHECK(LockSet::HasNonPhbLocks(w1_ls) &&
   6788                  LockSet::HasNonPhbLocks(r_ls));
   6789         }
   6790       }
   6791     }
   6792     return false;
   6793   }
   6794 
    6795   // New experimental state machine.
    6796   // Set *res to the new state.
    6797   // Return true if the new state is a race.
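           // Simplified example: a write in the current segment S over the
           // state {rd={R}, wr={W}} produces {rd={R}-{S}, wr={W}+{S}}; a read
           // adds S to rd (unless S is already in wr) and leaves wr unchanged.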
   6798   bool INLINE MemoryStateMachine(ShadowValue old_sval, Thread *thr,
   6799                                  bool is_w, ShadowValue *res) {
   6800     ShadowValue new_sval;
   6801     SID cur_sid = thr->sid();
   6802     DCHECK(cur_sid.valid());
   6803 
   6804     if (UNLIKELY(old_sval.IsNew())) {
   6805       // We see this memory for the first time.
   6806       DCHECK(cur_sid.valid());
   6807       if (is_w) {
   6808         new_sval.set(SSID(0), SSID(cur_sid));
   6809       } else {
   6810         new_sval.set(SSID(cur_sid), SSID(0));
   6811       }
   6812       *res = new_sval;
   6813       return false;
   6814     }
   6815 
   6816     SSID old_rd_ssid = old_sval.rd_ssid();
   6817     SSID old_wr_ssid = old_sval.wr_ssid();
   6818     SSID new_rd_ssid(0);
   6819     SSID new_wr_ssid(0);
   6820     if (is_w) {
   6821       new_rd_ssid = SegmentSet::RemoveSegmentFromSS(old_rd_ssid, cur_sid);
   6822       new_wr_ssid = SegmentSet::AddSegmentToSS(old_wr_ssid, cur_sid);
   6823     } else {
   6824       if (SegmentSet::Contains(old_wr_ssid, cur_sid)) {
   6825         // cur_sid is already in old_wr_ssid, no change to SSrd is required.
   6826         new_rd_ssid = old_rd_ssid;
   6827       } else {
   6828         new_rd_ssid = SegmentSet::AddSegmentToSS(old_rd_ssid, cur_sid);
   6829       }
   6830       new_wr_ssid = old_wr_ssid;
   6831     }
   6832 
   6833     if (UNLIKELY(G_flags->sample_events > 0)) {
   6834       if (new_rd_ssid.IsTuple() || new_wr_ssid.IsTuple()) {
   6835         static EventSampler sampler;
   6836         sampler.Sample(thr, "HasTupleSS", false);
   6837       }
   6838     }
   6839 
   6840 
   6841     new_sval.set(new_rd_ssid, new_wr_ssid);
   6842     *res = new_sval;
   6843     if (new_sval == old_sval)
   6844       return false;
   6845 
   6846     if (new_wr_ssid.IsTuple() ||
   6847         (!new_wr_ssid.IsEmpty() && !new_rd_ssid.IsEmpty())) {
   6848       return CheckIfRace(new_rd_ssid, new_wr_ssid);
   6849     }
   6850     return false;
   6851   }
   6852 
   6853 
    6854   // Fast path implementation for the case when we stay in the same thread.
    6855   // In this case we don't need to call HappensBefore(), deal with
    6856   // tuple segment sets, or check for races.
    6857   // If this function returns true, the ShadowValue *new_sval is updated
    6858   // in the same way as MemoryStateMachine() would have done it, just faster.
   6859   INLINE bool MemoryStateMachineSameThread(bool is_w, ShadowValue old_sval,
   6860                                            Thread *thr,
   6861                                            ShadowValue *new_sval) {
   6862 #define MSM_STAT(i) do { if (DEBUG_MODE) \
   6863   thr->stats.msm_branch_count[i]++; } while(0)
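             // The case comments below write the shadow state as {rd, wr},
             // with 'cur' denoting the current segment: e.g.
             // "w: {0, wr} => {0, cur}" means a write replaces the previous
             // write segment with the current one.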
   6864     SSID rd_ssid = old_sval.rd_ssid();
   6865     SSID wr_ssid = old_sval.wr_ssid();
   6866     SID cur_sid = thr->sid();
   6867     TID tid = thr->tid();
   6868     if (rd_ssid.IsEmpty()) {
   6869       if (wr_ssid.IsSingleton()) {
   6870         // *** CASE 01 ***: rd_ssid == 0, wr_ssid == singleton
   6871         SID wr_sid = wr_ssid.GetSingleton();
   6872         if (wr_sid == cur_sid) {  // --- w/r: {0, cur} => {0, cur}
   6873           MSM_STAT(1);
   6874           // no op
   6875           return true;
   6876         }
   6877         if (tid == Segment::Get(wr_sid)->tid()) {
   6878           // same thread, but the segments are different.
   6879           DCHECK(cur_sid != wr_sid);
   6880           if (is_w) {    // -------------- w: {0, wr} => {0, cur}
   6881             MSM_STAT(2);
   6882             new_sval->set(SSID(0), SSID(cur_sid));
   6883             thr->AddDeadSid(wr_sid, "FastPath01");
   6884           } else {       // -------------- r: {0, wr} => {cur, wr}
   6885             MSM_STAT(3);
   6886             new_sval->set(SSID(cur_sid), wr_ssid);
   6887           }
   6888           Segment::Ref(cur_sid, "FastPath01");
   6889           return true;
   6890         }
   6891       } else if (wr_ssid.IsEmpty()) {
   6892         // *** CASE 00 ***: rd_ssid == 0, wr_ssid == 0
   6893         if (is_w) {      // -------------- w: {0, 0} => {0, cur}
   6894           MSM_STAT(4);
   6895           new_sval->set(SSID(0), SSID(cur_sid));
   6896         } else {         // -------------- r: {0, 0} => {cur, 0}
   6897           MSM_STAT(5);
   6898           new_sval->set(SSID(cur_sid), SSID(0));
   6899         }
   6900         Segment::Ref(cur_sid, "FastPath00");
   6901         return true;
   6902       }
   6903     } else if (rd_ssid.IsSingleton()) {
   6904       SID rd_sid = rd_ssid.GetSingleton();
   6905       if (wr_ssid.IsEmpty()) {
   6906         // *** CASE 10 ***: rd_ssid == singleton, wr_ssid == 0
   6907         if (rd_sid == cur_sid) {
   6908           // same segment.
   6909           if (is_w) {    // -------------- w: {cur, 0} => {0, cur}
   6910             MSM_STAT(6);
   6911             new_sval->set(SSID(0), SSID(cur_sid));
   6912           } else {       // -------------- r: {cur, 0} => {cur, 0}
   6913             MSM_STAT(7);
   6914             // no op
   6915           }
   6916           return true;
   6917         }
   6918         if (tid == Segment::Get(rd_sid)->tid()) {
   6919           // same thread, but the segments are different.
   6920           DCHECK(cur_sid != rd_sid);
   6921           if (is_w) {  // -------------- w: {rd, 0} => {0, cur}
   6922             MSM_STAT(8);
   6923             new_sval->set(SSID(0), SSID(cur_sid));
   6924           } else {     // -------------- r: {rd, 0} => {cur, 0}
   6925             MSM_STAT(9);
   6926             new_sval->set(SSID(cur_sid), SSID(0));
   6927           }
   6928           Segment::Ref(cur_sid, "FastPath10");
   6929           thr->AddDeadSid(rd_sid, "FastPath10");
   6930           return true;
   6931         }
    6932       } else if (wr_ssid.IsSingleton()) {
   6933         // *** CASE 11 ***: rd_ssid == singleton, wr_ssid == singleton
   6934         DCHECK(rd_ssid.IsSingleton());
   6935         SID wr_sid = wr_ssid.GetSingleton();
   6936         DCHECK(wr_sid != rd_sid);  // By definition of ShadowValue.
   6937         if (cur_sid == rd_sid) {
   6938           if (tid == Segment::Get(wr_sid)->tid()) {
   6939             if (is_w) {  // -------------- w: {cur, wr} => {0, cur}
   6940               MSM_STAT(10);
   6941               new_sval->set(SSID(0), SSID(cur_sid));
   6942               thr->AddDeadSid(wr_sid, "FastPath11");
   6943             } else {     // -------------- r: {cur, wr} => {cur, wr}
   6944               MSM_STAT(11);
   6945               // no op
   6946             }
   6947             return true;
   6948           }
    6949         } else if (cur_sid == wr_sid) {
   6950           if (tid == Segment::Get(rd_sid)->tid()) {
   6951             if (is_w) {  // -------------- w: {rd, cur} => {rd, cur}
   6952               MSM_STAT(12);
   6953               // no op
   6954             } else {     // -------------- r: {rd, cur} => {0, cur}
   6955               MSM_STAT(13);
   6956               new_sval->set(SSID(0), SSID(cur_sid));
   6957               thr->AddDeadSid(rd_sid, "FastPath11");
   6958             }
   6959             return true;
   6960           }
   6961         } else if (tid == Segment::Get(rd_sid)->tid() &&
   6962                    tid == Segment::Get(wr_sid)->tid()) {
   6963           if (is_w) {    // -------------- w: {rd, wr} => {0, cur}
   6964             MSM_STAT(14);
   6965             new_sval->set(SSID(0), SSID(cur_sid));
   6966             thr->AddDeadSid(wr_sid, "FastPath11");
   6967           } else {       // -------------- r: {rd, wr} => {cur, wr}
   6968             MSM_STAT(15);
   6969             new_sval->set(SSID(cur_sid), wr_ssid);
   6970           }
   6971           thr->AddDeadSid(rd_sid, "FastPath11");
   6972           Segment::Ref(cur_sid, "FastPath11");
   6973           return true;
   6974         }
   6975       }
   6976     }
   6977     MSM_STAT(0);
   6978     return false;
   6979 #undef MSM_STAT
   6980   }
   6981 
    6982   // Return false if we were not able to complete the task (fast_path_only).
   6983   INLINE bool HandleMemoryAccessHelper(bool is_w,
   6984                                        CacheLine *cache_line,
   6985                                        uintptr_t addr,
   6986                                        uintptr_t size,
   6987                                        uintptr_t pc,
   6988                                        Thread *thr,
   6989                                        bool fast_path_only) {
   6990     DCHECK((addr & (size - 1)) == 0);  // size-aligned.
   6991     uintptr_t offset = CacheLine::ComputeOffset(addr);
   6992 
   6993     ShadowValue old_sval;
   6994     ShadowValue *sval_p = NULL;
   6995 
   6996     if (UNLIKELY(!cache_line->has_shadow_value().Get(offset))) {
   6997       sval_p = cache_line->AddNewSvalAtOffset(offset);
   6998       DCHECK(sval_p->IsNew());
   6999     } else {
   7000       sval_p = cache_line->GetValuePointer(offset);
   7001     }
   7002     old_sval = *sval_p;
   7003 
   7004     bool res = false;
   7005     bool fast_path_ok = MemoryStateMachineSameThread(
   7006         is_w, old_sval, thr, sval_p);
   7007     if (fast_path_ok) {
   7008       res = true;
   7009     } else if (fast_path_only) {
   7010       res = false;
   7011     } else {
   7012       bool is_published = cache_line->published().Get(offset);
   7013       // We check only the first bit for publishing, oh well.
   7014       if (UNLIKELY(is_published)) {
   7015         const VTS *signaller_vts = GetPublisherVTS(addr);
   7016         CHECK(signaller_vts);
   7017         thr->NewSegmentForWait(signaller_vts);
   7018       }
   7019 
   7020       bool is_race = MemoryStateMachine(old_sval, thr, is_w, sval_p);
   7021 
   7022       // Check for race.
   7023       if (UNLIKELY(is_race)) {
   7024         if (G_flags->report_races && !cache_line->racey().Get(offset)) {
   7025           reports_.AddReport(thr, pc, is_w, addr, size,
   7026                              old_sval, *sval_p, is_published);
   7027         }
   7028         cache_line->racey().SetRange(offset, offset + size);
   7029       }
   7030 
   7031       // Ref/Unref segments
   7032       RefAndUnrefTwoSegSetPairsIfDifferent(sval_p->rd_ssid(),
   7033                                            old_sval.rd_ssid(),
   7034                                            sval_p->wr_ssid(),
   7035                                            old_sval.wr_ssid());
   7036       res = true;
   7037     }
   7038 
   7039 
   7040     if (DEBUG_MODE && !fast_path_only) {
   7041       // check that the SSIDs/SIDs in the new sval have sane ref counters.
   7042       CHECK(!sval_p->wr_ssid().IsEmpty() || !sval_p->rd_ssid().IsEmpty());
   7043       for (int i = 0; i < 2; i++) {
   7044         SSID ssid = i ? sval_p->rd_ssid() : sval_p->wr_ssid();
   7045         if (ssid.IsEmpty()) continue;
   7046         if (ssid.IsSingleton()) {
   7047           // singleton segment should have ref count > 0.
   7048           SID sid = ssid.GetSingleton();
   7049           Segment *seg = Segment::Get(sid);
   7050           CHECK(seg->ref_count() > 0);
   7051           if (sid == thr->sid()) {
   7052             // if this is the current seg, ref count should be > 1.
   7053             CHECK(seg->ref_count() > 1);
   7054           }
   7055         } else {
   7056           SegmentSet *sset = SegmentSet::Get(ssid);
   7057           CHECK(sset->ref_count() > 0);
   7058         }
   7059       }
   7060     }
   7061     return res;
   7062   }
   7063 
   7064 
    7065   // Return false if we were not able to complete the task (fast_path_only).
   7066   INLINE bool HandleAccessGranularityAndExecuteHelper(
   7067       CacheLine *cache_line,
   7068       Thread *thr, uintptr_t addr, MopInfo *mop,
   7069       bool has_expensive_flags, bool fast_path_only) {
   7070     size_t size = mop->size();
   7071     uintptr_t pc = mop->pc();
   7072     bool is_w = mop->is_write();
   7073     uintptr_t a = addr;
   7074     uintptr_t b = 0;
   7075     uintptr_t off = CacheLine::ComputeOffset(a);
   7076 
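             // Granularity mask layout (16 bits, 15 of them used; one bit per
             // chunk of the 8-byte cell, as the constants below show):
             //   bit  0      -- one 8-byte chunk,
             //   bits 1..2   -- two 4-byte chunks,
             //   bits 3..6   -- four 2-byte chunks,
             //   bits 7..14  -- eight 1-byte chunks.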
   7077     uint16_t *granularity_mask = cache_line->granularity_mask(off);
   7078     uint16_t gr = *granularity_mask;
   7079 
   7080     if        (size == 8 && (off & 7) == 0) {
   7081       if (!gr) {
   7082         *granularity_mask = gr = 1;  // 0000000000000001
   7083       }
   7084       if (GranularityIs8(off, gr)) {
   7085         if (has_expensive_flags) thr->stats.n_fast_access8++;
   7086         cache_line->DebugTrace(off, __FUNCTION__, __LINE__);
   7087         goto one_call;
   7088       } else {
   7089         if (has_expensive_flags) thr->stats.n_slow_access8++;
   7090         cache_line->Join_1_to_2(off);
   7091         cache_line->Join_1_to_2(off + 2);
   7092         cache_line->Join_1_to_2(off + 4);
   7093         cache_line->Join_1_to_2(off + 6);
   7094         cache_line->Join_2_to_4(off);
   7095         cache_line->Join_2_to_4(off + 4);
   7096         cache_line->Join_4_to_8(off);
   7097         goto slow_path;
   7098       }
   7099     } else if (size == 4 && (off & 3) == 0) {
   7100       if (!gr) {
   7101         *granularity_mask = gr = 3 << 1;  // 0000000000000110
   7102       }
   7103       if (GranularityIs4(off, gr)) {
   7104         if (has_expensive_flags) thr->stats.n_fast_access4++;
   7105         cache_line->DebugTrace(off, __FUNCTION__, __LINE__);
   7106         goto one_call;
   7107       } else {
   7108         if (has_expensive_flags) thr->stats.n_slow_access4++;
   7109         cache_line->Split_8_to_4(off);
   7110         cache_line->Join_1_to_2(off);
   7111         cache_line->Join_1_to_2(off + 2);
   7112         cache_line->Join_2_to_4(off);
   7113         goto slow_path;
   7114       }
   7115     } else if (size == 2 && (off & 1) == 0) {
   7116       if (!gr) {
   7117         *granularity_mask = gr = 15 << 3;  // 0000000001111000
   7118       }
   7119       if (GranularityIs2(off, gr)) {
   7120         if (has_expensive_flags) thr->stats.n_fast_access2++;
   7121         cache_line->DebugTrace(off, __FUNCTION__, __LINE__);
   7122         goto one_call;
   7123       } else {
   7124         if (has_expensive_flags) thr->stats.n_slow_access2++;
   7125         cache_line->Split_8_to_4(off);
   7126         cache_line->Split_4_to_2(off);
   7127         cache_line->Join_1_to_2(off);
   7128         goto slow_path;
   7129       }
   7130     } else if (size == 1) {
   7131       if (!gr) {
   7132         *granularity_mask = gr = 255 << 7;  // 0111111110000000
   7133       }
   7134       if (GranularityIs1(off, gr)) {
   7135         if (has_expensive_flags) thr->stats.n_fast_access1++;
   7136         cache_line->DebugTrace(off, __FUNCTION__, __LINE__);
   7137         goto one_call;
   7138       } else {
   7139         if (has_expensive_flags) thr->stats.n_slow_access1++;
   7140         cache_line->Split_8_to_4(off);
   7141         cache_line->Split_4_to_2(off);
   7142         cache_line->Split_2_to_1(off);
   7143         goto slow_path;
   7144       }
   7145     } else {
   7146       if (fast_path_only) return false;
   7147       if (has_expensive_flags) thr->stats.n_very_slow_access++;
   7148       // Very slow: size is not 1,2,4,8 or address is unaligned.
   7149       // Handle this access as a series of 1-byte accesses, but only
   7150       // inside the current cache line.
   7151       // TODO(kcc): do we want to handle the next cache line as well?
   7152       b = a + mop->size();
   7153       uintptr_t max_x = min(b, CacheLine::ComputeNextTag(a));
   7154       for (uintptr_t x = a; x < max_x; x++) {
   7155         off = CacheLine::ComputeOffset(x);
   7156         DCHECK(CacheLine::ComputeTag(x) == cache_line->tag());
   7157         uint16_t *granularity_mask = cache_line->granularity_mask(off);
   7158         if (!*granularity_mask) {
   7159           *granularity_mask = 1;
   7160         }
   7161         cache_line->DebugTrace(off, __FUNCTION__, __LINE__);
   7162         cache_line->Split_8_to_4(off);
   7163         cache_line->Split_4_to_2(off);
   7164         cache_line->Split_2_to_1(off);
   7165         if (!HandleMemoryAccessHelper(is_w, cache_line, x, 1, pc, thr, false))
   7166           return false;
   7167       }
   7168       return true;
   7169     }
   7170 
   7171 slow_path:
   7172     if (fast_path_only) return false;
   7173     DCHECK(cache_line);
   7174     DCHECK(size == 1 || size == 2 || size == 4 || size == 8);
   7175     DCHECK((addr & (size - 1)) == 0);  // size-aligned.
   7176     gr = *granularity_mask;
   7177     CHECK(gr);
   7178     // size is one of 1, 2, 4, 8; address is size-aligned, but the granularity
   7179     // is different.
   7180     b = a + mop->size();
   7181     for (uintptr_t x = a; x < b;) {
   7182       if (has_expensive_flags) thr->stats.n_access_slow_iter++;
   7183       off = CacheLine::ComputeOffset(x);
   7184       cache_line->DebugTrace(off, __FUNCTION__, __LINE__);
   7185       size_t s = 0;
   7186       // How many bytes are we going to access?
    7187       if      (GranularityIs8(off, gr)) s = 8;
    7188       else if (GranularityIs4(off, gr)) s = 4;
    7189       else if (GranularityIs2(off, gr)) s = 2;
    7190       else                              s = 1;
   7191       if (!HandleMemoryAccessHelper(is_w, cache_line, x, s, pc, thr, false))
   7192         return false;
   7193       x += s;
   7194     }
   7195     return true;
   7196 one_call:
   7197     return HandleMemoryAccessHelper(is_w, cache_line, addr, size, pc,
   7198                                     thr, fast_path_only);
   7199   }
   7200 
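           // Returns true iff tracing is requested for this address: either
           // the byte is marked as traced in the cache line or the address
           // equals --trace_addr. Active only when expensive flags are on and
           // --trace_level > 0.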
   7201   INLINE bool IsTraced(CacheLine *cache_line, uintptr_t addr,
   7202                        bool has_expensive_flags) {
   7203     if (!has_expensive_flags) return false;
   7204     if (G_flags->trace_level == 0) return false;
   7205     DCHECK(cache_line);
   7206     uintptr_t off = CacheLine::ComputeOffset(addr);
   7207     if (cache_line->traced().Get(off)) {
   7208       return true;
   7209     } else if (addr == G_flags->trace_addr) {
   7210       return true;
   7211     }
   7212     return false;
   7213   }
   7214 
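           // Prints the shadow value of each byte in [addr, addr+size),
           // together with the current stack trace; called when tracing is on
           // (see IsTraced above).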
   7215   void DoTrace(Thread *thr, uintptr_t addr, MopInfo *mop, bool need_locking) {
   7216     size_t size = mop->size();
   7217     uintptr_t pc = mop->pc();
   7218     TIL til(ts_lock, 1, need_locking);
   7219     for (uintptr_t x = addr; x < addr + size; x++) {
   7220       uintptr_t off = CacheLine::ComputeOffset(x);
   7221       CacheLine *cache_line = G_cache->GetLineOrCreateNew(thr,
   7222                                                           x, __LINE__);
   7223       ShadowValue *sval_p = cache_line->GetValuePointer(off);
   7224       if (cache_line->has_shadow_value().Get(off) != 0) {
   7225         bool is_published = cache_line->published().Get(off);
   7226         Printf("TRACE: T%d/S%d %s[%d] addr=%p sval: %s%s; line=%p P=%s\n",
   7227                raw_tid(thr), thr->sid().raw(), mop->is_write() ? "wr" : "rd",
   7228                size, addr, sval_p->ToString().c_str(),
   7229                is_published ? " P" : "",
   7230                cache_line,
   7231                cache_line->published().Empty() ?
   7232                "0" : cache_line->published().ToString().c_str());
   7233         thr->ReportStackTrace(pc);
   7234       }
   7235       G_cache->ReleaseLine(thr, x, cache_line, __LINE__);
   7236     }
   7237   }
   7238 
   7239 
   7240 #if TS_SERIALIZED == 1
   7241   INLINE  // TODO(kcc): this can also be made NOINLINE later.
   7242 #else
   7243   NOINLINE
   7244 #endif
   7245   void HandleMemoryAccessSlowLocked(Thread *thr,
   7246                                     uintptr_t addr,
   7247                                     MopInfo *mop,
   7248                                     bool has_expensive_flags,
   7249                                     bool need_locking) {
   7250     AssertTILHeld();
   7251     DCHECK(thr->lsid(false) == thr->segment()->lsid(false));
   7252     DCHECK(thr->lsid(true) == thr->segment()->lsid(true));
   7253     thr->FlushDeadSids();
   7254     if (TS_SERIALIZED == 0) {
    7255       // In the serialized version this is the hot spot, so grab fresh
    7256       // SIDs only in the non-serialized variant.
   7257       thr->GetSomeFreshSids();
   7258     }
   7259     CacheLine *cache_line = G_cache->GetLineOrCreateNew(thr, addr, __LINE__);
   7260     HandleAccessGranularityAndExecuteHelper(cache_line, thr, addr,
   7261                                             mop, has_expensive_flags,
   7262                                             /*fast_path_only=*/false);
   7263     bool tracing = IsTraced(cache_line, addr, has_expensive_flags);
   7264     G_cache->ReleaseLine(thr, addr, cache_line, __LINE__);
   7265     cache_line = NULL;  // just in case.
   7266 
   7267     if (has_expensive_flags) {
   7268       if (tracing) {
   7269         DoTrace(thr, addr, mop, /*need_locking=*/false);
   7270       }
   7271       if (G_flags->sample_events > 0) {
   7272         const char *type = "SampleMemoryAccess";
   7273         static EventSampler sampler;
   7274         sampler.Sample(thr, type, false);
   7275       }
   7276     }
   7277   }
   7278 
   7279   INLINE bool HandleMemoryAccessInternal(Thread *thr,
   7280                                          uintptr_t *sblock_pc,
   7281                                          uintptr_t addr,
   7282                                          MopInfo *mop,
   7283                                          bool has_expensive_flags,
   7284                                          bool need_locking) {
   7285   #define INC_STAT(stat) do { if (has_expensive_flags) (stat)++; } while(0)
   7286     if (TS_ATOMICITY && G_flags->atomicity) {
   7287       HandleMemoryAccessForAtomicityViolationDetector(thr, addr, mop);
   7288       return false;
   7289     }
   7290     DCHECK(mop->size() > 0);
   7291     DCHECK(thr->is_running());
   7292     DCHECK(!thr->ignore_reads() || !thr->ignore_writes());
   7293 
    7294     // We do not check for stack accesses (and hence do not ignore them) here.
    7295     // On unoptimized binaries this would give ~10% speedup if ignore_stack==true,
    7296     // but with --ignore_stack==false it would cost a few extra instructions.
    7297     // On optimized binaries ignoring the stack gains nearly nothing.
   7298     // if (thr->IgnoreMemoryIfInStack(addr)) return;
   7299 
   7300     CacheLine *cache_line = NULL;
   7301     INC_STAT(thr->stats.memory_access_sizes[mop->size() <= 16 ? mop->size() : 17 ]);
   7302     INC_STAT(thr->stats.events[mop->is_write() ? WRITE : READ]);
   7303     if (has_expensive_flags) {
   7304       thr->stats.access_to_first_1g += (addr >> 30) == 0;
   7305       thr->stats.access_to_first_2g += (addr >> 31) == 0;
   7306       thr->stats.access_to_first_4g += ((uint64_t)addr >> 32) == 0;
   7307     }
   7308 
   7309     int locked_access_case = 0;
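             // Why the access falls through to the locked path (recorded in
             // thr->stats.locked_access[] below):
             //   1 -- the fast-path helper demanded the slow path,
             //   2 -- SblockEnter could not be handled on the fast path,
             //   3 -- the acquired line has a wrong tag,
             //   4 -- the grabbed cache slot is empty,
             //   5 -- the line is locked,
             //   6 -- no room for dead SIDs,
             //   7 -- the caller did not request locking.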
   7310 
   7311     if (need_locking) {
   7312       // The fast (unlocked) path.
   7313       if (thr->HasRoomForDeadSids()) {
   7314         // Acquire a line w/o locks.
   7315         cache_line = G_cache->TryAcquireLine(thr, addr, __LINE__);
   7316         if (has_expensive_flags && cache_line && G_cache->IsInDirectCache(addr)) {
   7317           INC_STAT(thr->stats.cache_fast_get);
   7318         }
   7319         if (!Cache::LineIsNullOrLocked(cache_line)) {
    7320           // The line is neither empty nor locked -- check the tag.
   7321           if (cache_line->tag() == CacheLine::ComputeTag(addr)) {
   7322             // The line is ours and non-empty -- fire the fast path.
   7323             if (thr->HandleSblockEnter(*sblock_pc, /*allow_slow_path=*/false)) {
   7324               *sblock_pc = 0;  // don't do SblockEnter any more.
   7325               bool res = HandleAccessGranularityAndExecuteHelper(
   7326                   cache_line, thr, addr,
   7327                   mop, has_expensive_flags,
   7328                   /*fast_path_only=*/true);
   7329               bool traced = IsTraced(cache_line, addr, has_expensive_flags);
   7330               // release the line.
   7331               G_cache->ReleaseLine(thr, addr, cache_line, __LINE__);
   7332               if (res && has_expensive_flags && traced) {
   7333                 DoTrace(thr, addr, mop, /*need_locking=*/true);
   7334               }
   7335               if (res) {
   7336                 INC_STAT(thr->stats.unlocked_access_ok);
    7337                 // fast path succeeded, we are done.
   7338                 return false;
   7339               } else {
   7340                 locked_access_case = 1;
   7341               }
   7342             } else {
   7343               // we were not able to handle SblockEnter.
   7344               G_cache->ReleaseLine(thr, addr, cache_line, __LINE__);
   7345               locked_access_case = 2;
   7346             }
   7347           } else {
   7348             locked_access_case = 3;
   7349             // The line has a wrong tag.
   7350             G_cache->ReleaseLine(thr, addr, cache_line, __LINE__);
   7351           }
   7352         } else if (cache_line == NULL) {
   7353           locked_access_case = 4;
   7354           // We grabbed the cache slot but it is empty, release it.
   7355           G_cache->ReleaseLine(thr, addr, cache_line, __LINE__);
   7356         } else {
   7357           locked_access_case = 5;
   7358         }
   7359       } else {
   7360         locked_access_case = 6;
   7361       }
   7362     } else {
   7363       locked_access_case = 7;
   7364     }
   7365 
   7366     if (need_locking) {
   7367       INC_STAT(thr->stats.locked_access[locked_access_case]);
   7368     }
   7369 
   7370     // Everything below goes under a lock.
   7371     TIL til(ts_lock, 2, need_locking);
   7372     thr->HandleSblockEnter(*sblock_pc, /*allow_slow_path=*/true);
   7373     *sblock_pc = 0;  // don't do SblockEnter any more.
   7374     HandleMemoryAccessSlowLocked(thr, addr, mop,
   7375                                  has_expensive_flags,
   7376                                  need_locking);
   7377     return true;
   7378 #undef INC_STAT
   7379   }
   7380 
   7381 
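           // Atomicity-violation detection mode (--atomicity): instead of the
           // race detection above, record the range of each access made under
           // locks into the thread's per-lock-era access set.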
   7382   void HandleMemoryAccessForAtomicityViolationDetector(Thread *thr,
   7383                                                        uintptr_t addr,
   7384                                                        MopInfo *mop) {
   7385     CHECK(G_flags->atomicity);
   7386     TID tid = thr->tid();
   7387     if (thr->MemoryIsInStack(addr)) return;
   7388 
   7389     LSID wr_lsid = thr->lsid(0);
   7390     LSID rd_lsid = thr->lsid(1);
   7391     if (wr_lsid.raw() == 0 && rd_lsid.raw() == 0) {
   7392       thr->increment_n_mops_since_start();
   7393       return;
   7394     }
   7395     // uint64_t combined_lsid = wr_lsid.raw();
   7396     // combined_lsid = (combined_lsid << 32) | rd_lsid.raw();
   7397     // if (combined_lsid == 0) return;
   7398 
   7399 //    Printf("Era=%d T%d %s a=%p pc=%p in_stack=%d %s\n", g_lock_era,
   7400 //           tid.raw(), is_w ? "W" : "R", addr, pc, thr->MemoryIsInStack(addr),
   7401 //           PcToRtnNameAndFilePos(pc).c_str());
   7402 
   7403     BitSet *range_set = thr->lock_era_access_set(mop->is_write());
   7404     // Printf("era %d T%d access under lock pc=%p addr=%p size=%p w=%d\n",
   7405     //        g_lock_era, tid.raw(), pc, addr, size, is_w);
   7406     range_set->Add(addr, addr + mop->size());
   7407     // Printf("   %s\n", range_set->ToString().c_str());
   7408   }
   7409 
   7410 
   7411   // MALLOC
   7412   void HandleMalloc(Event *e, bool is_mmap) {
   7413     ScopedMallocCostCenter cc("HandleMalloc");
   7414     TID tid(e->tid());
   7415     uintptr_t a = e->a();
   7416     uintptr_t size = e->info();
   7417 
   7418 
   7419     if (a == 0)
   7420       return;
   7421 
   7422     #if defined(__GNUC__) && __WORDSIZE == 64
   7423     // If we are allocating a huge piece of memory,
   7424     // don't handle it because it is too slow.
    7425     // TODO(kcc): this is a workaround for NaCl. May need a cleaner fix.
   7426     const uint64_t G84 = (1ULL << 32) * 21; // 84G.
   7427     if (size >= G84) {
   7428       return;
   7429     }
   7430     #endif
   7431     Thread *thr = Thread::Get(tid);
   7432     thr->NewSegmentForMallocEvent();
   7433     uintptr_t b = a + size;
   7434     CHECK(a <= b);
   7435     ClearMemoryState(thr, a, b);
   7436     // update heap_map
   7437     HeapInfo info;
   7438     info.ptr  = a;
   7439     info.size = size;
   7440     info.sid  = thr->sid();
   7441     Segment::Ref(info.sid, __FUNCTION__);
   7442     if (debug_malloc) {
   7443       Printf("T%d MALLOC: %p [%p %p) %s %s\n%s\n",
   7444              tid.raw(), size, a, a+size,
   7445              Segment::ToString(thr->sid()).c_str(),
   7446              thr->segment()->vts()->ToString().c_str(),
   7447              info.StackTraceString().c_str());
   7448     }
   7449 
   7450     // CHECK(!G_heap_map->count(a));  // we may have two calls
   7451                                       //  to AnnotateNewMemory.
   7452     G_heap_map->InsertInfo(a, info);
   7453 
   7454     if (is_mmap) {
    7455       // Mmap may be used for a thread stack, so we should keep the mmap
    7456       // info when the state is flushed.
   7457       ThreadStackInfo ts_info;
   7458       ts_info.ptr = a;
   7459       ts_info.size = size;
   7460       G_thread_stack_map->InsertInfo(a, ts_info);
   7461     }
   7462   }
   7463 
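           // With --free_is_write (on by default), a free() is modeled as a
           // write to the freed block, so a race between free() and a
           // concurrent access shows up as an ordinary data race.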
   7464   void ImitateWriteOnFree(Thread *thr, uintptr_t a, uintptr_t size, uintptr_t pc) {
    7465     // Handle the memory deletion as a write, but don't touch all the
    7466     // memory if there is too much of it; limit to kMaxWriteSizeOnFree (2K).
   7467     if (size && G_flags->free_is_write && !global_ignore) {
   7468       const uintptr_t kMaxWriteSizeOnFree = 2048;
   7469       uintptr_t write_size = min(kMaxWriteSizeOnFree, size);
   7470       uintptr_t step = sizeof(uintptr_t);
   7471       // We simulate 4- or 8-byte accesses to make analysis faster.
   7472       for (uintptr_t i = 0; i < write_size; i += step) {
   7473         uintptr_t this_size = write_size - i >= step ? step : write_size - i;
   7474         HandleMemoryAccess(thr, pc, a + i, this_size,
   7475                            /*is_w=*/true, /*need_locking*/false);
   7476       }
   7477     }
   7478   }
   7479 
   7480   // FREE
   7481   void HandleFree(Event *e) {
   7482     TID tid(e->tid());
   7483     Thread *thr = Thread::Get(tid);
   7484     uintptr_t a = e->a();
   7485     if (debug_free) {
   7486       e->Print();
   7487       thr->ReportStackTrace(e->pc());
   7488     }
   7489     if (a == 0)
   7490       return;
   7491     HeapInfo *info = G_heap_map->GetInfo(a);
   7492     if (!info || info->ptr != a)
   7493       return;
   7494     uintptr_t size = info->size;
   7495     uintptr_t pc = e->pc();
   7496     ImitateWriteOnFree(thr, a, size, pc);
   7497     // update G_heap_map
   7498     CHECK(info->ptr == a);
   7499     Segment::Unref(info->sid, __FUNCTION__);
   7500 
   7501     ClearMemoryState(thr, a, a + size);
   7502     G_heap_map->EraseInfo(a);
   7503 
    7504     // We imitate a write event again, in case there is a use-after-free
    7505     // later. We also need to create a new sblock so that the previous
    7506     // stack trace has free() in it.
   7507     if (G_flags->keep_history && G_flags->free_is_write) {
   7508       thr->HandleSblockEnter(pc, /*allow_slow_path*/true);
   7509     }
   7510     ImitateWriteOnFree(thr, a, size, pc);
   7511   }
   7512 
   7513   void HandleMunmap(Event *e) {
   7514     // TODO(glider): at the moment we handle only munmap()s of single mmap()ed
   7515     // regions. The correct implementation should handle arbitrary munmap()s
   7516     // that may carve the existing mappings or split them into two parts.
   7517     // It should also be possible to munmap() several mappings at a time.
   7518     uintptr_t a = e->a();
   7519     HeapInfo *h_info = G_heap_map->GetInfo(a);
   7520     uintptr_t size = e->info();
   7521     if (h_info && h_info->ptr == a && h_info->size == size) {
   7522       // TODO(glider): we may want to handle memory deletion and call
   7523       // Segment::Unref for all the unmapped memory.
   7524       Segment::Unref(h_info->sid, __FUNCTION__);
   7525       G_heap_map->EraseRange(a, a + size);
   7526     }
   7527 
   7528     ThreadStackInfo *ts_info = G_thread_stack_map->GetInfo(a);
   7529     if (ts_info && ts_info->ptr == a && ts_info->size == size)
   7530       G_thread_stack_map->EraseRange(a, a + size);
   7531   }
   7532 
   7533   void HandleThreadStart(TID child_tid, TID parent_tid, CallStack *call_stack) {
   7534     // Printf("HandleThreadStart: tid=%d parent_tid=%d pc=%lx pid=%d\n",
   7535     //         child_tid.raw(), parent_tid.raw(), pc, getpid());
   7536     VTS *vts = NULL;
   7537     StackTrace *creation_context = NULL;
   7538     if (child_tid == TID(0)) {
   7539       // main thread, we are done.
   7540       vts = VTS::CreateSingleton(child_tid);
   7541     } else if (!parent_tid.valid()) {
   7542       Thread::StopIgnoringAccessesInT0BecauseNewThreadStarted();
   7543       Report("INFO: creating thread T%d w/o a parent\n", child_tid.raw());
   7544       vts = VTS::CreateSingleton(child_tid);
   7545     } else {
   7546       Thread::StopIgnoringAccessesInT0BecauseNewThreadStarted();
   7547       Thread *parent = Thread::Get(parent_tid);
   7548       CHECK(parent);
   7549       parent->HandleChildThreadStart(child_tid, &vts, &creation_context);
   7550     }
   7551 
   7552     if (!call_stack) {
   7553       call_stack = new CallStack();
   7554     }
   7555     Thread *new_thread = new Thread(child_tid, parent_tid,
   7556                                     vts, creation_context, call_stack);
   7557     CHECK(new_thread == Thread::Get(child_tid));
   7558     if (child_tid == TID(0)) {
   7559       new_thread->set_ignore_all_accesses(true); // until a new thread comes.
   7560     }
   7561   }
   7562 
   7563   // Executes before the first instruction of the thread but after the thread
   7564   // has been set up (e.g. the stack is in place).
   7565   void HandleThreadFirstInsn(TID tid) {
   7566     // TODO(kcc): get rid of this once we find out how to get the T0's stack.
   7567     if (tid == TID(0)) {
   7568       uintptr_t stack_min(0), stack_max(0);
   7569       GetThreadStack(tid.raw(), &stack_min, &stack_max);
   7570       Thread *thr = Thread::Get(tid);
   7571       thr->SetStack(stack_min, stack_max);
   7572       ClearMemoryState(thr, thr->min_sp(), thr->max_sp());
   7573     }
   7574   }
   7575 
   7576   // THR_STACK_TOP
   7577   void HandleThreadStackTop(Event *e) {
   7578     TID tid(e->tid());
   7579     Thread *thr = Thread::Get(tid);
    7580     // The stack grows downward; e->a() is its highest address.
   7581     uintptr_t sp = e->a();
   7582     uintptr_t sp_min = 0, sp_max = 0;
   7583     uintptr_t stack_size_if_known = e->info();
   7584     ThreadStackInfo *stack_info;
   7585     if (stack_size_if_known) {
   7586       sp_min = sp - stack_size_if_known;
   7587       sp_max = sp;
   7588     } else if (NULL != (stack_info = G_thread_stack_map->GetInfo(sp))) {
   7589       if (debug_thread) {
   7590         Printf("T%d %s: %p\n%s\n", e->tid(), __FUNCTION__,  sp,
   7591              reports_.DescribeMemory(sp).c_str());
   7592       }
   7593       sp_min = stack_info->ptr;
   7594       sp_max = stack_info->ptr + stack_info->size;
   7595     }
   7596     if (debug_thread) {
   7597       Printf("T%d SP: %p [%p %p), size=%ldK\n",
   7598              e->tid(), sp, sp_min, sp_max, (sp_max - sp_min) >> 10);
   7599     }
   7600     if (sp_min < sp_max) {
   7601       CHECK((sp_max - sp_min) >= 8 * 1024); // stay sane.
   7602       CHECK((sp_max - sp_min) < 128 * 1024 * 1024); // stay sane.
   7603       ClearMemoryState(thr, sp_min, sp_max);
   7604       thr->SetStack(sp_min, sp_max);
   7605     }
   7606   }
   7607 
   7608   // THR_END
   7609   void HandleThreadEnd(TID tid) {
   7610     Thread *thr = Thread::Get(tid);
   7611     // Add the thread-local stats to global stats.
   7612     G_stats->Add(thr->stats);
   7613     thr->stats.Clear();
   7614 
   7615     // Printf("HandleThreadEnd: %d\n", tid.raw());
   7616     if (tid != TID(0)) {
   7617       Thread *child = Thread::Get(tid);
   7618       child->HandleThreadEnd();
   7619 
   7620 
   7621       if (debug_thread) {
   7622         Printf("T%d:  THR_END     : %s %s\n", tid.raw(),
   7623                Segment::ToString(child->sid()).c_str(),
   7624                child->vts()->ToString().c_str());
   7625       }
   7626       ClearMemoryState(thr, child->min_sp(), child->max_sp());
   7627     } else {
   7628       reports_.SetProgramFinished();
   7629     }
   7630 
   7631 
   7632     if (g_so_far_only_one_thread == false
   7633         && (thr->ignore_reads() || thr->ignore_writes())) {
   7634       Report("WARNING: T%d ended while at least one 'ignore' bit is set: "
   7635              "ignore_wr=%d ignore_rd=%d\n", tid.raw(),
   7636              thr->ignore_reads(), thr->ignore_writes());
   7637       for (int i = 0; i < 2; i++) {
   7638         StackTrace *context = thr->GetLastIgnoreContext(i);
   7639         if (context) {
   7640           Report("Last ignore_%s call was here: \n%s\n", i ? "wr" : "rd",
   7641                  context->ToString().c_str());
   7642         }
   7643       }
   7644       if (G_flags->save_ignore_context == false) {
   7645         Report("Rerun with --save_ignore_context to see where "
   7646                "IGNORE_END is missing\n");
   7647       }
   7648     }
   7649     ShowProcSelfStatus();
   7650   }
   7651 
   7652   // THR_JOIN_AFTER
   7653   void HandleThreadJoinAfter(Event *e) {
   7654     TID tid(e->tid());
   7655     Thread *parent_thr = Thread::Get(tid);
   7656     VTS *vts_at_exit = NULL;
   7657     TID child_tid = parent_thr->HandleThreadJoinAfter(&vts_at_exit, TID(e->a()));
   7658     CHECK(vts_at_exit);
   7659     CHECK(parent_thr->sid().valid());
   7660     Segment::AssertLive(parent_thr->sid(),  __LINE__);
   7661     parent_thr->NewSegmentForWait(vts_at_exit);
   7662     if (debug_thread) {
   7663       Printf("T%d:  THR_JOIN_AFTER T%d  : %s\n", tid.raw(),
   7664              child_tid.raw(), parent_thr->vts()->ToString().c_str());
   7665     }
   7666   }
   7667  public:
   7668   // TODO(kcc): merge this into Detector class. (?)
   7669   ReportStorage reports_;
   7670 };
   7671 
   7672 static Detector        *G_detector;
   7673 
   7674 // -------- Flags ------------------------- {{{1
   7675 const char *usage_str =
   7676 "Usage:\n"
   7677 "  %s [options] program_to_test [program's options]\n"
   7678 "See %s for details\n";
   7679 
   7680 void ThreadSanitizerPrintUsage() {
   7681   Printf(usage_str, G_flags->tsan_program_name.c_str(),
   7682          G_flags->tsan_url.c_str());
   7683 }
   7684 
   7685 static void ReportUnknownFlagAndExit(const string &str) {
   7686   Printf("Unknown flag or flag value: %s\n", str.c_str());
   7687   ThreadSanitizerPrintUsage();
   7688   exit(1);
   7689 }
   7690 
   7691 // if arg and flag match, return true
   7692 // and set 'val' to the substring of arg after '='.
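         // E.g. FlagNameMatch("--max-sid=42", "max_sid", &val) returns true
         // and sets val to "42".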
   7693 static bool FlagNameMatch(const string &arg, const string &flag, string *val) {
   7694   string f = string("--") + flag;
   7695   if (arg.size() < f.size()) return false;
   7696   for (size_t i = 0; i < f.size(); i++) {
   7697     // '-' must match '-'
   7698     // '_' may match '_' or '-'
   7699     if (f[i] == '_') {
   7700       if (arg[i] != '-' && arg[i] != '_') return false;
   7701     } else {
   7702       if (f[i] != arg[i]) return false;
   7703     }
   7704   }
   7705   if (arg.size() == f.size()) {
   7706     *val = "";
   7707     return true;
   7708   }
   7709   if (arg[f.size()] != '=') return false;
   7710   *val = arg.substr(f.size() + 1);
   7711   return true;
   7712 }
   7713 
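         // Consumes every occurrence of the flag from 'args' (the last one
         // wins) and returns the number of occurrences found.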
   7714 static int FindBoolFlag(const char *name, bool default_val,
   7715                   vector<string> *args, bool *retval) {
   7716   int res = 0;
   7717   *retval = default_val;
   7718   bool cont = false;
   7719   do {
   7720     cont = false;
   7721     vector<string>::iterator it = args->begin();
   7722     for (; it != args->end(); ++it) {
   7723       string &str = *it;
   7724       string flag_value;
   7725       if (!FlagNameMatch(str, name, &flag_value)) continue;
   7726 
    7727       if      (flag_value == "")      *retval = true;
    7728       else if (flag_value == "1")     *retval = true;
    7729       else if (flag_value == "true")  *retval = true;
    7730       else if (flag_value == "yes")   *retval = true;
    7731       else if (flag_value == "0")     *retval = false;
    7732       else if (flag_value == "false") *retval = false;
    7733       else if (flag_value == "no")    *retval = false;
   7734       else
   7735         ReportUnknownFlagAndExit(str);
   7736       res++;
   7737       if (G_flags->verbosity >= 1) {
   7738         Printf("%40s => %s\n", name, *retval ? "true" : "false");
   7739       }
   7740       break;
   7741     }
   7742     if (it != args->end()) {
   7743       cont = true;
   7744       args->erase(it);
   7745     }
   7746   } while (cont);
   7747   return res;
   7748 }
   7749 
   7750 static void FindIntFlag(const char *name, intptr_t default_val,
   7751                  vector<string> *args, intptr_t *retval) {
   7752   *retval = default_val;
   7753   bool cont = false;
   7754   do {
   7755     cont = false;
   7756     vector<string>::iterator it = args->begin();
   7757     for (; it != args->end(); ++it) {
   7758       string &str = *it;
   7759       string flag_value;
   7760       if (!FlagNameMatch(str, name, &flag_value)) continue;
   7761       char *end_ptr;
   7762       const char *beg_ptr = flag_value.c_str();
   7763       intptr_t int_val = my_strtol(beg_ptr, &end_ptr, 0);
   7764       if (flag_value.empty() || beg_ptr + flag_value.size() != end_ptr)
   7765         ReportUnknownFlagAndExit(str);
   7766       *retval = int_val;
   7767       if (G_flags->verbosity >= 1) {
   7768         Printf("%40s => %ld\n", name, *retval);
   7769       }
   7770       break;
   7771     }
   7772     if (it != args->end()) {
   7773       cont = true;
   7774       args->erase(it);
   7775     }
   7776   } while (cont);
   7777 }
   7778 
   7779 static void FindUIntFlag(const char *name, intptr_t default_val,
   7780                  vector<string> *args, uintptr_t *retval) {
   7781   intptr_t signed_int;
   7782   FindIntFlag(name, default_val, args, &signed_int);
   7783   CHECK_GE(signed_int, 0);
   7784   *retval = signed_int;
   7785 }
   7786 
   7787 void FindStringFlag(const char *name, vector<string> *args,
   7788                     vector<string> *retval) {
   7789   bool cont = false;
   7790   do {
   7791     cont = false;
   7792     vector<string>::iterator it = args->begin();
   7793     for (; it != args->end(); ++it) {
   7794       string &str = *it;
   7795       string flag_value;
   7796       if (!FlagNameMatch(str, name, &flag_value)) continue;
   7797       retval->push_back(flag_value);
   7798       if (G_flags->verbosity >= 1) {
   7799         Printf("%40s => %s\n", name, flag_value.c_str());
   7800       }
   7801       break;
   7802     }
   7803     if (it != args->end()) {
   7804       cont = true;
   7805       args->erase(it);
   7806     }
   7807   } while (cont);
   7808 }
   7809 
   7810 void FindStringFlag(const char *name, vector<string> *args,
   7811                     string *retval) {
   7812   vector<string> tmp;
   7813   FindStringFlag(name, args, &tmp);
   7814   if (tmp.size() > 0) {
   7815     *retval = tmp.back();
   7816   }
   7817 }
   7818 
   7819 static size_t GetMemoryLimitInMbFromProcSelfLimits() {
   7820 #ifdef VGO_linux
   7821   // Parse the memory limit section of /proc/self/limits.
   7822   string proc_self_limits = ReadFileToString("/proc/self/limits", false);
   7823   const char *max_addr_space = "Max address space";
   7824   size_t pos = proc_self_limits.find(max_addr_space);
   7825   if (pos == string::npos) return 0;
   7826   pos += strlen(max_addr_space);
    7827   while (proc_self_limits[pos] == ' ') pos++;
   7828   if (proc_self_limits[pos] == 'u')
   7829     return 0;  // 'unlimited'.
   7830   char *end;
   7831   size_t result = my_strtol(proc_self_limits.c_str() + pos, &end, 0);
   7832   result >>= 20;
   7833   return result;
   7834 #else
   7835   return 0;
   7836 #endif
   7837 }
   7838 
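         // Returns the tightest known limit among: the 3G cap for Valgrind on
         // 32-bit Linux, "Max address space" from /proc/self/limits and the
         // VALGRIND_MEMORY_LIMIT_IN_MB environment variable; 0 means that no
         // limit is known.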
   7839 static size_t GetMemoryLimitInMb() {
   7840   size_t ret = -1;  // Maximum possible value.
   7841 #if defined(VGO_linux) && __WORDSIZE == 32
   7842   // Valgrind doesn't support more than 3G per process on 32-bit Linux.
   7843   ret = 3 * 1024;
   7844 #endif
   7845 
   7846   // Try /proc/self/limits.
   7847   size_t from_proc_self = GetMemoryLimitInMbFromProcSelfLimits();
   7848   if (from_proc_self && ret > from_proc_self) {
   7849     ret = from_proc_self;
   7850   }
   7851   // Try env.
   7852   const char *from_env_str =
   7853     (const char*)getenv("VALGRIND_MEMORY_LIMIT_IN_MB");
   7854   if (from_env_str) {
   7855     char *end;
   7856     size_t from_env_value = (size_t)my_strtol(from_env_str, &end, 0);
   7857     if (ret > from_env_value)
   7858       ret = from_env_value;
   7859   }
   7860   if (ret == (size_t)-1)
   7861     return 0;
   7862   return ret;
   7863 }
   7864 
   7865 bool PhaseDebugIsOn(const char *phase_name) {
   7866   CHECK(G_flags);
   7867   for (size_t i = 0; i < G_flags->debug_phase.size(); i++) {
   7868     if (G_flags->debug_phase[i] == phase_name)
   7869       return true;
   7870   }
   7871   return false;
   7872 }
   7873 
   7874 void ThreadSanitizerParseFlags(vector<string> *args) {
   7875 #ifdef TS_OFFLINE
   7876   string input_type_tmp;
   7877   FindStringFlag("input_type", args, &input_type_tmp);
   7878   if (input_type_tmp.size() > 0) {
   7879     G_flags->input_type = input_type_tmp;
   7880   } else {
   7881     G_flags->input_type = "str";
   7882   }
   7883 #endif
   7884 
   7885   // Check this first.
   7886   FindIntFlag("v", 0, args, &G_flags->verbosity);
   7887 
   7888   FindBoolFlag("ignore_stack", false, args, &G_flags->ignore_stack);
   7889   FindIntFlag("keep_history", 1, args, &G_flags->keep_history);
   7890   FindUIntFlag("segment_set_recycle_queue_size", DEBUG_MODE ? 10 : 10000, args,
   7891                &G_flags->segment_set_recycle_queue_size);
   7892   FindUIntFlag("recent_segments_cache_size", 10, args,
   7893                &G_flags->recent_segments_cache_size);
   7894 
   7895   bool fast_mode = false;
   7896   FindBoolFlag("fast_mode", false, args, &fast_mode);
   7897   if (fast_mode) {
   7898     Printf("INFO: --fast-mode is deprecated\n");
   7899   }
   7900   bool ignore_in_dtor = false;
   7901   FindBoolFlag("ignore_in_dtor", false, args, &ignore_in_dtor);
   7902   if (ignore_in_dtor) {
   7903     Printf("INFO: --ignore-in-dtor is deprecated\n");
   7904   }
   7905 
   7906   int has_phb = FindBoolFlag("pure_happens_before", true, args,
   7907                               &G_flags->pure_happens_before);
   7908   bool hybrid = false;
   7909   int has_hyb = FindBoolFlag("hybrid", false, args, &hybrid);
   7910   if (has_hyb && has_phb) {
   7911     Printf("INFO: --hybrid and --pure-happens-before"
   7912            " is mutually exclusive; ignoring the --hybrid switch\n");
   7913   } else if (has_hyb && !has_phb) {
   7914     G_flags->pure_happens_before = !hybrid;
   7915   }
   7916 
   7917   FindBoolFlag("show_expected_races", false, args,
   7918                &G_flags->show_expected_races);
   7919   FindBoolFlag("demangle", true, args, &G_flags->demangle);
   7920 
   7921   FindBoolFlag("announce_threads", false, args, &G_flags->announce_threads);
   7922   FindBoolFlag("full_output", false, args, &G_flags->full_output);
   7923   FindBoolFlag("show_states", false, args, &G_flags->show_states);
   7924   FindBoolFlag("show_proc_self_status", false, args,
   7925                &G_flags->show_proc_self_status);
   7926   FindBoolFlag("show_valgrind_context", false, args,
   7927                &G_flags->show_valgrind_context);
   7928   FindBoolFlag("suggest_happens_before_arcs", true, args,
   7929                &G_flags->suggest_happens_before_arcs);
   7930   FindBoolFlag("show_pc", false, args, &G_flags->show_pc);
   7931   FindBoolFlag("full_stack_frames", false, args, &G_flags->full_stack_frames);
   7932   FindBoolFlag("free_is_write", true, args, &G_flags->free_is_write);
   7933   FindBoolFlag("exit_after_main", false, args, &G_flags->exit_after_main);
   7934 
   7935   FindIntFlag("show_stats", 0, args, &G_flags->show_stats);
   7936   FindBoolFlag("trace_profile", false, args, &G_flags->trace_profile);
   7937   FindBoolFlag("color", false, args, &G_flags->color);
   7938   FindBoolFlag("html", false, args, &G_flags->html);
   7939 #ifdef TS_OFFLINE
   7940   bool show_pid_default = false;
   7941 #else
   7942   bool show_pid_default = true;
   7943 #endif
   7944   FindBoolFlag("show_pid", show_pid_default, args, &G_flags->show_pid);
   7945   FindBoolFlag("save_ignore_context", DEBUG_MODE ? true : false, args,
   7946                &G_flags->save_ignore_context);
   7947 
   7948   FindIntFlag("dry_run", 0, args, &G_flags->dry_run);
   7949   FindBoolFlag("report_races", true, args, &G_flags->report_races);
   7950   FindIntFlag("locking_scheme", 1, args, &G_flags->locking_scheme);
   7951   FindBoolFlag("unlock_on_mutex_destroy", true, args,
   7952                &G_flags->unlock_on_mutex_destroy);
   7953 
   7954   FindIntFlag("sample_events", 0, args, &G_flags->sample_events);
   7955   FindIntFlag("sample_events_depth", 2, args, &G_flags->sample_events_depth);
   7956 
   7957   FindIntFlag("debug_level", 1, args, &G_flags->debug_level);
   7958   FindStringFlag("debug_phase", args, &G_flags->debug_phase);
   7959   FindIntFlag("trace_level", 0, args, &G_flags->trace_level);
   7960 
   7961   FindIntFlag("literace_sampling", 0, args, &G_flags->literace_sampling);
   7962   FindIntFlag("sampling", 0, args, &G_flags->literace_sampling);
   7963   CHECK(G_flags->literace_sampling < 32);
   7964   CHECK(G_flags->literace_sampling >= 0);
   7965   FindBoolFlag("start_with_global_ignore_on", false, args,
   7966                &G_flags->start_with_global_ignore_on);
   7967 
   7968   FindStringFlag("fullpath_after", args, &G_flags->file_prefix_to_cut);
   7969   FindStringFlag("file_prefix_to_cut", args, &G_flags->file_prefix_to_cut);
   7970   for (size_t i = 0; i < G_flags->file_prefix_to_cut.size(); i++) {
   7971     G_flags->file_prefix_to_cut[i] =
   7972         ConvertToPlatformIndependentPath(G_flags->file_prefix_to_cut[i]);
   7973   }
   7974 
   7975   FindStringFlag("ignore", args, &G_flags->ignore);
   7976   FindStringFlag("whitelist", args, &G_flags->whitelist);
   7977   FindBoolFlag("ignore_unknown_pcs", false, args, &G_flags->ignore_unknown_pcs);
   7978 
   7979   FindBoolFlag("thread_coverage", false, args, &G_flags->thread_coverage);
   7980 
   7981   FindBoolFlag("atomicity", false, args, &G_flags->atomicity);
   7982   if (G_flags->atomicity) {
   7983     // When doing atomicity violation checking we should not
   7984     // create h-b arcs between Unlocks and Locks.
   7985     G_flags->pure_happens_before = false;
   7986   }
   7987 
   7988   FindBoolFlag("call_coverage", false, args, &G_flags->call_coverage);
   7989   FindStringFlag("dump_events", args, &G_flags->dump_events);
   7990   FindBoolFlag("symbolize", true, args, &G_flags->symbolize);
   7991 
   7992   FindIntFlag("trace_addr", 0, args,
   7993               reinterpret_cast<intptr_t*>(&G_flags->trace_addr));
   7994 
   7995   FindIntFlag("max_mem_in_mb", 0, args, &G_flags->max_mem_in_mb);
   7996   FindBoolFlag("offline", false, args, &G_flags->offline);
   7997   FindBoolFlag("attach_mode", false, args, &G_flags->attach_mode);
   7998   if (G_flags->max_mem_in_mb == 0) {
   7999     G_flags->max_mem_in_mb = GetMemoryLimitInMb();
   8000   }
   8001 
   8002   vector<string> summary_file_tmp;
   8003   FindStringFlag("summary_file", args, &summary_file_tmp);
   8004   if (summary_file_tmp.size() > 0) {
   8005     G_flags->summary_file = summary_file_tmp.back();
   8006   }
   8007 
   8008   vector<string> log_file_tmp;
   8009   FindStringFlag("log_file", args, &log_file_tmp);
   8010   if (log_file_tmp.size() > 0) {
   8011     G_flags->log_file = log_file_tmp.back();
   8012   }
   8013 
   8014   G_flags->tsan_program_name = "valgrind --tool=tsan";
   8015   FindStringFlag("tsan_program_name", args, &G_flags->tsan_program_name);
   8016 
   8017   G_flags->tsan_url = "http://code.google.com/p/data-race-test";
   8018   FindStringFlag("tsan_url", args, &G_flags->tsan_url);
   8019 
   8020   FindStringFlag("suppressions", args, &G_flags->suppressions);
   8021   FindBoolFlag("gen_suppressions", false, args,
   8022                &G_flags->generate_suppressions);
   8023 
   8024   FindIntFlag("error_exitcode", 0, args, &G_flags->error_exitcode);
   8025   FindIntFlag("flush_period", 0, args, &G_flags->flush_period);
   8026   FindBoolFlag("trace_children", false, args, &G_flags->trace_children);
   8027 
   8028   FindIntFlag("max_sid", kMaxSID, args, &G_flags->max_sid);
   8029   kMaxSID = G_flags->max_sid;
   8030   if (kMaxSID <= 100000) {
   8031     Printf("Error: max-sid should be at least 100000. Exiting\n");
   8032     exit(1);
   8033   }
   8034   FindIntFlag("max_sid_before_flush", (kMaxSID * 15) / 16, args,
   8035               &G_flags->max_sid_before_flush);
   8036   kMaxSIDBeforeFlush = G_flags->max_sid_before_flush;
   8037 
   8038   FindIntFlag("num_callers_in_history", kSizeOfHistoryStackTrace, args,
   8039               &G_flags->num_callers_in_history);
   8040   kSizeOfHistoryStackTrace = G_flags->num_callers_in_history;
   8041 
    8042   // By default, cut stack traces below the following functions.
   8043   G_flags->cut_stack_below.push_back("Thread*ThreadBody*");
   8044   G_flags->cut_stack_below.push_back("ThreadSanitizerStartThread");
   8045   G_flags->cut_stack_below.push_back("start_thread");
   8046   G_flags->cut_stack_below.push_back("BaseThreadInitThunk");
   8047   FindStringFlag("cut_stack_below", args, &G_flags->cut_stack_below);
   8048 
   8049   FindIntFlag("num_callers", 12, args, &G_flags->num_callers);
   8050 
   8051   G_flags->max_n_threads        = 100000;
   8052 
   8053   if (G_flags->full_output) {
   8054     G_flags->announce_threads = true;
   8055     G_flags->show_pc = true;
   8056     G_flags->full_stack_frames = true;
   8057     G_flags->show_states = true;
   8058     G_flags->file_prefix_to_cut.clear();
   8059   }
   8060 
   8061   FindIntFlag("race_verifier_sleep_ms", 100, args,
   8062       &G_flags->race_verifier_sleep_ms);
   8063   FindStringFlag("race_verifier", args, &G_flags->race_verifier);
   8064   FindStringFlag("race_verifier_extra", args, &G_flags->race_verifier_extra);
   8065   g_race_verifier_active =
   8066       !(G_flags->race_verifier.empty() && G_flags->race_verifier_extra.empty());
   8067   if (g_race_verifier_active) {
   8068     Printf("INFO: ThreadSanitizer running in Race Verifier mode.\n");
   8069   }
   8070 
   8071   FindBoolFlag("nacl_untrusted", false, args, &G_flags->nacl_untrusted);
   8072   FindBoolFlag("threaded_analysis", false, args, &G_flags->threaded_analysis);
   8073 
   8074   if (!args->empty()) {
   8075     ReportUnknownFlagAndExit(args->front());
   8076   }
   8077 
   8078   debug_expected_races = PhaseDebugIsOn("expected_races");
   8079   debug_benign_races = PhaseDebugIsOn("benign_races");
   8080   debug_malloc = PhaseDebugIsOn("malloc");
   8081   debug_free = PhaseDebugIsOn("free");
   8082   debug_thread = PhaseDebugIsOn("thread");
   8083   debug_ignore = PhaseDebugIsOn("ignore");
   8084   debug_rtn = PhaseDebugIsOn("rtn");
   8085   debug_lock = PhaseDebugIsOn("lock");
   8086   debug_wrap = PhaseDebugIsOn("wrap");
   8087   debug_ins = PhaseDebugIsOn("ins");
   8088   debug_shadow_stack = PhaseDebugIsOn("shadow_stack");
   8089   debug_happens_before = PhaseDebugIsOn("happens_before");
   8090   debug_cache = PhaseDebugIsOn("cache");
   8091   debug_race_verifier = PhaseDebugIsOn("race_verifier");
   8092 }
   8093 
   8094 // -------- ThreadSanitizer ------------------ {{{1
   8095 
   8096 // Setup the list of functions/images/files to ignore.
   8097 static void SetupIgnore() {
   8098   g_ignore_lists = new IgnoreLists;
   8099   g_white_lists = new IgnoreLists;
   8100 
   8101   // Add some major ignore entries so that tsan remains sane
    8102   // even w/o any ignore file. First, entries for all platforms.
   8103   g_ignore_lists->ignores.push_back(IgnoreFun("ThreadSanitizerStartThread"));
   8104   g_ignore_lists->ignores.push_back(IgnoreFun("exit"));
   8105   g_ignore_lists->ignores.push_back(IgnoreFun("longjmp"));
   8106 
   8107   // Dangerous: recursively ignoring vfprintf hides races on printf arguments.
   8108   // See PrintfTests in unittest/racecheck_unittest.cc
   8109   // TODO(eugenis): Do something about this.
   8110   // http://code.google.com/p/data-race-test/issues/detail?id=53
   8111   g_ignore_lists->ignores_r.push_back(IgnoreFun("vfprintf"));
   8112 
   8113   // do not create segments in our Replace_* functions
   8114   g_ignore_lists->ignores_hist.push_back(IgnoreFun("Replace_memcpy"));
   8115   g_ignore_lists->ignores_hist.push_back(IgnoreFun("Replace_memchr"));
   8116   g_ignore_lists->ignores_hist.push_back(IgnoreFun("Replace_strcpy"));
   8117   g_ignore_lists->ignores_hist.push_back(IgnoreFun("Replace_strchr"));
   8118   g_ignore_lists->ignores_hist.push_back(IgnoreFun("Replace_strrchr"));
   8119   g_ignore_lists->ignores_hist.push_back(IgnoreFun("Replace_strlen"));
   8120   g_ignore_lists->ignores_hist.push_back(IgnoreFun("Replace_strcmp"));
   8121 
   8122   // Ignore everything in our own file.
   8123   g_ignore_lists->ignores.push_back(IgnoreFile("*ts_valgrind_intercepts.c"));
   8124 
   8125 #ifndef _MSC_VER
   8126   // POSIX ignores
   8127   g_ignore_lists->ignores.push_back(IgnoreObj("*/libpthread*"));
   8128   g_ignore_lists->ignores.push_back(IgnoreObj("*/ld-2*.so"));
   8129   g_ignore_lists->ignores.push_back(IgnoreFun("pthread_create"));
   8130   g_ignore_lists->ignores.push_back(IgnoreFun("pthread_create@*"));
   8131   g_ignore_lists->ignores.push_back(IgnoreFun("pthread_create_WRK"));
   8132   g_ignore_lists->ignores.push_back(IgnoreFun("__cxa_*"));
   8133   g_ignore_lists->ignores.push_back(
   8134       IgnoreFun("*__gnu_cxx*__exchange_and_add*"));
   8135   g_ignore_lists->ignores.push_back(IgnoreFun("__lll_mutex_*"));
   8136   g_ignore_lists->ignores.push_back(IgnoreFun("__lll_*lock_*"));
   8137   g_ignore_lists->ignores.push_back(IgnoreFun("__fprintf_chk"));
   8138   g_ignore_lists->ignores.push_back(IgnoreFun("_IO_file_xsputn*"));
   8139   // fflush internals
   8140   g_ignore_lists->ignores.push_back(IgnoreFun("_IO_adjust_column"));
   8141   g_ignore_lists->ignores.push_back(IgnoreFun("_IO_flush_all_lockp"));
   8142 
   8143   g_ignore_lists->ignores.push_back(IgnoreFun("__sigsetjmp"));
   8144   g_ignore_lists->ignores.push_back(IgnoreFun("__sigjmp_save"));
   8145   g_ignore_lists->ignores.push_back(IgnoreFun("_setjmp"));
   8146   g_ignore_lists->ignores.push_back(IgnoreFun("_longjmp_unwind"));
   8147 
   8148   g_ignore_lists->ignores.push_back(IgnoreFun("__mktime_internal"));
   8149 
   8150   // http://code.google.com/p/data-race-test/issues/detail?id=40
   8151   g_ignore_lists->ignores_r.push_back(IgnoreFun("_ZNSsD1Ev"));
   8152 
   8153   g_ignore_lists->ignores_r.push_back(IgnoreFun("gaih_inet"));
   8154   g_ignore_lists->ignores_r.push_back(IgnoreFun("getaddrinfo"));
   8155   g_ignore_lists->ignores_r.push_back(IgnoreFun("gethostbyname2_r"));
   8156 
   8157   #ifdef VGO_darwin
   8158     // Mac-only ignores
   8159     g_ignore_lists->ignores.push_back(IgnoreObj("/usr/lib/dyld"));
   8160     g_ignore_lists->ignores.push_back(IgnoreObj("/usr/lib/libobjc.A.dylib"));
   8161     g_ignore_lists->ignores.push_back(IgnoreObj("*/libSystem.*.dylib"));
   8162     g_ignore_lists->ignores_r.push_back(IgnoreFun("__CFDoExternRefOperation"));
   8163     g_ignore_lists->ignores_r.push_back(IgnoreFun("_CFAutoreleasePoolPop"));
   8164     g_ignore_lists->ignores_r.push_back(IgnoreFun("_CFAutoreleasePoolPush"));
   8165     g_ignore_lists->ignores_r.push_back(IgnoreFun("OSAtomicAdd32"));
   8166     g_ignore_lists->ignores_r.push_back(IgnoreTriple("_dispatch_Block_copy",
   8167                                             "/usr/lib/libSystem.B.dylib", "*"));
   8168 
   8169     // pthread_lib_{enter,exit} shouldn't give us any reports since they
    8170     // have IGNORE_ALL_ACCESSES_BEGIN/END, but they still produce reports...
   8171     g_ignore_lists->ignores_r.push_back(IgnoreFun("pthread_lib_enter"));
   8172     g_ignore_lists->ignores_r.push_back(IgnoreFun("pthread_lib_exit"));
   8173   #endif
   8174 #else
   8175   // Windows-only ignores
   8176   g_ignore_lists->ignores.push_back(IgnoreObj("*ole32.dll"));
   8177   g_ignore_lists->ignores.push_back(IgnoreObj("*OLEAUT32.dll"));
   8178   g_ignore_lists->ignores.push_back(IgnoreObj("*MSCTF.dll"));
   8179   g_ignore_lists->ignores.push_back(IgnoreObj("*ntdll.dll"));
   8180   g_ignore_lists->ignores.push_back(IgnoreObj("*mswsock.dll"));
   8181   g_ignore_lists->ignores.push_back(IgnoreObj("*WS2_32.dll"));
   8182   g_ignore_lists->ignores.push_back(IgnoreObj("*msvcrt.dll"));
   8183   g_ignore_lists->ignores.push_back(IgnoreObj("*kernel32.dll"));
   8184   g_ignore_lists->ignores.push_back(IgnoreObj("*ADVAPI32.DLL"));
   8185 
   8186   g_ignore_lists->ignores.push_back(IgnoreFun("_EH_epilog3"));
   8187   g_ignore_lists->ignores.push_back(IgnoreFun("_EH_prolog3_catch"));
   8188   g_ignore_lists->ignores.push_back(IgnoreFun("unnamedImageEntryPoint"));
   8189   g_ignore_lists->ignores.push_back(IgnoreFun("_Mtxunlock"));
   8190   g_ignore_lists->ignores.push_back(IgnoreFun("IsNLSDefinedString"));
   8191 
   8192   g_ignore_lists->ignores_r.push_back(IgnoreFun("RtlDestroyQueryDebugBuffer"));
   8193   g_ignore_lists->ignores_r.push_back(IgnoreFun("BCryptGenerateSymmetricKey"));
   8194   g_ignore_lists->ignores_r.push_back(IgnoreFun("SHGetItemFromDataObject"));
   8195 
   8196   // http://code.google.com/p/data-race-test/issues/detail?id=53
   8197   g_ignore_lists->ignores_r.push_back(IgnoreFun("_stbuf"));
   8198   g_ignore_lists->ignores_r.push_back(IgnoreFun("_getptd"));
   8199 
   8200   // TODO(timurrrr): Add support for FLS (fiber-local-storage)
   8201   // http://code.google.com/p/data-race-test/issues/detail?id=55
   8202   g_ignore_lists->ignores_r.push_back(IgnoreFun("_freefls"));
   8203 #endif
   8204 
   8205 #ifdef ANDROID
   8206   // Android does not have a libpthread; pthread_* functions live in libc.
   8207   // We have to ignore them one-by-one.
   8208   g_ignore_lists->ignores.push_back(IgnoreFun("pthread_*"));
   8209   g_ignore_lists->ignores.push_back(IgnoreFun("__init_tls"));
   8210 #endif
   8211 
   8212   // Now read the ignore/whitelist files.
   8213   for (size_t i = 0; i < G_flags->ignore.size(); i++) {
   8214     string file_name = G_flags->ignore[i];
   8215     Report("INFO: Reading ignore file: %s\n", file_name.c_str());
   8216     string str = ReadFileToString(file_name, true);
   8217     ReadIgnoresFromString(str, g_ignore_lists);
   8218   }
   8219   for (size_t i = 0; i < G_flags->whitelist.size(); i++) {
   8220     string file_name = G_flags->whitelist[i];
   8221     Report("INFO: Reading whitelist file: %s\n", file_name.c_str());
   8222     string str = ReadFileToString(file_name, true);
   8223     ReadIgnoresFromString(str, g_white_lists);
   8224   }
   8225 }
   8226 
   8227 void ThreadSanitizerNaclUntrustedRegion(uintptr_t mem_start, uintptr_t mem_end) {
   8228   g_nacl_mem_start = mem_start;
   8229   g_nacl_mem_end = mem_end;
   8230 }
   8231 
   8232 bool AddrIsInNaclUntrustedRegion(uintptr_t addr) {
   8233   return addr >= g_nacl_mem_start && addr < g_nacl_mem_end;
   8234 }
   8235 
   8236 bool ThreadSanitizerIgnoreForNacl(uintptr_t addr) {
   8237   // Ignore trusted addresses if tracing untrusted code, and ignore untrusted
   8238   // addresses otherwise.
   8239   return G_flags->nacl_untrusted != AddrIsInNaclUntrustedRegion(addr);
   8240 }
   8241 
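         // Decides whether the superblock at 'pc' should be instrumented: the
         // routine must pass the whitelist (when one is given), must not match
         // the ignore lists, and must not be ignored for NaCl reasons.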
   8242 bool ThreadSanitizerWantToInstrumentSblock(uintptr_t pc) {
   8243   string img_name, rtn_name, file_name;
   8244   int line_no;
   8245   G_stats->pc_to_strings++;
   8246   PcToStrings(pc, false, &img_name, &rtn_name, &file_name, &line_no);
   8247 
   8248   if (g_white_lists->ignores.size() > 0) {
   8249     bool in_white_list = TripleVectorMatchKnown(g_white_lists->ignores,
   8250                                                 rtn_name, img_name, file_name);
   8251     if (in_white_list) {
   8252       if (debug_ignore) {
   8253         Report("INFO: Whitelisted rtn: %s\n", rtn_name.c_str());
   8254       }
   8255     } else {
   8256       return false;
   8257     }
   8258   }
   8259 
   8260   if (G_flags->ignore_unknown_pcs && rtn_name == "(no symbols)") {
   8261     if (debug_ignore) {
   8262       Report("INFO: not instrumenting unknown function at %p\n", pc);
   8263     }
   8264     return false;
   8265   }
   8266 
   8267   bool ignore = TripleVectorMatchKnown(g_ignore_lists->ignores,
   8268                                        rtn_name, img_name, file_name) ||
   8269                 TripleVectorMatchKnown(g_ignore_lists->ignores_r,
   8270                                        rtn_name, img_name, file_name);
   8271   if (debug_ignore) {
   8272     Printf("%s: pc=%p file_name=%s img_name=%s rtn_name=%s ret=%d\n",
   8273            __FUNCTION__, pc, file_name.c_str(), img_name.c_str(),
   8274            rtn_name.c_str(), !ignore);
   8275   }
   8276   bool nacl_ignore = ThreadSanitizerIgnoreForNacl(pc);
   8277   return !(ignore || nacl_ignore);
   8278 }
   8279 
   8280 bool ThreadSanitizerWantToCreateSegmentsOnSblockEntry(uintptr_t pc) {
   8281   string rtn_name;
   8282   rtn_name = PcToRtnName(pc, false);
   8283   if (G_flags->keep_history == 0)
   8284     return false;
   8285   return !(TripleVectorMatchKnown(g_ignore_lists->ignores_hist,
   8286                                   rtn_name, "", ""));
   8287 }
   8288 
   8289 // Returns true if function at "pc" is marked as "fun_r" in the ignore file.
   8290 bool NOINLINE ThreadSanitizerIgnoreAccessesBelowFunction(uintptr_t pc) {
   8291   ScopedMallocCostCenter cc(__FUNCTION__);
   8292   typedef unordered_map<uintptr_t, bool> Cache;
   8293   static Cache *cache = NULL;
   8294   {
   8295     TIL ignore_below_lock(ts_ignore_below_lock, 18);
   8296     if (!cache)
   8297       cache = new Cache;
   8298 
   8299     // Fast path - check if we already know the answer.
   8300     Cache::iterator i = cache->find(pc);
   8301     if (i != cache->end())
   8302       return i->second;
   8303   }
   8304 
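           // The lock is released here (end of the scope above), presumably so
           // that the potentially slow symbolization below does not serialize
           // other threads.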
   8305   string rtn_name = PcToRtnName(pc, false);
   8306   bool ret =
   8307       TripleVectorMatchKnown(g_ignore_lists->ignores_r, rtn_name, "", "");
   8308 
   8309   if (DEBUG_MODE) {
   8310     // Heavy test for NormalizeFunctionName: test on all possible inputs in
   8311     // debug mode. TODO(timurrrr): Remove when tested.
   8312     NormalizeFunctionName(PcToRtnName(pc, true));
   8313   }
   8314 
   8315   // Grab the lock again
   8316   TIL ignore_below_lock(ts_ignore_below_lock, 19);
   8317   if (ret && debug_ignore) {
   8318     Report("INFO: ignoring all accesses below the function '%s' (%p)\n",
   8319            PcToRtnNameAndFilePos(pc).c_str(), pc);
   8320   }
   8321   return ((*cache)[pc] = ret);
   8322 }
   8323 
   8324 // We intercept a user function with this name
   8325 // and answer the user query with a non-NULL string.
   8326 extern "C" const char *ThreadSanitizerQuery(const char *query) {
   8327   const char *ret = "0";
   8328   string str(query);
   8329   if (str == "pure_happens_before" && G_flags->pure_happens_before == true) {
   8330     ret = "1";
   8331   }
   8332   if (str == "hybrid_full" &&
   8333       G_flags->pure_happens_before == false) {
   8334     ret = "1";
   8335   }
   8336   if (str == "race_verifier" && g_race_verifier_active == true) {
   8337     ret = "1";
   8338   }
   8339   if (DEBUG_MODE && G_flags->debug_level >= 2) {
   8340     Printf("ThreadSanitizerQuery(\"%s\") = \"%s\"\n", query, ret);
   8341   }
   8342   if (str == "trace-level=0") {
   8343     Report("INFO: trace-level=0\n");
   8344     G_flags->trace_level = 0;
   8345     debug_happens_before = false;
   8346   }
   8347   if (str == "trace-level=1") {
   8348     Report("INFO: trace-level=1\n");
   8349     G_flags->trace_level = 1;
   8350     debug_happens_before = true;
   8351   }
   8352   return ret;
   8353 }
   8354 
   8355 extern void ThreadSanitizerInit() {
   8356   ScopedMallocCostCenter cc("ThreadSanitizerInit");
   8357   ts_lock = new TSLock;
   8358   ts_ignore_below_lock = new TSLock;
   8359   g_so_far_only_one_thread = true;
   8360   ANNOTATE_BENIGN_RACE(&g_so_far_only_one_thread, "real benign race");
   8361   CHECK_EQ(sizeof(ShadowValue), 8);
   8362   CHECK(G_flags);
   8363   G_stats        = new Stats;
   8364   SetupIgnore();
   8365 
   8366   G_detector     = new Detector;
   8367   G_cache        = new Cache;
   8368   G_expected_races_map = new ExpectedRacesMap;
   8369   G_heap_map           = new HeapMap<HeapInfo>;
   8370   G_thread_stack_map   = new HeapMap<ThreadStackInfo>;
   8371   {
   8372     ScopedMallocCostCenter cc1("Segment::InitClassMembers");
   8373     Segment::InitClassMembers();
   8374   }
   8375   SegmentSet::InitClassMembers();
   8376   CacheLine::InitClassMembers();
   8377   Thread::InitClassMembers();
   8378   Lock::InitClassMembers();
   8379   LockSet::InitClassMembers();
   8380   EventSampler::InitClassMembers();
   8381   VTS::InitClassMembers();
   8382   // TODO(timurrrr): make sure *::InitClassMembers() are called only once for
   8383   // each class
   8384   g_publish_info_map = new PublishInfoMap;
   8385   g_stack_trace_free_list = new StackTraceFreeList;
   8386   g_pcq_map = new PCQMap;
   8387 
   8388 
   8389   if (G_flags->html) {
   8390     c_bold    = "<font ><b>";
   8391     c_red     = "<font color=red><b>";
   8392     c_green   = "<font color=green><b>";
   8393     c_magenta = "<font color=magenta><b>";
   8394     c_cyan    = "<font color=cyan><b>";
    8395     c_blue    = "<font color=blue><b>";
   8396     c_yellow  = "<font color=yellow><b>";
   8397     c_default = "</b></font>";
   8398   } else if (G_flags->color) {
   8399     // Enable ANSI colors.
   8400     c_bold    = "\033[1m";
   8401     c_red     = "\033[31m";
   8402     c_green   = "\033[32m";
   8403     c_yellow  = "\033[33m";
   8404     c_blue    = "\033[34m";
   8405     c_magenta = "\033[35m";
   8406     c_cyan    = "\033[36m";
   8407     c_default = "\033[0m";
   8408   }
   8409 
   8410   if (G_flags->verbosity >= 1) {
   8411     Report("INFO: Started pid %d\n",  getpid());
   8412   }
   8413   if (G_flags->start_with_global_ignore_on) {
   8414     global_ignore = true;
   8415     Report("INFO: STARTING WITH GLOBAL IGNORE ON\n");
   8416   }
   8417   ANNOTATE_BENIGN_RACE(&g_lock_era,
   8418                        "g_lock_era may be incremented in a racey way");
   8419 }
   8420 
   8421 extern void ThreadSanitizerFini() {
   8422   G_detector->HandleProgramEnd();
   8423 }
   8424 
   8425 extern void ThreadSanitizerDumpAllStacks() {
    8426   // First, print the running threads.
   8427   for (int i = 0; i < Thread::NumberOfThreads(); i++) {
   8428     Thread *t = Thread::Get(TID(i));
   8429     if (!t || !t->is_running()) continue;
   8430     Report("T%d\n", i);
   8431     t->ReportStackTrace();
   8432   }
    8433   // Now print all dead threads.
   8434   for (int i = 0; i < Thread::NumberOfThreads(); i++) {
   8435     Thread *t = Thread::Get(TID(i));
   8436     if (!t || t->is_running()) continue;
   8437     Report("T%d (not running)\n", i);
   8438     t->ReportStackTrace();
   8439   }
   8440 }
   8441 
   8442 
   8443 extern void ThreadSanitizerHandleOneEvent(Event *e) {
    8444   // The lock is taken inside on some paths.
   8445   G_detector->HandleOneEvent(e);
   8446 }
   8447 
   8448 Thread *ThreadSanitizerGetThreadByTid(int32_t tid) {
   8449   return Thread::Get(TID(tid));
   8450 }
   8451 
   8452 extern NOINLINE void ThreadSanitizerHandleTrace(int32_t tid, TraceInfo *trace_info,
   8453                                        uintptr_t *tleb) {
   8454   ThreadSanitizerHandleTrace(Thread::Get(TID(tid)), trace_info, tleb);
   8455 }
   8456 extern NOINLINE void ThreadSanitizerHandleTrace(Thread *thr, TraceInfo *trace_info,
   8457                                                 uintptr_t *tleb) {
   8458   DCHECK(thr);
   8459   // The lock is taken inside on the slow path.
   8460   G_detector->HandleTrace(thr,
   8461                           trace_info->mops(),
   8462                           trace_info->n_mops(),
   8463                           trace_info->pc(),
   8464                           tleb, /*need_locking=*/true);
   8465 }
   8466 
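         // A pc of 0 tells HandleTrace not to create an sblock (cf. the
         // "*sblock_pc = 0" logic in HandleMemoryAccessInternal).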
   8467 extern NOINLINE void ThreadSanitizerHandleOneMemoryAccess(Thread *thr,
   8468                                                           MopInfo mop,
   8469                                                           uintptr_t addr) {
   8470   DCHECK(thr);
   8471   G_detector->HandleTrace(thr,
   8472                           &mop,
   8473                           1,
   8474                           mop.create_sblock() ? mop.pc() : 0,
   8475                           &addr, /*need_locking=*/true);
   8476 }
   8477 
   8478 void NOINLINE ThreadSanitizerHandleRtnCall(int32_t tid, uintptr_t call_pc,
   8479                                          uintptr_t target_pc,
   8480                                          IGNORE_BELOW_RTN ignore_below) {
    8481   // This does locking on a cold path. The hot path is thread-local.
   8482   G_detector->HandleRtnCall(TID(tid), call_pc, target_pc, ignore_below);
   8483 
   8484   if (G_flags->sample_events) {
   8485     static EventSampler sampler;
   8486     Thread *thr = Thread::Get(TID(tid));
   8487     sampler.Sample(thr, "RTN_CALL", true);
   8488   }
   8489 }
   8490 void NOINLINE ThreadSanitizerHandleRtnExit(int32_t tid) {
   8491   // This is a thread-local operation, no need for locking.
   8492   Thread::Get(TID(tid))->HandleRtnExit();
   8493 }
   8494 
   8495 static bool ThreadSanitizerPrintReport(ThreadSanitizerReport *report) {
   8496   return G_detector->reports_.PrintReport(report);
   8497 }
   8498 
   8499 // -------- TODO -------------------------- {{{1
   8500 // - Support configurable aliases for function names (is it doable in valgrind)?
   8501 // - Correctly support atomic operations (not just ignore).
   8502 // - Handle INC as just one write
   8503 //   - same for memset, etc
   8504 // - Implement correct handling of memory accesses with different sizes.
   8505 // - Do not create HB arcs between RdUnlock and RdLock
   8506 // - Compress cache lines
    8507 // - Optimize the case where a thread signals twice in a row on the same
   8508 //   address.
   8509 // - Fix --ignore-in-dtor if --demangle=no.
   8510 // - Use cpplint (http://code.google.com/p/google-styleguide)
   8511 // - Get rid of annoying casts in printfs.
   8512 // - Compress stack traces (64-bit only. may save up to 36 bytes per segment).
   8513 // end. {{{1
   8514 // vim:shiftwidth=2:softtabstop=2:expandtab:tw=80
   8515