      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- LibHB: a library for implementing and checking               ---*/
      4 /*--- the happens-before relationship in concurrent programs.      ---*/
      5 /*---                                                 libhb_main.c ---*/
      6 /*--------------------------------------------------------------------*/
      7 
      8 /*
      9    This file is part of LibHB, a library for implementing and checking
     10    the happens-before relationship in concurrent programs.
     11 
     12    Copyright (C) 2008-2013 OpenWorks Ltd
     13       info (at) open-works.co.uk
     14 
     15    This program is free software; you can redistribute it and/or
     16    modify it under the terms of the GNU General Public License as
     17    published by the Free Software Foundation; either version 2 of the
     18    License, or (at your option) any later version.
     19 
     20    This program is distributed in the hope that it will be useful, but
     21    WITHOUT ANY WARRANTY; without even the implied warranty of
     22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     23    General Public License for more details.
     24 
     25    You should have received a copy of the GNU General Public License
     26    along with this program; if not, write to the Free Software
     27    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     28    02111-1307, USA.
     29 
     30    The GNU General Public License is contained in the file COPYING.
     31 */
     32 
     33 #include "pub_tool_basics.h"
     34 #include "pub_tool_poolalloc.h"
     35 #include "pub_tool_libcassert.h"
     36 #include "pub_tool_libcbase.h"
     37 #include "pub_tool_libcprint.h"
     38 #include "pub_tool_mallocfree.h"
     39 #include "pub_tool_wordfm.h"
     40 #include "pub_tool_sparsewa.h"
     41 #include "pub_tool_xarray.h"
     42 #include "pub_tool_oset.h"
     43 #include "pub_tool_threadstate.h"
     44 #include "pub_tool_aspacemgr.h"
     45 #include "pub_tool_execontext.h"
     46 #include "pub_tool_errormgr.h"
     47 #include "pub_tool_options.h"        // VG_(clo_stats)
     48 #include "hg_basics.h"
     49 #include "hg_wordset.h"
     50 #include "hg_lock_n_thread.h"
     51 #include "hg_errors.h"
     52 
     53 #include "libhb.h"
     54 
     55 
     56 /////////////////////////////////////////////////////////////////
     57 /////////////////////////////////////////////////////////////////
     58 //                                                             //
     59 // Debugging #defines                                          //
     60 //                                                             //
     61 /////////////////////////////////////////////////////////////////
     62 /////////////////////////////////////////////////////////////////
     63 
     64 /* Check the sanity of shadow values in the core memory state
     65    machine.  Change #if 0 to #if 1 to enable this. */
     66 #if 0
     67 #  define CHECK_MSM 1
     68 #else
     69 #  define CHECK_MSM 0
     70 #endif
     71 
     72 
     73 /* Check sanity (reference counts, etc) in the conflicting access
     74    machinery.  Change #if 0 to #if 1 to enable this. */
     75 #if 0
     76 #  define CHECK_CEM 1
     77 #else
     78 #  define CHECK_CEM 0
     79 #endif
     80 
     81 
     82 /* Check sanity in the compressed shadow memory machinery,
     83    particularly in its caching innards.  Unfortunately there's no
     84    almost-zero-cost way to make them selectable at run time.  Hence
     85    set the #if 0 to #if 1 and rebuild if you want them. */
     86 #if 0
     87 #  define CHECK_ZSM 1  /* do sanity-check CacheLine stuff */
     88 #  define inline __attribute__((noinline))
     89    /* probably want to ditch -fomit-frame-pointer too */
     90 #else
     91 #  define CHECK_ZSM 0   /* don't sanity-check CacheLine stuff */
     92 #endif
     93 
     94 
     95 /////////////////////////////////////////////////////////////////
     96 /////////////////////////////////////////////////////////////////
     97 //                                                             //
     98 // data decls: VtsID                                           //
     99 //                                                             //
    100 /////////////////////////////////////////////////////////////////
    101 /////////////////////////////////////////////////////////////////
    102 
    103 /* VtsIDs: Unique small-integer IDs for VTSs.  VtsIDs can't exceed 30
    104    bits, since they have to be packed into the lowest 30 bits of an
    105    SVal. */
    106 typedef  UInt  VtsID;
    107 #define VtsID_INVALID 0xFFFFFFFF
    108 
    109 
    110 
    111 /////////////////////////////////////////////////////////////////
    112 /////////////////////////////////////////////////////////////////
    113 //                                                             //
    114 // data decls: SVal                                            //
    115 //                                                             //
    116 /////////////////////////////////////////////////////////////////
    117 /////////////////////////////////////////////////////////////////
    118 
    119 typedef  ULong  SVal;
    120 
    121 /* This value has special significance to the implementation, and callers
    122    may not store it in the shadow memory. */
    123 #define SVal_INVALID (3ULL << 62)
    124 
    125 /* This is the default value for shadow memory.  Initially the shadow
    126    memory contains no accessible areas and so all reads produce this
     127    value.  TODO: make this caller-definable. */
    128 #define SVal_NOACCESS (2ULL << 62)
    129 
    130 
    131 
    132 /////////////////////////////////////////////////////////////////
    133 /////////////////////////////////////////////////////////////////
    134 //                                                             //
    135 // data decls: ScalarTS                                        //
    136 //                                                             //
    137 /////////////////////////////////////////////////////////////////
    138 /////////////////////////////////////////////////////////////////
    139 
    140 /* Scalar Timestamp.  We have to store a lot of these, so there is
    141    some effort to make them as small as possible.  Logically they are
    142    a pair, (Thr*, ULong), but that takes 16 bytes on a 64-bit target.
    143    We pack it into 64 bits by representing the Thr* using a ThrID, a
    144    small integer (18 bits), and a 46 bit integer for the timestamp
     145    number.  The 46/18 split is arbitrary, but has the effect that
    146    Helgrind can only handle programs that create 2^18 or fewer threads
    147    over their entire lifetime, and have no more than 2^46 timestamp
    148    ticks (synchronisation operations on the same thread).
    149 
    150    This doesn't seem like much of a limitation.  2^46 ticks is
     151    7.04e+13, and if each tick (optimistically) takes the machine 1000
    152    cycles to process, then the minimum time to process that many ticks
    153    at a clock rate of 5 GHz is 162.9 days.  And that's doing nothing
    154    but VTS ticks, which isn't realistic.
    155 
    156    NB1: SCALARTS_N_THRBITS must be 29 or lower.  The obvious limit is
    157    32 since a ThrID is a UInt.  29 comes from the fact that
    158    'Thr_n_RCEC', which records information about old accesses, packs
    159    not only a ThrID but also 2+1 other bits (access size and
    160    writeness) in a UInt, hence limiting size to 32-(2+1) == 29.
    161 
    162    NB2: thrid values are issued upwards from 1024, and values less
    163    than that aren't valid.  This isn't per se necessary (any order
    164    will do, so long as they are unique), but it does help ensure they
    165    are less likely to get confused with the various other kinds of
    166    small-integer thread ids drifting around (eg, TId).  See also NB5.
    167 
    168    NB3: this probably also relies on the fact that Thr's are never
    169    deallocated -- they exist forever.  Hence the 1-1 mapping from
    170    Thr's to thrid values (set up in Thr__new) persists forever.
    171 
    172    NB4: temp_max_sized_VTS is allocated at startup and never freed.
     173    It is a maximum sized VTS, so has (1 << SCALARTS_N_THRBITS)
    174    ScalarTSs.  So we can't make SCALARTS_N_THRBITS too large without
    175    making the memory use for this go sky-high.  With
    176    SCALARTS_N_THRBITS at 18, it occupies 2MB of memory, which seems
    177    like an OK tradeoff.  If more than 256k threads need to be
    178    supported, we could change SCALARTS_N_THRBITS to 20, which would
    179    facilitate supporting 1 million threads at the cost of 8MB storage
    180    for temp_max_sized_VTS.
    181 
    182    NB5: the conflicting-map mechanism (Thr_n_RCEC, specifically) uses
    183    ThrID == 0 to denote an empty Thr_n_RCEC record.  So ThrID == 0
    184    must never be a valid ThrID.  Given NB2 that's OK.
    185 */
    186 #define SCALARTS_N_THRBITS 18  /* valid range: 11 to 29 inclusive */
    187 
    188 #define SCALARTS_N_TYMBITS (64 - SCALARTS_N_THRBITS)
    189 typedef
    190    struct {
    191       ThrID thrid : SCALARTS_N_THRBITS;
    192       ULong tym   : SCALARTS_N_TYMBITS;
    193    }
    194    ScalarTS;
    195 
    196 #define ThrID_MAX_VALID ((1 << SCALARTS_N_THRBITS) - 1)
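/* Illustrative sketch (added for exposition; not part of the original
   sources): how a (thrid, tym) pair is packed into a single ScalarTS,
   and the limits that follow from the default 18/46 split.  The function
   name is invented purely for illustration. */
#if 0
static void example_ScalarTS_packing ( void )
{
   ScalarTS ts;
   ts.thrid = 1024;   /* thrids are issued upwards from 1024 (see NB2) */
   ts.tym   = 1;      /* first tick for that thread */
   /* With the 18/46 split the pair is assumed to occupy one 64-bit word,
      and at most 2^18 - 1 == 262143 distinct threads are representable. */
   tl_assert(sizeof(ScalarTS) == 8);
   tl_assert(ThrID_MAX_VALID == 262143);
}
#endif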
    197 
    198 
    199 
    200 /////////////////////////////////////////////////////////////////
    201 /////////////////////////////////////////////////////////////////
    202 //                                                             //
    203 // data decls: Filter                                          //
    204 //                                                             //
    205 /////////////////////////////////////////////////////////////////
    206 /////////////////////////////////////////////////////////////////
    207 
    208 // baseline: 5, 9
    209 #define FI_LINE_SZB_LOG2  5
    210 #define FI_NUM_LINES_LOG2 10
    211 
    212 #define FI_LINE_SZB       (1 << FI_LINE_SZB_LOG2)
    213 #define FI_NUM_LINES      (1 << FI_NUM_LINES_LOG2)
    214 
    215 #define FI_TAG_MASK        (~(Addr)(FI_LINE_SZB - 1))
    216 #define FI_GET_TAG(_a)     ((_a) & FI_TAG_MASK)
    217 
    218 #define FI_GET_LINENO(_a)  ( ((_a) >> FI_LINE_SZB_LOG2) \
    219                              & (Addr)(FI_NUM_LINES-1) )
    220 
    221 
    222 /* In the lines, each 8 bytes are treated individually, and are mapped
    223    to a UShort.  Regardless of endianness of the underlying machine,
    224    bits 1 and 0 pertain to the lowest address and bits 15 and 14 to
    225    the highest address.
    226 
     227    Of each bit pair, the higher numbered bit is set if an R has been
    228    seen, so the actual layout is:
    229 
    230    15 14             ...  01 00
    231 
    232    R  W  for addr+7  ...  R  W  for addr+0
    233 
    234    So a mask for the R-bits is 0xAAAA and for the W bits is 0x5555.
    235 */
    236 
    237 /* tags are separated from lines.  tags are Addrs and are
    238    the base address of the line. */
    239 typedef
    240    struct {
    241       UShort u16s[FI_LINE_SZB / 8]; /* each UShort covers 8 bytes */
    242    }
    243    FiLine;
    244 
    245 typedef
    246    struct {
    247       Addr   tags[FI_NUM_LINES];
    248       FiLine lines[FI_NUM_LINES];
    249    }
    250    Filter;
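/* Illustrative sketch (added for exposition; not part of the original
   sources): locating the R/W bit pair for a single byte, following the
   layout described above.  A real lookup would also check that
   fi->tags[lineno] == FI_GET_TAG(a) before trusting the line.  The
   function name is invented purely for illustration. */
#if 0
static void example_filter_bits_for_byte ( Filter* fi, Addr a,
                                           /*OUT*/Bool* seenR,
                                           /*OUT*/Bool* seenW )
{
   UWord   lineno  = FI_GET_LINENO(a);
   FiLine* line    = &fi->lines[lineno];
   UWord   byteoff = a & (FI_LINE_SZB - 1);    /* offset within the line */
   UShort  u16     = line->u16s[byteoff / 8];  /* each UShort covers 8 bytes */
   UWord   shift   = 2 * (byteoff % 8);        /* bit pair for this byte */
   *seenW = (u16 >> shift) & 1;                /* W is the lower bit ...  */
   *seenR = (u16 >> (shift + 1)) & 1;          /* ... R the higher bit    */
}
#endif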
    251 
    252 
    253 
    254 /////////////////////////////////////////////////////////////////
    255 /////////////////////////////////////////////////////////////////
    256 //                                                             //
    257 // data decls: Thr, ULong_n_EC                                 //
    258 //                                                             //
    259 /////////////////////////////////////////////////////////////////
    260 /////////////////////////////////////////////////////////////////
    261 
    262 // Records stacks for H1 history mechanism (DRD-style)
    263 typedef
    264    struct { ULong ull; ExeContext* ec; }
    265    ULong_n_EC;
    266 
    267 
    268 /* How many of the above records to collect for each thread?  Older
    269    ones are dumped when we run out of space.  62.5k requires 1MB per
    270    thread, since each ULong_n_EC record is 16 bytes long.  When more
    271    than N_KWs_N_STACKs_PER_THREAD are present, the older half are
    272    deleted to make space.  Hence in the worst case we will be able to
    273    produce a stack at least for the last N_KWs_N_STACKs_PER_THREAD / 2
    274    Kw transitions (segments in this thread).  For the current setting
    275    that gives a guaranteed stack for at least the last 31.25k
    276    segments. */
    277 #define N_KWs_N_STACKs_PER_THREAD 62500
    278 
    279 
    280 struct _Thr {
    281    /* Current VTSs for this thread.  They change as we go along.  viR
    282       is the VTS to be used for reads, viW for writes.  Usually they
    283       are the same, but can differ when we deal with reader-writer
    284       locks.  It is always the case that
    285          VtsID__cmpLEQ(viW,viR) == True
     286       that is, viW must be the same as, or lagging behind, viR. */
    287    VtsID viR;
    288    VtsID viW;
    289 
    290    /* Is initially False, and is set to True after the thread really
    291       has done a low-level exit.  When True, we expect to never see
    292       any more memory references done by this thread. */
    293    Bool llexit_done;
    294 
    295    /* Is initially False, and is set to True after the thread has been
    296       joined with (reaped by some other thread).  After this point, we
    297       do not expect to see any uses of .viR or .viW, so it is safe to
    298       set them to VtsID_INVALID. */
    299    Bool joinedwith_done;
    300 
    301    /* A small integer giving a unique identity to this Thr.  See
    302       comments on the definition of ScalarTS for details. */
    303    ThrID thrid : SCALARTS_N_THRBITS;
    304 
    305    /* A filter that removes references for which we believe that
    306       msmcread/msmcwrite will not change the state, nor report a
    307       race. */
    308    Filter* filter;
    309 
    310    /* A pointer back to the top level Thread structure.  There is a
    311       1-1 mapping between Thread and Thr structures -- each Thr points
    312       at its corresponding Thread, and vice versa.  Really, Thr and
    313       Thread should be merged into a single structure. */
    314    Thread* hgthread;
    315 
     316    /* The ULongs (scalar Kws) in this array accumulate in strictly
    317       increasing order, without duplicates.  This is important because
    318       we need to be able to find a given scalar Kw in this array
    319       later, by binary search. */
    320    XArray* /* ULong_n_EC */ local_Kws_n_stacks;
    321 };
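/* Illustrative sketch (added for exposition; not part of the original
   sources): because the .ull fields in local_Kws_n_stacks are strictly
   increasing, a given scalar Kw can be located by binary search.  The
   function name is invented; the real lookup code appears elsewhere in
   this file and may differ in detail. */
#if 0
static ExeContext* example_EC_for_Kw ( Thr* thr, ULong kw )
{
   XArray* xa = thr->local_Kws_n_stacks;
   Word    lo = 0, hi = VG_(sizeXA)( xa ) - 1;
   while (lo <= hi) {
      Word        mid = lo + (hi - lo) / 2;
      ULong_n_EC* ent = (ULong_n_EC*)VG_(indexXA)( xa, mid );
      if (ent->ull == kw) return ent->ec;
      if (ent->ull <  kw) lo = mid + 1; else hi = mid - 1;
   }
   return NULL; /* not present (it may have been discarded to save space) */
}
#endif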
    322 
    323 
    324 
    325 /////////////////////////////////////////////////////////////////
    326 /////////////////////////////////////////////////////////////////
    327 //                                                             //
    328 // data decls: SO                                              //
    329 //                                                             //
    330 /////////////////////////////////////////////////////////////////
    331 /////////////////////////////////////////////////////////////////
    332 
    333 // (UInt) `echo "Synchronisation object" | md5sum`
    334 #define SO_MAGIC 0x56b3c5b0U
    335 
    336 struct _SO {
    337    struct _SO* admin_prev;
    338    struct _SO* admin_next;
    339    VtsID viR; /* r-clock of sender */
    340    VtsID viW; /* w-clock of sender */
    341    UInt  magic;
    342 };
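/* Illustrative sketch (added for exposition; not part of the original
   sources): a sanity check for SO using the magic value above, analogous
   to is_sane_SecMap further below.  The name is invented; the real code
   may define its own check elsewhere. */
#if 0
static inline Bool example_is_sane_SO ( SO* so ) {
   return so != NULL && so->magic == SO_MAGIC;
}
#endif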
    343 
    344 
    345 
    346 /////////////////////////////////////////////////////////////////
    347 /////////////////////////////////////////////////////////////////
    348 //                                                             //
    349 // Forward declarations                                        //
    350 //                                                             //
    351 /////////////////////////////////////////////////////////////////
    352 /////////////////////////////////////////////////////////////////
    353 
    354 /* fwds for
    355    Globals needed by other parts of the library.  These are set
    356    once at startup and then never changed. */
    357 static void        (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
    358 static ExeContext* (*main_get_EC)( Thr* ) = NULL;
    359 
    360 /* misc fn and data fwdses */
    361 static void VtsID__rcinc ( VtsID ii );
    362 static void VtsID__rcdec ( VtsID ii );
    363 
    364 static inline Bool SVal__isC ( SVal s );
    365 static inline VtsID SVal__unC_Rmin ( SVal s );
    366 static inline VtsID SVal__unC_Wmin ( SVal s );
    367 static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini );
    368 
     369 /* A doubly-linked list of all the SO's. */
    370 SO* admin_SO;
    371 
    372 
    373 
    374 /////////////////////////////////////////////////////////////////
    375 /////////////////////////////////////////////////////////////////
    376 //                                                             //
    377 // SECTION BEGIN compressed shadow memory                      //
    378 //                                                             //
    379 /////////////////////////////////////////////////////////////////
    380 /////////////////////////////////////////////////////////////////
    381 
    382 #ifndef __HB_ZSM_H
    383 #define __HB_ZSM_H
    384 
    385 /* Initialise the library.  Once initialised, it will (or may) call
    386    rcinc and rcdec in response to all the calls below, in order to
    387    allow the user to do reference counting on the SVals stored herein.
    388    It is important to understand, however, that due to internal
    389    caching, the reference counts are in general inaccurate, and can be
    390    both above or below the true reference count for an item.  In
    391    particular, the library may indicate that the reference count for
    392    an item is zero, when in fact it is not.
    393 
    394    To make the reference counting exact and therefore non-pointless,
    395    call zsm_flush_cache.  Immediately after it returns, the reference
    396    counts for all items, as deduced by the caller by observing calls
    397    to rcinc and rcdec, will be correct, and so any items with a zero
    398    reference count may be freed (or at least considered to be
    399    unreferenced by this library).
    400 */
    401 static void zsm_init ( void(*rcinc)(SVal), void(*rcdec)(SVal) );
    402 
    403 static void zsm_sset_range  ( Addr, SizeT, SVal );
    404 static void zsm_scopy_range ( Addr, Addr, SizeT );
    405 static void zsm_flush_cache ( void );
    406 
    407 #endif /* ! __HB_ZSM_H */
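/* Illustrative sketch (added for exposition; not part of the original
   sources): the calling protocol described in the comment above.  The
   client hands zsm_init its rcinc/rcdec callbacks; only after
   zsm_flush_cache() returns are the counts the client has observed
   exact.  Names beginning 'example_' are invented. */
#if 0
static void example_rcinc ( SVal s ) { /* bump client-side count for s */ }
static void example_rcdec ( SVal s ) { /* drop client-side count for s */ }

static void example_zsm_client ( void )
{
   zsm_init( example_rcinc, example_rcdec );
   /* ... shadow memory traffic; observed counts may be inexact ... */
   zsm_flush_cache();
   /* Now any SVal whose observed count is zero really is unreferenced
      by this library and could safely be reclaimed by the client. */
}
#endif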
    408 
    409 
    410 /* Round a up to the next multiple of N.  N must be a power of 2 */
     411 #define ROUNDUP(a, N)   (((a) + (N) - 1) & ~((N)-1))
    412 /* Round a down to the next multiple of N.  N must be a power of 2 */
     413 #define ROUNDDN(a, N)   ((a) & ~((N)-1))
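/* For example (with N a power of 2): ROUNDUP(13, 8) == 16 and
   ROUNDDN(13, 8) == 8, while exact multiples are left unchanged:
   ROUNDUP(16, 8) == ROUNDDN(16, 8) == 16. */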
    414 
    415 
    416 
    417 /* ------ User-supplied RC functions ------ */
    418 static void(*rcinc)(SVal) = NULL;
    419 static void(*rcdec)(SVal) = NULL;
    420 
    421 
    422 /* ------ CacheLine ------ */
    423 
    424 #define N_LINE_BITS      6 /* must be >= 3 */
    425 #define N_LINE_ARANGE    (1 << N_LINE_BITS)
    426 #define N_LINE_TREES     (N_LINE_ARANGE >> 3)
    427 
    428 typedef
    429    struct {
    430       UShort descrs[N_LINE_TREES];
    431       SVal   svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
    432    }
    433    CacheLine;
    434 
    435 #define TREE_DESCR_16_0 (1<<0)
    436 #define TREE_DESCR_32_0 (1<<1)
    437 #define TREE_DESCR_16_1 (1<<2)
    438 #define TREE_DESCR_64   (1<<3)
    439 #define TREE_DESCR_16_2 (1<<4)
    440 #define TREE_DESCR_32_1 (1<<5)
    441 #define TREE_DESCR_16_3 (1<<6)
    442 #define TREE_DESCR_8_0  (1<<7)
    443 #define TREE_DESCR_8_1  (1<<8)
    444 #define TREE_DESCR_8_2  (1<<9)
    445 #define TREE_DESCR_8_3  (1<<10)
    446 #define TREE_DESCR_8_4  (1<<11)
    447 #define TREE_DESCR_8_5  (1<<12)
    448 #define TREE_DESCR_8_6  (1<<13)
    449 #define TREE_DESCR_8_7  (1<<14)
    450 #define TREE_DESCR_DTY  (1<<15)
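/* For example, a tree holding a single 64-bit value has descriptor
   TREE_DESCR_64 (only tree[0] is valid), whereas a tree split fully into
   bytes has descriptor TREE_DESCR_8_7 | ... | TREE_DESCR_8_0 (all of
   tree[0..7] are valid).  See descr_to_validbits() below. */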
    451 
    452 typedef
    453    struct {
    454       SVal  dict[4]; /* can represent up to 4 diff values in the line */
    455       UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
    456                                       dict indexes */
    457       /* if dict[0] == SVal_INVALID then dict[1] is the index of the
    458          LineF to use, and dict[2..] are also SVal_INVALID. */
    459    }
    460    LineZ; /* compressed rep for a cache line */
    461 
    462 typedef
    463    struct {
    464       Bool inUse;
    465       SVal w64s[N_LINE_ARANGE];
    466    }
    467    LineF; /* full rep for a cache line */
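/* Illustrative sketch (added for exposition; not part of the original
   sources): reading one SVal out of a LineZ that is in the direct
   (compressed) representation.  The 2-bit index decoding mirrors
   read_twobit_array() later in this file; the function name is
   invented. */
#if 0
static SVal example_LineZ_get ( LineZ* lineZ, UWord ix /* 0 .. N_LINE_ARANGE-1 */ )
{
   UWord bix  = ix >> 2;            /* which byte of ix2s holds the index */
   UWord shft = 2 * (ix & 3);       /* which 2-bit field within that byte */
   UWord dix  = (lineZ->ix2s[bix] >> shft) & 3;
   tl_assert(lineZ->dict[0] != SVal_INVALID);  /* must not be an F-line ref */
   return lineZ->dict[dix];
}
#endif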
    468 
    469 /* Shadow memory.
    470    Primary map is a WordFM Addr SecMap*.
    471    SecMaps cover some page-size-ish section of address space and hold
    472      a compressed representation.
    473    CacheLine-sized chunks of SecMaps are copied into a Cache, being
    474    decompressed when moved into the cache and recompressed on the
    475    way out.  Because of this, the cache must operate as a writeback
    476    cache, not a writethrough one.
    477 
    478    Each SecMap must hold a power-of-2 number of CacheLines.  Hence
     479    N_SECMAP_BITS must be >= N_LINE_BITS.
    480 */
    481 #define N_SECMAP_BITS   13
    482 #define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)
    483 
    484 // # CacheLines held by a SecMap
    485 #define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)
    486 
    487 /* The data in the SecMap is held in the array of LineZs.  Each LineZ
    488    either carries the required data directly, in a compressed
     489    representation, or it holds (in .dict[1]) an index to the LineF in
    490    .linesF that holds the full representation.
    491 
    492    Currently-unused LineF's have their .inUse bit set to zero.
     493    Since each in-use LineF is referred to by exactly one LineZ,
    494    the number of .linesZ[] that refer to .linesF should equal
    495    the number of .linesF[] that have .inUse == True.
    496 
    497    RC obligations: the RCs presented to the user include exactly
    498    the values in:
    499    * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
    500    * F reps that are in use (.inUse == True)
    501 
    502    Hence the following actions at the following transitions are required:
    503 
    504    F rep: .inUse==True  -> .inUse==False        -- rcdec_LineF
    505    F rep: .inUse==False -> .inUse==True         -- rcinc_LineF
    506    Z rep: .dict[0] from other to SVal_INVALID   -- rcdec_LineZ
    507    Z rep: .dict[0] from SVal_INVALID to other   -- rcinc_LineZ
    508 */
    509 typedef
    510    struct {
    511       UInt   magic;
    512       LineZ  linesZ[N_SECMAP_ZLINES];
    513       LineF* linesF;
    514       UInt   linesF_size;
    515    }
    516    SecMap;
    517 
    518 #define SecMap_MAGIC   0x571e58cbU
    519 
    520 static inline Bool is_sane_SecMap ( SecMap* sm ) {
    521    return sm != NULL && sm->magic == SecMap_MAGIC;
    522 }
    523 
    524 /* ------ Cache ------ */
    525 
    526 #define N_WAY_BITS 16
    527 #define N_WAY_NENT (1 << N_WAY_BITS)
    528 
    529 /* Each tag is the address of the associated CacheLine, rounded down
    530    to a CacheLine address boundary.  A CacheLine size must be a power
    531    of 2 and must be 8 or more.  Hence an easy way to initialise the
    532    cache so it is empty is to set all the tag values to any value % 8
    533    != 0, eg 1.  This means all queries in the cache initially miss.
     534    It does however require us to detect, and not write back, any line
    535    with a bogus tag. */
    536 typedef
    537    struct {
    538       CacheLine lyns0[N_WAY_NENT];
    539       Addr      tags0[N_WAY_NENT];
    540    }
    541    Cache;
    542 
    543 static inline Bool is_valid_scache_tag ( Addr tag ) {
    544    /* a valid tag should be naturally aligned to the start of
    545       a CacheLine. */
    546    return 0 == (tag & (N_LINE_ARANGE - 1));
    547 }
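/* Illustrative sketch (added for exposition; not part of the original
   sources): emptying the cache as the comment above suggests, by giving
   every way a bogus (unaligned) tag so that all lookups initially miss.
   The function name is invented; the real invalidation code appears
   later in this file. */
#if 0
static void example_cache_make_empty ( Cache* c )
{
   UWord i;
   for (i = 0; i < N_WAY_NENT; i++) {
      c->tags0[i] = 1;  /* 1 % 8 != 0, hence never a valid line tag */
      /* the corresponding lyns0[i] contents are irrelevant while the
         tag is bogus, but must not be written back */
   }
}
#endif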
    548 
    549 
    550 /* --------- Primary data structures --------- */
    551 
    552 /* Shadow memory primary map */
    553 static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
    554 static Cache   cache_shmem;
    555 
    556 
    557 static UWord stats__secmaps_search       = 0; // # SM finds
    558 static UWord stats__secmaps_search_slow  = 0; // # SM lookupFMs
    559 static UWord stats__secmaps_allocd       = 0; // # SecMaps issued
    560 static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
    561 static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
    562 static UWord stats__secmap_linesZ_bytes  = 0; // .. using this much storage
    563 static UWord stats__secmap_linesF_allocd = 0; // # LineF's issued
    564 static UWord stats__secmap_linesF_bytes  = 0; //  .. using this much storage
    565 static UWord stats__secmap_iterator_steppings = 0; // # calls to stepSMIter
    566 static UWord stats__cache_Z_fetches      = 0; // # Z lines fetched
    567 static UWord stats__cache_Z_wbacks       = 0; // # Z lines written back
    568 static UWord stats__cache_F_fetches      = 0; // # F lines fetched
    569 static UWord stats__cache_F_wbacks       = 0; // # F lines written back
    570 static UWord stats__cache_invals         = 0; // # cache invals
    571 static UWord stats__cache_flushes        = 0; // # cache flushes
    572 static UWord stats__cache_totrefs        = 0; // # total accesses
    573 static UWord stats__cache_totmisses      = 0; // # misses
    574 static ULong stats__cache_make_New_arange = 0; // total arange made New
    575 static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
    576 static UWord stats__cline_normalises     = 0; // # calls to cacheline_normalise
    577 static UWord stats__cline_cread64s       = 0; // # calls to s_m_read64
    578 static UWord stats__cline_cread32s       = 0; // # calls to s_m_read32
    579 static UWord stats__cline_cread16s       = 0; // # calls to s_m_read16
    580 static UWord stats__cline_cread08s       = 0; // # calls to s_m_read8
    581 static UWord stats__cline_cwrite64s      = 0; // # calls to s_m_write64
    582 static UWord stats__cline_cwrite32s      = 0; // # calls to s_m_write32
    583 static UWord stats__cline_cwrite16s      = 0; // # calls to s_m_write16
    584 static UWord stats__cline_cwrite08s      = 0; // # calls to s_m_write8
     585 static UWord stats__cline_sread08s       = 0; // # calls to s_m_get8
     586 static UWord stats__cline_swrite08s      = 0; // # calls to s_m_set8
     587 static UWord stats__cline_swrite16s      = 0; // # calls to s_m_set16
     588 static UWord stats__cline_swrite32s      = 0; // # calls to s_m_set32
     589 static UWord stats__cline_swrite64s      = 0; // # calls to s_m_set64
    590 static UWord stats__cline_scopy08s       = 0; // # calls to s_m_copy8
    591 static UWord stats__cline_64to32splits   = 0; // # 64-bit accesses split
    592 static UWord stats__cline_32to16splits   = 0; // # 32-bit accesses split
    593 static UWord stats__cline_16to8splits    = 0; // # 16-bit accesses split
    594 static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
    595 static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
    596 static UWord stats__cline_16to8pulldown  = 0; // # calls to pulldown_to_8
    597 static UWord stats__vts__tick            = 0; // # calls to VTS__tick
    598 static UWord stats__vts__join            = 0; // # calls to VTS__join
    599 static UWord stats__vts__cmpLEQ          = 0; // # calls to VTS__cmpLEQ
    600 static UWord stats__vts__cmp_structural  = 0; // # calls to VTS__cmp_structural
    601 
    602 // # calls to VTS__cmp_structural w/ slow case
    603 static UWord stats__vts__cmp_structural_slow = 0;
    604 
    605 // # calls to VTS__indexAt_SLOW
    606 static UWord stats__vts__indexat_slow = 0;
    607 
    608 // # calls to vts_set__find__or__clone_and_add
    609 static UWord stats__vts_set__focaa    = 0;
    610 
    611 // # calls to vts_set__find__or__clone_and_add that lead to an
    612 // allocation
    613 static UWord stats__vts_set__focaa_a  = 0;
    614 
    615 
    616 static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
    617    return a & ~(N_SECMAP_ARANGE - 1);
    618 }
    619 static inline UWord shmem__get_SecMap_offset ( Addr a ) {
    620    return a & (N_SECMAP_ARANGE - 1);
    621 }
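/* For example, with N_SECMAP_BITS == 13 (N_SECMAP_ARANGE == 8192 == 0x2000),
   address 0x5003 rounds to SecMap base 0x4000 and has offset 0x1003. */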
    622 
    623 
    624 /*----------------------------------------------------------------*/
    625 /*--- map_shmem :: WordFM Addr SecMap                          ---*/
    626 /*--- shadow memory (low level handlers) (shmem__* fns)        ---*/
    627 /*----------------------------------------------------------------*/
    628 
    629 /*--------------- SecMap allocation --------------- */
    630 
    631 static HChar* shmem__bigchunk_next = NULL;
    632 static HChar* shmem__bigchunk_end1 = NULL;
    633 
    634 static void* shmem__bigchunk_alloc ( SizeT n )
    635 {
    636    const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
    637    tl_assert(n > 0);
    638    n = VG_ROUNDUP(n, 16);
    639    tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
    640    tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
    641              <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
    642    if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
    643       if (0)
    644       VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
    645                   (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
    646       shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
    647       if (shmem__bigchunk_next == NULL)
    648          VG_(out_of_memory_NORETURN)(
    649             "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
    650       shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
    651    }
    652    tl_assert(shmem__bigchunk_next);
    653    tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
    654    tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
    655    shmem__bigchunk_next += n;
    656    return shmem__bigchunk_next - n;
    657 }
    658 
    659 static SecMap* shmem__alloc_SecMap ( void )
    660 {
    661    Word    i, j;
    662    SecMap* sm = shmem__bigchunk_alloc( sizeof(SecMap) );
    663    if (0) VG_(printf)("alloc_SecMap %p\n",sm);
    664    tl_assert(sm);
    665    sm->magic = SecMap_MAGIC;
    666    for (i = 0; i < N_SECMAP_ZLINES; i++) {
    667       sm->linesZ[i].dict[0] = SVal_NOACCESS;
    668       sm->linesZ[i].dict[1] = SVal_INVALID;
    669       sm->linesZ[i].dict[2] = SVal_INVALID;
    670       sm->linesZ[i].dict[3] = SVal_INVALID;
    671       for (j = 0; j < N_LINE_ARANGE/4; j++)
    672          sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
    673    }
    674    sm->linesF      = NULL;
    675    sm->linesF_size = 0;
    676    stats__secmaps_allocd++;
    677    stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
    678    stats__secmap_linesZ_allocd += N_SECMAP_ZLINES;
    679    stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(LineZ);
    680    return sm;
    681 }
    682 
    683 typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
    684 static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };
    685 
    686 static SecMap* shmem__find_SecMap ( Addr ga )
    687 {
    688    SecMap* sm    = NULL;
    689    Addr    gaKey = shmem__round_to_SecMap_base(ga);
    690    // Cache
    691    stats__secmaps_search++;
    692    if (LIKELY(gaKey == smCache[0].gaKey))
    693       return smCache[0].sm;
    694    if (LIKELY(gaKey == smCache[1].gaKey)) {
    695       SMCacheEnt tmp = smCache[0];
    696       smCache[0] = smCache[1];
    697       smCache[1] = tmp;
    698       return smCache[0].sm;
    699    }
    700    if (gaKey == smCache[2].gaKey) {
    701       SMCacheEnt tmp = smCache[1];
    702       smCache[1] = smCache[2];
    703       smCache[2] = tmp;
    704       return smCache[1].sm;
    705    }
    706    // end Cache
    707    stats__secmaps_search_slow++;
    708    if (VG_(lookupFM)( map_shmem,
    709                       NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
    710       tl_assert(sm != NULL);
    711       smCache[2] = smCache[1];
    712       smCache[1] = smCache[0];
    713       smCache[0].gaKey = gaKey;
    714       smCache[0].sm    = sm;
    715    } else {
    716       tl_assert(sm == NULL);
    717    }
    718    return sm;
    719 }
    720 
    721 static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
    722 {
    723    SecMap* sm = shmem__find_SecMap ( ga );
    724    if (LIKELY(sm)) {
    725       return sm;
    726    } else {
    727       /* create a new one */
    728       Addr gaKey = shmem__round_to_SecMap_base(ga);
    729       sm = shmem__alloc_SecMap();
    730       tl_assert(sm);
    731       VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
    732       return sm;
    733    }
    734 }
    735 
    736 
    737 /* ------------ LineF and LineZ related ------------ */
    738 
    739 static void rcinc_LineF ( LineF* lineF ) {
    740    UWord i;
    741    tl_assert(lineF->inUse);
    742    for (i = 0; i < N_LINE_ARANGE; i++)
    743       rcinc(lineF->w64s[i]);
    744 }
    745 
    746 static void rcdec_LineF ( LineF* lineF ) {
    747    UWord i;
    748    tl_assert(lineF->inUse);
    749    for (i = 0; i < N_LINE_ARANGE; i++)
    750       rcdec(lineF->w64s[i]);
    751 }
    752 
    753 static void rcinc_LineZ ( LineZ* lineZ ) {
    754    tl_assert(lineZ->dict[0] != SVal_INVALID);
    755    rcinc(lineZ->dict[0]);
    756    if (lineZ->dict[1] != SVal_INVALID) rcinc(lineZ->dict[1]);
    757    if (lineZ->dict[2] != SVal_INVALID) rcinc(lineZ->dict[2]);
    758    if (lineZ->dict[3] != SVal_INVALID) rcinc(lineZ->dict[3]);
    759 }
    760 
    761 static void rcdec_LineZ ( LineZ* lineZ ) {
    762    tl_assert(lineZ->dict[0] != SVal_INVALID);
    763    rcdec(lineZ->dict[0]);
    764    if (lineZ->dict[1] != SVal_INVALID) rcdec(lineZ->dict[1]);
    765    if (lineZ->dict[2] != SVal_INVALID) rcdec(lineZ->dict[2]);
    766    if (lineZ->dict[3] != SVal_INVALID) rcdec(lineZ->dict[3]);
    767 }
    768 
    769 inline
    770 static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
    771    Word bix, shft, mask, prep;
    772    tl_assert(ix >= 0);
    773    bix  = ix >> 2;
    774    shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
    775    mask = 3 << shft;
    776    prep = b2 << shft;
    777    arr[bix] = (arr[bix] & ~mask) | prep;
    778 }
    779 
    780 inline
    781 static UWord read_twobit_array ( UChar* arr, UWord ix ) {
    782    Word bix, shft;
    783    tl_assert(ix >= 0);
    784    bix  = ix >> 2;
    785    shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
    786    return (arr[bix] >> shft) & 3;
    787 }
    788 
    789 /* Given address 'tag', find either the Z or F line containing relevant
    790    data, so it can be read into the cache.
    791 */
    792 static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
    793                                   /*OUT*/LineF** fp, Addr tag ) {
    794    LineZ* lineZ;
    795    LineF* lineF;
    796    UWord   zix;
    797    SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
    798    UWord   smoff = shmem__get_SecMap_offset(tag);
    799    /* since smoff is derived from a valid tag, it should be
    800       cacheline-aligned. */
    801    tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
    802    zix = smoff >> N_LINE_BITS;
    803    tl_assert(zix < N_SECMAP_ZLINES);
    804    lineZ = &sm->linesZ[zix];
    805    lineF = NULL;
    806    if (lineZ->dict[0] == SVal_INVALID) {
    807       UInt fix = (UInt)lineZ->dict[1];
    808       tl_assert(sm->linesF);
    809       tl_assert(sm->linesF_size > 0);
    810       tl_assert(fix >= 0 && fix < sm->linesF_size);
    811       lineF = &sm->linesF[fix];
    812       tl_assert(lineF->inUse);
    813       lineZ = NULL;
    814    }
    815    *zp = lineZ;
    816    *fp = lineF;
    817 }
    818 
    819 /* Given address 'tag', return the relevant SecMap and the index of
    820    the LineZ within it, in the expectation that the line is to be
    821    overwritten.  Regardless of whether 'tag' is currently associated
     822    with a Z or F representation, do an rcdec on the current
    823    representation, in recognition of the fact that the contents are
    824    just about to be overwritten. */
    825 static __attribute__((noinline))
    826 void find_Z_for_writing ( /*OUT*/SecMap** smp,
    827                           /*OUT*/Word* zixp,
    828                           Addr tag ) {
    829    LineZ* lineZ;
    830    LineF* lineF;
    831    UWord   zix;
    832    SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
    833    UWord   smoff = shmem__get_SecMap_offset(tag);
    834    /* since smoff is derived from a valid tag, it should be
    835       cacheline-aligned. */
    836    tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
    837    zix = smoff >> N_LINE_BITS;
    838    tl_assert(zix < N_SECMAP_ZLINES);
    839    lineZ = &sm->linesZ[zix];
    840    lineF = NULL;
    841    /* re RCs, we are freeing up this LineZ/LineF so that new data can
    842       be parked in it.  Hence have to rcdec it accordingly. */
    843    /* If lineZ has an associated lineF, free it up. */
    844    if (lineZ->dict[0] == SVal_INVALID) {
    845       UInt fix = (UInt)lineZ->dict[1];
    846       tl_assert(sm->linesF);
    847       tl_assert(sm->linesF_size > 0);
    848       tl_assert(fix >= 0 && fix < sm->linesF_size);
    849       lineF = &sm->linesF[fix];
    850       tl_assert(lineF->inUse);
    851       rcdec_LineF(lineF);
    852       lineF->inUse = False;
    853    } else {
    854       rcdec_LineZ(lineZ);
    855    }
    856    *smp  = sm;
    857    *zixp = zix;
    858 }
    859 
    860 static __attribute__((noinline))
    861 void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) {
    862    UInt        i, new_size;
    863    LineF* nyu;
    864 
    865    if (sm->linesF) {
    866       tl_assert(sm->linesF_size > 0);
    867    } else {
    868       tl_assert(sm->linesF_size == 0);
    869    }
    870 
    871    if (sm->linesF) {
    872       for (i = 0; i < sm->linesF_size; i++) {
    873          if (!sm->linesF[i].inUse) {
    874             *fixp = (Word)i;
    875             return;
    876          }
    877       }
    878    }
    879 
    880    /* No free F line found.  Expand existing array and try again. */
    881    new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
    882    nyu      = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)",
    883                            new_size * sizeof(LineF) );
    884    tl_assert(nyu);
    885 
    886    stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
    887    stats__secmap_linesF_bytes  += (new_size - sm->linesF_size)
    888                                   * sizeof(LineF);
    889 
    890    if (0)
    891    VG_(printf)("SM %p: expand F array from %d to %d\n",
    892                sm, (Int)sm->linesF_size, new_size);
    893 
    894    for (i = 0; i < new_size; i++)
    895       nyu[i].inUse = False;
    896 
    897    if (sm->linesF) {
    898       for (i = 0; i < sm->linesF_size; i++) {
    899          tl_assert(sm->linesF[i].inUse);
    900          nyu[i] = sm->linesF[i];
    901       }
    902       VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) );
    903       HG_(free)(sm->linesF);
    904    }
    905 
    906    sm->linesF      = nyu;
    907    sm->linesF_size = new_size;
    908 
    909    for (i = 0; i < sm->linesF_size; i++) {
    910       if (!sm->linesF[i].inUse) {
    911          *fixp = (Word)i;
    912          return;
    913       }
     914    }
     915 
     916    /*NOTREACHED*/
     917    tl_assert(0);
    918 }
    919 
    920 
    921 /* ------------ CacheLine and implicit-tree related ------------ */
    922 
    923 __attribute__((unused))
    924 static void pp_CacheLine ( CacheLine* cl ) {
    925    Word i;
    926    if (!cl) {
    927       VG_(printf)("%s","pp_CacheLine(NULL)\n");
    928       return;
    929    }
    930    for (i = 0; i < N_LINE_TREES; i++)
    931       VG_(printf)("   descr: %04lx\n", (UWord)cl->descrs[i]);
    932    for (i = 0; i < N_LINE_ARANGE; i++)
    933       VG_(printf)("    sval: %08lx\n", (UWord)cl->svals[i]);
    934 }
    935 
    936 static UChar descr_to_validbits ( UShort descr )
    937 {
    938    /* a.k.a Party Time for gcc's constant folder */
    939 #  define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
    940                 b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0)  \
    941              ( (UShort) ( ( (b8_7)  << 14) | ( (b8_6)  << 13) | \
    942                           ( (b8_5)  << 12) | ( (b8_4)  << 11) | \
    943                           ( (b8_3)  << 10) | ( (b8_2)  << 9)  | \
    944                           ( (b8_1)  << 8)  | ( (b8_0)  << 7)  | \
    945                           ( (b16_3) << 6)  | ( (b32_1) << 5)  | \
    946                           ( (b16_2) << 4)  | ( (b64)   << 3)  | \
    947                           ( (b16_1) << 2)  | ( (b32_0) << 1)  | \
    948                           ( (b16_0) << 0) ) )
    949 
    950 #  define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
    951              ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
    952                          ( (bit5) << 5) | ( (bit4) << 4) | \
    953                          ( (bit3) << 3) | ( (bit2) << 2) | \
    954                          ( (bit1) << 1) | ( (bit0) << 0) ) )
    955 
    956    /* these should all get folded out at compile time */
    957    tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
    958    tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
    959    tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
    960    tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
    961    tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
    962    tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
    963    tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
    964    tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
    965    tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);
    966 
    967    switch (descr) {
    968    /*
    969               +--------------------------------- TREE_DESCR_8_7
    970               |             +------------------- TREE_DESCR_8_0
    971               |             |  +---------------- TREE_DESCR_16_3
    972               |             |  | +-------------- TREE_DESCR_32_1
    973               |             |  | | +------------ TREE_DESCR_16_2
    974               |             |  | | |  +--------- TREE_DESCR_64
    975               |             |  | | |  |  +------ TREE_DESCR_16_1
    976               |             |  | | |  |  | +---- TREE_DESCR_32_0
    977               |             |  | | |  |  | | +-- TREE_DESCR_16_0
    978               |             |  | | |  |  | | |
    979               |             |  | | |  |  | | |   GRANULARITY, 7 -> 0 */
    980    case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8  8 8 8 8 */
    981                                                  return BYTE(1,1,1,1,1,1,1,1);
    982    case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16   8 8 8 8 */
    983                                                  return BYTE(1,1,0,1,1,1,1,1);
    984    case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16  8 8  8 8 8 8 */
    985                                                  return BYTE(0,1,1,1,1,1,1,1);
    986    case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16  16   8 8 8 8 */
    987                                                  return BYTE(0,1,0,1,1,1,1,1);
    988 
    989    case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8  8 8 16 */
    990                                                  return BYTE(1,1,1,1,1,1,0,1);
    991    case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16   8 8 16 */
    992                                                  return BYTE(1,1,0,1,1,1,0,1);
    993    case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16  8 8  8 8 16 */
    994                                                  return BYTE(0,1,1,1,1,1,0,1);
    995    case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16  16   8 8 16 */
    996                                                  return BYTE(0,1,0,1,1,1,0,1);
    997 
    998    case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8  16 8 8 */
    999                                                  return BYTE(1,1,1,1,0,1,1,1);
   1000    case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16   16 8 8 */
   1001                                                  return BYTE(1,1,0,1,0,1,1,1);
   1002    case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16  8 8  16 8 8 */
   1003                                                  return BYTE(0,1,1,1,0,1,1,1);
   1004    case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16  16   16 8 8 */
   1005                                                  return BYTE(0,1,0,1,0,1,1,1);
   1006 
   1007    case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8  16 16 */
   1008                                                  return BYTE(1,1,1,1,0,1,0,1);
   1009    case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16   16 16 */
   1010                                                  return BYTE(1,1,0,1,0,1,0,1);
   1011    case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16  8 8  16 16 */
   1012                                                  return BYTE(0,1,1,1,0,1,0,1);
   1013    case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16  16   16 16 */
   1014                                                  return BYTE(0,1,0,1,0,1,0,1);
   1015 
   1016    case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32  8 8 8 8 */
   1017                                                  return BYTE(0,0,0,1,1,1,1,1);
   1018    case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32  8 8 16  */
   1019                                                  return BYTE(0,0,0,1,1,1,0,1);
   1020    case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32  16  8 8 */
   1021                                                  return BYTE(0,0,0,1,0,1,1,1);
   1022    case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32  16  16  */
   1023                                                  return BYTE(0,0,0,1,0,1,0,1);
   1024 
   1025    case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8  32 */
   1026                                                  return BYTE(1,1,1,1,0,0,0,1);
   1027    case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16   32 */
   1028                                                  return BYTE(1,1,0,1,0,0,0,1);
   1029    case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16  8 8  32 */
   1030                                                  return BYTE(0,1,1,1,0,0,0,1);
   1031    case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16  16   32 */
   1032                                                  return BYTE(0,1,0,1,0,0,0,1);
   1033 
   1034    case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
   1035                                                  return BYTE(0,0,0,1,0,0,0,1);
   1036 
   1037    case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
   1038                                                  return BYTE(0,0,0,0,0,0,0,1);
   1039 
   1040    default: return BYTE(0,0,0,0,0,0,0,0);
   1041                    /* INVALID - any valid descr produces at least one
   1042                       valid bit in tree[0..7]*/
   1043    }
   1044    /* NOTREACHED*/
   1045    tl_assert(0);
   1046 
   1047 #  undef DESCR
   1048 #  undef BYTE
   1049 }
   1050 
   1051 __attribute__((unused))
   1052 static Bool is_sane_Descr ( UShort descr ) {
   1053    return descr_to_validbits(descr) != 0;
   1054 }
   1055 
   1056 static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
   1057    VG_(sprintf)(dst,
   1058                 "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
   1059                 (Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
   1060                 (Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
   1061                 (Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
   1062                 (Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
   1063                 (Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
   1064                 (Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
   1065                 (Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
   1066                 (Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
   1067                 (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
   1068                 (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
   1069                 (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
   1070                 (Int)((descr & TREE_DESCR_64)   ? 1 : 0),
   1071                 (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
   1072                 (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
   1073                 (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
   1074    );
   1075 }
   1076 static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
   1077    VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
   1078                      (Int)((byte & 128) ? 1 : 0),
   1079                      (Int)((byte &  64) ? 1 : 0),
   1080                      (Int)((byte &  32) ? 1 : 0),
   1081                      (Int)((byte &  16) ? 1 : 0),
   1082                      (Int)((byte &   8) ? 1 : 0),
   1083                      (Int)((byte &   4) ? 1 : 0),
   1084                      (Int)((byte &   2) ? 1 : 0),
   1085                      (Int)((byte &   1) ? 1 : 0)
   1086    );
   1087 }
   1088 
   1089 static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
   1090    Word  i;
   1091    UChar validbits = descr_to_validbits(descr);
   1092    HChar buf[128], buf2[128];
   1093    if (validbits == 0)
   1094       goto bad;
   1095    for (i = 0; i < 8; i++) {
   1096       if (validbits & (1<<i)) {
   1097          if (tree[i] == SVal_INVALID)
   1098             goto bad;
   1099       } else {
   1100          if (tree[i] != SVal_INVALID)
   1101             goto bad;
   1102       }
   1103    }
   1104    return True;
   1105   bad:
   1106    sprintf_Descr( buf, descr );
   1107    sprintf_Byte( buf2, validbits );
   1108    VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
   1109    VG_(printf)("   validbits 0x%02lx    %s\n", (UWord)validbits, buf2);
   1110    VG_(printf)("       descr 0x%04lx  %s\n", (UWord)descr, buf);
   1111    for (i = 0; i < 8; i++)
   1112       VG_(printf)("   [%ld] 0x%016llx\n", i, tree[i]);
   1113    VG_(printf)("%s","}\n");
    1114    return False;
   1115 }
   1116 
   1117 static Bool is_sane_CacheLine ( CacheLine* cl )
   1118 {
   1119    Word tno, cloff;
   1120 
   1121    if (!cl) goto bad;
   1122 
   1123    for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
   1124       UShort descr = cl->descrs[tno];
   1125       SVal*  tree  = &cl->svals[cloff];
   1126       if (!is_sane_Descr_and_Tree(descr, tree))
   1127          goto bad;
   1128    }
   1129    tl_assert(cloff == N_LINE_ARANGE);
   1130    return True;
   1131   bad:
   1132    pp_CacheLine(cl);
   1133    return False;
   1134 }
   1135 
   1136 static UShort normalise_tree ( /*MOD*/SVal* tree )
   1137 {
   1138    UShort descr;
   1139    /* pre: incoming tree[0..7] does not have any invalid shvals, in
   1140       particular no zeroes. */
   1141    if (UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
   1142                 || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
   1143                 || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
   1144                 || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
   1145       tl_assert(0);
   1146 
   1147    descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
   1148            | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
   1149            | TREE_DESCR_8_1 | TREE_DESCR_8_0;
   1150    /* build 16-bit layer */
   1151    if (tree[1] == tree[0]) {
   1152       tree[1] = SVal_INVALID;
   1153       descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
   1154       descr |= TREE_DESCR_16_0;
   1155    }
   1156    if (tree[3] == tree[2]) {
   1157       tree[3] = SVal_INVALID;
   1158       descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
   1159       descr |= TREE_DESCR_16_1;
   1160    }
   1161    if (tree[5] == tree[4]) {
   1162       tree[5] = SVal_INVALID;
   1163       descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
   1164       descr |= TREE_DESCR_16_2;
   1165    }
   1166    if (tree[7] == tree[6]) {
   1167       tree[7] = SVal_INVALID;
   1168       descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
   1169       descr |= TREE_DESCR_16_3;
   1170    }
   1171    /* build 32-bit layer */
   1172    if (tree[2] == tree[0]
   1173        && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
   1174       tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
   1175       descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
   1176       descr |= TREE_DESCR_32_0;
   1177    }
   1178    if (tree[6] == tree[4]
   1179        && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
   1180       tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
   1181       descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
   1182       descr |= TREE_DESCR_32_1;
   1183    }
   1184    /* build 64-bit layer */
   1185    if (tree[4] == tree[0]
   1186        && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
   1187       tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
   1188       descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
   1189       descr |= TREE_DESCR_64;
   1190    }
   1191    return descr;
   1192 }
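/* For example, a tree in which all eight leaves hold the same SVal
   collapses all the way up: normalise_tree leaves only tree[0] valid,
   sets tree[1..7] to SVal_INVALID, and returns TREE_DESCR_64. */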
   1193 
   1194 /* This takes a cacheline where all the data is at the leaves
   1195    (w8[..]) and builds a correctly normalised tree. */
   1196 static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
   1197 {
   1198    Word tno, cloff;
   1199    for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
   1200       SVal* tree = &cl->svals[cloff];
   1201       cl->descrs[tno] = normalise_tree( tree );
   1202    }
   1203    tl_assert(cloff == N_LINE_ARANGE);
   1204    if (CHECK_ZSM)
   1205       tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   1206    stats__cline_normalises++;
   1207 }
   1208 
   1209 
   1210 typedef struct { UChar count; SVal sval; } CountedSVal;
   1211 
   1212 static
   1213 void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
   1214                                /*OUT*/Word* dstUsedP,
   1215                                Word nDst, CacheLine* src )
   1216 {
   1217    Word  tno, cloff, dstUsed;
   1218 
   1219    tl_assert(nDst == N_LINE_ARANGE);
   1220    dstUsed = 0;
   1221 
   1222    for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
   1223       UShort descr = src->descrs[tno];
   1224       SVal*  tree  = &src->svals[cloff];
   1225 
   1226       /* sequentialise the tree described by (descr,tree). */
   1227 #     define PUT(_n,_v)                                \
   1228          do { dst[dstUsed  ].count = (_n);             \
   1229               dst[dstUsed++].sval  = (_v);             \
   1230          } while (0)
   1231 
   1232       /* byte 0 */
   1233       if (descr & TREE_DESCR_64)   PUT(8, tree[0]); else
   1234       if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
   1235       if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
   1236       if (descr & TREE_DESCR_8_0)  PUT(1, tree[0]);
   1237       /* byte 1 */
   1238       if (descr & TREE_DESCR_8_1)  PUT(1, tree[1]);
   1239       /* byte 2 */
   1240       if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
   1241       if (descr & TREE_DESCR_8_2)  PUT(1, tree[2]);
   1242       /* byte 3 */
   1243       if (descr & TREE_DESCR_8_3)  PUT(1, tree[3]);
   1244       /* byte 4 */
   1245       if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
   1246       if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
   1247       if (descr & TREE_DESCR_8_4)  PUT(1, tree[4]);
   1248       /* byte 5 */
   1249       if (descr & TREE_DESCR_8_5)  PUT(1, tree[5]);
   1250       /* byte 6 */
   1251       if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
   1252       if (descr & TREE_DESCR_8_6)  PUT(1, tree[6]);
   1253       /* byte 7 */
   1254       if (descr & TREE_DESCR_8_7)  PUT(1, tree[7]);
   1255 
   1256 #     undef PUT
   1257       /* END sequentialise the tree described by (descr,tree). */
   1258 
   1259    }
   1260    tl_assert(cloff == N_LINE_ARANGE);
   1261    tl_assert(dstUsed <= nDst);
   1262 
   1263    *dstUsedP = dstUsed;
   1264 }
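         /* Worked example: for a tree whose descriptor is just
            TREE_DESCR_64, the dispatch above emits a single entry
            (count 8, tree[0]).  For a descriptor of
            TREE_DESCR_16_0 | TREE_DESCR_8_2 | TREE_DESCR_8_3 | TREE_DESCR_32_1
            it emits (2,tree[0]) (1,tree[2]) (1,tree[3]) (4,tree[4]) -- one
            CountedSVal per valid node of the normalised tree. */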
   1265 
   1266 /* Write the cacheline 'wix' to backing store.  Where it ends up
   1267    is determined by its tag field. */
   1268 static __attribute__((noinline)) void cacheline_wback ( UWord wix )
   1269 {
   1270    Word        i, j, k, m;
   1271    Addr        tag;
   1272    SecMap*     sm;
   1273    CacheLine*  cl;
   1274    LineZ* lineZ;
   1275    LineF* lineF;
   1276    Word        zix, fix, csvalsUsed;
   1277    CountedSVal csvals[N_LINE_ARANGE];
   1278    SVal        sv;
   1279 
   1280    if (0)
   1281    VG_(printf)("scache wback line %d\n", (Int)wix);
   1282 
   1283    tl_assert(wix >= 0 && wix < N_WAY_NENT);
   1284 
   1285    tag =  cache_shmem.tags0[wix];
   1286    cl  = &cache_shmem.lyns0[wix];
   1287 
   1288    /* The cache line may have been invalidated; if so, ignore it. */
   1289    if (!is_valid_scache_tag(tag))
   1290       return;
   1291 
   1292    /* Where are we going to put it? */
   1293    sm         = NULL;
   1294    lineZ      = NULL;
   1295    lineF      = NULL;
   1296    zix = fix = -1;
   1297 
   1298    /* find the Z line to write in and rcdec it or the associated F
   1299       line. */
   1300    find_Z_for_writing( &sm, &zix, tag );
   1301 
   1302    tl_assert(sm);
   1303    tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
   1304    lineZ = &sm->linesZ[zix];
   1305 
   1306    /* Generate the data to be stored */
   1307    if (CHECK_ZSM)
   1308       tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   1309 
   1310    csvalsUsed = -1;
   1311    sequentialise_CacheLine( csvals, &csvalsUsed,
   1312                             N_LINE_ARANGE, cl );
   1313    tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
   1314    if (0) VG_(printf)("%lu ", csvalsUsed);
   1315 
   1316    lineZ->dict[0] = lineZ->dict[1]
   1317                   = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
   1318 
   1319    /* i indexes actual shadow values, k is cursor in csvals */
   1320    i = 0;
   1321    for (k = 0; k < csvalsUsed; k++) {
   1322 
   1323       sv = csvals[k].sval;
   1324       if (CHECK_ZSM)
   1325          tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
   1326       /* do we already have it? */
   1327       if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
   1328       if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
   1329       if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
   1330       if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
   1331       /* no.  look for a free slot. */
   1332       if (CHECK_ZSM)
   1333          tl_assert(sv != SVal_INVALID);
   1334       if (lineZ->dict[0]
   1335           == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
   1336       if (lineZ->dict[1]
   1337           == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
   1338       if (lineZ->dict[2]
   1339           == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
   1340       if (lineZ->dict[3]
   1341           == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
   1342       break; /* we'll have to use the f rep */
   1343      dict_ok:
   1344       m = csvals[k].count;
   1345       if (m == 8) {
   1346          write_twobit_array( lineZ->ix2s, i+0, j );
   1347          write_twobit_array( lineZ->ix2s, i+1, j );
   1348          write_twobit_array( lineZ->ix2s, i+2, j );
   1349          write_twobit_array( lineZ->ix2s, i+3, j );
   1350          write_twobit_array( lineZ->ix2s, i+4, j );
   1351          write_twobit_array( lineZ->ix2s, i+5, j );
   1352          write_twobit_array( lineZ->ix2s, i+6, j );
   1353          write_twobit_array( lineZ->ix2s, i+7, j );
   1354          i += 8;
   1355       }
   1356       else if (m == 4) {
   1357          write_twobit_array( lineZ->ix2s, i+0, j );
   1358          write_twobit_array( lineZ->ix2s, i+1, j );
   1359          write_twobit_array( lineZ->ix2s, i+2, j );
   1360          write_twobit_array( lineZ->ix2s, i+3, j );
   1361          i += 4;
   1362       }
   1363       else if (m == 1) {
   1364          write_twobit_array( lineZ->ix2s, i+0, j );
   1365          i += 1;
   1366       }
   1367       else if (m == 2) {
   1368          write_twobit_array( lineZ->ix2s, i+0, j );
   1369          write_twobit_array( lineZ->ix2s, i+1, j );
   1370          i += 2;
   1371       }
   1372       else {
    1373          tl_assert(0); /* 8, 4, 2 or 1 are the only legitimate values for m */
   1374       }
   1375 
   1376    }
   1377 
   1378    if (LIKELY(i == N_LINE_ARANGE)) {
   1379       /* Construction of the compressed representation was
   1380          successful. */
   1381       rcinc_LineZ(lineZ);
   1382       stats__cache_Z_wbacks++;
   1383    } else {
   1384       /* Cannot use the compressed(z) representation.  Use the full(f)
   1385          rep instead. */
   1386       tl_assert(i >= 0 && i < N_LINE_ARANGE);
   1387       alloc_F_for_writing( sm, &fix );
   1388       tl_assert(sm->linesF);
   1389       tl_assert(sm->linesF_size > 0);
   1390       tl_assert(fix >= 0 && fix < (Word)sm->linesF_size);
   1391       lineF = &sm->linesF[fix];
   1392       tl_assert(!lineF->inUse);
   1393       lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
   1394       lineZ->dict[1] = (SVal)fix;
   1395       lineF->inUse = True;
   1396       i = 0;
   1397       for (k = 0; k < csvalsUsed; k++) {
   1398          if (CHECK_ZSM)
   1399             tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
   1400          sv = csvals[k].sval;
   1401          if (CHECK_ZSM)
   1402             tl_assert(sv != SVal_INVALID);
   1403          for (m = csvals[k].count; m > 0; m--) {
   1404             lineF->w64s[i] = sv;
   1405             i++;
   1406          }
   1407       }
   1408       tl_assert(i == N_LINE_ARANGE);
   1409       rcinc_LineF(lineF);
   1410       stats__cache_F_wbacks++;
   1411    }
   1412 }
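         /* Informal summary of the two representations used above: if the
            line's N_LINE_ARANGE shadow values contain at most 4 distinct
            SVals, it is stored compressed (Z): the distinct values go in
            lineZ->dict[0..3] and each shadow value becomes a 2-bit index
            into that dictionary (lineZ->ix2s).  Otherwise the full (F)
            representation is used: the values are copied verbatim into
            lineF->w64s[], and the Z line marks this by setting dict[0,2,3]
            to SVal_INVALID and stashing the F-line index in dict[1]. */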
   1413 
   1414 /* Fetch the cacheline 'wix' from the backing store.  The tag
   1415    associated with 'wix' is assumed to have already been filled in;
   1416    hence that is used to determine where in the backing store to read
   1417    from. */
   1418 static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
   1419 {
   1420    Word       i;
   1421    Addr       tag;
   1422    CacheLine* cl;
   1423    LineZ*     lineZ;
   1424    LineF*     lineF;
   1425 
   1426    if (0)
   1427    VG_(printf)("scache fetch line %d\n", (Int)wix);
   1428 
   1429    tl_assert(wix >= 0 && wix < N_WAY_NENT);
   1430 
   1431    tag =  cache_shmem.tags0[wix];
   1432    cl  = &cache_shmem.lyns0[wix];
   1433 
   1434    /* reject nonsense requests */
   1435    tl_assert(is_valid_scache_tag(tag));
   1436 
   1437    lineZ = NULL;
   1438    lineF = NULL;
   1439    find_ZF_for_reading( &lineZ, &lineF, tag );
   1440    tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );
   1441 
   1442    /* expand the data into the bottom layer of the tree, then get
    1443       normalise_CacheLine to build the descriptor array. */
   1444    if (lineF) {
   1445       tl_assert(lineF->inUse);
   1446       for (i = 0; i < N_LINE_ARANGE; i++) {
   1447          cl->svals[i] = lineF->w64s[i];
   1448       }
   1449       stats__cache_F_fetches++;
   1450    } else {
   1451       for (i = 0; i < N_LINE_ARANGE; i++) {
   1452          SVal sv;
   1453          UWord ix = read_twobit_array( lineZ->ix2s, i );
   1454          /* correct, but expensive: tl_assert(ix >= 0 && ix <= 3); */
   1455          sv = lineZ->dict[ix];
   1456          tl_assert(sv != SVal_INVALID);
   1457          cl->svals[i] = sv;
   1458       }
   1459       stats__cache_Z_fetches++;
   1460    }
   1461    normalise_CacheLine( cl );
   1462 }
   1463 
   1464 static void shmem__invalidate_scache ( void ) {
   1465    Word wix;
   1466    if (0) VG_(printf)("%s","scache inval\n");
   1467    tl_assert(!is_valid_scache_tag(1));
   1468    for (wix = 0; wix < N_WAY_NENT; wix++) {
   1469       cache_shmem.tags0[wix] = 1/*INVALID*/;
   1470    }
   1471    stats__cache_invals++;
   1472 }
   1473 
   1474 static void shmem__flush_and_invalidate_scache ( void ) {
   1475    Word wix;
   1476    Addr tag;
   1477    if (0) VG_(printf)("%s","scache flush and invalidate\n");
   1478    tl_assert(!is_valid_scache_tag(1));
   1479    for (wix = 0; wix < N_WAY_NENT; wix++) {
   1480       tag = cache_shmem.tags0[wix];
   1481       if (tag == 1/*INVALID*/) {
   1482          /* already invalid; nothing to do */
   1483       } else {
   1484          tl_assert(is_valid_scache_tag(tag));
   1485          cacheline_wback( wix );
   1486       }
   1487       cache_shmem.tags0[wix] = 1/*INVALID*/;
   1488    }
   1489    stats__cache_flushes++;
   1490    stats__cache_invals++;
   1491 }
   1492 
   1493 
   1494 static inline Bool aligned16 ( Addr a ) {
   1495    return 0 == (a & 1);
   1496 }
   1497 static inline Bool aligned32 ( Addr a ) {
   1498    return 0 == (a & 3);
   1499 }
   1500 static inline Bool aligned64 ( Addr a ) {
   1501    return 0 == (a & 7);
   1502 }
   1503 static inline UWord get_cacheline_offset ( Addr a ) {
   1504    return (UWord)(a & (N_LINE_ARANGE - 1));
   1505 }
   1506 static inline Addr cacheline_ROUNDUP ( Addr a ) {
   1507    return ROUNDUP(a, N_LINE_ARANGE);
   1508 }
   1509 static inline Addr cacheline_ROUNDDN ( Addr a ) {
   1510    return ROUNDDN(a, N_LINE_ARANGE);
   1511 }
   1512 static inline UWord get_treeno ( Addr a ) {
   1513    return get_cacheline_offset(a) >> 3;
   1514 }
   1515 static inline UWord get_tree_offset ( Addr a ) {
   1516    return a & 7;
   1517 }
   1518 
   1519 static __attribute__((noinline))
   1520        CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
   1521 static inline CacheLine* get_cacheline ( Addr a )
   1522 {
   1523    /* tag is 'a' with the in-line offset masked out,
   1524       eg a[31]..a[4] 0000 */
   1525    Addr       tag = a & ~(N_LINE_ARANGE - 1);
   1526    UWord      wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
   1527    stats__cache_totrefs++;
   1528    if (LIKELY(tag == cache_shmem.tags0[wix])) {
   1529       return &cache_shmem.lyns0[wix];
   1530    } else {
   1531       return get_cacheline_MISS( a );
   1532    }
   1533 }
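         /* Worked example, assuming the line/way geometry defined earlier
            in this file is N_LINE_BITS == 6 (so N_LINE_ARANGE == 64): for
            a == 0x5000A7 the in-line offset is 0x27, hence
            tag == 0x500080, treeno == 4 (0x27 >> 3), tree offset == 7
            (a & 7), and wix == (a >> 6) & (N_WAY_NENT - 1).  With a
            different geometry the arithmetic shifts accordingly. */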
   1534 
   1535 static __attribute__((noinline))
   1536        CacheLine* get_cacheline_MISS ( Addr a )
   1537 {
   1538    /* tag is 'a' with the in-line offset masked out,
   1539       eg a[31]..a[4] 0000 */
   1540 
   1541    CacheLine* cl;
   1542    Addr*      tag_old_p;
   1543    Addr       tag = a & ~(N_LINE_ARANGE - 1);
   1544    UWord      wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
   1545 
   1546    tl_assert(tag != cache_shmem.tags0[wix]);
   1547 
   1548    /* Dump the old line into the backing store. */
   1549    stats__cache_totmisses++;
   1550 
   1551    cl        = &cache_shmem.lyns0[wix];
   1552    tag_old_p = &cache_shmem.tags0[wix];
   1553 
   1554    if (is_valid_scache_tag( *tag_old_p )) {
   1555       /* EXPENSIVE and REDUNDANT: callee does it */
   1556       if (CHECK_ZSM)
   1557          tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   1558       cacheline_wback( wix );
   1559    }
   1560    /* and reload the new one */
   1561    *tag_old_p = tag;
   1562    cacheline_fetch( wix );
   1563    if (CHECK_ZSM)
   1564       tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   1565    return cl;
   1566 }
   1567 
   1568 static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   1569    stats__cline_64to32pulldown++;
   1570    switch (toff) {
   1571       case 0: case 4:
   1572          tl_assert(descr & TREE_DESCR_64);
   1573          tree[4] = tree[0];
   1574          descr &= ~TREE_DESCR_64;
   1575          descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
   1576          break;
   1577       default:
   1578          tl_assert(0);
   1579    }
   1580    return descr;
   1581 }
   1582 
   1583 static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   1584    stats__cline_32to16pulldown++;
   1585    switch (toff) {
   1586       case 0: case 2:
   1587          if (!(descr & TREE_DESCR_32_0)) {
   1588             descr = pulldown_to_32(tree, 0, descr);
   1589          }
   1590          tl_assert(descr & TREE_DESCR_32_0);
   1591          tree[2] = tree[0];
   1592          descr &= ~TREE_DESCR_32_0;
   1593          descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
   1594          break;
   1595       case 4: case 6:
   1596          if (!(descr & TREE_DESCR_32_1)) {
   1597             descr = pulldown_to_32(tree, 4, descr);
   1598          }
   1599          tl_assert(descr & TREE_DESCR_32_1);
   1600          tree[6] = tree[4];
   1601          descr &= ~TREE_DESCR_32_1;
   1602          descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
   1603          break;
   1604       default:
   1605          tl_assert(0);
   1606    }
   1607    return descr;
   1608 }
   1609 
   1610 static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
   1611    stats__cline_16to8pulldown++;
   1612    switch (toff) {
   1613       case 0: case 1:
   1614          if (!(descr & TREE_DESCR_16_0)) {
   1615             descr = pulldown_to_16(tree, 0, descr);
   1616          }
   1617          tl_assert(descr & TREE_DESCR_16_0);
   1618          tree[1] = tree[0];
   1619          descr &= ~TREE_DESCR_16_0;
   1620          descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
   1621          break;
   1622       case 2: case 3:
   1623          if (!(descr & TREE_DESCR_16_1)) {
   1624             descr = pulldown_to_16(tree, 2, descr);
   1625          }
   1626          tl_assert(descr & TREE_DESCR_16_1);
   1627          tree[3] = tree[2];
   1628          descr &= ~TREE_DESCR_16_1;
   1629          descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
   1630          break;
   1631       case 4: case 5:
   1632          if (!(descr & TREE_DESCR_16_2)) {
   1633             descr = pulldown_to_16(tree, 4, descr);
   1634          }
   1635          tl_assert(descr & TREE_DESCR_16_2);
   1636          tree[5] = tree[4];
   1637          descr &= ~TREE_DESCR_16_2;
   1638          descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
   1639          break;
   1640       case 6: case 7:
   1641          if (!(descr & TREE_DESCR_16_3)) {
   1642             descr = pulldown_to_16(tree, 6, descr);
   1643          }
   1644          tl_assert(descr & TREE_DESCR_16_3);
   1645          tree[7] = tree[6];
   1646          descr &= ~TREE_DESCR_16_3;
   1647          descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
   1648          break;
   1649       default:
   1650          tl_assert(0);
   1651    }
   1652    return descr;
   1653 }
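         /* Worked example: starting from descr == TREE_DESCR_64 (one 64-bit
            leaf in tree[0]), pulldown_to_8(tree, 3, descr) cascades:
            pulldown_to_32 splits 64 into 32_1|32_0 (copying tree[0] to
            tree[4]), pulldown_to_16 splits 32_0 into 16_1|16_0 (tree[0] to
            tree[2]), and finally 16_1 is split into 8_3|8_2 (tree[2] to
            tree[3]).  The result is
            TREE_DESCR_32_1 | TREE_DESCR_16_0 | TREE_DESCR_8_3 | TREE_DESCR_8_2,
            with byte 3 now individually addressable. */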
   1654 
   1655 
   1656 static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
   1657    UShort mask;
   1658    switch (toff) {
   1659       case 0:
   1660          mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
   1661          tl_assert( (descr & mask) == mask );
   1662          descr &= ~mask;
   1663          descr |= TREE_DESCR_16_0;
   1664          break;
   1665       case 2:
   1666          mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
   1667          tl_assert( (descr & mask) == mask );
   1668          descr &= ~mask;
   1669          descr |= TREE_DESCR_16_1;
   1670          break;
   1671       case 4:
   1672          mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
   1673          tl_assert( (descr & mask) == mask );
   1674          descr &= ~mask;
   1675          descr |= TREE_DESCR_16_2;
   1676          break;
   1677       case 6:
   1678          mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
   1679          tl_assert( (descr & mask) == mask );
   1680          descr &= ~mask;
   1681          descr |= TREE_DESCR_16_3;
   1682          break;
   1683       default:
   1684          tl_assert(0);
   1685    }
   1686    return descr;
   1687 }
   1688 
   1689 static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
   1690    UShort mask;
   1691    switch (toff) {
   1692       case 0:
   1693          if (!(descr & TREE_DESCR_16_0))
   1694             descr = pullup_descr_to_16(descr, 0);
   1695          if (!(descr & TREE_DESCR_16_1))
   1696             descr = pullup_descr_to_16(descr, 2);
   1697          mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
   1698          tl_assert( (descr & mask) == mask );
   1699          descr &= ~mask;
   1700          descr |= TREE_DESCR_32_0;
   1701          break;
   1702       case 4:
   1703          if (!(descr & TREE_DESCR_16_2))
   1704             descr = pullup_descr_to_16(descr, 4);
   1705          if (!(descr & TREE_DESCR_16_3))
   1706             descr = pullup_descr_to_16(descr, 6);
   1707          mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
   1708          tl_assert( (descr & mask) == mask );
   1709          descr &= ~mask;
   1710          descr |= TREE_DESCR_32_1;
   1711          break;
   1712       default:
   1713          tl_assert(0);
   1714    }
   1715    return descr;
   1716 }
   1717 
   1718 static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
   1719    switch (toff) {
   1720       case 0: case 4:
   1721          return 0 != (descr & TREE_DESCR_64);
   1722       default:
   1723          tl_assert(0);
   1724    }
   1725 }
   1726 
   1727 static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
   1728    switch (toff) {
   1729       case 0:
   1730          return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
   1731       case 2:
   1732          return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
   1733       case 4:
   1734          return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
   1735       case 6:
   1736          return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
   1737       default:
   1738          tl_assert(0);
   1739    }
   1740 }
   1741 
   1742 /* ------------ Cache management ------------ */
   1743 
   1744 static void zsm_flush_cache ( void )
   1745 {
   1746    shmem__flush_and_invalidate_scache();
   1747 }
   1748 
   1749 
   1750 static void zsm_init ( void(*p_rcinc)(SVal), void(*p_rcdec)(SVal) )
   1751 {
   1752    tl_assert( sizeof(UWord) == sizeof(Addr) );
   1753 
   1754    rcinc = p_rcinc;
   1755    rcdec = p_rcdec;
   1756 
   1757    tl_assert(map_shmem == NULL);
   1758    map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
   1759                            HG_(free),
   1760                            NULL/*unboxed UWord cmp*/);
   1761    tl_assert(map_shmem != NULL);
   1762    shmem__invalidate_scache();
   1763 
   1764    /* a SecMap must contain an integral number of CacheLines */
   1765    tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
   1766    /* also ... a CacheLine holds an integral number of trees */
   1767    tl_assert(0 == (N_LINE_ARANGE % 8));
   1768 }
   1769 
   1770 /////////////////////////////////////////////////////////////////
   1771 /////////////////////////////////////////////////////////////////
   1772 //                                                             //
   1773 // SECTION END compressed shadow memory                        //
   1774 //                                                             //
   1775 /////////////////////////////////////////////////////////////////
   1776 /////////////////////////////////////////////////////////////////
   1777 
   1778 
   1779 
   1780 /////////////////////////////////////////////////////////////////
   1781 /////////////////////////////////////////////////////////////////
   1782 //                                                             //
   1783 // SECTION BEGIN vts primitives                                //
   1784 //                                                             //
   1785 /////////////////////////////////////////////////////////////////
   1786 /////////////////////////////////////////////////////////////////
   1787 
   1788 
   1789 /* There's a 1-1 mapping between Thr and ThrIDs -- the latter merely
   1790    being compact stand-ins for Thr*'s.  Use these functions to map
   1791    between them. */
   1792 static ThrID Thr__to_ThrID   ( Thr*  thr   ); /* fwds */
   1793 static Thr*  Thr__from_ThrID ( ThrID thrid ); /* fwds */
   1794 
   1795 __attribute__((noreturn))
   1796 static void scalarts_limitations_fail_NORETURN ( Bool due_to_nThrs )
   1797 {
   1798    if (due_to_nThrs) {
   1799       const HChar* s =
   1800          "\n"
   1801          "Helgrind: cannot continue, run aborted: too many threads.\n"
   1802          "Sorry.  Helgrind can only handle programs that create\n"
   1803          "%'llu or fewer threads over their entire lifetime.\n"
   1804          "\n";
   1805       VG_(umsg)(s, (ULong)(ThrID_MAX_VALID - 1024));
   1806    } else {
   1807       const HChar* s =
   1808          "\n"
   1809          "Helgrind: cannot continue, run aborted: too many\n"
   1810          "synchronisation events.  Sorry. Helgrind can only handle\n"
   1811          "programs which perform %'llu or fewer\n"
   1812          "inter-thread synchronisation events (locks, unlocks, etc).\n"
   1813          "\n";
   1814       VG_(umsg)(s, (1ULL << SCALARTS_N_TYMBITS) - 1);
   1815    }
   1816    VG_(exit)(1);
   1817    /*NOTREACHED*/
   1818    tl_assert(0); /*wtf?!*/
   1819 }
   1820 
   1821 
   1822 /* The dead thread (ThrID, actually) table.  A thread may only be
   1823    listed here if we have been notified thereof by libhb_async_exit.
   1824    New entries are added at the end.  The order isn't important, but
   1825    the ThrID values must be unique.  This table lists the identity of
   1826    all threads that have ever died -- none are ever removed.  We keep
   1827    this table so as to be able to prune entries from VTSs.  We don't
   1828    actually need to keep the set of threads that have ever died --
   1829    only the threads that have died since the previous round of
   1830    pruning.  But it's useful for sanity check purposes to keep the
   1831    entire set, so we do. */
   1832 static XArray* /* of ThrID */ verydead_thread_table = NULL;
   1833 
   1834 /* Arbitrary total ordering on ThrIDs. */
   1835 static Int cmp__ThrID ( const void* v1, const void* v2 ) {
   1836    ThrID id1 = *(const ThrID*)v1;
   1837    ThrID id2 = *(const ThrID*)v2;
   1838    if (id1 < id2) return -1;
   1839    if (id1 > id2) return 1;
   1840    return 0;
   1841 }
   1842 
   1843 static void verydead_thread_table_init ( void )
   1844 {
   1845    tl_assert(!verydead_thread_table);
   1846    verydead_thread_table
   1847      = VG_(newXA)( HG_(zalloc),
   1848                    "libhb.verydead_thread_table_init.1",
   1849                    HG_(free), sizeof(ThrID) );
   1850    tl_assert(verydead_thread_table);
   1851    VG_(setCmpFnXA)(verydead_thread_table, cmp__ThrID);
   1852 }
   1853 
   1854 
   1855 /* A VTS contains .ts, its vector clock, and also .id, a field to hold
   1856    a backlink for the caller's convenience.  Since we have no idea
   1857    what to set that to in the library, it always gets set to
   1858    VtsID_INVALID. */
   1859 typedef
   1860    struct {
   1861       VtsID    id;
   1862       UInt     usedTS;
   1863       UInt     sizeTS;
   1864       ScalarTS ts[0];
   1865    }
   1866    VTS;
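         /* Illustration: .ts is kept sorted by strictly increasing .thrid,
            and any thread not mentioned implicitly has timestamp zero.  So
            usedTS == 3 with ts == [ <1024,7> <1030,1> <1047,3> ] means
            "clock 7 for thread 1024, 1 for 1030, 3 for 1047, zero for
            everything else".  (The ThrID values are only illustrative;
            valid ThrIDs start at 1024, per the assertion in VTS__cmpLEQ
            below.) */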
   1867 
   1868 /* Allocate a VTS capable of storing 'sizeTS' entries. */
   1869 static VTS* VTS__new ( const HChar* who, UInt sizeTS );
   1870 
   1871 /* Make a clone of 'vts', sizing the new array to exactly match the
   1872    number of ScalarTSs present. */
   1873 static VTS* VTS__clone ( const HChar* who, VTS* vts );
   1874 
   1875 /* Make a clone of 'vts' with the thrids in 'thrids' removed.  The new
   1876    array is sized exactly to hold the number of required elements.
   1877    'thridsToDel' is an array of ThrIDs to be omitted in the clone, and
   1878    must be in strictly increasing order. */
   1879 static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel );
   1880 
   1881 /* Delete this VTS in its entirety. */
   1882 static void VTS__delete ( VTS* vts );
   1883 
   1884 /* Create a new singleton VTS in 'out'.  Caller must have
   1885    pre-allocated 'out' sufficiently big to hold the result in all
   1886    possible cases. */
   1887 static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym );
   1888 
   1889 /* Create in 'out' a VTS which is the same as 'vts' except with
   1890    vts[me]++, so to speak.  Caller must have pre-allocated 'out'
   1891    sufficiently big to hold the result in all possible cases. */
   1892 static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts );
   1893 
   1894 /* Create in 'out' a VTS which is the join (max) of 'a' and
   1895    'b'. Caller must have pre-allocated 'out' sufficiently big to hold
   1896    the result in all possible cases. */
   1897 static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b );
   1898 
   1899 /* Compute the partial ordering relation of the two args.  Although we
   1900    could be completely general and return an enumeration value (EQ,
   1901    LT, GT, UN), in fact we only need LEQ, and so we may as well
   1902    hardwire that fact.
   1903 
    1904    Returns zero iff LEQ(A,B), or a valid ThrID if not (zero is an
    1905    invalid ThrID).  In the latter case, the returned ThrID identifies
    1906    a point at which LEQ does not hold.  There may be more
    1907    than one such point, but we only care about seeing one of them, not
    1908    all of them.  This rather strange convention is used because
    1909    sometimes we want to know the actual index at which they first
    1910    differ.
   1911 static UInt VTS__cmpLEQ ( VTS* a, VTS* b );
   1912 
   1913 /* Compute an arbitrary structural (total) ordering on the two args,
   1914    based on their VCs, so they can be looked up in a table, tree, etc.
   1915    Returns -1, 0 or 1. */
   1916 static Word VTS__cmp_structural ( VTS* a, VTS* b );
   1917 
   1918 /* Debugging only.  Display the given VTS in the buffer. */
   1919 static void VTS__show ( HChar* buf, Int nBuf, VTS* vts );
   1920 
   1921 /* Debugging only.  Return vts[index], so to speak. */
   1922 static ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );
   1923 
   1924 /* Notify the VTS machinery that a thread has been declared
   1925    comprehensively dead: that is, it has done an async exit AND it has
   1926    been joined with.  This should ensure that its local clocks (.viR
   1927    and .viW) will never again change, and so all mentions of this
   1928    thread from all VTSs in the system may be removed. */
   1929 static void VTS__declare_thread_very_dead ( Thr* idx );
   1930 
   1931 /*--------------- to do with Vector Timestamps ---------------*/
   1932 
   1933 static Bool is_sane_VTS ( VTS* vts )
   1934 {
   1935    UWord     i, n;
   1936    ScalarTS  *st1, *st2;
   1937    if (!vts) return False;
   1938    if (vts->usedTS > vts->sizeTS) return False;
   1939    n = vts->usedTS;
   1940    if (n == 1) {
   1941       st1 = &vts->ts[0];
   1942       if (st1->tym == 0)
   1943          return False;
   1944    }
   1945    else
   1946    if (n >= 2) {
   1947       for (i = 0; i < n-1; i++) {
   1948          st1 = &vts->ts[i];
   1949          st2 = &vts->ts[i+1];
   1950          if (st1->thrid >= st2->thrid)
   1951             return False;
   1952          if (st1->tym == 0 || st2->tym == 0)
   1953             return False;
   1954       }
   1955    }
   1956    return True;
   1957 }
   1958 
   1959 
   1960 /* Create a new, empty VTS.
   1961 */
   1962 static VTS* VTS__new ( const HChar* who, UInt sizeTS )
   1963 {
   1964    VTS* vts = HG_(zalloc)(who, sizeof(VTS) + (sizeTS+1) * sizeof(ScalarTS));
   1965    tl_assert(vts->usedTS == 0);
   1966    vts->sizeTS = sizeTS;
   1967    *(ULong*)(&vts->ts[sizeTS]) = 0x0ddC0ffeeBadF00dULL;
   1968    return vts;
   1969 }
   1970 
   1971 /* Clone this VTS.
   1972 */
   1973 static VTS* VTS__clone ( const HChar* who, VTS* vts )
   1974 {
   1975    tl_assert(vts);
   1976    tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   1977    UInt nTS = vts->usedTS;
   1978    VTS* clone = VTS__new(who, nTS);
   1979    clone->id = vts->id;
   1980    clone->sizeTS = nTS;
   1981    clone->usedTS = nTS;
   1982    UInt i;
   1983    for (i = 0; i < nTS; i++) {
   1984       clone->ts[i] = vts->ts[i];
   1985    }
   1986    tl_assert( *(ULong*)(&clone->ts[clone->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   1987    return clone;
   1988 }
   1989 
   1990 
   1991 /* Make a clone of a VTS with specified ThrIDs removed.  'thridsToDel'
   1992    must be in strictly increasing order.  We could obviously do this
   1993    much more efficiently (in linear time) if necessary.
   1994 */
   1995 static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel )
   1996 {
   1997    UInt i, j;
   1998    tl_assert(vts);
   1999    tl_assert(thridsToDel);
   2000    tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   2001    UInt nTS = vts->usedTS;
   2002    /* Figure out how many ScalarTSs will remain in the output. */
   2003    UInt nReq = nTS;
   2004    for (i = 0; i < nTS; i++) {
   2005       ThrID thrid = vts->ts[i].thrid;
   2006       if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
   2007          nReq--;
   2008    }
   2009    tl_assert(nReq <= nTS);
   2010    /* Copy the ones that will remain. */
   2011    VTS* res = VTS__new(who, nReq);
   2012    j = 0;
   2013    for (i = 0; i < nTS; i++) {
   2014       ThrID thrid = vts->ts[i].thrid;
   2015       if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
   2016          continue;
   2017       res->ts[j++] = vts->ts[i];
   2018    }
   2019    tl_assert(j == nReq);
   2020    tl_assert(j == res->sizeTS);
   2021    res->usedTS = j;
   2022    tl_assert( *(ULong*)(&res->ts[j]) == 0x0ddC0ffeeBadF00dULL);
   2023    return res;
   2024 }
   2025 
   2026 
   2027 /* Delete this VTS in its entirety.
   2028 */
   2029 static void VTS__delete ( VTS* vts )
   2030 {
   2031    tl_assert(vts);
   2032    tl_assert(vts->usedTS <= vts->sizeTS);
   2033    tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
   2034    HG_(free)(vts);
   2035 }
   2036 
   2037 
   2038 /* Create a new singleton VTS.
   2039 */
   2040 static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym )
   2041 {
   2042    tl_assert(thr);
   2043    tl_assert(tym >= 1);
   2044    tl_assert(out);
   2045    tl_assert(out->usedTS == 0);
   2046    tl_assert(out->sizeTS >= 1);
   2047    UInt hi = out->usedTS++;
   2048    out->ts[hi].thrid = Thr__to_ThrID(thr);
   2049    out->ts[hi].tym   = tym;
   2050 }
   2051 
   2052 
   2053 /* Return a new VTS in which vts[me]++, so to speak.  'vts' itself is
   2054    not modified.
   2055 */
   2056 static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts )
   2057 {
   2058    UInt      i, n;
   2059    ThrID     me_thrid;
   2060    Bool      found = False;
   2061 
   2062    stats__vts__tick++;
   2063 
   2064    tl_assert(out);
   2065    tl_assert(out->usedTS == 0);
   2066    if (vts->usedTS >= ThrID_MAX_VALID)
   2067       scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
   2068    tl_assert(out->sizeTS >= 1 + vts->usedTS);
   2069 
   2070    tl_assert(me);
   2071    me_thrid = Thr__to_ThrID(me);
   2072    tl_assert(is_sane_VTS(vts));
   2073    n = vts->usedTS;
   2074 
   2075    /* Copy all entries which precede 'me'. */
   2076    for (i = 0; i < n; i++) {
   2077       ScalarTS* here = &vts->ts[i];
   2078       if (UNLIKELY(here->thrid >= me_thrid))
   2079          break;
   2080       UInt hi = out->usedTS++;
   2081       out->ts[hi] = *here;
   2082    }
   2083 
   2084    /* 'i' now indicates the next entry to copy, if any.
   2085        There are 3 possibilities:
   2086        (a) there is no next entry (we used them all up already):
   2087            add (me_thrid,1) to the output, and quit
   2088        (b) there is a next entry, and its thrid > me_thrid:
   2089            add (me_thrid,1) to the output, then copy the remaining entries
   2090        (c) there is a next entry, and its thrid == me_thrid:
   2091            copy it to the output but increment its timestamp value.
   2092            Then copy the remaining entries.  (c) is the common case.
   2093    */
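            /* Example of case (c): ticking thread T in
               vts == [ <A,5> <T,2> <Z,9> ] produces
               out == [ <A,5> <T,3> <Z,9> ] (A, T, Z being ThrIDs in
               increasing order). */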
   2094    tl_assert(i >= 0 && i <= n);
   2095    if (i == n) { /* case (a) */
   2096       UInt hi = out->usedTS++;
   2097       out->ts[hi].thrid = me_thrid;
   2098       out->ts[hi].tym   = 1;
   2099    } else {
   2100       /* cases (b) and (c) */
   2101       ScalarTS* here = &vts->ts[i];
   2102       if (me_thrid == here->thrid) { /* case (c) */
   2103          if (UNLIKELY(here->tym >= (1ULL << SCALARTS_N_TYMBITS) - 2ULL)) {
   2104             /* We're hosed.  We have to stop. */
   2105             scalarts_limitations_fail_NORETURN( False/*!due_to_nThrs*/ );
   2106          }
   2107          UInt hi = out->usedTS++;
   2108          out->ts[hi].thrid = here->thrid;
   2109          out->ts[hi].tym   = here->tym + 1;
   2110          i++;
   2111          found = True;
   2112       } else { /* case (b) */
   2113          UInt hi = out->usedTS++;
   2114          out->ts[hi].thrid = me_thrid;
   2115          out->ts[hi].tym   = 1;
   2116       }
   2117       /* And copy any remaining entries. */
   2118       for (/*keepgoing*/; i < n; i++) {
   2119          ScalarTS* here2 = &vts->ts[i];
   2120          UInt hi = out->usedTS++;
   2121          out->ts[hi] = *here2;
   2122       }
   2123    }
   2124 
   2125    tl_assert(is_sane_VTS(out));
   2126    tl_assert(out->usedTS == vts->usedTS + (found ? 0 : 1));
   2127    tl_assert(out->usedTS <= out->sizeTS);
   2128 }
   2129 
   2130 
   2131 /* Return a new VTS constructed as the join (max) of the 2 args.
   2132    Neither arg is modified.
   2133 */
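         /* For example: joining a == [ <T1,4> <T2,1> ] with
            b == [ <T2,3> <T5,7> ] yields [ <T1,4> <T2,3> <T5,7> ]: every
            ThrID present in either input appears once, carrying the larger
            of its two timestamps, with a missing entry counting as zero. */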
   2134 static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b )
   2135 {
   2136    UInt     ia, ib, useda, usedb;
   2137    ULong    tyma, tymb, tymMax;
   2138    ThrID    thrid;
   2139    UInt     ncommon = 0;
   2140 
   2141    stats__vts__join++;
   2142 
   2143    tl_assert(a);
   2144    tl_assert(b);
   2145    useda = a->usedTS;
   2146    usedb = b->usedTS;
   2147 
   2148    tl_assert(out);
   2149    tl_assert(out->usedTS == 0);
   2150    /* overly conservative test, but doing better involves comparing
   2151       the two VTSs, which we don't want to do at this point. */
   2152    if (useda + usedb >= ThrID_MAX_VALID)
   2153       scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
   2154    tl_assert(out->sizeTS >= useda + usedb);
   2155 
   2156    ia = ib = 0;
   2157 
   2158    while (1) {
   2159 
   2160       /* This logic is to enumerate triples (thrid, tyma, tymb) drawn
   2161          from a and b in order, where thrid is the next ThrID
   2162          occurring in either a or b, and tyma/b are the relevant
   2163          scalar timestamps, taking into account implicit zeroes. */
   2164       tl_assert(ia >= 0 && ia <= useda);
   2165       tl_assert(ib >= 0 && ib <= usedb);
   2166 
   2167       if        (ia == useda && ib == usedb) {
   2168          /* both empty - done */
   2169          break;
   2170 
   2171       } else if (ia == useda && ib != usedb) {
   2172          /* a empty, use up b */
   2173          ScalarTS* tmpb = &b->ts[ib];
   2174          thrid = tmpb->thrid;
   2175          tyma  = 0;
   2176          tymb  = tmpb->tym;
   2177          ib++;
   2178 
   2179       } else if (ia != useda && ib == usedb) {
   2180          /* b empty, use up a */
   2181          ScalarTS* tmpa = &a->ts[ia];
   2182          thrid = tmpa->thrid;
   2183          tyma  = tmpa->tym;
   2184          tymb  = 0;
   2185          ia++;
   2186 
   2187       } else {
   2188          /* both not empty; extract lowest-ThrID'd triple */
   2189          ScalarTS* tmpa = &a->ts[ia];
   2190          ScalarTS* tmpb = &b->ts[ib];
   2191          if (tmpa->thrid < tmpb->thrid) {
   2192             /* a has the lowest unconsidered ThrID */
   2193             thrid = tmpa->thrid;
   2194             tyma  = tmpa->tym;
   2195             tymb  = 0;
   2196             ia++;
   2197          } else if (tmpa->thrid > tmpb->thrid) {
   2198             /* b has the lowest unconsidered ThrID */
   2199             thrid = tmpb->thrid;
   2200             tyma  = 0;
   2201             tymb  = tmpb->tym;
   2202             ib++;
   2203          } else {
   2204             /* they both next mention the same ThrID */
   2205             tl_assert(tmpa->thrid == tmpb->thrid);
   2206             thrid = tmpa->thrid; /* == tmpb->thrid */
   2207             tyma  = tmpa->tym;
   2208             tymb  = tmpb->tym;
   2209             ia++;
   2210             ib++;
   2211             ncommon++;
   2212          }
   2213       }
   2214 
    2215       /* having laboriously determined (thrid, tyma, tymb), do something
   2216          useful with it. */
   2217       tymMax = tyma > tymb ? tyma : tymb;
   2218       if (tymMax > 0) {
   2219          UInt hi = out->usedTS++;
   2220          out->ts[hi].thrid = thrid;
   2221          out->ts[hi].tym   = tymMax;
   2222       }
   2223 
   2224    }
   2225 
   2226    tl_assert(is_sane_VTS(out));
   2227    tl_assert(out->usedTS <= out->sizeTS);
   2228    tl_assert(out->usedTS == useda + usedb - ncommon);
   2229 }
   2230 
   2231 
   2232 /* Determine if 'a' <= 'b', in the partial ordering.  Returns zero if
   2233    they are, or the first ThrID for which they are not (no valid ThrID
   2234    has the value zero).  This rather strange convention is used
   2235    because sometimes we want to know the actual index at which they
   2236    first differ. */
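         /* For example: with a == [ <T1,3> <T2,5> ] and
            b == [ <T1,4> <T2,5> ], VTS__cmpLEQ(a,b) returns 0 (a is
            pointwise <= b), whereas VTS__cmpLEQ(b,a) returns T1, the first
            ThrID at which b's timestamp exceeds a's. */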
   2237 static UInt/*ThrID*/ VTS__cmpLEQ ( VTS* a, VTS* b )
   2238 {
   2239    Word  ia, ib, useda, usedb;
   2240    ULong tyma, tymb;
   2241 
   2242    stats__vts__cmpLEQ++;
   2243 
   2244    tl_assert(a);
   2245    tl_assert(b);
   2246    useda = a->usedTS;
   2247    usedb = b->usedTS;
   2248 
   2249    ia = ib = 0;
   2250 
   2251    while (1) {
   2252 
    2253       /* This logic is to enumerate pairs (tyma, tymb) drawn
    2254          from a and b in order, where tyma/b are the relevant
    2255          scalar timestamps, taking into account implicit zeroes. */
   2256       ThrID thrid;
   2257 
   2258       tl_assert(ia >= 0 && ia <= useda);
   2259       tl_assert(ib >= 0 && ib <= usedb);
   2260 
   2261       if        (ia == useda && ib == usedb) {
   2262          /* both empty - done */
   2263          break;
   2264 
   2265       } else if (ia == useda && ib != usedb) {
   2266          /* a empty, use up b */
   2267          ScalarTS* tmpb = &b->ts[ib];
   2268          tyma  = 0;
   2269          tymb  = tmpb->tym;
   2270          thrid = tmpb->thrid;
   2271          ib++;
   2272 
   2273       } else if (ia != useda && ib == usedb) {
   2274          /* b empty, use up a */
   2275          ScalarTS* tmpa = &a->ts[ia];
   2276          tyma  = tmpa->tym;
   2277          thrid = tmpa->thrid;
   2278          tymb  = 0;
   2279          ia++;
   2280 
   2281       } else {
   2282          /* both not empty; extract lowest-ThrID'd triple */
   2283          ScalarTS* tmpa = &a->ts[ia];
   2284          ScalarTS* tmpb = &b->ts[ib];
   2285          if (tmpa->thrid < tmpb->thrid) {
   2286             /* a has the lowest unconsidered ThrID */
   2287             tyma  = tmpa->tym;
   2288             thrid = tmpa->thrid;
   2289             tymb  = 0;
   2290             ia++;
   2291          }
   2292          else
   2293          if (tmpa->thrid > tmpb->thrid) {
   2294             /* b has the lowest unconsidered ThrID */
   2295             tyma  = 0;
   2296             tymb  = tmpb->tym;
   2297             thrid = tmpb->thrid;
   2298             ib++;
   2299          } else {
   2300             /* they both next mention the same ThrID */
   2301             tl_assert(tmpa->thrid == tmpb->thrid);
   2302             tyma  = tmpa->tym;
   2303             thrid = tmpa->thrid;
   2304             tymb  = tmpb->tym;
   2305             ia++;
   2306             ib++;
   2307          }
   2308       }
   2309 
   2310       /* having laboriously determined (tyma, tymb), do something
   2311          useful with it. */
   2312       if (tyma > tymb) {
   2313          /* not LEQ at this index.  Quit, since the answer is
   2314             determined already. */
   2315          tl_assert(thrid >= 1024);
   2316          return thrid;
   2317       }
   2318    }
   2319 
   2320    return 0; /* all points are LEQ => return an invalid ThrID */
   2321 }
   2322 
   2323 
   2324 /* Compute an arbitrary structural (total) ordering on the two args,
   2325    based on their VCs, so they can be looked up in a table, tree, etc.
   2326    Returns -1, 0 or 1.  (really just 'deriving Ord' :-) This can be
    2327    performance critical so there is some effort expended to make it as
    2328    fast as possible.
   2329 */
   2330 Word VTS__cmp_structural ( VTS* a, VTS* b )
   2331 {
   2332    /* We just need to generate an arbitrary total ordering based on
    2333       a->ts and b->ts.  Preferably do it in a way which detects likely
    2334       differences relatively quickly. */
   2335    Word     i;
   2336    Word     useda = 0,    usedb = 0;
   2337    ScalarTS *ctsa = NULL, *ctsb = NULL;
   2338 
   2339    stats__vts__cmp_structural++;
   2340 
   2341    tl_assert(a);
   2342    tl_assert(b);
   2343 
   2344    ctsa = &a->ts[0]; useda = a->usedTS;
   2345    ctsb = &b->ts[0]; usedb = b->usedTS;
   2346 
   2347    if (LIKELY(useda == usedb)) {
   2348       ScalarTS *tmpa = NULL, *tmpb = NULL;
   2349       stats__vts__cmp_structural_slow++;
   2350       /* Same length vectors.  Find the first difference, if any, as
   2351          fast as possible. */
   2352       for (i = 0; i < useda; i++) {
   2353          tmpa = &ctsa[i];
   2354          tmpb = &ctsb[i];
   2355          if (LIKELY(tmpa->tym == tmpb->tym
   2356                     && tmpa->thrid == tmpb->thrid))
   2357             continue;
   2358          else
   2359             break;
   2360       }
   2361       if (UNLIKELY(i == useda)) {
   2362          /* They're identical. */
   2363          return 0;
   2364       } else {
   2365          tl_assert(i >= 0 && i < useda);
   2366          if (tmpa->tym < tmpb->tym) return -1;
   2367          if (tmpa->tym > tmpb->tym) return 1;
   2368          if (tmpa->thrid < tmpb->thrid) return -1;
   2369          if (tmpa->thrid > tmpb->thrid) return 1;
   2370          /* we just established them as non-identical, hence: */
   2371       }
   2372       /*NOTREACHED*/
   2373       tl_assert(0);
   2374    }
   2375 
   2376    if (useda < usedb) return -1;
   2377    if (useda > usedb) return 1;
   2378    /*NOTREACHED*/
   2379    tl_assert(0);
   2380 }
   2381 
   2382 
   2383 /* Debugging only.  Display the given VTS in the buffer.
   2384 */
   2385 void VTS__show ( HChar* buf, Int nBuf, VTS* vts )
   2386 {
   2387    ScalarTS* st;
   2388    HChar     unit[64];
   2389    Word      i, n;
   2390    Int       avail = nBuf;
   2391    tl_assert(vts && vts->ts);
   2392    tl_assert(nBuf > 16);
   2393    buf[0] = '[';
   2394    buf[1] = 0;
   2395    n =  vts->usedTS;
   2396    for (i = 0; i < n; i++) {
   2397       tl_assert(avail >= 40);
   2398       st = &vts->ts[i];
   2399       VG_(memset)(unit, 0, sizeof(unit));
   2400       VG_(sprintf)(unit, i < n-1 ? "%u:%llu " : "%u:%llu",
   2401                          st->thrid, (ULong)st->tym);
   2402       if (avail < VG_(strlen)(unit) + 40/*let's say*/) {
   2403          VG_(strcat)(buf, " ...]");
   2404          buf[nBuf-1] = 0;
   2405          return;
   2406       }
   2407       VG_(strcat)(buf, unit);
   2408       avail -= VG_(strlen)(unit);
   2409    }
   2410    VG_(strcat)(buf, "]");
   2411    buf[nBuf-1] = 0;
   2412 }
   2413 
   2414 
   2415 /* Debugging only.  Return vts[index], so to speak.
   2416 */
   2417 ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx )
   2418 {
   2419    UWord i, n;
   2420    ThrID idx_thrid = Thr__to_ThrID(idx);
   2421    stats__vts__indexat_slow++;
   2422    tl_assert(vts && vts->ts);
   2423    n = vts->usedTS;
   2424    for (i = 0; i < n; i++) {
   2425       ScalarTS* st = &vts->ts[i];
   2426       if (st->thrid == idx_thrid)
   2427          return st->tym;
   2428    }
   2429    return 0;
   2430 }
   2431 
   2432 
   2433 /* See comment on prototype above.
   2434 */
   2435 static void VTS__declare_thread_very_dead ( Thr* thr )
   2436 {
   2437    if (0) VG_(printf)("VTQ:  tae %p\n", thr);
   2438 
   2439    tl_assert(thr->llexit_done);
   2440    tl_assert(thr->joinedwith_done);
   2441 
   2442    ThrID nyu;
   2443    nyu = Thr__to_ThrID(thr);
   2444    VG_(addToXA)( verydead_thread_table, &nyu );
   2445 
   2446    /* We can only get here if we're assured that we'll never again
   2447       need to look at this thread's ::viR or ::viW.  Set them to
   2448       VtsID_INVALID, partly so as to avoid holding on to the VTSs, but
   2449       mostly so that we don't wind up pruning them (as that would be
   2450       nonsensical: the only interesting ScalarTS entry for a dead
    2451       thread is its own index, and the pruning will remove that). */
   2452    VtsID__rcdec(thr->viR);
   2453    VtsID__rcdec(thr->viW);
   2454    thr->viR = VtsID_INVALID;
   2455    thr->viW = VtsID_INVALID;
   2456 }
   2457 
   2458 
   2459 /////////////////////////////////////////////////////////////////
   2460 /////////////////////////////////////////////////////////////////
   2461 //                                                             //
   2462 // SECTION END vts primitives                                  //
   2463 //                                                             //
   2464 /////////////////////////////////////////////////////////////////
   2465 /////////////////////////////////////////////////////////////////
   2466 
   2467 
   2468 
   2469 /////////////////////////////////////////////////////////////////
   2470 /////////////////////////////////////////////////////////////////
   2471 //                                                             //
   2472 // SECTION BEGIN main library                                  //
   2473 //                                                             //
   2474 /////////////////////////////////////////////////////////////////
   2475 /////////////////////////////////////////////////////////////////
   2476 
   2477 
   2478 /////////////////////////////////////////////////////////
   2479 //                                                     //
   2480 // VTS set                                             //
   2481 //                                                     //
   2482 /////////////////////////////////////////////////////////
   2483 
   2484 static WordFM* /* WordFM VTS* void */ vts_set = NULL;
   2485 
   2486 static void vts_set_init ( void )
   2487 {
   2488    tl_assert(!vts_set);
   2489    vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
   2490                          HG_(free),
   2491                          (Word(*)(UWord,UWord))VTS__cmp_structural );
   2492    tl_assert(vts_set);
   2493 }
   2494 
   2495 /* Given a VTS, look in vts_set to see if we already have a
   2496    structurally identical one.  If yes, return the pair (True, pointer
   2497    to the existing one).  If no, clone this one, add the clone to the
   2498    set, and return (False, pointer to the clone). */
   2499 static Bool vts_set__find__or__clone_and_add ( /*OUT*/VTS** res, VTS* cand )
   2500 {
   2501    UWord keyW, valW;
   2502    stats__vts_set__focaa++;
   2503    tl_assert(cand->id == VtsID_INVALID);
   2504    /* lookup cand (by value) */
   2505    if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
   2506       /* found it */
   2507       tl_assert(valW == 0);
   2508       /* if this fails, cand (by ref) was already present (!) */
   2509       tl_assert(keyW != (UWord)cand);
   2510       *res = (VTS*)keyW;
   2511       return True;
   2512    } else {
   2513       /* not present.  Clone, add and return address of clone. */
   2514       stats__vts_set__focaa_a++;
   2515       VTS* clone = VTS__clone( "libhb.vts_set_focaa.1", cand );
   2516       tl_assert(clone != cand);
   2517       VG_(addToFM)( vts_set, (UWord)clone, 0/*val is unused*/ );
   2518       *res = clone;
   2519       return False;
   2520    }
   2521 }
   2522 
   2523 
   2524 /////////////////////////////////////////////////////////
   2525 //                                                     //
   2526 // VTS table                                           //
   2527 //                                                     //
   2528 /////////////////////////////////////////////////////////
   2529 
   2530 static void VtsID__invalidate_caches ( void ); /* fwds */
   2531 
   2532 /* A type to hold VTS table entries.  Invariants:
   2533    If .vts == NULL, then this entry is not in use, so:
   2534    - .rc == 0
   2535    - this entry is on the freelist (unfortunately, does not imply
   2536      any constraints on value for .freelink)
   2537    If .vts != NULL, then this entry is in use:
   2538    - .vts is findable in vts_set
   2539    - .vts->id == this entry number
   2540    - no specific value for .rc (even 0 is OK)
   2541    - this entry is not on freelist, so .freelink == VtsID_INVALID
   2542 */
   2543 typedef
   2544    struct {
   2545       VTS*  vts;      /* vts, in vts_set */
   2546       UWord rc;       /* reference count - enough for entire aspace */
   2547       VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
   2548       VtsID remap;    /* used only during pruning */
   2549    }
   2550    VtsTE;
   2551 
   2552 /* The VTS table. */
   2553 static XArray* /* of VtsTE */ vts_tab = NULL;
   2554 
   2555 /* An index into the VTS table, indicating the start of the list of
   2556    free (available for use) entries.  If the list is empty, this is
   2557    VtsID_INVALID. */
   2558 static VtsID vts_tab_freelist = VtsID_INVALID;
   2559 
   2560 /* Do a GC of vts_tab when the freelist becomes empty AND the size of
   2561    vts_tab equals or exceeds this size.  After GC, the value here is
   2562    set appropriately so as to check for the next GC point. */
   2563 static Word vts_next_GC_at = 1000;
   2564 
   2565 static void vts_tab_init ( void )
   2566 {
   2567    vts_tab
   2568       = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
   2569                     HG_(free), sizeof(VtsTE) );
   2570    vts_tab_freelist
   2571       = VtsID_INVALID;
   2572    tl_assert(vts_tab);
   2573 }
   2574 
   2575 /* Add ii to the free list, checking that it looks out-of-use. */
   2576 static void add_to_free_list ( VtsID ii )
   2577 {
   2578    VtsTE* ie = VG_(indexXA)( vts_tab, ii );
   2579    tl_assert(ie->vts == NULL);
   2580    tl_assert(ie->rc == 0);
   2581    tl_assert(ie->freelink == VtsID_INVALID);
   2582    ie->freelink = vts_tab_freelist;
   2583    vts_tab_freelist = ii;
   2584 }
   2585 
   2586 /* Get an entry from the free list.  This will return VtsID_INVALID if
   2587    the free list is empty. */
   2588 static VtsID get_from_free_list ( void )
   2589 {
   2590    VtsID  ii;
   2591    VtsTE* ie;
   2592    if (vts_tab_freelist == VtsID_INVALID)
   2593       return VtsID_INVALID;
   2594    ii = vts_tab_freelist;
   2595    ie = VG_(indexXA)( vts_tab, ii );
   2596    tl_assert(ie->vts == NULL);
   2597    tl_assert(ie->rc == 0);
   2598    vts_tab_freelist = ie->freelink;
   2599    return ii;
   2600 }
   2601 
   2602 /* Produce a new VtsID that can be used, either by getting it from
   2603    the freelist, or, if that is empty, by expanding vts_tab. */
   2604 static VtsID get_new_VtsID ( void )
   2605 {
   2606    VtsID ii;
   2607    VtsTE te;
   2608    ii = get_from_free_list();
   2609    if (ii != VtsID_INVALID)
   2610       return ii;
   2611    te.vts = NULL;
   2612    te.rc = 0;
   2613    te.freelink = VtsID_INVALID;
   2614    te.remap    = VtsID_INVALID;
   2615    ii = (VtsID)VG_(addToXA)( vts_tab, &te );
   2616    return ii;
   2617 }
   2618 
   2619 
   2620 /* Indirect callback from lib_zsm. */
   2621 static void VtsID__rcinc ( VtsID ii )
   2622 {
   2623    VtsTE* ie;
   2624    /* VG_(indexXA) does a range check for us */
   2625    ie = VG_(indexXA)( vts_tab, ii );
   2626    tl_assert(ie->vts); /* else it's not in use */
   2627    tl_assert(ie->rc < ~0UL); /* else we can't continue */
   2628    tl_assert(ie->vts->id == ii);
   2629    ie->rc++;
   2630 }
   2631 
   2632 /* Indirect callback from lib_zsm. */
   2633 static void VtsID__rcdec ( VtsID ii )
   2634 {
   2635    VtsTE* ie;
   2636    /* VG_(indexXA) does a range check for us */
   2637    ie = VG_(indexXA)( vts_tab, ii );
   2638    tl_assert(ie->vts); /* else it's not in use */
   2639    tl_assert(ie->rc > 0); /* else RC snafu */
   2640    tl_assert(ie->vts->id == ii);
   2641    ie->rc--;
   2642 }
   2643 
   2644 
   2645 /* Look up 'cand' in our collection of VTSs.  If present, return the
   2646    VtsID for the pre-existing version.  If not present, clone it, add
   2647    the clone to both vts_tab and vts_set, allocate a fresh VtsID for
   2648    it, and return that. */
   2649 static VtsID vts_tab__find__or__clone_and_add ( VTS* cand )
   2650 {
   2651    VTS* in_tab = NULL;
   2652    tl_assert(cand->id == VtsID_INVALID);
   2653    Bool already_have = vts_set__find__or__clone_and_add( &in_tab, cand );
   2654    tl_assert(in_tab);
   2655    if (already_have) {
   2656       /* We already have a copy of 'cand'.  Use that. */
   2657       VtsTE* ie;
   2658       tl_assert(in_tab->id != VtsID_INVALID);
   2659       ie = VG_(indexXA)( vts_tab, in_tab->id );
   2660       tl_assert(ie->vts == in_tab);
   2661       return in_tab->id;
   2662    } else {
   2663       VtsID  ii = get_new_VtsID();
   2664       VtsTE* ie = VG_(indexXA)( vts_tab, ii );
   2665       ie->vts = in_tab;
   2666       ie->rc = 0;
   2667       ie->freelink = VtsID_INVALID;
   2668       in_tab->id = ii;
   2669       return ii;
   2670    }
   2671 }
   2672 
   2673 
   2674 static void show_vts_stats ( const HChar* caller )
   2675 {
   2676    UWord nSet, nTab, nLive;
   2677    ULong totrc;
   2678    UWord n, i;
   2679    nSet = VG_(sizeFM)( vts_set );
   2680    nTab = VG_(sizeXA)( vts_tab );
   2681    totrc = 0;
   2682    nLive = 0;
   2683    n = VG_(sizeXA)( vts_tab );
   2684    for (i = 0; i < n; i++) {
   2685       VtsTE* ie = VG_(indexXA)( vts_tab, i );
   2686       if (ie->vts) {
   2687          nLive++;
   2688          totrc += (ULong)ie->rc;
   2689       } else {
   2690          tl_assert(ie->rc == 0);
   2691       }
   2692    }
   2693    VG_(printf)("  show_vts_stats %s\n", caller);
   2694    VG_(printf)("    vts_tab size %4lu\n", nTab);
   2695    VG_(printf)("    vts_tab live %4lu\n", nLive);
   2696    VG_(printf)("    vts_set size %4lu\n", nSet);
   2697    VG_(printf)("        total rc %4llu\n", totrc);
   2698 }
   2699 
   2700 
   2701 /* --- Helpers for VtsID pruning --- */
   2702 
   2703 static
   2704 void remap_VtsID ( /*MOD*/XArray* /* of VtsTE */ old_tab,
   2705                    /*MOD*/XArray* /* of VtsTE */ new_tab,
   2706                    VtsID* ii )
   2707 {
   2708    VtsTE *old_te, *new_te;
   2709    VtsID old_id, new_id;
   2710    /* We're relying here on VG_(indexXA)'s range checking to assert on
   2711       any stupid values, in particular *ii == VtsID_INVALID. */
   2712    old_id = *ii;
   2713    old_te = VG_(indexXA)( old_tab, old_id );
   2714    old_te->rc--;
   2715    new_id = old_te->remap;
   2716    new_te = VG_(indexXA)( new_tab, new_id );
   2717    new_te->rc++;
   2718    *ii = new_id;
   2719 }
   2720 
   2721 static
   2722 void remap_VtsIDs_in_SVal ( /*MOD*/XArray* /* of VtsTE */ old_tab,
   2723                             /*MOD*/XArray* /* of VtsTE */ new_tab,
   2724                             SVal* s )
   2725 {
   2726    SVal old_sv, new_sv;
   2727    old_sv = *s;
   2728    if (SVal__isC(old_sv)) {
   2729       VtsID rMin, wMin;
   2730       rMin = SVal__unC_Rmin(old_sv);
   2731       wMin = SVal__unC_Wmin(old_sv);
   2732       remap_VtsID( old_tab, new_tab, &rMin );
   2733       remap_VtsID( old_tab, new_tab, &wMin );
   2734       new_sv = SVal__mkC( rMin, wMin );
   2735       *s = new_sv;
   2736    }
   2737 }
   2738 
   2739 
   2740 /* NOT TO BE CALLED FROM WITHIN libzsm. */
   2741 __attribute__((noinline))
   2742 static void vts_tab__do_GC ( Bool show_stats )
   2743 {
   2744    UWord i, nTab, nLive, nFreed;
   2745 
   2746    /* ---------- BEGIN VTS GC ---------- */
   2747    /* Check this is actually necessary: the freelist must be empty. */
   2748    tl_assert(vts_tab_freelist == VtsID_INVALID);
   2749 
   2750    /* empty the caches for partial order checks and binary joins.  We
   2751       could do better and prune out the entries to be deleted, but it
   2752       ain't worth the hassle. */
   2753    VtsID__invalidate_caches();
   2754 
   2755    /* First, make the reference counts up to date. */
   2756    zsm_flush_cache();
   2757 
   2758    nTab = VG_(sizeXA)( vts_tab );
   2759 
   2760    if (show_stats) {
   2761       VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
   2762       show_vts_stats("before GC");
   2763    }
   2764 
   2765    /* Now we can inspect the entire vts_tab.  Any entries with zero
   2766       .rc fields are now no longer in use and can be put back on the
   2767       free list, removed from vts_set, and deleted. */
   2768    nFreed = 0;
   2769    for (i = 0; i < nTab; i++) {
   2770       Bool present;
   2771       UWord oldK = 0, oldV = 12345;
   2772       VtsTE* te = VG_(indexXA)( vts_tab, i );
   2773       if (te->vts == NULL) {
   2774          tl_assert(te->rc == 0);
   2775          continue; /* already on the free list (presumably) */
   2776       }
   2777       if (te->rc > 0)
   2778          continue; /* in use */
   2779       /* Ok, we got one we can free. */
   2780       tl_assert(te->vts->id == i);
   2781       /* first, remove it from vts_set. */
   2782       present = VG_(delFromFM)( vts_set,
   2783                                 &oldK, &oldV, (UWord)te->vts );
   2784       tl_assert(present); /* else it isn't in vts_set ?! */
   2785       tl_assert(oldV == 0); /* no info stored in vts_set val fields */
   2786       tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
   2787       /* now free the VTS itself */
   2788       VTS__delete(te->vts);
   2789       te->vts = NULL;
   2790       /* and finally put this entry on the free list */
   2791       tl_assert(te->freelink == VtsID_INVALID); /* can't already be on it */
   2792       add_to_free_list( i );
   2793       nFreed++;
   2794    }
   2795 
   2796    /* Now figure out when the next GC should be.  We'll allow the
   2797       number of VTSs to double before GCing again.  Except of course
   2798       that since we can't (or, at least, don't) shrink vts_tab, we
   2799       can't set the threshold value smaller than it. */
   2800    tl_assert(nFreed <= nTab);
   2801    nLive = nTab - nFreed;
   2802    tl_assert(nLive >= 0 && nLive <= nTab);
   2803    vts_next_GC_at = 2 * nLive;
   2804    if (vts_next_GC_at < nTab)
   2805       vts_next_GC_at = nTab;
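           /* Worked example (illustrative only): if nTab is 1000 and this GC
              freed 700 entries, then nLive is 300 and 2 * nLive is 600, which
              is below nTab, so the next GC is triggered at 1000.  If instead
              only 100 were freed, nLive is 900 and the next GC is at 1800. */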
   2806 
   2807    if (show_stats) {
   2808       show_vts_stats("after GC");
   2809       VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
   2810    }
   2811 
   2812    if (VG_(clo_stats)) {
   2813       static UInt ctr = 1;
   2814       tl_assert(nTab > 0);
   2815       VG_(message)(Vg_DebugMsg,
   2816                   "libhb: VTS GC: #%u  old size %lu  live %lu  (%2llu%%)\n",
   2817                   ctr++, nTab, nLive, (100ULL * (ULong)nLive) / (ULong)nTab);
   2818    }
   2819    /* ---------- END VTS GC ---------- */
   2820 
   2821    /* Decide whether to do VTS pruning.  We have one of three
   2822       settings. */
   2823    static UInt pruning_auto_ctr = 0; /* do not make non-static */
   2824 
   2825    Bool do_pruning = False;
   2826    switch (HG_(clo_vts_pruning)) {
   2827       case 0: /* never */
   2828          break;
   2829       case 1: /* auto */
   2830          do_pruning = (++pruning_auto_ctr % 5) == 0;
   2831          break;
   2832       case 2: /* always */
   2833          do_pruning = True;
   2834          break;
   2835       default:
   2836          tl_assert(0);
   2837    }
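           /* Illustrative note: with the "auto" setting (HG_(clo_vts_pruning)
              == 1, presumably selected by Helgrind's --vts-pruning=auto flag),
              pruning runs on every fifth GC: pruning_auto_ctr counts 1,2,3,4
              (no pruning) and then 5 (pruning), and so on. */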
   2838 
   2839    /* The rest of this routine only handles pruning, so we can
   2840       quit at this point if it is not to be done. */
   2841    if (!do_pruning)
   2842       return;
   2843 
   2844    /* ---------- BEGIN VTS PRUNING ---------- */
   2845    /* We begin by sorting the backing table on its .thr values, so as
   2846       to (1) check they are unique [else something has gone wrong,
   2847       since it means we must have seen some Thr* exiting more than
   2848       once, which can't happen], and (2) so that we can quickly look
   2849       up the dead-thread entries as we work through the VTSs. */
   2850    VG_(sortXA)( verydead_thread_table );
   2851    /* Sanity check: the ThrIDs should be unique and in ascending order. */
   2852    UWord nBT = VG_(sizeXA)( verydead_thread_table );
   2853    if (nBT > 0) {
   2854       ThrID thrid1, thrid2;
   2855       thrid2 = *(ThrID*)VG_(indexXA)( verydead_thread_table, 0 );
   2856       for (i = 1; i < nBT; i++) {
   2857          thrid1 = thrid2;
   2858          thrid2 = *(ThrID*)VG_(indexXA)( verydead_thread_table, i );
   2859          tl_assert(thrid1 < thrid2);
   2860       }
   2861    }
   2862    /* Ok, so the dead thread table has unique and in-order keys. */
   2863 
   2864    /* We will run through the old table, and create a new table and
   2865       set, at the same time setting the .remap entries in the old
   2866       table to point to the new entries.  Then, visit every VtsID in
   2867       the system, and replace all of them with new ones, using the
   2868       .remap entries in the old table.  Finally, we can delete the old
   2869       table and set. */
   2870 
   2871    XArray* /* of VtsTE */ new_tab
   2872       = VG_(newXA)( HG_(zalloc), "libhb.vts_tab__do_GC.new_tab",
   2873                     HG_(free), sizeof(VtsTE) );
   2874 
   2875    /* WordFM VTS* void */
   2876    WordFM* new_set
   2877       = VG_(newFM)( HG_(zalloc), "libhb.vts_tab__do_GC.new_set",
   2878                     HG_(free),
   2879                     (Word(*)(UWord,UWord))VTS__cmp_structural );
   2880 
   2881    /* Visit each old VTS.  For each one:
   2882 
   2883       * make a pruned version
   2884 
   2885       * search new_set for the pruned version, yielding either
   2886         Nothing (not present) or the new VtsID for it.
   2887 
   2888       * if not present, allocate a new VtsID for it, insert (pruned
   2889         VTS, new VtsID) in the tree, and set
   2890         remap_table[old VtsID] = new VtsID.
   2891 
   2892       * if present, set remap_table[old VtsID] = new VtsID, where
   2893         new VtsID was determined by the tree lookup.  Then free up
   2894         the clone.
   2895    */
   2896 
   2897    UWord nBeforePruning = 0, nAfterPruning = 0;
   2898    UWord nSTSsBefore = 0, nSTSsAfter = 0;
   2899    VtsID new_VtsID_ctr = 0;
   2900 
   2901    for (i = 0; i < nTab; i++) {
   2902 
   2903       /* For each old VTS .. */
   2904       VtsTE* old_te  = VG_(indexXA)( vts_tab, i );
   2905       VTS*   old_vts = old_te->vts;
   2906       tl_assert(old_te->remap == VtsID_INVALID);
   2907 
   2908       /* Skip it if not in use */
   2909       if (old_te->rc == 0) {
   2910          tl_assert(old_vts == NULL);
   2911          continue;
   2912       }
   2913       tl_assert(old_vts != NULL);
   2914       tl_assert(old_vts->id == i);
   2915       tl_assert(old_vts->ts != NULL);
   2916 
   2917       /* It is in use. Make a pruned version. */
   2918       nBeforePruning++;
   2919       nSTSsBefore += old_vts->usedTS;
   2920       VTS* new_vts = VTS__subtract("libhb.vts_tab__do_GC.new_vts",
   2921                                    old_vts, verydead_thread_table);
   2922       tl_assert(new_vts->sizeTS == new_vts->usedTS);
   2923       tl_assert(*(ULong*)(&new_vts->ts[new_vts->usedTS])
   2924                 == 0x0ddC0ffeeBadF00dULL);
   2925 
   2926       /* Get rid of the old VTS and the tree entry.  It's a bit more
   2927          complex to incrementally delete the VTSs now than to nuke
   2928          them all after we're done, but the upside is that we don't
   2929          wind up temporarily storing potentially two complete copies
   2930          of each VTS and hence spiking memory use. */
   2931       UWord oldK = 0, oldV = 12345;
   2932       Bool  present = VG_(delFromFM)( vts_set,
   2933                                       &oldK, &oldV, (UWord)old_vts );
   2934       tl_assert(present); /* else it isn't in vts_set ?! */
   2935       tl_assert(oldV == 0); /* no info stored in vts_set val fields */
   2936       tl_assert(oldK == (UWord)old_vts); /* else what did delFromFM find?! */
   2937       /* now free the VTS itself */
   2938       VTS__delete(old_vts);
   2939       old_te->vts = NULL;
   2940       old_vts = NULL;
   2941 
   2942       /* NO MENTIONS of old_vts allowed beyond this point. */
   2943 
   2944       /* Ok, we have the pruned copy in new_vts.  See if a
   2945          structurally identical version is already present in new_set.
   2946          If so, delete the one we just made and move on; if not, add
   2947          it. */
   2948       VTS*  identical_version = NULL;
   2949       UWord valW = 12345;
   2950       if (VG_(lookupFM)(new_set, (UWord*)&identical_version, &valW,
   2951                         (UWord)new_vts)) {
   2952          // already have it
   2953          tl_assert(valW == 0);
   2954          tl_assert(identical_version != NULL);
   2955          tl_assert(identical_version != new_vts);
   2956          VTS__delete(new_vts);
   2957          new_vts = identical_version;
   2958          tl_assert(new_vts->id != VtsID_INVALID);
   2959       } else {
   2960          tl_assert(valW == 12345);
   2961          tl_assert(identical_version == NULL);
   2962          new_vts->id = new_VtsID_ctr++;
   2963          Bool b = VG_(addToFM)(new_set, (UWord)new_vts, 0);
   2964          tl_assert(!b);
   2965          VtsTE new_te;
   2966          new_te.vts      = new_vts;
   2967          new_te.rc       = 0;
   2968          new_te.freelink = VtsID_INVALID;
   2969          new_te.remap    = VtsID_INVALID;
   2970          Word j = VG_(addToXA)( new_tab, &new_te );
   2971          tl_assert(j <= i);
   2972          tl_assert(j == new_VtsID_ctr - 1);
   2973          // stats
   2974          nAfterPruning++;
   2975          nSTSsAfter += new_vts->usedTS;
   2976       }
   2977       old_te->remap = new_vts->id;
   2978 
   2979    } /* for (i = 0; i < nTab; i++) */
   2980 
   2981    /* At this point, we have:
   2982       * the old VTS table, with its .remap entries set,
   2983         and with all .vts == NULL.
   2984       * the old VTS tree should be empty, since it and the old VTSs
   2985         it contained have been incrementally deleted as we worked
   2986         through the old table.
   2987       * the new VTS table, with all .rc == 0, all .freelink and .remap
   2988         == VtsID_INVALID.
   2989       * the new VTS tree.
   2990    */
   2991    tl_assert( VG_(sizeFM)(vts_set) == 0 );
   2992 
   2993    /* Now actually apply the mapping. */
   2994    /* Visit all the VtsIDs in the entire system.  Where do we expect
   2995       to find them?
   2996       (a) in shadow memory -- the LineZs and LineFs
   2997       (b) in our collection of struct _Thrs.
   2998       (c) in our collection of struct _SOs.
   2999       Nowhere else, AFAICS.  Not in the zsm cache, because that just
   3000       got invalidated.
   3001 
   3002       Using the .remap fields in vts_tab, map each old VtsID to a new
   3003       VtsID.  For each old VtsID, dec its rc; and for each new one,
   3004       inc it.  This sets up the new refcounts, and it also gives a
   3005       cheap sanity check of the old ones: all old refcounts should be
   3006       zero after this operation.
   3007    */
   3008 
   3009    /* Do the mappings for (a) above: iterate over the Primary shadow
   3010       mem map (WordFM Addr SecMap*). */
   3011    UWord secmapW = 0;
   3012    VG_(initIterFM)( map_shmem );
   3013    while (VG_(nextIterFM)( map_shmem, NULL, &secmapW )) {
   3014       UWord   j;
   3015       SecMap* sm = (SecMap*)secmapW;
   3016       tl_assert(sm->magic == SecMap_MAGIC);
   3017       /* Deal with the LineZs */
   3018       for (i = 0; i < N_SECMAP_ZLINES; i++) {
   3019          LineZ* lineZ = &sm->linesZ[i];
   3020          if (lineZ->dict[0] == SVal_INVALID)
   3021             continue; /* not in use -- data is in F rep instead */
   3022          for (j = 0; j < 4; j++)
   3023             remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineZ->dict[j]);
   3024       }
   3025       /* Deal with the LineFs */
   3026       for (i = 0; i < sm->linesF_size; i++) {
   3027          LineF* lineF = &sm->linesF[i];
   3028          if (!lineF->inUse)
   3029             continue;
   3030          for (j = 0; j < N_LINE_ARANGE; j++)
   3031             remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineF->w64s[j]);
   3032       }
   3033    }
   3034    VG_(doneIterFM)( map_shmem );
   3035 
   3036    /* Do the mappings for (b) above: visit our collection of struct
   3037       _Thrs. */
   3038    Thread* hgthread = get_admin_threads();
   3039    tl_assert(hgthread);
   3040    while (hgthread) {
   3041       Thr* hbthr = hgthread->hbthr;
   3042       tl_assert(hbthr);
   3043       /* Threads that are listed in the prunable set have their viR
   3044          and viW set to VtsID_INVALID, so we can't mess with them. */
   3045       if (hbthr->llexit_done && hbthr->joinedwith_done) {
   3046          tl_assert(hbthr->viR == VtsID_INVALID);
   3047          tl_assert(hbthr->viW == VtsID_INVALID);
   3048          hgthread = hgthread->admin;
   3049          continue;
   3050       }
   3051       remap_VtsID( vts_tab, new_tab, &hbthr->viR );
   3052       remap_VtsID( vts_tab, new_tab, &hbthr->viW );
   3053       hgthread = hgthread->admin;
   3054    }
   3055 
   3056    /* Do the mappings for (c) above: visit the struct _SOs. */
   3057    SO* so = admin_SO;
   3058    while (so) {
   3059       if (so->viR != VtsID_INVALID)
   3060          remap_VtsID( vts_tab, new_tab, &so->viR );
   3061       if (so->viW != VtsID_INVALID)
   3062          remap_VtsID( vts_tab, new_tab, &so->viW );
   3063       so = so->admin_next;
   3064    }
   3065 
   3066    /* So, we're nearly done (with this incredibly complex operation).
   3067       Check the refcounts for the old VtsIDs all fell to zero, as
   3068       expected.  Any failure is serious. */
   3069    for (i = 0; i < nTab; i++) {
   3070       VtsTE* te = VG_(indexXA)( vts_tab, i );
   3071       tl_assert(te->vts == NULL);
   3072       /* This is the assert proper.  Note we're also asserting
   3073          zeroness for old entries which are unmapped (hence have
   3074          .remap == VtsID_INVALID).  That's OK. */
   3075       tl_assert(te->rc == 0);
   3076    }
   3077 
   3078    /* Install the new table and set. */
   3079    VG_(deleteFM)(vts_set, NULL/*kFin*/, NULL/*vFin*/);
   3080    vts_set = new_set;
   3081    VG_(deleteXA)( vts_tab );
   3082    vts_tab = new_tab;
   3083 
   3084    /* The freelist of vts_tab entries is empty now, because we've
   3085       compacted all of the live entries at the low end of the
   3086       table. */
   3087    vts_tab_freelist = VtsID_INVALID;
   3088 
   3089    /* Sanity check vts_set and vts_tab. */
   3090 
   3091    /* Because all the live entries got slid down to the bottom of vts_tab: */
   3092    tl_assert( VG_(sizeXA)( vts_tab ) == VG_(sizeFM)( vts_set ));
   3093 
   3094    /* Assert that the vts_tab and vts_set entries point at each other
   3095       in the required way */
   3096    UWord wordK = 0, wordV = 0;
   3097    VG_(initIterFM)( vts_set );
   3098    while (VG_(nextIterFM)( vts_set, &wordK, &wordV )) {
   3099       tl_assert(wordK != 0);
   3100       tl_assert(wordV == 0);
   3101       VTS* vts = (VTS*)wordK;
   3102       tl_assert(vts->id != VtsID_INVALID);
   3103       VtsTE* te = VG_(indexXA)( vts_tab, vts->id );
   3104       tl_assert(te->vts == vts);
   3105    }
   3106    VG_(doneIterFM)( vts_set );
   3107 
   3108    /* Also iterate over the table, and check each entry is
   3109       plausible. */
   3110    nTab = VG_(sizeXA)( vts_tab );
   3111    for (i = 0; i < nTab; i++) {
   3112       VtsTE* te = VG_(indexXA)( vts_tab, i );
   3113       tl_assert(te->vts);
   3114       tl_assert(te->vts->id == i);
   3115       tl_assert(te->rc > 0); /* 'cos we just GC'd */
   3116       tl_assert(te->freelink == VtsID_INVALID); /* in use */
   3117       tl_assert(te->remap == VtsID_INVALID); /* not relevant */
   3118    }
   3119 
   3120    /* And we're done.  Bwahahaha. Ha. Ha. Ha. */
   3121    if (VG_(clo_stats)) {
   3122       static UInt ctr = 1;
   3123       tl_assert(nTab > 0);
   3124       VG_(message)(
   3125          Vg_DebugMsg,
   3126          "libhb: VTS PR: #%u  before %lu (avg sz %lu)  "
   3127             "after %lu (avg sz %lu)\n",
   3128          ctr++,
   3129          nBeforePruning, nSTSsBefore / (nBeforePruning ? nBeforePruning : 1),
   3130          nAfterPruning, nSTSsAfter / (nAfterPruning ? nAfterPruning : 1)
   3131       );
   3132    }
   3133    if (0)
   3134    VG_(printf)("VTQ: before pruning %lu (avg sz %lu), "
   3135                "after pruning %lu (avg sz %lu)\n",
   3136                nBeforePruning, nSTSsBefore / nBeforePruning,
   3137                nAfterPruning, nSTSsAfter / nAfterPruning);
   3138    /* ---------- END VTS PRUNING ---------- */
   3139 }
   3140 
   3141 
   3142 /////////////////////////////////////////////////////////
   3143 //                                                     //
   3144 // Vts IDs                                             //
   3145 //                                                     //
   3146 /////////////////////////////////////////////////////////
   3147 
   3148 //////////////////////////
   3149 /* A temporary, max-sized VTS, used as the destination (the first
   3150    argument) in VTS__singleton, VTS__tick and VTS__join operations. */
   3151 static VTS* temp_max_sized_VTS = NULL;
   3152 
   3153 //////////////////////////
   3154 static ULong stats__cmpLEQ_queries = 0;
   3155 static ULong stats__cmpLEQ_misses  = 0;
   3156 static ULong stats__join2_queries  = 0;
   3157 static ULong stats__join2_misses   = 0;
   3158 
   3159 static inline UInt ROL32 ( UInt w, Int n ) {
   3160    w = (w << n) | (w >> (32-n));
   3161    return w;
   3162 }
   3163 static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
   3164    UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
   3165    return hash % nTab;
   3166 }
   3167 
   3168 #define N_CMPLEQ_CACHE 1023
   3169 static
   3170    struct { VtsID vi1; VtsID vi2; Bool leq; }
   3171    cmpLEQ_cache[N_CMPLEQ_CACHE];
   3172 
   3173 #define N_JOIN2_CACHE 1023
   3174 static
   3175    struct { VtsID vi1; VtsID vi2; VtsID res; }
   3176    join2_cache[N_JOIN2_CACHE];
   3177 
   3178 static void VtsID__invalidate_caches ( void ) {
   3179    Int i;
   3180    for (i = 0; i < N_CMPLEQ_CACHE; i++) {
   3181       cmpLEQ_cache[i].vi1 = VtsID_INVALID;
   3182       cmpLEQ_cache[i].vi2 = VtsID_INVALID;
   3183       cmpLEQ_cache[i].leq = False;
   3184    }
   3185    for (i = 0; i < N_JOIN2_CACHE; i++) {
   3186      join2_cache[i].vi1 = VtsID_INVALID;
   3187      join2_cache[i].vi2 = VtsID_INVALID;
   3188      join2_cache[i].res = VtsID_INVALID;
   3189    }
   3190 }
   3191 //////////////////////////
   3192 
   3193 //static Bool VtsID__is_valid ( VtsID vi ) {
   3194 //   VtsTE* ve;
   3195 //   if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
   3196 //      return False;
   3197 //   ve = VG_(indexXA)( vts_tab, vi );
   3198 //   if (!ve->vts)
   3199 //      return False;
   3200 //   tl_assert(ve->vts->id == vi);
   3201 //   return True;
   3202 //}
   3203 
   3204 static VTS* VtsID__to_VTS ( VtsID vi ) {
   3205    VtsTE* te = VG_(indexXA)( vts_tab, vi );
   3206    tl_assert(te->vts);
   3207    return te->vts;
   3208 }
   3209 
   3210 static void VtsID__pp ( VtsID vi ) {
   3211    HChar buf[100];
   3212    VTS* vts = VtsID__to_VTS(vi);
   3213    VTS__show( buf, sizeof(buf)-1, vts );
   3214    buf[sizeof(buf)-1] = 0;
   3215    VG_(printf)("%s", buf);
   3216 }
   3217 
   3218 /* compute partial ordering relation of vi1 and vi2. */
   3219 __attribute__((noinline))
   3220 static Bool VtsID__cmpLEQ_WRK ( VtsID vi1, VtsID vi2 ) {
   3221    UInt hash;
   3222    Bool leq;
   3223    VTS  *v1, *v2;
   3224    //if (vi1 == vi2) return True;
   3225    tl_assert(vi1 != vi2);
   3226    ////++
   3227    stats__cmpLEQ_queries++;
   3228    hash = hash_VtsIDs(vi1, vi2, N_CMPLEQ_CACHE);
   3229    if (cmpLEQ_cache[hash].vi1 == vi1
   3230        && cmpLEQ_cache[hash].vi2 == vi2)
   3231       return cmpLEQ_cache[hash].leq;
   3232    stats__cmpLEQ_misses++;
   3233    ////--
   3234    v1  = VtsID__to_VTS(vi1);
   3235    v2  = VtsID__to_VTS(vi2);
   3236    leq = VTS__cmpLEQ( v1, v2 ) == 0;
   3237    ////++
   3238    cmpLEQ_cache[hash].vi1 = vi1;
   3239    cmpLEQ_cache[hash].vi2 = vi2;
   3240    cmpLEQ_cache[hash].leq = leq;
   3241    ////--
   3242    return leq;
   3243 }
   3244 static inline Bool VtsID__cmpLEQ ( VtsID vi1, VtsID vi2 ) {
   3245    return LIKELY(vi1 == vi2)  ? True  : VtsID__cmpLEQ_WRK(vi1, vi2);
   3246 }
   3247 
   3248 /* compute binary join */
   3249 __attribute__((noinline))
   3250 static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
   3251    UInt  hash;
   3252    VtsID res;
   3253    VTS   *vts1, *vts2;
   3254    //if (vi1 == vi2) return vi1;
   3255    tl_assert(vi1 != vi2);
   3256    ////++
   3257    stats__join2_queries++;
   3258    hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
   3259    if (join2_cache[hash].vi1 == vi1
   3260        && join2_cache[hash].vi2 == vi2)
   3261       return join2_cache[hash].res;
   3262    stats__join2_misses++;
   3263    ////--
   3264    vts1 = VtsID__to_VTS(vi1);
   3265    vts2 = VtsID__to_VTS(vi2);
   3266    temp_max_sized_VTS->usedTS = 0;
   3267    VTS__join(temp_max_sized_VTS, vts1,vts2);
   3268    res = vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
   3269    ////++
   3270    join2_cache[hash].vi1 = vi1;
   3271    join2_cache[hash].vi2 = vi2;
   3272    join2_cache[hash].res = res;
   3273    ////--
   3274    return res;
   3275 }
   3276 static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
   3277    return LIKELY(vi1 == vi2)  ? vi1  : VtsID__join2_WRK(vi1, vi2);
   3278 }
   3279 
   3280 /* create a singleton VTS, namely [thr:1] */
   3281 static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
   3282    temp_max_sized_VTS->usedTS = 0;
   3283    VTS__singleton(temp_max_sized_VTS, thr,tym);
   3284    return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
   3285 }
   3286 
   3287 /* tick operation, creates value 1 if specified index is absent */
   3288 static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
   3289    VTS* vts = VtsID__to_VTS(vi);
   3290    temp_max_sized_VTS->usedTS = 0;
   3291    VTS__tick(temp_max_sized_VTS, idx,vts);
   3292    return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
   3293 }
   3294 
   3295 /* index into a VTS (only for assertions) */
   3296 static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
   3297    VTS* vts = VtsID__to_VTS(vi);
   3298    return VTS__indexAt_SLOW( vts, idx );
   3299 }
   3300 
   3301 /* Assuming that !cmpLEQ(vi1, vi2), find the Thr of the first (or
   3302    any, really) element in vi1 which is pointwise greater-than the
   3303    corresponding element in vi2.  If no such element exists, return
   3304    NULL.  This needs to be fairly quick since it is called every time
   3305    a race is detected. */
   3306 static Thr* VtsID__findFirst_notLEQ ( VtsID vi1, VtsID vi2 )
   3307 {
   3308    VTS  *vts1, *vts2;
   3309    Thr*  diffthr;
   3310    ThrID diffthrid;
   3311    tl_assert(vi1 != vi2);
   3312    vts1 = VtsID__to_VTS(vi1);
   3313    vts2 = VtsID__to_VTS(vi2);
   3314    tl_assert(vts1 != vts2);
   3315    diffthrid = VTS__cmpLEQ(vts1, vts2);
   3316    diffthr = Thr__from_ThrID(diffthrid);
   3317    tl_assert(diffthr); /* else they are LEQ ! */
   3318    return diffthr;
   3319 }
   3320 
   3321 
   3322 /////////////////////////////////////////////////////////
   3323 //                                                     //
   3324 // Filters                                             //
   3325 //                                                     //
   3326 /////////////////////////////////////////////////////////
   3327 
   3328 /* Forget everything we know -- clear the filter and let everything
   3329    through.  This needs to be as fast as possible, since it is called
   3330    every time the running thread changes, and every time a thread's
   3331    vector clocks change, which can be quite frequent.  The obvious
   3332    fast way to do this is simply to stuff in tags which we know are
   3333    not going to match anything, since they're not aligned to the start
   3334    of a line. */
   3335 static void Filter__clear ( Filter* fi, const HChar* who )
   3336 {
   3337    UWord i;
   3338    if (0) VG_(printf)("  Filter__clear(%p, %s)\n", fi, who);
   3339    for (i = 0; i < FI_NUM_LINES; i += 8) {
   3340       fi->tags[i+0] = 1; /* impossible value -- cannot match */
   3341       fi->tags[i+1] = 1;
   3342       fi->tags[i+2] = 1;
   3343       fi->tags[i+3] = 1;
   3344       fi->tags[i+4] = 1;
   3345       fi->tags[i+5] = 1;
   3346       fi->tags[i+6] = 1;
   3347       fi->tags[i+7] = 1;
   3348    }
   3349    tl_assert(i == FI_NUM_LINES);
   3350 }
   3351 
   3352 /* Clearing an arbitrary range in the filter.  Unfortunately
   3353    we have to do this due to core-supplied new/die-mem events. */
   3354 
   3355 static void Filter__clear_1byte ( Filter* fi, Addr a )
   3356 {
   3357    Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
   3358    UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
   3359    FiLine* line   = &fi->lines[lineno];
   3360    UWord   loff   = (a - atag) / 8;
   3361    UShort  mask   = 0x3 << (2 * (a & 7));
   3362    /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
   3363    if (LIKELY( fi->tags[lineno] == atag )) {
   3364       /* hit.  clear the bits. */
   3365       UShort  u16  = line->u16s[loff];
   3366       line->u16s[loff] = u16 & ~mask; /* clear them */
   3367    } else {
   3368       /* miss.  The filter doesn't hold this address, so ignore. */
   3369    }
   3370 }
   3371 
   3372 static void Filter__clear_8bytes_aligned ( Filter* fi, Addr a )
   3373 {
   3374    Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
   3375    UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
   3376    FiLine* line   = &fi->lines[lineno];
   3377    UWord   loff   = (a - atag) / 8;
   3378    if (LIKELY( fi->tags[lineno] == atag )) {
   3379       line->u16s[loff] = 0;
   3380    } else {
   3381       /* miss.  The filter doesn't hold this address, so ignore. */
   3382    }
   3383 }
   3384 
   3385 static void Filter__clear_range ( Filter* fi, Addr a, UWord len )
   3386 {
   3387   //VG_(printf)("%lu ", len);
   3388    /* slowly do part preceding 8-alignment */
   3389    while (UNLIKELY(!VG_IS_8_ALIGNED(a)) && LIKELY(len > 0)) {
   3390       Filter__clear_1byte( fi, a );
   3391       a++;
   3392       len--;
   3393    }
   3394    /* vector loop */
   3395    while (len >= 8) {
   3396       Filter__clear_8bytes_aligned( fi, a );
   3397       a += 8;
   3398       len -= 8;
   3399    }
   3400    /* slowly do tail */
   3401    while (UNLIKELY(len > 0)) {
   3402       Filter__clear_1byte( fi, a );
   3403       a++;
   3404       len--;
   3405    }
   3406 }
   3407 
   3408 
   3409 /* ------ Read handlers for the filter. ------ */
   3410 
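        /* Illustrative summary, derived from the masks used below: each
           line->u16s[loff] entry covers 8 bytes of memory, 2 bits per byte.
           For byte k (k = 0 .. 7), bit 2k+1 is set once a read or write of
           that byte has been seen, and bit 2k once a write has been seen.
           A read may be skipped if bit 2k+1 is already set; a write only if
           both bits are.  Hence the read handlers use masks built from
           0x2/0xA/0xAA/0xAAAA, while the write handlers (further below)
           require and set both bits, using 0x3/0xF/0xFF/0xFFFF. */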
   3411 static inline Bool Filter__ok_to_skip_crd64 ( Filter* fi, Addr a )
   3412 {
   3413    if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
   3414       return False;
   3415    {
   3416      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
   3417      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
   3418      FiLine* line   = &fi->lines[lineno];
   3419      UWord   loff   = (a - atag) / 8;
   3420      UShort  mask   = 0xAAAA;
   3421      if (LIKELY( fi->tags[lineno] == atag )) {
   3422         /* hit.  check line and update. */
   3423         UShort u16  = line->u16s[loff];
   3424         Bool   ok   = (u16 & mask) == mask; /* all R bits set? */
   3425         line->u16s[loff] = u16 | mask; /* set them */
   3426         return ok;
   3427      } else {
   3428         /* miss.  nuke existing line and re-use it. */
   3429         UWord i;
   3430         fi->tags[lineno] = atag;
   3431         for (i = 0; i < FI_LINE_SZB / 8; i++)
   3432            line->u16s[i] = 0;
   3433         line->u16s[loff] = mask;
   3434         return False;
   3435      }
   3436    }
   3437 }
   3438 
   3439 static inline Bool Filter__ok_to_skip_crd32 ( Filter* fi, Addr a )
   3440 {
   3441    if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
   3442       return False;
   3443    {
   3444      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
   3445      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
   3446      FiLine* line   = &fi->lines[lineno];
   3447      UWord   loff   = (a - atag) / 8;
   3448      UShort  mask   = 0xAA << (2 * (a & 4)); /* 0xAA00 or 0x00AA */
   3449      if (LIKELY( fi->tags[lineno] == atag )) {
   3450         /* hit.  check line and update. */
   3451         UShort  u16  = line->u16s[loff];
   3452         Bool    ok   = (u16 & mask) == mask; /* 4 x R bits set? */
   3453         line->u16s[loff] = u16 | mask; /* set them */
   3454         return ok;
   3455      } else {
   3456         /* miss.  nuke existing line and re-use it. */
   3457         UWord   i;
   3458         fi->tags[lineno] = atag;
   3459         for (i = 0; i < FI_LINE_SZB / 8; i++)
   3460            line->u16s[i] = 0;
   3461         line->u16s[loff] = mask;
   3462         return False;
   3463      }
   3464    }
   3465 }
   3466 
   3467 static inline Bool Filter__ok_to_skip_crd16 ( Filter* fi, Addr a )
   3468 {
   3469    if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
   3470       return False;
   3471    {
   3472      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
   3473      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
   3474      FiLine* line   = &fi->lines[lineno];
   3475      UWord   loff   = (a - atag) / 8;
   3476      UShort  mask   = 0xA << (2 * (a & 6));
   3477      /* mask is A000, 0A00, 00A0 or 000A */
   3478      if (LIKELY( fi->tags[lineno] == atag )) {
   3479         /* hit.  check line and update. */
   3480         UShort  u16  = line->u16s[loff];
   3481         Bool    ok   = (u16 & mask) == mask; /* 2 x R bits set? */
   3482         line->u16s[loff] = u16 | mask; /* set them */
   3483         return ok;
   3484      } else {
   3485         /* miss.  nuke existing line and re-use it. */
   3486         UWord   i;
   3487         fi->tags[lineno] = atag;
   3488         for (i = 0; i < FI_LINE_SZB / 8; i++)
   3489            line->u16s[i] = 0;
   3490         line->u16s[loff] = mask;
   3491         return False;
   3492      }
   3493    }
   3494 }
   3495 
   3496 static inline Bool Filter__ok_to_skip_crd08 ( Filter* fi, Addr a )
   3497 {
   3498    {
   3499      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
   3500      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
   3501      FiLine* line   = &fi->lines[lineno];
   3502      UWord   loff   = (a - atag) / 8;
   3503      UShort  mask   = 0x2 << (2 * (a & 7));
   3504      /* mask is 8000, 2000, 0800, 0200, 0080, 0020, 0008 or 0002 */
   3505      if (LIKELY( fi->tags[lineno] == atag )) {
   3506         /* hit.  check line and update. */
   3507         UShort  u16  = line->u16s[loff];
   3508         Bool    ok   = (u16 & mask) == mask; /* 1 x R bits set? */
   3509         line->u16s[loff] = u16 | mask; /* set them */
   3510         return ok;
   3511      } else {
   3512         /* miss.  nuke existing line and re-use it. */
   3513         UWord   i;
   3514         fi->tags[lineno] = atag;
   3515         for (i = 0; i < FI_LINE_SZB / 8; i++)
   3516            line->u16s[i] = 0;
   3517         line->u16s[loff] = mask;
   3518         return False;
   3519      }
   3520    }
   3521 }
   3522 
   3523 
   3524 /* ------ Write handlers for the filter. ------ */
   3525 
   3526 static inline Bool Filter__ok_to_skip_cwr64 ( Filter* fi, Addr a )
   3527 {
   3528    if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
   3529       return False;
   3530    {
   3531      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
   3532      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
   3533      FiLine* line   = &fi->lines[lineno];
   3534      UWord   loff   = (a - atag) / 8;
   3535      UShort  mask   = 0xFFFF;
   3536      if (LIKELY( fi->tags[lineno] == atag )) {
   3537         /* hit.  check line and update. */
   3538         UShort u16  = line->u16s[loff];
   3539         Bool   ok   = (u16 & mask) == mask; /* all R & W bits set? */
   3540         line->u16s[loff] = u16 | mask; /* set them */
   3541         return ok;
   3542      } else {
   3543         /* miss.  nuke existing line and re-use it. */
   3544         UWord i;
   3545         fi->tags[lineno] = atag;
   3546         for (i = 0; i < FI_LINE_SZB / 8; i++)
   3547            line->u16s[i] = 0;
   3548         line->u16s[loff] = mask;
   3549         return False;
   3550      }
   3551    }
   3552 }
   3553 
   3554 static inline Bool Filter__ok_to_skip_cwr32 ( Filter* fi, Addr a )
   3555 {
   3556    if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
   3557       return False;
   3558    {
   3559      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
   3560      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
   3561      FiLine* line   = &fi->lines[lineno];
   3562      UWord   loff   = (a - atag) / 8;
   3563      UShort  mask   = 0xFF << (2 * (a & 4)); /* 0xFF00 or 0x00FF */
   3564      if (LIKELY( fi->tags[lineno] == atag )) {
   3565         /* hit.  check line and update. */
   3566         UShort  u16  = line->u16s[loff];
   3567         Bool    ok   = (u16 & mask) == mask; /* 4 x R & W bits set? */
   3568         line->u16s[loff] = u16 | mask; /* set them */
   3569         return ok;
   3570      } else {
   3571         /* miss.  nuke existing line and re-use it. */
   3572         UWord   i;
   3573         fi->tags[lineno] = atag;
   3574         for (i = 0; i < FI_LINE_SZB / 8; i++)
   3575            line->u16s[i] = 0;
   3576         line->u16s[loff] = mask;
   3577         return False;
   3578      }
   3579    }
   3580 }
   3581 
   3582 static inline Bool Filter__ok_to_skip_cwr16 ( Filter* fi, Addr a )
   3583 {
   3584    if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
   3585       return False;
   3586    {
   3587      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
   3588      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
   3589      FiLine* line   = &fi->lines[lineno];
   3590      UWord   loff   = (a - atag) / 8;
   3591      UShort  mask   = 0xF << (2 * (a & 6));
   3592      /* mask is F000, 0F00, 00F0 or 000F */
   3593      if (LIKELY( fi->tags[lineno] == atag )) {
   3594         /* hit.  check line and update. */
   3595         UShort  u16  = line->u16s[loff];
   3596         Bool    ok   = (u16 & mask) == mask; /* 2 x R & W bits set? */
   3597         line->u16s[loff] = u16 | mask; /* set them */
   3598         return ok;
   3599      } else {
   3600         /* miss.  nuke existing line and re-use it. */
   3601         UWord   i;
   3602         fi->tags[lineno] = atag;
   3603         for (i = 0; i < FI_LINE_SZB / 8; i++)
   3604            line->u16s[i] = 0;
   3605         line->u16s[loff] = mask;
   3606         return False;
   3607      }
   3608    }
   3609 }
   3610 
   3611 static inline Bool Filter__ok_to_skip_cwr08 ( Filter* fi, Addr a )
   3612 {
   3613    {
   3614      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
   3615      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
   3616      FiLine* line   = &fi->lines[lineno];
   3617      UWord   loff   = (a - atag) / 8;
   3618      UShort  mask   = 0x3 << (2 * (a & 7));
   3619      /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
   3620      if (LIKELY( fi->tags[lineno] == atag )) {
   3621         /* hit.  check line and update. */
   3622         UShort  u16  = line->u16s[loff];
   3623         Bool    ok   = (u16 & mask) == mask; /* 1 x R & W bits set? */
   3624         line->u16s[loff] = u16 | mask; /* set them */
   3625         return ok;
   3626      } else {
   3627         /* miss.  nuke existing line and re-use it. */
   3628         UWord   i;
   3629         fi->tags[lineno] = atag;
   3630         for (i = 0; i < FI_LINE_SZB / 8; i++)
   3631            line->u16s[i] = 0;
   3632         line->u16s[loff] = mask;
   3633         return False;
   3634      }
   3635    }
   3636 }
   3637 
   3638 
   3639 /////////////////////////////////////////////////////////
   3640 //                                                     //
   3641 // Threads                                             //
   3642 //                                                     //
   3643 /////////////////////////////////////////////////////////
   3644 
   3645 /* Maps ThrID values to their Thr*s (which contain ThrID values that
   3646    should point back to the relevant slot in the array).  Lowest
   3647    numbered slot (0) is for thrid = 1024, (1) is for 1025, etc. */
   3648 static XArray* /* of Thr* */ thrid_to_thr_map = NULL;
   3649 
   3650 /* And a counter to dole out ThrID values.  For rationale/background,
   3651    see comments on definition of ScalarTS (far) above. */
   3652 static ThrID thrid_counter = 1024; /* runs up to ThrID_MAX_VALID */
   3653 
   3654 static ThrID Thr__to_ThrID ( Thr* thr ) {
   3655    return thr->thrid;
   3656 }
   3657 static Thr* Thr__from_ThrID ( UInt thrid ) {
   3658    Thr* thr = *(Thr**)VG_(indexXA)( thrid_to_thr_map, thrid - 1024 );
   3659    tl_assert(thr->thrid == thrid);
   3660    return thr;
   3661 }
   3662 
   3663 static Thr* Thr__new ( void )
   3664 {
   3665    Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
   3666    thr->viR = VtsID_INVALID;
   3667    thr->viW = VtsID_INVALID;
   3668    thr->llexit_done = False;
   3669    thr->joinedwith_done = False;
   3670    thr->filter = HG_(zalloc)( "libhb.Thr__new.2", sizeof(Filter) );
   3671    if (HG_(clo_history_level) == 1)
   3672       thr->local_Kws_n_stacks
   3673          = VG_(newXA)( HG_(zalloc),
   3674                        "libhb.Thr__new.3 (local_Kws_and_stacks)",
   3675                        HG_(free), sizeof(ULong_n_EC) );
   3676 
   3677    /* Add this Thr* <-> ThrID binding to the mapping, and
   3678       cross-check */
   3679    if (!thrid_to_thr_map) {
   3680       thrid_to_thr_map = VG_(newXA)( HG_(zalloc), "libhb.Thr__new.4",
   3681                                      HG_(free), sizeof(Thr*) );
   3682       tl_assert(thrid_to_thr_map);
   3683    }
   3684 
   3685    if (thrid_counter >= ThrID_MAX_VALID) {
   3686       /* We're hosed.  We have to stop. */
   3687       scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
   3688    }
   3689 
   3690    thr->thrid = thrid_counter++;
   3691    Word ix = VG_(addToXA)( thrid_to_thr_map, &thr );
   3692    tl_assert(ix + 1024 == thr->thrid);
   3693 
   3694    return thr;
   3695 }
   3696 
   3697 static void note_local_Kw_n_stack_for ( Thr* thr )
   3698 {
   3699    Word       nPresent;
   3700    ULong_n_EC pair;
   3701    tl_assert(thr);
   3702 
   3703    // We only collect this info at history level 1 (approx)
   3704    if (HG_(clo_history_level) != 1)
   3705       return;
   3706 
   3707    /* This is the scalar Kw for thr. */
   3708    pair.ull = VtsID__indexAt( thr->viW, thr );
   3709    pair.ec  = main_get_EC( thr );
   3710    tl_assert(pair.ec);
   3711    tl_assert(thr->local_Kws_n_stacks);
   3712 
   3713    /* check that we're not adding duplicates */
   3714    nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
   3715 
   3716    /* Throw away old stacks, if necessary.  We can't accumulate stuff
   3717       indefinitely. */
   3718    if (nPresent >= N_KWs_N_STACKs_PER_THREAD) {
   3719       VG_(dropHeadXA)( thr->local_Kws_n_stacks, nPresent / 2 );
   3720       nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
   3721       if (0)
   3722          VG_(printf)("LOCAL Kw: thr %p,  Kw %llu,  ec %p (!!! gc !!!)\n",
   3723                      thr, pair.ull, pair.ec );
   3724    }
   3725 
   3726    if (nPresent > 0) {
   3727       ULong_n_EC* prevPair
   3728          = (ULong_n_EC*)VG_(indexXA)( thr->local_Kws_n_stacks, nPresent-1 );
   3729       tl_assert( prevPair->ull <= pair.ull );
   3730    }
   3731 
   3732    if (nPresent == 0)
   3733       pair.ec = NULL;
   3734 
   3735    VG_(addToXA)( thr->local_Kws_n_stacks, &pair );
   3736 
   3737    if (0)
   3738       VG_(printf)("LOCAL Kw: thr %p,  Kw %llu,  ec %p\n",
   3739                   thr, pair.ull, pair.ec );
   3740    if (0)
   3741       VG_(pp_ExeContext)(pair.ec);
   3742 }
   3743 
   3744 static Int cmp__ULong_n_EC__by_ULong ( const ULong_n_EC* pair1,
   3745                                        const ULong_n_EC* pair2 )
   3746 {
   3747    if (pair1->ull < pair2->ull) return -1;
   3748    if (pair1->ull > pair2->ull) return 1;
   3749    return 0;
   3750 }
   3751 
   3752 
   3753 /////////////////////////////////////////////////////////
   3754 //                                                     //
   3755 // Shadow Values                                       //
   3756 //                                                     //
   3757 /////////////////////////////////////////////////////////
   3758 
   3759 // type SVal, SVal_INVALID and SVal_NOACCESS are defined by
   3760 // hb_zsm.h.  We have to do everything else here.
   3761 
   3762 /* SVal is 64 bit unsigned int.
   3763 
   3764       <---------30--------->    <---------30--------->
   3765    00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X   C(Rmin,Wmin)
   3766    10 X--------------------X XX X--------------------X   A: SVal_NOACCESS
   3767    11 0--------------------0 00 0--------------------0   A: SVal_INVALID
   3768 
   3769 */
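        /* Worked example (illustrative only): SVal__mkC(3,5) yields
           0x0000000300000005; its top two bits are 00, so SVal__isC is True,
           and SVal__unC_Rmin / SVal__unC_Wmin recover 3 and 5 respectively.
           SVal__mkA() yields 0x8000000000000000 (tag bits 10). */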
   3770 #define SVAL_TAGMASK (3ULL << 62)
   3771 
   3772 static inline Bool SVal__isC ( SVal s ) {
   3773    return (0ULL << 62) == (s & SVAL_TAGMASK);
   3774 }
   3775 static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
   3776    //tl_assert(VtsID__is_valid(rmini));
   3777    //tl_assert(VtsID__is_valid(wmini));
   3778    return (((ULong)rmini) << 32) | ((ULong)wmini);
   3779 }
   3780 static inline VtsID SVal__unC_Rmin ( SVal s ) {
   3781    tl_assert(SVal__isC(s));
   3782    return (VtsID)(s >> 32);
   3783 }
   3784 static inline VtsID SVal__unC_Wmin ( SVal s ) {
   3785    tl_assert(SVal__isC(s));
   3786    return (VtsID)(s & 0xFFFFFFFFULL);
   3787 }
   3788 
   3789 static inline Bool SVal__isA ( SVal s ) {
   3790    return (2ULL << 62) == (s & SVAL_TAGMASK);
   3791 }
   3792 static inline SVal SVal__mkA ( void ) {
   3793    return 2ULL << 62;
   3794 }
   3795 
   3796 /* Direct callback from lib_zsm. */
   3797 static void SVal__rcinc ( SVal s ) {
   3798    if (SVal__isC(s)) {
   3799       VtsID__rcinc( SVal__unC_Rmin(s) );
   3800       VtsID__rcinc( SVal__unC_Wmin(s) );
   3801    }
   3802 }
   3803 
   3804 /* Direct callback from lib_zsm. */
   3805 static void SVal__rcdec ( SVal s ) {
   3806    if (SVal__isC(s)) {
   3807       VtsID__rcdec( SVal__unC_Rmin(s) );
   3808       VtsID__rcdec( SVal__unC_Wmin(s) );
   3809    }
   3810 }
   3811 
   3812 
   3813 /////////////////////////////////////////////////////////
   3814 //                                                     //
   3815 // Change-event map2                                   //
   3816 //                                                     //
   3817 /////////////////////////////////////////////////////////
   3818 
   3819 #define EVENT_MAP_GC_DISCARD_FRACTION  0.5
   3820 
   3821 /* This is in two parts:
   3822 
   3823    1. A hash table of RCECs.  This is a set of reference-counted stack
   3824       traces.  When the reference count of a stack trace becomes zero,
   3825       it is removed from the set and freed up.  The intent is to have
   3826       a set of stack traces which can be referred to from (2), but to
   3827       only represent each one once.  The set is indexed/searched by
   3828       ordering on the stack trace vectors.
   3829 
   3830    2. A SparseWA of OldRefs.  These store information about each old
   3831       ref that we need to record.  It is indexed by address of the
   3832       location for which the information is recorded.  For LRU
   3833       purposes, each OldRef also contains a generation number,
   3834       indicating when it was most recently accessed.
   3835 
   3836       The important part of an OldRef is, however, its accs[] array.
   3837       This is an array of N_OLDREF_ACCS which binds (thread, R/W,
   3838       size) triples to RCECs.  This allows us to collect the last
   3839       access-traceback by up to N_OLDREF_ACCS different triples for
   3840       this location.  The accs[] array is a MTF-array.  If a binding
   3841       falls off the end, that's too bad -- we will lose info about
   3842       that triple's access to this location.
   3843 
   3844       When the SparseWA becomes too big, we can throw away the OldRefs
   3845       whose generation numbers are below some threshold; hence doing
   3846       approximate LRU discarding.  For each discarded OldRef we must
   3847       of course decrement the reference count on all the RCECs it
   3848       refers to, in order that entries from (1) eventually get
   3849       discarded too.
   3850 
   3851    A major improvement in reliability of this mechanism would be to
   3852    have a dynamically sized OldRef.accs[] array, so no entries ever
   3853    fall off the end.  In investigations (Dec 08) it appears that a
   3854    major cause for the non-availability of conflicting-access traces
   3855    in race reports is caused by the fixed size of this array.  I
   3856    suspect for most OldRefs, only a few entries are used, but for a
   3857    minority of cases there is an overflow, leading to info lossage.
   3858    Investigations also suggest this is very workload and scheduling
   3859    sensitive.  Therefore a dynamic sizing would be better.
   3860 
   3861    However, dynamic sizing would defeat the use of a PoolAllocator
   3862    for OldRef structures.  And that's important for performance.  So
   3863    it's not straightforward to do.
   3864 */
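        /* Minimal sketch of the move-to-front idea described above.  This is
           illustrative only and is not the code this file actually uses; it
           assumes an OldRef* ref and the access's thrid, isW and szLg2B are
           in scope (OldRef, Thr_n_RCEC and N_OLDREF_ACCS are defined in part
           (2) below):

              Word i, j;
              for (i = 0; i < N_OLDREF_ACCS; i++) {
                 Thr_n_RCEC* acc = &ref->accs[i];
                 if (acc->thrid == thrid && acc->isW == isW
                     && acc->szLg2B == szLg2B)
                    break;                        // found an existing binding
              }
              if (i < N_OLDREF_ACCS && i > 0) {
                 Thr_n_RCEC found = ref->accs[i];
                 for (j = i; j > 0; j--)          // slide the others down ..
                    ref->accs[j] = ref->accs[j-1];
                 ref->accs[0] = found;            // .. and put the hit first
              }
        */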
   3865 
   3866 
   3867 static UWord stats__ctxt_rcdec1 = 0;
   3868 static UWord stats__ctxt_rcdec2 = 0;
   3869 static UWord stats__ctxt_rcdec3 = 0;
   3870 static UWord stats__ctxt_rcdec_calls = 0;
   3871 static UWord stats__ctxt_rcdec_discards = 0;
   3872 static UWord stats__ctxt_rcdec1_eq = 0;
   3873 
   3874 static UWord stats__ctxt_tab_curr = 0;
   3875 static UWord stats__ctxt_tab_max  = 0;
   3876 
   3877 static UWord stats__ctxt_tab_qs   = 0;
   3878 static UWord stats__ctxt_tab_cmps = 0;
   3879 
   3880 
   3881 ///////////////////////////////////////////////////////
   3882 //// Part (1): A hash table of RCECs
   3883 ///
   3884 
   3885 #define N_FRAMES 8
   3886 
   3887 // (UInt) `echo "Reference Counted Execution Context" | md5sum`
   3888 #define RCEC_MAGIC 0xab88abb2UL
   3889 
   3890 //#define N_RCEC_TAB 98317 /* prime */
   3891 #define N_RCEC_TAB 196613 /* prime */
   3892 
   3893 typedef
   3894    struct _RCEC {
   3895       UWord magic;  /* sanity check only */
   3896       struct _RCEC* next;
   3897       UWord rc;
   3898       UWord rcX; /* used for crosschecking */
   3899       UWord frames_hash;          /* hash of all the frames */
   3900       UWord frames[N_FRAMES];
   3901    }
   3902    RCEC;
   3903 
   3904 static RCEC** contextTab = NULL; /* hash table of RCEC*s */
   3905 
   3906 
   3907 /* Gives an arbitrary total order on RCEC .frames fields */
   3908 static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
   3909    Word i;
   3910    tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
   3911    tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
   3912    if (ec1->frames_hash < ec2->frames_hash) return -1;
   3913    if (ec1->frames_hash > ec2->frames_hash) return  1;
   3914    for (i = 0; i < N_FRAMES; i++) {
   3915       if (ec1->frames[i] < ec2->frames[i]) return -1;
   3916       if (ec1->frames[i] > ec2->frames[i]) return  1;
   3917    }
   3918    return 0;
   3919 }
   3920 
   3921 
   3922 /* Dec the ref of this RCEC. */
   3923 static void ctxt__rcdec ( RCEC* ec )
   3924 {
   3925    stats__ctxt_rcdec_calls++;
   3926    tl_assert(ec && ec->magic == RCEC_MAGIC);
   3927    tl_assert(ec->rc > 0);
   3928    ec->rc--;
   3929 }
   3930 
   3931 static void ctxt__rcinc ( RCEC* ec )
   3932 {
   3933    tl_assert(ec && ec->magic == RCEC_MAGIC);
   3934    ec->rc++;
   3935 }
   3936 
   3937 
   3938 //////////// BEGIN RCEC pool allocator
   3939 static PoolAlloc* rcec_pool_allocator;
   3940 
   3941 static RCEC* alloc_RCEC ( void ) {
   3942    return VG_(allocEltPA) ( rcec_pool_allocator );
   3943 }
   3944 
   3945 static void free_RCEC ( RCEC* rcec ) {
   3946    tl_assert(rcec->magic == RCEC_MAGIC);
   3947    VG_(freeEltPA)( rcec_pool_allocator, rcec );
   3948 }
   3949 //////////// END RCEC pool allocator
   3950 
   3951 
   3952 /* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
   3953    move it one step closer to the front of the list, so as to make
   3954    subsequent searches for it cheaper. */
   3955 static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
   3956 {
   3957    RCEC *ec0, *ec1, *ec2;
   3958    if (ec == *headp)
   3959       tl_assert(0); /* already at head of list */
   3960    tl_assert(ec != NULL);
   3961    ec0 = *headp;
   3962    ec1 = NULL;
   3963    ec2 = NULL;
   3964    while (True) {
   3965       if (ec0 == NULL || ec0 == ec) break;
   3966       ec2 = ec1;
   3967       ec1 = ec0;
   3968       ec0 = ec0->next;
   3969    }
   3970    tl_assert(ec0 == ec);
   3971    if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
   3972       RCEC* tmp;
   3973       /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
   3974          predecessor.  Swap ec0 and ec1, that is, move ec0 one step
   3975          closer to the start of the list. */
   3976       tl_assert(ec2->next == ec1);
   3977       tl_assert(ec1->next == ec0);
   3978       tmp = ec0->next;
   3979       ec2->next = ec0;
   3980       ec0->next = ec1;
   3981       ec1->next = tmp;
   3982    }
   3983    else
   3984    if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
   3985       /* it's second in the list. */
   3986       tl_assert(*headp == ec1);
   3987       tl_assert(ec1->next == ec0);
   3988       ec1->next = ec0->next;
   3989       ec0->next = ec1;
   3990       *headp = ec0;
   3991    }
   3992 }
   3993 
   3994 
   3995 /* Find the given RCEC in the tree, and return a pointer to it.  Or,
   3996    if not present, add the given one to the tree (by making a copy of
   3997    it, so the caller can immediately deallocate the original) and
   3998    return a pointer to the copy.  The caller can safely have 'example'
   3999    on its stack, since we will always return a pointer to a copy of
   4000    it, not to the original.  Note that the inserted node will have .rc
   4001    of zero and so the caller must immediately increment it. */
   4002 __attribute__((noinline))
   4003 static RCEC* ctxt__find_or_add ( RCEC* example )
   4004 {
   4005    UWord hent;
   4006    RCEC* copy;
   4007    tl_assert(example && example->magic == RCEC_MAGIC);
   4008    tl_assert(example->rc == 0);
   4009 
   4010    /* Search the hash table to see if we already have it. */
   4011    stats__ctxt_tab_qs++;
   4012    hent = example->frames_hash % N_RCEC_TAB;
   4013    copy = contextTab[hent];
   4014    while (1) {
   4015       if (!copy) break;
   4016       tl_assert(copy->magic == RCEC_MAGIC);
   4017       stats__ctxt_tab_cmps++;
   4018       if (0 == RCEC__cmp_by_frames(copy, example)) break;
   4019       copy = copy->next;
   4020    }
   4021 
   4022    if (copy) {
   4023       tl_assert(copy != example);
   4024       /* optimisation: if it's not at the head of its list, move 1
   4025          step fwds, to make future searches cheaper */
   4026       if (copy != contextTab[hent]) {
   4027          move_RCEC_one_step_forward( &contextTab[hent], copy );
   4028       }
   4029    } else {
   4030       copy = alloc_RCEC();
   4031       tl_assert(copy != example);
   4032       *copy = *example;
   4033       copy->next = contextTab[hent];
   4034       contextTab[hent] = copy;
   4035       stats__ctxt_tab_curr++;
   4036       if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
   4037          stats__ctxt_tab_max = stats__ctxt_tab_curr;
   4038    }
   4039    return copy;
   4040 }
   4041 
   4042 static inline UWord ROLW ( UWord w, Int n )
   4043 {
   4044    Int bpw = 8 * sizeof(UWord);
   4045    w = (w << n) | (w >> (bpw-n));
   4046    return w;
   4047 }
   4048 
   4049 __attribute__((noinline))
   4050 static RCEC* get_RCEC ( Thr* thr )
   4051 {
   4052    UWord hash, i;
   4053    RCEC  example;
   4054    example.magic = RCEC_MAGIC;
   4055    example.rc = 0;
   4056    example.rcX = 0;
   4057    example.next = NULL;
   4058    main_get_stacktrace( thr, &example.frames[0], N_FRAMES );
   4059    hash = 0;
   4060    for (i = 0; i < N_FRAMES; i++) {
   4061       hash ^= example.frames[i];
   4062       hash = ROLW(hash, 19);
   4063    }
   4064    example.frames_hash = hash;
   4065    return ctxt__find_or_add( &example );
   4066 }
   4067 
   4068 ///////////////////////////////////////////////////////
   4069 //// Part (2):
   4070 ///  A SparseWA guest-addr -> OldRef, that refers to (1)
   4071 ///
   4072 
   4073 // (UInt) `echo "Old Reference Information" | md5sum`
   4074 #define OldRef_MAGIC 0x30b1f075UL
   4075 
   4076 /* Records an access: a thread, a context (size & writeness) and the
   4077    number of held locks. The size (1,2,4,8) is encoded as 00 = 1, 01 =
   4078    2, 10 = 4, 11 = 8.
   4079 */
   4080 typedef
   4081    struct {
   4082       RCEC*     rcec;
   4083       WordSetID locksHeldW;
   4084       UInt      thrid  : SCALARTS_N_THRBITS;
   4085       UInt      szLg2B : 2;
   4086       UInt      isW    : 1;
   4087    }
   4088    Thr_n_RCEC;
   4089 
   4090 #define N_OLDREF_ACCS 5
   4091 
   4092 typedef
   4093    struct {
   4094       UWord magic;  /* sanity check only */
   4095       UWord gen;    /* when most recently accessed */
   4096                     /* or free list when not in use */
   4097       /* unused slots in this array have .thrid == 0, which is invalid */
   4098       Thr_n_RCEC accs[N_OLDREF_ACCS];
   4099    }
   4100    OldRef;
   4101 
   4102 
   4103 //////////// BEGIN OldRef pool allocator
   4104 static PoolAlloc* oldref_pool_allocator;
   4105 
   4106 static OldRef* alloc_OldRef ( void ) {
   4107    return VG_(allocEltPA) ( oldref_pool_allocator );
   4108 }
   4109 
   4110 static void free_OldRef ( OldRef* r ) {
   4111    tl_assert(r->magic == OldRef_MAGIC);
   4112    VG_(freeEltPA)( oldref_pool_allocator, r );
   4113 }
   4114 //////////// END OldRef pool allocator
   4115 
   4116 
   4117 static SparseWA* oldrefTree     = NULL; /* SparseWA* OldRef* */
   4118 static UWord     oldrefGen      = 0;    /* current LRU generation # */
   4119 static UWord     oldrefTreeN    = 0;    /* # elems in oldrefTree */
   4120 static UWord     oldrefGenIncAt = 0;    /* inc gen # when size hits this */
   4121 
   4122 inline static UInt min_UInt ( UInt a, UInt b ) {
   4123    return a < b ? a : b;
   4124 }
   4125 
   4126 /* Compare the intervals [a1,a1+n1) and [a2,a2+n2).  Return -1 if the
   4127    first interval is lower, 1 if the first interval is higher, and 0
   4128    if there is any overlap.  The redundant casting is paranoia, added
   4129    after what looked distinctly like a bug in gcc-4.1.2, in which some
   4130    of the comparisons were done signedly instead of
   4131    unsignedly. */
   4132 /* Copied from exp-ptrcheck/sg_main.c */
   4133 static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
   4134                                      Addr a2, SizeT n2 ) {
   4135    UWord a1w = (UWord)a1;
   4136    UWord n1w = (UWord)n1;
   4137    UWord a2w = (UWord)a2;
   4138    UWord n2w = (UWord)n2;
   4139    tl_assert(n1w > 0 && n2w > 0);
   4140    if (a1w + n1w <= a2w) return -1L;
   4141    if (a2w + n2w <= a1w) return 1L;
   4142    return 0;
   4143 }
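
        /* Worked example of the above:
              cmp_nonempty_intervals(0x1000,4, 0x1004,4) == -1  ([0x1000,0x1004) lies below)
              cmp_nonempty_intervals(0x1000,4, 0x0FF8,8) ==  1  ([0x1000,0x1004) lies above)
              cmp_nonempty_intervals(0x1000,4, 0x1002,2) ==  0  (the intervals overlap)
           libhb_event_map_lookup below relies on the "0 means any overlap"
           convention to match a recorded old access against the current
           one. */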
   4144 
   4145 static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
   4146 {
   4147    OldRef* ref;
   4148    RCEC*   rcec;
   4149    Word    i, j;
   4150    UWord   keyW, valW;
   4151    Bool    b;
   4152 
   4153    tl_assert(thr);
   4154    ThrID thrid = thr->thrid;
   4155    tl_assert(thrid != 0); /* zero is used to denote an empty slot. */
   4156 
   4157    WordSetID locksHeldW = thr->hgthread->locksetW;
   4158 
   4159    rcec = get_RCEC( thr );
   4160    ctxt__rcinc(rcec);
   4161 
   4162    UInt szLg2B = 0;
   4163    switch (szB) {
   4164       /* This doesn't look particularly branch-predictor friendly. */
   4165       case 1:  szLg2B = 0; break;
   4166       case 2:  szLg2B = 1; break;
   4167       case 4:  szLg2B = 2; break;
   4168       case 8:  szLg2B = 3; break;
   4169       default: tl_assert(0);
   4170    }
   4171 
   4172    /* Look in the map to see if we already have a record for this
   4173       address. */
   4174    b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, a );
   4175 
   4176    if (b) {
   4177 
   4178       /* We already have a record for this address.  We now need to
   4179          see if we have a stack trace pertaining to this (thrid, R/W,
   4180          size) triple. */
   4181       tl_assert(keyW == a);
   4182       ref = (OldRef*)valW;
   4183       tl_assert(ref->magic == OldRef_MAGIC);
   4184 
   4185       for (i = 0; i < N_OLDREF_ACCS; i++) {
   4186          if (ref->accs[i].thrid != thrid)
   4187             continue;
   4188          if (ref->accs[i].szLg2B != szLg2B)
   4189             continue;
   4190          if (ref->accs[i].isW != (UInt)(isW & 1))
   4191             continue;
   4192          /* else we have a match, so stop looking. */
   4193          break;
   4194       }
   4195 
   4196       if (i < N_OLDREF_ACCS) {
   4197          /* thread 'thr' has an entry at index 'i'.  Update its RCEC. */
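                 /* First nudge the matched entry one slot towards accs[0], so
                    that entries which keep getting hit migrate to the front
                    and are found sooner on later scans -- a cheap approximation
                    to LRU ordering, analogous to move_RCEC_one_step_forward
                    for the context-table chains. */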
   4198          if (i > 0) {
   4199             Thr_n_RCEC tmp = ref->accs[i-1];
   4200             ref->accs[i-1] = ref->accs[i];
   4201             ref->accs[i] = tmp;
   4202             i--;
   4203          }
   4204          if (rcec == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++;
   4205          stats__ctxt_rcdec1++;
   4206          ctxt__rcdec( ref->accs[i].rcec );
   4207          tl_assert(ref->accs[i].thrid == thrid);
   4208          /* Update the RCEC and the W-held lockset. */
   4209          ref->accs[i].rcec       = rcec;
   4210          ref->accs[i].locksHeldW = locksHeldW;
   4211       } else {
   4212          /* No entry for this (thread, R/W, size, nWHeld) quad.
   4213             Shuffle all of them down one slot, and put the new entry
   4214             at the start of the array. */
   4215          if (ref->accs[N_OLDREF_ACCS-1].thrid != 0) {
   4216             /* the last slot is in use.  We must dec the rc on the
   4217                associated rcec. */
   4218             tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
   4219             stats__ctxt_rcdec2++;
   4220             if (0 && 0 == (stats__ctxt_rcdec2 & 0xFFF))
   4221                VG_(printf)("QQQQ %lu overflows\n",stats__ctxt_rcdec2);
   4222             ctxt__rcdec( ref->accs[N_OLDREF_ACCS-1].rcec );
   4223          } else {
   4224             tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
   4225          }
   4226          for (j = N_OLDREF_ACCS-1; j >= 1; j--)
   4227             ref->accs[j] = ref->accs[j-1];
   4228          ref->accs[0].thrid      = thrid;
   4229          ref->accs[0].szLg2B     = szLg2B;
   4230          ref->accs[0].isW        = (UInt)(isW & 1);
   4231          ref->accs[0].locksHeldW = locksHeldW;
   4232          ref->accs[0].rcec       = rcec;
   4233          /* thrid==0 is used to signify an empty slot, so we can't
   4234             add zero thrid (such a ThrID is invalid anyway). */
   4235          /* tl_assert(thrid != 0); */ /* There's a dominating assert above. */
   4236       }
   4237 
   4238       ref->gen = oldrefGen;
   4239 
   4240    } else {
   4241 
   4242       /* We don't have a record for this address.  Create a new one. */
   4243       if (oldrefTreeN >= oldrefGenIncAt) {
   4244          oldrefGen++;
   4245          oldrefGenIncAt = oldrefTreeN + 50000;
   4246          if (0) VG_(printf)("oldrefTree: new gen %lu at size %lu\n",
   4247                             oldrefGen, oldrefTreeN );
   4248       }
   4249 
   4250       ref = alloc_OldRef();
   4251       ref->magic = OldRef_MAGIC;
   4252       ref->gen   = oldrefGen;
   4253       ref->accs[0].thrid      = thrid;
   4254       ref->accs[0].szLg2B     = szLg2B;
   4255       ref->accs[0].isW        = (UInt)(isW & 1);
   4256       ref->accs[0].locksHeldW = locksHeldW;
   4257       ref->accs[0].rcec       = rcec;
   4258 
   4259       /* thrid==0 is used to signify an empty slot, so we can't
   4260          add zero thrid (such a ThrID is invalid anyway). */
   4261       /* tl_assert(thrid != 0); */ /* There's a dominating assert above. */
   4262 
   4263       /* Clear out the rest of the entries */
   4264       for (j = 1; j < N_OLDREF_ACCS; j++) {
   4265          ref->accs[j].rcec       = NULL;
   4266          ref->accs[j].thrid      = 0;
   4267          ref->accs[j].szLg2B     = 0;
   4268          ref->accs[j].isW        = 0;
   4269          ref->accs[j].locksHeldW = 0;
   4270       }
   4271       VG_(addToSWA)( oldrefTree, a, (UWord)ref );
   4272       oldrefTreeN++;
   4273 
   4274    }
   4275 }
   4276 
   4277 
   4278 /* Extract info from the conflicting-access machinery. */
   4279 Bool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC,
   4280                               /*OUT*/Thr**        resThr,
   4281                               /*OUT*/SizeT*       resSzB,
   4282                               /*OUT*/Bool*        resIsW,
   4283                               /*OUT*/WordSetID*   locksHeldW,
   4284                               Thr* thr, Addr a, SizeT szB, Bool isW )
   4285 {
   4286    Word    i, j;
   4287    OldRef* ref;
   4288    UWord   keyW, valW;
   4289    Bool    b;
   4290 
   4291    ThrID     cand_thrid;
   4292    RCEC*     cand_rcec;
   4293    Bool      cand_isW;
   4294    SizeT     cand_szB;
   4295    WordSetID cand_locksHeldW;
   4296    Addr      cand_a;
   4297 
   4298    Addr toCheck[15];
   4299    Int  nToCheck = 0;
   4300 
   4301    tl_assert(thr);
   4302    tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
   4303 
   4304    ThrID thrid = thr->thrid;
   4305 
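           /* Build the set of candidate addresses whose OldRef entries could
              record an access overlapping [a, a+szB).  Recorded accesses are
              at most 8 bytes, so any overlapping access must start somewhere
              in [a-7, a+szB-1]; 'a' itself goes first so that exact matches
              are tried before the neighbouring addresses. */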
   4306    toCheck[nToCheck++] = a;
   4307    for (i = -7; i < (Word)szB; i++) {
   4308       if (i != 0)
   4309          toCheck[nToCheck++] = a + i;
   4310    }
   4311    tl_assert(nToCheck <= 15);
   4312 
   4313    /* Now see if we can find a suitable matching event for
   4314       any of the addresses in toCheck[0 .. nToCheck-1]. */
   4315    for (j = 0; j < nToCheck; j++) {
   4316 
   4317       cand_a = toCheck[j];
   4318       //      VG_(printf)("test %ld %p\n", j, cand_a);
   4319 
   4320       b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, cand_a );
   4321       if (!b)
   4322          continue;
   4323 
   4324       ref = (OldRef*)valW;
   4325       tl_assert(keyW == cand_a);
   4326       tl_assert(ref->magic == OldRef_MAGIC);
   4327       tl_assert(ref->accs[0].thrid != 0); /* first slot must always be used */
   4328 
   4329       cand_thrid      = 0; /* invalid; see comments in event_map_bind */
   4330       cand_rcec       = NULL;
   4331       cand_isW        = False;
   4332       cand_szB        = 0;
   4333       cand_locksHeldW = 0; /* always valid; see initialise_data_structures() */
   4334 
   4335       for (i = 0; i < N_OLDREF_ACCS; i++) {
   4336          Thr_n_RCEC* cand = &ref->accs[i];
   4337          cand_rcec       = cand->rcec;
   4338          cand_thrid      = cand->thrid;
   4339          cand_isW        = (Bool)cand->isW;
   4340          cand_szB        = 1 << cand->szLg2B;
   4341          cand_locksHeldW = cand->locksHeldW;
   4342 
   4343          if (cand_thrid == 0)
   4344             /* This slot isn't in use.  Ignore it. */
   4345             continue;
   4346 
   4347          if (cand_thrid == thrid)
   4348             /* This is an access by the same thread, but we're only
   4349                interested in accesses from other threads.  Ignore. */
   4350             continue;
   4351 
   4352          if ((!cand_isW) && (!isW))
   4353             /* We don't want to report a read racing against another
   4354                read; that's stupid.  So in this case move on. */
   4355             continue;
   4356 
   4357          if (cmp_nonempty_intervals(a, szB, cand_a, cand_szB) != 0)
   4358             /* No overlap with the access we're asking about.  Ignore. */
   4359             continue;
   4360 
   4361          /* We have a match.  Stop searching. */
   4362          break;
   4363       }
   4364 
   4365       tl_assert(i >= 0 && i <= N_OLDREF_ACCS);
   4366 
   4367       if (i < N_OLDREF_ACCS) {
   4368          Int n, maxNFrames;
   4369          /* return with success */
   4370          tl_assert(cand_thrid);
   4371          tl_assert(cand_rcec);
   4372          tl_assert(cand_rcec->magic == RCEC_MAGIC);
   4373          tl_assert(cand_szB >= 1);
   4374          /* Count how many non-zero frames we have. */
   4375          maxNFrames = min_UInt(N_FRAMES, VG_(clo_backtrace_size));
   4376          for (n = 0; n < maxNFrames; n++) {
   4377             if (0 == cand_rcec->frames[n]) break;
   4378          }
   4379          *resEC      = VG_(make_ExeContext_from_StackTrace)
   4380                           (cand_rcec->frames, n);
   4381          *resThr     = Thr__from_ThrID(cand_thrid);
   4382          *resSzB     = cand_szB;
   4383          *resIsW     = cand_isW;
   4384          *locksHeldW = cand_locksHeldW;
   4385          return True;
   4386       }
   4387 
   4388       /* consider next address in toCheck[] */
   4389    } /* for (j = 0; j < nToCheck; j++) */
   4390 
   4391    /* really didn't find anything. */
   4392    return False;
   4393 }
   4394 
   4395 static void event_map_init ( void )
   4396 {
   4397    Word i;
   4398 
   4399    /* Context (RCEC) pool allocator */
   4400    rcec_pool_allocator = VG_(newPA) (
   4401                              sizeof(RCEC),
   4402                              1000 /* RCECs per pool */,
   4403                              HG_(zalloc),
   4404                              "libhb.event_map_init.1 (RCEC pools)",
   4405                              HG_(free)
   4406                           );
   4407 
   4408    /* Context table */
   4409    tl_assert(!contextTab);
   4410    contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)",
   4411                              N_RCEC_TAB * sizeof(RCEC*) );
   4412    tl_assert(contextTab);
   4413    for (i = 0; i < N_RCEC_TAB; i++)
   4414       contextTab[i] = NULL;
   4415 
   4416    /* Oldref pool allocator */
   4417    oldref_pool_allocator = VG_(newPA)(
   4418                                sizeof(OldRef),
   4419                                1000 /* OldRefs per pool */,
   4420                                HG_(zalloc),
   4421                                "libhb.event_map_init.3 (OldRef pools)",
   4422                                HG_(free)
   4423                             );
   4424 
   4425    /* Oldref tree */
   4426    tl_assert(!oldrefTree);
   4427    oldrefTree = VG_(newSWA)(
   4428                    HG_(zalloc),
   4429                    "libhb.event_map_init.4 (oldref tree)",
   4430                    HG_(free)
   4431                 );
   4432    tl_assert(oldrefTree);
   4433 
   4434    oldrefGen = 0;
   4435    oldrefGenIncAt = 0;
   4436    oldrefTreeN = 0;
   4437 }
   4438 
   4439 static void event_map__check_reference_counts ( Bool before )
   4440 {
   4441    RCEC*   rcec;
   4442    OldRef* oldref;
   4443    Word    i;
   4444    UWord   nEnts = 0;
   4445    UWord   keyW, valW;
   4446 
   4447    /* Set the 'check' reference counts to zero.  Also, optionally
   4448       check that the real reference counts are non-zero.  We allow
   4449       these to fall to zero before a GC, but the GC must get rid of
   4450       all those that are zero, hence none should be zero after a
   4451       GC. */
   4452    for (i = 0; i < N_RCEC_TAB; i++) {
   4453       for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
   4454          nEnts++;
   4455          tl_assert(rcec);
   4456          tl_assert(rcec->magic == RCEC_MAGIC);
   4457          if (!before)
   4458             tl_assert(rcec->rc > 0);
   4459          rcec->rcX = 0;
   4460       }
   4461    }
   4462 
   4463    /* check that the stats are sane */
   4464    tl_assert(nEnts == stats__ctxt_tab_curr);
   4465    tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);
   4466 
   4467    /* visit all the referencing points, inc check ref counts */
   4468    VG_(initIterSWA)( oldrefTree );
   4469    while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
   4470       oldref = (OldRef*)valW;
   4471       tl_assert(oldref->magic == OldRef_MAGIC);
   4472       for (i = 0; i < N_OLDREF_ACCS; i++) {
   4473          ThrID aThrID = oldref->accs[i].thrid;
   4474          RCEC* aRef   = oldref->accs[i].rcec;
   4475          if (aThrID != 0) {
   4476             tl_assert(aRef);
   4477             tl_assert(aRef->magic == RCEC_MAGIC);
   4478             aRef->rcX++;
   4479          } else {
   4480             tl_assert(!aRef);
   4481          }
   4482       }
   4483    }
   4484 
   4485    /* compare check ref counts with actual */
   4486    for (i = 0; i < N_RCEC_TAB; i++) {
   4487       for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
   4488          tl_assert(rcec->rc == rcec->rcX);
   4489       }
   4490    }
   4491 }
   4492 
   4493 __attribute__((noinline))
   4494 static void event_map_maybe_GC ( void )
   4495 {
   4496    OldRef* oldref;
   4497    UWord   keyW, valW, retained, maxGen;
   4498    XArray* refs2del;
   4499    Word    i, j, n2del;
   4500 
   4501    UWord* genMap      = NULL;
   4502    UWord  genMap_min  = 0;
   4503    UWord  genMap_size = 0;
   4504 
   4505    if (LIKELY(oldrefTreeN < HG_(clo_conflict_cache_size)))
   4506       return;
   4507 
   4508    if (0)
   4509       VG_(printf)("libhb: event_map GC at size %lu\n", oldrefTreeN);
   4510 
   4511    /* Check for sane command line params.  Limit values must match
   4512       those in hg_process_cmd_line_option. */
   4513    tl_assert( HG_(clo_conflict_cache_size) >= 10*1000 );
   4514    tl_assert( HG_(clo_conflict_cache_size) <= 30*1000*1000 );
   4515 
   4516    /* Check our counting is sane (expensive) */
   4517    if (CHECK_CEM)
   4518       tl_assert(oldrefTreeN == VG_(sizeSWA)( oldrefTree ));
   4519 
   4520    /* Check the reference counts (expensive) */
   4521    if (CHECK_CEM)
   4522       event_map__check_reference_counts( True/*before*/ );
   4523 
   4524    /* Compute the distribution of generation values in the ref tree.
   4525       There are likely only to be a few different generation numbers
   4526       in the whole tree, but we don't know what they are.  Hence use a
   4527       dynamically resized array of counters.  The array is genMap[0
   4528       .. genMap_size-1], where genMap[0] is the count for the
   4529       generation number genMap_min, genMap[1] is the count for
   4530       genMap_min+1, etc.  If a new number is seen outside the range
   4531       [genMap_min .. genMap_min + genMap_size - 1] then the array is
   4532       copied into a larger array, and genMap_min and genMap_size are
   4533       adjusted accordingly. */
   4534 
   4535    /* genMap :: generation-number -> count-of-nodes-with-that-number */
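           /* Worked example: if the generations present are 7 and 9, then the
              first key seen (say 7) gives genMap_min = 7, genMap_size = 1;
              meeting 9 later grows the array so that genMap_size = 3 and
              genMap[0], genMap[1], genMap[2] count generations 7, 8 and 9
              respectively (generation 8 simply stays at zero). */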
   4536 
   4537    VG_(initIterSWA)( oldrefTree );
   4538    while ( VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
   4539 
   4540       UWord ea, key;
   4541       oldref = (OldRef*)valW;
   4542       key = oldref->gen;
   4543 
   4544       /* BEGIN find 'ea', which is the index in genMap holding the
   4545          count for generation number 'key'. */
   4546       if (UNLIKELY(genMap == NULL)) {
   4547          /* deal with the first key to be seen, so that the following
   4548             cases don't need to handle the complexity of a NULL count
   4549             array. */
   4550          genMap_min  = key;
   4551          genMap_size = 1;
   4552          genMap = HG_(zalloc)( "libhb.emmG.1a",
   4553                                 genMap_size * sizeof(UWord) );
   4554          ea = 0;
   4555          if (0) VG_(printf)("(%lu) case 1 [%lu .. %lu]\n",
   4556                             key, genMap_min, genMap_min+genMap_size- 1 );
   4557       }
   4558       else
   4559       if (LIKELY(key >= genMap_min && key < genMap_min + genMap_size)) {
   4560          /* this is the expected (almost-always-happens) case: 'key'
   4561             is already mapped in the array. */
   4562          ea = key - genMap_min;
   4563       }
   4564       else
   4565       if (key < genMap_min) {
   4566          /* 'key' appears before the start of the current array.
   4567             Extend the current array by allocating a larger one and
   4568             copying the current one to the upper end of it. */
   4569          Word   more;
   4570          UWord* map2;
   4571          more = genMap_min - key;
   4572          tl_assert(more > 0);
   4573          map2 = HG_(zalloc)( "libhb.emmG.1b",
   4574                              (genMap_size + more) * sizeof(UWord) );
   4575          VG_(memcpy)( &map2[more], genMap, genMap_size * sizeof(UWord) );
   4576          HG_(free)( genMap );
   4577          genMap = map2;
   4578          genMap_size += more;
   4579          genMap_min -= more;
   4580          ea = 0;
   4581          tl_assert(genMap_min == key);
   4582          if (0) VG_(printf)("(%lu) case 2 [%lu .. %lu]\n",
   4583                             key, genMap_min,  genMap_min+genMap_size- 1 );
   4584       }
   4585       else {
   4586          /* 'key' appears after the end of the current array.  Extend
   4587             the current array by allocating a larger one and copying
   4588             the current one to the lower end of it. */
   4589          Word   more;
   4590          UWord* map2;
   4591          tl_assert(key >= genMap_min + genMap_size);
   4592          more = key - (genMap_min + genMap_size) + 1;
   4593          tl_assert(more > 0);
   4594          map2 = HG_(zalloc)( "libhb.emmG.1c",
   4595                              (genMap_size + more) * sizeof(UWord) );
   4596          VG_(memcpy)( &map2[0], genMap, genMap_size * sizeof(UWord) );
   4597          HG_(free)( genMap );
   4598          genMap = map2;
   4599          genMap_size += more;
   4600          ea = genMap_size - 1;
   4601          tl_assert(genMap_min + genMap_size - 1 == key);
   4602          if (0) VG_(printf)("(%lu) case 3 [%lu .. %lu]\n",
   4603                             key, genMap_min, genMap_min+genMap_size- 1 );
   4604       }
   4605       /* END find 'ea' from 'key' */
   4606 
   4607       tl_assert(ea >= 0 && ea < genMap_size);
   4608       /* and the whole point of this elaborate computation of 'ea' is .. */
   4609       genMap[ea]++;
   4610    }
   4611 
   4612    tl_assert(genMap);
   4613    tl_assert(genMap_size > 0);
   4614 
   4615    /* Sanity check what we just computed */
   4616    { UWord sum = 0;
   4617      for (i = 0; i < genMap_size; i++) {
   4618         if (0) VG_(printf)("  xxx: gen %ld has %lu\n",
   4619                            i + genMap_min, genMap[i] );
   4620         sum += genMap[i];
   4621      }
   4622      tl_assert(sum == oldrefTreeN);
   4623    }
   4624 
   4625    /* Figure out how many generations to throw away */
   4626    retained = oldrefTreeN;
   4627    maxGen = 0;
   4628 
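           /* Walk the generations oldest-first, discarding whole generations
              for as long as what would remain still exceeds
              HG_(clo_conflict_cache_size) * EVENT_MAP_GC_DISCARD_FRACTION.
              'maxGen' ends up as the newest generation to be discarded and
              'retained' as the number of OldRefs that will survive. */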
   4629    for (i = 0; i < genMap_size; i++) {
   4630       keyW = i + genMap_min;
   4631       valW = genMap[i];
   4632       tl_assert(keyW > 0); /* can't allow a generation # 0 */
   4633       if (0) VG_(printf)("  XXX: gen %lu has %lu\n", keyW, valW );
   4634       tl_assert(keyW >= maxGen);
   4635       tl_assert(retained >= valW);
   4636       if (retained - valW
   4637           > (UWord)(HG_(clo_conflict_cache_size)
   4638                     * EVENT_MAP_GC_DISCARD_FRACTION)) {
   4639          retained -= valW;
   4640          maxGen = keyW;
   4641       } else {
   4642          break;
   4643       }
   4644    }
   4645 
   4646    HG_(free)(genMap);
   4647 
   4648    tl_assert(retained >= 0 && retained <= oldrefTreeN);
   4649 
   4650    /* Now make up a big list of the oldrefTree entries we want to
   4651       delete.  We can't simultaneously traverse the tree and delete
   4652       stuff from it, so first we need to copy them off somewhere
   4653       else. (sigh) */
   4654    refs2del = VG_(newXA)( HG_(zalloc), "libhb.emmG.2",
   4655                           HG_(free), sizeof(Addr) );
   4656 
   4657    if (retained < oldrefTreeN) {
   4658 
   4659       /* This is the normal (expected) case.  We discard any ref whose
   4660          generation number <= maxGen. */
   4661       VG_(initIterSWA)( oldrefTree );
   4662       while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
   4663          oldref = (OldRef*)valW;
   4664          tl_assert(oldref->magic == OldRef_MAGIC);
   4665          if (oldref->gen <= maxGen) {
   4666             VG_(addToXA)( refs2del, &keyW );
   4667          }
   4668       }
   4669       if (VG_(clo_stats)) {
   4670          VG_(message)(Vg_DebugMsg,
   4671             "libhb: EvM GC: delete generations %lu and below, "
   4672             "retaining %lu entries\n",
   4673             maxGen, retained );
   4674       }
   4675 
   4676    } else {
   4677 
   4678       static UInt rand_seed = 0; /* leave as static */
   4679 
   4680       /* Degenerate case: there's only one generation in the entire
   4681          tree, so we need to have some other way of deciding which
   4682          refs to throw away.  Just throw out half of them randomly. */
   4683       tl_assert(retained == oldrefTreeN);
   4684       VG_(initIterSWA)( oldrefTree );
   4685       while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
   4686          UInt n;
   4687          oldref = (OldRef*)valW;
   4688          tl_assert(oldref->magic == OldRef_MAGIC);
   4689          n = VG_(random)( &rand_seed );
   4690          if ((n & 0xFFF) < 0x800) {
   4691             VG_(addToXA)( refs2del, &keyW );
   4692             retained--;
   4693          }
   4694       }
   4695       if (VG_(clo_stats)) {
   4696          VG_(message)(Vg_DebugMsg,
   4697             "libhb: EvM GC: randomly delete half the entries, "
   4698             "retaining %lu entries\n",
   4699             retained );
   4700       }
   4701 
   4702    }
   4703 
   4704    n2del = VG_(sizeXA)( refs2del );
   4705    tl_assert(n2del == (Word)(oldrefTreeN - retained));
   4706 
   4707    if (0) VG_(printf)("%s","deleting entries\n");
   4708    for (i = 0; i < n2del; i++) {
   4709       Bool  b;
   4710       Addr  ga2del = *(Addr*)VG_(indexXA)( refs2del, i );
   4711       b = VG_(delFromSWA)( oldrefTree, &keyW, &valW, ga2del );
   4712       tl_assert(b);
   4713       tl_assert(keyW == ga2del);
   4714       oldref = (OldRef*)valW;
   4715       for (j = 0; j < N_OLDREF_ACCS; j++) {
   4716          ThrID aThrID = oldref->accs[j].thrid;
   4717          RCEC* aRef   = oldref->accs[j].rcec;
   4718          if (aRef) {
   4719             tl_assert(aThrID != 0);
   4720             stats__ctxt_rcdec3++;
   4721             ctxt__rcdec( aRef );
   4722          } else {
   4723             tl_assert(aThrID == 0);
   4724          }
   4725       }
   4726 
   4727       free_OldRef( oldref );
   4728    }
   4729 
   4730    VG_(deleteXA)( refs2del );
   4731 
   4732    tl_assert( VG_(sizeSWA)( oldrefTree ) == retained );
   4733 
   4734    oldrefTreeN = retained;
   4735    oldrefGenIncAt = oldrefTreeN; /* start new gen right away */
   4736 
   4737    /* Throw away all RCECs with zero reference counts */
   4738    for (i = 0; i < N_RCEC_TAB; i++) {
   4739       RCEC** pp = &contextTab[i];
   4740       RCEC*  p  = *pp;
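              /* Invariant: 'pp' always points at the link that points at 'p',
                 so unlinking a dead RCEC is just '*pp = p->next'. */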
   4741       while (p) {
   4742          if (p->rc == 0) {
   4743             *pp = p->next;
   4744             free_RCEC(p);
   4745             p = *pp;
   4746             tl_assert(stats__ctxt_tab_curr > 0);
   4747             stats__ctxt_tab_curr--;
   4748          } else {
   4749             pp = &p->next;
   4750             p = p->next;
   4751          }
   4752       }
   4753    }
   4754 
   4755    /* Check the reference counts (expensive) */
   4756    if (CHECK_CEM)
   4757       event_map__check_reference_counts( False/*after*/ );
   4758 
   4759    //if (0)
   4760    //VG_(printf)("XXXX final sizes: oldrefTree %ld, contextTree %ld\n\n",
   4761    //            VG_(OSetGen_Size)(oldrefTree), VG_(OSetGen_Size)(contextTree));
   4762 
   4763 }
   4764 
   4765 
   4766 /////////////////////////////////////////////////////////
   4767 //                                                     //
   4768 // Core MSM                                            //
   4769 //                                                     //
   4770 /////////////////////////////////////////////////////////
   4771 
   4772 /* Logic in msmcread/msmcwrite updated/verified after re-analysis, 19
   4773    Nov 08, and again after [...],
   4774    June 09. */
   4775 
   4776 static ULong stats__msmcread         = 0;
   4777 static ULong stats__msmcread_change  = 0;
   4778 static ULong stats__msmcwrite        = 0;
   4779 static ULong stats__msmcwrite_change = 0;
   4780 
   4781 /* Some notes on the H1 history mechanism:
   4782 
   4783    Transition rules are:
   4784 
   4785    read_{Kr,Kw}(Cr,Cw)  = (Cr,           Cr `join` Kw)
   4786    write_{Kr,Kw}(Cr,Cw) = (Cr `join` Kw, Cr `join` Kw)
   4787 
   4788    After any access by a thread T to a location L, L's constraint pair
   4789    (Cr,Cw) has Cw[T] == T's Kw[T], that is, == T's scalar W-clock.
   4790 
   4791    After a race by thread T conflicting with some previous access by
   4792    some other thread U, for a location with constraint (before
   4793    processing the later access) (Cr,Cw), then Cw[U] is the segment in
   4794    which the previous access lies.
   4795 
   4796    Hence in record_race_info, we pass in Cfailed and Kfailed, which
   4797    are compared so as to find out which thread(s) this access
   4798    conflicts with.  Once that is established, we also require the
   4799    pre-update Cw for the location, so we can index into it for those
   4800    threads, to get the scalar clock values for the point at which the
   4801    former accesses were made.  (In fact we only bother to do any of
   4802    this for an arbitrarily chosen one of the conflicting threads, as
   4803    that's simpler, it avoids flooding the user with vast amounts of
   4804    mostly useless information, and because the program is wrong if it
   4805    contains any races at all -- so we don't really need to show all
   4806    conflicting access pairs initially, so long as we only show none if
   4807    none exist).
   4808 
   4809    ---
   4810 
   4811    That requires the auxiliary proof that
   4812 
   4813       (Cr `join` Kw)[T] == Kw[T]
   4814 
   4815    Why should that be true?  Because for any thread T, Kw[T] >= the
   4816    scalar clock value for T known by any other thread.  In other
   4817    words, because T's value for its own scalar clock is at least as up
   4818    to date as the value for it known by any other thread (that is true
   4819    for both the R- and W- scalar clocks).  Hence no other thread will
   4820    be able to feed in a value for that element (indirectly via a
   4821    constraint) which will exceed Kw[T], and hence the join cannot
   4822    cause that particular element to advance.
   4823 */
   4824 
   4825 __attribute__((noinline))
   4826 static void record_race_info ( Thr* acc_thr,
   4827                                Addr acc_addr, SizeT szB, Bool isWrite,
   4828                                VtsID Cfailed,
   4829                                VtsID Kfailed,
   4830                                VtsID Cw )
   4831 {
   4832    /* Call here to report a race.  We just hand it onwards to
   4833       HG_(record_error_Race).  If that in turn discovers that the
   4834       error is going to be collected, then, at history_level 2, that
   4835       queries the conflicting-event map.  The alternative would be to
   4836       query it right here.  But that causes a lot of pointless queries
   4837       for errors which will shortly be discarded as duplicates, and
   4838       can become a performance overhead; so we defer the query until
   4839       we know the error is not a duplicate. */
   4840 
   4841    /* Stacks for the bounds of the (or one of the) conflicting
   4842       segment(s).  These are only set at history_level 1. */
   4843    ExeContext* hist1_seg_start = NULL;
   4844    ExeContext* hist1_seg_end   = NULL;
   4845    Thread*     hist1_conf_thr  = NULL;
   4846 
   4847    tl_assert(acc_thr);
   4848    tl_assert(acc_thr->hgthread);
   4849    tl_assert(acc_thr->hgthread->hbthr == acc_thr);
   4850    tl_assert(HG_(clo_history_level) >= 0 && HG_(clo_history_level) <= 2);
   4851 
   4852    if (HG_(clo_history_level) == 1) {
   4853       Bool found;
   4854       Word firstIx, lastIx;
   4855       ULong_n_EC key;
   4856 
   4857       /* At history_level 1, we must round up the relevant stack-pair
   4858          for the conflicting segment right now.  This is because
   4859          deferring it is complex; we can't (easily) put Kfailed and
   4860          Cfailed into the XError and wait for later without
   4861          getting tied up in difficulties with VtsID reference
   4862          counting.  So just do it now. */
   4863       Thr*  confThr;
   4864       ULong confTym = 0;
   4865       /* Which thread are we in conflict with?  There may be more than
   4866          one, in which case VtsID__findFirst_notLEQ selects one arbitrarily
   4867          (in fact it's the one with the lowest Thr* value). */
   4868       confThr = VtsID__findFirst_notLEQ( Cfailed, Kfailed );
   4869       /* This must exist!  since if it was NULL then there's no
   4870          conflict (semantics of return value of
   4871          VtsID__findFirst_notLEQ), and msmc{read,write}, which has
   4872          called us, just checked exactly this -- that there was in
   4873          fact a race. */
   4874       tl_assert(confThr);
   4875 
   4876       /* Get the scalar clock value that the conflicting thread
   4877          introduced into the constraint.  A careful examination of the
   4878          base machine rules shows that this must be the same as the
   4879          conflicting thread's scalar clock when it created this
   4880          constraint.  Hence we know the scalar clock of the
   4881          conflicting thread when the conflicting access was made. */
   4882       confTym = VtsID__indexAt( Cfailed, confThr );
   4883 
   4884       /* Using this scalar clock, index into the conflicting thread's
   4885          collection of stack traces made each time its vector clock
   4886          (hence its scalar clock) changed.  This gives the stack
   4887          traces at the start and end of the conflicting segment (well,
   4888          as per comment just above, of one of the conflicting
   4889          segments, if there are more than one). */
   4890       key.ull = confTym;
   4891       key.ec  = NULL;
   4892       /* tl_assert(confThr); -- asserted just above */
   4893       tl_assert(confThr->local_Kws_n_stacks);
   4894       firstIx = lastIx = 0;
   4895       found = VG_(lookupXA_UNSAFE)(
   4896                  confThr->local_Kws_n_stacks,
   4897                  &key, &firstIx, &lastIx,
   4898                  (XACmpFn_t)cmp__ULong_n_EC__by_ULong
   4899               );
   4900       if (0) VG_(printf)("record_race_info %u %u %u  confThr %p "
   4901                          "confTym %llu found %d (%lu,%lu)\n",
   4902                          Cfailed, Kfailed, Cw,
   4903                          confThr, confTym, found, firstIx, lastIx);
   4904       /* We can't indefinitely collect stack traces at VTS
   4905          transitions, since we'd eventually run out of memory.  Hence
   4906          note_local_Kw_n_stack_for will eventually throw away old
   4907          ones, which in turn means we might fail to find index value
   4908          confTym in the array. */
   4909       if (found) {
   4910          ULong_n_EC *pair_start, *pair_end;
   4911          pair_start
   4912             = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks, lastIx );
   4913          hist1_seg_start = pair_start->ec;
   4914          if (lastIx+1 < VG_(sizeXA)( confThr->local_Kws_n_stacks )) {
   4915             pair_end
   4916                = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks,
   4917                                             lastIx+1 );
   4918             /* from properties of VG_(lookupXA) and the comparison fn used: */
   4919             tl_assert(pair_start->ull < pair_end->ull);
   4920             hist1_seg_end = pair_end->ec;
   4921             /* Could do a bit better here.  It may be that pair_end
   4922                doesn't have a stack, but the following entries in the
   4923                array have the same scalar Kw and do have a stack.  So
   4924                we should search a bit further along the array than
   4925                lastIx+1 if hist1_seg_end is NULL. */
   4926          } else {
   4927             if (!confThr->llexit_done)
   4928                hist1_seg_end = main_get_EC( confThr );
   4929          }
   4930          // seg_start could be NULL iff this is the first stack in the thread
   4931          //if (seg_start) VG_(pp_ExeContext)(seg_start);
   4932          //if (seg_end)   VG_(pp_ExeContext)(seg_end);
   4933          hist1_conf_thr = confThr->hgthread;
   4934       }
   4935    }
   4936 
   4937    HG_(record_error_Race)( acc_thr->hgthread, acc_addr,
   4938                            szB, isWrite,
   4939                            hist1_conf_thr, hist1_seg_start, hist1_seg_end );
   4940 }
   4941 
   4942 static Bool is_sane_SVal_C ( SVal sv ) {
   4943    Bool leq;
   4944    if (!SVal__isC(sv)) return True;
   4945    leq = VtsID__cmpLEQ( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
   4946    return leq;
   4947 }
   4948 
   4949 
   4950 /* Compute new state following a read */
   4951 static inline SVal msmcread ( SVal svOld,
   4952                               /* The following are only needed for
   4953                                  creating error reports. */
   4954                               Thr* acc_thr,
   4955                               Addr acc_addr, SizeT szB )
   4956 {
   4957    SVal svNew = SVal_INVALID;
   4958    stats__msmcread++;
   4959 
   4960    /* Redundant sanity check on the constraints */
   4961    if (CHECK_MSM) {
   4962       tl_assert(is_sane_SVal_C(svOld));
   4963    }
   4964 
   4965    if (LIKELY(SVal__isC(svOld))) {
   4966       VtsID tviR  = acc_thr->viR;
   4967       VtsID tviW  = acc_thr->viW;
   4968       VtsID rmini = SVal__unC_Rmin(svOld);
   4969       VtsID wmini = SVal__unC_Wmin(svOld);
   4970       Bool  leq   = VtsID__cmpLEQ(rmini,tviR);
   4971       if (LIKELY(leq)) {
   4972          /* no race */
   4973          /* Note: RWLOCK subtlety: use tviW, not tviR */
   4974          svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
   4975          goto out;
   4976       } else {
   4977          /* assert on sanity of constraints. */
   4978          Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
   4979          tl_assert(leqxx);
   4980          // same as in non-race case
   4981          svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
   4982          record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
   4983                            rmini, /* Cfailed */
   4984                            tviR,  /* Kfailed */
   4985                            wmini  /* Cw */ );
   4986          goto out;
   4987       }
   4988    }
   4989    if (SVal__isA(svOld)) {
   4990       /* reading no-access memory (sigh); leave unchanged */
   4991       /* check for no pollution */
   4992       tl_assert(svOld == SVal_NOACCESS);
   4993       svNew = SVal_NOACCESS;
   4994       goto out;
   4995    }
   4996    if (0) VG_(printf)("msmcread: bad svOld: 0x%016llx\n", svOld);
   4997    tl_assert(0);
   4998 
   4999   out:
   5000    if (CHECK_MSM) {
   5001       tl_assert(is_sane_SVal_C(svNew));
   5002    }
   5003    if (UNLIKELY(svNew != svOld)) {
   5004       tl_assert(svNew != SVal_INVALID);
   5005       if (HG_(clo_history_level) >= 2
   5006           && SVal__isC(svOld) && SVal__isC(svNew)) {
   5007          event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr );
   5008          stats__msmcread_change++;
   5009       }
   5010    }
   5011    return svNew;
   5012 }
   5013 
   5014 
   5015 /* Compute new state following a write */
   5016 static inline SVal msmcwrite ( SVal svOld,
   5017                               /* The following are only needed for
   5018                                  creating error reports. */
   5019                               Thr* acc_thr,
   5020                               Addr acc_addr, SizeT szB )
   5021 {
   5022    SVal svNew = SVal_INVALID;
   5023    stats__msmcwrite++;
   5024 
   5025    /* Redundant sanity check on the constraints */
   5026    if (CHECK_MSM) {
   5027       tl_assert(is_sane_SVal_C(svOld));
   5028    }
   5029 
   5030    if (LIKELY(SVal__isC(svOld))) {
   5031       VtsID tviW  = acc_thr->viW;
   5032       VtsID wmini = SVal__unC_Wmin(svOld);
   5033       Bool  leq   = VtsID__cmpLEQ(wmini,tviW);
   5034       if (LIKELY(leq)) {
   5035          /* no race */
   5036          svNew = SVal__mkC( tviW, tviW );
   5037          goto out;
   5038       } else {
   5039          VtsID rmini = SVal__unC_Rmin(svOld);
   5040          /* assert on sanity of constraints. */
   5041          Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
   5042          tl_assert(leqxx);
   5043          // same as in non-race case
   5044          // proof: in the non-race case, we have
   5045          //    rmini <= wmini (invar on constraints)
   5046          //    tviW <= tviR (invar on thread clocks)
   5047          //    wmini <= tviW (from run-time check)
   5048          // hence from transitivity of <= we have
   5049          //    rmini <= wmini <= tviW
   5050          // and so join(rmini,tviW) == tviW
   5051          // and    join(wmini,tviW) == tviW
   5052          // qed.
   5053          svNew = SVal__mkC( VtsID__join2(rmini, tviW),
   5054                             VtsID__join2(wmini, tviW) );
   5055          record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
   5056                            wmini, /* Cfailed */
   5057                            tviW,  /* Kfailed */
   5058                            wmini  /* Cw */ );
   5059          goto out;
   5060       }
   5061    }
   5062    if (SVal__isA(svOld)) {
   5063       /* writing no-access memory (sigh); leave unchanged */
   5064       /* check for no pollution */
   5065       tl_assert(svOld == SVal_NOACCESS);
   5066       svNew = SVal_NOACCESS;
   5067       goto out;
   5068    }
   5069    if (0) VG_(printf)("msmcwrite: bad svOld: 0x%016llx\n", svOld);
   5070    tl_assert(0);
   5071 
   5072   out:
   5073    if (CHECK_MSM) {
   5074       tl_assert(is_sane_SVal_C(svNew));
   5075    }
   5076    if (UNLIKELY(svNew != svOld)) {
   5077       tl_assert(svNew != SVal_INVALID);
   5078       if (HG_(clo_history_level) >= 2
   5079           && SVal__isC(svOld) && SVal__isC(svNew)) {
   5080          event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr );
   5081          stats__msmcwrite_change++;
   5082       }
   5083    }
   5084    return svNew;
   5085 }
   5086 
   5087 
   5088 /////////////////////////////////////////////////////////
   5089 //                                                     //
   5090 // Apply core MSM to specific memory locations         //
   5091 //                                                     //
   5092 /////////////////////////////////////////////////////////
   5093 
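        /* A note on the sapply/swrite helpers that follow: each naturally
           aligned 8-byte chunk of a cache line is shadowed by a small tree of
           SVals (cl->svals[8*tno .. 8*tno+7]), and cl->descrs[tno] records at
           which granularity (8/4/2/1 bytes) those SVals are currently valid.
           When an access is narrower than the current granularity, the
           relevant pulldown_to_* helper (defined earlier in this file) splits
           the coarser value down to the required leaf first.  The 16-, 32-
           and 64-bit variants instead fall back to a 'slowcase' that splits
           the access into two halves when it is misaligned, or when the
           shadow values are only valid at a finer granularity than the
           access. */
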
   5094 /*------------- ZSM accesses: 8 bit sapply ------------- */
   5095 
   5096 static void zsm_sapply08__msmcread ( Thr* thr, Addr a ) {
   5097    CacheLine* cl;
   5098    UWord      cloff, tno, toff;
   5099    SVal       svOld, svNew;
   5100    UShort     descr;
   5101    stats__cline_cread08s++;
   5102    cl    = get_cacheline(a);
   5103    cloff = get_cacheline_offset(a);
   5104    tno   = get_treeno(a);
   5105    toff  = get_tree_offset(a); /* == 0 .. 7 */
   5106    descr = cl->descrs[tno];
   5107    if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
   5108       SVal* tree = &cl->svals[tno << 3];
   5109       cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
   5110       if (CHECK_ZSM)
   5111          tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   5112    }
   5113    svOld = cl->svals[cloff];
   5114    svNew = msmcread( svOld, thr,a,1 );
   5115    if (CHECK_ZSM)
   5116       tl_assert(svNew != SVal_INVALID);
   5117    cl->svals[cloff] = svNew;
   5118 }
   5119 
   5120 static void zsm_sapply08__msmcwrite ( Thr* thr, Addr a ) {
   5121    CacheLine* cl;
   5122    UWord      cloff, tno, toff;
   5123    SVal       svOld, svNew;
   5124    UShort     descr;
   5125    stats__cline_cwrite08s++;
   5126    cl    = get_cacheline(a);
   5127    cloff = get_cacheline_offset(a);
   5128    tno   = get_treeno(a);
   5129    toff  = get_tree_offset(a); /* == 0 .. 7 */
   5130    descr = cl->descrs[tno];
   5131    if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
   5132       SVal* tree = &cl->svals[tno << 3];
   5133       cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
   5134       if (CHECK_ZSM)
   5135          tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   5136    }
   5137    svOld = cl->svals[cloff];
   5138    svNew = msmcwrite( svOld, thr,a,1 );
   5139    if (CHECK_ZSM)
   5140       tl_assert(svNew != SVal_INVALID);
   5141    cl->svals[cloff] = svNew;
   5142 }
   5143 
   5144 /*------------- ZSM accesses: 16 bit sapply ------------- */
   5145 
   5146 static void zsm_sapply16__msmcread ( Thr* thr, Addr a ) {
   5147    CacheLine* cl;
   5148    UWord      cloff, tno, toff;
   5149    SVal       svOld, svNew;
   5150    UShort     descr;
   5151    stats__cline_cread16s++;
   5152    if (UNLIKELY(!aligned16(a))) goto slowcase;
   5153    cl    = get_cacheline(a);
   5154    cloff = get_cacheline_offset(a);
   5155    tno   = get_treeno(a);
   5156    toff  = get_tree_offset(a); /* == 0, 2, 4 or 6 */
   5157    descr = cl->descrs[tno];
   5158    if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
   5159       if (valid_value_is_below_me_16(descr, toff)) {
   5160          goto slowcase;
   5161       } else {
   5162          SVal* tree = &cl->svals[tno << 3];
   5163          cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
   5164       }
   5165       if (CHECK_ZSM)
   5166          tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   5167    }
   5168    svOld = cl->svals[cloff];
   5169    svNew = msmcread( svOld, thr,a,2 );
   5170    if (CHECK_ZSM)
   5171       tl_assert(svNew != SVal_INVALID);
   5172    cl->svals[cloff] = svNew;
   5173    return;
   5174   slowcase: /* misaligned, or must go further down the tree */
   5175    stats__cline_16to8splits++;
   5176    zsm_sapply08__msmcread( thr, a + 0 );
   5177    zsm_sapply08__msmcread( thr, a + 1 );
   5178 }
   5179 
   5180 static void zsm_sapply16__msmcwrite ( Thr* thr, Addr a ) {
   5181    CacheLine* cl;
   5182    UWord      cloff, tno, toff;
   5183    SVal       svOld, svNew;
   5184    UShort     descr;
   5185    stats__cline_cwrite16s++;
   5186    if (UNLIKELY(!aligned16(a))) goto slowcase;
   5187    cl    = get_cacheline(a);
   5188    cloff = get_cacheline_offset(a);
   5189    tno   = get_treeno(a);
   5190    toff  = get_tree_offset(a); /* == 0, 2, 4 or 6 */
   5191    descr = cl->descrs[tno];
   5192    if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
   5193       if (valid_value_is_below_me_16(descr, toff)) {
   5194          goto slowcase;
   5195       } else {
   5196          SVal* tree = &cl->svals[tno << 3];
   5197          cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
   5198       }
   5199       if (CHECK_ZSM)
   5200          tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   5201    }
   5202    svOld = cl->svals[cloff];
   5203    svNew = msmcwrite( svOld, thr,a,2 );
   5204    if (CHECK_ZSM)
   5205       tl_assert(svNew != SVal_INVALID);
   5206    cl->svals[cloff] = svNew;
   5207    return;
   5208   slowcase: /* misaligned, or must go further down the tree */
   5209    stats__cline_16to8splits++;
   5210    zsm_sapply08__msmcwrite( thr, a + 0 );
   5211    zsm_sapply08__msmcwrite( thr, a + 1 );
   5212 }
   5213 
   5214 /*------------- ZSM accesses: 32 bit sapply ------------- */
   5215 
   5216 static void zsm_sapply32__msmcread ( Thr* thr, Addr a ) {
   5217    CacheLine* cl;
   5218    UWord      cloff, tno, toff;
   5219    SVal       svOld, svNew;
   5220    UShort     descr;
   5221    stats__cline_cread32s++;
   5222    if (UNLIKELY(!aligned32(a))) goto slowcase;
   5223    cl    = get_cacheline(a);
   5224    cloff = get_cacheline_offset(a);
   5225    tno   = get_treeno(a);
   5226    toff  = get_tree_offset(a); /* == 0 or 4 */
   5227    descr = cl->descrs[tno];
   5228    if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
   5229       if (valid_value_is_above_me_32(descr, toff)) {
   5230          SVal* tree = &cl->svals[tno << 3];
   5231          cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
   5232       } else {
   5233          goto slowcase;
   5234       }
   5235       if (CHECK_ZSM)
   5236          tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   5237    }
   5238    svOld = cl->svals[cloff];
   5239    svNew = msmcread( svOld, thr,a,4 );
   5240    if (CHECK_ZSM)
   5241       tl_assert(svNew != SVal_INVALID);
   5242    cl->svals[cloff] = svNew;
   5243    return;
   5244   slowcase: /* misaligned, or must go further down the tree */
   5245    stats__cline_32to16splits++;
   5246    zsm_sapply16__msmcread( thr, a + 0 );
   5247    zsm_sapply16__msmcread( thr, a + 2 );
   5248 }
   5249 
   5250 static void zsm_sapply32__msmcwrite ( Thr* thr, Addr a ) {
   5251    CacheLine* cl;
   5252    UWord      cloff, tno, toff;
   5253    SVal       svOld, svNew;
   5254    UShort     descr;
   5255    stats__cline_cwrite32s++;
   5256    if (UNLIKELY(!aligned32(a))) goto slowcase;
   5257    cl    = get_cacheline(a);
   5258    cloff = get_cacheline_offset(a);
   5259    tno   = get_treeno(a);
   5260    toff  = get_tree_offset(a); /* == 0 or 4 */
   5261    descr = cl->descrs[tno];
   5262    if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
   5263       if (valid_value_is_above_me_32(descr, toff)) {
   5264          SVal* tree = &cl->svals[tno << 3];
   5265          cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
   5266       } else {
   5267          goto slowcase;
   5268       }
   5269       if (CHECK_ZSM)
   5270          tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   5271    }
   5272    svOld = cl->svals[cloff];
   5273    svNew = msmcwrite( svOld, thr,a,4 );
   5274    if (CHECK_ZSM)
   5275       tl_assert(svNew != SVal_INVALID);
   5276    cl->svals[cloff] = svNew;
   5277    return;
   5278   slowcase: /* misaligned, or must go further down the tree */
   5279    stats__cline_32to16splits++;
   5280    zsm_sapply16__msmcwrite( thr, a + 0 );
   5281    zsm_sapply16__msmcwrite( thr, a + 2 );
   5282 }
   5283 
   5284 /*------------- ZSM accesses: 64 bit sapply ------------- */
   5285 
   5286 static void zsm_sapply64__msmcread ( Thr* thr, Addr a ) {
   5287    CacheLine* cl;
   5288    UWord      cloff, tno;
   5289    //UWord      toff;
   5290    SVal       svOld, svNew;
   5291    UShort     descr;
   5292    stats__cline_cread64s++;
   5293    if (UNLIKELY(!aligned64(a))) goto slowcase;
   5294    cl    = get_cacheline(a);
   5295    cloff = get_cacheline_offset(a);
   5296    tno   = get_treeno(a);
   5297    //toff  = get_tree_offset(a); /* == 0, unused */
   5298    descr = cl->descrs[tno];
   5299    if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
   5300       goto slowcase;
   5301    }
   5302    svOld = cl->svals[cloff];
   5303    svNew = msmcread( svOld, thr,a,8 );
   5304    if (CHECK_ZSM)
   5305       tl_assert(svNew != SVal_INVALID);
   5306    cl->svals[cloff] = svNew;
   5307    return;
   5308   slowcase: /* misaligned, or must go further down the tree */
   5309    stats__cline_64to32splits++;
   5310    zsm_sapply32__msmcread( thr, a + 0 );
   5311    zsm_sapply32__msmcread( thr, a + 4 );
   5312 }
   5313 
   5314 static void zsm_sapply64__msmcwrite ( Thr* thr, Addr a ) {
   5315    CacheLine* cl;
   5316    UWord      cloff, tno;
   5317    //UWord      toff;
   5318    SVal       svOld, svNew;
   5319    UShort     descr;
   5320    stats__cline_cwrite64s++;
   5321    if (UNLIKELY(!aligned64(a))) goto slowcase;
   5322    cl    = get_cacheline(a);
   5323    cloff = get_cacheline_offset(a);
   5324    tno   = get_treeno(a);
   5325    //toff  = get_tree_offset(a); /* == 0, unused */
   5326    descr = cl->descrs[tno];
   5327    if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
   5328       goto slowcase;
   5329    }
   5330    svOld = cl->svals[cloff];
   5331    svNew = msmcwrite( svOld, thr,a,8 );
   5332    if (CHECK_ZSM)
   5333       tl_assert(svNew != SVal_INVALID);
   5334    cl->svals[cloff] = svNew;
   5335    return;
   5336   slowcase: /* misaligned, or must go further down the tree */
   5337    stats__cline_64to32splits++;
   5338    zsm_sapply32__msmcwrite( thr, a + 0 );
   5339    zsm_sapply32__msmcwrite( thr, a + 4 );
   5340 }
   5341 
   5342 /*--------------- ZSM accesses: 8 bit swrite --------------- */
   5343 
   5344 static
   5345 void zsm_swrite08 ( Addr a, SVal svNew ) {
   5346    CacheLine* cl;
   5347    UWord      cloff, tno, toff;
   5348    UShort     descr;
   5349    stats__cline_swrite08s++;
   5350    cl    = get_cacheline(a);
   5351    cloff = get_cacheline_offset(a);
   5352    tno   = get_treeno(a);
   5353    toff  = get_tree_offset(a); /* == 0 .. 7 */
   5354    descr = cl->descrs[tno];
   5355    if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
   5356       SVal* tree = &cl->svals[tno << 3];
   5357       cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
   5358       if (CHECK_ZSM)
   5359          tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   5360    }
   5361    tl_assert(svNew != SVal_INVALID);
   5362    cl->svals[cloff] = svNew;
   5363 }
   5364 
   5365 /*--------------- ZSM accesses: 16 bit swrite --------------- */
   5366 
   5367 static
   5368 void zsm_swrite16 ( Addr a, SVal svNew ) {
   5369    CacheLine* cl;
   5370    UWord      cloff, tno, toff;
   5371    UShort     descr;
   5372    stats__cline_swrite16s++;
   5373    if (UNLIKELY(!aligned16(a))) goto slowcase;
   5374    cl    = get_cacheline(a);
   5375    cloff = get_cacheline_offset(a);
   5376    tno   = get_treeno(a);
   5377    toff  = get_tree_offset(a); /* == 0, 2, 4 or 6 */
   5378    descr = cl->descrs[tno];
   5379    if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
   5380       if (valid_value_is_below_me_16(descr, toff)) {
   5381          /* Writing at this level.  Need to fix up 'descr'. */
   5382          cl->descrs[tno] = pullup_descr_to_16(descr, toff);
   5383          /* At this point, the tree does not match cl->descrs[tno] any
   5384             more.  The assignments below will fix it up. */
   5385       } else {
   5386          /* We can't indiscriminately write on the w16 node as in the
   5387             w64 case, as that might make the node inconsistent with
   5388             its parent.  So first, pull down to this level. */
   5389          SVal* tree = &cl->svals[tno << 3];
   5390          cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
   5391          if (CHECK_ZSM)
   5392             tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   5393       }
   5394    }
   5395    tl_assert(svNew != SVal_INVALID);
   5396    cl->svals[cloff + 0] = svNew;
   5397    cl->svals[cloff + 1] = SVal_INVALID;
   5398    return;
   5399   slowcase: /* misaligned */
   5400    stats__cline_16to8splits++;
   5401    zsm_swrite08( a + 0, svNew );
   5402    zsm_swrite08( a + 1, svNew );
   5403 }
   5404 
   5405 /*--------------- ZSM accesses: 32 bit swrite --------------- */
   5406 
   5407 static
   5408 void zsm_swrite32 ( Addr a, SVal svNew ) {
   5409    CacheLine* cl;
   5410    UWord      cloff, tno, toff;
   5411    UShort     descr;
   5412    stats__cline_swrite32s++;
   5413    if (UNLIKELY(!aligned32(a))) goto slowcase;
   5414    cl    = get_cacheline(a);
   5415    cloff = get_cacheline_offset(a);
   5416    tno   = get_treeno(a);
   5417    toff  = get_tree_offset(a); /* == 0 or 4 */
   5418    descr = cl->descrs[tno];
   5419    if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
   5420       if (valid_value_is_above_me_32(descr, toff)) {
   5421          /* We can't indiscriminately write on the w32 node as in the
   5422             w64 case, as that might make the node inconsistent with
   5423             its parent.  So first, pull down to this level. */
   5424          SVal* tree = &cl->svals[tno << 3];
   5425          cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
   5426          if (CHECK_ZSM)
   5427             tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
   5428       } else {
   5429          /* Writing at this level.  Need to fix up 'descr'. */
   5430          cl->descrs[tno] = pullup_descr_to_32(descr, toff);
    5431          /* At this point, the tree does not match cl->descrs[tno] any
   5432             more.  The assignments below will fix it up. */
   5433       }
   5434    }
   5435    tl_assert(svNew != SVal_INVALID);
   5436    cl->svals[cloff + 0] = svNew;
   5437    cl->svals[cloff + 1] = SVal_INVALID;
   5438    cl->svals[cloff + 2] = SVal_INVALID;
   5439    cl->svals[cloff + 3] = SVal_INVALID;
   5440    return;
   5441   slowcase: /* misaligned */
   5442    stats__cline_32to16splits++;
   5443    zsm_swrite16( a + 0, svNew );
   5444    zsm_swrite16( a + 2, svNew );
   5445 }
   5446 
   5447 /*--------------- ZSM accesses: 64 bit swrite --------------- */
   5448 
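/* Write the 64-bit shadow value at 'a'.  A misaligned address is
   split into two 32-bit writes.  In the aligned case the tree simply
   becomes a single 64-bit leaf, so no descriptor pullup/pulldown is
   needed. */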
   5449 static
   5450 void zsm_swrite64 ( Addr a, SVal svNew ) {
   5451    CacheLine* cl;
   5452    UWord      cloff, tno;
   5453    //UWord    toff;
   5454    stats__cline_swrite64s++;
   5455    if (UNLIKELY(!aligned64(a))) goto slowcase;
   5456    cl    = get_cacheline(a);
   5457    cloff = get_cacheline_offset(a);
   5458    tno   = get_treeno(a);
   5459    //toff  = get_tree_offset(a); /* == 0, unused */
   5460    cl->descrs[tno] = TREE_DESCR_64;
   5461    tl_assert(svNew != SVal_INVALID);
   5462    cl->svals[cloff + 0] = svNew;
   5463    cl->svals[cloff + 1] = SVal_INVALID;
   5464    cl->svals[cloff + 2] = SVal_INVALID;
   5465    cl->svals[cloff + 3] = SVal_INVALID;
   5466    cl->svals[cloff + 4] = SVal_INVALID;
   5467    cl->svals[cloff + 5] = SVal_INVALID;
   5468    cl->svals[cloff + 6] = SVal_INVALID;
   5469    cl->svals[cloff + 7] = SVal_INVALID;
   5470    return;
   5471   slowcase: /* misaligned */
   5472    stats__cline_64to32splits++;
   5473    zsm_swrite32( a + 0, svNew );
   5474    zsm_swrite32( a + 4, svNew );
   5475 }
   5476 
   5477 /*------------- ZSM accesses: 8 bit sread/scopy ------------- */
   5478 
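/* Read the 8-bit shadow value at 'a', pulling the containing tree
   down to 8-bit granularity first if it is not already represented
   at that level. */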
   5479 static
   5480 SVal zsm_sread08 ( Addr a ) {
   5481    CacheLine* cl;
   5482    UWord      cloff, tno, toff;
   5483    UShort     descr;
   5484    stats__cline_sread08s++;
   5485    cl    = get_cacheline(a);
   5486    cloff = get_cacheline_offset(a);
   5487    tno   = get_treeno(a);
   5488    toff  = get_tree_offset(a); /* == 0 .. 7 */
   5489    descr = cl->descrs[tno];
   5490    if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
   5491       SVal* tree = &cl->svals[tno << 3];
   5492       cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
   5493    }
   5494    return cl->svals[cloff];
   5495 }
   5496 
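/* Copy the 8-bit shadow value at 'src' to 'dst'.  The 'uu_normalise'
   hint is currently unused, hence its 'uu_' prefix. */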
   5497 static void zsm_scopy08 ( Addr src, Addr dst, Bool uu_normalise ) {
   5498    SVal       sv;
   5499    stats__cline_scopy08s++;
   5500    sv = zsm_sread08( src );
   5501    zsm_swrite08( dst, sv );
   5502 }
   5503 
   5504 
   5505 /* Block-copy states (needed for implementing realloc()).  Note this
   5506    doesn't change the filtering arrangements.  The caller of
   5507    zsm_scopy_range needs to attend to that. */
   5508 
   5509 static void zsm_scopy_range ( Addr src, Addr dst, SizeT len )
   5510 {
   5511    SizeT i;
   5512    if (len == 0)
   5513       return;
   5514 
   5515    /* assert for non-overlappingness */
   5516    tl_assert(src+len <= dst || dst+len <= src);
   5517 
   5518    /* To be simple, just copy byte by byte.  But so as not to wreck
   5519       performance for later accesses to dst[0 .. len-1], normalise
   5520       destination lines as we finish with them, and also normalise the
   5521       line containing the first and last address. */
   5522    for (i = 0; i < len; i++) {
   5523       Bool normalise
   5524          = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
   5525            || i == 0       /* first in range */
   5526            || i == len-1;  /* last in range */
   5527       zsm_scopy08( src+i, dst+i, normalise );
   5528    }
   5529 }
   5530 
   5531 
   5532 /* For setting address ranges to a given value.  Has considerable
   5533    sophistication so as to avoid generating large numbers of pointless
   5534    cache loads/writebacks for large ranges. */
   5535 
   5536 /* Do small ranges in-cache, in the obvious way. */
   5537 static
   5538 void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew )
   5539 {
   5540    /* fast track a couple of common cases */
   5541    if (len == 4 && aligned32(a)) {
   5542       zsm_swrite32( a, svNew );
   5543       return;
   5544    }
   5545    if (len == 8 && aligned64(a)) {
   5546       zsm_swrite64( a, svNew );
   5547       return;
   5548    }
   5549 
   5550    /* be completely general (but as efficient as possible) */
   5551    if (len == 0) return;
   5552 
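   /* Peel off leading bytes until 'a' is 64-bit aligned, do the bulk
      of the range with 64-bit writes, and then finish the tail with
      progressively narrower writes. */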
   5553    if (!aligned16(a) && len >= 1) {
   5554       zsm_swrite08( a, svNew );
   5555       a += 1;
   5556       len -= 1;
   5557       tl_assert(aligned16(a));
   5558    }
   5559    if (len == 0) return;
   5560 
   5561    if (!aligned32(a) && len >= 2) {
   5562       zsm_swrite16( a, svNew );
   5563       a += 2;
   5564       len -= 2;
   5565       tl_assert(aligned32(a));
   5566    }
   5567    if (len == 0) return;
   5568 
   5569    if (!aligned64(a) && len >= 4) {
   5570       zsm_swrite32( a, svNew );
   5571       a += 4;
   5572       len -= 4;
   5573       tl_assert(aligned64(a));
   5574    }
   5575    if (len == 0) return;
   5576 
   5577    if (len >= 8) {
   5578       tl_assert(aligned64(a));
   5579       while (len >= 8) {
   5580          zsm_swrite64( a, svNew );
   5581          a += 8;
   5582          len -= 8;
   5583       }
   5584       tl_assert(aligned64(a));
   5585    }
   5586    if (len == 0) return;
   5587 
   5588    if (len >= 4)
   5589       tl_assert(aligned32(a));
   5590    if (len >= 4) {
   5591       zsm_swrite32( a, svNew );
   5592       a += 4;
   5593       len -= 4;
   5594    }
   5595    if (len == 0) return;
   5596 
   5597    if (len >= 2)
   5598       tl_assert(aligned16(a));
   5599    if (len >= 2) {
   5600       zsm_swrite16( a, svNew );
   5601       a += 2;
   5602       len -= 2;
   5603    }
   5604    if (len == 0) return;
   5605 
   5606    if (len >= 1) {
   5607       zsm_swrite08( a, svNew );
   5608       //a += 1;
   5609       len -= 1;
   5610    }
   5611    tl_assert(len == 0);
   5612 }
   5613 
   5614 
   5615 /* If we're doing a small range, hand off to zsm_sset_range_SMALL.  But
   5616    for larger ranges, try to operate directly on the out-of-cache
   5617    representation, rather than dragging lines into the cache,
   5618    overwriting them, and forcing them out.  This turns out to be an
   5619    important performance optimisation.
   5620 
   5621    Note that this doesn't change the filtering arrangements.  The
   5622    caller of zsm_sset_range needs to attend to that. */
   5623 
   5624 static void zsm_sset_range ( Addr a, SizeT len, SVal svNew )
   5625 {
   5626    tl_assert(svNew != SVal_INVALID);
   5627    stats__cache_make_New_arange += (ULong)len;
   5628 
   5629    if (0 && len > 500)
   5630       VG_(printf)("make New      ( %#lx, %ld )\n", a, len );
   5631 
   5632    if (0) {
   5633       static UWord n_New_in_cache = 0;
   5634       static UWord n_New_not_in_cache = 0;
   5635       /* tag is 'a' with the in-line offset masked out,
   5636          eg a[31]..a[4] 0000 */
   5637       Addr       tag = a & ~(N_LINE_ARANGE - 1);
   5638       UWord      wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
   5639       if (LIKELY(tag == cache_shmem.tags0[wix])) {
   5640          n_New_in_cache++;
   5641       } else {
   5642          n_New_not_in_cache++;
   5643       }
   5644       if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
   5645          VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
   5646                      n_New_in_cache, n_New_not_in_cache );
   5647    }
   5648 
   5649    if (LIKELY(len < 2 * N_LINE_ARANGE)) {
   5650       zsm_sset_range_SMALL( a, len, svNew );
   5651    } else {
   5652       Addr  before_start  = a;
   5653       Addr  aligned_start = cacheline_ROUNDUP(a);
   5654       Addr  after_start   = cacheline_ROUNDDN(a + len);
   5655       UWord before_len    = aligned_start - before_start;
   5656       UWord aligned_len   = after_start - aligned_start;
   5657       UWord after_len     = a + len - after_start;
   5658       tl_assert(before_start <= aligned_start);
   5659       tl_assert(aligned_start <= after_start);
   5660       tl_assert(before_len < N_LINE_ARANGE);
   5661       tl_assert(after_len < N_LINE_ARANGE);
   5662       tl_assert(get_cacheline_offset(aligned_start) == 0);
   5663       if (get_cacheline_offset(a) == 0) {
   5664          tl_assert(before_len == 0);
   5665          tl_assert(a == aligned_start);
   5666       }
   5667       if (get_cacheline_offset(a+len) == 0) {
   5668          tl_assert(after_len == 0);
   5669          tl_assert(after_start == a+len);
   5670       }
   5671       if (before_len > 0) {
   5672          zsm_sset_range_SMALL( before_start, before_len, svNew );
   5673       }
   5674       if (after_len > 0) {
   5675          zsm_sset_range_SMALL( after_start, after_len, svNew );
   5676       }
   5677       stats__cache_make_New_inZrep += (ULong)aligned_len;
   5678 
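      /* Now do the cacheline-aligned middle section, one line at a
         time.  A line already present in the cache is overwritten with
         64-bit shadow writes; a line not present is rewritten directly
         in its compressed (Z) representation rather than being pulled
         into the cache. */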
   5679       while (1) {
   5680          Addr tag;
   5681          UWord wix;
   5682          if (aligned_start >= after_start)
   5683             break;
   5684          tl_assert(get_cacheline_offset(aligned_start) == 0);
   5685          tag = aligned_start & ~(N_LINE_ARANGE - 1);
   5686          wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
   5687          if (tag == cache_shmem.tags0[wix]) {
   5688             UWord i;
   5689             for (i = 0; i < N_LINE_ARANGE / 8; i++)
   5690                zsm_swrite64( aligned_start + i * 8, svNew );
   5691          } else {
   5692             UWord i;
   5693             Word zix;
   5694             SecMap* sm;
   5695             LineZ* lineZ;
   5696             /* This line is not in the cache.  Do not force it in; instead
   5697                modify it in-place. */
   5698             /* find the Z line to write in and rcdec it or the
   5699                associated F line. */
   5700             find_Z_for_writing( &sm, &zix, tag );
   5701             tl_assert(sm);
   5702             tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
   5703             lineZ = &sm->linesZ[zix];
   5704             lineZ->dict[0] = svNew;
   5705             lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
   5706             for (i = 0; i < N_LINE_ARANGE/4; i++)
   5707                lineZ->ix2s[i] = 0; /* all refer to dict[0] */
   5708             rcinc_LineZ(lineZ);
   5709          }
   5710          aligned_start += N_LINE_ARANGE;
   5711          aligned_len -= N_LINE_ARANGE;
   5712       }
   5713       tl_assert(aligned_start == after_start);
   5714       tl_assert(aligned_len == 0);
   5715    }
   5716 }
   5717 
   5718 
   5719 /////////////////////////////////////////////////////////
   5720 //                                                     //
   5721 // Front-filtering accesses                            //
   5722 //                                                     //
   5723 /////////////////////////////////////////////////////////
   5724 
   5725 static UWord stats__f_ac = 0;
   5726 static UWord stats__f_sk = 0;
   5727 
   5728 #if 0
   5729 #  define STATS__F_SHOW \
   5730      do { \
   5731         if (UNLIKELY(0 == (stats__f_ac & 0xFFFFFF))) \
   5732            VG_(printf)("filters: ac %lu sk %lu\n",   \
   5733            stats__f_ac, stats__f_sk); \
   5734      } while (0)
   5735 #else
   5736 #  define STATS__F_SHOW /* */
   5737 #endif
   5738 
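/* The '_f_' routines below are thin filtering front ends: each first
   asks the thread's filter whether the access can be skipped; if so
   only the skip counter is bumped, otherwise the access falls through
   to the corresponding unfiltered zsm_sapply routine. */
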
   5739 void zsm_sapply08_f__msmcwrite ( Thr* thr, Addr a ) {
   5740    stats__f_ac++;
   5741    STATS__F_SHOW;
   5742    if (LIKELY(Filter__ok_to_skip_cwr08(thr->filter, a))) {
   5743       stats__f_sk++;
   5744       return;
   5745    }
   5746    zsm_sapply08__msmcwrite(thr, a);
   5747 }
   5748 
   5749 void zsm_sapply16_f__msmcwrite ( Thr* thr, Addr a ) {
   5750    stats__f_ac++;
   5751    STATS__F_SHOW;
   5752    if (LIKELY(Filter__ok_to_skip_cwr16(thr->filter, a))) {
   5753       stats__f_sk++;
   5754       return;
   5755    }
   5756    zsm_sapply16__msmcwrite(thr, a);
   5757 }
   5758 
   5759 void zsm_sapply32_f__msmcwrite ( Thr* thr, Addr a ) {
   5760    stats__f_ac++;
   5761    STATS__F_SHOW;
   5762    if (LIKELY(Filter__ok_to_skip_cwr32(thr->filter, a))) {
   5763       stats__f_sk++;
   5764       return;
   5765    }
   5766    zsm_sapply32__msmcwrite(thr, a);
   5767 }
   5768 
   5769 void zsm_sapply64_f__msmcwrite ( Thr* thr, Addr a ) {
   5770    stats__f_ac++;
   5771    STATS__F_SHOW;
   5772    if (LIKELY(Filter__ok_to_skip_cwr64(thr->filter, a))) {
   5773       stats__f_sk++;
   5774       return;
   5775    }
   5776    zsm_sapply64__msmcwrite(thr, a);
   5777 }
   5778 
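/* Apply a filtered write of arbitrary length by decomposing it into
   naturally aligned 1-, 2-, 4- and 8-byte pieces, each handled by the
   fixed-size routines above. */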
   5779 void zsm_sapplyNN_f__msmcwrite ( Thr* thr, Addr a, SizeT len )
   5780 {
   5781    /* fast track a couple of common cases */
   5782    if (len == 4 && aligned32(a)) {
   5783       zsm_sapply32_f__msmcwrite( thr, a );
   5784       return;
   5785    }
   5786    if (len == 8 && aligned64(a)) {
   5787       zsm_sapply64_f__msmcwrite( thr, a );
   5788       return;
   5789    }
   5790 
   5791    /* be completely general (but as efficient as possible) */
   5792    if (len == 0) return;
   5793 
   5794    if (!aligned16(a) && len >= 1) {
   5795       zsm_sapply08_f__msmcwrite( thr, a );
   5796       a += 1;
   5797       len -= 1;
   5798       tl_assert(aligned16(a));
   5799    }
   5800    if (len == 0) return;
   5801 
   5802    if (!aligned32(a) && len >= 2) {
   5803       zsm_sapply16_f__msmcwrite( thr, a );
   5804       a += 2;
   5805       len -= 2;
   5806       tl_assert(aligned32(a));
   5807    }
   5808    if (len == 0) return;
   5809 
   5810    if (!aligned64(a) && len >= 4) {
   5811       zsm_sapply32_f__msmcwrite( thr, a );
   5812       a += 4;
   5813       len -= 4;
   5814       tl_assert(aligned64(a));
   5815    }
   5816    if (len == 0) return;
   5817 
   5818    if (len >= 8) {
   5819       tl_assert(aligned64(a));
   5820       while (len >= 8) {
   5821          zsm_sapply64_f__msmcwrite( thr, a );
   5822          a += 8;
   5823          len -= 8;
   5824       }
   5825       tl_assert(aligned64(a));
   5826    }
   5827    if (len == 0) return;
   5828 
   5829    if (len >= 4)
   5830       tl_assert(aligned32(a));
   5831    if (len >= 4) {
   5832       zsm_sapply32_f__msmcwrite( thr, a );
   5833       a += 4;
   5834       len -= 4;
   5835    }
   5836    if (len == 0) return;
   5837 
   5838    if (len >= 2)
   5839       tl_assert(aligned16(a));
   5840    if (len >= 2) {
   5841       zsm_sapply16_f__msmcwrite( thr, a );
   5842       a += 2;
   5843       len -= 2;
   5844    }
   5845    if (len == 0) return;
   5846 
   5847    if (len >= 1) {
   5848       zsm_sapply08_f__msmcwrite( thr, a );
   5849       //a += 1;
   5850       len -= 1;
   5851    }
   5852    tl_assert(len == 0);
   5853 }
   5854 
   5855 void zsm_sapply08_f__msmcread ( Thr* thr, Addr a ) {
   5856    stats__f_ac++;
   5857    STATS__F_SHOW;
   5858    if (LIKELY(Filter__ok_to_skip_crd08(thr->filter, a))) {
   5859       stats__f_sk++;
   5860       return;
   5861    }
   5862    zsm_sapply08__msmcread(thr, a);
   5863 }
   5864 
   5865 void zsm_sapply16_f__msmcread ( Thr* thr, Addr a ) {
   5866    stats__f_ac++;
   5867    STATS__F_SHOW;
   5868    if (LIKELY(Filter__ok_to_skip_crd16(thr->filter, a))) {
   5869       stats__f_sk++;
   5870       return;
   5871    }
   5872    zsm_sapply16__msmcread(thr, a);
   5873 }
   5874 
   5875 void zsm_sapply32_f__msmcread ( Thr* thr, Addr a ) {
   5876    stats__f_ac++;
   5877    STATS__F_SHOW;
   5878    if (LIKELY(Filter__ok_to_skip_crd32(thr->filter, a))) {
   5879       stats__f_sk++;
   5880       return;
   5881    }
   5882    zsm_sapply32__msmcread(thr, a);
   5883 }
   5884 
   5885 void zsm_sapply64_f__msmcread ( Thr* thr, Addr a ) {
   5886    stats__f_ac++;
   5887    STATS__F_SHOW;
   5888    if (LIKELY(Filter__ok_to_skip_crd64(thr->filter, a))) {
   5889       stats__f_sk++;
   5890       return;
   5891    }
   5892    zsm_sapply64__msmcread(thr, a);
   5893 }
   5894 
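/* As zsm_sapplyNN_f__msmcwrite above, but for reads. */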
   5895 void zsm_sapplyNN_f__msmcread ( Thr* thr, Addr a, SizeT len )
   5896 {
   5897    /* fast track a couple of common cases */
   5898    if (len == 4 && aligned32(a)) {
   5899       zsm_sapply32_f__msmcread( thr, a );
   5900       return;
   5901    }
   5902    if (len == 8 && aligned64(a)) {
   5903       zsm_sapply64_f__msmcread( thr, a );
   5904       return;
   5905    }
   5906 
   5907    /* be completely general (but as efficient as possible) */
   5908    if (len == 0) return;
   5909 
   5910    if (!aligned16(a) && len >= 1) {
   5911       zsm_sapply08_f__msmcread( thr, a );
   5912       a += 1;
   5913       len -= 1;
   5914       tl_assert(aligned16(a));
   5915    }
   5916    if (len == 0) return;
   5917 
   5918    if (!aligned32(a) && len >= 2) {
   5919       zsm_sapply16_f__msmcread( thr, a );
   5920       a += 2;
   5921       len -= 2;
   5922       tl_assert(aligned32(a));
   5923    }
   5924    if (len == 0) return;
   5925 
   5926    if (!aligned64(a) && len >= 4) {
   5927       zsm_sapply32_f__msmcread( thr, a );
   5928       a += 4;
   5929       len -= 4;
   5930       tl_assert(aligned64(a));
   5931    }
   5932    if (len == 0) return;
   5933 
   5934    if (len >= 8) {
   5935       tl_assert(aligned64(a));
   5936       while (len >= 8) {
   5937          zsm_sapply64_f__msmcread( thr, a );
   5938          a += 8;
   5939          len -= 8;
   5940       }
   5941       tl_assert(aligned64(a));
   5942    }
   5943    if (len == 0) return;
   5944 
   5945    if (len >= 4)
   5946       tl_assert(aligned32(a));
   5947    if (len >= 4) {
   5948       zsm_sapply32_f__msmcread( thr, a );
   5949       a += 4;
   5950       len -= 4;
   5951    }
   5952    if (len == 0) return;
   5953 
   5954    if (len >= 2)
   5955       tl_assert(aligned16(a));
   5956    if (len >= 2) {
   5957       zsm_sapply16_f__msmcread( thr, a );
   5958       a += 2;
   5959       len -= 2;
   5960    }
   5961    if (len == 0) return;
   5962 
   5963    if (len >= 1) {
   5964       zsm_sapply08_f__msmcread( thr, a );
   5965       //a += 1;
   5966       len -= 1;
   5967    }
   5968    tl_assert(len == 0);
   5969 }
   5970 
   5971 void libhb_Thr_resumes ( Thr* thr )
   5972 {
   5973    if (0) VG_(printf)("resume %p\n", thr);
   5974    tl_assert(thr);
   5975    tl_assert(!thr->llexit_done);
   5976    Filter__clear(thr->filter, "libhb_Thr_resumes");
   5977    /* A kludge, but .. if this thread doesn't have any marker stacks
   5978       at all, get one right now.  This is easier than figuring out
   5979       exactly when at thread startup we can and can't take a stack
   5980       snapshot. */
   5981    if (HG_(clo_history_level) == 1) {
   5982       tl_assert(thr->local_Kws_n_stacks);
   5983       if (VG_(sizeXA)( thr->local_Kws_n_stacks ) == 0)
   5984          note_local_Kw_n_stack_for(thr);
   5985    }
   5986 }
   5987 
   5988 
   5989 /////////////////////////////////////////////////////////
   5990 //                                                     //
   5991 // Synchronisation objects                             //
   5992 //                                                     //
   5993 /////////////////////////////////////////////////////////
   5994 
    5995 /* A doubly-linked list of all the SOs. */
   5996 SO* admin_SO = NULL;
   5997 
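/* Allocate a new SO with both vector clocks invalid, and link it at
   the head of the admin_SO list. */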
   5998 static SO* SO__Alloc ( void )
   5999 {
   6000    SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
   6001    so->viR   = VtsID_INVALID;
   6002    so->viW   = VtsID_INVALID;
   6003    so->magic = SO_MAGIC;
    6004    /* Add to the doubly-linked list */
   6005    if (admin_SO) {
   6006       tl_assert(admin_SO->admin_prev == NULL);
   6007       admin_SO->admin_prev = so;
   6008       so->admin_next = admin_SO;
   6009    } else {
   6010       so->admin_next = NULL;
   6011    }
   6012    so->admin_prev = NULL;
   6013    admin_SO = so;
   6014    /* */
   6015    return so;
   6016 }
   6017 
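/* Drop any vector-clock references held by 'so', unlink it from the
   admin_SO list, and free it. */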
   6018 static void SO__Dealloc ( SO* so )
   6019 {
   6020    tl_assert(so);
   6021    tl_assert(so->magic == SO_MAGIC);
   6022    if (so->viR == VtsID_INVALID) {
   6023       tl_assert(so->viW == VtsID_INVALID);
   6024    } else {
   6025       tl_assert(so->viW != VtsID_INVALID);
   6026       VtsID__rcdec(so->viR);
   6027       VtsID__rcdec(so->viW);
   6028    }
   6029    so->magic = 0;
    6030    /* Delete from the doubly-linked list */
   6031    if (so->admin_prev)
   6032       so->admin_prev->admin_next = so->admin_next;
   6033    if (so->admin_next)
   6034       so->admin_next->admin_prev = so->admin_prev;
   6035    if (so == admin_SO)
   6036       admin_SO = so->admin_next;
   6037    /* */
   6038    HG_(free)( so );
   6039 }
   6040 
   6041 
   6042 /////////////////////////////////////////////////////////
   6043 //                                                     //
   6044 // Top Level API                                       //
   6045 //                                                     //
   6046 /////////////////////////////////////////////////////////
   6047 
   6048 static void show_thread_state ( const HChar* str, Thr* t )
   6049 {
   6050    if (1) return;
   6051    if (t->viR == t->viW) {
   6052       VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
   6053       VtsID__pp( t->viR );
   6054       VG_(printf)("%s","\n");
   6055    } else {
   6056       VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
   6057       VtsID__pp( t->viR );
   6058       VG_(printf)(" viW %u==", t->viW);
   6059       VtsID__pp( t->viW );
   6060       VG_(printf)("%s","\n");
   6061    }
   6062 }
   6063 
   6064 
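/* One-time initialisation: check structure-size invariants, set up
   the VTS machinery and the shadow-memory cache, then create and
   return the root thread, whose read- and write-clocks are a
   singleton VTS for that thread. */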
   6065 Thr* libhb_init (
   6066         void        (*get_stacktrace)( Thr*, Addr*, UWord ),
   6067         ExeContext* (*get_EC)( Thr* )
   6068      )
   6069 {
   6070    Thr*  thr;
   6071    VtsID vi;
   6072 
    6073    // We will have to store a large number of these,
   6074    // so make sure they're the size we expect them to be.
   6075    tl_assert(sizeof(ScalarTS) == 8);
   6076 
    6077    /* because the first 1024 thread IDs are unusable */
   6078    tl_assert(SCALARTS_N_THRBITS >= 11);
   6079    /* so as to fit in a UInt w/ 3 bits to spare (see defn of
   6080       Thr_n_RCEC). */
   6081    tl_assert(SCALARTS_N_THRBITS <= 29);
   6082 
   6083    /* Need to be sure that Thr_n_RCEC is 2 words (64-bit) or 3 words
   6084       (32-bit).  It's not correctness-critical, but there are a lot of
   6085       them, so it's important from a space viewpoint.  Unfortunately
   6086       we simply can't pack it into 2 words on a 32-bit target. */
   6087    if (sizeof(UWord) == 8) {
   6088       tl_assert(sizeof(Thr_n_RCEC) == 16);
   6089    } else {
   6090       tl_assert(sizeof(Thr_n_RCEC) == 12);
   6091    }
   6092 
   6093    /* Word sets really are 32 bits.  Even on a 64 bit target. */
   6094    tl_assert(sizeof(WordSetID) == 4);
   6095    tl_assert(sizeof(WordSet) == sizeof(WordSetID));
   6096 
   6097    tl_assert(get_stacktrace);
   6098    tl_assert(get_EC);
   6099    main_get_stacktrace   = get_stacktrace;
   6100    main_get_EC           = get_EC;
   6101 
   6102    // No need to initialise hg_wordfm.
   6103    // No need to initialise hg_wordset.
   6104 
   6105    /* Allocated once and never deallocated.  Used as a temporary in
   6106       VTS singleton, tick and join operations. */
   6107    temp_max_sized_VTS = VTS__new( "libhb.libhb_init.1", ThrID_MAX_VALID );
   6108    temp_max_sized_VTS->id = VtsID_INVALID;
   6109    verydead_thread_table_init();
   6110    vts_set_init();
   6111    vts_tab_init();
   6112    event_map_init();
   6113    VtsID__invalidate_caches();
   6114 
   6115    // initialise shadow memory
   6116    zsm_init( SVal__rcinc, SVal__rcdec );
   6117 
   6118    thr = Thr__new();
   6119    vi  = VtsID__mk_Singleton( thr, 1 );
   6120    thr->viR = vi;
   6121    thr->viW = vi;
   6122    VtsID__rcinc(thr->viR);
   6123    VtsID__rcinc(thr->viW);
   6124 
   6125    show_thread_state("  root", thr);
   6126    return thr;
   6127 }
   6128 
   6129 
   6130 Thr* libhb_create ( Thr* parent )
   6131 {
   6132    /* The child's VTSs are copies of the parent's VTSs, but ticked at
   6133       the child's index.  Since the child's index is guaranteed
   6134       unique, it has never been seen before, so the implicit value
   6135       before the tick is zero and after that is one. */
   6136    Thr* child = Thr__new();
   6137 
   6138    child->viR = VtsID__tick( parent->viR, child );
   6139    child->viW = VtsID__tick( parent->viW, child );
   6140    Filter__clear(child->filter, "libhb_create(child)");
   6141    VtsID__rcinc(child->viR);
   6142    VtsID__rcinc(child->viW);
   6143    /* We need to do note_local_Kw_n_stack_for( child ), but it's too
   6144       early for that - it may not have a valid TId yet.  So, let
   6145       libhb_Thr_resumes pick it up the first time the thread runs. */
   6146 
   6147    tl_assert(VtsID__indexAt( child->viR, child ) == 1);
   6148    tl_assert(VtsID__indexAt( child->viW, child ) == 1);
   6149 
   6150    /* and the parent has to move along too */
   6151    VtsID__rcdec(parent->viR);
   6152    VtsID__rcdec(parent->viW);
   6153    parent->viR = VtsID__tick( parent->viR, parent );
   6154    parent->viW = VtsID__tick( parent->viW, parent );
   6155    Filter__clear(parent->filter, "libhb_create(parent)");
   6156    VtsID__rcinc(parent->viR);
   6157    VtsID__rcinc(parent->viW);
   6158    note_local_Kw_n_stack_for( parent );
   6159 
   6160    show_thread_state(" child", child);
   6161    show_thread_state("parent", parent);
   6162 
   6163    return child;
   6164 }
   6165 
   6166 /* Shut down the library, and print stats (in fact that's _all_
    6167    this is for). */
   6168 void libhb_shutdown ( Bool show_stats )
   6169 {
   6170    if (show_stats) {
   6171       VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
   6172       VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
   6173                   stats__secmaps_allocd,
   6174                   stats__secmap_ga_space_covered);
   6175       VG_(printf)("  linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
   6176                   stats__secmap_linesZ_allocd,
   6177                   stats__secmap_linesZ_bytes);
   6178       VG_(printf)("  linesF: %'10lu allocd (%'12lu bytes occupied)\n",
   6179                   stats__secmap_linesF_allocd,
   6180                   stats__secmap_linesF_bytes);
   6181       VG_(printf)(" secmaps: %'10lu iterator steppings\n",
   6182                   stats__secmap_iterator_steppings);
   6183       VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
   6184                   stats__secmaps_search, stats__secmaps_search_slow);
   6185 
   6186       VG_(printf)("%s","\n");
   6187       VG_(printf)("   cache: %'lu totrefs (%'lu misses)\n",
   6188                   stats__cache_totrefs, stats__cache_totmisses );
   6189       VG_(printf)("   cache: %'14lu Z-fetch,    %'14lu F-fetch\n",
   6190                   stats__cache_Z_fetches, stats__cache_F_fetches );
   6191       VG_(printf)("   cache: %'14lu Z-wback,    %'14lu F-wback\n",
   6192                   stats__cache_Z_wbacks, stats__cache_F_wbacks );
   6193       VG_(printf)("   cache: %'14lu invals,     %'14lu flushes\n",
   6194                   stats__cache_invals, stats__cache_flushes );
   6195       VG_(printf)("   cache: %'14llu arange_New  %'14llu direct-to-Zreps\n",
   6196                   stats__cache_make_New_arange,
   6197                   stats__cache_make_New_inZrep);
   6198 
   6199       VG_(printf)("%s","\n");
   6200       VG_(printf)("   cline: %'10lu normalises\n",
   6201                   stats__cline_normalises );
   6202       VG_(printf)("   cline: c rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
   6203                   stats__cline_cread64s,
   6204                   stats__cline_cread32s,
   6205                   stats__cline_cread16s,
   6206                   stats__cline_cread08s );
   6207       VG_(printf)("   cline: c wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
   6208                   stats__cline_cwrite64s,
   6209                   stats__cline_cwrite32s,
   6210                   stats__cline_cwrite16s,
   6211                   stats__cline_cwrite08s );
   6212       VG_(printf)("   cline: s wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
   6213                   stats__cline_swrite64s,
   6214                   stats__cline_swrite32s,
   6215                   stats__cline_swrite16s,
   6216                   stats__cline_swrite08s );
   6217       VG_(printf)("   cline: s rd1s %'lu, s copy1s %'lu\n",
   6218                   stats__cline_sread08s, stats__cline_scopy08s );
   6219       VG_(printf)("   cline:    splits: 8to4 %'12lu    4to2 %'12lu    2to1 %'12lu\n",
   6220                  stats__cline_64to32splits,
   6221                  stats__cline_32to16splits,
   6222                  stats__cline_16to8splits );
   6223       VG_(printf)("   cline: pulldowns: 8to4 %'12lu    4to2 %'12lu    2to1 %'12lu\n",
   6224                  stats__cline_64to32pulldown,
   6225                  stats__cline_32to16pulldown,
   6226                  stats__cline_16to8pulldown );
   6227       if (0)
   6228       VG_(printf)("   cline: sizeof(CacheLineZ) %ld, covers %ld bytes of arange\n",
   6229                   (Word)sizeof(LineZ), (Word)N_LINE_ARANGE);
   6230 
   6231       VG_(printf)("%s","\n");
   6232 
   6233       VG_(printf)("   libhb: %'13llu msmcread  (%'llu dragovers)\n",
   6234                   stats__msmcread, stats__msmcread_change);
   6235       VG_(printf)("   libhb: %'13llu msmcwrite (%'llu dragovers)\n",
   6236                   stats__msmcwrite, stats__msmcwrite_change);
   6237       VG_(printf)("   libhb: %'13llu cmpLEQ queries (%'llu misses)\n",
   6238                   stats__cmpLEQ_queries, stats__cmpLEQ_misses);
   6239       VG_(printf)("   libhb: %'13llu join2  queries (%'llu misses)\n",
   6240                   stats__join2_queries, stats__join2_misses);
   6241 
   6242       VG_(printf)("%s","\n");
   6243       VG_(printf)( "   libhb: VTSops: tick %'lu,  join %'lu,  cmpLEQ %'lu\n",
   6244                    stats__vts__tick, stats__vts__join,  stats__vts__cmpLEQ );
   6245       VG_(printf)( "   libhb: VTSops: cmp_structural %'lu (%'lu slow)\n",
   6246                    stats__vts__cmp_structural, stats__vts__cmp_structural_slow );
   6247       VG_(printf)( "   libhb: VTSset: find__or__clone_and_add %'lu (%'lu allocd)\n",
   6248                    stats__vts_set__focaa, stats__vts_set__focaa_a );
   6249       VG_(printf)( "   libhb: VTSops: indexAt_SLOW %'lu\n",
   6250                    stats__vts__indexat_slow );
   6251 
   6252       VG_(printf)("%s","\n");
   6253       VG_(printf)(
   6254          "   libhb: %ld entries in vts_table (approximately %lu bytes)\n",
   6255          VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
   6256       );
   6257       VG_(printf)( "   libhb: %lu entries in vts_set\n",
   6258                    VG_(sizeFM)( vts_set ) );
   6259 
   6260       VG_(printf)("%s","\n");
   6261       VG_(printf)( "   libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n",
   6262                    stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq,
   6263                    stats__ctxt_rcdec2,
   6264                    stats__ctxt_rcdec3 );
   6265       VG_(printf)( "   libhb: ctxt__rcdec: calls %lu, discards %lu\n",
   6266                    stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards);
   6267       VG_(printf)( "   libhb: contextTab: %lu slots, %lu max ents\n",
   6268                    (UWord)N_RCEC_TAB,
   6269                    stats__ctxt_tab_curr );
   6270       VG_(printf)( "   libhb: contextTab: %lu queries, %lu cmps\n",
   6271                    stats__ctxt_tab_qs,
   6272                    stats__ctxt_tab_cmps );
   6273 #if 0
   6274       VG_(printf)("sizeof(AvlNode)     = %lu\n", sizeof(AvlNode));
   6275       VG_(printf)("sizeof(WordBag)     = %lu\n", sizeof(WordBag));
   6276       VG_(printf)("sizeof(MaybeWord)   = %lu\n", sizeof(MaybeWord));
   6277       VG_(printf)("sizeof(CacheLine)   = %lu\n", sizeof(CacheLine));
   6278       VG_(printf)("sizeof(LineZ)       = %lu\n", sizeof(LineZ));
   6279       VG_(printf)("sizeof(LineF)       = %lu\n", sizeof(LineF));
   6280       VG_(printf)("sizeof(SecMap)      = %lu\n", sizeof(SecMap));
   6281       VG_(printf)("sizeof(Cache)       = %lu\n", sizeof(Cache));
   6282       VG_(printf)("sizeof(SMCacheEnt)  = %lu\n", sizeof(SMCacheEnt));
   6283       VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
   6284       VG_(printf)("sizeof(VTS)         = %lu\n", sizeof(VTS));
   6285       VG_(printf)("sizeof(ScalarTS)    = %lu\n", sizeof(ScalarTS));
   6286       VG_(printf)("sizeof(VtsTE)       = %lu\n", sizeof(VtsTE));
   6287       VG_(printf)("sizeof(MSMInfo)     = %lu\n", sizeof(MSMInfo));
   6288 
   6289       VG_(printf)("sizeof(struct _XArray)     = %lu\n", sizeof(struct _XArray));
   6290       VG_(printf)("sizeof(struct _WordFM)     = %lu\n", sizeof(struct _WordFM));
   6291       VG_(printf)("sizeof(struct _Thr)     = %lu\n", sizeof(struct _Thr));
   6292       VG_(printf)("sizeof(struct _SO)     = %lu\n", sizeof(struct _SO));
   6293 #endif
   6294 
   6295       VG_(printf)("%s","<<< END libhb stats >>>\n");
   6296       VG_(printf)("%s","\n");
   6297 
   6298    }
   6299 }
   6300 
   6301 /* Receive notification that a thread has low level exited.  The
   6302    significance here is that we do not expect to see any more memory
   6303    references from it. */
   6304 void libhb_async_exit ( Thr* thr )
   6305 {
   6306    tl_assert(thr);
   6307    tl_assert(!thr->llexit_done);
   6308    thr->llexit_done = True;
   6309 
   6310    /* free up Filter and local_Kws_n_stacks (well, actually not the
   6311       latter ..) */
   6312    tl_assert(thr->filter);
   6313    HG_(free)(thr->filter);
   6314    thr->filter = NULL;
   6315 
   6316    /* Tell the VTS mechanism this thread has exited, so it can
   6317       participate in VTS pruning.  Note this can only happen if the
   6318       thread has both ll_exited and has been joined with. */
   6319    if (thr->joinedwith_done)
   6320       VTS__declare_thread_very_dead(thr);
   6321 
   6322    /* Another space-accuracy tradeoff.  Do we want to be able to show
   6323       H1 history for conflicts in threads which have since exited?  If
   6324       yes, then we better not free up thr->local_Kws_n_stacks.  The
   6325       downside is a potential per-thread leak of up to
   6326       N_KWs_N_STACKs_PER_THREAD * sizeof(ULong_n_EC) * whatever the
   6327       XArray average overcommit factor is (1.5 I'd guess). */
   6328    // hence:
   6329    // VG_(deleteXA)(thr->local_Kws_n_stacks);
   6330    // thr->local_Kws_n_stacks = NULL;
   6331 }
   6332 
   6333 /* Receive notification that a thread has been joined with.  The
   6334    significance here is that we do not expect to see any further
   6335    references to its vector clocks (Thr::viR and Thr::viW). */
   6336 void libhb_joinedwith_done ( Thr* thr )
   6337 {
   6338    tl_assert(thr);
   6339    /* Caller must ensure that this is only ever called once per Thr. */
   6340    tl_assert(!thr->joinedwith_done);
   6341    thr->joinedwith_done = True;
   6342    if (thr->llexit_done)
   6343       VTS__declare_thread_very_dead(thr);
   6344 }
   6345 
   6346 
   6347 /* Both Segs and SOs point to VTSs.  However, there is no sharing, so
   6348    a Seg that points at a VTS is its one-and-only owner, and ditto for
   6349    a SO that points at a VTS. */
   6350 
   6351 SO* libhb_so_alloc ( void )
   6352 {
   6353    return SO__Alloc();
   6354 }
   6355 
   6356 void libhb_so_dealloc ( SO* so )
   6357 {
   6358    tl_assert(so);
   6359    tl_assert(so->magic == SO_MAGIC);
   6360    SO__Dealloc(so);
   6361 }
   6362 
   6363 /* See comments in libhb.h for details on the meaning of
   6364    strong vs weak sends and strong vs weak receives. */
   6365 void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
   6366 {
   6367    /* Copy the VTSs from 'thr' into the sync object, and then move
   6368       the thread along one step. */
   6369 
   6370    tl_assert(so);
   6371    tl_assert(so->magic == SO_MAGIC);
   6372 
   6373    /* stay sane .. a thread's read-clock must always lead or be the
   6374       same as its write-clock */
   6375    { Bool leq = VtsID__cmpLEQ(thr->viW, thr->viR);
   6376      tl_assert(leq);
   6377    }
   6378 
   6379    /* since we're overwriting the VtsIDs in the SO, we need to drop
   6380       any references made by the previous contents thereof */
   6381    if (so->viR == VtsID_INVALID) {
   6382       tl_assert(so->viW == VtsID_INVALID);
   6383       so->viR = thr->viR;
   6384       so->viW = thr->viW;
   6385       VtsID__rcinc(so->viR);
   6386       VtsID__rcinc(so->viW);
   6387    } else {
   6388       /* In a strong send, we dump any previous VC in the SO and
   6389          install the sending thread's VC instead.  For a weak send we
   6390          must join2 with what's already there. */
   6391       tl_assert(so->viW != VtsID_INVALID);
   6392       VtsID__rcdec(so->viR);
   6393       VtsID__rcdec(so->viW);
   6394       so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
   6395       so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
   6396       VtsID__rcinc(so->viR);
   6397       VtsID__rcinc(so->viW);
   6398    }
   6399 
   6400    /* move both parent clocks along */
   6401    VtsID__rcdec(thr->viR);
   6402    VtsID__rcdec(thr->viW);
   6403    thr->viR = VtsID__tick( thr->viR, thr );
   6404    thr->viW = VtsID__tick( thr->viW, thr );
   6405    if (!thr->llexit_done) {
   6406       Filter__clear(thr->filter, "libhb_so_send");
   6407       note_local_Kw_n_stack_for(thr);
   6408    }
   6409    VtsID__rcinc(thr->viR);
   6410    VtsID__rcinc(thr->viW);
   6411 
   6412    if (strong_send)
   6413       show_thread_state("s-send", thr);
   6414    else
   6415       show_thread_state("w-send", thr);
   6416 }
   6417 
   6418 void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
   6419 {
   6420    tl_assert(so);
   6421    tl_assert(so->magic == SO_MAGIC);
   6422 
   6423    if (so->viR != VtsID_INVALID) {
   6424       tl_assert(so->viW != VtsID_INVALID);
   6425 
    6426       /* Weak receive (basically, an R-acquisition of an R-W lock).
   6427          This advances the read-clock of the receiver, but not the
   6428          write-clock. */
   6429       VtsID__rcdec(thr->viR);
   6430       thr->viR = VtsID__join2( thr->viR, so->viR );
   6431       VtsID__rcinc(thr->viR);
   6432 
   6433       /* At one point (r10589) it seemed safest to tick the clocks for
   6434          the receiving thread after the join.  But on reflection, I
   6435          wonder if that might cause it to 'overtake' constraints,
   6436          which could lead to missing races.  So, back out that part of
   6437          r10589. */
   6438       //VtsID__rcdec(thr->viR);
   6439       //thr->viR = VtsID__tick( thr->viR, thr );
   6440       //VtsID__rcinc(thr->viR);
   6441 
   6442       /* For a strong receive, we also advance the receiver's write
   6443          clock, which means the receive as a whole is essentially
    6444          equivalent to a W-acquisition of an R-W lock. */
   6445       if (strong_recv) {
   6446          VtsID__rcdec(thr->viW);
   6447          thr->viW = VtsID__join2( thr->viW, so->viW );
   6448          VtsID__rcinc(thr->viW);
   6449 
   6450          /* See comment just above, re r10589. */
   6451          //VtsID__rcdec(thr->viW);
   6452          //thr->viW = VtsID__tick( thr->viW, thr );
   6453          //VtsID__rcinc(thr->viW);
   6454       }
   6455 
   6456       if (thr->filter)
   6457          Filter__clear(thr->filter, "libhb_so_recv");
   6458       note_local_Kw_n_stack_for(thr);
   6459 
   6460       if (strong_recv)
   6461          show_thread_state("s-recv", thr);
   6462       else
   6463          show_thread_state("w-recv", thr);
   6464 
   6465    } else {
   6466       tl_assert(so->viW == VtsID_INVALID);
   6467       /* Deal with degenerate case: 'so' has no vts, so there has been
   6468          no message posted to it.  Just ignore this case. */
   6469       show_thread_state("d-recv", thr);
   6470    }
   6471 }
   6472 
   6473 Bool libhb_so_everSent ( SO* so )
   6474 {
   6475    if (so->viR == VtsID_INVALID) {
   6476       tl_assert(so->viW == VtsID_INVALID);
   6477       return False;
   6478    } else {
   6479       tl_assert(so->viW != VtsID_INVALID);
   6480       return True;
   6481    }
   6482 }
   6483 
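/* Ad-hoc debugging aid: trace accesses overlapping one of two
   hard-wired addresses (XXX1/XXX2).  Disabled when both are zero, as
   they are here. */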
   6484 #define XXX1 0 // 0x67a106c
   6485 #define XXX2 0
   6486 
   6487 static inline Bool TRACEME(Addr a, SizeT szB) {
   6488    if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
   6489    if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
   6490    return False;
   6491 }
   6492 static void trace ( Thr* thr, Addr a, SizeT szB, const HChar* s )
   6493 {
   6494   SVal sv = zsm_sread08(a);
   6495   VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
   6496   show_thread_state("", thr);
   6497   VG_(printf)("%s","\n");
   6498 }
   6499 
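/* Give every byte in [a, a+szB) a fresh shadow value built from the
   thread's current write-clock, and flush the thread's filter for
   that range. */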
   6500 void libhb_srange_new ( Thr* thr, Addr a, SizeT szB )
   6501 {
   6502    SVal sv = SVal__mkC(thr->viW, thr->viW);
   6503    tl_assert(is_sane_SVal_C(sv));
   6504    if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-before");
   6505    zsm_sset_range( a, szB, sv );
   6506    Filter__clear_range( thr->filter, a, szB );
   6507    if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-after ");
   6508 }
   6509 
   6510 void libhb_srange_noaccess_NoFX ( Thr* thr, Addr a, SizeT szB )
   6511 {
   6512    /* do nothing */
   6513 }
   6514 
   6515 void libhb_srange_noaccess_AHAE ( Thr* thr, Addr a, SizeT szB )
   6516 {
   6517    /* This really does put the requested range in NoAccess.  It's
   6518       expensive though. */
   6519    SVal sv = SVal_NOACCESS;
   6520    tl_assert(is_sane_SVal_C(sv));
   6521    zsm_sset_range( a, szB, sv );
   6522    Filter__clear_range( thr->filter, a, szB );
   6523 }
   6524 
   6525 void libhb_srange_untrack ( Thr* thr, Addr a, SizeT szB )
   6526 {
   6527    SVal sv = SVal_NOACCESS;
   6528    tl_assert(is_sane_SVal_C(sv));
   6529    if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-before");
   6530    zsm_sset_range( a, szB, sv );
   6531    Filter__clear_range( thr->filter, a, szB );
   6532    if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-after ");
   6533 }
   6534 
   6535 Thread* libhb_get_Thr_hgthread ( Thr* thr ) {
   6536    tl_assert(thr);
   6537    return thr->hgthread;
   6538 }
   6539 
   6540 void libhb_set_Thr_hgthread ( Thr* thr, Thread* hgthread ) {
   6541    tl_assert(thr);
   6542    thr->hgthread = hgthread;
   6543 }
   6544 
   6545 void libhb_copy_shadow_state ( Thr* thr, Addr src, Addr dst, SizeT len )
   6546 {
   6547    zsm_scopy_range(src, dst, len);
   6548    Filter__clear_range( thr->filter, dst, len );
   6549 }
   6550 
   6551 void libhb_maybe_GC ( void )
   6552 {
   6553    event_map_maybe_GC();
   6554    /* If there are still freelist entries available, no need for a
   6555       GC. */
   6556    if (vts_tab_freelist != VtsID_INVALID)
   6557       return;
   6558    /* So all the table entries are full, and we're having to expand
    6559       the table.  But did we hit the threshold point yet? */
   6560    if (VG_(sizeXA)( vts_tab ) < vts_next_GC_at)
   6561       return;
   6562    vts_tab__do_GC( False/*don't show stats*/ );
   6563 }
   6564 
   6565 
   6566 /////////////////////////////////////////////////////////////////
   6567 /////////////////////////////////////////////////////////////////
   6568 //                                                             //
   6569 // SECTION END main library                                    //
   6570 //                                                             //
   6571 /////////////////////////////////////////////////////////////////
   6572 /////////////////////////////////////////////////////////////////
   6573 
   6574 /*--------------------------------------------------------------------*/
   6575 /*--- end                                             libhb_main.c ---*/
   6576 /*--------------------------------------------------------------------*/
   6577