      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- MemCheck: Maintain bitmaps of memory, tracking the           ---*/
      4 /*--- accessibility (A) and validity (V) status of each byte.      ---*/
      5 /*---                                                    mc_main.c ---*/
      6 /*--------------------------------------------------------------------*/
      7 
      8 /*
      9    This file is part of MemCheck, a heavyweight Valgrind tool for
     10    detecting memory errors.
     11 
     12    Copyright (C) 2000-2010 Julian Seward
     13       jseward (at) acm.org
     14 
     15    This program is free software; you can redistribute it and/or
     16    modify it under the terms of the GNU General Public License as
     17    published by the Free Software Foundation; either version 2 of the
     18    License, or (at your option) any later version.
     19 
     20    This program is distributed in the hope that it will be useful, but
     21    WITHOUT ANY WARRANTY; without even the implied warranty of
     22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     23    General Public License for more details.
     24 
     25    You should have received a copy of the GNU General Public License
     26    along with this program; if not, write to the Free Software
     27    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     28    02111-1307, USA.
     29 
     30    The GNU General Public License is contained in the file COPYING.
     31 */
     32 
     33 #include "pub_tool_basics.h"
     34 #include "pub_tool_aspacemgr.h"
     35 #include "pub_tool_hashtable.h"     // For mc_include.h
     36 #include "pub_tool_libcbase.h"
     37 #include "pub_tool_libcassert.h"
     38 #include "pub_tool_libcprint.h"
     39 #include "pub_tool_machine.h"
     40 #include "pub_tool_mallocfree.h"
     41 #include "pub_tool_options.h"
     42 #include "pub_tool_oset.h"
     43 #include "pub_tool_replacemalloc.h"
     44 #include "pub_tool_tooliface.h"
     45 #include "pub_tool_threadstate.h"
     46 
     47 #include "mc_include.h"
     48 #include "memcheck.h"   /* for client requests */
     49 
     50 
     51 /* Set to 1 to do a little more sanity checking */
     52 #define VG_DEBUG_MEMORY 0
     53 
     54 #define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)
     55 
     56 static void ocache_sarp_Set_Origins ( Addr, UWord, UInt ); /* fwds */
     57 static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */
     58 
     59 
     60 /*------------------------------------------------------------*/
     61 /*--- Fast-case knobs                                      ---*/
     62 /*------------------------------------------------------------*/
     63 
     64 // Comment these out to disable the fast cases (don't just set them to zero).
     65 
     66 #define PERF_FAST_LOADV    1
     67 #define PERF_FAST_STOREV   1
     68 
     69 #define PERF_FAST_SARP     1
     70 
     71 #define PERF_FAST_STACK    1
     72 #define PERF_FAST_STACK2   1
     73 
     74 /* Change this to 1 to enable assertions on origin tracking cache fast
     75    paths */
     76 #define OC_ENABLE_ASSERTIONS 0
     77 
     78 
     79 /*------------------------------------------------------------*/
     80 /*--- Comments on the origin tracking implementation       ---*/
     81 /*------------------------------------------------------------*/
     82 
     83 /* See detailed comment entitled
     84    AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
     85    which is contained further on in this file. */
     86 
     87 
     88 /*------------------------------------------------------------*/
     89 /*--- V bits and A bits                                    ---*/
     90 /*------------------------------------------------------------*/
     91 
     92 /* Conceptually, every byte value has 8 V bits, which track whether Memcheck
     93    thinks the corresponding value bit is defined.  And every memory byte
     94    has an A bit, which tracks whether Memcheck thinks the program can access
     95    it safely (ie. it's mapped, and has at least one of the RWX permission bits
     96    set).  So every N-bit register is shadowed with N V bits, and every memory
     97    byte is shadowed with 8 V bits and one A bit.
     98 
     99    In the implementation, we use two forms of compression (compressed V bits
    100    and distinguished secondary maps) to avoid the 9-bit-per-byte overhead
    101    for memory.
    102 
    103    Memcheck also tracks extra information about each heap block that is
    104    allocated, for detecting memory leaks and other purposes.
    105 */
    106 
    107 /*------------------------------------------------------------*/
    108 /*--- Basic A/V bitmap representation.                     ---*/
    109 /*------------------------------------------------------------*/
    110 
    111 /* All reads and writes are checked against a memory map (a.k.a. shadow
    112    memory), which records the state of all memory in the process.
    113 
    114    On 32-bit machines the memory map is organised as follows.
    115    The top 16 bits of an address are used to index into a top-level
    116    map table, containing 65536 entries.  Each entry is a pointer to a
     117    second-level map, which records the accessibility and validity
    118    permissions for the 65536 bytes indexed by the lower 16 bits of the
    119    address.  Each byte is represented by two bits (details are below).  So
    120    each second-level map contains 16384 bytes.  This two-level arrangement
     121    conveniently divides the 4G address space into 64k lumps, each of size
     122    64k bytes.
    123 
    124    All entries in the primary (top-level) map must point to a valid
    125    secondary (second-level) map.  Since many of the 64kB chunks will
    126    have the same status for every bit -- ie. noaccess (for unused
    127    address space) or entirely addressable and defined (for code segments) --
    128    there are three distinguished secondary maps, which indicate 'noaccess',
    129    'undefined' and 'defined'.  For these uniform 64kB chunks, the primary
    130    map entry points to the relevant distinguished map.  In practice,
    131    typically more than half of the addressable memory is represented with
    132    the 'undefined' or 'defined' distinguished secondary map, so it gives a
    133    good saving.  It also lets us set the V+A bits of large address regions
    134    quickly in set_address_range_perms().
    135 
    136    On 64-bit machines it's more complicated.  If we followed the same basic
    137    scheme we'd have a four-level table which would require too many memory
     138    accesses.  So instead the top-level map table has 2^22 entries (indexed
     139    using bits 16..37 of the address);  this covers the bottom 256GB.  Any
     140    accesses above 256GB are handled with a slow, sparse auxiliary table.
     141    Valgrind's address space manager tries very hard to keep things below
     142    this 256GB barrier so that performance doesn't suffer too much.
    143 
    144    Note that this file has a lot of different functions for reading and
    145    writing shadow memory.  Only a couple are strictly necessary (eg.
    146    get_vabits2 and set_vabits2), most are just specialised for specific
    147    common cases to improve performance.
    148 
    149    Aside: the V+A bits are less precise than they could be -- we have no way
    150    of marking memory as read-only.  It would be great if we could add an
    151    extra state VA_BITSn_READONLY.  But then we'd have 5 different states,
    152    which requires 2.3 bits to hold, and there's no way to do that elegantly
    153    -- we'd have to double up to 4 bits of metadata per byte, which doesn't
    154    seem worth it.
    155 */
    156 
    157 /* --------------- Basic configuration --------------- */
    158 
    159 /* Only change this.  N_PRIMARY_MAP *must* be a power of 2. */
    160 
    161 #if VG_WORDSIZE == 4
    162 
    163 /* cover the entire address space */
    164 #  define N_PRIMARY_BITS  16
    165 
    166 #else
    167 
    168 /* Just handle the first 256G fast and the rest via auxiliary
    169    primaries.  If you change this, Memcheck will assert at startup.
    170    See the definition of UNALIGNED_OR_HIGH for extensive comments. */
    171 #  define N_PRIMARY_BITS  22
    172 
    173 #endif
    174 
    175 
    176 /* Do not change this. */
    177 #define N_PRIMARY_MAP  ( ((UWord)1) << N_PRIMARY_BITS)
    178 
    179 /* Do not change this. */
    180 #define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1)
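
         /* A worked sizing example, following directly from the two macros
            above: on a 64-bit host, N_PRIMARY_BITS == 22 gives
            N_PRIMARY_MAP == 2^22 entries, so the main primary map is 4M
            pointers (32MB), and MAX_PRIMARY_ADDRESS == 65536 * 2^22 - 1
            == 2^38 - 1, ie. the fast path covers the bottom 256GB.  On a
            32-bit host, 2^16 entries cover the whole 4GB address space and
            the auxiliary table stays empty. */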
    181 
    182 
    183 /* --------------- Secondary maps --------------- */
    184 
    185 // Each byte of memory conceptually has an A bit, which indicates its
     186 // addressability, and 8 V bits, which indicate its definedness.
    187 //
    188 // But because very few bytes are partially defined, we can use a nice
    189 // compression scheme to reduce the size of shadow memory.  Each byte of
     190 // memory has 2 bits which indicate its state (ie. V+A bits):
    191 //
    192 //   00:  noaccess    (unaddressable but treated as fully defined)
    193 //   01:  undefined   (addressable and fully undefined)
    194 //   10:  defined     (addressable and fully defined)
    195 //   11:  partdefined (addressable and partially defined)
    196 //
    197 // In the "partdefined" case, we use a secondary table to store the V bits.
    198 // Each entry in the secondary-V-bits table maps a byte address to its 8 V
    199 // bits.
    200 //
    201 // We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for
    202 // four bytes (32 bits) of memory are in each chunk.  Hence the name
    203 // "vabits8".  This lets us get the V+A bits for four bytes at a time
    204 // easily (without having to do any shifting and/or masking), and that is a
    205 // very common operation.  (Note that although each vabits8 chunk
    206 // is 8 bits in size, it represents 32 bits of memory.)
    207 //
    208 // The representation is "inverse" little-endian... each 4 bytes of
    209 // memory is represented by a 1 byte value, where:
    210 //
    211 // - the status of byte (a+0) is held in bits [1..0]
    212 // - the status of byte (a+1) is held in bits [3..2]
    213 // - the status of byte (a+2) is held in bits [5..4]
    214 // - the status of byte (a+3) is held in bits [7..6]
    215 //
    216 // It's "inverse" because endianness normally describes a mapping from
    217 // value bits to memory addresses;  in this case the mapping is inverted.
    218 // Ie. instead of particular value bits being held in certain addresses, in
    219 // this case certain addresses are represented by particular value bits.
    220 // See insert_vabits2_into_vabits8() for an example.
    221 //
    222 // But note that we don't compress the V bits stored in registers;  they
     223 // need to be explicit to make the shadow operations possible.  Therefore
    224 // when moving values between registers and memory we need to convert
    225 // between the expanded in-register format and the compressed in-memory
    226 // format.  This isn't so difficult, it just requires careful attention in a
    227 // few places.
    228 
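         // A worked example of the packing just described: suppose the four
         // bytes starting at a 4-aligned address 'a' have states
         //    a+0 defined, a+1 undefined, a+2 partdefined, a+3 noaccess.
         // Then, reading fields from [7..6] down to [1..0], the chunk is
         //    00_11_01_10b == 0x36,
         // and (0x36 >> (((a+2) & 3) << 1)) & 0x3 recovers 11b (partdefined)
         // for the byte at a+2, exactly as extract_vabits2_from_vabits8()
         // does further down.
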
    229 // These represent eight bits of memory.
    230 #define VA_BITS2_NOACCESS     0x0      // 00b
    231 #define VA_BITS2_UNDEFINED    0x1      // 01b
    232 #define VA_BITS2_DEFINED      0x2      // 10b
    233 #define VA_BITS2_PARTDEFINED  0x3      // 11b
    234 
    235 // These represent 16 bits of memory.
    236 #define VA_BITS4_NOACCESS     0x0      // 00_00b
    237 #define VA_BITS4_UNDEFINED    0x5      // 01_01b
    238 #define VA_BITS4_DEFINED      0xa      // 10_10b
    239 
    240 // These represent 32 bits of memory.
    241 #define VA_BITS8_NOACCESS     0x00     // 00_00_00_00b
    242 #define VA_BITS8_UNDEFINED    0x55     // 01_01_01_01b
    243 #define VA_BITS8_DEFINED      0xaa     // 10_10_10_10b
    244 
    245 // These represent 64 bits of memory.
    246 #define VA_BITS16_NOACCESS    0x0000   // 00_00_00_00b x 2
    247 #define VA_BITS16_UNDEFINED   0x5555   // 01_01_01_01b x 2
    248 #define VA_BITS16_DEFINED     0xaaaa   // 10_10_10_10b x 2
    249 
    250 
    251 #define SM_CHUNKS             16384
    252 #define SM_OFF(aaa)           (((aaa) & 0xffff) >> 2)
    253 #define SM_OFF_16(aaa)        (((aaa) & 0xffff) >> 3)
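
         // An illustrative decomposition using the macros above (the address
         // is an arbitrary example): for a == 0x00401234,
         //    primary map index  = a >> 16           == 0x0040
         //    SM_OFF(a)          = (a & 0xffff) >> 2  == 0x048d  (vabits8 chunk)
         //    SM_OFF_16(a)       = (a & 0xffff) >> 3  == 0x0246  (vabits16 chunk)
         // and the 2-bit field within the chunk is selected by (a & 3) == 0.
         // So the V+A state of the byte at 'a' is
         //    (sm->vabits8[SM_OFF(a)] >> ((a & 3) << 1)) & 0x3
         // which is exactly what get_vabits2() computes below.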
    254 
    255 // Paranoia:  it's critical for performance that the requested inlining
    256 // occurs.  So try extra hard.
    257 #define INLINE    inline __attribute__((always_inline))
    258 
    259 static INLINE Addr start_of_this_sm ( Addr a ) {
    260    return (a & (~SM_MASK));
    261 }
    262 static INLINE Bool is_start_of_sm ( Addr a ) {
    263    return (start_of_this_sm(a) == a);
    264 }
    265 
    266 typedef
    267    struct {
    268       UChar vabits8[SM_CHUNKS];
    269    }
    270    SecMap;
    271 
    272 // 3 distinguished secondary maps, one for no-access, one for
    273 // accessible but undefined, and one for accessible and defined.
    274 // Distinguished secondaries may never be modified.
    275 #define SM_DIST_NOACCESS   0
    276 #define SM_DIST_UNDEFINED  1
    277 #define SM_DIST_DEFINED    2
    278 
    279 static SecMap sm_distinguished[3];
    280 
    281 static INLINE Bool is_distinguished_sm ( SecMap* sm ) {
    282    return sm >= &sm_distinguished[0] && sm <= &sm_distinguished[2];
    283 }
    284 
    285 // Forward declaration
    286 static void update_SM_counts(SecMap* oldSM, SecMap* newSM);
    287 
    288 /* dist_sm points to one of our three distinguished secondaries.  Make
    289    a copy of it so that we can write to it.
    290 */
    291 static SecMap* copy_for_writing ( SecMap* dist_sm )
    292 {
    293    SecMap* new_sm;
    294    tl_assert(dist_sm == &sm_distinguished[0]
    295           || dist_sm == &sm_distinguished[1]
    296           || dist_sm == &sm_distinguished[2]);
    297 
    298    new_sm = VG_(am_shadow_alloc)(sizeof(SecMap));
    299    if (new_sm == NULL)
    300       VG_(out_of_memory_NORETURN)( "memcheck:allocate new SecMap",
    301                                    sizeof(SecMap) );
    302    VG_(memcpy)(new_sm, dist_sm, sizeof(SecMap));
    303    update_SM_counts(dist_sm, new_sm);
    304    return new_sm;
    305 }
    306 
    307 /* --------------- Stats --------------- */
    308 
    309 static Int   n_issued_SMs      = 0;
    310 static Int   n_deissued_SMs    = 0;
    311 static Int   n_noaccess_SMs    = N_PRIMARY_MAP; // start with many noaccess DSMs
    312 static Int   n_undefined_SMs   = 0;
    313 static Int   n_defined_SMs     = 0;
    314 static Int   n_non_DSM_SMs     = 0;
    315 static Int   max_noaccess_SMs  = 0;
    316 static Int   max_undefined_SMs = 0;
    317 static Int   max_defined_SMs   = 0;
    318 static Int   max_non_DSM_SMs   = 0;
    319 
    320 /* # searches initiated in auxmap_L1, and # base cmps required */
    321 static ULong n_auxmap_L1_searches  = 0;
    322 static ULong n_auxmap_L1_cmps      = 0;
    323 /* # of searches that missed in auxmap_L1 and therefore had to
    324    be handed to auxmap_L2. And the number of nodes inserted. */
    325 static ULong n_auxmap_L2_searches  = 0;
    326 static ULong n_auxmap_L2_nodes     = 0;
    327 
    328 static Int   n_sanity_cheap     = 0;
    329 static Int   n_sanity_expensive = 0;
    330 
    331 static Int   n_secVBit_nodes   = 0;
    332 static Int   max_secVBit_nodes = 0;
    333 
    334 static void update_SM_counts(SecMap* oldSM, SecMap* newSM)
    335 {
    336    if      (oldSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs --;
    337    else if (oldSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs--;
    338    else if (oldSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  --;
    339    else                                                  { n_non_DSM_SMs  --;
    340                                                            n_deissued_SMs ++; }
    341 
    342    if      (newSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs ++;
    343    else if (newSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs++;
    344    else if (newSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  ++;
    345    else                                                  { n_non_DSM_SMs  ++;
    346                                                            n_issued_SMs   ++; }
    347 
    348    if (n_noaccess_SMs  > max_noaccess_SMs ) max_noaccess_SMs  = n_noaccess_SMs;
    349    if (n_undefined_SMs > max_undefined_SMs) max_undefined_SMs = n_undefined_SMs;
    350    if (n_defined_SMs   > max_defined_SMs  ) max_defined_SMs   = n_defined_SMs;
    351    if (n_non_DSM_SMs   > max_non_DSM_SMs  ) max_non_DSM_SMs   = n_non_DSM_SMs;
    352 }
    353 
    354 /* --------------- Primary maps --------------- */
    355 
    356 /* The main primary map.  This covers some initial part of the address
    357    space, addresses 0 .. (N_PRIMARY_MAP << 16)-1.  The rest of it is
    358    handled using the auxiliary primary map.
    359 */
    360 static SecMap* primary_map[N_PRIMARY_MAP];
    361 
    362 
    363 /* An entry in the auxiliary primary map.  base must be a 64k-aligned
    364    value, and sm points at the relevant secondary map.  As with the
    365    main primary map, the secondary may be either a real secondary, or
    366    one of the three distinguished secondaries.  DO NOT CHANGE THIS
    367    LAYOUT: the first word has to be the key for OSet fast lookups.
    368 */
    369 typedef
    370    struct {
    371       Addr    base;
    372       SecMap* sm;
    373    }
    374    AuxMapEnt;
    375 
    376 /* Tunable parameter: How big is the L1 queue? */
    377 #define N_AUXMAP_L1 24
    378 
    379 /* Tunable parameter: How far along the L1 queue to insert
    380    entries resulting from L2 lookups? */
    381 #define AUXMAP_L1_INSERT_IX 12
    382 
    383 static struct {
    384           Addr       base;
    385           AuxMapEnt* ent; // pointer to the matching auxmap_L2 node
    386        }
    387        auxmap_L1[N_AUXMAP_L1];
    388 
    389 static OSet* auxmap_L2 = NULL;
    390 
    391 static void init_auxmap_L1_L2 ( void )
    392 {
    393    Int i;
    394    for (i = 0; i < N_AUXMAP_L1; i++) {
    395       auxmap_L1[i].base = 0;
    396       auxmap_L1[i].ent  = NULL;
    397    }
    398 
    399    tl_assert(0 == offsetof(AuxMapEnt,base));
    400    tl_assert(sizeof(Addr) == sizeof(void*));
    401    auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/  offsetof(AuxMapEnt,base),
    402                                     /*fastCmp*/ NULL,
    403                                     VG_(malloc), "mc.iaLL.1", VG_(free) );
    404 }
    405 
    406 /* Check representation invariants; if OK return NULL; else a
    407    descriptive bit of text.  Also return the number of
    408    non-distinguished secondary maps referred to from the auxiliary
    409    primary maps. */
    410 
    411 static HChar* check_auxmap_L1_L2_sanity ( Word* n_secmaps_found )
    412 {
    413    Word i, j;
    414    /* On a 32-bit platform, the L2 and L1 tables should
    415       both remain empty forever.
    416 
    417       On a 64-bit platform:
    418       In the L2 table:
    419        all .base & 0xFFFF == 0
    420        all .base > MAX_PRIMARY_ADDRESS
    421       In the L1 table:
    422        all .base & 0xFFFF == 0
    423        all (.base > MAX_PRIMARY_ADDRESS
    424             .base & 0xFFFF == 0
    425             and .ent points to an AuxMapEnt with the same .base)
    426            or
    427            (.base == 0 and .ent == NULL)
    428    */
    429    *n_secmaps_found = 0;
    430    if (sizeof(void*) == 4) {
    431       /* 32-bit platform */
    432       if (VG_(OSetGen_Size)(auxmap_L2) != 0)
    433          return "32-bit: auxmap_L2 is non-empty";
    434       for (i = 0; i < N_AUXMAP_L1; i++)
     435          if (auxmap_L1[i].base != 0 || auxmap_L1[i].ent != NULL)
     436             return "32-bit: auxmap_L1 is non-empty";
    437    } else {
    438       /* 64-bit platform */
    439       UWord elems_seen = 0;
    440       AuxMapEnt *elem, *res;
    441       AuxMapEnt key;
    442       /* L2 table */
    443       VG_(OSetGen_ResetIter)(auxmap_L2);
    444       while ( (elem = VG_(OSetGen_Next)(auxmap_L2)) ) {
    445          elems_seen++;
    446          if (0 != (elem->base & (Addr)0xFFFF))
    447             return "64-bit: nonzero .base & 0xFFFF in auxmap_L2";
    448          if (elem->base <= MAX_PRIMARY_ADDRESS)
    449             return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2";
    450          if (elem->sm == NULL)
    451             return "64-bit: .sm in _L2 is NULL";
    452          if (!is_distinguished_sm(elem->sm))
    453             (*n_secmaps_found)++;
    454       }
    455       if (elems_seen != n_auxmap_L2_nodes)
    456          return "64-bit: disagreement on number of elems in _L2";
    457       /* Check L1-L2 correspondence */
    458       for (i = 0; i < N_AUXMAP_L1; i++) {
    459          if (auxmap_L1[i].base == 0 && auxmap_L1[i].ent == NULL)
    460             continue;
    461          if (0 != (auxmap_L1[i].base & (Addr)0xFFFF))
    462             return "64-bit: nonzero .base & 0xFFFF in auxmap_L1";
    463          if (auxmap_L1[i].base <= MAX_PRIMARY_ADDRESS)
    464             return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1";
    465          if (auxmap_L1[i].ent == NULL)
    466             return "64-bit: .ent is NULL in auxmap_L1";
    467          if (auxmap_L1[i].ent->base != auxmap_L1[i].base)
    468             return "64-bit: _L1 and _L2 bases are inconsistent";
    469          /* Look it up in auxmap_L2. */
    470          key.base = auxmap_L1[i].base;
    471          key.sm   = 0;
    472          res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
    473          if (res == NULL)
    474             return "64-bit: _L1 .base not found in _L2";
    475          if (res != auxmap_L1[i].ent)
    476             return "64-bit: _L1 .ent disagrees with _L2 entry";
    477       }
    478       /* Check L1 contains no duplicates */
    479       for (i = 0; i < N_AUXMAP_L1; i++) {
    480          if (auxmap_L1[i].base == 0)
    481             continue;
     482          for (j = i+1; j < N_AUXMAP_L1; j++) {
    483             if (auxmap_L1[j].base == 0)
    484                continue;
    485             if (auxmap_L1[j].base == auxmap_L1[i].base)
    486                return "64-bit: duplicate _L1 .base entries";
    487          }
    488       }
    489    }
    490    return NULL; /* ok */
    491 }
    492 
    493 static void insert_into_auxmap_L1_at ( Word rank, AuxMapEnt* ent )
    494 {
    495    Word i;
    496    tl_assert(ent);
    497    tl_assert(rank >= 0 && rank < N_AUXMAP_L1);
    498    for (i = N_AUXMAP_L1-1; i > rank; i--)
    499       auxmap_L1[i] = auxmap_L1[i-1];
    500    auxmap_L1[rank].base = ent->base;
    501    auxmap_L1[rank].ent  = ent;
    502 }
    503 
    504 static INLINE AuxMapEnt* maybe_find_in_auxmap ( Addr a )
    505 {
    506    AuxMapEnt  key;
    507    AuxMapEnt* res;
    508    Word       i;
    509 
    510    tl_assert(a > MAX_PRIMARY_ADDRESS);
    511    a &= ~(Addr)0xFFFF;
    512 
    513    /* First search the front-cache, which is a self-organising
    514       list containing the most popular entries. */
    515 
    516    if (LIKELY(auxmap_L1[0].base == a))
    517       return auxmap_L1[0].ent;
    518    if (LIKELY(auxmap_L1[1].base == a)) {
    519       Addr       t_base = auxmap_L1[0].base;
    520       AuxMapEnt* t_ent  = auxmap_L1[0].ent;
    521       auxmap_L1[0].base = auxmap_L1[1].base;
    522       auxmap_L1[0].ent  = auxmap_L1[1].ent;
    523       auxmap_L1[1].base = t_base;
    524       auxmap_L1[1].ent  = t_ent;
    525       return auxmap_L1[0].ent;
    526    }
    527 
    528    n_auxmap_L1_searches++;
    529 
    530    for (i = 0; i < N_AUXMAP_L1; i++) {
    531       if (auxmap_L1[i].base == a) {
    532          break;
    533       }
    534    }
    535    tl_assert(i >= 0 && i <= N_AUXMAP_L1);
    536 
    537    n_auxmap_L1_cmps += (ULong)(i+1);
    538 
    539    if (i < N_AUXMAP_L1) {
    540       if (i > 0) {
    541          Addr       t_base = auxmap_L1[i-1].base;
    542          AuxMapEnt* t_ent  = auxmap_L1[i-1].ent;
    543          auxmap_L1[i-1].base = auxmap_L1[i-0].base;
    544          auxmap_L1[i-1].ent  = auxmap_L1[i-0].ent;
    545          auxmap_L1[i-0].base = t_base;
    546          auxmap_L1[i-0].ent  = t_ent;
    547          i--;
    548       }
    549       return auxmap_L1[i].ent;
    550    }
    551 
    552    n_auxmap_L2_searches++;
    553 
    554    /* First see if we already have it. */
    555    key.base = a;
    556    key.sm   = 0;
    557 
    558    res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
    559    if (res)
    560       insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, res );
    561    return res;
    562 }
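
         // Note (editorial): the lookup above is a classic self-organising
         // list using the "transpose" heuristic -- a hit at L1 slot i swaps
         // entries i and i-1, so frequently-used chunks migrate towards the
         // front one slot per hit (slots 0 and 1 are special-cased for
         // speed).  Entries that have to be fetched from the L2 OSet are
         // (re)inserted at slot AUXMAP_L1_INSERT_IX, which pushes the tail
         // of the queue down one place and drops the last entry.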
    563 
    564 static AuxMapEnt* find_or_alloc_in_auxmap ( Addr a )
    565 {
    566    AuxMapEnt *nyu, *res;
    567 
    568    /* First see if we already have it. */
    569    res = maybe_find_in_auxmap( a );
    570    if (LIKELY(res))
    571       return res;
    572 
    573    /* Ok, there's no entry in the secondary map, so we'll have
    574       to allocate one. */
    575    a &= ~(Addr)0xFFFF;
    576 
    577    nyu = (AuxMapEnt*) VG_(OSetGen_AllocNode)( auxmap_L2, sizeof(AuxMapEnt) );
    578    tl_assert(nyu);
    579    nyu->base = a;
    580    nyu->sm   = &sm_distinguished[SM_DIST_NOACCESS];
    581    VG_(OSetGen_Insert)( auxmap_L2, nyu );
    582    insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, nyu );
    583    n_auxmap_L2_nodes++;
    584    return nyu;
    585 }
    586 
    587 /* --------------- SecMap fundamentals --------------- */
    588 
    589 // In all these, 'low' means it's definitely in the main primary map,
    590 // 'high' means it's definitely in the auxiliary table.
    591 
    592 static INLINE SecMap** get_secmap_low_ptr ( Addr a )
    593 {
    594    UWord pm_off = a >> 16;
    595 #  if VG_DEBUG_MEMORY >= 1
    596    tl_assert(pm_off < N_PRIMARY_MAP);
    597 #  endif
    598    return &primary_map[ pm_off ];
    599 }
    600 
    601 static INLINE SecMap** get_secmap_high_ptr ( Addr a )
    602 {
    603    AuxMapEnt* am = find_or_alloc_in_auxmap(a);
    604    return &am->sm;
    605 }
    606 
    607 static SecMap** get_secmap_ptr ( Addr a )
    608 {
    609    return ( a <= MAX_PRIMARY_ADDRESS
    610           ? get_secmap_low_ptr(a)
    611           : get_secmap_high_ptr(a));
    612 }
    613 
    614 static INLINE SecMap* get_secmap_for_reading_low ( Addr a )
    615 {
    616    return *get_secmap_low_ptr(a);
    617 }
    618 
    619 static INLINE SecMap* get_secmap_for_reading_high ( Addr a )
    620 {
    621    return *get_secmap_high_ptr(a);
    622 }
    623 
    624 static INLINE SecMap* get_secmap_for_writing_low(Addr a)
    625 {
    626    SecMap** p = get_secmap_low_ptr(a);
    627    if (UNLIKELY(is_distinguished_sm(*p)))
    628       *p = copy_for_writing(*p);
    629    return *p;
    630 }
    631 
    632 static INLINE SecMap* get_secmap_for_writing_high ( Addr a )
    633 {
    634    SecMap** p = get_secmap_high_ptr(a);
    635    if (UNLIKELY(is_distinguished_sm(*p)))
    636       *p = copy_for_writing(*p);
    637    return *p;
    638 }
    639 
    640 /* Produce the secmap for 'a', either from the primary map or by
    641    ensuring there is an entry for it in the aux primary map.  The
    642    secmap may be a distinguished one as the caller will only want to
    643    be able to read it.
    644 */
    645 static INLINE SecMap* get_secmap_for_reading ( Addr a )
    646 {
    647    return ( a <= MAX_PRIMARY_ADDRESS
    648           ? get_secmap_for_reading_low (a)
    649           : get_secmap_for_reading_high(a) );
    650 }
    651 
    652 /* Produce the secmap for 'a', either from the primary map or by
    653    ensuring there is an entry for it in the aux primary map.  The
    654    secmap may not be a distinguished one, since the caller will want
    655    to be able to write it.  If it is a distinguished secondary, make a
    656    writable copy of it, install it, and return the copy instead.  (COW
    657    semantics).
    658 */
    659 static SecMap* get_secmap_for_writing ( Addr a )
    660 {
    661    return ( a <= MAX_PRIMARY_ADDRESS
    662           ? get_secmap_for_writing_low (a)
    663           : get_secmap_for_writing_high(a) );
    664 }
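
         /* An illustrative sketch of the COW path (not code that appears
            elsewhere in this file): marking one byte undefined in a 64kB
            chunk currently backed by the distinguished 'defined' secondary
            forces a private copy first, ie.

               SecMap* sm = get_secmap_for_writing(a); // DSM -> private 16kB copy
               insert_vabits2_into_vabits8( a, VA_BITS2_UNDEFINED,
                                            &(sm->vabits8[SM_OFF(a)]) );

            which is what set_vabits2() further down does.  Later writes to
            the same chunk find the private copy already installed and never
            reach copy_for_writing() again. */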
    665 
    666 /* If 'a' has a SecMap, produce it.  Else produce NULL.  But don't
    667    allocate one if one doesn't already exist.  This is used by the
    668    leak checker.
    669 */
    670 static SecMap* maybe_get_secmap_for ( Addr a )
    671 {
    672    if (a <= MAX_PRIMARY_ADDRESS) {
    673       return get_secmap_for_reading_low(a);
    674    } else {
    675       AuxMapEnt* am = maybe_find_in_auxmap(a);
    676       return am ? am->sm : NULL;
    677    }
    678 }
    679 
    680 /* --------------- Fundamental functions --------------- */
    681 
    682 static INLINE
    683 void insert_vabits2_into_vabits8 ( Addr a, UChar vabits2, UChar* vabits8 )
    684 {
    685    UInt shift =  (a & 3)  << 1;        // shift by 0, 2, 4, or 6
    686    *vabits8  &= ~(0x3     << shift);   // mask out the two old bits
    687    *vabits8  |=  (vabits2 << shift);   // mask  in the two new bits
    688 }
    689 
    690 static INLINE
    691 void insert_vabits4_into_vabits8 ( Addr a, UChar vabits4, UChar* vabits8 )
    692 {
    693    UInt shift;
    694    tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
    695    shift     =  (a & 2)   << 1;        // shift by 0 or 4
    696    *vabits8 &= ~(0xf      << shift);   // mask out the four old bits
    697    *vabits8 |=  (vabits4 << shift);    // mask  in the four new bits
    698 }
    699 
    700 static INLINE
    701 UChar extract_vabits2_from_vabits8 ( Addr a, UChar vabits8 )
    702 {
    703    UInt shift = (a & 3) << 1;          // shift by 0, 2, 4, or 6
    704    vabits8 >>= shift;                  // shift the two bits to the bottom
    705    return 0x3 & vabits8;               // mask out the rest
    706 }
    707 
    708 static INLINE
    709 UChar extract_vabits4_from_vabits8 ( Addr a, UChar vabits8 )
    710 {
    711    UInt shift;
    712    tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
    713    shift = (a & 2) << 1;               // shift by 0 or 4
    714    vabits8 >>= shift;                  // shift the four bits to the bottom
    715    return 0xf & vabits8;               // mask out the rest
    716 }
    717 
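         // An illustrative round trip through the two helper pairs above
         // (not code that appears elsewhere in this file).  For an address a
         // with (a & 3) == 1:
         //
         //    UChar vabits8 = VA_BITS8_DEFINED;              // 10_10_10_10b
         //    insert_vabits2_into_vabits8( a, VA_BITS2_UNDEFINED, &vabits8 );
         //    // vabits8 is now 0xa6 (10_10_01_10b): only bits [3..2], the
         //    // field for 'a', have changed.
         //    tl_assert( extract_vabits2_from_vabits8(a, vabits8)
         //               == VA_BITS2_UNDEFINED );
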
    718 // Note that these four are only used in slow cases.  The fast cases do
    719 // clever things like combine the auxmap check (in
     720 // get_secmap_for_{reading,writing}) with alignment checks.
    721 
    722 // *** WARNING! ***
    723 // Any time this function is called, if it is possible that vabits2
    724 // is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the
    725 // sec-V-bits table must also be set!
    726 static INLINE
    727 void set_vabits2 ( Addr a, UChar vabits2 )
    728 {
    729    SecMap* sm       = get_secmap_for_writing(a);
    730    UWord   sm_off   = SM_OFF(a);
    731    insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
    732 }
    733 
    734 static INLINE
    735 UChar get_vabits2 ( Addr a )
    736 {
    737    SecMap* sm       = get_secmap_for_reading(a);
    738    UWord   sm_off   = SM_OFF(a);
    739    UChar   vabits8  = sm->vabits8[sm_off];
    740    return extract_vabits2_from_vabits8(a, vabits8);
    741 }
    742 
    743 // *** WARNING! ***
    744 // Any time this function is called, if it is possible that any of the
    745 // 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the
    746 // corresponding entry(s) in the sec-V-bits table must also be set!
    747 static INLINE
    748 UChar get_vabits8_for_aligned_word32 ( Addr a )
    749 {
    750    SecMap* sm       = get_secmap_for_reading(a);
    751    UWord   sm_off   = SM_OFF(a);
    752    UChar   vabits8  = sm->vabits8[sm_off];
    753    return vabits8;
    754 }
    755 
    756 static INLINE
    757 void set_vabits8_for_aligned_word32 ( Addr a, UChar vabits8 )
    758 {
    759    SecMap* sm       = get_secmap_for_writing(a);
    760    UWord   sm_off   = SM_OFF(a);
    761    sm->vabits8[sm_off] = vabits8;
    762 }
    763 
    764 
    765 // Forward declarations
    766 static UWord get_sec_vbits8(Addr a);
    767 static void  set_sec_vbits8(Addr a, UWord vbits8);
    768 
    769 // Returns False if there was an addressability error.
    770 static INLINE
    771 Bool set_vbits8 ( Addr a, UChar vbits8 )
    772 {
    773    Bool  ok      = True;
    774    UChar vabits2 = get_vabits2(a);
    775    if ( VA_BITS2_NOACCESS != vabits2 ) {
    776       // Addressable.  Convert in-register format to in-memory format.
    777       // Also remove any existing sec V bit entry for the byte if no
    778       // longer necessary.
    779       if      ( V_BITS8_DEFINED   == vbits8 ) { vabits2 = VA_BITS2_DEFINED;   }
    780       else if ( V_BITS8_UNDEFINED == vbits8 ) { vabits2 = VA_BITS2_UNDEFINED; }
    781       else                                    { vabits2 = VA_BITS2_PARTDEFINED;
    782                                                 set_sec_vbits8(a, vbits8);  }
    783       set_vabits2(a, vabits2);
    784 
    785    } else {
    786       // Unaddressable!  Do nothing -- when writing to unaddressable
    787       // memory it acts as a black hole, and the V bits can never be seen
    788       // again.  So we don't have to write them at all.
    789       ok = False;
    790    }
    791    return ok;
    792 }
    793 
    794 // Returns False if there was an addressability error.  In that case, we put
    795 // all defined bits into vbits8.
    796 static INLINE
    797 Bool get_vbits8 ( Addr a, UChar* vbits8 )
    798 {
    799    Bool  ok      = True;
    800    UChar vabits2 = get_vabits2(a);
    801 
    802    // Convert the in-memory format to in-register format.
    803    if      ( VA_BITS2_DEFINED   == vabits2 ) { *vbits8 = V_BITS8_DEFINED;   }
    804    else if ( VA_BITS2_UNDEFINED == vabits2 ) { *vbits8 = V_BITS8_UNDEFINED; }
    805    else if ( VA_BITS2_NOACCESS  == vabits2 ) {
    806       *vbits8 = V_BITS8_DEFINED;    // Make V bits defined!
    807       ok = False;
    808    } else {
    809       tl_assert( VA_BITS2_PARTDEFINED == vabits2 );
    810       *vbits8 = get_sec_vbits8(a);
    811    }
    812    return ok;
    813 }
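
         // An illustrative conversion, assuming the usual in-register
         // convention from mc_include.h that a V bit of 1 means "undefined"
         // (so V_BITS8_DEFINED == 0x00 and V_BITS8_UNDEFINED == 0xff):
         // storing vbits8 == 0x0f for a byte whose low nibble is undefined
         // takes the final arm of set_vbits8() above -- vabits2 becomes
         // VA_BITS2_PARTDEFINED and the full 0x0f pattern is parked in the
         // sec-V-bits table -- and get_vbits8() later retrieves it from
         // there.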
    814 
    815 
    816 /* --------------- Secondary V bit table ------------ */
    817 
    818 // This table holds the full V bit pattern for partially-defined bytes
    819 // (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow
    820 // memory.
    821 //
    822 // Note: the nodes in this table can become stale.  Eg. if you write a PDB,
    823 // then overwrite the same address with a fully defined byte, the sec-V-bit
    824 // node will not necessarily be removed.  This is because checking for
    825 // whether removal is necessary would slow down the fast paths.
    826 //
    827 // To avoid the stale nodes building up too much, we periodically (once the
    828 // table reaches a certain size) garbage collect (GC) the table by
    829 // traversing it and evicting any "sufficiently stale" nodes, ie. nodes that
    830 // are stale and haven't been touched for a certain number of collections.
    831 // If more than a certain proportion of nodes survived, we increase the
    832 // table size so that GCs occur less often.
    833 //
     834 // (So this is a bit different from a traditional GC, where you definitely want
     835 // to remove any dead nodes.  It's more like we have a resizable cache and
     836 // we're trying to find the right balance between how many elements to evict
     837 // and how big to make the cache.)
    838 //
    839 // This policy is designed to avoid bad table bloat in the worst case where
    840 // a program creates huge numbers of stale PDBs -- we would get this bloat
    841 // if we had no GC -- while handling well the case where a node becomes
    842 // stale but shortly afterwards is rewritten with a PDB and so becomes
    843 // non-stale again (which happens quite often, eg. in perf/bz2).  If we just
    844 // remove all stale nodes as soon as possible, we just end up re-adding a
     845 // lot of them again later.  The "sufficiently stale" approach avoids
    846 // this.  (If a program has many live PDBs, performance will just suck,
    847 // there's no way around that.)
    848 
    849 static OSet* secVBitTable;
    850 
    851 // Stats
    852 static ULong sec_vbits_new_nodes = 0;
    853 static ULong sec_vbits_updates   = 0;
    854 
    855 // This must be a power of two;  this is checked in mc_pre_clo_init().
    856 // The size chosen here is a trade-off:  if the nodes are bigger (ie. cover
    857 // a larger address range) they take more space but we can get multiple
    858 // partially-defined bytes in one if they are close to each other, reducing
    859 // the number of total nodes.  In practice sometimes they are clustered (eg.
    860 // perf/bz2 repeatedly writes then reads more than 20,000 in a contiguous
    861 // row), but often not.  So we choose something intermediate.
    862 #define BYTES_PER_SEC_VBIT_NODE     16
    863 
    864 // We make the table bigger if more than this many nodes survive a GC.
    865 #define MAX_SURVIVOR_PROPORTION  0.5
    866 
    867 // Each time we make the table bigger, we increase it by this much.
    868 #define TABLE_GROWTH_FACTOR      2
    869 
    870 // This defines "sufficiently stale" -- any node that hasn't been touched in
    871 // this many GCs will be removed.
    872 #define MAX_STALE_AGE            2
    873 
    874 // We GC the table when it gets this many nodes in it, ie. it's effectively
    875 // the table size.  It can change.
    876 static Int  secVBitLimit = 1024;
    877 
    878 // The number of GCs done, used to age sec-V-bit nodes for eviction.
    879 // Because it's unsigned, wrapping doesn't matter -- the right answer will
    880 // come out anyway.
    881 static UInt GCs_done = 0;
    882 
    883 typedef
    884    struct {
    885       Addr  a;
    886       UChar vbits8[BYTES_PER_SEC_VBIT_NODE];
    887       UInt  last_touched;
    888    }
    889    SecVBitNode;
    890 
    891 static OSet* createSecVBitTable(void)
    892 {
    893    return VG_(OSetGen_Create)( offsetof(SecVBitNode, a),
    894                                NULL, // use fast comparisons
    895                                VG_(malloc), "mc.cSVT.1 (sec VBit table)",
    896                                VG_(free) );
    897 }
    898 
    899 static void gcSecVBitTable(void)
    900 {
    901    OSet*        secVBitTable2;
    902    SecVBitNode* n;
    903    Int          i, n_nodes = 0, n_survivors = 0;
    904 
    905    GCs_done++;
    906 
    907    // Create the new table.
    908    secVBitTable2 = createSecVBitTable();
    909 
    910    // Traverse the table, moving fresh nodes into the new table.
    911    VG_(OSetGen_ResetIter)(secVBitTable);
    912    while ( (n = VG_(OSetGen_Next)(secVBitTable)) ) {
    913       Bool keep = False;
    914       if ( (GCs_done - n->last_touched) <= MAX_STALE_AGE ) {
    915          // Keep node if it's been touched recently enough (regardless of
    916          // freshness/staleness).
    917          keep = True;
    918       } else {
    919          // Keep node if any of its bytes are non-stale.  Using
    920          // get_vabits2() for the lookup is not very efficient, but I don't
    921          // think it matters.
    922          for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
    923             if (VA_BITS2_PARTDEFINED == get_vabits2(n->a + i)) {
    924                keep = True;      // Found a non-stale byte, so keep
    925                break;
    926             }
    927          }
    928       }
    929 
    930       if ( keep ) {
    931          // Insert a copy of the node into the new table.
    932          SecVBitNode* n2 =
    933             VG_(OSetGen_AllocNode)(secVBitTable2, sizeof(SecVBitNode));
    934          *n2 = *n;
    935          VG_(OSetGen_Insert)(secVBitTable2, n2);
    936       }
    937    }
    938 
    939    // Get the before and after sizes.
    940    n_nodes     = VG_(OSetGen_Size)(secVBitTable);
    941    n_survivors = VG_(OSetGen_Size)(secVBitTable2);
    942 
    943    // Destroy the old table, and put the new one in its place.
    944    VG_(OSetGen_Destroy)(secVBitTable);
    945    secVBitTable = secVBitTable2;
    946 
    947    if (VG_(clo_verbosity) > 1) {
    948       Char percbuf[6];
    949       VG_(percentify)(n_survivors, n_nodes, 1, 6, percbuf);
    950       VG_(message)(Vg_DebugMsg, "memcheck GC: %d nodes, %d survivors (%s)\n",
    951                    n_nodes, n_survivors, percbuf);
    952    }
    953 
    954    // Increase table size if necessary.
    955    if (n_survivors > (secVBitLimit * MAX_SURVIVOR_PROPORTION)) {
    956       secVBitLimit *= TABLE_GROWTH_FACTOR;
    957       if (VG_(clo_verbosity) > 1)
    958          VG_(message)(Vg_DebugMsg, "memcheck GC: increase table size to %d\n",
    959                       secVBitLimit);
    960    }
    961 }
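
         // A worked example of the growth rule above: with secVBitLimit ==
         // 1024 and MAX_SURVIVOR_PROPORTION == 0.5, a GC runs when the table
         // reaches 1024 nodes; if, say, 600 nodes survive (600 > 512), the
         // limit doubles to 2048, so the next GC is not triggered until the
         // table reaches 2048 nodes.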
    962 
    963 static UWord get_sec_vbits8(Addr a)
    964 {
    965    Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
    966    Int          amod     = a % BYTES_PER_SEC_VBIT_NODE;
    967    SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
    968    UChar        vbits8;
    969    tl_assert2(n, "get_sec_vbits8: no node for address %p (%p)\n", aAligned, a);
    970    // Shouldn't be fully defined or fully undefined -- those cases shouldn't
    971    // make it to the secondary V bits table.
    972    vbits8 = n->vbits8[amod];
    973    tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
    974    return vbits8;
    975 }
    976 
    977 static void set_sec_vbits8(Addr a, UWord vbits8)
    978 {
    979    Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
    980    Int          i, amod  = a % BYTES_PER_SEC_VBIT_NODE;
    981    SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
    982    // Shouldn't be fully defined or fully undefined -- those cases shouldn't
    983    // make it to the secondary V bits table.
    984    tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
    985    if (n) {
    986       n->vbits8[amod] = vbits8;     // update
    987       n->last_touched = GCs_done;
    988       sec_vbits_updates++;
    989    } else {
    990       // New node:  assign the specific byte, make the rest invalid (they
    991       // should never be read as-is, but be cautious).
    992       n = VG_(OSetGen_AllocNode)(secVBitTable, sizeof(SecVBitNode));
    993       n->a            = aAligned;
    994       for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
    995          n->vbits8[i] = V_BITS8_UNDEFINED;
    996       }
    997       n->vbits8[amod] = vbits8;
    998       n->last_touched = GCs_done;
    999 
   1000       // Do a table GC if necessary.  Nb: do this before inserting the new
   1001       // node, to avoid erroneously GC'ing the new node.
   1002       if (secVBitLimit == VG_(OSetGen_Size)(secVBitTable)) {
   1003          gcSecVBitTable();
   1004       }
   1005 
   1006       // Insert the new node.
   1007       VG_(OSetGen_Insert)(secVBitTable, n);
   1008       sec_vbits_new_nodes++;
   1009 
   1010       n_secVBit_nodes = VG_(OSetGen_Size)(secVBitTable);
   1011       if (n_secVBit_nodes > max_secVBit_nodes)
   1012          max_secVBit_nodes = n_secVBit_nodes;
   1013    }
   1014 }
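
         // An illustrative lookup (the address is an arbitrary example):
         // with BYTES_PER_SEC_VBIT_NODE == 16, writing a partially-defined
         // byte at address 0x40051234 rounds down to the node keyed at
         // 0x40051230 and stores that byte's 8 V bits in vbits8[4]
         // (0x40051234 % 16 == 4); the other 15 slots of a freshly created
         // node stay V_BITS8_UNDEFINED until their own addresses become
         // partially defined.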
   1015 
   1016 /* --------------- Endianness helpers --------------- */
   1017 
    1018 /* Returns the offset in memory of the byteno-th least significant byte
   1019    in a wordszB-sized word, given the specified endianness. */
   1020 static INLINE UWord byte_offset_w ( UWord wordszB, Bool bigendian,
   1021                                     UWord byteno ) {
   1022    return bigendian ? (wordszB-1-byteno) : byteno;
   1023 }
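
         // Example values: for a 4-byte word, byte_offset_w(4, False, byteno)
         // gives offsets 0,1,2,3 for byteno = 0..3 (little-endian), while
         // byte_offset_w(4, True, byteno) gives 3,2,1,0 (big-endian).  Either
         // way byteno 0, the least significant byte of the value, lands at
         // the address where that byte lives for the given endianness.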
   1024 
   1025 
   1026 /* --------------- Ignored address ranges --------------- */
   1027 
   1028 #define M_IGNORE_RANGES 4
   1029 
   1030 typedef
   1031    struct {
   1032       Int  used;
   1033       Addr start[M_IGNORE_RANGES];
   1034       Addr end[M_IGNORE_RANGES];
   1035    }
   1036    IgnoreRanges;
   1037 
   1038 static IgnoreRanges ignoreRanges;
   1039 
   1040 INLINE Bool MC_(in_ignored_range) ( Addr a )
   1041 {
   1042    Int i;
   1043    if (LIKELY(ignoreRanges.used == 0))
   1044       return False;
   1045    for (i = 0; i < ignoreRanges.used; i++) {
   1046       if (a >= ignoreRanges.start[i] && a < ignoreRanges.end[i])
   1047          return True;
   1048    }
   1049    return False;
   1050 }
   1051 
   1052 
   1053 /* Parse a 32- or 64-bit hex number, including leading 0x, from string
   1054    starting at *ppc, putting result in *result, and return True.  Or
   1055    fail, in which case *ppc and *result are undefined, and return
   1056    False. */
   1057 
   1058 static Bool isHex ( UChar c )
   1059 {
   1060   return ((c >= '0' && c <= '9') ||
   1061 	  (c >= 'a' && c <= 'f') ||
   1062 	  (c >= 'A' && c <= 'F'));
   1063 }
   1064 
   1065 static UInt fromHex ( UChar c )
   1066 {
   1067    if (c >= '0' && c <= '9')
   1068       return (UInt)c - (UInt)'0';
   1069    if (c >= 'a' && c <= 'f')
   1070       return 10 +  (UInt)c - (UInt)'a';
   1071    if (c >= 'A' && c <= 'F')
   1072       return 10 +  (UInt)c - (UInt)'A';
   1073    /*NOTREACHED*/
   1074    tl_assert(0);
   1075    return 0;
   1076 }
   1077 
   1078 static Bool parse_Addr ( UChar** ppc, Addr* result )
   1079 {
   1080    Int used, limit = 2 * sizeof(Addr);
   1081    if (**ppc != '0')
   1082       return False;
   1083    (*ppc)++;
   1084    if (**ppc != 'x')
   1085       return False;
   1086    (*ppc)++;
   1087    *result = 0;
   1088    used = 0;
   1089    while (isHex(**ppc)) {
   1090       UInt d = fromHex(**ppc);
   1091       tl_assert(d < 16);
    1092       *result = ((*result) << 4) | d;
   1093       (*ppc)++;
   1094       used++;
   1095       if (used > limit) return False;
   1096    }
   1097    if (used == 0)
   1098       return False;
   1099    return True;
   1100 }
   1101 
   1102 /* Parse two such numbers separated by a dash, or fail. */
   1103 
   1104 static Bool parse_range ( UChar** ppc, Addr* result1, Addr* result2 )
   1105 {
   1106    Bool ok = parse_Addr(ppc, result1);
   1107    if (!ok)
   1108       return False;
   1109    if (**ppc != '-')
   1110       return False;
   1111    (*ppc)++;
   1112    ok = parse_Addr(ppc, result2);
   1113    if (!ok)
   1114       return False;
   1115    return True;
   1116 }
   1117 
   1118 /* Parse a set of ranges separated by commas into 'ignoreRanges', or
   1119    fail. */
   1120 
   1121 static Bool parse_ignore_ranges ( UChar* str0 )
   1122 {
   1123    Addr start, end;
   1124    Bool ok;
   1125    UChar*  str = str0;
   1126    UChar** ppc = &str;
   1127    ignoreRanges.used = 0;
   1128    while (1) {
   1129       ok = parse_range(ppc, &start, &end);
   1130       if (!ok)
   1131          return False;
   1132       if (ignoreRanges.used >= M_IGNORE_RANGES)
   1133          return False;
   1134       ignoreRanges.start[ignoreRanges.used] = start;
   1135       ignoreRanges.end[ignoreRanges.used] = end;
   1136       ignoreRanges.used++;
   1137       if (**ppc == 0)
   1138          return True;
   1139       if (**ppc != ',')
   1140          return False;
   1141       (*ppc)++;
   1142    }
   1143    /*NOTREACHED*/
   1144    return False;
   1145 }
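
         /* An illustrative input (the string is made up for this comment):
            given "0x50000000-0x50001000,0x70000000-0x70010000",
            parse_ignore_ranges() fills ignoreRanges with two half-open
            [start, end) pairs and returns True.  A missing "0x" prefix, a
            malformed number, a trailing comma, or more than M_IGNORE_RANGES
            ranges all make it return False, possibly leaving ignoreRanges
            only partially filled. */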
   1146 
   1147 
   1148 /* --------------- Load/store slow cases. --------------- */
   1149 
   1150 static
   1151 #ifndef PERF_FAST_LOADV
   1152 INLINE
   1153 #endif
   1154 ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
   1155 {
   1156    /* Make up a 64-bit result V word, which contains the loaded data for
   1157       valid addresses and Defined for invalid addresses.  Iterate over
   1158       the bytes in the word, from the most significant down to the
   1159       least. */
   1160    ULong vbits64     = V_BITS64_UNDEFINED;
   1161    SizeT szB         = nBits / 8;
   1162    SSizeT i;                        // Must be signed.
   1163    SizeT n_addrs_bad = 0;
   1164    Addr  ai;
   1165    Bool  partial_load_exemption_applies;
   1166    UChar vbits8;
   1167    Bool  ok;
   1168 
   1169    PROF_EVENT(30, "mc_LOADVn_slow");
   1170 
   1171    /* ------------ BEGIN semi-fast cases ------------ */
   1172    /* These deal quickly-ish with the common auxiliary primary map
    1173       cases on 64-bit platforms.  They are merely a speedup hack and can be
   1174       omitted without loss of correctness/functionality.  Note that in
   1175       both cases the "sizeof(void*) == 8" causes these cases to be
   1176       folded out by compilers on 32-bit platforms.  These are derived
   1177       from LOADV64 and LOADV32.
   1178    */
   1179    if (LIKELY(sizeof(void*) == 8
   1180                       && nBits == 64 && VG_IS_8_ALIGNED(a))) {
   1181       SecMap* sm       = get_secmap_for_reading(a);
   1182       UWord   sm_off16 = SM_OFF_16(a);
   1183       UWord   vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
   1184       if (LIKELY(vabits16 == VA_BITS16_DEFINED))
   1185          return V_BITS64_DEFINED;
   1186       if (LIKELY(vabits16 == VA_BITS16_UNDEFINED))
   1187          return V_BITS64_UNDEFINED;
   1188       /* else fall into the slow case */
   1189    }
   1190    if (LIKELY(sizeof(void*) == 8
   1191                       && nBits == 32 && VG_IS_4_ALIGNED(a))) {
   1192       SecMap* sm = get_secmap_for_reading(a);
   1193       UWord sm_off = SM_OFF(a);
   1194       UWord vabits8 = sm->vabits8[sm_off];
   1195       if (LIKELY(vabits8 == VA_BITS8_DEFINED))
   1196          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
   1197       if (LIKELY(vabits8 == VA_BITS8_UNDEFINED))
   1198          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
   1199       /* else fall into slow case */
   1200    }
   1201    /* ------------ END semi-fast cases ------------ */
   1202 
   1203    tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
   1204 
   1205    for (i = szB-1; i >= 0; i--) {
   1206       PROF_EVENT(31, "mc_LOADVn_slow(loop)");
   1207       ai = a + byte_offset_w(szB, bigendian, i);
   1208       ok = get_vbits8(ai, &vbits8);
   1209       if (!ok) n_addrs_bad++;
   1210       vbits64 <<= 8;
   1211       vbits64 |= vbits8;
   1212    }
   1213 
   1214    /* This is a hack which avoids producing errors for code which
    1215       insists on stepping along byte strings in aligned word-sized
   1216       chunks, and there is a partially defined word at the end.  (eg,
   1217       optimised strlen).  Such code is basically broken at least WRT
   1218       semantics of ANSI C, but sometimes users don't have the option
   1219       to fix it, and so this option is provided.  Note it is now
   1220       defaulted to not-engaged.
   1221 
    1222       A load from a partially-addressable place is allowed if:
   1223       - the command-line flag is set
   1224       - it's a word-sized, word-aligned load
   1225       - at least one of the addresses in the word *is* valid
   1226    */
   1227    partial_load_exemption_applies
   1228       = MC_(clo_partial_loads_ok) && szB == VG_WORDSIZE
   1229                                    && VG_IS_WORD_ALIGNED(a)
   1230                                    && n_addrs_bad < VG_WORDSIZE;
   1231 
   1232    if (n_addrs_bad > 0 && !partial_load_exemption_applies)
   1233       MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
   1234 
   1235    return vbits64;
   1236 }
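
         /* A worked example of the slow path above (the scenario is
            illustrative): a 32-bit little-endian load from a 4-aligned
            address 'a' where a+0..a+2 are defined but a+3 is unaddressable
            gives n_addrs_bad == 1, and the byte for a+3 in the result is
            forced to V_BITS8_DEFINED by get_vbits8().  An address error is
            then recorded unless MC_(clo_partial_loads_ok) is set and the
            load is word-sized and word-aligned for the host (which this one
            is only on a 32-bit host), in which case the exemption above
            suppresses the error. */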
   1237 
   1238 
   1239 static
   1240 #ifndef PERF_FAST_STOREV
   1241 INLINE
   1242 #endif
   1243 void mc_STOREVn_slow ( Addr a, SizeT nBits, ULong vbytes, Bool bigendian )
   1244 {
   1245    SizeT szB = nBits / 8;
   1246    SizeT i, n_addrs_bad = 0;
   1247    UChar vbits8;
   1248    Addr  ai;
   1249    Bool  ok;
   1250 
   1251    PROF_EVENT(35, "mc_STOREVn_slow");
   1252 
   1253    /* ------------ BEGIN semi-fast cases ------------ */
   1254    /* These deal quickly-ish with the common auxiliary primary map
    1255       cases on 64-bit platforms.  They are merely a speedup hack and can be
   1256       omitted without loss of correctness/functionality.  Note that in
   1257       both cases the "sizeof(void*) == 8" causes these cases to be
   1258       folded out by compilers on 32-bit platforms.  These are derived
   1259       from STOREV64 and STOREV32.
   1260    */
   1261    if (LIKELY(sizeof(void*) == 8
   1262                       && nBits == 64 && VG_IS_8_ALIGNED(a))) {
   1263       SecMap* sm       = get_secmap_for_reading(a);
   1264       UWord   sm_off16 = SM_OFF_16(a);
   1265       UWord   vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
   1266       if (LIKELY( !is_distinguished_sm(sm) &&
   1267                           (VA_BITS16_DEFINED   == vabits16 ||
   1268                            VA_BITS16_UNDEFINED == vabits16) )) {
   1269          /* Handle common case quickly: a is suitably aligned, */
    1270          /* is mapped, and is addressable. */
   1271          // Convert full V-bits in register to compact 2-bit form.
   1272          if (LIKELY(V_BITS64_DEFINED == vbytes)) {
   1273             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
   1274             return;
   1275          } else if (V_BITS64_UNDEFINED == vbytes) {
   1276             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
   1277             return;
   1278          }
   1279          /* else fall into the slow case */
   1280       }
   1281       /* else fall into the slow case */
   1282    }
   1283    if (LIKELY(sizeof(void*) == 8
   1284                       && nBits == 32 && VG_IS_4_ALIGNED(a))) {
   1285       SecMap* sm      = get_secmap_for_reading(a);
   1286       UWord   sm_off  = SM_OFF(a);
   1287       UWord   vabits8 = sm->vabits8[sm_off];
   1288       if (LIKELY( !is_distinguished_sm(sm) &&
   1289                           (VA_BITS8_DEFINED   == vabits8 ||
   1290                            VA_BITS8_UNDEFINED == vabits8) )) {
   1291          /* Handle common case quickly: a is suitably aligned, */
    1292          /* is mapped, and is addressable. */
   1293          // Convert full V-bits in register to compact 2-bit form.
   1294          if (LIKELY(V_BITS32_DEFINED == (vbytes & 0xFFFFFFFF))) {
   1295             sm->vabits8[sm_off] = VA_BITS8_DEFINED;
   1296             return;
   1297          } else if (V_BITS32_UNDEFINED == (vbytes & 0xFFFFFFFF)) {
   1298             sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
   1299             return;
   1300          }
   1301          /* else fall into the slow case */
   1302       }
   1303       /* else fall into the slow case */
   1304    }
   1305    /* ------------ END semi-fast cases ------------ */
   1306 
   1307    tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
   1308 
   1309    /* Dump vbytes in memory, iterating from least to most significant
    1310       byte.  At the same time establish addressability of the location. */
   1311    for (i = 0; i < szB; i++) {
   1312       PROF_EVENT(36, "mc_STOREVn_slow(loop)");
   1313       ai     = a + byte_offset_w(szB, bigendian, i);
   1314       vbits8 = vbytes & 0xff;
   1315       ok     = set_vbits8(ai, vbits8);
   1316       if (!ok) n_addrs_bad++;
   1317       vbytes >>= 8;
   1318    }
   1319 
   1320    /* If an address error has happened, report it. */
   1321    if (n_addrs_bad > 0)
   1322       MC_(record_address_error)( VG_(get_running_tid)(), a, szB, True );
   1323 }
   1324 
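        /* A minimal sketch (not part of the tool logic) of the byte-scatter
           loop above, assuming byte_offset_w(szB, bigendian, i) selects byte
           (szB-1-i) on big-endian targets and byte i otherwise: vbytes is
           consumed least-significant byte first, so on a big-endian target
           that byte lands at the highest address of the access. */
        #if 0
        static void example_scatter_vbytes ( UChar* dst, ULong vbytes,
                                             SizeT szB, Bool bigendian )
        {
           SizeT i;
           for (i = 0; i < szB; i++) {
              SizeT off = bigendian ? (szB - 1 - i) : i;
              dst[off]  = vbytes & 0xFF;   /* least significant byte first */
              vbytes >>= 8;
           }
        }
        #endif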
   1325 
   1326 /*------------------------------------------------------------*/
   1327 /*--- Setting permissions over address ranges.             ---*/
   1328 /*------------------------------------------------------------*/
   1329 
   1330 static void set_address_range_perms ( Addr a, SizeT lenT, UWord vabits16,
   1331                                       UWord dsm_num )
   1332 {
   1333    UWord    sm_off, sm_off16;
   1334    UWord    vabits2 = vabits16 & 0x3;
   1335    SizeT    lenA, lenB, len_to_next_secmap;
   1336    Addr     aNext;
   1337    SecMap*  sm;
   1338    SecMap** sm_ptr;
   1339    SecMap*  example_dsm;
   1340 
   1341    PROF_EVENT(150, "set_address_range_perms");
   1342 
   1343    /* Check the V+A bits make sense. */
   1344    tl_assert(VA_BITS16_NOACCESS  == vabits16 ||
   1345              VA_BITS16_UNDEFINED == vabits16 ||
   1346              VA_BITS16_DEFINED   == vabits16);
   1347 
   1348    // This code should never write PDBs;  ensure this.  (See comment above
   1349    // set_vabits2().)
   1350    tl_assert(VA_BITS2_PARTDEFINED != vabits2);
   1351 
   1352    if (lenT == 0)
   1353       return;
   1354 
   1355    if (lenT > 256 * 1024 * 1024) {
   1356       if (VG_(clo_verbosity) > 0 && !VG_(clo_xml)) {
   1357          Char* s = "unknown???";
   1358          if (vabits16 == VA_BITS16_NOACCESS ) s = "noaccess";
   1359          if (vabits16 == VA_BITS16_UNDEFINED) s = "undefined";
   1360          if (vabits16 == VA_BITS16_DEFINED  ) s = "defined";
   1361          VG_(message)(Vg_UserMsg, "Warning: set address range perms: "
   1362                                   "large range [0x%lx, 0x%lx) (%s)\n",
   1363                                   a, a + lenT, s);
   1364       }
   1365    }
   1366 
   1367 #ifndef PERF_FAST_SARP
   1368    /*------------------ debug-only case ------------------ */
   1369    {
   1370       // Endianness doesn't matter here because all bytes are being set to
   1371       // the same value.
   1372       // Nb: We don't have to worry about updating the sec-V-bits table
   1373       // after these set_vabits2() calls because this code never writes
   1374       // VA_BITS2_PARTDEFINED values.
   1375       SizeT i;
   1376       for (i = 0; i < lenT; i++) {
   1377          set_vabits2(a + i, vabits2);
   1378       }
   1379       return;
   1380    }
   1381 #endif
   1382 
   1383    /*------------------ standard handling ------------------ */
   1384 
   1385    /* Get the distinguished secondary that we might want
   1386       to use (part of the space-compression scheme). */
   1387    example_dsm = &sm_distinguished[dsm_num];
   1388 
   1389    // We have to handle ranges covering various combinations of partial and
   1390    // whole sec-maps.  Here is how parts 1, 2 and 3 are used in each case.
   1391    // Cases marked with a '*' are common.
   1392    //
   1393    //   TYPE                                             PARTS USED
   1394    //   ----                                             ----------
   1395    // * one partial sec-map                  (p)         1
   1396    // - one whole sec-map                    (P)         2
   1397    //
   1398    // * two partial sec-maps                 (pp)        1,3
   1399    // - one partial, one whole sec-map       (pP)        1,2
   1400    // - one whole, one partial sec-map       (Pp)        2,3
   1401    // - two whole sec-maps                   (PP)        2,2
   1402    //
   1403    // * one partial, one whole, one partial  (pPp)       1,2,3
   1404    // - one partial, two whole               (pPP)       1,2,2
   1405    // - two whole, one partial               (PPp)       2,2,3
   1406    // - three whole                          (PPP)       2,2,2
   1407    //
   1408    // * one partial, N-2 whole, one partial  (pP...Pp)   1,2...2,3
   1409    // - one partial, N-1 whole               (pP...PP)   1,2...2,2
   1410    // - N-1 whole, one partial               (PP...Pp)   2,2...2,3
   1411    // - N whole                              (PP...PP)   2,2...2,2
   1412 
   1413    // Break up total length (lenT) into two parts:  length in the first
   1414    // sec-map (lenA), and the rest (lenB);   lenT == lenA + lenB.
   1415    aNext = start_of_this_sm(a) + SM_SIZE;
   1416    len_to_next_secmap = aNext - a;
   1417    if ( lenT <= len_to_next_secmap ) {
   1418       // Range entirely within one sec-map.  Covers almost all cases.
   1419       PROF_EVENT(151, "set_address_range_perms-single-secmap");
   1420       lenA = lenT;
   1421       lenB = 0;
   1422    } else if (is_start_of_sm(a)) {
   1423       // Range spans at least one whole sec-map, and starts at the beginning
   1424       // of a sec-map; skip to Part 2.
   1425       PROF_EVENT(152, "set_address_range_perms-startof-secmap");
   1426       lenA = 0;
   1427       lenB = lenT;
   1428       goto part2;
   1429    } else {
   1430       // Range spans two or more sec-maps, first one is partial.
   1431       PROF_EVENT(153, "set_address_range_perms-multiple-secmaps");
   1432       lenA = len_to_next_secmap;
   1433       lenB = lenT - lenA;
   1434    }
   1435 
   1436    //------------------------------------------------------------------------
   1437    // Part 1: Deal with the first sec_map.  Most of the time the range will be
   1438    // entirely within a sec_map and this part alone will suffice.  Also,
   1439    // doing it this way lets us avoid repeatedly testing for the crossing of
   1440    // a sec-map boundary within these loops.
   1441    //------------------------------------------------------------------------
   1442 
   1443    // If it's distinguished, make it undistinguished if necessary.
   1444    sm_ptr = get_secmap_ptr(a);
   1445    if (is_distinguished_sm(*sm_ptr)) {
   1446       if (*sm_ptr == example_dsm) {
   1447          // Sec-map already has the V+A bits that we want, so skip.
   1448          PROF_EVENT(154, "set_address_range_perms-dist-sm1-quick");
   1449          a    = aNext;
   1450          lenA = 0;
   1451       } else {
   1452          PROF_EVENT(155, "set_address_range_perms-dist-sm1");
   1453          *sm_ptr = copy_for_writing(*sm_ptr);
   1454       }
   1455    }
   1456    sm = *sm_ptr;
   1457 
   1458    // 1 byte steps
   1459    while (True) {
   1460       if (VG_IS_8_ALIGNED(a)) break;
   1461       if (lenA < 1)           break;
   1462       PROF_EVENT(156, "set_address_range_perms-loop1a");
   1463       sm_off = SM_OFF(a);
   1464       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
   1465       a    += 1;
   1466       lenA -= 1;
   1467    }
   1468    // 8-aligned, 8 byte steps
   1469    while (True) {
   1470       if (lenA < 8) break;
   1471       PROF_EVENT(157, "set_address_range_perms-loop8a");
   1472       sm_off16 = SM_OFF_16(a);
   1473       ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
   1474       a    += 8;
   1475       lenA -= 8;
   1476    }
   1477    // 1 byte steps
   1478    while (True) {
   1479       if (lenA < 1) break;
   1480       PROF_EVENT(158, "set_address_range_perms-loop1b");
   1481       sm_off = SM_OFF(a);
   1482       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
   1483       a    += 1;
   1484       lenA -= 1;
   1485    }
   1486 
   1487    // We've finished the first sec-map.  Is that it?
   1488    if (lenB == 0)
   1489       return;
   1490 
   1491    //------------------------------------------------------------------------
   1492    // Part 2: Fast-set entire sec-maps at a time.
   1493    //------------------------------------------------------------------------
   1494   part2:
   1495    // 64KB-aligned, 64KB steps.
   1496    // Nb: we can reach here with lenB < SM_SIZE
   1497    tl_assert(0 == lenA);
   1498    while (True) {
   1499       if (lenB < SM_SIZE) break;
   1500       tl_assert(is_start_of_sm(a));
   1501       PROF_EVENT(159, "set_address_range_perms-loop64K");
   1502       sm_ptr = get_secmap_ptr(a);
   1503       if (!is_distinguished_sm(*sm_ptr)) {
   1504          PROF_EVENT(160, "set_address_range_perms-loop64K-free-dist-sm");
   1505          // Free the non-distinguished sec-map that we're replacing.  This
   1506          // case happens moderately often, enough to be worthwhile.
   1507          VG_(am_munmap_valgrind)((Addr)*sm_ptr, sizeof(SecMap));
   1508       }
   1509       update_SM_counts(*sm_ptr, example_dsm);
   1510       // Make the sec-map entry point to the example DSM
   1511       *sm_ptr = example_dsm;
   1512       lenB -= SM_SIZE;
   1513       a    += SM_SIZE;
   1514    }
   1515 
   1516    // We've finished the whole sec-maps.  Is that it?
   1517    if (lenB == 0)
   1518       return;
   1519 
   1520    //------------------------------------------------------------------------
   1521    // Part 3: Finish off the final partial sec-map, if necessary.
   1522    //------------------------------------------------------------------------
   1523 
   1524    tl_assert(is_start_of_sm(a) && lenB < SM_SIZE);
   1525 
   1526    // If it's distinguished, make it undistinguished if necessary.
   1527    sm_ptr = get_secmap_ptr(a);
   1528    if (is_distinguished_sm(*sm_ptr)) {
   1529       if (*sm_ptr == example_dsm) {
   1530          // Sec-map already has the V+A bits that we want, so stop.
   1531          PROF_EVENT(161, "set_address_range_perms-dist-sm2-quick");
   1532          return;
   1533       } else {
   1534          PROF_EVENT(162, "set_address_range_perms-dist-sm2");
   1535          *sm_ptr = copy_for_writing(*sm_ptr);
   1536       }
   1537    }
   1538    sm = *sm_ptr;
   1539 
   1540    // 8-aligned, 8 byte steps
   1541    while (True) {
   1542       if (lenB < 8) break;
   1543       PROF_EVENT(163, "set_address_range_perms-loop8b");
   1544       sm_off16 = SM_OFF_16(a);
   1545       ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
   1546       a    += 8;
   1547       lenB -= 8;
   1548    }
   1549    // 1 byte steps
   1550    while (True) {
   1551       if (lenB < 1) return;
   1552       PROF_EVENT(164, "set_address_range_perms-loop1c");
   1553       sm_off = SM_OFF(a);
   1554       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
   1555       a    += 1;
   1556       lenB -= 1;
   1557    }
   1558 }
   1559 
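        /* A minimal sketch (illustrative only) of the lenA/lenB split
           performed above, assuming SM_SIZE, start_of_this_sm() and
           is_start_of_sm() behave as defined earlier in this file.  With
           64KB sec-maps, a == 0x1234FFF0 and lenT == 0x30 gives
           lenA == 0x10 (up to the boundary at 0x12350000) and lenB == 0x20. */
        #if 0
        static void example_split_lenT ( Addr a, SizeT lenT,
                                         /*OUT*/SizeT* lenA, /*OUT*/SizeT* lenB )
        {
           Addr  aNext              = start_of_this_sm(a) + SM_SIZE;
           SizeT len_to_next_secmap = aNext - a;
           if (lenT <= len_to_next_secmap) {
              *lenA = lenT;               *lenB = 0;            /* (p) or (P) */
           } else if (is_start_of_sm(a)) {
              *lenA = 0;                  *lenB = lenT;         /* (PP...)    */
           } else {
              *lenA = len_to_next_secmap; *lenB = lenT - *lenA; /* (pP...)    */
           }
        }
        #endif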
   1560 
   1561 /* --- Set permissions for arbitrary address ranges --- */
   1562 
   1563 void MC_(make_mem_noaccess) ( Addr a, SizeT len )
   1564 {
   1565    PROF_EVENT(40, "MC_(make_mem_noaccess)");
   1566    DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a, len);
   1567    set_address_range_perms ( a, len, VA_BITS16_NOACCESS, SM_DIST_NOACCESS );
   1568    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
   1569       ocache_sarp_Clear_Origins ( a, len );
   1570 }
   1571 
   1572 static void make_mem_undefined ( Addr a, SizeT len )
   1573 {
   1574    PROF_EVENT(41, "make_mem_undefined");
   1575    DEBUG("make_mem_undefined(%p, %lu)\n", a, len);
   1576    set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
   1577 }
   1578 
   1579 void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag )
   1580 {
   1581    PROF_EVENT(41, "MC_(make_mem_undefined)");
   1582    DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a, len);
   1583    set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
   1584    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
   1585       ocache_sarp_Set_Origins ( a, len, otag );
   1586 }
   1587 
   1588 static
   1589 void make_mem_undefined_w_tid_and_okind ( Addr a, SizeT len,
   1590                                           ThreadId tid, UInt okind )
   1591 {
   1592    UInt        ecu;
   1593    ExeContext* here;
   1594    /* VG_(record_ExeContext) checks for validity of tid, and asserts
   1595       if it is invalid.  So no need to do it here. */
   1596    tl_assert(okind <= 3);
   1597    here = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
   1598    tl_assert(here);
   1599    ecu = VG_(get_ECU_from_ExeContext)(here);
   1600    tl_assert(VG_(is_plausible_ECU)(ecu));
   1601    MC_(make_mem_undefined_w_otag) ( a, len, ecu | okind );
   1602 }
   1603 
   1604 static
   1605 void make_mem_undefined_w_tid ( Addr a, SizeT len, ThreadId tid ) {
   1606    make_mem_undefined_w_tid_and_okind ( a, len, tid, MC_OKIND_UNKNOWN );
   1607 }
   1608 
   1609 
   1610 void MC_(make_mem_defined) ( Addr a, SizeT len )
   1611 {
   1612    PROF_EVENT(42, "MC_(make_mem_defined)");
   1613    DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a, len);
   1614    set_address_range_perms ( a, len, VA_BITS16_DEFINED, SM_DIST_DEFINED );
   1615    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
   1616       ocache_sarp_Clear_Origins ( a, len );
   1617 }
   1618 
   1619 /* For each byte in [a,a+len), if the byte is addressable, make it be
   1620    defined, but if it isn't addressable, leave it alone.  In other
   1621    words, a version of MC_(make_mem_defined) that doesn't mess with
   1622    addressability.  Low-performance implementation. */
   1623 static void make_mem_defined_if_addressable ( Addr a, SizeT len )
   1624 {
   1625    SizeT i;
   1626    UChar vabits2;
   1627    DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a, (ULong)len);
   1628    for (i = 0; i < len; i++) {
   1629       vabits2 = get_vabits2( a+i );
   1630       if (LIKELY(VA_BITS2_NOACCESS != vabits2)) {
   1631          set_vabits2(a+i, VA_BITS2_DEFINED);
   1632          if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
   1633             MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
   1634          }
   1635       }
   1636    }
   1637 }
   1638 
   1639 /* Similarly (needed for mprotect handling ..) */
   1640 static void make_mem_defined_if_noaccess ( Addr a, SizeT len )
   1641 {
   1642    SizeT i;
   1643    UChar vabits2;
   1644    DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a, (ULong)len);
   1645    for (i = 0; i < len; i++) {
   1646       vabits2 = get_vabits2( a+i );
   1647       if (LIKELY(VA_BITS2_NOACCESS == vabits2)) {
   1648          set_vabits2(a+i, VA_BITS2_DEFINED);
   1649          if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
   1650             MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
   1651          }
   1652       }
   1653    }
   1654 }
   1655 
   1656 /* --- Block-copy permissions (needed for implementing realloc() and
   1657        sys_mremap). --- */
   1658 
   1659 void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT len )
   1660 {
   1661    SizeT i, j;
   1662    UChar vabits2, vabits8;
   1663    Bool  aligned, nooverlap;
   1664 
   1665    DEBUG("MC_(copy_address_range_state)\n");
   1666    PROF_EVENT(50, "MC_(copy_address_range_state)");
   1667 
   1668    if (len == 0 || src == dst)
   1669       return;
   1670 
   1671    aligned   = VG_IS_4_ALIGNED(src) && VG_IS_4_ALIGNED(dst);
   1672    nooverlap = src+len <= dst || dst+len <= src;
   1673 
   1674    if (nooverlap && aligned) {
   1675 
   1676       /* Vectorised fast case, when no overlap and suitably aligned */
   1677       /* vector loop */
   1678       i = 0;
   1679       while (len >= 4) {
   1680          vabits8 = get_vabits8_for_aligned_word32( src+i );
   1681          set_vabits8_for_aligned_word32( dst+i, vabits8 );
   1682          if (LIKELY(VA_BITS8_DEFINED == vabits8
   1683                             || VA_BITS8_UNDEFINED == vabits8
   1684                             || VA_BITS8_NOACCESS == vabits8)) {
   1685             /* do nothing */
   1686          } else {
   1687             /* have to copy secondary map info */
   1688             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+0 ))
   1689                set_sec_vbits8( dst+i+0, get_sec_vbits8( src+i+0 ) );
   1690             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+1 ))
   1691                set_sec_vbits8( dst+i+1, get_sec_vbits8( src+i+1 ) );
   1692             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+2 ))
   1693                set_sec_vbits8( dst+i+2, get_sec_vbits8( src+i+2 ) );
   1694             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+3 ))
   1695                set_sec_vbits8( dst+i+3, get_sec_vbits8( src+i+3 ) );
   1696          }
   1697          i += 4;
   1698          len -= 4;
   1699       }
   1700       /* fixup loop */
   1701       while (len >= 1) {
   1702          vabits2 = get_vabits2( src+i );
   1703          set_vabits2( dst+i, vabits2 );
   1704          if (VA_BITS2_PARTDEFINED == vabits2) {
   1705             set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
   1706          }
   1707          i++;
   1708          len--;
   1709       }
   1710 
   1711    } else {
   1712 
   1713       /* We have to do things the slow way */
   1714       if (src < dst) {
   1715          for (i = 0, j = len-1; i < len; i++, j--) {
   1716             PROF_EVENT(51, "MC_(copy_address_range_state)(loop)");
   1717             vabits2 = get_vabits2( src+j );
   1718             set_vabits2( dst+j, vabits2 );
   1719             if (VA_BITS2_PARTDEFINED == vabits2) {
   1720                set_sec_vbits8( dst+j, get_sec_vbits8( src+j ) );
   1721             }
   1722          }
   1723       }
   1724 
   1725       if (src > dst) {
   1726          for (i = 0; i < len; i++) {
   1727             PROF_EVENT(52, "MC_(copy_address_range_state)(loop)");
   1728             vabits2 = get_vabits2( src+i );
   1729             set_vabits2( dst+i, vabits2 );
   1730             if (VA_BITS2_PARTDEFINED == vabits2) {
   1731                set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
   1732             }
   1733          }
   1734       }
   1735    }
   1736 
   1737 }
   1738 
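        /* Worked example (illustrative): copying 3 bytes of metadata from
           src == 0x5000 to dst == 0x5001 overlaps with src < dst, so the
           slow path above copies downwards from the top (0x5002 -> 0x5003,
           then 0x5001 -> 0x5002, then 0x5000 -> 0x5001); copying upwards
           would overwrite the metadata at 0x5001 before it had been read,
           the same consideration that applies to memmove. */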
   1739 
   1740 /*------------------------------------------------------------*/
   1741 /*--- Origin tracking stuff - cache basics                 ---*/
   1742 /*------------------------------------------------------------*/
   1743 
   1744 /* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
   1745    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   1746 
   1747    Note that this implementation draws inspiration from the "origin
   1748    tracking by value piggybacking" scheme described in "Tracking Bad
   1749    Apples: Reporting the Origin of Null and Undefined Value Errors"
   1750    (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer,
   1751    Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is
   1752    implemented completely differently.
   1753 
   1754    Origin tags and ECUs -- about the shadow values
   1755    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   1756 
   1757    This implementation tracks the defining point of all uninitialised
   1758    values using so called "origin tags", which are 32-bit integers,
   1759    rather than using the values themselves to encode the origins.  The
   1760    latter, so-called "value piggybacking", is what the OOPSLA07 paper
   1761    describes.
   1762 
   1763    Origin tags, as tracked by the machinery below, are 32-bit unsigned
   1764    ints (UInts), regardless of the machine's word size.  Each tag
   1765    comprises an upper 30-bit ECU field and a lower 2-bit
   1766    'kind' field.  The ECU field is a number given out by m_execontext
   1767    and has a 1-1 mapping with ExeContext*s.  An ECU can be used
   1768    directly as an origin tag (otag), but in fact we want to put
   1769    additional information in the 'kind' field to indicate roughly where the
   1770    tag came from.  This helps print more understandable error messages
   1771    for the user -- it has no other purpose.  In summary:
   1772 
   1773    * Both ECUs and origin tags are represented as 32-bit words
   1774 
   1775    * m_execontext and the core-tool interface deal purely in ECUs.
   1776      They have no knowledge of origin tags - that is a purely
   1777      Memcheck-internal matter.
   1778 
   1779    * all valid ECUs have the lowest 2 bits zero and at least
   1780      one of the upper 30 bits nonzero (see VG_(is_plausible_ECU))
   1781 
   1782    * to convert from an ECU to an otag, OR in one of the MC_OKIND_
   1783      constants defined in mc_include.h.
   1784 
   1785    * to convert an otag back to an ECU, AND it with ~3
   1786 
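           For example, an ECU of 0x6e85c (low 2 bits zero) OR-ed with one
           of the 2-bit MC_OKIND_ constants yields an otag; AND-ing that
           otag with ~3 recovers 0x6e85c exactly, since the kind occupies
           only the two bits that every valid ECU leaves clear.
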
   1787    One important fact is that no valid otag is zero.  A zero otag is
   1788    used by the implementation to indicate "no origin", which could
   1789    mean that either the value is defined, or it is undefined but the
   1790    implementation somehow managed to lose the origin.
   1791 
   1792    The ECU used for memory created by malloc etc is derived from the
   1793    stack trace at the time the malloc etc happens.  This means the
   1794    mechanism can show the exact allocation point for heap-created
   1795    uninitialised values.
   1796 
   1797    In contrast, it is simply too expensive to create a complete
   1798    backtrace for each stack allocation.  Therefore we merely use a
   1799    depth-1 backtrace for stack allocations, which can be done once at
   1800    translation time, rather than N times at run time.  The result of
   1801    this is that, for stack created uninitialised values, Memcheck can
   1802    only show the allocating function, and not what called it.
   1803    Furthermore, compilers tend to move the stack pointer just once at
   1804    the start of the function, to allocate all locals, and so in fact
   1805    the stack origin almost always simply points to the opening brace
   1806    of the function.  Net result is, for stack origins, the mechanism
   1807    can tell you in which function the undefined value was created, but
   1808    that's all.  Users will need to carefully check all locals in the
   1809    specified function.
   1810 
   1811    Shadowing registers and memory
   1812    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   1813 
   1814    Memory is shadowed using a two level cache structure (ocacheL1 and
   1815    ocacheL2).  Memory references are first directed to ocacheL1.  This
   1816    is a traditional 2-way set associative cache with 32-byte lines and
   1817    approximate LRU replacement within each set.
   1818 
   1819    A naive implementation would require storing one 32 bit otag for
   1820    each byte of memory covered, a 4:1 space overhead.  Instead, there
   1821    is one otag for every 4 bytes of memory covered, plus a 4-bit mask
   1822    that shows which of the 4 bytes have that shadow value and which
   1823    have a shadow value of zero (indicating no origin).  Hence a lot of
   1824    space is saved, but the cost is that only one different origin per
   1825    4 bytes of address space can be represented.  This is a source of
   1826    imprecision, but how much of a problem it really is remains to be
   1827    seen.
   1828 
   1829    A cache line that contains all zeroes ("no origins") contains no
   1830    useful information, and can be ejected from the L1 cache "for
   1831    free", in the sense that a read miss on the L1 causes a line of
   1832    zeroes to be installed.  However, ejecting a line containing
   1833    nonzeroes risks losing origin information permanently.  In order to
   1834    prevent such lossage, ejected nonzero lines are placed in a
   1835    secondary cache (ocacheL2), which is an OSet (AVL tree) of cache
   1836    lines.  This can grow arbitrarily large, and so should ensure that
   1837    Memcheck runs out of memory in preference to losing useful origin
   1838    info due to cache size limitations.
   1839 
   1840    Shadowing registers is a bit tricky, because the shadow values are
   1841    32 bits, regardless of the size of the register.  That gives a
   1842    problem for registers smaller than 32 bits.  The solution is to
   1843    find spaces in the guest state that are unused, and use those to
   1844    shadow guest state fragments smaller than 32 bits.  For example, on
   1845    ppc32/64, each vector register is 16 bytes long.  If 4 bytes of the
   1846    shadow are allocated for the register's otag, then there are still
   1847    12 bytes left over which could be used to shadow 3 other values.
   1848 
   1849    This implies there is some non-obvious mapping from guest state
   1850    (start,length) pairs to the relevant shadow offset (for the origin
   1851    tags).  And it is unfortunately guest-architecture specific.  The
   1852    mapping is contained in mc_machine.c, which is quite lengthy but
   1853    straightforward.
   1854 
   1855    Instrumenting the IR
   1856    ~~~~~~~~~~~~~~~~~~~~
   1857 
   1858    Instrumentation is largely straightforward, and done by the
   1859    functions schemeE and schemeS in mc_translate.c.  These generate
   1860    code for handling the origin tags of expressions (E) and statements
   1861    (S) respectively.  The rather strange names are a reference to the
   1862    "compilation schemes" shown in Simon Peyton Jones' book "The
   1863    Implementation of Functional Programming Languages" (Prentice Hall,
   1864    1987, see
   1865    http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm).
   1866 
   1867    schemeS merely arranges to move shadow values around the guest
   1868    state to track the incoming IR.  schemeE is largely trivial too.
   1869    The only significant point is how to compute the otag corresponding
   1870    to binary (or ternary, quaternary, etc) operator applications.  The
   1871    rule is simple: just take whichever value is larger (32-bit
   1872    unsigned max).  Constants get the special value zero.  Hence this
   1873    rule always propagates a nonzero (known) otag in preference to a
   1874    zero (unknown, or more likely, value-is-defined) tag, as we want.
   1875    If two different undefined values are inputs to a binary operator
   1876    application, then which is propagated is arbitrary, but that
   1877    doesn't matter, since the program is erroneous in using either of
   1878    the values, and so there's no point in attempting to propagate
   1879    both.
   1880 
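           For example, if x is undefined and carries a nonzero otag t,
           while y is defined (otag 0), then Max32U(t, 0) == t, so the
           result of op(x,y) inherits x's origin, as intended.
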
   1881    Since constants are abstracted to (otag) zero, much of the
   1882    instrumentation code can be folded out without difficulty by the
   1883    generic post-instrumentation IR cleanup pass, using these rules:
   1884    Max32U(0,x) -> x, Max32U(x,0) -> x, and Max32U(x,y) where x and y
   1885    are constants is evaluated at JIT time; the resulting dead code is
   1886    then removed.  In practice this causes surprisingly few Max32Us to
   1887    survive through to backend code generation.
   1888 
   1889    Integration with the V-bits machinery
   1890    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   1891 
   1892    This is again largely straightforward.  Mostly the otag and V bits
   1893    stuff are independent.  The only point of interaction is when the V
   1894    bits instrumenter creates a call to a helper function to report an
   1895    uninitialised value error -- in that case it must first use schemeE
   1896    to get hold of the origin tag expression for the value, and pass
   1897    that to the helper too.
   1898 
   1899    There is the usual stuff to do with setting address range
   1900    permissions.  When memory is painted undefined, we must also know
   1901    the origin tag to paint with, which involves some tedious plumbing,
   1902    particularly to do with the fast case stack handlers.  When memory
   1903    is painted defined or noaccess then the origin tags must be forced
   1904    to zero.
   1905 
   1906    One of the goals of the implementation was to ensure that the
   1907    non-origin tracking mode isn't slowed down at all.  To do this,
   1908    various functions to do with memory permissions setting (again,
   1909    mostly pertaining to the stack) are duplicated for the with- and
   1910    without-otag case.
   1911 
   1912    Dealing with stack redzones, and the NIA cache
   1913    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   1914 
   1915    This is one of the few non-obvious parts of the implementation.
   1916 
   1917    Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small
   1918    reserved area below the stack pointer, that can be used as scratch
   1919    space by compiler generated code for functions.  In the Memcheck
   1920    sources this is referred to as the "stack redzone".  The important
   1921    thing here is that such redzones are considered volatile across
   1922    function calls and returns.  So Memcheck takes care to mark them as
   1923    undefined for each call and return, on the afflicted platforms.
   1924    Past experience shows this is essential in order to get reliable
   1925    messages about uninitialised values that come from the stack.
   1926 
   1927    So the question is, when we paint a redzone undefined, what origin
   1928    tag should we use for it?  Consider a function f() calling g().  If
   1929    we paint the redzone using an otag derived from the ExeContext of
   1930    the CALL/BL instruction in f, then any errors in g causing it to
   1931    use uninitialised values that happen to lie in the redzone, will be
   1932    reported as having their origin in f.  Which is highly confusing.
   1933 
   1934    The same applies for returns: if, on a return, we paint the redzone
   1935    using an origin tag derived from the ExeContext of the RET/BLR
   1936    instruction in g, then any later errors in f causing it to use
   1937    uninitialised values in the redzone, will be reported as having
   1938    their origin in g.  Which is just as confusing.
   1939 
   1940    To do it right, in both cases we need to use an origin tag which
   1941    pertains to the instruction which dynamically follows the CALL/BL
   1942    or RET/BLR.  In short, one derived from the NIA - the "next
   1943    instruction address".
   1944 
   1945    To make this work, Memcheck's redzone-painting helper,
   1946    MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the
   1947    NIA.  It converts the NIA to a 1-element ExeContext, and uses that
   1948    ExeContext's ECU as the basis for the otag used to paint the
   1949    redzone.  The expensive part of this is converting an NIA into an
   1950    ECU, since this happens once for every call and every return.  So
   1951    we use a simple 511-line, 2-way set associative cache
   1952    (nia_to_ecu_cache) to cache the mappings, and that knocks most of
   1953    the cost out.
   1954 
   1955    Further background comments
   1956    ~~~~~~~~~~~~~~~~~~~~~~~~~~~
   1957 
   1958    > Question: why is otag a UInt?  Wouldn't a UWord be better?  Isn't
   1959    > it really just the address of the relevant ExeContext?
   1960 
   1961    Well, it's not the address, but a value which has a 1-1 mapping
   1962    with ExeContexts, and is guaranteed not to be zero, since zero
   1963    denotes (to memcheck) "unknown origin or defined value".  So these
   1964    UInts are just numbers starting at 4 and incrementing by 4; each
   1965    ExeContext is given a number when it is created.  (*** NOTE this
   1966    confuses otags and ECUs; see comments above ***).
   1967 
   1968    Making these otags 32-bit regardless of the machine's word size
   1969    makes the 64-bit implementation easier (next para).  And it doesn't
   1970    really limit us in any way, since for the tags to overflow would
   1971    require that the program somehow caused 2^30-1 different
   1972    ExeContexts to be created, in which case it is probably in deep
   1973    trouble.  Not to mention V will have soaked up many tens of
   1974    gigabytes of memory merely to store them all.
   1975 
   1976    So having 64-bit origins doesn't really buy you anything, and has
   1977    the following downsides:
   1978 
   1979    Suppose that instead, an otag is a UWord.  This would mean that, on
   1980    a 64-bit target,
   1981 
   1982    1. It becomes hard to shadow any element of guest state which is
   1983       smaller than 8 bytes.  To do so means you'd need to find some
   1984       8-byte-sized hole in the guest state which you don't want to
   1985       shadow, and use that instead to hold the otag.  On ppc64, the
   1986       condition code register(s) are split into 20 UChar sized pieces,
   1987       all of which need to be tracked (guest_XER_SO .. guest_CR7_0)
   1988       and so that would entail finding 160 bytes somewhere else in the
   1989       guest state.
   1990 
   1991       Even on x86, I want to track origins for %AH .. %DH (bits 15:8
   1992       of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of
   1993       same) and so I had to look for 4 untracked otag-sized areas in
   1994       the guest state to make that possible.
   1995 
   1996       The same problem exists of course when origin tags are only 32
   1997       bits, but it's less extreme.
   1998 
   1999    2. (More compelling) it doubles the size of the origin shadow
   2000       memory.  Given that the shadow memory is organised as a fixed
   2001       size cache, and that accuracy of tracking is limited by origins
   2002       falling out the cache due to space conflicts, this isn't good.
   2003 
   2004    > Another question: is the origin tracking perfect, or are there
   2005    > cases where it fails to determine an origin?
   2006 
   2007    It is imperfect for at least the following reasons, and
   2008    probably more:
   2009 
   2010    * Insufficient capacity in the origin cache.  When a line is
   2011      evicted from the cache it is gone forever, and so subsequent
   2012      queries for the line produce zero, indicating no origin
   2013      information.  Interestingly, a line containing all zeroes can be
   2014      evicted "free" from the cache, since it contains no useful
   2015      information, so there is scope perhaps for some cleverer cache
   2016      management schemes.  (*** NOTE, with the introduction of the
   2017      second level origin tag cache, ocacheL2, this is no longer a
   2018      problem. ***)
   2019 
   2020    * The origin cache only stores one otag per 32-bits of address
   2021      space, plus 4 bits indicating which of the 4 bytes has that tag
   2022      and which are considered defined.  The result is that if two
   2023      undefined bytes in the same word are stored in memory, the first
   2024      stored byte's origin will be lost and replaced by the origin for
   2025      the second byte.
   2026 
   2027    * Nonzero origin tags for defined values.  Consider a binary
   2028      operator application op(x,y).  Suppose y is undefined (and so has
   2029      a valid nonzero origin tag), and x is defined, but erroneously
   2030      has a nonzero origin tag (defined values should have tag zero).
   2031      If the erroneous tag has a numeric value greater than y's tag,
   2032      then the rule for propagating origin tags through binary
   2033      operations, which is simply to take the unsigned max of the two
   2034      tags, will erroneously propagate x's tag rather than y's.
   2035 
   2036    * Some obscure uses of x86/amd64 byte registers can cause lossage
   2037      or confusion of origins.  %AH .. %DH are treated as different
   2038      from, and unrelated to, their parent registers, %EAX .. %EDX.
   2039      So some weird sequences like
   2040 
   2041         movb undefined-value, %AH
   2042         movb defined-value, %AL
   2043         .. use %AX or %EAX ..
   2044 
   2045      will cause the origin attributed to %AH to be ignored, since %AL,
   2046      %AX, %EAX are treated as the same register, and %AH as a
   2047      completely separate one.
   2048 
   2049    But having said all that, it actually seems to work fairly well in
   2050    practice.
   2051 */
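
        /* A minimal sketch (not part of the tool) restating, in code form,
           the otag rules described above: conversion between ECUs and otags,
           and the unsigned-max propagation rule for binary operator
           applications.  The helper names are illustrative only. */
        #if 0
        static UInt example_ecu_to_otag ( UInt ecu, UInt okind /* an MC_OKIND_ constant */ )
        {
           /* valid ECUs have their low 2 bits zero, so the kind fits there */
           return ecu | okind;
        }
        static UInt example_otag_to_ecu ( UInt otag )
        {
           return otag & ~3u;
        }
        static UInt example_propagate_otag ( UInt otag1, UInt otag2 )
        {
           /* unsigned max: prefer a nonzero (known) origin over zero */
           return otag1 > otag2 ? otag1 : otag2;
        }
        #endif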
   2052 
   2053 static UWord stats_ocacheL1_find           = 0;
   2054 static UWord stats_ocacheL1_found_at_1     = 0;
   2055 static UWord stats_ocacheL1_found_at_N     = 0;
   2056 static UWord stats_ocacheL1_misses         = 0;
   2057 static UWord stats_ocacheL1_lossage        = 0;
   2058 static UWord stats_ocacheL1_movefwds       = 0;
   2059 
   2060 static UWord stats__ocacheL2_refs          = 0;
   2061 static UWord stats__ocacheL2_misses        = 0;
   2062 static UWord stats__ocacheL2_n_nodes_max   = 0;
   2063 
   2064 /* Cache of 32-bit values, one every 32 bits of address space */
   2065 
   2066 #define OC_BITS_PER_LINE 5
   2067 #define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2))
   2068 
   2069 static INLINE UWord oc_line_offset ( Addr a ) {
   2070    return (a >> 2) & (OC_W32S_PER_LINE - 1);
   2071 }
   2072 static INLINE Bool is_valid_oc_tag ( Addr tag ) {
   2073    return 0 == (tag & ((1 << OC_BITS_PER_LINE) - 1));
   2074 }
   2075 
   2076 #define OC_LINES_PER_SET 2
   2077 
   2078 #define OC_N_SET_BITS    20
   2079 #define OC_N_SETS        (1 << OC_N_SET_BITS)
   2080 
   2081 /* These settings give:
   2082    64 bit host: ocache:  100,663,296 sizeB    67,108,864 useful
   2083    32 bit host: ocache:   92,274,688 sizeB    67,108,864 useful
   2084 */
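        /* Worked derivation of the figures above (illustrative, ignoring any
           struct padding): each OCacheLine (defined just below) holds an
           Addr tag, 8 UInts of origin data (32 bytes) and 8 UChar descrs
           (8 bytes).  On a 64-bit host that is 8+32+8 = 48 bytes per line;
           with 2 lines per set and 2^20 sets, 48 * 2 * 1048576 = 100,663,296
           bytes in total, of which the w32[] payload (32 * 2 * 1048576 =
           67,108,864 bytes) is the "useful" part.  On a 32-bit host the tag
           is 4 bytes, giving 44 * 2 * 1048576 = 92,274,688 bytes. */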
   2085 
   2086 #define OC_MOVE_FORWARDS_EVERY_BITS 7
   2087 
   2088 
   2089 typedef
   2090    struct {
   2091       Addr  tag;
   2092       UInt  w32[OC_W32S_PER_LINE];
   2093       UChar descr[OC_W32S_PER_LINE];
   2094    }
   2095    OCacheLine;
   2096 
   2097 /* Classify and also sanity-check 'line'.  Return 'e' (empty) if not
   2098    in use, 'n' (nonzero) if it contains at least one valid origin tag,
   2099    and 'z' if all the represented tags are zero. */
   2100 static UChar classify_OCacheLine ( OCacheLine* line )
   2101 {
   2102    UWord i;
   2103    if (line->tag == 1/*invalid*/)
   2104       return 'e'; /* EMPTY */
   2105    tl_assert(is_valid_oc_tag(line->tag));
   2106    for (i = 0; i < OC_W32S_PER_LINE; i++) {
   2107       tl_assert(0 == ((~0xF) & line->descr[i]));
   2108       if (line->w32[i] > 0 && line->descr[i] > 0)
   2109          return 'n'; /* NONZERO - contains useful info */
   2110    }
   2111    return 'z'; /* ZERO - no useful info */
   2112 }
   2113 
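        /* Illustrative sketch (hypothetical helper, not used by the tool):
           how an OCacheLine's descr mask and w32 words combine to give the
           origin for a single byte, assuming the per-byte bit convention
           used by the MC_(helperc_b_*) load/store helpers elsewhere in this
           file, namely that bit (a & 3) of descr[lineoff] says whether that
           byte shares its 32-bit word's otag or has no recorded origin. */
        #if 0
        static UInt example_otag_for_byte ( OCacheLine* line, Addr a )
        {
           UWord lineoff = oc_line_offset(a);  /* which 32-bit word in the line */
           UWord byteoff = a & 3;              /* which byte within that word   */
           if (line->descr[lineoff] & (1 << byteoff))
              return line->w32[lineoff];       /* byte carries the word's otag  */
           return 0;                           /* zero means "no origin"        */
        }
        #endif
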
   2114 typedef
   2115    struct {
   2116       OCacheLine line[OC_LINES_PER_SET];
   2117    }
   2118    OCacheSet;
   2119 
   2120 typedef
   2121    struct {
   2122       OCacheSet set[OC_N_SETS];
   2123    }
   2124    OCache;
   2125 
   2126 static OCache* ocacheL1 = NULL;
   2127 static UWord   ocacheL1_event_ctr = 0;
   2128 
   2129 static void init_ocacheL2 ( void ); /* fwds */
   2130 static void init_OCache ( void )
   2131 {
   2132    UWord line, set;
   2133    tl_assert(MC_(clo_mc_level) >= 3);
   2134    tl_assert(ocacheL1 == NULL);
   2135    ocacheL1 = VG_(am_shadow_alloc)(sizeof(OCache));
   2136    if (ocacheL1 == NULL) {
   2137       VG_(out_of_memory_NORETURN)( "memcheck:allocating ocacheL1",
   2138                                    sizeof(OCache) );
   2139    }
   2140    tl_assert(ocacheL1 != NULL);
   2141    for (set = 0; set < OC_N_SETS; set++) {
   2142       for (line = 0; line < OC_LINES_PER_SET; line++) {
   2143          ocacheL1->set[set].line[line].tag = 1/*invalid*/;
   2144       }
   2145    }
   2146    init_ocacheL2();
   2147 }
   2148 
   2149 static void moveLineForwards ( OCacheSet* set, UWord lineno )
   2150 {
   2151    OCacheLine tmp;
   2152    stats_ocacheL1_movefwds++;
   2153    tl_assert(lineno > 0 && lineno < OC_LINES_PER_SET);
   2154    tmp = set->line[lineno-1];
   2155    set->line[lineno-1] = set->line[lineno];
   2156    set->line[lineno] = tmp;
   2157 }
   2158 
   2159 static void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) {
   2160    UWord i;
   2161    for (i = 0; i < OC_W32S_PER_LINE; i++) {
   2162       line->w32[i] = 0; /* NO ORIGIN */
   2163       line->descr[i] = 0; /* REALLY REALLY NO ORIGIN! */
   2164    }
   2165    line->tag = tag;
   2166 }
   2167 
   2168 //////////////////////////////////////////////////////////////
   2169 //// OCache backing store
   2170 
   2171 static OSet* ocacheL2 = NULL;
   2172 
   2173 static void* ocacheL2_malloc ( HChar* cc, SizeT szB ) {
   2174    return VG_(malloc)(cc, szB);
   2175 }
   2176 static void ocacheL2_free ( void* v ) {
   2177    VG_(free)( v );
   2178 }
   2179 
   2180 /* Stats: # nodes currently in tree */
   2181 static UWord stats__ocacheL2_n_nodes = 0;
   2182 
   2183 static void init_ocacheL2 ( void )
   2184 {
   2185    tl_assert(!ocacheL2);
   2186    tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */
   2187    tl_assert(0 == offsetof(OCacheLine,tag));
   2188    ocacheL2
   2189       = VG_(OSetGen_Create)( offsetof(OCacheLine,tag),
   2190                              NULL, /* fast cmp */
   2191                              ocacheL2_malloc, "mc.ioL2", ocacheL2_free );
   2192    tl_assert(ocacheL2);
   2193    stats__ocacheL2_n_nodes = 0;
   2194 }
   2195 
   2196 /* Find line with the given tag in the tree, or NULL if not found. */
   2197 static OCacheLine* ocacheL2_find_tag ( Addr tag )
   2198 {
   2199    OCacheLine* line;
   2200    tl_assert(is_valid_oc_tag(tag));
   2201    stats__ocacheL2_refs++;
   2202    line = VG_(OSetGen_Lookup)( ocacheL2, &tag );
   2203    return line;
   2204 }
   2205 
   2206 /* Delete the line with the given tag from the tree, if it is present, and
   2207    free up the associated memory. */
   2208 static void ocacheL2_del_tag ( Addr tag )
   2209 {
   2210    OCacheLine* line;
   2211    tl_assert(is_valid_oc_tag(tag));
   2212    stats__ocacheL2_refs++;
   2213    line = VG_(OSetGen_Remove)( ocacheL2, &tag );
   2214    if (line) {
   2215       VG_(OSetGen_FreeNode)(ocacheL2, line);
   2216       tl_assert(stats__ocacheL2_n_nodes > 0);
   2217       stats__ocacheL2_n_nodes--;
   2218    }
   2219 }
   2220 
   2221 /* Add a copy of the given line to the tree.  It must not already be
   2222    present. */
   2223 static void ocacheL2_add_line ( OCacheLine* line )
   2224 {
   2225    OCacheLine* copy;
   2226    tl_assert(is_valid_oc_tag(line->tag));
   2227    copy = VG_(OSetGen_AllocNode)( ocacheL2, sizeof(OCacheLine) );
   2228    tl_assert(copy);
   2229    *copy = *line;
   2230    stats__ocacheL2_refs++;
   2231    VG_(OSetGen_Insert)( ocacheL2, copy );
   2232    stats__ocacheL2_n_nodes++;
   2233    if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max)
   2234       stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes;
   2235 }
   2236 
   2237 ////
   2238 //////////////////////////////////////////////////////////////
   2239 
   2240 __attribute__((noinline))
   2241 static OCacheLine* find_OCacheLine_SLOW ( Addr a )
   2242 {
   2243    OCacheLine *victim, *inL2;
   2244    UChar c;
   2245    UWord line;
   2246    UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
   2247    UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
   2248    UWord tag     = a & tagmask;
   2249    tl_assert(setno >= 0 && setno < OC_N_SETS);
   2250 
   2251    /* we already tried line == 0; skip therefore. */
   2252    for (line = 1; line < OC_LINES_PER_SET; line++) {
   2253       if (ocacheL1->set[setno].line[line].tag == tag) {
   2254          if (line == 1) {
   2255             stats_ocacheL1_found_at_1++;
   2256          } else {
   2257             stats_ocacheL1_found_at_N++;
   2258          }
   2259          if (UNLIKELY(0 == (ocacheL1_event_ctr++
   2260                             & ((1<<OC_MOVE_FORWARDS_EVERY_BITS)-1)))) {
   2261             moveLineForwards( &ocacheL1->set[setno], line );
   2262             line--;
   2263          }
   2264          return &ocacheL1->set[setno].line[line];
   2265       }
   2266    }
   2267 
   2268    /* A miss.  Use the last slot.  Implicitly this means we're
   2269       ejecting the line in the last slot. */
   2270    stats_ocacheL1_misses++;
   2271    tl_assert(line == OC_LINES_PER_SET);
   2272    line--;
   2273    tl_assert(line > 0);
   2274 
   2275    /* First, move the to-be-ejected line to the L2 cache. */
   2276    victim = &ocacheL1->set[setno].line[line];
   2277    c = classify_OCacheLine(victim);
   2278    switch (c) {
   2279       case 'e':
   2280          /* the line is empty (has invalid tag); ignore it. */
   2281          break;
   2282       case 'z':
   2283          /* line contains zeroes.  We must ensure the backing store is
   2284             updated accordingly, either by copying the line there
   2285             verbatim, or by ensuring it isn't present there.  We
   2286             choose the latter on the basis that it reduces the size of
   2287             the backing store. */
   2288          ocacheL2_del_tag( victim->tag );
   2289          break;
   2290       case 'n':
   2291          /* line contains at least one real, useful origin.  Copy it
   2292             to the backing store. */
   2293          stats_ocacheL1_lossage++;
   2294          inL2 = ocacheL2_find_tag( victim->tag );
   2295          if (inL2) {
   2296             *inL2 = *victim;
   2297          } else {
   2298             ocacheL2_add_line( victim );
   2299          }
   2300          break;
   2301       default:
   2302          tl_assert(0);
   2303    }
   2304 
   2305    /* Now we must reload the L1 cache from the backing tree, if
   2306       possible. */
   2307    tl_assert(tag != victim->tag); /* stay sane */
   2308    inL2 = ocacheL2_find_tag( tag );
   2309    if (inL2) {
   2310       /* We're in luck.  It's in the L2. */
   2311       ocacheL1->set[setno].line[line] = *inL2;
   2312    } else {
   2313       /* Missed at both levels of the cache hierarchy.  We have to
   2314          declare it as full of zeroes (unknown origins). */
   2315       stats__ocacheL2_misses++;
   2316       zeroise_OCacheLine( &ocacheL1->set[setno].line[line], tag );
   2317    }
   2318 
   2319    /* Move it one forwards */
   2320    moveLineForwards( &ocacheL1->set[setno], line );
   2321    line--;
   2322 
   2323    return &ocacheL1->set[setno].line[line];
   2324 }
   2325 
   2326 static INLINE OCacheLine* find_OCacheLine ( Addr a )
   2327 {
   2328    UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
   2329    UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
   2330    UWord tag     = a & tagmask;
   2331 
   2332    stats_ocacheL1_find++;
   2333 
   2334    if (OC_ENABLE_ASSERTIONS) {
   2335       tl_assert(setno >= 0 && setno < OC_N_SETS);
   2336       tl_assert(0 == (tag & (4 * OC_W32S_PER_LINE - 1)));
   2337    }
   2338 
   2339    if (LIKELY(ocacheL1->set[setno].line[0].tag == tag)) {
   2340       return &ocacheL1->set[setno].line[0];
   2341    }
   2342 
   2343    return find_OCacheLine_SLOW( a );
   2344 }
   2345 
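        /* Worked example (illustrative): with OC_BITS_PER_LINE == 5 and
           OC_N_SET_BITS == 20, the address a == 0x400123 gives
           tag == (a & ~0x1F) == 0x400120 (the line covers 0x400120..0x40013F)
           and setno == ((a >> 5) & 0xFFFFF) == 0x20009; within the line the
           byte sits at offset 3 of 32-bit word 0, since oc_line_offset(a) == 0. */
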
   2346 static INLINE void set_aligned_word64_Origin_to_undef ( Addr a, UInt otag )
   2347 {
   2348    //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
   2349    //// Set the origins for a+0 .. a+7
   2350    { OCacheLine* line;
   2351      UWord lineoff = oc_line_offset(a);
   2352      if (OC_ENABLE_ASSERTIONS) {
   2353         tl_assert(lineoff >= 0
   2354                   && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
   2355      }
   2356      line = find_OCacheLine( a );
   2357      line->descr[lineoff+0] = 0xF;
   2358      line->descr[lineoff+1] = 0xF;
   2359      line->w32[lineoff+0]   = otag;
   2360      line->w32[lineoff+1]   = otag;
   2361    }
   2362    //// END inlined, specialised version of MC_(helperc_b_store8)
   2363 }
   2364 
   2365 
   2366 /*------------------------------------------------------------*/
   2367 /*--- Aligned fast case permission setters,                ---*/
   2368 /*--- for dealing with stacks                              ---*/
   2369 /*------------------------------------------------------------*/
   2370 
   2371 /*--------------------- 32-bit ---------------------*/
   2372 
   2373 /* Nb: by "aligned" here we mean 4-byte aligned */
   2374 
   2375 static INLINE void make_aligned_word32_undefined ( Addr a )
   2376 {
   2377    PROF_EVENT(300, "make_aligned_word32_undefined");
   2378 
   2379 #ifndef PERF_FAST_STACK2
   2380    make_mem_undefined(a, 4);
   2381 #else
   2382    {
   2383       UWord   sm_off;
   2384       SecMap* sm;
   2385 
   2386       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
   2387          PROF_EVENT(301, "make_aligned_word32_undefined-slow1");
   2388          make_mem_undefined(a, 4);
   2389          return;
   2390       }
   2391 
   2392       sm                  = get_secmap_for_writing_low(a);
   2393       sm_off              = SM_OFF(a);
   2394       sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
   2395    }
   2396 #endif
   2397 }
   2398 
   2399 static INLINE
   2400 void make_aligned_word32_undefined_w_otag ( Addr a, UInt otag )
   2401 {
   2402    make_aligned_word32_undefined(a);
   2403    //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
   2404    //// Set the origins for a+0 .. a+3
   2405    { OCacheLine* line;
   2406      UWord lineoff = oc_line_offset(a);
   2407      if (OC_ENABLE_ASSERTIONS) {
   2408         tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   2409      }
   2410      line = find_OCacheLine( a );
   2411      line->descr[lineoff] = 0xF;
   2412      line->w32[lineoff]   = otag;
   2413    }
   2414    //// END inlined, specialised version of MC_(helperc_b_store4)
   2415 }
   2416 
   2417 static INLINE
   2418 void make_aligned_word32_noaccess ( Addr a )
   2419 {
   2420    PROF_EVENT(310, "make_aligned_word32_noaccess");
   2421 
   2422 #ifndef PERF_FAST_STACK2
   2423    MC_(make_mem_noaccess)(a, 4);
   2424 #else
   2425    {
   2426       UWord   sm_off;
   2427       SecMap* sm;
   2428 
   2429       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
   2430          PROF_EVENT(311, "make_aligned_word32_noaccess-slow1");
   2431          MC_(make_mem_noaccess)(a, 4);
   2432          return;
   2433       }
   2434 
   2435       sm                  = get_secmap_for_writing_low(a);
   2436       sm_off              = SM_OFF(a);
   2437       sm->vabits8[sm_off] = VA_BITS8_NOACCESS;
   2438 
   2439       //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
   2440       //// Set the origins for a+0 .. a+3.
   2441       if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
   2442          OCacheLine* line;
   2443          UWord lineoff = oc_line_offset(a);
   2444          if (OC_ENABLE_ASSERTIONS) {
   2445             tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   2446          }
   2447          line = find_OCacheLine( a );
   2448          line->descr[lineoff] = 0;
   2449       }
   2450       //// END inlined, specialised version of MC_(helperc_b_store4)
   2451    }
   2452 #endif
   2453 }
   2454 
   2455 /*--------------------- 64-bit ---------------------*/
   2456 
   2457 /* Nb: by "aligned" here we mean 8-byte aligned */
   2458 
   2459 static INLINE void make_aligned_word64_undefined ( Addr a )
   2460 {
   2461    PROF_EVENT(320, "make_aligned_word64_undefined");
   2462 
   2463 #ifndef PERF_FAST_STACK2
   2464    make_mem_undefined(a, 8);
   2465 #else
   2466    {
   2467       UWord   sm_off16;
   2468       SecMap* sm;
   2469 
   2470       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
   2471          PROF_EVENT(321, "make_aligned_word64_undefined-slow1");
   2472          make_mem_undefined(a, 8);
   2473          return;
   2474       }
   2475 
   2476       sm       = get_secmap_for_writing_low(a);
   2477       sm_off16 = SM_OFF_16(a);
   2478       ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_UNDEFINED;
   2479    }
   2480 #endif
   2481 }
   2482 
   2483 static INLINE
   2484 void make_aligned_word64_undefined_w_otag ( Addr a, UInt otag )
   2485 {
   2486    make_aligned_word64_undefined(a);
   2487    //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
   2488    //// Set the origins for a+0 .. a+7
   2489    { OCacheLine* line;
   2490      UWord lineoff = oc_line_offset(a);
   2491      tl_assert(lineoff >= 0
   2492                && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
   2493      line = find_OCacheLine( a );
   2494      line->descr[lineoff+0] = 0xF;
   2495      line->descr[lineoff+1] = 0xF;
   2496      line->w32[lineoff+0]   = otag;
   2497      line->w32[lineoff+1]   = otag;
   2498    }
   2499    //// END inlined, specialised version of MC_(helperc_b_store8)
   2500 }
   2501 
   2502 static INLINE
   2503 void make_aligned_word64_noaccess ( Addr a )
   2504 {
   2505    PROF_EVENT(330, "make_aligned_word64_noaccess");
   2506 
   2507 #ifndef PERF_FAST_STACK2
   2508    MC_(make_mem_noaccess)(a, 8);
   2509 #else
   2510    {
   2511       UWord   sm_off16;
   2512       SecMap* sm;
   2513 
   2514       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
   2515          PROF_EVENT(331, "make_aligned_word64_noaccess-slow1");
   2516          MC_(make_mem_noaccess)(a, 8);
   2517          return;
   2518       }
   2519 
   2520       sm       = get_secmap_for_writing_low(a);
   2521       sm_off16 = SM_OFF_16(a);
   2522       ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_NOACCESS;
   2523 
   2524       //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
   2525       //// Clear the origins for a+0 .. a+7.
   2526       if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
   2527          OCacheLine* line;
   2528          UWord lineoff = oc_line_offset(a);
   2529          tl_assert(lineoff >= 0
   2530                    && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
   2531          line = find_OCacheLine( a );
   2532          line->descr[lineoff+0] = 0;
   2533          line->descr[lineoff+1] = 0;
   2534       }
   2535       //// END inlined, specialised version of MC_(helperc_b_store8)
   2536    }
   2537 #endif
   2538 }
   2539 
   2540 
   2541 /*------------------------------------------------------------*/
   2542 /*--- Stack pointer adjustment                             ---*/
   2543 /*------------------------------------------------------------*/
   2544 
   2545 #ifdef PERF_FAST_STACK
   2546 #  define MAYBE_USED
   2547 #else
   2548 #  define MAYBE_USED __attribute__((unused))
   2549 #endif
   2550 
   2551 /*--------------- adjustment by 4 bytes ---------------*/
   2552 
   2553 MAYBE_USED
   2554 static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP, UInt ecu)
   2555 {
   2556    UInt otag = ecu | MC_OKIND_STACK;
   2557    PROF_EVENT(110, "new_mem_stack_4");
   2558    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2559       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
   2560    } else {
   2561       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 4, otag );
   2562    }
   2563 }
   2564 
   2565 MAYBE_USED
   2566 static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP)
   2567 {
   2568    PROF_EVENT(110, "new_mem_stack_4");
   2569    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2570       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2571    } else {
   2572       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 4 );
   2573    }
   2574 }
   2575 
   2576 MAYBE_USED
   2577 static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP)
   2578 {
   2579    PROF_EVENT(120, "die_mem_stack_4");
   2580    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2581       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
   2582    } else {
   2583       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 );
   2584    }
   2585 }
   2586 
   2587 /*--------------- adjustment by 8 bytes ---------------*/
   2588 
   2589 MAYBE_USED
   2590 static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP, UInt ecu)
   2591 {
   2592    UInt otag = ecu | MC_OKIND_STACK;
   2593    PROF_EVENT(111, "new_mem_stack_8");
   2594    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2595       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
   2596    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2597       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
   2598       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
   2599    } else {
   2600       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 8, otag );
   2601    }
   2602 }
   2603 
   2604 MAYBE_USED
   2605 static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP)
   2606 {
   2607    PROF_EVENT(111, "new_mem_stack_8");
   2608    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2609       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2610    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2611       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2612       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
   2613    } else {
   2614       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 8 );
   2615    }
   2616 }
   2617 
   2618 MAYBE_USED
   2619 static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP)
   2620 {
   2621    PROF_EVENT(121, "die_mem_stack_8");
   2622    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2623       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
   2624    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2625       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
   2626       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
   2627    } else {
   2628       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 );
   2629    }
   2630 }
   2631 
   2632 /*--------------- adjustment by 12 bytes ---------------*/
   2633 
   2634 MAYBE_USED
   2635 static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP, UInt ecu)
   2636 {
   2637    UInt otag = ecu | MC_OKIND_STACK;
   2638    PROF_EVENT(112, "new_mem_stack_12");
   2639    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2640       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
   2641       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
   2642    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2643       /* From the previous test we don't have 8-alignment at offset +0,
   2644          hence we must have 8-alignment at offset +4.  Hence it is safe
   2645          to do 4 bytes at +0 and then 8 bytes at +4. */
   2646       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
   2647       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
   2648    } else {
   2649       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 12, otag );
   2650    }
   2651 }
   2652 
   2653 MAYBE_USED
   2654 static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP)
   2655 {
   2656    PROF_EVENT(112, "new_mem_stack_12");
   2657    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2658       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2659       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   2660    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2661       /* From the previous test we don't have 8-alignment at offset +0,
   2662          hence we must have 8-alignment at offset +4.  Hence it is safe
   2663          to do 4 bytes at +0 and then 8 bytes at +4. */
   2664       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2665       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
   2666    } else {
   2667       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 12 );
   2668    }
   2669 }
   2670 
   2671 MAYBE_USED
   2672 static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP)
   2673 {
   2674    PROF_EVENT(122, "die_mem_stack_12");
   2675    /* Note the -12 in the test */
   2676    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP-12 )) {
   2677       /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at
   2678          -4. */
   2679       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
   2680       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
   2681    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2682       /* We have 4-alignment at +0, but we don't have 8-alignment at
   2683          -12.  So we must have 8-alignment at -8.  Hence do 4 at -12
   2684          and then 8 at -8. */
   2685       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
   2686       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
   2687    } else {
   2688       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 );
   2689    }
   2690 }
   2691 
   2692 /*--------------- adjustment by 16 bytes ---------------*/
   2693 
   2694 MAYBE_USED
   2695 static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP, UInt ecu)
   2696 {
   2697    UInt otag = ecu | MC_OKIND_STACK;
   2698    PROF_EVENT(113, "new_mem_stack_16");
   2699    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2700       /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
   2701       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
   2702       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
   2703    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2704       /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
   2705          Hence do 4 at +0, 8 at +4, 4 at +12. */
   2706       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   2707       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
   2708       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
   2709    } else {
   2710       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 16, otag );
   2711    }
   2712 }
   2713 
   2714 MAYBE_USED
   2715 static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP)
   2716 {
   2717    PROF_EVENT(113, "new_mem_stack_16");
   2718    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2719       /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
   2720       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2721       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   2722    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2723       /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
   2724          Hence do 4 at +0, 8 at +4, 4 at +12. */
   2725       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2726       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4  );
   2727       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
   2728    } else {
   2729       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 16 );
   2730    }
   2731 }
   2732 
   2733 MAYBE_USED
   2734 static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP)
   2735 {
   2736    PROF_EVENT(123, "die_mem_stack_16");
   2737    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2738       /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */
   2739       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   2740       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
   2741    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2742       /* 8 alignment must be at -12.  Do 4 at -16, 8 at -12, 4 at -4. */
   2743       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   2744       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
   2745       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
   2746    } else {
   2747       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 );
   2748    }
   2749 }
   2750 
   2751 /*--------------- adjustment by 32 bytes ---------------*/
   2752 
   2753 MAYBE_USED
   2754 static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP, UInt ecu)
   2755 {
   2756    UInt otag = ecu | MC_OKIND_STACK;
   2757    PROF_EVENT(114, "new_mem_stack_32");
   2758    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2759       /* Straightforward */
   2760       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   2761       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
   2762       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
   2763       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
   2764    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2765       /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
   2766          +0,+28. */
   2767       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   2768       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
   2769       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
   2770       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+20, otag );
   2771       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+28, otag );
   2772    } else {
   2773       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 32, otag );
   2774    }
   2775 }
   2776 
   2777 MAYBE_USED
   2778 static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP)
   2779 {
   2780    PROF_EVENT(114, "new_mem_stack_32");
   2781    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2782       /* Straightforward */
   2783       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2784       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   2785       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   2786       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   2787    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2788       /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
   2789          +0,+28. */
   2790       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2791       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
   2792       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
   2793       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+20 );
   2794       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+28 );
   2795    } else {
   2796       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 32 );
   2797    }
   2798 }
   2799 
   2800 MAYBE_USED
   2801 static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP)
   2802 {
   2803    PROF_EVENT(124, "die_mem_stack_32");
   2804    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2805       /* Straightforward */
   2806       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   2807       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   2808       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   2809       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   2810    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2811       /* 8 alignment must be at -4 etc.  Hence do 8 at -12,-20,-28 and
   2812          4 at -32,-4. */
   2813       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   2814       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-28 );
   2815       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-20 );
   2816       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
   2817       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
   2818    } else {
   2819       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 );
   2820    }
   2821 }
   2822 
   2823 /*--------------- adjustment by 112 bytes ---------------*/
   2824 
   2825 MAYBE_USED
   2826 static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP, UInt ecu)
   2827 {
   2828    UInt otag = ecu | MC_OKIND_STACK;
   2829    PROF_EVENT(115, "new_mem_stack_112");
   2830    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2831       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   2832       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
   2833       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
   2834       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
   2835       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
   2836       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
   2837       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
   2838       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
   2839       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
   2840       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
   2841       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
   2842       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
   2843       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
   2844       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
   2845    } else {
   2846       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 112, otag );
   2847    }
   2848 }
   2849 
   2850 MAYBE_USED
   2851 static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP)
   2852 {
   2853    PROF_EVENT(115, "new_mem_stack_112");
   2854    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2855       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2856       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   2857       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   2858       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   2859       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
   2860       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
   2861       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
   2862       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
   2863       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
   2864       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
   2865       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
   2866       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
   2867       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
   2868       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
   2869    } else {
   2870       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 112 );
   2871    }
   2872 }
   2873 
   2874 MAYBE_USED
   2875 static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP)
   2876 {
   2877    PROF_EVENT(125, "die_mem_stack_112");
   2878    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2879       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
   2880       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
   2881       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
   2882       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
   2883       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
   2884       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
   2885       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
   2886       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
   2887       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
   2888       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
   2889       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   2890       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   2891       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   2892       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   2893    } else {
   2894       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-112, 112 );
   2895    }
   2896 }
   2897 
   2898 /*--------------- adjustment by 128 bytes ---------------*/
   2899 
   2900 MAYBE_USED
   2901 static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP, UInt ecu)
   2902 {
   2903    UInt otag = ecu | MC_OKIND_STACK;
   2904    PROF_EVENT(116, "new_mem_stack_128");
   2905    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2906       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   2907       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
   2908       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
   2909       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
   2910       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
   2911       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
   2912       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
   2913       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
   2914       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
   2915       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
   2916       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
   2917       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
   2918       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
   2919       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
   2920       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
   2921       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
   2922    } else {
   2923       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 128, otag );
   2924    }
   2925 }
   2926 
   2927 MAYBE_USED
   2928 static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP)
   2929 {
   2930    PROF_EVENT(116, "new_mem_stack_128");
   2931    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2932       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2933       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   2934       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   2935       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   2936       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
   2937       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
   2938       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
   2939       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
   2940       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
   2941       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
   2942       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
   2943       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
   2944       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
   2945       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
   2946       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
   2947       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
   2948    } else {
   2949       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 128 );
   2950    }
   2951 }
   2952 
   2953 MAYBE_USED
   2954 static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP)
   2955 {
   2956    PROF_EVENT(126, "die_mem_stack_128");
   2957    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2958       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
   2959       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
   2960       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
   2961       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
   2962       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
   2963       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
   2964       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
   2965       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
   2966       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
   2967       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
   2968       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
   2969       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
   2970       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   2971       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   2972       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   2973       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   2974    } else {
   2975       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-128, 128 );
   2976    }
   2977 }
   2978 
   2979 /*--------------- adjustment by 144 bytes ---------------*/
   2980 
   2981 MAYBE_USED
   2982 static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP, UInt ecu)
   2983 {
   2984    UInt otag = ecu | MC_OKIND_STACK;
   2985    PROF_EVENT(117, "new_mem_stack_144");
   2986    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2987       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
   2988       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
   2989       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
   2990       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
   2991       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
   2992       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
   2993       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
   2994       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
   2995       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
   2996       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
   2997       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
   2998       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
   2999       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
   3000       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
   3001       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
   3002       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
   3003       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
   3004       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
   3005    } else {
   3006       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 144, otag );
   3007    }
   3008 }
   3009 
   3010 MAYBE_USED
   3011 static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP)
   3012 {
   3013    PROF_EVENT(117, "new_mem_stack_144");
   3014    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3015       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   3016       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   3017       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   3018       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   3019       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
   3020       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
   3021       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
   3022       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
   3023       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
   3024       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
   3025       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
   3026       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
   3027       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
   3028       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
   3029       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
   3030       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
   3031       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
   3032       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
   3033    } else {
   3034       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 144 );
   3035    }
   3036 }
   3037 
   3038 MAYBE_USED
   3039 static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP)
   3040 {
   3041    PROF_EVENT(127, "die_mem_stack_144");
   3042    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3043       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
   3044       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
   3045       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
   3046       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
   3047       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
   3048       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
   3049       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
   3050       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
   3051       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
   3052       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
   3053       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
   3054       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
   3055       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
   3056       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
   3057       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   3058       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   3059       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   3060       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   3061    } else {
   3062       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-144, 144 );
   3063    }
   3064 }
   3065 
   3066 /*--------------- adjustment by 160 bytes ---------------*/
   3067 
   3068 MAYBE_USED
   3069 static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP, UInt ecu)
   3070 {
   3071    UInt otag = ecu | MC_OKIND_STACK;
   3072    PROF_EVENT(118, "new_mem_stack_160");
   3073    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3074       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
   3075       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
   3076       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
   3077       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
   3078       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
   3079       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
   3080       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
   3081       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
   3082       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
   3083       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
   3084       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
   3085       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
   3086       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
   3087       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
   3088       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
   3089       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
   3090       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
   3091       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
   3092       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+144, otag );
   3093       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+152, otag );
   3094    } else {
   3095       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 160, otag );
   3096    }
   3097 }
   3098 
   3099 MAYBE_USED
   3100 static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP)
   3101 {
   3102    PROF_EVENT(118, "new_mem_stack_160");
   3103    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3104       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   3105       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   3106       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   3107       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   3108       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
   3109       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
   3110       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
   3111       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
   3112       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
   3113       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
   3114       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
   3115       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
   3116       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
   3117       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
   3118       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
   3119       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
   3120       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
   3121       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
   3122       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+144 );
   3123       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+152 );
   3124    } else {
   3125       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 160 );
   3126    }
   3127 }
   3128 
   3129 MAYBE_USED
   3130 static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP)
   3131 {
   3132    PROF_EVENT(128, "die_mem_stack_160");
   3133    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3134       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-160);
   3135       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-152);
   3136       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
   3137       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
   3138       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
   3139       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
   3140       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
   3141       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
   3142       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
   3143       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
   3144       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
   3145       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
   3146       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
   3147       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
   3148       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
   3149       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
   3150       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   3151       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   3152       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   3153       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   3154    } else {
   3155       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-160, 160 );
   3156    }
   3157 }
   3158 
   3159 /*--------------- adjustment by N bytes ---------------*/
   3160 
   3161 static void mc_new_mem_stack_w_ECU ( Addr a, SizeT len, UInt ecu )
   3162 {
   3163    UInt otag = ecu | MC_OKIND_STACK;
   3164    PROF_EVENT(115, "new_mem_stack_w_otag");
   3165    MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + a, len, otag );
   3166 }
   3167 
   3168 static void mc_new_mem_stack ( Addr a, SizeT len )
   3169 {
   3170    PROF_EVENT(115, "new_mem_stack");
   3171    make_mem_undefined ( -VG_STACK_REDZONE_SZB + a, len );
   3172 }
   3173 
   3174 static void mc_die_mem_stack ( Addr a, SizeT len )
   3175 {
   3176    PROF_EVENT(125, "die_mem_stack");
   3177    MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + a, len );
   3178 }
   3179 
   3180 
   3181 /* The AMD64 ABI says:
   3182 
   3183    "The 128-byte area beyond the location pointed to by %rsp is considered
   3184     to be reserved and shall not be modified by signal or interrupt
   3185     handlers.  Therefore, functions may use this area for temporary data
   3186     that is not needed across function calls.  In particular, leaf functions
   3187     may use this area for their entire stack frame, rather than adjusting
   3188     the stack pointer in the prologue and epilogue.  This area is known as
   3189     red zone [sic]."
   3190 
   3191    So after any call or return we need to mark this redzone as containing
   3192    undefined values.
   3193 
   3194    Consider this:  we're in function f.  f calls g.  g moves rsp down
   3195    modestly (say 16 bytes) and writes stuff all over the red zone, making it
   3196    defined.  g returns.  f is buggy and reads from parts of the red zone
   3197    that it didn't write to.  But because g filled that area in, f picks up
   3198    defined V bits, and so any errors from reading bits of the red zone it
   3199    didn't write will be missed.  The only solution I could
   3200    think of was to make the red zone undefined when g returns to f.
   3201 
   3202    This is in accordance with the ABI, which makes it clear the redzone
   3203    is volatile across function calls.
   3204 
   3205    The problem occurs the other way round too: f could fill the RZ up
   3206    with defined values and g could mistakenly read them.  So the RZ
   3207    also needs to be nuked on function calls.
   3208 */
   3209 
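        /* A minimal sketch of that f/g scenario (hypothetical, schematic
           x86-64 fragments, for illustration only; not real guest or
           MemCheck code):

              g:  mov  QWORD PTR [rsp-16], 0    ; leaf g parks a temporary
                  ret                           ; below its caller's SP,
                                                ; leaving those bytes with
                                                ; 'defined' V bits

              f:  call g
                  mov  rax, QWORD PTR [rsp-24]  ; buggy: f reads red-zone
                                                ; bytes it never wrote

           Because the call pushed an 8-byte return address, g's [rsp-16]
           is the same location as f's [rsp-24], so f really does read a
           byte that only g defined.  Unless the red zone is re-marked
           undefined when g returns, the stale V bits mask the error. */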
   3210 
   3211 /* Here's a simple cache to hold nia -> ECU mappings.  It could be
   3212    improved so as to have a lower miss rate. */
   3213 
   3214 static UWord stats__nia_cache_queries = 0;
   3215 static UWord stats__nia_cache_misses  = 0;
   3216 
   3217 typedef
   3218    struct { UWord nia0; UWord ecu0;   /* nia0 maps to ecu0 */
   3219             UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */
   3220    WCacheEnt;
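        /* Each cache slot holds two independent nia->ecu pairs.  A lookup
           that hits the second pair swaps it to the front, and a miss
           demotes the front pair into the second position (see
           convert_nia_to_ecu below), giving a cheap two-entry pseudo-LRU
           per slot. */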
   3221 
   3222 #define N_NIA_TO_ECU_CACHE 511
   3223 
   3224 static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE];
   3225 
   3226 static void init_nia_to_ecu_cache ( void )
   3227 {
   3228    UWord       i;
   3229    Addr        zero_addr = 0;
   3230    ExeContext* zero_ec;
   3231    UInt        zero_ecu;
   3232    /* Fill all the slots with an entry for address zero, and the
   3233       relevant otags accordingly.  Hence the cache is initially filled
   3234       with valid data. */
   3235    zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr);
   3236    tl_assert(zero_ec);
   3237    zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec);
   3238    tl_assert(VG_(is_plausible_ECU)(zero_ecu));
   3239    for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) {
   3240       nia_to_ecu_cache[i].nia0 = zero_addr;
   3241       nia_to_ecu_cache[i].ecu0 = zero_ecu;
   3242       nia_to_ecu_cache[i].nia1 = zero_addr;
   3243       nia_to_ecu_cache[i].ecu1 = zero_ecu;
   3244    }
   3245 }
   3246 
   3247 static inline UInt convert_nia_to_ecu ( Addr nia )
   3248 {
   3249    UWord i;
   3250    UInt        ecu;
   3251    ExeContext* ec;
   3252 
   3253    tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) );
   3254 
   3255    stats__nia_cache_queries++;
   3256    i = nia % N_NIA_TO_ECU_CACHE;
   3257    tl_assert(i >= 0 && i < N_NIA_TO_ECU_CACHE);
   3258 
   3259    if (LIKELY( nia_to_ecu_cache[i].nia0 == nia ))
   3260       return nia_to_ecu_cache[i].ecu0;
   3261 
   3262    if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) {
   3263 #     define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; }
   3264       SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 );
   3265       SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 );
   3266 #     undef SWAP
   3267       return nia_to_ecu_cache[i].ecu0;
   3268    }
   3269 
   3270    stats__nia_cache_misses++;
   3271    ec = VG_(make_depth_1_ExeContext_from_Addr)(nia);
   3272    tl_assert(ec);
   3273    ecu = VG_(get_ECU_from_ExeContext)(ec);
   3274    tl_assert(VG_(is_plausible_ECU)(ecu));
   3275 
   3276    nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0;
   3277    nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0;
   3278 
   3279    nia_to_ecu_cache[i].nia0 = nia;
   3280    nia_to_ecu_cache[i].ecu0 = (UWord)ecu;
   3281    return ecu;
   3282 }
   3283 
   3284 
   3285 /* Note that this serves both the origin-tracking and
   3286    no-origin-tracking modes.  We assume that calls to it are
   3287    sufficiently infrequent that it isn't worth specialising for the
   3288    with/without origin-tracking cases. */
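        /* (A note on usage: this is, presumably, the helper behind the
           instrumenter's handling of Ist_AbiHint, i.e. it is called from
           generated code at the points where the guest ABI declares a
           block, such as the AMD64 red zone, to have become undefined.) */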
   3289 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len, Addr nia )
   3290 {
   3291    UInt otag;
   3292    tl_assert(sizeof(UWord) == sizeof(SizeT));
   3293    if (0)
   3294       VG_(printf)("helperc_MAKE_STACK_UNINIT (%#lx,%lu,nia=%#lx)\n",
   3295                   base, len, nia );
   3296 
   3297    if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
   3298       UInt ecu = convert_nia_to_ecu ( nia );
   3299       tl_assert(VG_(is_plausible_ECU)(ecu));
   3300       otag = ecu | MC_OKIND_STACK;
   3301    } else {
   3302       tl_assert(nia == 0);
   3303       otag = 0;
   3304    }
   3305 
   3306 #  if 0
   3307    /* Really slow version */
   3308    MC_(make_mem_undefined_w_otag)(base, len, otag);
   3309 #  endif
   3310 
   3311 #  if 0
   3312    /* Slow(ish) version, which is fairly easily seen to be correct.
   3313    */
   3314    if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
   3315       make_aligned_word64_undefined_w_otag(base +   0, otag);
   3316       make_aligned_word64_undefined_w_otag(base +   8, otag);
   3317       make_aligned_word64_undefined_w_otag(base +  16, otag);
   3318       make_aligned_word64_undefined_w_otag(base +  24, otag);
   3319 
   3320       make_aligned_word64_undefined_w_otag(base +  32, otag);
   3321       make_aligned_word64_undefined_w_otag(base +  40, otag);
   3322       make_aligned_word64_undefined_w_otag(base +  48, otag);
   3323       make_aligned_word64_undefined_w_otag(base +  56, otag);
   3324 
   3325       make_aligned_word64_undefined_w_otag(base +  64, otag);
   3326       make_aligned_word64_undefined_w_otag(base +  72, otag);
   3327       make_aligned_word64_undefined_w_otag(base +  80, otag);
   3328       make_aligned_word64_undefined_w_otag(base +  88, otag);
   3329 
   3330       make_aligned_word64_undefined_w_otag(base +  96, otag);
   3331       make_aligned_word64_undefined_w_otag(base + 104, otag);
   3332       make_aligned_word64_undefined_w_otag(base + 112, otag);
   3333       make_aligned_word64_undefined_w_otag(base + 120, otag);
   3334    } else {
   3335       MC_(make_mem_undefined)(base, len, otag);
   3336    }
   3337 #  endif
   3338 
   3339    /* Idea is: go fast when
   3340          * 8-aligned and length is 128
   3341          * the sm is available in the main primary map
   3342          * the address range falls entirely within a single secondary map
   3343       If all those conditions hold, just update the V+A bits by writing
   3344       directly into the vabits array.  (If the sm was distinguished, this
   3345       will make a copy and then write to it.)
   3346    */
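           /* Arithmetic behind the fast path below, assuming the usual
              MemCheck encoding of 2 VA bits per data byte: one vabits8 byte
              covers 4 data bytes, so a UShort of VA bits covers 8 data bytes
              and the 128-byte block needs exactly 16 UShort stores.  The
              sm == sm_hi test guards against the 8-aligned (but otherwise
              arbitrary) range straddling a secondary-map boundary. */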
   3347 
   3348    if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
   3349       /* Now we know the address range is suitably sized and aligned. */
   3350       UWord a_lo = (UWord)(base);
   3351       UWord a_hi = (UWord)(base + 128 - 1);
   3352       tl_assert(a_lo < a_hi);             // paranoia: detect overflow
   3353       if (a_hi <= MAX_PRIMARY_ADDRESS) {
   3354          // Now we know the entire range is within the main primary map.
   3355          SecMap* sm    = get_secmap_for_writing_low(a_lo);
   3356          SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
   3357          /* Now we know that the entire address range falls within a
   3358             single secondary map, and that that secondary 'lives' in
   3359             the main primary map. */
   3360          if (LIKELY(sm == sm_hi)) {
   3361             // Finally, we know that the range is entirely within one secmap.
   3362             UWord   v_off = SM_OFF(a_lo);
   3363             UShort* p     = (UShort*)(&sm->vabits8[v_off]);
   3364             p[ 0] = VA_BITS16_UNDEFINED;
   3365             p[ 1] = VA_BITS16_UNDEFINED;
   3366             p[ 2] = VA_BITS16_UNDEFINED;
   3367             p[ 3] = VA_BITS16_UNDEFINED;
   3368             p[ 4] = VA_BITS16_UNDEFINED;
   3369             p[ 5] = VA_BITS16_UNDEFINED;
   3370             p[ 6] = VA_BITS16_UNDEFINED;
   3371             p[ 7] = VA_BITS16_UNDEFINED;
   3372             p[ 8] = VA_BITS16_UNDEFINED;
   3373             p[ 9] = VA_BITS16_UNDEFINED;
   3374             p[10] = VA_BITS16_UNDEFINED;
   3375             p[11] = VA_BITS16_UNDEFINED;
   3376             p[12] = VA_BITS16_UNDEFINED;
   3377             p[13] = VA_BITS16_UNDEFINED;
   3378             p[14] = VA_BITS16_UNDEFINED;
   3379             p[15] = VA_BITS16_UNDEFINED;
   3380             if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
   3381                set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
   3382                set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
   3383                set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
   3384                set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
   3385                set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
   3386                set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
   3387                set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
   3388                set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
   3389                set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
   3390                set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
   3391                set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
   3392                set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
   3393                set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
   3394                set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
   3395                set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
   3396                set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
   3397             }
   3398             return;
   3399          }
   3400       }
   3401    }
   3402 
   3403    /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
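           /* (On ppc64 ELF, the 288 bytes below the stack pointer are the
              ABI's scratch area that is not preserved across calls, the
              ppc64 analogue of the AMD64 red zone; hence this second,
              analogous fast path.) */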
   3404    if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
   3405       /* Now we know the address range is suitably sized and aligned. */
   3406       UWord a_lo = (UWord)(base);
   3407       UWord a_hi = (UWord)(base + 288 - 1);
   3408       tl_assert(a_lo < a_hi);             // paranoia: detect overflow
   3409       if (a_hi <= MAX_PRIMARY_ADDRESS) {
   3410          // Now we know the entire range is within the main primary map.
   3411          SecMap* sm    = get_secmap_for_writing_low(a_lo);
   3412          SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
   3413          /* Now we know that the entire address range falls within a
   3414             single secondary map, and that that secondary 'lives' in
   3415             the main primary map. */
   3416          if (LIKELY(sm == sm_hi)) {
   3417             // Finally, we know that the range is entirely within one secmap.
   3418             UWord   v_off = SM_OFF(a_lo);
   3419             UShort* p     = (UShort*)(&sm->vabits8[v_off]);
   3420             p[ 0] = VA_BITS16_UNDEFINED;
   3421             p[ 1] = VA_BITS16_UNDEFINED;
   3422             p[ 2] = VA_BITS16_UNDEFINED;
   3423             p[ 3] = VA_BITS16_UNDEFINED;
   3424             p[ 4] = VA_BITS16_UNDEFINED;
   3425             p[ 5] = VA_BITS16_UNDEFINED;
   3426             p[ 6] = VA_BITS16_UNDEFINED;
   3427             p[ 7] = VA_BITS16_UNDEFINED;
   3428             p[ 8] = VA_BITS16_UNDEFINED;
   3429             p[ 9] = VA_BITS16_UNDEFINED;
   3430             p[10] = VA_BITS16_UNDEFINED;
   3431             p[11] = VA_BITS16_UNDEFINED;
   3432             p[12] = VA_BITS16_UNDEFINED;
   3433             p[13] = VA_BITS16_UNDEFINED;
   3434             p[14] = VA_BITS16_UNDEFINED;
   3435             p[15] = VA_BITS16_UNDEFINED;
   3436             p[16] = VA_BITS16_UNDEFINED;
   3437             p[17] = VA_BITS16_UNDEFINED;
   3438             p[18] = VA_BITS16_UNDEFINED;
   3439             p[19] = VA_BITS16_UNDEFINED;
   3440             p[20] = VA_BITS16_UNDEFINED;
   3441             p[21] = VA_BITS16_UNDEFINED;
   3442             p[22] = VA_BITS16_UNDEFINED;
   3443             p[23] = VA_BITS16_UNDEFINED;
   3444             p[24] = VA_BITS16_UNDEFINED;
   3445             p[25] = VA_BITS16_UNDEFINED;
   3446             p[26] = VA_BITS16_UNDEFINED;
   3447             p[27] = VA_BITS16_UNDEFINED;
   3448             p[28] = VA_BITS16_UNDEFINED;
   3449             p[29] = VA_BITS16_UNDEFINED;
   3450             p[30] = VA_BITS16_UNDEFINED;
   3451             p[31] = VA_BITS16_UNDEFINED;
   3452             p[32] = VA_BITS16_UNDEFINED;
   3453             p[33] = VA_BITS16_UNDEFINED;
   3454             p[34] = VA_BITS16_UNDEFINED;
   3455             p[35] = VA_BITS16_UNDEFINED;
   3456             if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
   3457                set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
   3458                set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
   3459                set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
   3460                set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
   3461                set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
   3462                set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
   3463                set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
   3464                set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
   3465                set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
   3466                set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
   3467                set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
   3468                set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
   3469                set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
   3470                set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
   3471                set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
   3472                set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
   3473                set_aligned_word64_Origin_to_undef( base + 8 * 16, otag );
   3474                set_aligned_word64_Origin_to_undef( base + 8 * 17, otag );
   3475                set_aligned_word64_Origin_to_undef( base + 8 * 18, otag );
   3476                set_aligned_word64_Origin_to_undef( base + 8 * 19, otag );
   3477                set_aligned_word64_Origin_to_undef( base + 8 * 20, otag );
   3478                set_aligned_word64_Origin_to_undef( base + 8 * 21, otag );
   3479                set_aligned_word64_Origin_to_undef( base + 8 * 22, otag );
   3480                set_aligned_word64_Origin_to_undef( base + 8 * 23, otag );
   3481                set_aligned_word64_Origin_to_undef( base + 8 * 24, otag );
   3482                set_aligned_word64_Origin_to_undef( base + 8 * 25, otag );
   3483                set_aligned_word64_Origin_to_undef( base + 8 * 26, otag );
   3484                set_aligned_word64_Origin_to_undef( base + 8 * 27, otag );
   3485                set_aligned_word64_Origin_to_undef( base + 8 * 28, otag );
   3486                set_aligned_word64_Origin_to_undef( base + 8 * 29, otag );
   3487                set_aligned_word64_Origin_to_undef( base + 8 * 30, otag );
   3488                set_aligned_word64_Origin_to_undef( base + 8 * 31, otag );
   3489                set_aligned_word64_Origin_to_undef( base + 8 * 32, otag );
   3490                set_aligned_word64_Origin_to_undef( base + 8 * 33, otag );
   3491                set_aligned_word64_Origin_to_undef( base + 8 * 34, otag );
   3492                set_aligned_word64_Origin_to_undef( base + 8 * 35, otag );
   3493             }
   3494             return;
   3495          }
   3496       }
   3497    }
   3498 
   3499    /* else fall into slow case */
   3500    MC_(make_mem_undefined_w_otag)(base, len, otag);
   3501 }
   3502 
   3503 
   3504 /*------------------------------------------------------------*/
   3505 /*--- Checking memory                                      ---*/
   3506 /*------------------------------------------------------------*/
   3507 
   3508 typedef
   3509    enum {
   3510       MC_Ok = 5,
   3511       MC_AddrErr = 6,
   3512       MC_ValueErr = 7
   3513    }
   3514    MC_ReadResult;
   3515 
   3516 
   3517 /* Check permissions for address range.  If inadequate permissions
   3518    exist, *bad_addr is set to the offending address, so the caller can
   3519    know what it is. */
   3520 
   3521 /* Returns True if [a .. a+len) is not addressable.  Otherwise,
   3522    returns False, and if bad_addr is non-NULL, sets *bad_addr to
   3523    indicate the lowest failing address.  Functions below are
   3524    similar. */
   3525 Bool MC_(check_mem_is_noaccess) ( Addr a, SizeT len, Addr* bad_addr )
   3526 {
   3527    SizeT i;
   3528    UWord vabits2;
   3529 
   3530    PROF_EVENT(60, "check_mem_is_noaccess");
   3531    for (i = 0; i < len; i++) {
   3532       PROF_EVENT(61, "check_mem_is_noaccess(loop)");
   3533       vabits2 = get_vabits2(a);
   3534       if (VA_BITS2_NOACCESS != vabits2) {
   3535          if (bad_addr != NULL) *bad_addr = a;
   3536          return False;
   3537       }
   3538       a++;
   3539    }
   3540    return True;
   3541 }
   3542 
   3543 static Bool is_mem_addressable ( Addr a, SizeT len,
   3544                                  /*OUT*/Addr* bad_addr )
   3545 {
   3546    SizeT i;
   3547    UWord vabits2;
   3548 
   3549    PROF_EVENT(62, "is_mem_addressable");
   3550    for (i = 0; i < len; i++) {
   3551       PROF_EVENT(63, "is_mem_addressable(loop)");
   3552       vabits2 = get_vabits2(a);
   3553       if (VA_BITS2_NOACCESS == vabits2) {
   3554          if (bad_addr != NULL) *bad_addr = a;
   3555          return False;
   3556       }
   3557       a++;
   3558    }
   3559    return True;
   3560 }
   3561 
   3562 static MC_ReadResult is_mem_defined ( Addr a, SizeT len,
   3563                                       /*OUT*/Addr* bad_addr,
   3564                                       /*OUT*/UInt* otag )
   3565 {
   3566    SizeT i;
   3567    UWord vabits2;
   3568 
   3569    PROF_EVENT(64, "is_mem_defined");
   3570    DEBUG("is_mem_defined\n");
   3571 
   3572    if (otag)     *otag = 0;
   3573    if (bad_addr) *bad_addr = 0;
   3574    for (i = 0; i < len; i++) {
   3575       PROF_EVENT(65, "is_mem_defined(loop)");
   3576       vabits2 = get_vabits2(a);
   3577       if (VA_BITS2_DEFINED != vabits2) {
   3578          // Error!  Nb: Report addressability errors in preference to
   3579          // definedness errors.  And don't report definedness errors unless
   3580          // --undef-value-errors=yes.
   3581          if (bad_addr) {
   3582             *bad_addr = a;
   3583          }
   3584          if (VA_BITS2_NOACCESS == vabits2) {
   3585             return MC_AddrErr;
   3586          }
   3587          if (MC_(clo_mc_level) >= 2) {
   3588             if (otag && MC_(clo_mc_level) == 3) {
   3589                *otag = MC_(helperc_b_load1)( a );
   3590             }
   3591             return MC_ValueErr;
   3592          }
   3593       }
   3594       a++;
   3595    }
   3596    return MC_Ok;
   3597 }
   3598 
   3599 
   3600 /* Check a zero-terminated ASCII string.  Tricky -- we don't want to
   3601    examine the actual bytes to find the end until we're sure it is
   3602    safe to do so. */
   3603 
   3604 static MC_ReadResult mc_is_defined_asciiz ( Addr a, Addr* bad_addr, UInt* otag )
   3605 {
   3606    UWord vabits2;
   3607 
   3608    PROF_EVENT(66, "mc_is_defined_asciiz");
   3609    DEBUG("mc_is_defined_asciiz\n");
   3610 
   3611    if (otag)     *otag = 0;
   3612    if (bad_addr) *bad_addr = 0;
   3613    while (True) {
   3614       PROF_EVENT(67, "mc_is_defined_asciiz(loop)");
   3615       vabits2 = get_vabits2(a);
   3616       if (VA_BITS2_DEFINED != vabits2) {
   3617          // Error!  Nb: Report addressability errors in preference to
   3618          // definedness errors.  And don't report definedness errors unless
   3619          // --undef-value-errors=yes.
   3620          if (bad_addr) {
   3621             *bad_addr = a;
   3622          }
   3623          if (VA_BITS2_NOACCESS == vabits2) {
   3624             return MC_AddrErr;
   3625          }
   3626          if (MC_(clo_mc_level) >= 2) {
   3627             if (otag && MC_(clo_mc_level) == 3) {
   3628                *otag = MC_(helperc_b_load1)( a );
   3629             }
   3630             return MC_ValueErr;
   3631          }
   3632       }
   3633       /* Ok, a is safe to read. */
   3634       if (* ((UChar*)a) == 0) {
   3635          return MC_Ok;
   3636       }
   3637       a++;
   3638    }
   3639 }
   3640 
   3641 
   3642 /*------------------------------------------------------------*/
   3643 /*--- Memory event handlers                                ---*/
   3644 /*------------------------------------------------------------*/
   3645 
   3646 static
   3647 void check_mem_is_addressable ( CorePart part, ThreadId tid, Char* s,
   3648                                 Addr base, SizeT size )
   3649 {
   3650    Addr bad_addr;
   3651    Bool ok = is_mem_addressable ( base, size, &bad_addr );
   3652 
   3653    if (!ok) {
   3654       switch (part) {
   3655       case Vg_CoreSysCall:
   3656          MC_(record_memparam_error) ( tid, bad_addr,
   3657                                       /*isAddrErr*/True, s, 0/*otag*/ );
   3658          break;
   3659 
   3660       case Vg_CoreSignal:
   3661          MC_(record_core_mem_error)( tid, s );
   3662          break;
   3663 
   3664       default:
   3665          VG_(tool_panic)("check_mem_is_addressable: unexpected CorePart");
   3666       }
   3667    }
   3668 }
   3669 
   3670 static
   3671 void check_mem_is_defined ( CorePart part, ThreadId tid, Char* s,
   3672                             Addr base, SizeT size )
   3673 {
   3674    UInt otag = 0;
   3675    Addr bad_addr;
   3676    MC_ReadResult res = is_mem_defined ( base, size, &bad_addr, &otag );
   3677 
   3678    if (MC_Ok != res) {
   3679       Bool isAddrErr = ( MC_AddrErr == res ? True : False );
   3680 
   3681       switch (part) {
   3682       case Vg_CoreSysCall:
   3683          MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
   3684                                       isAddrErr ? 0 : otag );
   3685          break;
   3686 
   3687       case Vg_CoreSysCallArgInMem:
   3688          MC_(record_regparam_error) ( tid, s, otag );
   3689          break;
   3690 
   3691       /* If we're being asked to jump to a silly address, record an error
   3692          message before potentially crashing the entire system. */
   3693       case Vg_CoreTranslate:
   3694          MC_(record_jump_error)( tid, bad_addr );
   3695          break;
   3696 
   3697       default:
   3698          VG_(tool_panic)("check_mem_is_defined: unexpected CorePart");
   3699       }
   3700    }
   3701 }
   3702 
   3703 static
   3704 void check_mem_is_defined_asciiz ( CorePart part, ThreadId tid,
   3705                                    Char* s, Addr str )
   3706 {
   3707    MC_ReadResult res;
   3708    Addr bad_addr = 0;   // shut GCC up
   3709    UInt otag = 0;
   3710 
   3711    tl_assert(part == Vg_CoreSysCall);
   3712    res = mc_is_defined_asciiz ( (Addr)str, &bad_addr, &otag );
   3713    if (MC_Ok != res) {
   3714       Bool isAddrErr = ( MC_AddrErr == res ? True : False );
   3715       MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
   3716                                    isAddrErr ? 0 : otag );
   3717    }
   3718 }
   3719 
   3720 /* Handling of mmap and mprotect is not as simple as it seems.
   3721 
   3722    The underlying semantics are that memory obtained from mmap is
   3723    always initialised, but may be inaccessible.  And changes to the
   3724    protection of memory do not change its contents and hence not its
   3725    definedness state.  Problem is we can't model
   3726    inaccessible-but-with-some-definedness state; once we mark memory
   3727    as inaccessible we lose all info about definedness, and so can't
   3728    restore that if it is later made accessible again.
   3729 
   3730    One obvious thing to do is this:
   3731 
   3732       mmap/mprotect NONE  -> noaccess
   3733       mmap/mprotect other -> defined
   3734 
   3735    The problem case here is: taking accessible memory, writing
   3736    uninitialised data to it, mprotecting it NONE and later mprotecting
   3737    it back to some accessible state causes the undefinedness to be
   3738    lost.
   3739 
   3740    A better proposal is:
   3741 
   3742      (1) mmap NONE       ->  make noaccess
   3743      (2) mmap other      ->  make defined
   3744 
   3745      (3) mprotect NONE   ->  # no change
   3746      (4) mprotect other  ->  change any "noaccess" to "defined"
   3747 
   3748    (2) is OK because memory newly obtained from mmap really is defined
   3749        (zeroed out by the kernel -- doing anything else would
   3750        constitute a massive security hole.)
   3751 
   3752    (1) is OK because the only way to make the memory usable is via
   3753        (4), in which case we also wind up correctly marking it all as
   3754        defined.
   3755 
   3756    (3) is the weak case.  We choose not to change memory state.
   3757        (presumably the range is in some mixture of "defined" and
   3758        "undefined", viz, accessible but with arbitrary V bits).  Doing
   3759        nothing means we retain the V bits, so that if the memory is
   3760        later mprotected "other", the V bits remain unchanged, so there
   3761        can be no false negatives.  The bad effect is that if there's
   3762        an access in the area, then MC cannot warn; but at least we'll
   3763        get a SEGV to show, so it's better than nothing.
   3764 
   3765    Consider the sequence (3) followed by (4).  Any memory that was
   3766    "defined" or "undefined" previously retains its state (as
   3767    required).  Any memory that was "noaccess" before can only have
   3768    been made that way by (1), and so it's OK to change it to
   3769    "defined".
   3770 
   3771    See https://bugs.kde.org/show_bug.cgi?id=205541
   3772    and https://bugs.kde.org/show_bug.cgi?id=210268
   3773 */
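
         /* Illustrative sketch only (client-side code, not part of this tool),
            assuming a POSIX client; it shows the sequence that rule (3) is
            designed to survive:

               char junk;                                   // uninitialised
               char* p = mmap(NULL, 4096, PROT_READ|PROT_WRITE,
                              MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);  // (2): defined
               p[0] = junk;                  // shadow of p[0] becomes undefined
               mprotect(p, 4096, PROT_NONE);                // (3): V bits kept
               mprotect(p, 4096, PROT_READ|PROT_WRITE);     // (4): noaccess -> defined
               if (p[0] == 'x') { }          // still flagged as an undefined use

            Because (3) leaves the shadow state alone, the undefinedness of
            p[0] survives the PROT_NONE round trip, as argued above. */
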
   3774 static
   3775 void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx,
   3776                        ULong di_handle )
   3777 {
   3778    if (rr || ww || xx) {
   3779       /* (2) mmap/mprotect other -> defined */
   3780       MC_(make_mem_defined)(a, len);
   3781    } else {
   3782       /* (1) mmap/mprotect NONE  -> noaccess */
   3783       MC_(make_mem_noaccess)(a, len);
   3784    }
   3785 }
   3786 
   3787 static
   3788 void mc_new_mem_mprotect ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx )
   3789 {
   3790    if (rr || ww || xx) {
   3791       /* (4) mprotect other  ->  change any "noaccess" to "defined" */
   3792       make_mem_defined_if_noaccess(a, len);
   3793    } else {
   3794       /* (3) mprotect NONE   ->  # no change */
   3795       /* do nothing */
   3796    }
   3797 }
   3798 
   3799 
   3800 static
   3801 void mc_new_mem_startup( Addr a, SizeT len,
   3802                          Bool rr, Bool ww, Bool xx, ULong di_handle )
   3803 {
    3804    // Code is defined; initialised variables get put in the data
    3805    // segment and so are defined; and uninitialised variables get put in
    3806    // the bss segment and are auto-zeroed (and so also defined).
   3807    //
   3808    // It's possible that there will be padding between global variables.
   3809    // This will also be auto-zeroed, and marked as defined by Memcheck.  If
   3810    // a program uses it, Memcheck will not complain.  This is arguably a
   3811    // false negative, but it's a grey area -- the behaviour is defined (the
   3812    // padding is zeroed) but it's probably not what the user intended.  And
   3813    // we can't avoid it.
   3814    //
   3815    // Note: we generally ignore RWX permissions, because we can't track them
   3816    // without requiring more than one A bit which would slow things down a
   3817    // lot.  But on Darwin the 0th page is mapped but !R and !W and !X.
   3818    // So we mark any such pages as "unaddressable".
   3819    DEBUG("mc_new_mem_startup(%#lx, %llu, rr=%u, ww=%u, xx=%u)\n",
   3820          a, (ULong)len, rr, ww, xx);
   3821    mc_new_mem_mmap(a, len, rr, ww, xx, di_handle);
   3822 }
   3823 
   3824 static
   3825 void mc_post_mem_write(CorePart part, ThreadId tid, Addr a, SizeT len)
   3826 {
   3827    MC_(make_mem_defined)(a, len);
   3828 }
   3829 
   3830 
   3831 /*------------------------------------------------------------*/
   3832 /*--- Register event handlers                              ---*/
   3833 /*------------------------------------------------------------*/
   3834 
    3835 /* Try to get a nonzero origin for the guest state section of thread
   3836    tid characterised by (offset,size).  Return 0 if nothing to show
   3837    for it. */
   3838 static UInt mb_get_origin_for_guest_offset ( ThreadId tid,
   3839                                              Int offset, SizeT size )
   3840 {
   3841    Int   sh2off;
   3842    UChar area[6];
   3843    UInt  otag;
   3844    sh2off = MC_(get_otrack_shadow_offset)( offset, size );
   3845    if (sh2off == -1)
   3846       return 0;  /* This piece of guest state is not tracked */
   3847    tl_assert(sh2off >= 0);
   3848    tl_assert(0 == (sh2off % 4));
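            /* Sentinel bytes bracketing the 4-byte target: the asserts below
               check that VG_(get_shadow_regs_area) wrote exactly the 4 bytes
               at &area[1] and nothing else. */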
   3849    area[0] = 0x31;
   3850    area[5] = 0x27;
   3851    VG_(get_shadow_regs_area)( tid, &area[1], 2/*shadowno*/,sh2off,4 );
   3852    tl_assert(area[0] == 0x31);
   3853    tl_assert(area[5] == 0x27);
   3854    otag = *(UInt*)&area[1];
   3855    return otag;
   3856 }
   3857 
   3858 
   3859 /* When some chunk of guest state is written, mark the corresponding
   3860    shadow area as valid.  This is used to initialise arbitrarily large
   3861    chunks of guest state, hence the _SIZE value, which has to be as
   3862    big as the biggest guest state.
   3863 */
   3864 static void mc_post_reg_write ( CorePart part, ThreadId tid,
   3865                                 PtrdiffT offset, SizeT size)
   3866 {
   3867 #  define MAX_REG_WRITE_SIZE 1408
   3868    UChar area[MAX_REG_WRITE_SIZE];
   3869    tl_assert(size <= MAX_REG_WRITE_SIZE);
   3870    VG_(memset)(area, V_BITS8_DEFINED, size);
   3871    VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/,offset,size, area );
   3872 #  undef MAX_REG_WRITE_SIZE
   3873 }
   3874 
   3875 static
   3876 void mc_post_reg_write_clientcall ( ThreadId tid,
   3877                                     PtrdiffT offset, SizeT size, Addr f)
   3878 {
   3879    mc_post_reg_write(/*dummy*/0, tid, offset, size);
   3880 }
   3881 
   3882 /* Look at the definedness of the guest's shadow state for
    3883    [offset, offset+size).  If any part of that is undefined, record
   3884    a parameter error.
   3885 */
   3886 static void mc_pre_reg_read ( CorePart part, ThreadId tid, Char* s,
   3887                               PtrdiffT offset, SizeT size)
   3888 {
   3889    Int   i;
   3890    Bool  bad;
   3891    UInt  otag;
   3892 
   3893    UChar area[16];
   3894    tl_assert(size <= 16);
   3895 
   3896    VG_(get_shadow_regs_area)( tid, area, 1/*shadowNo*/,offset,size );
   3897 
   3898    bad = False;
   3899    for (i = 0; i < size; i++) {
   3900       if (area[i] != V_BITS8_DEFINED) {
   3901          bad = True;
   3902          break;
   3903       }
   3904    }
   3905 
   3906    if (!bad)
   3907       return;
   3908 
   3909    /* We've found some undefinedness.  See if we can also find an
   3910       origin for it. */
   3911    otag = mb_get_origin_for_guest_offset( tid, offset, size );
   3912    MC_(record_regparam_error) ( tid, s, otag );
   3913 }
   3914 
   3915 
   3916 /*------------------------------------------------------------*/
   3917 /*--- Functions called directly from generated code:       ---*/
   3918 /*--- Load/store handlers.                                 ---*/
   3919 /*------------------------------------------------------------*/
   3920 
   3921 /* Types:  LOADV32, LOADV16, LOADV8 are:
   3922                UWord fn ( Addr a )
   3923    so they return 32-bits on 32-bit machines and 64-bits on
   3924    64-bit machines.  Addr has the same size as a host word.
   3925 
   3926    LOADV64 is always  ULong fn ( Addr a )
   3927 
   3928    Similarly for STOREV8, STOREV16, STOREV32, the supplied vbits
   3929    are a UWord, and for STOREV64 they are a ULong.
   3930 */
   3931 
   3932 /* If any part of '_a' indicated by the mask is 1, either '_a' is not
   3933    naturally '_sz/8'-aligned, or it exceeds the range covered by the
   3934    primary map.  This is all very tricky (and important!), so let's
   3935    work through the maths by hand (below), *and* assert for these
   3936    values at startup. */
   3937 #define MASK(_szInBytes) \
   3938    ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) )
   3939 
   3940 /* MASK only exists so as to define this macro. */
   3941 #define UNALIGNED_OR_HIGH(_a,_szInBits) \
   3942    ((_a) & MASK((_szInBits>>3)))
   3943 
   3944 /* On a 32-bit machine:
   3945 
   3946    N_PRIMARY_BITS          == 16, so
   3947    N_PRIMARY_MAP           == 0x10000, so
   3948    N_PRIMARY_MAP-1         == 0xFFFF, so
   3949    (N_PRIMARY_MAP-1) << 16 == 0xFFFF0000, and so
   3950 
   3951    MASK(1) = ~ ( (0x10000 - 1) | 0xFFFF0000 )
   3952            = ~ ( 0xFFFF | 0xFFFF0000 )
   3953            = ~ 0xFFFF'FFFF
   3954            = 0
   3955 
   3956    MASK(2) = ~ ( (0x10000 - 2) | 0xFFFF0000 )
   3957            = ~ ( 0xFFFE | 0xFFFF0000 )
   3958            = ~ 0xFFFF'FFFE
   3959            = 1
   3960 
   3961    MASK(4) = ~ ( (0x10000 - 4) | 0xFFFF0000 )
   3962            = ~ ( 0xFFFC | 0xFFFF0000 )
   3963            = ~ 0xFFFF'FFFC
   3964            = 3
   3965 
   3966    MASK(8) = ~ ( (0x10000 - 8) | 0xFFFF0000 )
   3967            = ~ ( 0xFFF8 | 0xFFFF0000 )
   3968            = ~ 0xFFFF'FFF8
   3969            = 7
   3970 
   3971    Hence in the 32-bit case, "a & MASK(1/2/4/8)" is a nonzero value
   3972    precisely when a is not 1/2/4/8-bytes aligned.  And obviously, for
   3973    the 1-byte alignment case, it is always a zero value, since MASK(1)
   3974    is zero.  All as expected.
   3975 
   3976    On a 64-bit machine, it's more complex, since we're testing
   3977    simultaneously for misalignment and for the address being at or
   3978    above 32G:
   3979 
   3980    N_PRIMARY_BITS          == 19, so
   3981    N_PRIMARY_MAP           == 0x80000, so
   3982    N_PRIMARY_MAP-1         == 0x7FFFF, so
   3983    (N_PRIMARY_MAP-1) << 16 == 0x7FFFF'0000, and so
   3984 
   3985    MASK(1) = ~ ( (0x10000 - 1) | 0x7FFFF'0000 )
   3986            = ~ ( 0xFFFF | 0x7FFFF'0000 )
   3987            = ~ 0x7FFFF'FFFF
   3988            = 0xFFFF'FFF8'0000'0000
   3989 
   3990    MASK(2) = ~ ( (0x10000 - 2) | 0x7FFFF'0000 )
   3991            = ~ ( 0xFFFE | 0x7FFFF'0000 )
   3992            = ~ 0x7FFFF'FFFE
   3993            = 0xFFFF'FFF8'0000'0001
   3994 
   3995    MASK(4) = ~ ( (0x10000 - 4) | 0x7FFFF'0000 )
   3996            = ~ ( 0xFFFC | 0x7FFFF'0000 )
   3997            = ~ 0x7FFFF'FFFC
   3998            = 0xFFFF'FFF8'0000'0003
   3999 
   4000    MASK(8) = ~ ( (0x10000 - 8) | 0x7FFFF'0000 )
   4001            = ~ ( 0xFFF8 | 0x7FFFF'0000 )
   4002            = ~ 0x7FFFF'FFF8
   4003            = 0xFFFF'FFF8'0000'0007
   4004 */
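         /* A minimal sketch of what the startup assertions mentioned above
            amount to, using the 64-bit values just computed (the corresponding
            32-bit values are 0, 1, 3 and 7):

               tl_assert(MASK(1) == 0xFFFFFFF800000000ULL);
               tl_assert(MASK(2) == 0xFFFFFFF800000001ULL);
               tl_assert(MASK(4) == 0xFFFFFFF800000003ULL);
               tl_assert(MASK(8) == 0xFFFFFFF800000007ULL);
         */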
   4005 
   4006 
   4007 /* ------------------------ Size = 8 ------------------------ */
   4008 
   4009 static INLINE
   4010 ULong mc_LOADV64 ( Addr a, Bool isBigEndian )
   4011 {
   4012    PROF_EVENT(200, "mc_LOADV64");
   4013 
   4014 #ifndef PERF_FAST_LOADV
   4015    return mc_LOADVn_slow( a, 64, isBigEndian );
   4016 #else
   4017    {
   4018       UWord   sm_off16, vabits16;
   4019       SecMap* sm;
   4020 
   4021       if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
   4022          PROF_EVENT(201, "mc_LOADV64-slow1");
   4023          return (ULong)mc_LOADVn_slow( a, 64, isBigEndian );
   4024       }
   4025 
   4026       sm       = get_secmap_for_reading_low(a);
   4027       sm_off16 = SM_OFF_16(a);
   4028       vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
   4029 
   4030       // Handle common case quickly: a is suitably aligned, is mapped, and
    4031       // addressable.
   4032       // Convert V bits from compact memory form to expanded register form.
   4033       if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
   4034          return V_BITS64_DEFINED;
   4035       } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
   4036          return V_BITS64_UNDEFINED;
   4037       } else {
   4038          /* Slow case: the 8 bytes are not all-defined or all-undefined. */
   4039          PROF_EVENT(202, "mc_LOADV64-slow2");
   4040          return mc_LOADVn_slow( a, 64, isBigEndian );
   4041       }
   4042    }
   4043 #endif
   4044 }
   4045 
   4046 VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr a )
   4047 {
   4048    return mc_LOADV64(a, True);
   4049 }
   4050 VG_REGPARM(1) ULong MC_(helperc_LOADV64le) ( Addr a )
   4051 {
   4052    return mc_LOADV64(a, False);
   4053 }
   4054 
   4055 
   4056 static INLINE
   4057 void mc_STOREV64 ( Addr a, ULong vbits64, Bool isBigEndian )
   4058 {
   4059    PROF_EVENT(210, "mc_STOREV64");
   4060 
   4061 #ifndef PERF_FAST_STOREV
   4062    // XXX: this slow case seems to be marginally faster than the fast case!
   4063    // Investigate further.
   4064    mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
   4065 #else
   4066    {
   4067       UWord   sm_off16, vabits16;
   4068       SecMap* sm;
   4069 
   4070       if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
   4071          PROF_EVENT(211, "mc_STOREV64-slow1");
   4072          mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
   4073          return;
   4074       }
   4075 
   4076       sm       = get_secmap_for_reading_low(a);
   4077       sm_off16 = SM_OFF_16(a);
   4078       vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
   4079 
   4080       if (LIKELY( !is_distinguished_sm(sm) &&
   4081                           (VA_BITS16_DEFINED   == vabits16 ||
   4082                            VA_BITS16_UNDEFINED == vabits16) ))
   4083       {
   4084          /* Handle common case quickly: a is suitably aligned, */
    4085          /* is mapped, and is addressable. */
   4086          // Convert full V-bits in register to compact 2-bit form.
   4087          if (V_BITS64_DEFINED == vbits64) {
   4088             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
   4089          } else if (V_BITS64_UNDEFINED == vbits64) {
   4090             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
   4091          } else {
   4092             /* Slow but general case -- writing partially defined bytes. */
   4093             PROF_EVENT(212, "mc_STOREV64-slow2");
   4094             mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
   4095          }
   4096       } else {
   4097          /* Slow but general case. */
   4098          PROF_EVENT(213, "mc_STOREV64-slow3");
   4099          mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
   4100       }
   4101    }
   4102 #endif
   4103 }
   4104 
   4105 VG_REGPARM(1) void MC_(helperc_STOREV64be) ( Addr a, ULong vbits64 )
   4106 {
   4107    mc_STOREV64(a, vbits64, True);
   4108 }
   4109 VG_REGPARM(1) void MC_(helperc_STOREV64le) ( Addr a, ULong vbits64 )
   4110 {
   4111    mc_STOREV64(a, vbits64, False);
   4112 }
   4113 
   4114 
   4115 /* ------------------------ Size = 4 ------------------------ */
   4116 
   4117 static INLINE
   4118 UWord mc_LOADV32 ( Addr a, Bool isBigEndian )
   4119 {
   4120    PROF_EVENT(220, "mc_LOADV32");
   4121 
   4122 #ifndef PERF_FAST_LOADV
   4123    return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
   4124 #else
   4125    {
   4126       UWord   sm_off, vabits8;
   4127       SecMap* sm;
   4128 
   4129       if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
   4130          PROF_EVENT(221, "mc_LOADV32-slow1");
   4131          return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
   4132       }
   4133 
   4134       sm      = get_secmap_for_reading_low(a);
   4135       sm_off  = SM_OFF(a);
   4136       vabits8 = sm->vabits8[sm_off];
   4137 
   4138       // Handle common case quickly: a is suitably aligned, is mapped, and the
    4139       // entire word32 it lives in is addressable.
   4140       // Convert V bits from compact memory form to expanded register form.
    4141       // For 64-bit platforms, set the high 32 bits of retval to all 1s (undefined).
   4142       // Almost certainly not necessary, but be paranoid.
   4143       if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
   4144          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
   4145       } else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) {
   4146          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
   4147       } else {
   4148          /* Slow case: the 4 bytes are not all-defined or all-undefined. */
   4149          PROF_EVENT(222, "mc_LOADV32-slow2");
   4150          return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
   4151       }
   4152    }
   4153 #endif
   4154 }
   4155 
   4156 VG_REGPARM(1) UWord MC_(helperc_LOADV32be) ( Addr a )
   4157 {
   4158    return mc_LOADV32(a, True);
   4159 }
   4160 VG_REGPARM(1) UWord MC_(helperc_LOADV32le) ( Addr a )
   4161 {
   4162    return mc_LOADV32(a, False);
   4163 }
   4164 
   4165 
   4166 static INLINE
   4167 void mc_STOREV32 ( Addr a, UWord vbits32, Bool isBigEndian )
   4168 {
   4169    PROF_EVENT(230, "mc_STOREV32");
   4170 
   4171 #ifndef PERF_FAST_STOREV
   4172    mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   4173 #else
   4174    {
   4175       UWord   sm_off, vabits8;
   4176       SecMap* sm;
   4177 
   4178       if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
   4179          PROF_EVENT(231, "mc_STOREV32-slow1");
   4180          mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   4181          return;
   4182       }
   4183 
   4184       sm      = get_secmap_for_reading_low(a);
   4185       sm_off  = SM_OFF(a);
   4186       vabits8 = sm->vabits8[sm_off];
   4187 
   4188       // Cleverness:  sometimes we don't have to write the shadow memory at
   4189       // all, if we can tell that what we want to write is the same as what is
   4190       // already there.  The 64/16/8 bit cases also have cleverness at this
   4191       // point, but it works a little differently to the code below.
   4192       if (V_BITS32_DEFINED == vbits32) {
   4193          if (vabits8 == (UInt)VA_BITS8_DEFINED) {
   4194             return;
   4195          } else if (!is_distinguished_sm(sm) && VA_BITS8_UNDEFINED == vabits8) {
   4196             sm->vabits8[sm_off] = (UInt)VA_BITS8_DEFINED;
   4197          } else {
   4198             // not defined/undefined, or distinguished and changing state
   4199             PROF_EVENT(232, "mc_STOREV32-slow2");
   4200             mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   4201          }
   4202       } else if (V_BITS32_UNDEFINED == vbits32) {
   4203          if (vabits8 == (UInt)VA_BITS8_UNDEFINED) {
   4204             return;
   4205          } else if (!is_distinguished_sm(sm) && VA_BITS8_DEFINED == vabits8) {
   4206             sm->vabits8[sm_off] = (UInt)VA_BITS8_UNDEFINED;
   4207          } else {
   4208             // not defined/undefined, or distinguished and changing state
   4209             PROF_EVENT(233, "mc_STOREV32-slow3");
   4210             mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   4211          }
   4212       } else {
   4213          // Partially defined word
   4214          PROF_EVENT(234, "mc_STOREV32-slow4");
   4215          mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   4216       }
   4217    }
   4218 #endif
   4219 }
   4220 
   4221 VG_REGPARM(2) void MC_(helperc_STOREV32be) ( Addr a, UWord vbits32 )
   4222 {
   4223    mc_STOREV32(a, vbits32, True);
   4224 }
   4225 VG_REGPARM(2) void MC_(helperc_STOREV32le) ( Addr a, UWord vbits32 )
   4226 {
   4227    mc_STOREV32(a, vbits32, False);
   4228 }
   4229 
   4230 
   4231 /* ------------------------ Size = 2 ------------------------ */
   4232 
   4233 static INLINE
   4234 UWord mc_LOADV16 ( Addr a, Bool isBigEndian )
   4235 {
   4236    PROF_EVENT(240, "mc_LOADV16");
   4237 
   4238 #ifndef PERF_FAST_LOADV
   4239    return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
   4240 #else
   4241    {
   4242       UWord   sm_off, vabits8;
   4243       SecMap* sm;
   4244 
   4245       if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
   4246          PROF_EVENT(241, "mc_LOADV16-slow1");
   4247          return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
   4248       }
   4249 
   4250       sm      = get_secmap_for_reading_low(a);
   4251       sm_off  = SM_OFF(a);
   4252       vabits8 = sm->vabits8[sm_off];
   4253       // Handle common case quickly: a is suitably aligned, is mapped, and is
    4254       // addressable.
   4255       // Convert V bits from compact memory form to expanded register form
   4256       if      (vabits8 == VA_BITS8_DEFINED  ) { return V_BITS16_DEFINED;   }
   4257       else if (vabits8 == VA_BITS8_UNDEFINED) { return V_BITS16_UNDEFINED; }
   4258       else {
   4259          // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
   4260          // the two sub-bytes.
   4261          UChar vabits4 = extract_vabits4_from_vabits8(a, vabits8);
   4262          if      (vabits4 == VA_BITS4_DEFINED  ) { return V_BITS16_DEFINED;   }
   4263          else if (vabits4 == VA_BITS4_UNDEFINED) { return V_BITS16_UNDEFINED; }
   4264          else {
   4265             /* Slow case: the two bytes are not all-defined or all-undefined. */
   4266             PROF_EVENT(242, "mc_LOADV16-slow2");
   4267             return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
   4268          }
   4269       }
   4270    }
   4271 #endif
   4272 }
   4273 
   4274 VG_REGPARM(1) UWord MC_(helperc_LOADV16be) ( Addr a )
   4275 {
   4276    return mc_LOADV16(a, True);
   4277 }
   4278 VG_REGPARM(1) UWord MC_(helperc_LOADV16le) ( Addr a )
   4279 {
   4280    return mc_LOADV16(a, False);
   4281 }
   4282 
   4283 
   4284 static INLINE
   4285 void mc_STOREV16 ( Addr a, UWord vbits16, Bool isBigEndian )
   4286 {
   4287    PROF_EVENT(250, "mc_STOREV16");
   4288 
   4289 #ifndef PERF_FAST_STOREV
   4290    mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
   4291 #else
   4292    {
   4293       UWord   sm_off, vabits8;
   4294       SecMap* sm;
   4295 
   4296       if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
   4297          PROF_EVENT(251, "mc_STOREV16-slow1");
   4298          mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
   4299          return;
   4300       }
   4301 
   4302       sm      = get_secmap_for_reading_low(a);
   4303       sm_off  = SM_OFF(a);
   4304       vabits8 = sm->vabits8[sm_off];
   4305       if (LIKELY( !is_distinguished_sm(sm) &&
   4306                           (VA_BITS8_DEFINED   == vabits8 ||
   4307                            VA_BITS8_UNDEFINED == vabits8) ))
   4308       {
   4309          /* Handle common case quickly: a is suitably aligned, */
    4310          /* is mapped, and is addressable. */
   4311          // Convert full V-bits in register to compact 2-bit form.
   4312          if (V_BITS16_DEFINED == vbits16) {
   4313             insert_vabits4_into_vabits8( a, VA_BITS4_DEFINED ,
   4314                                          &(sm->vabits8[sm_off]) );
   4315          } else if (V_BITS16_UNDEFINED == vbits16) {
   4316             insert_vabits4_into_vabits8( a, VA_BITS4_UNDEFINED,
   4317                                          &(sm->vabits8[sm_off]) );
   4318          } else {
   4319             /* Slow but general case -- writing partially defined bytes. */
   4320             PROF_EVENT(252, "mc_STOREV16-slow2");
   4321             mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
   4322          }
   4323       } else {
   4324          /* Slow but general case. */
   4325          PROF_EVENT(253, "mc_STOREV16-slow3");
   4326          mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
   4327       }
   4328    }
   4329 #endif
   4330 }
   4331 
   4332 VG_REGPARM(2) void MC_(helperc_STOREV16be) ( Addr a, UWord vbits16 )
   4333 {
   4334    mc_STOREV16(a, vbits16, True);
   4335 }
   4336 VG_REGPARM(2) void MC_(helperc_STOREV16le) ( Addr a, UWord vbits16 )
   4337 {
   4338    mc_STOREV16(a, vbits16, False);
   4339 }
   4340 
   4341 
   4342 /* ------------------------ Size = 1 ------------------------ */
   4343 /* Note: endianness is irrelevant for size == 1 */
   4344 
   4345 VG_REGPARM(1)
   4346 UWord MC_(helperc_LOADV8) ( Addr a )
   4347 {
   4348    PROF_EVENT(260, "mc_LOADV8");
   4349 
   4350 #ifndef PERF_FAST_LOADV
   4351    return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
   4352 #else
   4353    {
   4354       UWord   sm_off, vabits8;
   4355       SecMap* sm;
   4356 
   4357       if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
   4358          PROF_EVENT(261, "mc_LOADV8-slow1");
   4359          return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
   4360       }
   4361 
   4362       sm      = get_secmap_for_reading_low(a);
   4363       sm_off  = SM_OFF(a);
   4364       vabits8 = sm->vabits8[sm_off];
   4365       // Convert V bits from compact memory form to expanded register form
   4366       // Handle common case quickly: a is mapped, and the entire
    4367       // word32 it lives in is addressable.
   4368       if      (vabits8 == VA_BITS8_DEFINED  ) { return V_BITS8_DEFINED;   }
   4369       else if (vabits8 == VA_BITS8_UNDEFINED) { return V_BITS8_UNDEFINED; }
   4370       else {
   4371          // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
   4372          // the single byte.
   4373          UChar vabits2 = extract_vabits2_from_vabits8(a, vabits8);
   4374          if      (vabits2 == VA_BITS2_DEFINED  ) { return V_BITS8_DEFINED;   }
   4375          else if (vabits2 == VA_BITS2_UNDEFINED) { return V_BITS8_UNDEFINED; }
   4376          else {
   4377             /* Slow case: the byte is not all-defined or all-undefined. */
   4378             PROF_EVENT(262, "mc_LOADV8-slow2");
   4379             return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
   4380          }
   4381       }
   4382    }
   4383 #endif
   4384 }
   4385 
   4386 
   4387 VG_REGPARM(2)
   4388 void MC_(helperc_STOREV8) ( Addr a, UWord vbits8 )
   4389 {
   4390    PROF_EVENT(270, "mc_STOREV8");
   4391 
   4392 #ifndef PERF_FAST_STOREV
   4393    mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
   4394 #else
   4395    {
   4396       UWord   sm_off, vabits8;
   4397       SecMap* sm;
   4398 
   4399       if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
   4400          PROF_EVENT(271, "mc_STOREV8-slow1");
   4401          mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
   4402          return;
   4403       }
   4404 
   4405       sm      = get_secmap_for_reading_low(a);
   4406       sm_off  = SM_OFF(a);
   4407       vabits8 = sm->vabits8[sm_off];
   4408       if (LIKELY
   4409             ( !is_distinguished_sm(sm) &&
   4410               ( (VA_BITS8_DEFINED == vabits8 || VA_BITS8_UNDEFINED == vabits8)
   4411              || (VA_BITS2_NOACCESS != extract_vabits2_from_vabits8(a, vabits8))
   4412               )
   4413             )
   4414          )
   4415       {
   4416          /* Handle common case quickly: a is mapped, the entire word32 it
    4417             lives in is addressable. */
   4418          // Convert full V-bits in register to compact 2-bit form.
   4419          if (V_BITS8_DEFINED == vbits8) {
   4420             insert_vabits2_into_vabits8( a, VA_BITS2_DEFINED,
   4421                                           &(sm->vabits8[sm_off]) );
   4422          } else if (V_BITS8_UNDEFINED == vbits8) {
   4423             insert_vabits2_into_vabits8( a, VA_BITS2_UNDEFINED,
   4424                                           &(sm->vabits8[sm_off]) );
   4425          } else {
   4426             /* Slow but general case -- writing partially defined bytes. */
   4427             PROF_EVENT(272, "mc_STOREV8-slow2");
   4428             mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
   4429          }
   4430       } else {
   4431          /* Slow but general case. */
   4432          PROF_EVENT(273, "mc_STOREV8-slow3");
   4433          mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
   4434       }
   4435    }
   4436 #endif
   4437 }
   4438 
   4439 
   4440 /*------------------------------------------------------------*/
   4441 /*--- Functions called directly from generated code:       ---*/
   4442 /*--- Value-check failure handlers.                        ---*/
   4443 /*------------------------------------------------------------*/
   4444 
   4445 /* Call these ones when an origin is available ... */
   4446 VG_REGPARM(1)
   4447 void MC_(helperc_value_check0_fail_w_o) ( UWord origin ) {
   4448    MC_(record_cond_error) ( VG_(get_running_tid)(), (UInt)origin );
   4449 }
   4450 
   4451 VG_REGPARM(1)
   4452 void MC_(helperc_value_check1_fail_w_o) ( UWord origin ) {
   4453    MC_(record_value_error) ( VG_(get_running_tid)(), 1, (UInt)origin );
   4454 }
   4455 
   4456 VG_REGPARM(1)
   4457 void MC_(helperc_value_check4_fail_w_o) ( UWord origin ) {
   4458    MC_(record_value_error) ( VG_(get_running_tid)(), 4, (UInt)origin );
   4459 }
   4460 
   4461 VG_REGPARM(1)
   4462 void MC_(helperc_value_check8_fail_w_o) ( UWord origin ) {
   4463    MC_(record_value_error) ( VG_(get_running_tid)(), 8, (UInt)origin );
   4464 }
   4465 
   4466 VG_REGPARM(2)
   4467 void MC_(helperc_value_checkN_fail_w_o) ( HWord sz, UWord origin ) {
   4468    MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, (UInt)origin );
   4469 }
   4470 
   4471 /* ... and these when an origin isn't available. */
   4472 
   4473 VG_REGPARM(0)
   4474 void MC_(helperc_value_check0_fail_no_o) ( void ) {
   4475    MC_(record_cond_error) ( VG_(get_running_tid)(), 0/*origin*/ );
   4476 }
   4477 
   4478 VG_REGPARM(0)
   4479 void MC_(helperc_value_check1_fail_no_o) ( void ) {
   4480    MC_(record_value_error) ( VG_(get_running_tid)(), 1, 0/*origin*/ );
   4481 }
   4482 
   4483 VG_REGPARM(0)
   4484 void MC_(helperc_value_check4_fail_no_o) ( void ) {
   4485    MC_(record_value_error) ( VG_(get_running_tid)(), 4, 0/*origin*/ );
   4486 }
   4487 
   4488 VG_REGPARM(0)
   4489 void MC_(helperc_value_check8_fail_no_o) ( void ) {
   4490    MC_(record_value_error) ( VG_(get_running_tid)(), 8, 0/*origin*/ );
   4491 }
   4492 
   4493 VG_REGPARM(1)
   4494 void MC_(helperc_value_checkN_fail_no_o) ( HWord sz ) {
   4495    MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, 0/*origin*/ );
   4496 }
   4497 
   4498 
   4499 /*------------------------------------------------------------*/
   4500 /*--- Metadata get/set functions, for client requests.     ---*/
   4501 /*------------------------------------------------------------*/
   4502 
   4503 // Nb: this expands the V+A bits out into register-form V bits, even though
   4504 // they're in memory.  This is for backward compatibility, and because it's
   4505 // probably what the user wants.
   4506 
   4507 /* Copy Vbits from/to address 'a'. Returns: 1 == OK, 2 == alignment
   4508    error [no longer used], 3 == addressing error. */
   4509 /* Nb: We used to issue various definedness/addressability errors from here,
   4510    but we took them out because they ranged from not-very-helpful to
   4511    downright annoying, and they complicated the error data structures. */
   4512 static Int mc_get_or_set_vbits_for_client (
   4513    Addr a,
   4514    Addr vbits,
   4515    SizeT szB,
   4516    Bool setting /* True <=> set vbits,  False <=> get vbits */
   4517 )
   4518 {
   4519    SizeT i;
   4520    Bool  ok;
   4521    UChar vbits8;
   4522 
    4523    /* Check that arrays are addressable before doing any getting/setting. */
   4524    for (i = 0; i < szB; i++) {
   4525       if (VA_BITS2_NOACCESS == get_vabits2(a + i) ||
   4526           VA_BITS2_NOACCESS == get_vabits2(vbits + i)) {
   4527          return 3;
   4528       }
   4529    }
   4530 
   4531    /* Do the copy */
   4532    if (setting) {
   4533       /* setting */
   4534       for (i = 0; i < szB; i++) {
   4535          ok = set_vbits8(a + i, ((UChar*)vbits)[i]);
   4536          tl_assert(ok);
   4537       }
   4538    } else {
   4539       /* getting */
   4540       for (i = 0; i < szB; i++) {
   4541          ok = get_vbits8(a + i, &vbits8);
   4542          tl_assert(ok);
   4543          ((UChar*)vbits)[i] = vbits8;
   4544       }
   4545       // The bytes in vbits[] have now been set, so mark them as such.
   4546       MC_(make_mem_defined)(vbits, szB);
   4547    }
   4548 
   4549    return 1;
   4550 }
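
         /* A minimal client-side sketch of how the code above is reached
            (the macro names come from memcheck.h, included at the top of
            this file; see that header for the exact request interface):

               unsigned char buf[8], vbits[8];
               int r;
               r = VALGRIND_GET_VBITS(buf, vbits, 8);   // 1 == OK, 3 == addr error
               // vbits[] now holds register-form V bits: 0x00 == defined,
               // 0xFF == undefined (cf. V_BITS8_DEFINED / V_BITS8_UNDEFINED).
               r = VALGRIND_SET_VBITS(buf, vbits, 8);

            Both requests arrive here via VG_USERREQ__GET_VBITS and
            VG_USERREQ__SET_VBITS, dispatched in mc_handle_client_request()
            further below. */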
   4551 
   4552 
   4553 /*------------------------------------------------------------*/
   4554 /*--- Detecting leaked (unreachable) malloc'd blocks.      ---*/
   4555 /*------------------------------------------------------------*/
   4556 
   4557 /* For the memory leak detector, say whether an entire 64k chunk of
   4558    address space is possibly in use, or not.  If in doubt return
   4559    True.
   4560 */
   4561 Bool MC_(is_within_valid_secondary) ( Addr a )
   4562 {
   4563    SecMap* sm = maybe_get_secmap_for ( a );
   4564    if (sm == NULL || sm == &sm_distinguished[SM_DIST_NOACCESS]
   4565        || MC_(in_ignored_range)(a)) {
   4566       /* Definitely not in use. */
   4567       return False;
   4568    } else {
   4569       return True;
   4570    }
   4571 }
   4572 
   4573 
   4574 /* For the memory leak detector, say whether or not a given word
   4575    address is to be regarded as valid. */
   4576 Bool MC_(is_valid_aligned_word) ( Addr a )
   4577 {
   4578    tl_assert(sizeof(UWord) == 4 || sizeof(UWord) == 8);
   4579    tl_assert(VG_IS_WORD_ALIGNED(a));
   4580    if (is_mem_defined( a, sizeof(UWord), NULL, NULL) == MC_Ok
   4581        && !MC_(in_ignored_range)(a)) {
   4582       return True;
   4583    } else {
   4584       return False;
   4585    }
   4586 }
   4587 
   4588 
   4589 /*------------------------------------------------------------*/
   4590 /*--- Initialisation                                       ---*/
   4591 /*------------------------------------------------------------*/
   4592 
   4593 static void init_shadow_memory ( void )
   4594 {
   4595    Int     i;
   4596    SecMap* sm;
   4597 
   4598    tl_assert(V_BIT_UNDEFINED   == 1);
   4599    tl_assert(V_BIT_DEFINED     == 0);
   4600    tl_assert(V_BITS8_UNDEFINED == 0xFF);
   4601    tl_assert(V_BITS8_DEFINED   == 0);
   4602 
   4603    /* Build the 3 distinguished secondaries */
   4604    sm = &sm_distinguished[SM_DIST_NOACCESS];
   4605    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_NOACCESS;
   4606 
   4607    sm = &sm_distinguished[SM_DIST_UNDEFINED];
   4608    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_UNDEFINED;
   4609 
   4610    sm = &sm_distinguished[SM_DIST_DEFINED];
   4611    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_DEFINED;
   4612 
   4613    /* Set up the primary map. */
   4614    /* These entries gradually get overwritten as the used address
   4615       space expands. */
   4616    for (i = 0; i < N_PRIMARY_MAP; i++)
   4617       primary_map[i] = &sm_distinguished[SM_DIST_NOACCESS];
   4618 
   4619    /* Auxiliary primary maps */
   4620    init_auxmap_L1_L2();
   4621 
   4622    /* auxmap_size = auxmap_used = 0;
   4623       no ... these are statically initialised */
   4624 
   4625    /* Secondary V bit table */
   4626    secVBitTable = createSecVBitTable();
   4627 }
   4628 
   4629 
   4630 /*------------------------------------------------------------*/
   4631 /*--- Sanity check machinery (permanently engaged)         ---*/
   4632 /*------------------------------------------------------------*/
   4633 
   4634 static Bool mc_cheap_sanity_check ( void )
   4635 {
   4636    n_sanity_cheap++;
   4637    PROF_EVENT(490, "cheap_sanity_check");
   4638    /* Check for sane operating level */
   4639    if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
   4640       return False;
   4641    /* nothing else useful we can rapidly check */
   4642    return True;
   4643 }
   4644 
   4645 static Bool mc_expensive_sanity_check ( void )
   4646 {
   4647    Int     i;
   4648    Word    n_secmaps_found;
   4649    SecMap* sm;
   4650    HChar*  errmsg;
   4651    Bool    bad = False;
   4652 
   4653    if (0) VG_(printf)("expensive sanity check\n");
   4654    if (0) return True;
   4655 
   4656    n_sanity_expensive++;
   4657    PROF_EVENT(491, "expensive_sanity_check");
   4658 
   4659    /* Check for sane operating level */
   4660    if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
   4661       return False;
   4662 
   4663    /* Check that the 3 distinguished SMs are still as they should be. */
   4664 
   4665    /* Check noaccess DSM. */
   4666    sm = &sm_distinguished[SM_DIST_NOACCESS];
   4667    for (i = 0; i < SM_CHUNKS; i++)
   4668       if (sm->vabits8[i] != VA_BITS8_NOACCESS)
   4669          bad = True;
   4670 
   4671    /* Check undefined DSM. */
   4672    sm = &sm_distinguished[SM_DIST_UNDEFINED];
   4673    for (i = 0; i < SM_CHUNKS; i++)
   4674       if (sm->vabits8[i] != VA_BITS8_UNDEFINED)
   4675          bad = True;
   4676 
   4677    /* Check defined DSM. */
   4678    sm = &sm_distinguished[SM_DIST_DEFINED];
   4679    for (i = 0; i < SM_CHUNKS; i++)
   4680       if (sm->vabits8[i] != VA_BITS8_DEFINED)
   4681          bad = True;
   4682 
   4683    if (bad) {
   4684       VG_(printf)("memcheck expensive sanity: "
   4685                   "distinguished_secondaries have changed\n");
   4686       return False;
   4687    }
   4688 
   4689    /* If we're not checking for undefined value errors, the secondary V bit
   4690     * table should be empty. */
   4691    if (MC_(clo_mc_level) == 1) {
   4692       if (0 != VG_(OSetGen_Size)(secVBitTable))
   4693          return False;
   4694    }
   4695 
   4696    /* check the auxiliary maps, very thoroughly */
   4697    n_secmaps_found = 0;
   4698    errmsg = check_auxmap_L1_L2_sanity( &n_secmaps_found );
   4699    if (errmsg) {
   4700       VG_(printf)("memcheck expensive sanity, auxmaps:\n\t%s", errmsg);
   4701       return False;
   4702    }
   4703 
   4704    /* n_secmaps_found is now the number referred to by the auxiliary
   4705       primary map.  Now add on the ones referred to by the main
   4706       primary map. */
   4707    for (i = 0; i < N_PRIMARY_MAP; i++) {
   4708       if (primary_map[i] == NULL) {
   4709          bad = True;
   4710       } else {
   4711          if (!is_distinguished_sm(primary_map[i]))
   4712             n_secmaps_found++;
   4713       }
   4714    }
   4715 
   4716    /* check that the number of secmaps issued matches the number that
   4717       are reachable (iow, no secmap leaks) */
   4718    if (n_secmaps_found != (n_issued_SMs - n_deissued_SMs))
   4719       bad = True;
   4720 
   4721    if (bad) {
   4722       VG_(printf)("memcheck expensive sanity: "
   4723                   "apparent secmap leakage\n");
   4724       return False;
   4725    }
   4726 
   4727    if (bad) {
   4728       VG_(printf)("memcheck expensive sanity: "
   4729                   "auxmap covers wrong address space\n");
   4730       return False;
   4731    }
   4732 
   4733    /* there is only one pointer to each secmap (expensive) */
   4734 
   4735    return True;
   4736 }
   4737 
   4738 /*------------------------------------------------------------*/
   4739 /*--- Command line args                                    ---*/
   4740 /*------------------------------------------------------------*/
   4741 
   4742 Bool          MC_(clo_partial_loads_ok)       = False;
   4743 Long          MC_(clo_freelist_vol)           = 20*1000*1000LL;
   4744 LeakCheckMode MC_(clo_leak_check)             = LC_Summary;
   4745 VgRes         MC_(clo_leak_resolution)        = Vg_HighRes;
   4746 Bool          MC_(clo_show_reachable)         = False;
   4747 Bool          MC_(clo_show_possibly_lost)     = True;
   4748 Bool          MC_(clo_workaround_gcc296_bugs) = False;
   4749 Int           MC_(clo_malloc_fill)            = -1;
   4750 Int           MC_(clo_free_fill)              = -1;
   4751 Int           MC_(clo_mc_level)               = 2;
   4752 const char*   MC_(clo_summary_file)           = NULL;
   4753 
   4754 
   4755 static Bool mc_process_cmd_line_options(Char* arg)
   4756 {
   4757    Char* tmp_str;
   4758 
   4759    tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
   4760 
   4761    /* Set MC_(clo_mc_level):
   4762          1 = A bit tracking only
   4763          2 = A and V bit tracking, but no V bit origins
   4764          3 = A and V bit tracking, and V bit origins
   4765 
   4766       Do this by inspecting --undef-value-errors= and
   4767       --track-origins=.  Reject the case --undef-value-errors=no
   4768       --track-origins=yes as meaningless.
   4769    */
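            /* Illustrative examples of the resulting level (clo_mc_level
               defaults to 2, as in the table of defaults above):

                  (no relevant options)                        -> level 2
                  --undef-value-errors=no                      -> level 1
                  --track-origins=yes                          -> level 3
                  --undef-value-errors=no --track-origins=yes  -> rejected
                                                                  (bad_level)
            */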
   4770    if (0 == VG_(strcmp)(arg, "--undef-value-errors=no")) {
   4771       if (MC_(clo_mc_level) == 3) {
   4772          goto bad_level;
   4773       } else {
   4774          MC_(clo_mc_level) = 1;
   4775          return True;
   4776       }
   4777    }
   4778    if (0 == VG_(strcmp)(arg, "--undef-value-errors=yes")) {
   4779       if (MC_(clo_mc_level) == 1)
   4780          MC_(clo_mc_level) = 2;
   4781       return True;
   4782    }
   4783    if (0 == VG_(strcmp)(arg, "--track-origins=no")) {
   4784       if (MC_(clo_mc_level) == 3)
   4785          MC_(clo_mc_level) = 2;
   4786       return True;
   4787    }
   4788    if (0 == VG_(strcmp)(arg, "--track-origins=yes")) {
   4789       if (MC_(clo_mc_level) == 1) {
   4790          goto bad_level;
   4791       } else {
   4792          MC_(clo_mc_level) = 3;
   4793          return True;
   4794       }
   4795    }
   4796 
    4797    if VG_BOOL_CLO(arg, "--partial-loads-ok", MC_(clo_partial_loads_ok)) {}
   4798    else if VG_BOOL_CLO(arg, "--show-reachable",   MC_(clo_show_reachable))   {}
   4799    else if VG_BOOL_CLO(arg, "--show-possibly-lost",
   4800                                             MC_(clo_show_possibly_lost))     {}
   4801    else if VG_BOOL_CLO(arg, "--workaround-gcc296-bugs",
   4802                                             MC_(clo_workaround_gcc296_bugs)) {}
   4803 
   4804    else if VG_BINT_CLO(arg, "--freelist-vol",  MC_(clo_freelist_vol),
   4805                                                0, 10*1000*1000*1000LL) {}
   4806 
   4807    else if VG_XACT_CLO(arg, "--leak-check=no",
   4808                             MC_(clo_leak_check), LC_Off) {}
   4809    else if VG_XACT_CLO(arg, "--leak-check=summary",
   4810                             MC_(clo_leak_check), LC_Summary) {}
   4811    else if VG_XACT_CLO(arg, "--leak-check=yes",
   4812                             MC_(clo_leak_check), LC_Full) {}
   4813    else if VG_XACT_CLO(arg, "--leak-check=full",
   4814                             MC_(clo_leak_check), LC_Full) {}
   4815 
   4816    else if VG_XACT_CLO(arg, "--leak-resolution=low",
   4817                             MC_(clo_leak_resolution), Vg_LowRes) {}
   4818    else if VG_XACT_CLO(arg, "--leak-resolution=med",
   4819                             MC_(clo_leak_resolution), Vg_MedRes) {}
   4820    else if VG_XACT_CLO(arg, "--leak-resolution=high",
   4821                             MC_(clo_leak_resolution), Vg_HighRes) {}
   4822 
   4823    else if VG_STR_CLO(arg, "--summary-file", tmp_str) {
   4824       MC_(clo_summary_file) = VG_(strdup)("clo_summary_file", tmp_str);
   4825    }
   4826    else if VG_STR_CLO(arg, "--ignore-ranges", tmp_str) {
   4827       Int  i;
   4828       Bool ok  = parse_ignore_ranges(tmp_str);
   4829       if (!ok)
   4830         return False;
   4831       tl_assert(ignoreRanges.used >= 0);
   4832       tl_assert(ignoreRanges.used < M_IGNORE_RANGES);
   4833       for (i = 0; i < ignoreRanges.used; i++) {
   4834          Addr s = ignoreRanges.start[i];
   4835          Addr e = ignoreRanges.end[i];
   4836          Addr limit = 0x4000000; /* 64M - entirely arbitrary limit */
   4837          if (e <= s) {
   4838             VG_(message)(Vg_DebugMsg,
   4839                "ERROR: --ignore-ranges: end <= start in range:\n");
   4840             VG_(message)(Vg_DebugMsg,
   4841                "       0x%lx-0x%lx\n", s, e);
   4842             return False;
   4843          }
   4844          if (e - s > limit) {
   4845             VG_(message)(Vg_DebugMsg,
   4846                "ERROR: --ignore-ranges: suspiciously large range:\n");
   4847             VG_(message)(Vg_DebugMsg,
   4848                "       0x%lx-0x%lx (size %ld)\n", s, e, (UWord)(e-s));
   4849             return False;
    4850          }
   4851       }
   4852    }
   4853 
   4854    else if VG_BHEX_CLO(arg, "--malloc-fill", MC_(clo_malloc_fill), 0x00,0xFF) {}
   4855    else if VG_BHEX_CLO(arg, "--free-fill",   MC_(clo_free_fill),   0x00,0xFF) {}
   4856 
   4857    else
   4858       return VG_(replacement_malloc_process_cmd_line_option)(arg);
   4859 
   4860    return True;
   4861 
   4862 
   4863   bad_level:
   4864    VG_(fmsg_bad_option)(arg,
   4865       "--track-origins=yes has no effect when --undef-value-errors=no.\n");
   4866 }
   4867 
   4868 static void mc_print_usage(void)
   4869 {
   4870    VG_(printf)(
   4871 "    --leak-check=no|summary|full     search for memory leaks at exit?  [summary]\n"
   4872 "    --leak-resolution=low|med|high   differentiation of leak stack traces [high]\n"
   4873 "    --show-reachable=no|yes          show reachable blocks in leak check? [no]\n"
   4874 "    --show-possibly-lost=no|yes      show possibly lost blocks in leak check?\n"
   4875 "                                     [yes]\n"
   4876 "    --undef-value-errors=no|yes      check for undefined value errors [yes]\n"
   4877 "    --track-origins=no|yes           show origins of undefined values? [no]\n"
   4878 "    --partial-loads-ok=no|yes        too hard to explain here; see manual [no]\n"
   4879 "    --freelist-vol=<number>          volume of freed blocks queue [20000000]\n"
   4880 "    --workaround-gcc296-bugs=no|yes  self explanatory [no]\n"
   4881 "    --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS]   assume given addresses are OK\n"
   4882 "    --malloc-fill=<hexnumber>        fill malloc'd areas with given value\n"
   4883 "    --free-fill=<hexnumber>          fill free'd areas with given value\n"
   4884    );
   4885 }
   4886 
   4887 static void mc_print_debug_usage(void)
   4888 {
   4889    VG_(printf)(
   4890 "    (none)\n"
   4891    );
   4892 }
   4893 
   4894 
   4895 /*------------------------------------------------------------*/
   4896 /*--- Client blocks                                        ---*/
   4897 /*------------------------------------------------------------*/
   4898 
   4899 /* Client block management:
   4900 
   4901    This is managed as an expanding array of client block descriptors.
   4902    Indices of live descriptors are issued to the client, so it can ask
   4903    to free them later.  Therefore we cannot slide live entries down
   4904    over dead ones.  Instead we must use free/inuse flags and scan for
   4905    an empty slot at allocation time.  This in turn means allocation is
   4906    relatively expensive, so we hope this does not happen too often.
   4907 
   4908    An unused block has start == size == 0
   4909 */
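
         /* Illustrative client-side sketch (the macro names come from
            memcheck.h; see that header for the exact request interface):

               int id = VALGRIND_CREATE_BLOCK(addr, len, "my buffer");
               ...
               VALGRIND_DISCARD(id);

            CREATE_BLOCK hands out the slot index chosen by
            alloc_client_block() below; DISCARD marks that slot unused again
            (start == size == 0), as handled in mc_handle_client_request(). */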
   4910 
   4911 /* type CGenBlock is defined in mc_include.h */
   4912 
   4913 /* This subsystem is self-initialising. */
   4914 static UWord      cgb_size = 0;
   4915 static UWord      cgb_used = 0;
   4916 static CGenBlock* cgbs     = NULL;
   4917 
   4918 /* Stats for this subsystem. */
   4919 static ULong cgb_used_MAX = 0;   /* Max in use. */
   4920 static ULong cgb_allocs   = 0;   /* Number of allocs. */
   4921 static ULong cgb_discards = 0;   /* Number of discards. */
   4922 static ULong cgb_search   = 0;   /* Number of searches. */
   4923 
   4924 
   4925 /* Get access to the client block array. */
   4926 void MC_(get_ClientBlock_array)( /*OUT*/CGenBlock** blocks,
   4927                                  /*OUT*/UWord* nBlocks )
   4928 {
   4929    *blocks  = cgbs;
   4930    *nBlocks = cgb_used;
   4931 }
   4932 
   4933 
   4934 static
   4935 Int alloc_client_block ( void )
   4936 {
   4937    UWord      i, sz_new;
   4938    CGenBlock* cgbs_new;
   4939 
   4940    cgb_allocs++;
   4941 
   4942    for (i = 0; i < cgb_used; i++) {
   4943       cgb_search++;
   4944       if (cgbs[i].start == 0 && cgbs[i].size == 0)
   4945          return i;
   4946    }
   4947 
   4948    /* Not found.  Try to allocate one at the end. */
   4949    if (cgb_used < cgb_size) {
   4950       cgb_used++;
   4951       return cgb_used-1;
   4952    }
   4953 
   4954    /* Ok, we have to allocate a new one. */
   4955    tl_assert(cgb_used == cgb_size);
   4956    sz_new = (cgbs == NULL) ? 10 : (2 * cgb_size);
   4957 
   4958    cgbs_new = VG_(malloc)( "mc.acb.1", sz_new * sizeof(CGenBlock) );
   4959    for (i = 0; i < cgb_used; i++)
   4960       cgbs_new[i] = cgbs[i];
   4961 
   4962    if (cgbs != NULL)
   4963       VG_(free)( cgbs );
   4964    cgbs = cgbs_new;
   4965 
   4966    cgb_size = sz_new;
   4967    cgb_used++;
   4968    if (cgb_used > cgb_used_MAX)
   4969       cgb_used_MAX = cgb_used;
   4970    return cgb_used-1;
   4971 }
   4972 
   4973 
   4974 static void show_client_block_stats ( void )
   4975 {
   4976    VG_(message)(Vg_DebugMsg,
   4977       "general CBs: %llu allocs, %llu discards, %llu maxinuse, %llu search\n",
   4978       cgb_allocs, cgb_discards, cgb_used_MAX, cgb_search
   4979    );
   4980 }
   4981 
   4982 
   4983 /*------------------------------------------------------------*/
   4984 /*--- Client requests                                      ---*/
   4985 /*------------------------------------------------------------*/
   4986 
   4987 static Bool mc_handle_client_request ( ThreadId tid, UWord* arg, UWord* ret )
   4988 {
   4989    Int   i;
   4990    Bool  ok;
   4991    Addr  bad_addr;
   4992 
   4993    if (!VG_IS_TOOL_USERREQ('M','C',arg[0])
   4994        && VG_USERREQ__MALLOCLIKE_BLOCK != arg[0]
   4995        && VG_USERREQ__FREELIKE_BLOCK   != arg[0]
   4996        && VG_USERREQ__CREATE_MEMPOOL   != arg[0]
   4997        && VG_USERREQ__DESTROY_MEMPOOL  != arg[0]
   4998        && VG_USERREQ__MEMPOOL_ALLOC    != arg[0]
   4999        && VG_USERREQ__MEMPOOL_FREE     != arg[0]
   5000        && VG_USERREQ__MEMPOOL_TRIM     != arg[0]
   5001        && VG_USERREQ__MOVE_MEMPOOL     != arg[0]
   5002        && VG_USERREQ__MEMPOOL_CHANGE   != arg[0]
   5003        && VG_USERREQ__MEMPOOL_EXISTS   != arg[0])
   5004       return False;
   5005 
   5006    switch (arg[0]) {
   5007       case VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE:
   5008          ok = is_mem_addressable ( arg[1], arg[2], &bad_addr );
   5009          if (!ok)
   5010             MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 );
   5011          *ret = ok ? (UWord)NULL : bad_addr;
   5012          break;
   5013 
   5014       case VG_USERREQ__CHECK_MEM_IS_DEFINED: {
   5015          MC_ReadResult res;
   5016          UInt otag = 0;
   5017          res = is_mem_defined ( arg[1], arg[2], &bad_addr, &otag );
   5018          if (MC_AddrErr == res)
   5019             MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 );
   5020          else if (MC_ValueErr == res)
   5021             MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/False, otag );
   5022          *ret = ( res==MC_Ok ? (UWord)NULL : bad_addr );
   5023          break;
   5024       }
   5025 
   5026       case VG_USERREQ__DO_LEAK_CHECK:
   5027          MC_(detect_memory_leaks)(tid, arg[1] ? LC_Summary : LC_Full);
   5028          *ret = 0; /* return value is meaningless */
   5029          break;
   5030 
   5031       case VG_USERREQ__MAKE_MEM_NOACCESS:
   5032          MC_(make_mem_noaccess) ( arg[1], arg[2] );
   5033          *ret = -1;
   5034          break;
   5035 
   5036       case VG_USERREQ__MAKE_MEM_UNDEFINED:
   5037          make_mem_undefined_w_tid_and_okind ( arg[1], arg[2], tid,
   5038                                               MC_OKIND_USER );
   5039          *ret = -1;
   5040          break;
   5041 
   5042       case VG_USERREQ__MAKE_MEM_DEFINED:
   5043          MC_(make_mem_defined) ( arg[1], arg[2] );
   5044          *ret = -1;
   5045          break;
   5046 
   5047       case VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE:
   5048          make_mem_defined_if_addressable ( arg[1], arg[2] );
   5049          *ret = -1;
   5050          break;
   5051 
   5052       case VG_USERREQ__CREATE_BLOCK: /* describe a block */
   5053          if (arg[1] != 0 && arg[2] != 0) {
   5054             i = alloc_client_block();
   5055             /* VG_(printf)("allocated %d %p\n", i, cgbs); */
   5056             cgbs[i].start = arg[1];
   5057             cgbs[i].size  = arg[2];
   5058             cgbs[i].desc  = VG_(strdup)("mc.mhcr.1", (Char *)arg[3]);
   5059             cgbs[i].where = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ );
   5060             *ret = i;
   5061          } else
   5062             *ret = -1;
   5063          break;
   5064 
   5065       case VG_USERREQ__DISCARD: /* discard */
   5066          if (cgbs == NULL
   5067              || arg[2] >= cgb_used ||
   5068              (cgbs[arg[2]].start == 0 && cgbs[arg[2]].size == 0)) {
   5069             *ret = 1;
   5070          } else {
   5071             tl_assert(arg[2] < cgb_used); /* arg[2] is a UWord, hence never negative */
   5072             cgbs[arg[2]].start = cgbs[arg[2]].size = 0;
   5073             VG_(free)(cgbs[arg[2]].desc);
   5074             cgb_discards++;
   5075             *ret = 0;
   5076          }
   5077          break;
   5078 
   5079       case VG_USERREQ__GET_VBITS:
   5080          *ret = mc_get_or_set_vbits_for_client
   5081                    ( arg[1], arg[2], arg[3], False /* get them */ );
   5082          break;
   5083 
   5084       case VG_USERREQ__SET_VBITS:
   5085          *ret = mc_get_or_set_vbits_for_client
   5086                    ( arg[1], arg[2], arg[3], True /* set them */ );
   5087          break;
   5088 
   5089       case VG_USERREQ__COUNT_LEAKS: { /* count leaked bytes */
   5090          UWord** argp = (UWord**)arg;
   5091          // MC_(bytes_leaked) et al were set by the last leak check (or are
   5092          // zero if no prior leak check has been performed).
   5093          *argp[1] = MC_(bytes_leaked) + MC_(bytes_indirect);
   5094          *argp[2] = MC_(bytes_dubious);
   5095          *argp[3] = MC_(bytes_reachable);
   5096          *argp[4] = MC_(bytes_suppressed);
   5097          // there is no argp[5]
   5098          //*argp[5] = MC_(bytes_indirect);
   5099          // XXX need to make *argp[1-4] defined;  currently done in the
   5100          // VALGRIND_COUNT_LEAKS macro by initialising them to zero.
   5101          *ret = 0;
   5102          return True;
   5103       }
   5104       case VG_USERREQ__COUNT_LEAK_BLOCKS: { /* count leaked blocks */
   5105          UWord** argp = (UWord**)arg;
   5106          // MC_(blocks_leaked) et al were set by the last leak check (or are
   5107          // zero if no prior leak check has been performed).
   5108          *argp[1] = MC_(blocks_leaked) + MC_(blocks_indirect);
   5109          *argp[2] = MC_(blocks_dubious);
   5110          *argp[3] = MC_(blocks_reachable);
   5111          *argp[4] = MC_(blocks_suppressed);
   5112          // there is no argp[5]
   5113          //*argp[5] = MC_(blocks_indirect);
   5114          // XXX need to make *argp[1-4] defined;  currently done in the
   5115          // VALGRIND_COUNT_LEAK_BLOCKS macro by initialising them to zero.
   5116          *ret = 0;
   5117          return True;
   5118       }
   5119       case VG_USERREQ__MALLOCLIKE_BLOCK: {
   5120          Addr p         = (Addr)arg[1];
   5121          SizeT sizeB    =       arg[2];
   5122          //UInt rzB       =       arg[3];    XXX: unused!
   5123          Bool is_zeroed = (Bool)arg[4];
   5124 
   5125          MC_(new_block) ( tid, p, sizeB, /*ignored*/0, is_zeroed,
   5126                           MC_AllocCustom, MC_(malloc_list) );
   5127          return True;
   5128       }
   5129       case VG_USERREQ__FREELIKE_BLOCK: {
   5130          Addr p         = (Addr)arg[1];
   5131          UInt rzB       =       arg[2];
   5132 
   5133          MC_(handle_free) ( tid, p, rzB, MC_AllocCustom );
   5134          return True;
   5135       }
   5136 
   5137       case _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR: {
   5138          Char* s   = (Char*)arg[1];
   5139          Addr  dst = (Addr) arg[2];
   5140          Addr  src = (Addr) arg[3];
   5141          SizeT len = (SizeT)arg[4];
   5142          MC_(record_overlap_error)(tid, s, src, dst, len);
   5143          return True;
   5144       }
   5145 
   5146       case VG_USERREQ__CREATE_MEMPOOL: {
   5147          Addr pool      = (Addr)arg[1];
   5148          UInt rzB       =       arg[2];
   5149          Bool is_zeroed = (Bool)arg[3];
   5150 
   5151          MC_(create_mempool) ( pool, rzB, is_zeroed );
   5152          return True;
   5153       }
   5154 
   5155       case VG_USERREQ__DESTROY_MEMPOOL: {
   5156          Addr pool      = (Addr)arg[1];
   5157 
   5158          MC_(destroy_mempool) ( pool );
   5159          return True;
   5160       }
   5161 
   5162       case VG_USERREQ__MEMPOOL_ALLOC: {
   5163          Addr pool      = (Addr)arg[1];
   5164          Addr addr      = (Addr)arg[2];
   5165          UInt size      =       arg[3];
   5166 
   5167          MC_(mempool_alloc) ( tid, pool, addr, size );
   5168          return True;
   5169       }
   5170 
   5171       case VG_USERREQ__MEMPOOL_FREE: {
   5172          Addr pool      = (Addr)arg[1];
   5173          Addr addr      = (Addr)arg[2];
   5174 
   5175          MC_(mempool_free) ( pool, addr );
   5176          return True;
   5177       }
   5178 
   5179       case VG_USERREQ__MEMPOOL_TRIM: {
   5180          Addr pool      = (Addr)arg[1];
   5181          Addr addr      = (Addr)arg[2];
   5182          UInt size      =       arg[3];
   5183 
   5184          MC_(mempool_trim) ( pool, addr, size );
   5185          return True;
   5186       }
   5187 
   5188       case VG_USERREQ__MOVE_MEMPOOL: {
   5189          Addr poolA     = (Addr)arg[1];
   5190          Addr poolB     = (Addr)arg[2];
   5191 
   5192          MC_(move_mempool) ( poolA, poolB );
   5193          return True;
   5194       }
   5195 
   5196       case VG_USERREQ__MEMPOOL_CHANGE: {
   5197          Addr pool      = (Addr)arg[1];
   5198          Addr addrA     = (Addr)arg[2];
   5199          Addr addrB     = (Addr)arg[3];
   5200          UInt size      =       arg[4];
   5201 
   5202          MC_(mempool_change) ( pool, addrA, addrB, size );
   5203          return True;
   5204       }
   5205 
   5206       case VG_USERREQ__MEMPOOL_EXISTS: {
   5207          Addr pool      = (Addr)arg[1];
   5208 
   5209          *ret = (UWord) MC_(mempool_exists) ( pool );
   5210          return True;
   5211       }
   5212 
   5213 
   5214       default:
   5215          VG_(message)(
   5216             Vg_UserMsg,
   5217             "Warning: unknown memcheck client request code %llx\n",
   5218             (ULong)arg[0]
   5219          );
   5220          return False;
   5221    }
   5222    return True;
   5223 }
   5224 
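        /* Illustrative note (a usage sketch, not part of the tool itself): the
           requests dispatched above are normally issued from client code via
           the wrapper macros in memcheck.h / valgrind.h.  Assuming the usual
           macro names (VALGRIND_MAKE_MEM_DEFINED, VALGRIND_DO_LEAK_CHECK,
           VALGRIND_COUNT_LEAKS), a minimal client-side fragment might look
           like this:

              #include "valgrind/memcheck.h"

              char buf[64];
              // becomes VG_USERREQ__MAKE_MEM_DEFINED above
              VALGRIND_MAKE_MEM_DEFINED(buf, sizeof buf);

              // becomes VG_USERREQ__DO_LEAK_CHECK, then __COUNT_LEAKS above
              unsigned long leaked, dubious, reachable, suppressed;
              VALGRIND_DO_LEAK_CHECK;
              VALGRIND_COUNT_LEAKS(leaked, dubious, reachable, suppressed);

           Each macro expands to a client request whose arg[0] is one of the
           VG_USERREQ__* codes tested at the top of this function. */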
   5225 
   5226 /*------------------------------------------------------------*/
   5227 /*--- Crude profiling machinery.                           ---*/
   5228 /*------------------------------------------------------------*/
   5229 
   5230 // We track a number of interesting events (using PROF_EVENT)
   5231 // if MC_PROFILE_MEMORY is defined.
   5232 
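        // A hedged sketch of how this is used (PROF_EVENT comes from
        // mc_include.h; the event number below is illustrative only):
        //
        //    static void mc_LOADVn_slow ( ... ) {
        //       PROF_EVENT(30, "mc_LOADVn_slow");   // bump MC_(event_ctr)[30]
        //       ...
        //    }
        //
        // When MC_PROFILE_MEMORY is not defined, PROF_EVENT compiles away to
        // nothing, and done_prof_mem() below prints one line per non-zero
        // counter at exit.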
   5233 #ifdef MC_PROFILE_MEMORY
   5234 
   5235 UInt   MC_(event_ctr)[N_PROF_EVENTS];
   5236 HChar* MC_(event_ctr_name)[N_PROF_EVENTS];
   5237 
   5238 static void init_prof_mem ( void )
   5239 {
   5240    Int i;
   5241    for (i = 0; i < N_PROF_EVENTS; i++) {
   5242       MC_(event_ctr)[i] = 0;
   5243       MC_(event_ctr_name)[i] = NULL;
   5244    }
   5245 }
   5246 
   5247 static void done_prof_mem ( void )
   5248 {
   5249    Int  i;
   5250    Bool spaced = False;
   5251    for (i = 0; i < N_PROF_EVENTS; i++) {
   5252       if (!spaced && (i % 10) == 0) {
   5253          VG_(printf)("\n");
   5254          spaced = True;
   5255       }
   5256       if (MC_(event_ctr)[i] > 0) {
   5257          spaced = False;
   5258          VG_(printf)( "prof mem event %3d: %9d   %s\n",
   5259                       i, MC_(event_ctr)[i],
   5260                       MC_(event_ctr_name)[i]
   5261                          ? MC_(event_ctr_name)[i] : "unnamed");
   5262       }
   5263    }
   5264 }
   5265 
   5266 #else
   5267 
   5268 static void init_prof_mem ( void ) { }
   5269 static void done_prof_mem ( void ) { }
   5270 
   5271 #endif
   5272 
   5273 
   5274 /*------------------------------------------------------------*/
   5275 /*--- Origin tracking stuff                                ---*/
   5276 /*------------------------------------------------------------*/
   5277 
   5278 /*--------------------------------------------*/
   5279 /*--- Origin tracking: load handlers       ---*/
   5280 /*--------------------------------------------*/
   5281 
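        /* A brief orientation note for the handlers below (see the detailed
           origin tracking comment near the top of this file for the full
           story).  Each OCacheLine holds one 32-bit origin tag ("otag",
           believed to be an ExeContext-unique value) per aligned 32-bit word,
           in w32[], plus a 4-bit descr mask per word saying which bytes of
           that word the otag actually covers (bit 0 = lowest-addressed byte).
           A load returns the line's otag only if any byte it touches is
           covered; otherwise it returns 0, meaning "no interesting origin".
           When the two halves of a wider load disagree, merge_origins picks
           one deterministically (the numerically larger), so a defined half
           (otag 0) never hides the origin carried by the other half. */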
   5282 static INLINE UInt merge_origins ( UInt or1, UInt or2 ) {
   5283    return or1 > or2 ? or1 : or2;
   5284 }
   5285 
   5286 UWord VG_REGPARM(1) MC_(helperc_b_load1)( Addr a ) {
   5287    OCacheLine* line;
   5288    UChar descr;
   5289    UWord lineoff = oc_line_offset(a);
   5290    UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
   5291 
   5292    if (OC_ENABLE_ASSERTIONS) {
   5293       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   5294    }
   5295 
   5296    line = find_OCacheLine( a );
   5297 
   5298    descr = line->descr[lineoff];
   5299    if (OC_ENABLE_ASSERTIONS) {
   5300       tl_assert(descr < 0x10);
   5301    }
   5302 
   5303    if (LIKELY(0 == (descr & (1 << byteoff))))  {
   5304       return 0;
   5305    } else {
   5306       return line->w32[lineoff];
   5307    }
   5308 }
   5309 
   5310 UWord VG_REGPARM(1) MC_(helperc_b_load2)( Addr a ) {
   5311    OCacheLine* line;
   5312    UChar descr;
   5313    UWord lineoff, byteoff;
   5314 
   5315    if (UNLIKELY(a & 1)) {
   5316       /* Handle misaligned case, slowly. */
   5317       UInt oLo   = (UInt)MC_(helperc_b_load1)( a + 0 );
   5318       UInt oHi   = (UInt)MC_(helperc_b_load1)( a + 1 );
   5319       return merge_origins(oLo, oHi);
   5320    }
   5321 
   5322    lineoff = oc_line_offset(a);
   5323    byteoff = a & 3; /* 0 or 2 */
   5324 
   5325    if (OC_ENABLE_ASSERTIONS) {
   5326       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   5327    }
   5328    line = find_OCacheLine( a );
   5329 
   5330    descr = line->descr[lineoff];
   5331    if (OC_ENABLE_ASSERTIONS) {
   5332       tl_assert(descr < 0x10);
   5333    }
   5334 
   5335    if (LIKELY(0 == (descr & (3 << byteoff)))) {
   5336       return 0;
   5337    } else {
   5338       return line->w32[lineoff];
   5339    }
   5340 }
   5341 
   5342 UWord VG_REGPARM(1) MC_(helperc_b_load4)( Addr a ) {
   5343    OCacheLine* line;
   5344    UChar descr;
   5345    UWord lineoff;
   5346 
   5347    if (UNLIKELY(a & 3)) {
   5348       /* Handle misaligned case, slowly. */
   5349       UInt oLo   = (UInt)MC_(helperc_b_load2)( a + 0 );
   5350       UInt oHi   = (UInt)MC_(helperc_b_load2)( a + 2 );
   5351       return merge_origins(oLo, oHi);
   5352    }
   5353 
   5354    lineoff = oc_line_offset(a);
   5355    if (OC_ENABLE_ASSERTIONS) {
   5356       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   5357    }
   5358 
   5359    line = find_OCacheLine( a );
   5360 
   5361    descr = line->descr[lineoff];
   5362    if (OC_ENABLE_ASSERTIONS) {
   5363       tl_assert(descr < 0x10);
   5364    }
   5365 
   5366    if (LIKELY(0 == descr)) {
   5367       return 0;
   5368    } else {
   5369       return line->w32[lineoff];
   5370    }
   5371 }
   5372 
   5373 UWord VG_REGPARM(1) MC_(helperc_b_load8)( Addr a ) {
   5374    OCacheLine* line;
   5375    UChar descrLo, descrHi, descr;
   5376    UWord lineoff;
   5377 
   5378    if (UNLIKELY(a & 7)) {
   5379       /* Handle misaligned case, slowly. */
   5380       UInt oLo   = (UInt)MC_(helperc_b_load4)( a + 0 );
   5381       UInt oHi   = (UInt)MC_(helperc_b_load4)( a + 4 );
   5382       return merge_origins(oLo, oHi);
   5383    }
   5384 
   5385    lineoff = oc_line_offset(a);
   5386    if (OC_ENABLE_ASSERTIONS) {
   5387       tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
   5388    }
   5389 
   5390    line = find_OCacheLine( a );
   5391 
   5392    descrLo = line->descr[lineoff + 0];
   5393    descrHi = line->descr[lineoff + 1];
   5394    descr   = descrLo | descrHi;
   5395    if (OC_ENABLE_ASSERTIONS) {
   5396       tl_assert(descr < 0x10);
   5397    }
   5398 
   5399    if (LIKELY(0 == descr)) {
   5400       return 0; /* both 32-bit chunks are defined */
   5401    } else {
   5402       UInt oLo = descrLo == 0 ? 0 : line->w32[lineoff + 0];
   5403       UInt oHi = descrHi == 0 ? 0 : line->w32[lineoff + 1];
   5404       return merge_origins(oLo, oHi);
   5405    }
   5406 }
   5407 
   5408 UWord VG_REGPARM(1) MC_(helperc_b_load16)( Addr a ) {
   5409    UInt oLo   = (UInt)MC_(helperc_b_load8)( a + 0 );
   5410    UInt oHi   = (UInt)MC_(helperc_b_load8)( a + 8 );
   5411    UInt oBoth = merge_origins(oLo, oHi);
   5412    return (UWord)oBoth;
   5413 }
   5414 
   5415 
   5416 /*--------------------------------------------*/
   5417 /*--- Origin tracking: store handlers      ---*/
   5418 /*--------------------------------------------*/
   5419 
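        /* The store handlers keep the descr masks and w32[] described above in
           step: storing a non-zero otag sets the descr bits for the bytes
           written and overwrites that word's otag, while storing otag 0 merely
           clears those bits (no otag needs to be remembered for defined data).
           A small worked example for helperc_b_store1, with an address whose
           low two bits are 10 and d32 = 0x40c (an illustrative otag value):
              byteoff = 2, so descr[lineoff] |= (1 << 2) and w32[lineoff] = 0x40c;
           a subsequent helperc_b_load1 of that byte then returns 0x40c. */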
   5420 void VG_REGPARM(2) MC_(helperc_b_store1)( Addr a, UWord d32 ) {
   5421    OCacheLine* line;
   5422    UWord lineoff = oc_line_offset(a);
   5423    UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
   5424 
   5425    if (OC_ENABLE_ASSERTIONS) {
   5426       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   5427    }
   5428 
   5429    line = find_OCacheLine( a );
   5430 
   5431    if (d32 == 0) {
   5432       line->descr[lineoff] &= ~(1 << byteoff);
   5433    } else {
   5434       line->descr[lineoff] |= (1 << byteoff);
   5435       line->w32[lineoff] = d32;
   5436    }
   5437 }
   5438 
   5439 void VG_REGPARM(2) MC_(helperc_b_store2)( Addr a, UWord d32 ) {
   5440    OCacheLine* line;
   5441    UWord lineoff, byteoff;
   5442 
   5443    if (UNLIKELY(a & 1)) {
   5444       /* Handle misaligned case, slowly. */
   5445       MC_(helperc_b_store1)( a + 0, d32 );
   5446       MC_(helperc_b_store1)( a + 1, d32 );
   5447       return;
   5448    }
   5449 
   5450    lineoff = oc_line_offset(a);
   5451    byteoff = a & 3; /* 0 or 2 */
   5452 
   5453    if (OC_ENABLE_ASSERTIONS) {
   5454       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   5455    }
   5456 
   5457    line = find_OCacheLine( a );
   5458 
   5459    if (d32 == 0) {
   5460       line->descr[lineoff] &= ~(3 << byteoff);
   5461    } else {
   5462       line->descr[lineoff] |= (3 << byteoff);
   5463       line->w32[lineoff] = d32;
   5464    }
   5465 }
   5466 
   5467 void VG_REGPARM(2) MC_(helperc_b_store4)( Addr a, UWord d32 ) {
   5468    OCacheLine* line;
   5469    UWord lineoff;
   5470 
   5471    if (UNLIKELY(a & 3)) {
   5472       /* Handle misaligned case, slowly. */
   5473       MC_(helperc_b_store2)( a + 0, d32 );
   5474       MC_(helperc_b_store2)( a + 2, d32 );
   5475       return;
   5476    }
   5477 
   5478    lineoff = oc_line_offset(a);
   5479    if (OC_ENABLE_ASSERTIONS) {
   5480       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   5481    }
   5482 
   5483    line = find_OCacheLine( a );
   5484 
   5485    if (d32 == 0) {
   5486       line->descr[lineoff] = 0;
   5487    } else {
   5488       line->descr[lineoff] = 0xF;
   5489       line->w32[lineoff] = d32;
   5490    }
   5491 }
   5492 
   5493 void VG_REGPARM(2) MC_(helperc_b_store8)( Addr a, UWord d32 ) {
   5494    OCacheLine* line;
   5495    UWord lineoff;
   5496 
   5497    if (UNLIKELY(a & 7)) {
   5498       /* Handle misaligned case, slowly. */
   5499       MC_(helperc_b_store4)( a + 0, d32 );
   5500       MC_(helperc_b_store4)( a + 4, d32 );
   5501       return;
   5502    }
   5503 
   5504    lineoff = oc_line_offset(a);
   5505    if (OC_ENABLE_ASSERTIONS) {
   5506       tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
   5507    }
   5508 
   5509    line = find_OCacheLine( a );
   5510 
   5511    if (d32 == 0) {
   5512       line->descr[lineoff + 0] = 0;
   5513       line->descr[lineoff + 1] = 0;
   5514    } else {
   5515       line->descr[lineoff + 0] = 0xF;
   5516       line->descr[lineoff + 1] = 0xF;
   5517       line->w32[lineoff + 0] = d32;
   5518       line->w32[lineoff + 1] = d32;
   5519    }
   5520 }
   5521 
   5522 void VG_REGPARM(2) MC_(helperc_b_store16)( Addr a, UWord d32 ) {
   5523    MC_(helperc_b_store8)( a + 0, d32 );
   5524    MC_(helperc_b_store8)( a + 8, d32 );
   5525 }
   5526 
   5527 
   5528 /*--------------------------------------------*/
   5529 /*--- Origin tracking: sarp handlers       ---*/
   5530 /*--------------------------------------------*/
   5531 
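        /* Set/Clear walk an arbitrary [a, a+len) range using only the aligned
           store helpers above: peel off a 1-byte and/or 2-byte head until a is
           4-aligned, do 4-byte stores for the bulk, then mop up a 2-byte and/or
           1-byte tail.  Worked example with a = 0x1001, len = 7:
              helperc_b_store1(0x1001)  ->  a = 0x1002, len = 6
              helperc_b_store2(0x1002)  ->  a = 0x1004, len = 4
              helperc_b_store4(0x1004)  ->  a = 0x1008, len = 0
           Every byte is visited exactly once, so the final assert holds. */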
   5532 __attribute__((noinline))
   5533 static void ocache_sarp_Set_Origins ( Addr a, UWord len, UInt otag ) {
   5534    if ((a & 1) && len >= 1) {
   5535       MC_(helperc_b_store1)( a, otag );
   5536       a++;
   5537       len--;
   5538    }
   5539    if ((a & 2) && len >= 2) {
   5540       MC_(helperc_b_store2)( a, otag );
   5541       a += 2;
   5542       len -= 2;
   5543    }
   5544    if (len >= 4)
   5545       tl_assert(0 == (a & 3));
   5546    while (len >= 4) {
   5547       MC_(helperc_b_store4)( a, otag );
   5548       a += 4;
   5549       len -= 4;
   5550    }
   5551    if (len >= 2) {
   5552       MC_(helperc_b_store2)( a, otag );
   5553       a += 2;
   5554       len -= 2;
   5555    }
   5556    if (len >= 1) {
   5557       MC_(helperc_b_store1)( a, otag );
   5558       //a++;
   5559       len--;
   5560    }
   5561    tl_assert(len == 0);
   5562 }
   5563 
   5564 __attribute__((noinline))
   5565 static void ocache_sarp_Clear_Origins ( Addr a, UWord len ) {
   5566    if ((a & 1) && len >= 1) {
   5567       MC_(helperc_b_store1)( a, 0 );
   5568       a++;
   5569       len--;
   5570    }
   5571    if ((a & 2) && len >= 2) {
   5572       MC_(helperc_b_store2)( a, 0 );
   5573       a += 2;
   5574       len -= 2;
   5575    }
   5576    if (len >= 4)
   5577       tl_assert(0 == (a & 3));
   5578    while (len >= 4) {
   5579       MC_(helperc_b_store4)( a, 0 );
   5580       a += 4;
   5581       len -= 4;
   5582    }
   5583    if (len >= 2) {
   5584       MC_(helperc_b_store2)( a, 0 );
   5585       a += 2;
   5586       len -= 2;
   5587    }
   5588    if (len >= 1) {
   5589       MC_(helperc_b_store1)( a, 0 );
   5590       //a++;
   5591       len--;
   5592    }
   5593    tl_assert(len == 0);
   5594 }
   5595 
   5596 
   5597 /*------------------------------------------------------------*/
   5598 /*--- Setup and finalisation                               ---*/
   5599 /*------------------------------------------------------------*/
   5600 
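        /* A reminder of the MC_(clo_mc_level) encoding relied on below (set up
           by mc_process_cmd_line_options; the option names are believed to be):
              1 = track addressability (A bits) only     (--undef-value-errors=no)
              2 = also track definedness (V bits)        (the default)
              3 = also track origins of undefined values (--track-origins=yes)
           Hence the == 3 / >= 3 tests that guard the origin-tracking setup. */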
   5601 static void mc_post_clo_init ( void )
   5602 {
   5603    tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
   5604 
   5605    if (MC_(clo_mc_level) == 3) {
   5606       /* We're doing origin tracking. */
   5607 #     ifdef PERF_FAST_STACK
   5608       VG_(track_new_mem_stack_4_w_ECU)   ( mc_new_mem_stack_4_w_ECU   );
   5609       VG_(track_new_mem_stack_8_w_ECU)   ( mc_new_mem_stack_8_w_ECU   );
   5610       VG_(track_new_mem_stack_12_w_ECU)  ( mc_new_mem_stack_12_w_ECU  );
   5611       VG_(track_new_mem_stack_16_w_ECU)  ( mc_new_mem_stack_16_w_ECU  );
   5612       VG_(track_new_mem_stack_32_w_ECU)  ( mc_new_mem_stack_32_w_ECU  );
   5613       VG_(track_new_mem_stack_112_w_ECU) ( mc_new_mem_stack_112_w_ECU );
   5614       VG_(track_new_mem_stack_128_w_ECU) ( mc_new_mem_stack_128_w_ECU );
   5615       VG_(track_new_mem_stack_144_w_ECU) ( mc_new_mem_stack_144_w_ECU );
   5616       VG_(track_new_mem_stack_160_w_ECU) ( mc_new_mem_stack_160_w_ECU );
   5617 #     endif
   5618       VG_(track_new_mem_stack_w_ECU)     ( mc_new_mem_stack_w_ECU     );
   5619    } else {
   5620       /* Not doing origin tracking */
   5621 #     ifdef PERF_FAST_STACK
   5622       VG_(track_new_mem_stack_4)   ( mc_new_mem_stack_4   );
   5623       VG_(track_new_mem_stack_8)   ( mc_new_mem_stack_8   );
   5624       VG_(track_new_mem_stack_12)  ( mc_new_mem_stack_12  );
   5625       VG_(track_new_mem_stack_16)  ( mc_new_mem_stack_16  );
   5626       VG_(track_new_mem_stack_32)  ( mc_new_mem_stack_32  );
   5627       VG_(track_new_mem_stack_112) ( mc_new_mem_stack_112 );
   5628       VG_(track_new_mem_stack_128) ( mc_new_mem_stack_128 );
   5629       VG_(track_new_mem_stack_144) ( mc_new_mem_stack_144 );
   5630       VG_(track_new_mem_stack_160) ( mc_new_mem_stack_160 );
   5631 #     endif
   5632       VG_(track_new_mem_stack)     ( mc_new_mem_stack     );
   5633    }
   5634 
   5635    /* This origin tracking cache is huge (~100M), so only initialise
   5636       if we need it. */
   5637    if (MC_(clo_mc_level) >= 3) {
   5638       init_OCache();
   5639       tl_assert(ocacheL1 != NULL);
   5640       tl_assert(ocacheL2 != NULL);
   5641    } else {
   5642       tl_assert(ocacheL1 == NULL);
   5643       tl_assert(ocacheL2 == NULL);
   5644    }
   5645 }
   5646 
   5647 static void print_SM_info(char* type, int n_SMs)
   5648 {
   5649    VG_(message)(Vg_DebugMsg,
   5650       " memcheck: SMs: %s = %d (%ldk, %ldM)\n",
   5651       type,
   5652       n_SMs,
   5653       n_SMs * sizeof(SecMap) / 1024UL,
   5654       n_SMs * sizeof(SecMap) / (1024 * 1024UL) );
   5655 }
   5656 
   5657 static void mc_fini ( Int exitcode )
   5658 {
   5659    MC_(print_malloc_stats)();
   5660 
   5661    if (MC_(clo_leak_check) != LC_Off) {
   5662       MC_(detect_memory_leaks)(1/*bogus ThreadId*/, MC_(clo_leak_check));
   5663    } else {
   5664       if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
   5665          VG_(umsg)(
   5666             "For a detailed leak analysis, rerun with: --leak-check=full\n"
   5667             "\n"
   5668          );
   5669       }
   5670    }
   5671 
   5672    if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
   5673       VG_(message)(Vg_UserMsg,
   5674                    "For counts of detected and suppressed errors, rerun with: -v\n");
   5675    }
   5676 
   5677    if (MC_(any_value_errors) && !VG_(clo_xml) && VG_(clo_verbosity) >= 1
   5678        && MC_(clo_mc_level) == 2) {
   5679       VG_(message)(Vg_UserMsg,
   5680                    "Use --track-origins=yes to see where "
   5681                    "uninitialised values come from\n");
   5682    }
   5683 
   5684    done_prof_mem();
   5685 
   5686    if (VG_(clo_stats)) {
   5687       SizeT max_secVBit_szB, max_SMs_szB, max_shmem_szB;
   5688 
   5689       VG_(message)(Vg_DebugMsg,
   5690          " memcheck: sanity checks: %d cheap, %d expensive\n",
   5691          n_sanity_cheap, n_sanity_expensive );
   5692       VG_(message)(Vg_DebugMsg,
   5693          " memcheck: auxmaps: %lld auxmap entries (%lldk, %lldM) in use\n",
   5694          n_auxmap_L2_nodes,
   5695          n_auxmap_L2_nodes * 64,
   5696          n_auxmap_L2_nodes / 16 );
   5697       VG_(message)(Vg_DebugMsg,
   5698          " memcheck: auxmaps_L1: %lld searches, %lld cmps, ratio %lld:10\n",
   5699          n_auxmap_L1_searches, n_auxmap_L1_cmps,
   5700          (10ULL * n_auxmap_L1_cmps)
   5701             / (n_auxmap_L1_searches ? n_auxmap_L1_searches : 1)
   5702       );
   5703       VG_(message)(Vg_DebugMsg,
   5704          " memcheck: auxmaps_L2: %lld searches, %lld nodes\n",
   5705          n_auxmap_L2_searches, n_auxmap_L2_nodes
   5706       );
   5707 
   5708       print_SM_info("n_issued     ", n_issued_SMs);
   5709       print_SM_info("n_deissued   ", n_deissued_SMs);
   5710       print_SM_info("max_noaccess ", max_noaccess_SMs);
   5711       print_SM_info("max_undefined", max_undefined_SMs);
   5712       print_SM_info("max_defined  ", max_defined_SMs);
   5713       print_SM_info("max_non_DSM  ", max_non_DSM_SMs);
   5714 
   5715       // Three DSMs, plus the non-DSM ones
   5716       max_SMs_szB = (3 + max_non_DSM_SMs) * sizeof(SecMap);
   5717       // The 3*sizeof(Word) bytes is the AVL node metadata size.
   5718       // The 4*sizeof(Word) bytes is the malloc metadata size.
   5719       // Hardwiring these sizes in sucks, but I don't see how else to do it.
   5720       max_secVBit_szB = max_secVBit_nodes *
   5721             (sizeof(SecVBitNode) + 3*sizeof(Word) + 4*sizeof(Word));
   5722       max_shmem_szB   = sizeof(primary_map) + max_SMs_szB + max_secVBit_szB;
   5723 
   5724       VG_(message)(Vg_DebugMsg,
   5725          " memcheck: max sec V bit nodes:    %d (%ldk, %ldM)\n",
   5726          max_secVBit_nodes, max_secVBit_szB / 1024,
   5727                             max_secVBit_szB / (1024 * 1024));
   5728       VG_(message)(Vg_DebugMsg,
   5729          " memcheck: set_sec_vbits8 calls: %llu (new: %llu, updates: %llu)\n",
   5730          sec_vbits_new_nodes + sec_vbits_updates,
   5731          sec_vbits_new_nodes, sec_vbits_updates );
   5732       VG_(message)(Vg_DebugMsg,
   5733          " memcheck: max shadow mem size:   %ldk, %ldM\n",
   5734          max_shmem_szB / 1024, max_shmem_szB / (1024 * 1024));
   5735 
   5736       if (MC_(clo_mc_level) >= 3) {
   5737          VG_(message)(Vg_DebugMsg,
   5738                       " ocacheL1: %'12lu refs   %'12lu misses (%'lu lossage)\n",
   5739                       stats_ocacheL1_find,
   5740                       stats_ocacheL1_misses,
   5741                       stats_ocacheL1_lossage );
   5742          VG_(message)(Vg_DebugMsg,
   5743                       " ocacheL1: %'12lu at 0   %'12lu at 1\n",
   5744                       stats_ocacheL1_find - stats_ocacheL1_misses
   5745                          - stats_ocacheL1_found_at_1
   5746                          - stats_ocacheL1_found_at_N,
   5747                       stats_ocacheL1_found_at_1 );
   5748          VG_(message)(Vg_DebugMsg,
   5749                       " ocacheL1: %'12lu at 2+  %'12lu move-fwds\n",
   5750                       stats_ocacheL1_found_at_N,
   5751                       stats_ocacheL1_movefwds );
   5752          VG_(message)(Vg_DebugMsg,
   5753                       " ocacheL1: %'12lu sizeB  %'12u useful\n",
   5754                       (UWord)sizeof(OCache),
   5755                       4 * OC_W32S_PER_LINE * OC_LINES_PER_SET * OC_N_SETS );
   5756          VG_(message)(Vg_DebugMsg,
   5757                       " ocacheL2: %'12lu refs   %'12lu misses\n",
   5758                       stats__ocacheL2_refs,
   5759                       stats__ocacheL2_misses );
   5760          VG_(message)(Vg_DebugMsg,
   5761                       " ocacheL2:    %'9lu max nodes %'9lu curr nodes\n",
   5762                       stats__ocacheL2_n_nodes_max,
   5763                       stats__ocacheL2_n_nodes );
   5764          VG_(message)(Vg_DebugMsg,
   5765                       " niacache: %'12lu refs   %'12lu misses\n",
   5766                       stats__nia_cache_queries, stats__nia_cache_misses);
   5767       } else {
   5768          tl_assert(ocacheL1 == NULL);
   5769          tl_assert(ocacheL2 == NULL);
   5770       }
   5771    }
   5772 
   5773    if (0) {
   5774       VG_(message)(Vg_DebugMsg,
   5775         "------ Valgrind's client block stats follow ---------------\n" );
   5776       show_client_block_stats();
   5777    }
   5778 }
   5779 
   5780 static void mc_pre_clo_init(void)
   5781 {
   5782    VG_(details_name)            ("Memcheck");
   5783    VG_(details_version)         (NULL);
   5784    VG_(details_description)     ("a memory error detector");
   5785    VG_(details_copyright_author)(
   5786       "Copyright (C) 2002-2010, and GNU GPL'd, by Julian Seward et al.");
   5787    VG_(details_bug_reports_to)  (VG_BUGS_TO);
   5788    VG_(details_avg_translation_sizeB) ( 556 );
   5789 
   5790    VG_(basic_tool_funcs)          (mc_post_clo_init,
   5791                                    MC_(instrument),
   5792                                    mc_fini);
   5793 
   5794    VG_(needs_final_IR_tidy_pass)  ( MC_(final_tidy) );
   5795 
   5796 
   5797    VG_(needs_core_errors)         ();
   5798    VG_(needs_tool_errors)         (MC_(eq_Error),
   5799                                    MC_(before_pp_Error),
   5800                                    MC_(pp_Error),
   5801                                    True,/*show TIDs for errors*/
   5802                                    MC_(update_Error_extra),
   5803                                    MC_(is_recognised_suppression),
   5804                                    MC_(read_extra_suppression_info),
   5805                                    MC_(error_matches_suppression),
   5806                                    MC_(get_error_name),
   5807                                    MC_(get_extra_suppression_info));
   5808    VG_(needs_libc_freeres)        ();
   5809    VG_(needs_command_line_options)(mc_process_cmd_line_options,
   5810                                    mc_print_usage,
   5811                                    mc_print_debug_usage);
   5812    VG_(needs_client_requests)     (mc_handle_client_request);
   5813    VG_(needs_sanity_checks)       (mc_cheap_sanity_check,
   5814                                    mc_expensive_sanity_check);
   5815    VG_(needs_malloc_replacement)  (MC_(malloc),
   5816                                    MC_(__builtin_new),
   5817                                    MC_(__builtin_vec_new),
   5818                                    MC_(memalign),
   5819                                    MC_(calloc),
   5820                                    MC_(free),
   5821                                    MC_(__builtin_delete),
   5822                                    MC_(__builtin_vec_delete),
   5823                                    MC_(realloc),
   5824                                    MC_(malloc_usable_size),
   5825                                    MC_MALLOC_REDZONE_SZB );
   5826 
   5827    VG_(needs_xml_output)          ();
   5828 
   5829    VG_(track_new_mem_startup)     ( mc_new_mem_startup );
   5830    VG_(track_new_mem_stack_signal)( make_mem_undefined_w_tid );
   5831    // We assume that brk()/sbrk() does not initialise new memory.  Is this
   5832    // accurate?  John Reiser says:
   5833    //
   5834    //   0) sbrk() can *decrease* process address space.  No zero fill is done
   5835    //   for a decrease, not even the fragment on the high end of the last page
   5836    //   that is beyond the new highest address.  For maximum safety and
   5837    //   portability, then the bytes in the last page that reside above [the
   5838    //   new] sbrk(0) should be considered to be uninitialized, but in practice
   5839    //   it is exceedingly likely that they will retain their previous
   5840    //   contents.
   5841    //
   5842    //   1) If an increase is large enough to require new whole pages, then
   5843    //   those new whole pages (like all new pages) are zero-filled by the
   5844    //   operating system.  So if sbrk(0) already is page aligned, then
   5845    //   sbrk(PAGE_SIZE) *does* zero-fill the new memory.
   5846    //
   5847    //   2) Any increase that lies within an existing allocated page is not
   5848    //   changed.  So if (x = sbrk(0)) is not page aligned, then
   5849    //   sbrk(PAGE_SIZE) yields ((PAGE_SIZE -1) & -x) bytes which keep their
   5850    //   existing contents, and an additional PAGE_SIZE bytes which are zeroed.
   5851    //   ((PAGE_SIZE -1) & x) of them are "covered" by the sbrk(), and the rest
   5852    //   of them come along for the ride because the operating system deals
   5853    //   only in whole pages.  Again, for maximum safety and portability, then
   5854    //   anything that lives above [the new] sbrk(0) should be considered
   5855    //   uninitialized, but in practice will retain previous contents [zero in
   5856    //   this case.]"
   5857    //
   5858    // In short:
   5859    //
   5860    //   A key property of sbrk/brk is that new whole pages that are supplied
   5861    //   by the operating system *do* get initialized to zero.
   5862    //
   5863    // As for the portability of all this:
   5864    //
   5865    //   sbrk and brk are not POSIX.  However, any system that is a derivative
   5866    //   of *nix has sbrk and brk because too much software (such as
   5867    //   the Bourne shell) relies on the traditional memory map (.text,
   5868    //   .data+.bss, stack) and the existence of sbrk/brk.
   5869    //
   5870    // So we should arguably observe all this.  However:
   5871    // - The current inaccuracy has caused maybe one complaint in seven years(?)
   5872    // - Relying on the zeroed-ness of whole brk'd pages is pretty grotty... I
   5873    //   doubt most programmers know the above information.
   5874    // So I'm not terribly unhappy with marking it as undefined. --njn.
   5875    //
   5876    // [More:  I think most of what John said only applies to sbrk().  It seems
   5877    // that brk() always deals in whole pages.  And since this event deals
   5878    // directly with brk(), not with sbrk(), perhaps it would be reasonable to
   5879    // just mark all memory it allocates as defined.]
   5880    //
   5881    VG_(track_new_mem_brk)         ( make_mem_undefined_w_tid );
   5882 
   5883    // Handling of mmap and mprotect isn't simple (well, it is simple,
   5884    // but the justification isn't).  See comments above, just prior to
   5885    // mc_new_mem_mmap.
   5886    VG_(track_new_mem_mmap)        ( mc_new_mem_mmap );
   5887    VG_(track_change_mem_mprotect) ( mc_new_mem_mprotect );
   5888 
   5889    VG_(track_copy_mem_remap)      ( MC_(copy_address_range_state) );
   5890 
   5891    VG_(track_die_mem_stack_signal)( MC_(make_mem_noaccess) );
   5892    VG_(track_die_mem_brk)         ( MC_(make_mem_noaccess) );
   5893    VG_(track_die_mem_munmap)      ( MC_(make_mem_noaccess) );
   5894 
   5895    /* Defer the specification of the new_mem_stack functions to the
   5896       post_clo_init function, since we need to first parse the command
   5897       line before deciding which set to use. */
   5898 
   5899 #  ifdef PERF_FAST_STACK
   5900    VG_(track_die_mem_stack_4)     ( mc_die_mem_stack_4   );
   5901    VG_(track_die_mem_stack_8)     ( mc_die_mem_stack_8   );
   5902    VG_(track_die_mem_stack_12)    ( mc_die_mem_stack_12  );
   5903    VG_(track_die_mem_stack_16)    ( mc_die_mem_stack_16  );
   5904    VG_(track_die_mem_stack_32)    ( mc_die_mem_stack_32  );
   5905    VG_(track_die_mem_stack_112)   ( mc_die_mem_stack_112 );
   5906    VG_(track_die_mem_stack_128)   ( mc_die_mem_stack_128 );
   5907    VG_(track_die_mem_stack_144)   ( mc_die_mem_stack_144 );
   5908    VG_(track_die_mem_stack_160)   ( mc_die_mem_stack_160 );
   5909 #  endif
   5910    VG_(track_die_mem_stack)       ( mc_die_mem_stack     );
   5911 
   5912    VG_(track_ban_mem_stack)       ( MC_(make_mem_noaccess) );
   5913 
   5914    VG_(track_pre_mem_read)        ( check_mem_is_defined );
   5915    VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
   5916    VG_(track_pre_mem_write)       ( check_mem_is_addressable );
   5917    VG_(track_post_mem_write)      ( mc_post_mem_write );
   5918 
   5919    if (MC_(clo_mc_level) >= 2)
   5920       VG_(track_pre_reg_read)     ( mc_pre_reg_read );
   5921 
   5922    VG_(track_post_reg_write)                  ( mc_post_reg_write );
   5923    VG_(track_post_reg_write_clientcall_return)( mc_post_reg_write_clientcall );
   5924 
   5925    init_shadow_memory();
   5926    MC_(malloc_list)  = VG_(HT_construct)( "MC_(malloc_list)" );
   5927    MC_(mempool_list) = VG_(HT_construct)( "MC_(mempool_list)" );
   5928    init_prof_mem();
   5929 
   5930    tl_assert( mc_expensive_sanity_check() );
   5931 
   5932    // {LOADV,STOREV}[8421] will all fail horribly if this isn't true.
   5933    tl_assert(sizeof(UWord) == sizeof(Addr));
   5934    // Call me paranoid.  I don't care.
   5935    tl_assert(sizeof(void*) == sizeof(Addr));
   5936 
   5937    // BYTES_PER_SEC_VBIT_NODE must be a power of two.
   5938    tl_assert(-1 != VG_(log2)(BYTES_PER_SEC_VBIT_NODE));
   5939 
   5940    /* This is small.  Always initialise it. */
   5941    init_nia_to_ecu_cache();
   5942 
   5943    /* We can't initialise ocacheL1/ocacheL2 yet, since we don't know
   5944       if we need to, since the command line args haven't been
   5945       processed yet.  Hence defer it to mc_post_clo_init. */
   5946    tl_assert(ocacheL1 == NULL);
   5947    tl_assert(ocacheL2 == NULL);
   5948 
   5949    /* Check some important stuff.  See extensive comments above
   5950       re UNALIGNED_OR_HIGH for background. */
   5951 #  if VG_WORDSIZE == 4
   5952    tl_assert(sizeof(void*) == 4);
   5953    tl_assert(sizeof(Addr)  == 4);
   5954    tl_assert(sizeof(UWord) == 4);
   5955    tl_assert(sizeof(Word)  == 4);
   5956    tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFUL);
   5957    tl_assert(MASK(1) == 0UL);
   5958    tl_assert(MASK(2) == 1UL);
   5959    tl_assert(MASK(4) == 3UL);
   5960    tl_assert(MASK(8) == 7UL);
   5961 #  else
   5962    tl_assert(VG_WORDSIZE == 8);
   5963    tl_assert(sizeof(void*) == 8);
   5964    tl_assert(sizeof(Addr)  == 8);
   5965    tl_assert(sizeof(UWord) == 8);
   5966    tl_assert(sizeof(Word)  == 8);
   5967    tl_assert(MAX_PRIMARY_ADDRESS == 0x3FFFFFFFFFULL);
   5968    tl_assert(MASK(1) == 0xFFFFFFC000000000ULL);
   5969    tl_assert(MASK(2) == 0xFFFFFFC000000001ULL);
   5970    tl_assert(MASK(4) == 0xFFFFFFC000000003ULL);
   5971    tl_assert(MASK(8) == 0xFFFFFFC000000007ULL);
   5972 #  endif
   5973 }
   5974 
   5975 VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init)
   5976 
   5977 /*--------------------------------------------------------------------*/
   5978 /*--- end                                                mc_main.c ---*/
   5979 /*--------------------------------------------------------------------*/
   5980