      1 /* -*- mode: C; c-basic-offset: 3; -*- */
      2 
      3 /*--------------------------------------------------------------------*/
      4 /*--- MemCheck: Maintain bitmaps of memory, tracking the           ---*/
      5 /*--- accessibility (A) and validity (V) status of each byte.      ---*/
      6 /*---                                                    mc_main.c ---*/
      7 /*--------------------------------------------------------------------*/
      8 
      9 /*
     10    This file is part of MemCheck, a heavyweight Valgrind tool for
     11    detecting memory errors.
     12 
     13    Copyright (C) 2000-2017 Julian Seward
     14       jseward (at) acm.org
     15 
     16    This program is free software; you can redistribute it and/or
     17    modify it under the terms of the GNU General Public License as
     18    published by the Free Software Foundation; either version 2 of the
     19    License, or (at your option) any later version.
     20 
     21    This program is distributed in the hope that it will be useful, but
     22    WITHOUT ANY WARRANTY; without even the implied warranty of
     23    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     24    General Public License for more details.
     25 
     26    You should have received a copy of the GNU General Public License
     27    along with this program; if not, write to the Free Software
     28    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     29    02111-1307, USA.
     30 
     31    The GNU General Public License is contained in the file COPYING.
     32 */
     33 
     34 #include "pub_tool_basics.h"
     35 #include "pub_tool_aspacemgr.h"
     36 #include "pub_tool_gdbserver.h"
     37 #include "pub_tool_poolalloc.h"
     38 #include "pub_tool_hashtable.h"     // For mc_include.h
     39 #include "pub_tool_libcbase.h"
     40 #include "pub_tool_libcassert.h"
     41 #include "pub_tool_libcprint.h"
     42 #include "pub_tool_machine.h"
     43 #include "pub_tool_mallocfree.h"
     44 #include "pub_tool_options.h"
     45 #include "pub_tool_oset.h"
     46 #include "pub_tool_rangemap.h"
     47 #include "pub_tool_replacemalloc.h"
     48 #include "pub_tool_tooliface.h"
     49 #include "pub_tool_threadstate.h"
     50 #include "pub_tool_xarray.h"
     51 #include "pub_tool_xtree.h"
     52 #include "pub_tool_xtmemory.h"
     53 
     54 #include "mc_include.h"
     55 #include "memcheck.h"   /* for client requests */
     56 
     57 
     58 /* Set to 1 to enable handwritten assembly helpers on targets for
     59    which it is supported. */
     60 #define ENABLE_ASSEMBLY_HELPERS 1
     61 
     62 /* Set to 1 to do a little more sanity checking */
     63 #define VG_DEBUG_MEMORY 0
     64 
     65 #define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)
     66 
     67 static void ocache_sarp_Set_Origins ( Addr, UWord, UInt ); /* fwds */
     68 static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */
     69 
     70 
     71 /*------------------------------------------------------------*/
     72 /*--- Fast-case knobs                                      ---*/
     73 /*------------------------------------------------------------*/
     74 
     75 // Comment these out to disable the fast cases (don't just set them to zero).
     76 
     77 #define PERF_FAST_LOADV    1
     78 #define PERF_FAST_STOREV   1
     79 
     80 #define PERF_FAST_SARP     1
     81 
     82 #define PERF_FAST_STACK    1
     83 #define PERF_FAST_STACK2   1
     84 
     85 /* Change this to 1 to enable assertions on origin tracking cache fast
     86    paths */
     87 #define OC_ENABLE_ASSERTIONS 0
     88 
     89 
     90 /*------------------------------------------------------------*/
     91 /*--- Comments on the origin tracking implementation       ---*/
     92 /*------------------------------------------------------------*/
     93 
     94 /* See detailed comment entitled
     95    AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
     96    which is contained further on in this file. */
     97 
     98 
     99 /*------------------------------------------------------------*/
    100 /*--- V bits and A bits                                    ---*/
    101 /*------------------------------------------------------------*/
    102 
    103 /* Conceptually, every byte value has 8 V bits, which track whether Memcheck
    104    thinks the corresponding value bit is defined.  And every memory byte
    105    has an A bit, which tracks whether Memcheck thinks the program can access
    106    it safely (ie. it's mapped, and has at least one of the RWX permission bits
    107    set).  So every N-bit register is shadowed with N V bits, and every memory
    108    byte is shadowed with 8 V bits and one A bit.
    109 
    110    In the implementation, we use two forms of compression (compressed V bits
    111    and distinguished secondary maps) to avoid the 9-bit-per-byte overhead
    112    for memory.
    113 
    114    Memcheck also tracks extra information about each heap block that is
    115    allocated, for detecting memory leaks and other purposes.
    116 */
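
/* Illustrative example (informal): a 32-bit register is shadowed by 32 V
   bits, and a freshly malloc'd byte is addressable but has all 8 of its V
   bits marked undefined; storing a constant into it makes them all
   defined.  (The bit-level convention, asserted later in this file, is
   V_BIT_UNDEFINED == 1 and V_BIT_DEFINED == 0.) */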
    117 
    118 /*------------------------------------------------------------*/
    119 /*--- Basic A/V bitmap representation.                     ---*/
    120 /*------------------------------------------------------------*/
    121 
    122 /* All reads and writes are checked against a memory map (a.k.a. shadow
    123    memory), which records the state of all memory in the process.
    124 
    125    On 32-bit machines the memory map is organised as follows.
    126    The top 16 bits of an address are used to index into a top-level
    127    map table, containing 65536 entries.  Each entry is a pointer to a
    128    second-level map, which records the accessibility and validity
    129    permissions for the 65536 bytes indexed by the lower 16 bits of the
    130    address.  Each byte is represented by two bits (details are below).  So
    131    each second-level map contains 16384 bytes.  This two-level arrangement
    132    conveniently divides the 4G address space into 64k lumps, each size 64k
    133    conveniently divides the 4G address space into 64k lumps, each of size 64k
    134 
    135    All entries in the primary (top-level) map must point to a valid
    136    secondary (second-level) map.  Since many of the 64kB chunks will
    137    have the same status for every bit -- ie. noaccess (for unused
    138    address space) or entirely addressable and defined (for code segments) --
    139    there are three distinguished secondary maps, which indicate 'noaccess',
    140    'undefined' and 'defined'.  For these uniform 64kB chunks, the primary
    141    map entry points to the relevant distinguished map.  In practice,
    142    typically more than half of the addressable memory is represented with
    143    the 'undefined' or 'defined' distinguished secondary map, so it gives a
    144    good saving.  It also lets us set the V+A bits of large address regions
    145    quickly in set_address_range_perms().
    146 
    147    On 64-bit machines it's more complicated.  If we followed the same basic
    148    scheme we'd have a four-level table which would require too many memory
    149    accesses.  So instead the top-level map table has 2^20 entries (indexed
    150    accesses.  So instead the top-level map table has 2^21 entries (indexed
    151    using bits 16..36 of the address);  this covers the bottom 128GB.  Any
    152    accesses above 128GB are handled with a slow, sparse auxiliary table.
    153    Valgrind's address space manager tries very hard to keep things below
    154    this 128GB barrier so that performance doesn't suffer too much.
    155    Note that this file has a lot of different functions for reading and
    156    writing shadow memory.  Only a couple are strictly necessary (eg.
    157    get_vabits2 and set_vabits2), most are just specialised for specific
    158    common cases to improve performance.
    159 
    160    Aside: the V+A bits are less precise than they could be -- we have no way
    161    of marking memory as read-only.  It would be great if we could add an
    162    extra state VA_BITSn_READONLY.  But then we'd have 5 different states,
    163    which requires 2.3 bits to hold, and there's no way to do that elegantly
    164    -- we'd have to double up to 4 bits of metadata per byte, which doesn't
    165    seem worth it.
    166 */
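
/* Worked example (illustrative, hypothetical address): on a 32-bit target,
   for an access at a = 0x8048A3C2 the top 16 bits (0x8048) index the
   primary map, and the low 16 bits (0xA3C2) select the byte's V+A bits
   within the 16384-byte secondary map that the entry points at. */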
    167 
    168 /* --------------- Basic configuration --------------- */
    169 
    170 /* Only change this.  N_PRIMARY_MAP *must* be a power of 2. */
    171 
    172 #if VG_WORDSIZE == 4
    173 
    174 /* cover the entire address space */
    175 #  define N_PRIMARY_BITS  16
    176 
    177 #else
    178 
    179 /* Just handle the first 128G fast and the rest via auxiliary
    180    primaries.  If you change this, Memcheck will assert at startup.
    181    See the definition of UNALIGNED_OR_HIGH for extensive comments. */
    182 #  define N_PRIMARY_BITS  21
    183 
    184 #endif
    185 
    186 
    187 /* Do not change this. */
    188 #define N_PRIMARY_MAP  ( ((UWord)1) << N_PRIMARY_BITS)
    189 
    190 /* Do not change this. */
    191 #define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1)
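
/* Sanity check on the arithmetic (illustrative): with N_PRIMARY_BITS == 16
   the primary map spans 2^16 * 64kB = 4GB, so MAX_PRIMARY_ADDRESS is
   0xFFFFFFFF; with N_PRIMARY_BITS == 21 it spans 2^21 * 64kB = 128GB, so
   MAX_PRIMARY_ADDRESS is 0x1FFFFFFFFF. */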
    192 
    193 
    194 /* --------------- Secondary maps --------------- */
    195 
    196 // Each byte of memory conceptually has an A bit, which indicates its
    197 // addressability, and 8 V bits, which indicate its definedness.
    198 //
    199 // But because very few bytes are partially defined, we can use a nice
    200 // compression scheme to reduce the size of shadow memory.  Each byte of
    201 // memory has 2 bits which indicate its state (ie. V+A bits):
    202 //
    203 //   00:  noaccess    (unaddressable but treated as fully defined)
    204 //   01:  undefined   (addressable and fully undefined)
    205 //   10:  defined     (addressable and fully defined)
    206 //   11:  partdefined (addressable and partially defined)
    207 //
    208 // In the "partdefined" case, we use a secondary table to store the V bits.
    209 // Each entry in the secondary-V-bits table maps a byte address to its 8 V
    210 // bits.
    211 //
    212 // We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for
    213 // four bytes (32 bits) of memory are in each chunk.  Hence the name
    214 // "vabits8".  This lets us get the V+A bits for four bytes at a time
    215 // easily (without having to do any shifting and/or masking), and that is a
    216 // very common operation.  (Note that although each vabits8 chunk
    217 // is 8 bits in size, it represents 32 bits of memory.)
    218 //
    219 // The representation is "inverse" little-endian... each 4 bytes of
    220 // memory is represented by a 1 byte value, where:
    221 //
    222 // - the status of byte (a+0) is held in bits [1..0]
    223 // - the status of byte (a+1) is held in bits [3..2]
    224 // - the status of byte (a+2) is held in bits [5..4]
    225 // - the status of byte (a+3) is held in bits [7..6]
    226 //
    227 // It's "inverse" because endianness normally describes a mapping from
    228 // value bits to memory addresses;  in this case the mapping is inverted.
    229 // Ie. instead of particular value bits being held in certain addresses, in
    230 // this case certain addresses are represented by particular value bits.
    231 // See insert_vabits2_into_vabits8() for an example.
    232 //
    233 // But note that we don't compress the V bits stored in registers;  they
    234 // need to be explicit to make the shadow operations possible.  Therefore
    235 // when moving values between registers and memory we need to convert
    236 // between the expanded in-register format and the compressed in-memory
    237 // format.  This isn't so difficult, it just requires careful attention in a
    238 // few places.
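
// Worked example (illustrative, hypothetical states): if the four bytes at
// a 4-aligned address a are, in order,
//   a+0 defined (10b), a+1 undefined (01b), a+2 noaccess (00b),
//   a+3 partdefined (11b),
// then the single vabits8 value describing them is 11_00_01_10b == 0xC6.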
    239 
    240 // These represent 8 bits of memory.
    241 #define VA_BITS2_NOACCESS     0x0      // 00b
    242 #define VA_BITS2_UNDEFINED    0x1      // 01b
    243 #define VA_BITS2_DEFINED      0x2      // 10b
    244 #define VA_BITS2_PARTDEFINED  0x3      // 11b
    245 
    246 // These represent 16 bits of memory.
    247 #define VA_BITS4_NOACCESS     0x0      // 00_00b
    248 #define VA_BITS4_UNDEFINED    0x5      // 01_01b
    249 #define VA_BITS4_DEFINED      0xa      // 10_10b
    250 
    251 // These represent 32 bits of memory.
    252 #define VA_BITS8_NOACCESS     0x00     // 00_00_00_00b
    253 #define VA_BITS8_UNDEFINED    0x55     // 01_01_01_01b
    254 #define VA_BITS8_DEFINED      0xaa     // 10_10_10_10b
    255 
    256 // These represent 64 bits of memory.
    257 #define VA_BITS16_NOACCESS    0x0000   // 00_00_00_00b x 2
    258 #define VA_BITS16_UNDEFINED   0x5555   // 01_01_01_01b x 2
    259 #define VA_BITS16_DEFINED     0xaaaa   // 10_10_10_10b x 2
    260 
    261 // These represent 128 bits of memory.
    262 #define VA_BITS32_UNDEFINED   0x55555555  // 01_01_01_01b x 4
    263 
    264 
    265 #define SM_CHUNKS             16384    // Each SM covers 64k of memory.
    266 #define SM_OFF(aaa)           (((aaa) & 0xffff) >> 2)
    267 #define SM_OFF_16(aaa)        (((aaa) & 0xffff) >> 3)
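
// Worked example (illustrative, hypothetical address): for a byte at
// a = 0x50001237, SM_OFF(a) == (0x1237 >> 2) == 0x48D, so its two V+A bits
// live in sm->vabits8[0x48D]; SM_OFF_16(a) == 0x246 selects the vabits16
// halfword covering the enclosing group of eight bytes.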
    268 
    269 // Paranoia:  it's critical for performance that the requested inlining
    270 // occurs.  So try extra hard.
    271 #define INLINE    inline __attribute__((always_inline))
    272 
    273 static INLINE Addr start_of_this_sm ( Addr a ) {
    274    return (a & (~SM_MASK));
    275 }
    276 static INLINE Bool is_start_of_sm ( Addr a ) {
    277    return (start_of_this_sm(a) == a);
    278 }
    279 
    280 STATIC_ASSERT(SM_CHUNKS % 2 == 0);
    281 
    282 typedef
    283    union {
    284       UChar vabits8[SM_CHUNKS];
    285       UShort vabits16[SM_CHUNKS/2];
    286    }
    287    SecMap;
    288 
    289 // 3 distinguished secondary maps, one for no-access, one for
    290 // accessible but undefined, and one for accessible and defined.
    291 // Distinguished secondaries may never be modified.
    292 #define SM_DIST_NOACCESS   0
    293 #define SM_DIST_UNDEFINED  1
    294 #define SM_DIST_DEFINED    2
    295 
    296 static SecMap sm_distinguished[3];
    297 
    298 static INLINE Bool is_distinguished_sm ( SecMap* sm ) {
    299    return sm >= &sm_distinguished[0] && sm <= &sm_distinguished[2];
    300 }
    301 
    302 // Forward declaration
    303 static void update_SM_counts(SecMap* oldSM, SecMap* newSM);
    304 
    305 /* dist_sm points to one of our three distinguished secondaries.  Make
    306    a copy of it so that we can write to it.
    307 */
    308 static SecMap* copy_for_writing ( SecMap* dist_sm )
    309 {
    310    SecMap* new_sm;
    311    tl_assert(dist_sm == &sm_distinguished[0]
    312           || dist_sm == &sm_distinguished[1]
    313           || dist_sm == &sm_distinguished[2]);
    314 
    315    new_sm = VG_(am_shadow_alloc)(sizeof(SecMap));
    316    if (new_sm == NULL)
    317       VG_(out_of_memory_NORETURN)( "memcheck:allocate new SecMap",
    318                                    sizeof(SecMap) );
    319    VG_(memcpy)(new_sm, dist_sm, sizeof(SecMap));
    320    update_SM_counts(dist_sm, new_sm);
    321    return new_sm;
    322 }
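
/* Usage sketch (illustrative): the typical caller pattern is

      SecMap** p = get_secmap_low_ptr(a);
      if (is_distinguished_sm(*p))
         *p = copy_for_writing(*p);   // now (*p)->vabits8[] may be modified

   which is exactly what get_secmap_for_writing_low(), further down, does
   on its unlikely path. */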
    323 
    324 /* --------------- Stats --------------- */
    325 
    326 static Int   n_issued_SMs      = 0;
    327 static Int   n_deissued_SMs    = 0;
    328 static Int   n_noaccess_SMs    = N_PRIMARY_MAP; // start with many noaccess DSMs
    329 static Int   n_undefined_SMs   = 0;
    330 static Int   n_defined_SMs     = 0;
    331 static Int   n_non_DSM_SMs     = 0;
    332 static Int   max_noaccess_SMs  = 0;
    333 static Int   max_undefined_SMs = 0;
    334 static Int   max_defined_SMs   = 0;
    335 static Int   max_non_DSM_SMs   = 0;
    336 
    337 /* # searches initiated in auxmap_L1, and # base cmps required */
    338 static ULong n_auxmap_L1_searches  = 0;
    339 static ULong n_auxmap_L1_cmps      = 0;
    340 /* # of searches that missed in auxmap_L1 and therefore had to
    341    be handed to auxmap_L2. And the number of nodes inserted. */
    342 static ULong n_auxmap_L2_searches  = 0;
    343 static ULong n_auxmap_L2_nodes     = 0;
    344 
    345 static Int   n_sanity_cheap     = 0;
    346 static Int   n_sanity_expensive = 0;
    347 
    348 static Int   n_secVBit_nodes   = 0;
    349 static Int   max_secVBit_nodes = 0;
    350 
    351 static void update_SM_counts(SecMap* oldSM, SecMap* newSM)
    352 {
    353    if      (oldSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs --;
    354    else if (oldSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs--;
    355    else if (oldSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  --;
    356    else                                                  { n_non_DSM_SMs  --;
    357                                                            n_deissued_SMs ++; }
    358 
    359    if      (newSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs ++;
    360    else if (newSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs++;
    361    else if (newSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  ++;
    362    else                                                  { n_non_DSM_SMs  ++;
    363                                                            n_issued_SMs   ++; }
    364 
    365    if (n_noaccess_SMs  > max_noaccess_SMs ) max_noaccess_SMs  = n_noaccess_SMs;
    366    if (n_undefined_SMs > max_undefined_SMs) max_undefined_SMs = n_undefined_SMs;
    367    if (n_defined_SMs   > max_defined_SMs  ) max_defined_SMs   = n_defined_SMs;
    368    if (n_non_DSM_SMs   > max_non_DSM_SMs  ) max_non_DSM_SMs   = n_non_DSM_SMs;
    369 }
    370 
    371 /* --------------- Primary maps --------------- */
    372 
    373 /* The main primary map.  This covers some initial part of the address
    374    space, addresses 0 .. (N_PRIMARY_MAP << 16)-1.  The rest of it is
    375    handled using the auxiliary primary map.
    376 */
    377 static SecMap* primary_map[N_PRIMARY_MAP];
    378 
    379 
    380 /* An entry in the auxiliary primary map.  base must be a 64k-aligned
    381    value, and sm points at the relevant secondary map.  As with the
    382    main primary map, the secondary may be either a real secondary, or
    383    one of the three distinguished secondaries.  DO NOT CHANGE THIS
    384    LAYOUT: the first word has to be the key for OSet fast lookups.
    385 */
    386 typedef
    387    struct {
    388       Addr    base;
    389       SecMap* sm;
    390    }
    391    AuxMapEnt;
    392 
    393 /* Tunable parameter: How big is the L1 queue? */
    394 #define N_AUXMAP_L1 24
    395 
    396 /* Tunable parameter: How far along the L1 queue to insert
    397    entries resulting from L2 lookups? */
    398 #define AUXMAP_L1_INSERT_IX 12
    399 
    400 static struct {
    401           Addr       base;
    402           AuxMapEnt* ent; // pointer to the matching auxmap_L2 node
    403        }
    404        auxmap_L1[N_AUXMAP_L1];
    405 
    406 static OSet* auxmap_L2 = NULL;
    407 
    408 static void init_auxmap_L1_L2 ( void )
    409 {
    410    Int i;
    411    for (i = 0; i < N_AUXMAP_L1; i++) {
    412       auxmap_L1[i].base = 0;
    413       auxmap_L1[i].ent  = NULL;
    414    }
    415 
    416    tl_assert(0 == offsetof(AuxMapEnt,base));
    417    tl_assert(sizeof(Addr) == sizeof(void*));
    418    auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/  offsetof(AuxMapEnt,base),
    419                                     /*fastCmp*/ NULL,
    420                                     VG_(malloc), "mc.iaLL.1", VG_(free) );
    421 }
    422 
    423 /* Check representation invariants; if OK return NULL; else a
    424    descriptive bit of text.  Also return the number of
    425    non-distinguished secondary maps referred to from the auxiliary
    426    primary maps. */
    427 
    428 static const HChar* check_auxmap_L1_L2_sanity ( Word* n_secmaps_found )
    429 {
    430    Word i, j;
    431    /* On a 32-bit platform, the L2 and L1 tables should
    432       both remain empty forever.
    433 
    434       On a 64-bit platform:
    435       In the L2 table:
    436        all .base & 0xFFFF == 0
    437        all .base > MAX_PRIMARY_ADDRESS
    438       In the L1 table:
    439        all .base & 0xFFFF == 0
    440        all (.base > MAX_PRIMARY_ADDRESS
    441             and .base & 0xFFFF == 0
    442             and .ent points to an AuxMapEnt with the same .base)
    443            or
    444            (.base == 0 and .ent == NULL)
    445    */
    446    *n_secmaps_found = 0;
    447    if (sizeof(void*) == 4) {
    448       /* 32-bit platform */
    449       if (VG_(OSetGen_Size)(auxmap_L2) != 0)
    450          return "32-bit: auxmap_L2 is non-empty";
    451       for (i = 0; i < N_AUXMAP_L1; i++)
    452          if (auxmap_L1[i].base != 0 || auxmap_L1[i].ent != NULL)
    453             return "32-bit: auxmap_L1 is non-empty";
    454    } else {
    455       /* 64-bit platform */
    456       UWord elems_seen = 0;
    457       AuxMapEnt *elem, *res;
    458       AuxMapEnt key;
    459       /* L2 table */
    460       VG_(OSetGen_ResetIter)(auxmap_L2);
    461       while ( (elem = VG_(OSetGen_Next)(auxmap_L2)) ) {
    462          elems_seen++;
    463          if (0 != (elem->base & (Addr)0xFFFF))
    464             return "64-bit: nonzero .base & 0xFFFF in auxmap_L2";
    465          if (elem->base <= MAX_PRIMARY_ADDRESS)
    466             return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2";
    467          if (elem->sm == NULL)
    468             return "64-bit: .sm in _L2 is NULL";
    469          if (!is_distinguished_sm(elem->sm))
    470             (*n_secmaps_found)++;
    471       }
    472       if (elems_seen != n_auxmap_L2_nodes)
    473          return "64-bit: disagreement on number of elems in _L2";
    474       /* Check L1-L2 correspondence */
    475       for (i = 0; i < N_AUXMAP_L1; i++) {
    476          if (auxmap_L1[i].base == 0 && auxmap_L1[i].ent == NULL)
    477             continue;
    478          if (0 != (auxmap_L1[i].base & (Addr)0xFFFF))
    479             return "64-bit: nonzero .base & 0xFFFF in auxmap_L1";
    480          if (auxmap_L1[i].base <= MAX_PRIMARY_ADDRESS)
    481             return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1";
    482          if (auxmap_L1[i].ent == NULL)
    483             return "64-bit: .ent is NULL in auxmap_L1";
    484          if (auxmap_L1[i].ent->base != auxmap_L1[i].base)
    485             return "64-bit: _L1 and _L2 bases are inconsistent";
    486          /* Look it up in auxmap_L2. */
    487          key.base = auxmap_L1[i].base;
    488          key.sm   = 0;
    489          res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
    490          if (res == NULL)
    491             return "64-bit: _L1 .base not found in _L2";
    492          if (res != auxmap_L1[i].ent)
    493             return "64-bit: _L1 .ent disagrees with _L2 entry";
    494       }
    495       /* Check L1 contains no duplicates */
    496       for (i = 0; i < N_AUXMAP_L1; i++) {
    497          if (auxmap_L1[i].base == 0)
    498             continue;
    499          for (j = i+1; j < N_AUXMAP_L1; j++) {
    500             if (auxmap_L1[j].base == 0)
    501                continue;
    502             if (auxmap_L1[j].base == auxmap_L1[i].base)
    503                return "64-bit: duplicate _L1 .base entries";
    504          }
    505       }
    506    }
    507    return NULL; /* ok */
    508 }
    509 
    510 static void insert_into_auxmap_L1_at ( Word rank, AuxMapEnt* ent )
    511 {
    512    Word i;
    513    tl_assert(ent);
    514    tl_assert(rank >= 0 && rank < N_AUXMAP_L1);
    515    for (i = N_AUXMAP_L1-1; i > rank; i--)
    516       auxmap_L1[i] = auxmap_L1[i-1];
    517    auxmap_L1[rank].base = ent->base;
    518    auxmap_L1[rank].ent  = ent;
    519 }
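
/* Worked example (illustrative): with N_AUXMAP_L1 == 24 and
   rank == AUXMAP_L1_INSERT_IX == 12, the loop above copies slots 12..22
   into slots 13..23 (discarding the old contents of slot 23) and the new
   entry is then written into slot 12. */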
    520 
    521 static INLINE AuxMapEnt* maybe_find_in_auxmap ( Addr a )
    522 {
    523    AuxMapEnt  key;
    524    AuxMapEnt* res;
    525    Word       i;
    526 
    527    tl_assert(a > MAX_PRIMARY_ADDRESS);
    528    a &= ~(Addr)0xFFFF;
    529 
    530    /* First search the front-cache, which is a self-organising
    531       list containing the most popular entries. */
    532 
    533    if (LIKELY(auxmap_L1[0].base == a))
    534       return auxmap_L1[0].ent;
    535    if (LIKELY(auxmap_L1[1].base == a)) {
    536       Addr       t_base = auxmap_L1[0].base;
    537       AuxMapEnt* t_ent  = auxmap_L1[0].ent;
    538       auxmap_L1[0].base = auxmap_L1[1].base;
    539       auxmap_L1[0].ent  = auxmap_L1[1].ent;
    540       auxmap_L1[1].base = t_base;
    541       auxmap_L1[1].ent  = t_ent;
    542       return auxmap_L1[0].ent;
    543    }
    544 
    545    n_auxmap_L1_searches++;
    546 
    547    for (i = 0; i < N_AUXMAP_L1; i++) {
    548       if (auxmap_L1[i].base == a) {
    549          break;
    550       }
    551    }
    552    tl_assert(i >= 0 && i <= N_AUXMAP_L1);
    553 
    554    n_auxmap_L1_cmps += (ULong)(i+1);
    555 
    556    if (i < N_AUXMAP_L1) {
    557       if (i > 0) {
    558          Addr       t_base = auxmap_L1[i-1].base;
    559          AuxMapEnt* t_ent  = auxmap_L1[i-1].ent;
    560          auxmap_L1[i-1].base = auxmap_L1[i-0].base;
    561          auxmap_L1[i-1].ent  = auxmap_L1[i-0].ent;
    562          auxmap_L1[i-0].base = t_base;
    563          auxmap_L1[i-0].ent  = t_ent;
    564          i--;
    565       }
    566       return auxmap_L1[i].ent;
    567    }
    568 
    569    n_auxmap_L2_searches++;
    570 
    571    /* First see if we already have it. */
    572    key.base = a;
    573    key.sm   = 0;
    574 
    575    res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
    576    if (res)
    577       insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, res );
    578    return res;
    579 }
    580 
    581 static AuxMapEnt* find_or_alloc_in_auxmap ( Addr a )
    582 {
    583    AuxMapEnt *nyu, *res;
    584 
    585    /* First see if we already have it. */
    586    res = maybe_find_in_auxmap( a );
    587    if (LIKELY(res))
    588       return res;
    589 
    590    /* Ok, there's no entry in the auxiliary map, so we'll have
    591       to allocate one. */
    592    a &= ~(Addr)0xFFFF;
    593 
    594    nyu = (AuxMapEnt*) VG_(OSetGen_AllocNode)( auxmap_L2, sizeof(AuxMapEnt) );
    595    nyu->base = a;
    596    nyu->sm   = &sm_distinguished[SM_DIST_NOACCESS];
    597    VG_(OSetGen_Insert)( auxmap_L2, nyu );
    598    insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, nyu );
    599    n_auxmap_L2_nodes++;
    600    return nyu;
    601 }
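
/* Illustrative (hypothetical address): on a 64-bit target, the first
   access to, say, 0x4000000000 (256GB, above MAX_PRIMARY_ADDRESS) misses
   in both L1 and L2, so an AuxMapEnt with .base = 0x4000000000 and
   .sm = &sm_distinguished[SM_DIST_NOACCESS] is allocated, inserted into
   auxmap_L2 and placed at slot AUXMAP_L1_INSERT_IX of auxmap_L1. */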
    602 
    603 /* --------------- SecMap fundamentals --------------- */
    604 
    605 // In all these, 'low' means it's definitely in the main primary map,
    606 // 'high' means it's definitely in the auxiliary table.
    607 
    608 static INLINE UWord get_primary_map_low_offset ( Addr a )
    609 {
    610   UWord pm_off = a >> 16;
    611   return pm_off;
    612 }
    613 
    614 static INLINE SecMap** get_secmap_low_ptr ( Addr a )
    615 {
    616    UWord pm_off = a >> 16;
    617 #  if VG_DEBUG_MEMORY >= 1
    618    tl_assert(pm_off < N_PRIMARY_MAP);
    619 #  endif
    620    return &primary_map[ pm_off ];
    621 }
    622 
    623 static INLINE SecMap** get_secmap_high_ptr ( Addr a )
    624 {
    625    AuxMapEnt* am = find_or_alloc_in_auxmap(a);
    626    return &am->sm;
    627 }
    628 
    629 static INLINE SecMap** get_secmap_ptr ( Addr a )
    630 {
    631    return ( a <= MAX_PRIMARY_ADDRESS
    632           ? get_secmap_low_ptr(a)
    633           : get_secmap_high_ptr(a));
    634 }
    635 
    636 static INLINE SecMap* get_secmap_for_reading_low ( Addr a )
    637 {
    638    return *get_secmap_low_ptr(a);
    639 }
    640 
    641 static INLINE SecMap* get_secmap_for_reading_high ( Addr a )
    642 {
    643    return *get_secmap_high_ptr(a);
    644 }
    645 
    646 static INLINE SecMap* get_secmap_for_writing_low(Addr a)
    647 {
    648    SecMap** p = get_secmap_low_ptr(a);
    649    if (UNLIKELY(is_distinguished_sm(*p)))
    650       *p = copy_for_writing(*p);
    651    return *p;
    652 }
    653 
    654 static INLINE SecMap* get_secmap_for_writing_high ( Addr a )
    655 {
    656    SecMap** p = get_secmap_high_ptr(a);
    657    if (UNLIKELY(is_distinguished_sm(*p)))
    658       *p = copy_for_writing(*p);
    659    return *p;
    660 }
    661 
    662 /* Produce the secmap for 'a', either from the primary map or by
    663    ensuring there is an entry for it in the aux primary map.  The
    664    secmap may be a distinguished one as the caller will only want to
    665    be able to read it.
    666 */
    667 static INLINE SecMap* get_secmap_for_reading ( Addr a )
    668 {
    669    return ( a <= MAX_PRIMARY_ADDRESS
    670           ? get_secmap_for_reading_low (a)
    671           : get_secmap_for_reading_high(a) );
    672 }
    673 
    674 /* Produce the secmap for 'a', either from the primary map or by
    675    ensuring there is an entry for it in the aux primary map.  The
    676    secmap may not be a distinguished one, since the caller will want
    677    to be able to write it.  If it is a distinguished secondary, make a
    678    writable copy of it, install it, and return the copy instead.  (COW
    679    semantics).
    680 */
    681 static INLINE SecMap* get_secmap_for_writing ( Addr a )
    682 {
    683    return ( a <= MAX_PRIMARY_ADDRESS
    684           ? get_secmap_for_writing_low (a)
    685           : get_secmap_for_writing_high(a) );
    686 }
    687 
    688 /* If 'a' has a SecMap, produce it.  Else produce NULL.  But don't
    689    allocate one if one doesn't already exist.  This is used by the
    690    leak checker.
    691 */
    692 static SecMap* maybe_get_secmap_for ( Addr a )
    693 {
    694    if (a <= MAX_PRIMARY_ADDRESS) {
    695       return get_secmap_for_reading_low(a);
    696    } else {
    697       AuxMapEnt* am = maybe_find_in_auxmap(a);
    698       return am ? am->sm : NULL;
    699    }
    700 }
    701 
    702 /* --------------- Fundamental functions --------------- */
    703 
    704 static INLINE
    705 void insert_vabits2_into_vabits8 ( Addr a, UChar vabits2, UChar* vabits8 )
    706 {
    707    UInt shift =  (a & 3)  << 1;        // shift by 0, 2, 4, or 6
    708    *vabits8  &= ~(0x3     << shift);   // mask out the two old bits
    709    *vabits8  |=  (vabits2 << shift);   // mask  in the two new bits
    710 }
    711 
    712 static INLINE
    713 void insert_vabits4_into_vabits8 ( Addr a, UChar vabits4, UChar* vabits8 )
    714 {
    715    UInt shift;
    716    tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
    717    shift     =  (a & 2)   << 1;        // shift by 0 or 4
    718    *vabits8 &= ~(0xf      << shift);   // mask out the four old bits
    719    *vabits8 |=  (vabits4 << shift);    // mask  in the four new bits
    720 }
    721 
    722 static INLINE
    723 UChar extract_vabits2_from_vabits8 ( Addr a, UChar vabits8 )
    724 {
    725    UInt shift = (a & 3) << 1;          // shift by 0, 2, 4, or 6
    726    vabits8 >>= shift;                  // shift the two bits to the bottom
    727    return 0x3 & vabits8;               // mask out the rest
    728 }
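
// Worked example (illustrative): continuing the 0xC6 example near the
// VA_BITS2_* definitions above, extract_vabits2_from_vabits8(a+2, 0xC6)
// computes shift = (2 & 3) << 1 == 4, and (0xC6 >> 4) & 0x3 == 0x0 ==
// VA_BITS2_NOACCESS, as expected for byte a+2.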
    729 
    730 static INLINE
    731 UChar extract_vabits4_from_vabits8 ( Addr a, UChar vabits8 )
    732 {
    733    UInt shift;
    734    tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
    735    shift = (a & 2) << 1;               // shift by 0 or 4
    736    vabits8 >>= shift;                  // shift the four bits to the bottom
    737    return 0xf & vabits8;               // mask out the rest
    738 }
    739 
    740 // Note that these four are only used in slow cases.  The fast cases do
    741 // clever things like combine the auxmap check (in
    742 // get_secmap_{read,writ}able) with alignment checks.
    743 
    744 // *** WARNING! ***
    745 // Any time this function is called, if it is possible that vabits2
    746 // is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the
    747 // sec-V-bits table must also be set!
    748 static INLINE
    749 void set_vabits2 ( Addr a, UChar vabits2 )
    750 {
    751    SecMap* sm       = get_secmap_for_writing(a);
    752    UWord   sm_off   = SM_OFF(a);
    753    insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
    754 }
    755 
    756 static INLINE
    757 UChar get_vabits2 ( Addr a )
    758 {
    759    SecMap* sm       = get_secmap_for_reading(a);
    760    UWord   sm_off   = SM_OFF(a);
    761    UChar   vabits8  = sm->vabits8[sm_off];
    762    return extract_vabits2_from_vabits8(a, vabits8);
    763 }
    764 
    765 // *** WARNING! ***
    766 // Any time this function is called, if it is possible that any of the
    767 // 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the
    768 // corresponding entry(s) in the sec-V-bits table must also be set!
    769 static INLINE
    770 UChar get_vabits8_for_aligned_word32 ( Addr a )
    771 {
    772    SecMap* sm       = get_secmap_for_reading(a);
    773    UWord   sm_off   = SM_OFF(a);
    774    UChar   vabits8  = sm->vabits8[sm_off];
    775    return vabits8;
    776 }
    777 
    778 static INLINE
    779 void set_vabits8_for_aligned_word32 ( Addr a, UChar vabits8 )
    780 {
    781    SecMap* sm       = get_secmap_for_writing(a);
    782    UWord   sm_off   = SM_OFF(a);
    783    sm->vabits8[sm_off] = vabits8;
    784 }
    785 
    786 
    787 // Forward declarations
    788 static UWord get_sec_vbits8(Addr a);
    789 static void  set_sec_vbits8(Addr a, UWord vbits8);
    790 
    791 // Returns False if there was an addressability error.
    792 static INLINE
    793 Bool set_vbits8 ( Addr a, UChar vbits8 )
    794 {
    795    Bool  ok      = True;
    796    UChar vabits2 = get_vabits2(a);
    797    if ( VA_BITS2_NOACCESS != vabits2 ) {
    798       // Addressable.  Convert in-register format to in-memory format.
    799       // Also remove any existing sec V bit entry for the byte if no
    800       // longer necessary.
    801       if      ( V_BITS8_DEFINED   == vbits8 ) { vabits2 = VA_BITS2_DEFINED;   }
    802       else if ( V_BITS8_UNDEFINED == vbits8 ) { vabits2 = VA_BITS2_UNDEFINED; }
    803       else                                    { vabits2 = VA_BITS2_PARTDEFINED;
    804                                                 set_sec_vbits8(a, vbits8);  }
    805       set_vabits2(a, vabits2);
    806 
    807    } else {
    808       // Unaddressable!  Do nothing -- when writing to unaddressable
    809       // memory it acts as a black hole, and the V bits can never be seen
    810       // again.  So we don't have to write them at all.
    811       ok = False;
    812    }
    813    return ok;
    814 }
    815 
    816 // Returns False if there was an addressability error.  In that case, we put
    817 // all defined bits into vbits8.
    818 static INLINE
    819 Bool get_vbits8 ( Addr a, UChar* vbits8 )
    820 {
    821    Bool  ok      = True;
    822    UChar vabits2 = get_vabits2(a);
    823 
    824    // Convert the in-memory format to in-register format.
    825    if      ( VA_BITS2_DEFINED   == vabits2 ) { *vbits8 = V_BITS8_DEFINED;   }
    826    else if ( VA_BITS2_UNDEFINED == vabits2 ) { *vbits8 = V_BITS8_UNDEFINED; }
    827    else if ( VA_BITS2_NOACCESS  == vabits2 ) {
    828       *vbits8 = V_BITS8_DEFINED;    // Make V bits defined!
    829       ok = False;
    830    } else {
    831       tl_assert( VA_BITS2_PARTDEFINED == vabits2 );
    832       *vbits8 = get_sec_vbits8(a);
    833    }
    834    return ok;
    835 }
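
/* Minimal sketch (illustrative; 'src' and 'dst' are hypothetical): the
   slow-path loads/stores below move one byte of V bits between the
   expanded in-register form and the compressed in-memory form using this
   pair, e.g.

      UChar vbits8;
      if (get_vbits8(src, &vbits8))  // consults sec-V-bits if partdefined
         set_vbits8(dst, vbits8);    // re-creates a PDB entry if needed
*/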
    836 
    837 
    838 /* --------------- Secondary V bit table ------------ */
    839 
    840 // This table holds the full V bit pattern for partially-defined bytes
    841 // (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow
    842 // memory.
    843 //
    844 // Note: the nodes in this table can become stale.  Eg. if you write a PDB,
    845 // then overwrite the same address with a fully defined byte, the sec-V-bit
    846 // node will not necessarily be removed.  This is because checking for
    847 // whether removal is necessary would slow down the fast paths.
    848 //
    849 // To avoid the stale nodes building up too much, we periodically (once the
    850 // table reaches a certain size) garbage collect (GC) the table by
    851 // traversing it and evicting any nodes that no longer hold a PDB.
    852 // If more than a certain proportion of nodes survive, we increase the
    853 // table size so that GCs occur less often.
    854 //
    855 // This policy is designed to avoid bad table bloat in the worst case where
    856 // a program creates huge numbers of stale PDBs -- we would get this bloat
    857 // if we had no GC -- while handling well the case where a node becomes
    858 // stale but shortly afterwards is rewritten with a PDB and so becomes
    859 // non-stale again (which happens quite often, eg. in perf/bz2).  If we just
    860 // remove all stale nodes as soon as possible, we just end up re-adding a
    861 // remove all stale nodes as soon as possible, we end up re-adding a
    862 // lot of them later on.  The "sufficiently stale" approach avoids
    863 // there's no way around that.)
    864 //
    865 // Further comments, JRS 14 Feb 2012.  It turns out that the policy of
    866 // holding on to stale entries for 2 GCs before discarding them can lead
    867 // to massive space leaks.  So we're changing to an arrangement where
    868 // lines are evicted as soon as they are observed to be stale during a
    869 // GC.  This also has a side benefit of allowing the sufficiently_stale
    870 // field to be removed from the SecVBitNode struct, reducing its size by
    871 // 8 bytes, which is a substantial space saving considering that the
    872 // struct was previously 32 or so bytes, on a 64 bit target.
    873 //
    874 // In order to try and mitigate the problem that the "sufficiently stale"
    875 // heuristic was designed to avoid, the table size is allowed to drift
    876 // up ("DRIFTUP") slowly to 80000, even if the residency is low.  This
    877 // means that nodes will exist in the table longer on average, and hopefully
    878 // will be deleted and re-added less frequently.
    879 //
    880 // The previous scaling up mechanism (now called STEPUP) is retained:
    881 // if residency exceeds 50%, the table is scaled up, although by a
    882 // factor of sqrt(2) rather than 2 as before.  This effectively doubles the
    883 // frequency of GCs when there are many PDBs and reduces the tendency of
    884 // stale PDBs to reside for long periods in the table.
    885 
    886 static OSet* secVBitTable;
    887 
    888 // Stats
    889 static ULong sec_vbits_new_nodes = 0;
    890 static ULong sec_vbits_updates   = 0;
    891 
    892 // This must be a power of two;  this is checked in mc_pre_clo_init().
    893 // The size chosen here is a trade-off:  if the nodes are bigger (ie. cover
    894 // a larger address range) they take more space but we can get multiple
    895 // partially-defined bytes in one if they are close to each other, reducing
    896 // the number of total nodes.  In practice sometimes they are clustered (eg.
    897 // perf/bz2 repeatedly writes then reads more than 20,000 of them in a contiguous
    898 // row), but often not.  So we choose something intermediate.
    899 #define BYTES_PER_SEC_VBIT_NODE     16
    900 
    901 // We make the table bigger by a factor of STEPUP_GROWTH_FACTOR if
    902 // more than this proportion of nodes survives a GC.
    903 #define STEPUP_SURVIVOR_PROPORTION  0.5
    904 #define STEPUP_GROWTH_FACTOR        1.414213562
    905 
    906 // If the above heuristic doesn't apply, then we may make the table
    907 // slightly bigger, by a factor of DRIFTUP_GROWTH_FACTOR, if more than
    908 // this proportion of nodes survives a GC, _and_ the total table size does
    909 // not exceed a fixed limit.  The numbers are somewhat arbitrary, but
    910 // work tolerably well on long Firefox runs.  The scaleup ratio of 1.5%
    911 // effectively, although only gradually, reduces residency and increases the time
    912 // between GCs for programs with small numbers of PDBs.  The 80000 limit
    913 // effectively limits the table size to around 2MB for programs with
    914 // small numbers of PDBs, whilst giving a reasonably long lifetime to
    915 // entries, to try and reduce the costs resulting from deleting and
    916 // re-adding of entries.
    917 #define DRIFTUP_SURVIVOR_PROPORTION 0.15
    918 #define DRIFTUP_GROWTH_FACTOR       1.015
    919 #define DRIFTUP_MAX_SIZE            80000
    920 
    921 // We GC the table when it gets this many nodes in it, ie. it's effectively
    922 // the table size.  It can change.
    923 static Int  secVBitLimit = 1000;
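
// Worked example (illustrative): starting from the initial secVBitLimit of
// 1000, a GC in which more than 500 nodes survive steps the limit up to
// about 1414 (STEPUP_GROWTH_FACTOR); a GC in which, say, 200 nodes survive
// (more than 15% but not more than 50%) merely drifts it up to 1015
// (DRIFTUP_GROWTH_FACTOR), provided the limit is still below 80000.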
    924 
    925 // The number of GCs done, used to age sec-V-bit nodes for eviction.
    926 // Because it's unsigned, wrapping doesn't matter -- the right answer will
    927 // come out anyway.
    928 static UInt GCs_done = 0;
    929 
    930 typedef
    931    struct {
    932       Addr  a;
    933       UChar vbits8[BYTES_PER_SEC_VBIT_NODE];
    934    }
    935    SecVBitNode;
    936 
    937 static OSet* createSecVBitTable(void)
    938 {
    939    OSet* newSecVBitTable;
    940    newSecVBitTable = VG_(OSetGen_Create_With_Pool)
    941       ( offsetof(SecVBitNode, a),
    942         NULL, // use fast comparisons
    943         VG_(malloc), "mc.cSVT.1 (sec VBit table)",
    944         VG_(free),
    945         1000,
    946         sizeof(SecVBitNode));
    947    return newSecVBitTable;
    948 }
    949 
    950 static void gcSecVBitTable(void)
    951 {
    952    OSet*        secVBitTable2;
    953    SecVBitNode* n;
    954    Int          i, n_nodes = 0, n_survivors = 0;
    955 
    956    GCs_done++;
    957 
    958    // Create the new table.
    959    secVBitTable2 = createSecVBitTable();
    960 
    961    // Traverse the table, moving fresh nodes into the new table.
    962    VG_(OSetGen_ResetIter)(secVBitTable);
    963    while ( (n = VG_(OSetGen_Next)(secVBitTable)) ) {
    964       // Keep node if any of its bytes are non-stale.  Using
    965       // get_vabits2() for the lookup is not very efficient, but I don't
    966       // think it matters.
    967       for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
    968          if (VA_BITS2_PARTDEFINED == get_vabits2(n->a + i)) {
    969             // Found a non-stale byte, so keep =>
    970             // Insert a copy of the node into the new table.
    971             SecVBitNode* n2 =
    972                VG_(OSetGen_AllocNode)(secVBitTable2, sizeof(SecVBitNode));
    973             *n2 = *n;
    974             VG_(OSetGen_Insert)(secVBitTable2, n2);
    975             break;
    976          }
    977       }
    978    }
    979 
    980    // Get the before and after sizes.
    981    n_nodes     = VG_(OSetGen_Size)(secVBitTable);
    982    n_survivors = VG_(OSetGen_Size)(secVBitTable2);
    983 
    984    // Destroy the old table, and put the new one in its place.
    985    VG_(OSetGen_Destroy)(secVBitTable);
    986    secVBitTable = secVBitTable2;
    987 
    988    if (VG_(clo_verbosity) > 1 && n_nodes != 0) {
    989       VG_(message)(Vg_DebugMsg, "memcheck GC: %d nodes, %d survivors (%.1f%%)\n",
    990                    n_nodes, n_survivors, n_survivors * 100.0 / n_nodes);
    991    }
    992 
    993    // Increase table size if necessary.
    994    if ((Double)n_survivors
    995        > ((Double)secVBitLimit * STEPUP_SURVIVOR_PROPORTION)) {
    996       secVBitLimit = (Int)((Double)secVBitLimit * (Double)STEPUP_GROWTH_FACTOR);
    997       if (VG_(clo_verbosity) > 1)
    998          VG_(message)(Vg_DebugMsg,
    999                       "memcheck GC: %d new table size (stepup)\n",
   1000                       secVBitLimit);
   1001    }
   1002    else
   1003    if (secVBitLimit < DRIFTUP_MAX_SIZE
   1004        && (Double)n_survivors
   1005           > ((Double)secVBitLimit * DRIFTUP_SURVIVOR_PROPORTION)) {
   1006       secVBitLimit = (Int)((Double)secVBitLimit * (Double)DRIFTUP_GROWTH_FACTOR);
   1007       if (VG_(clo_verbosity) > 1)
   1008          VG_(message)(Vg_DebugMsg,
   1009                       "memcheck GC: %d new table size (driftup)\n",
   1010                       secVBitLimit);
   1011    }
   1012 }
   1013 
   1014 static UWord get_sec_vbits8(Addr a)
   1015 {
   1016    Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
   1017    Int          amod     = a % BYTES_PER_SEC_VBIT_NODE;
   1018    SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
   1019    UChar        vbits8;
   1020    tl_assert2(n, "get_sec_vbits8: no node for address %p (%p)\n", aAligned, a);
   1021    // Shouldn't be fully defined or fully undefined -- those cases shouldn't
   1022    // make it to the secondary V bits table.
   1023    vbits8 = n->vbits8[amod];
   1024    tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
   1025    return vbits8;
   1026 }
   1027 
   1028 static void set_sec_vbits8(Addr a, UWord vbits8)
   1029 {
   1030    Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
   1031    Int          i, amod  = a % BYTES_PER_SEC_VBIT_NODE;
   1032    SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
   1033    // Shouldn't be fully defined or fully undefined -- those cases shouldn't
   1034    // make it to the secondary V bits table.
   1035    tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
   1036    if (n) {
   1037       n->vbits8[amod] = vbits8;     // update
   1038       sec_vbits_updates++;
   1039    } else {
   1040       // Do a table GC if necessary.  Nb: do this before creating and
   1041       // inserting the new node, to avoid erroneously GC'ing the new node.
   1042       if (secVBitLimit == VG_(OSetGen_Size)(secVBitTable)) {
   1043          gcSecVBitTable();
   1044       }
   1045 
   1046       // New node:  assign the specific byte, make the rest invalid (they
   1047       // should never be read as-is, but be cautious).
   1048       n = VG_(OSetGen_AllocNode)(secVBitTable, sizeof(SecVBitNode));
   1049       n->a            = aAligned;
   1050       for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
   1051          n->vbits8[i] = V_BITS8_UNDEFINED;
   1052       }
   1053       n->vbits8[amod] = vbits8;
   1054 
   1055       // Insert the new node.
   1056       VG_(OSetGen_Insert)(secVBitTable, n);
   1057       sec_vbits_new_nodes++;
   1058 
   1059       n_secVBit_nodes = VG_(OSetGen_Size)(secVBitTable);
   1060       if (n_secVBit_nodes > max_secVBit_nodes)
   1061          max_secVBit_nodes = n_secVBit_nodes;
   1062    }
   1063 }
   1064 
   1065 /* --------------- Endianness helpers --------------- */
   1066 
   1067 /* Returns the offset in memory of the byteno-th least significant byte
   1068    in a wordszB-sized word, given the specified endianness. */
   1069 static INLINE UWord byte_offset_w ( UWord wordszB, Bool bigendian,
   1070                                     UWord byteno ) {
   1071    return bigendian ? (wordszB-1-byteno) : byteno;
   1072 }
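
/* Worked example (illustrative): for a 4-byte word, byteno 0 names the
   least significant byte.  On a little-endian target that byte lives at
   memory offset 0 (byte_offset_w(4, False, 0) == 0); on a big-endian
   target it lives at offset 3 (byte_offset_w(4, True, 0) == 3). */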
   1073 
   1074 
   1075 /* --------------- Ignored address ranges --------------- */
   1076 
   1077 /* Denotes the address-error-reportability status for address ranges:
   1078    IAR_NotIgnored:  the usual case -- report errors in this range
   1079    IAR_CommandLine: don't report errors -- from command line setting
   1080    IAR_ClientReq:   don't report errors -- from client request
   1081 */
   1082 typedef
   1083    enum { IAR_INVALID=99,
   1084           IAR_NotIgnored,
   1085           IAR_CommandLine,
   1086           IAR_ClientReq }
   1087    IARKind;
   1088 
   1089 static const HChar* showIARKind ( IARKind iark )
   1090 {
   1091    switch (iark) {
   1092       case IAR_INVALID:     return "INVALID";
   1093       case IAR_NotIgnored:  return "NotIgnored";
   1094       case IAR_CommandLine: return "CommandLine";
   1095       case IAR_ClientReq:   return "ClientReq";
   1096       default:              return "???";
   1097    }
   1098 }
   1099 
   1100 // RangeMap<IARKind>
   1101 static RangeMap* gIgnoredAddressRanges = NULL;
   1102 
   1103 static void init_gIgnoredAddressRanges ( void )
   1104 {
   1105    if (LIKELY(gIgnoredAddressRanges != NULL))
   1106       return;
   1107    gIgnoredAddressRanges = VG_(newRangeMap)( VG_(malloc), "mc.igIAR.1",
   1108                                              VG_(free), IAR_NotIgnored );
   1109 }
   1110 
   1111 Bool MC_(in_ignored_range) ( Addr a )
   1112 {
   1113    if (LIKELY(gIgnoredAddressRanges == NULL))
   1114       return False;
   1115    UWord how     = IAR_INVALID;
   1116    UWord key_min = ~(UWord)0;
   1117    UWord key_max =  (UWord)0;
   1118    VG_(lookupRangeMap)(&key_min, &key_max, &how, gIgnoredAddressRanges, a);
   1119    tl_assert(key_min <= a && a <= key_max);
   1120    switch (how) {
   1121       case IAR_NotIgnored:  return False;
   1122       case IAR_CommandLine: return True;
   1123       case IAR_ClientReq:   return True;
   1124       default: break; /* invalid */
   1125    }
   1126    VG_(tool_panic)("MC_(in_ignored_range)");
   1127    /*NOTREACHED*/
   1128 }
   1129 
   1130 Bool MC_(in_ignored_range_below_sp) ( Addr sp, Addr a, UInt szB )
   1131 {
   1132    if (LIKELY(!MC_(clo_ignore_range_below_sp)))
   1133        return False;
   1134    tl_assert(szB >= 1 && szB <= 32);
   1135    tl_assert(MC_(clo_ignore_range_below_sp__first_offset)
   1136              > MC_(clo_ignore_range_below_sp__last_offset));
   1137    Addr range_lo = sp - MC_(clo_ignore_range_below_sp__first_offset);
   1138    Addr range_hi = sp - MC_(clo_ignore_range_below_sp__last_offset);
   1139    if (range_lo >= range_hi) {
   1140       /* Bizarre.  We have a wraparound situation.  What should we do? */
   1141       return False; // Play safe
   1142    } else {
   1143       /* This is the expected case. */
   1144       if (range_lo <= a && a + szB - 1 <= range_hi)
   1145          return True;
   1146       else
   1147          return False;
   1148    }
   1149    /*NOTREACHED*/
   1150    tl_assert(0);
   1151 }
   1152 
   1153 /* Parse two Addrs (in hex) separated by a dash, or fail. */
   1154 
   1155 static Bool parse_Addr_pair ( const HChar** ppc, Addr* result1, Addr* result2 )
   1156 {
   1157    Bool ok = VG_(parse_Addr) (ppc, result1);
   1158    if (!ok)
   1159       return False;
   1160    if (**ppc != '-')
   1161       return False;
   1162    (*ppc)++;
   1163    ok = VG_(parse_Addr) (ppc, result2);
   1164    if (!ok)
   1165       return False;
   1166    return True;
   1167 }
   1168 
   1169 /* Parse two UInts (32 bit unsigned, in decimal) separated by a dash,
   1170    or fail. */
   1171 
   1172 static Bool parse_UInt_pair ( const HChar** ppc, UInt* result1, UInt* result2 )
   1173 {
   1174    Bool ok = VG_(parse_UInt) (ppc, result1);
   1175    if (!ok)
   1176       return False;
   1177    if (**ppc != '-')
   1178       return False;
   1179    (*ppc)++;
   1180    ok = VG_(parse_UInt) (ppc, result2);
   1181    if (!ok)
   1182       return False;
   1183    return True;
   1184 }
   1185 
   1186 /* Parse a set of ranges separated by commas, or
   1187    fail.  If they are valid, add them to the global set of ignored
   1188    ranges. */
   1189 static Bool parse_ignore_ranges ( const HChar* str0 )
   1190 {
   1191    init_gIgnoredAddressRanges();
   1192    const HChar*  str = str0;
   1193    const HChar** ppc = &str;
   1194    while (1) {
   1195       Addr start = ~(Addr)0;
   1196       Addr end   = (Addr)0;
   1197       Bool ok    = parse_Addr_pair(ppc, &start, &end);
   1198       if (!ok)
   1199          return False;
   1200       if (start > end)
   1201          return False;
   1202       VG_(bindRangeMap)( gIgnoredAddressRanges, start, end, IAR_CommandLine );
   1203       if (**ppc == 0)
   1204          return True;
   1205       if (**ppc != ',')
   1206          return False;
   1207       (*ppc)++;
   1208    }
   1209    /*NOTREACHED*/
   1210    return False;
   1211 }
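
/* Illustrative (hypothetical values): a string such as
   "0x4000000-0x4ffffff,0x7000000-0x7000fff" parses into two
   IAR_CommandLine ranges; addresses not covered by any such range keep the
   rangemap's default value, IAR_NotIgnored. */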
   1212 
   1213 /* Add or remove [start, +len) from the set of ignored ranges. */
   1214 static Bool modify_ignore_ranges ( Bool addRange, Addr start, Addr len )
   1215 {
   1216    init_gIgnoredAddressRanges();
   1217    const Bool verbose = (VG_(clo_verbosity) > 1);
   1218    if (len == 0) {
   1219       return False;
   1220    }
   1221    if (addRange) {
   1222       VG_(bindRangeMap)(gIgnoredAddressRanges,
   1223                         start, start+len-1, IAR_ClientReq);
   1224       if (verbose)
   1225          VG_(dmsg)("memcheck: modify_ignore_ranges: add %p %p\n",
   1226                    (void*)start, (void*)(start+len-1));
   1227    } else {
   1228       VG_(bindRangeMap)(gIgnoredAddressRanges,
   1229                         start, start+len-1, IAR_NotIgnored);
   1230       if (verbose)
   1231          VG_(dmsg)("memcheck: modify_ignore_ranges: del %p %p\n",
   1232                    (void*)start, (void*)(start+len-1));
   1233    }
   1234    if (verbose) {
   1235       VG_(dmsg)("memcheck:   now have %u ranges:\n",
   1236                 VG_(sizeRangeMap)(gIgnoredAddressRanges));
   1237       UInt i;
   1238       for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
   1239          UWord val     = IAR_INVALID;
   1240          UWord key_min = ~(UWord)0;
   1241          UWord key_max = (UWord)0;
   1242          VG_(indexRangeMap)( &key_min, &key_max, &val,
   1243                              gIgnoredAddressRanges, i );
   1244          VG_(dmsg)("memcheck:      [%u]  %016lx-%016lx  %s\n",
   1245                    i, key_min, key_max, showIARKind(val));
   1246       }
   1247    }
   1248    return True;
   1249 }
   1250 
   1251 
   1252 /* --------------- Load/store slow cases. --------------- */
   1253 
   1254 static
   1255 __attribute__((noinline))
   1256 void mc_LOADV_128_or_256_slow ( /*OUT*/ULong* res,
   1257                                 Addr a, SizeT nBits, Bool bigendian )
   1258 {
   1259    ULong  pessim[4];     /* only used when p-l-ok=yes */
   1260    SSizeT szB            = nBits / 8;
   1261    SSizeT szL            = szB / 8;  /* Size in Longs (64-bit units) */
   1262    SSizeT i, j;          /* Must be signed. */
   1263    SizeT  n_addrs_bad = 0;
   1264    Addr   ai;
   1265    UChar  vbits8;
   1266    Bool   ok;
   1267 
   1268    /* Code below assumes load size is a power of two and at least 64
   1269       bits. */
   1270    tl_assert((szB & (szB-1)) == 0 && szL > 0);
   1271 
   1272    /* If this triggers, you probably just need to increase the size of
   1273       the pessim array. */
   1274    tl_assert(szL <= sizeof(pessim) / sizeof(pessim[0]));
   1275 
   1276    for (j = 0; j < szL; j++) {
   1277       pessim[j] = V_BITS64_DEFINED;
   1278       res[j] = V_BITS64_UNDEFINED;
   1279    }
   1280 
   1281    /* Make up a result V word, which contains the loaded data for
   1282       valid addresses and Defined for invalid addresses.  Iterate over
   1283       the bytes in the word, from the most significant down to the
   1284    least.  The vbits to return are calculated into res[].  Also
   1285       compute the pessimising value to be used when
   1286       --partial-loads-ok=yes.  n_addrs_bad is redundant (the relevant
   1287       info can be gleaned from the pessim array) but is used as a
   1288       cross-check. */
   1289    for (j = szL-1; j >= 0; j--) {
   1290       ULong vbits64    = V_BITS64_UNDEFINED;
   1291       ULong pessim64   = V_BITS64_DEFINED;
   1292       UWord long_index = byte_offset_w(szL, bigendian, j);
   1293       for (i = 8-1; i >= 0; i--) {
   1294          PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW_LOOP);
   1295          ai = a + 8*long_index + byte_offset_w(8, bigendian, i);
   1296          ok = get_vbits8(ai, &vbits8);
   1297          vbits64 <<= 8;
   1298          vbits64 |= vbits8;
   1299          if (!ok) n_addrs_bad++;
   1300          pessim64 <<= 8;
   1301          pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
   1302       }
   1303       res[long_index] = vbits64;
   1304       pessim[long_index] = pessim64;
   1305    }
   1306 
   1307    /* In the common case, all the addresses involved are valid, so we
   1308       just return the computed V bits and have done. */
   1309    if (LIKELY(n_addrs_bad == 0))
   1310       return;
   1311 
   1312    /* If there's no possibility of getting a partial-loads-ok
   1313       exemption, report the error and quit. */
   1314    if (!MC_(clo_partial_loads_ok)) {
   1315       MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
   1316       return;
   1317    }
   1318 
   1319    /* The partial-loads-ok exemption might apply.  Find out if it
   1320       does.  If so, don't report an addressing error, but do return
   1321       Undefined for the bytes that are out of range, so as to avoid
   1322       false negatives.  If it doesn't apply, just report an addressing
   1323       error in the usual way. */
   1324 
   1325    /* Some code steps along byte strings in aligned chunks
   1326       even when there is only a partially defined word at the end (eg,
   1327       optimised strlen).  This is allowed by the memory model of
   1328       modern machines, since an aligned load cannot span two pages and
   1329       thus cannot "partially fault".
   1330 
   1331       Therefore, a load from a partially-addressable place is allowed
   1332       if all of the following hold:
   1333       - the command-line flag is set [by default, it isn't]
   1334       - it's an aligned load
   1335       - at least one of the addresses in the word *is* valid
   1336 
   1337       Since this suppresses the addressing error, we avoid false
   1338       negatives by marking bytes undefined when they come from an
   1339       invalid address.
   1340    */
   1341 
   1342    /* "at least one of the addresses is invalid" */
   1343    ok = False;
   1344    for (j = 0; j < szL; j++)
   1345       ok |= pessim[j] != V_BITS64_DEFINED;
   1346    tl_assert(ok);
   1347 
   1348    if (0 == (a & (szB - 1)) && n_addrs_bad < szB) {
   1349       /* Exemption applies.  Use the previously computed pessimising
   1350          value and return the combined result, but don't flag an
   1351          addressing error.  The pessimising value is Defined for valid
   1352          addresses and Undefined for invalid addresses. */
   1353       /* for assumption that doing bitwise or implements UifU */
   1354       tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
   1355       /* (really need "UifU" here...)
   1356          vbits[j] UifU= pessim[j]  (is pessimised by it, iow) */
   1357       for (j = szL-1; j >= 0; j--)
   1358          res[j] |= pessim[j];
   1359       return;
   1360    }
   1361 
   1362    /* Exemption doesn't apply.  Flag an addressing error in the normal
   1363       way. */
   1364    MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
   1365 }
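
        /* Illustrative sketch only (nothing here is executed): consider a
           16-byte load at a 16-aligned address 'a' whose last four bytes are
           unaddressable, with --partial-loads-ok=yes.  Since
           V_BIT_DEFINED == 0 and V_BIT_UNDEFINED == 1 (asserted above), on a
           little-endian target the loop above would leave

              pessim[1] = 0xFFFFFFFF00000000ULL;   // the 4 bad bytes
              res[1]   |= pessim[1];               // only those bytes
                                                   // become Undefined

           and the "szB-aligned and not every address bad" test suppresses
           the addressing error.  The exact byte placement depends on
           endianness via byte_offset_w; this is a hypothetical layout. */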
   1366 
   1367 
   1368 static
   1369 __attribute__((noinline))
   1370 __attribute__((used))
   1371 VG_REGPARM(3) /* make sure we're using a fixed calling convention, since
   1372                  this function may get called from hand written assembly. */
   1373 ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
   1374 {
   1375    PROF_EVENT(MCPE_LOADVN_SLOW);
   1376 
   1377    /* ------------ BEGIN semi-fast cases ------------ */
   1378    /* These deal quickly-ish with the common auxiliary primary map
   1379       cases on 64-bit platforms.  Are merely a speedup hack; can be
   1380       cases on 64-bit platforms.  They are merely a speedup hack; they
   1381       can be omitted without loss of correctness/functionality.  Note that
   1382       in both cases the "sizeof(void*) == 8" test causes these cases to be
   1383       from LOADV64 and LOADV32.
   1384    */
   1385    if (LIKELY(sizeof(void*) == 8
   1386                       && nBits == 64 && VG_IS_8_ALIGNED(a))) {
   1387       SecMap* sm       = get_secmap_for_reading(a);
   1388       UWord   sm_off16 = SM_OFF_16(a);
   1389       UWord   vabits16 = sm->vabits16[sm_off16];
   1390       if (LIKELY(vabits16 == VA_BITS16_DEFINED))
   1391          return V_BITS64_DEFINED;
   1392       if (LIKELY(vabits16 == VA_BITS16_UNDEFINED))
   1393          return V_BITS64_UNDEFINED;
   1394       /* else fall into the slow case */
   1395    }
   1396    if (LIKELY(sizeof(void*) == 8
   1397                       && nBits == 32 && VG_IS_4_ALIGNED(a))) {
   1398       SecMap* sm = get_secmap_for_reading(a);
   1399       UWord sm_off = SM_OFF(a);
   1400       UWord vabits8 = sm->vabits8[sm_off];
   1401       if (LIKELY(vabits8 == VA_BITS8_DEFINED))
   1402          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
   1403       if (LIKELY(vabits8 == VA_BITS8_UNDEFINED))
   1404          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
   1405       /* else fall into slow case */
   1406    }
   1407    /* ------------ END semi-fast cases ------------ */
   1408 
   1409    ULong  vbits64     = V_BITS64_UNDEFINED; /* result */
   1410    ULong  pessim64    = V_BITS64_DEFINED;   /* only used when p-l-ok=yes */
   1411    SSizeT szB         = nBits / 8;
   1412    SSizeT i;          /* Must be signed. */
   1413    SizeT  n_addrs_bad = 0;
   1414    Addr   ai;
   1415    UChar  vbits8;
   1416    Bool   ok;
   1417 
   1418    tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
   1419 
   1420    /* Make up a 64-bit result V word, which contains the loaded data
   1421       for valid addresses and Defined for invalid addresses.  Iterate
   1422       over the bytes in the word, from the most significant down to
   1423       the least.  The vbits to return are calculated into vbits64.
   1424       Also compute the pessimising value to be used when
   1425       --partial-loads-ok=yes.  n_addrs_bad is redundant (the relevant
   1426       info can be gleaned from pessim64) but is used as a
   1427       cross-check. */
   1428    for (i = szB-1; i >= 0; i--) {
   1429       PROF_EVENT(MCPE_LOADVN_SLOW_LOOP);
   1430       ai = a + byte_offset_w(szB, bigendian, i);
   1431       ok = get_vbits8(ai, &vbits8);
   1432       vbits64 <<= 8;
   1433       vbits64 |= vbits8;
   1434       if (!ok) n_addrs_bad++;
   1435       pessim64 <<= 8;
   1436       pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
   1437    }
   1438 
   1439    /* In the common case, all the addresses involved are valid, so we
   1440       just return the computed V bits and have done. */
   1441    if (LIKELY(n_addrs_bad == 0))
   1442       return vbits64;
   1443 
   1444    /* If there's no possibility of getting a partial-loads-ok
   1445       exemption, report the error and quit. */
   1446    if (!MC_(clo_partial_loads_ok)) {
   1447       MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
   1448       return vbits64;
   1449    }
   1450 
   1451    /* The partial-loads-ok exemption might apply.  Find out if it
   1452       does.  If so, don't report an addressing error, but do return
   1453       Undefined for the bytes that are out of range, so as to avoid
   1454       false negatives.  If it doesn't apply, just report an addressing
   1455       error in the usual way. */
   1456 
   1457    /* Some code steps along byte strings in aligned word-sized chunks
   1458       even when there is only a partially defined word at the end (eg,
   1459       optimised strlen).  This is allowed by the memory model of
   1460       modern machines, since an aligned load cannot span two pages and
   1461       thus cannot "partially fault", even though such behaviour is
   1462       declared undefined by ANSI C/C++.
   1463 
   1464       Therefore, a load from a partially-addressable place is allowed
   1465       if all of the following hold:
   1466       - the command-line flag is set [by default, it isn't]
   1467       - it's a word-sized, word-aligned load
   1468       - at least one of the addresses in the word *is* valid
   1469 
   1470       Since this suppresses the addressing error, we avoid false
   1471       negatives by marking bytes undefined when they come from an
   1472       invalid address.
   1473    */
   1474 
   1475    /* "at least one of the addresses is invalid" */
   1476    tl_assert(pessim64 != V_BITS64_DEFINED);
   1477 
   1478    if (szB == VG_WORDSIZE && VG_IS_WORD_ALIGNED(a)
   1479        && n_addrs_bad < VG_WORDSIZE) {
   1480       /* Exemption applies.  Use the previously computed pessimising
   1481          value for vbits64 and return the combined result, but don't
   1482          flag an addressing error.  The pessimising value is Defined
   1483          for valid addresses and Undefined for invalid addresses. */
   1484       /* for assumption that doing bitwise or implements UifU */
   1485       tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
   1486       /* (really need "UifU" here...)
   1487          vbits64 UifU= pessim64  (is pessimised by it, iow) */
   1488       vbits64 |= pessim64;
   1489       return vbits64;
   1490    }
   1491 
   1492    /* Also, it appears that gcc generates string-stepping code in
   1493       32-bit chunks on 64-bit platforms.  So, also grant an exception
   1494       for this case.  Note that the first clause of the conditional
   1495       (VG_WORDSIZE == 8) is known at compile time, so the whole clause
   1496       will get folded out in 32 bit builds. */
   1497    if (VG_WORDSIZE == 8
   1498        && VG_IS_4_ALIGNED(a) && nBits == 32 && n_addrs_bad < 4) {
   1499       tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
   1500       /* (really need "UifU" here...)
   1501          vbits64 UifU= pessim64  (is pessimised by it, iow) */
   1502       vbits64 |= pessim64;
   1503       /* Mark the upper 32 bits as undefined, just to be on the safe
   1504          side. */
   1505       vbits64 |= (((ULong)V_BITS32_UNDEFINED) << 32);
   1506       return vbits64;
   1507    }
   1508 
   1509    /* Exemption doesn't apply.  Flag an addressing error in the normal
   1510       way. */
   1511    MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
   1512 
   1513    return vbits64;
   1514 }
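
        /* Worked example (a sketch only; little-endian, 64-bit target
           assumed): an optimised strlen doing an 8-aligned, 8-byte load at
           'a' where only bytes a+0 .. a+5 are addressable.  Then
           n_addrs_bad == 2, which is less than VG_WORDSIZE, so the
           word-sized exemption above applies: pessim64 ends up as
           0xFFFF000000000000ULL, vbits64 |= pessim64 marks the two
           out-of-range bytes as Undefined, and no addressing error is
           reported.  A value error will still appear later if the program
           actually uses those undefined bytes. */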
   1515 
   1516 
   1517 static
   1518 __attribute__((noinline))
   1519 void mc_STOREVn_slow ( Addr a, SizeT nBits, ULong vbytes, Bool bigendian )
   1520 {
   1521    SizeT szB = nBits / 8;
   1522    SizeT i, n_addrs_bad = 0;
   1523    UChar vbits8;
   1524    Addr  ai;
   1525    Bool  ok;
   1526 
   1527    PROF_EVENT(MCPE_STOREVN_SLOW);
   1528 
   1529    /* ------------ BEGIN semi-fast cases ------------ */
   1530    /* These deal quickly-ish with the common auxiliary primary map
   1531       cases on 64-bit platforms.  Are merely a speedup hack; can be
   1532       cases on 64-bit platforms.  They are merely a speedup hack; they
   1533       can be omitted without loss of correctness/functionality.  Note that
   1534       in both cases the "sizeof(void*) == 8" test causes these cases to be
   1535       is somewhat similar to some cases extensively commented in
   1536       MC_(helperc_STOREV8).
   1537    */
   1538    if (LIKELY(sizeof(void*) == 8
   1539                       && nBits == 64 && VG_IS_8_ALIGNED(a))) {
   1540       SecMap* sm       = get_secmap_for_reading(a);
   1541       UWord   sm_off16 = SM_OFF_16(a);
   1542       UWord   vabits16 = sm->vabits16[sm_off16];
   1543       if (LIKELY( !is_distinguished_sm(sm) &&
   1544                           (VA_BITS16_DEFINED   == vabits16 ||
   1545                            VA_BITS16_UNDEFINED == vabits16) )) {
   1546          /* Handle common case quickly: a is suitably aligned, */
   1547          /* is mapped, and is addressable. */
   1548          // Convert full V-bits in register to compact 2-bit form.
   1549          if (LIKELY(V_BITS64_DEFINED == vbytes)) {
   1550             sm->vabits16[sm_off16] = VA_BITS16_DEFINED;
   1551             return;
   1552          } else if (V_BITS64_UNDEFINED == vbytes) {
   1553             sm->vabits16[sm_off16] = VA_BITS16_UNDEFINED;
   1554             return;
   1555          }
   1556          /* else fall into the slow case */
   1557       }
   1558       /* else fall into the slow case */
   1559    }
   1560    if (LIKELY(sizeof(void*) == 8
   1561                       && nBits == 32 && VG_IS_4_ALIGNED(a))) {
   1562       SecMap* sm      = get_secmap_for_reading(a);
   1563       UWord   sm_off  = SM_OFF(a);
   1564       UWord   vabits8 = sm->vabits8[sm_off];
   1565       if (LIKELY( !is_distinguished_sm(sm) &&
   1566                           (VA_BITS8_DEFINED   == vabits8 ||
   1567                            VA_BITS8_UNDEFINED == vabits8) )) {
   1568          /* Handle common case quickly: a is suitably aligned, */
   1569          /* is mapped, and is addressable. */
   1570          // Convert full V-bits in register to compact 2-bit form.
   1571          if (LIKELY(V_BITS32_DEFINED == (vbytes & 0xFFFFFFFF))) {
   1572             sm->vabits8[sm_off] = VA_BITS8_DEFINED;
   1573             return;
   1574          } else if (V_BITS32_UNDEFINED == (vbytes & 0xFFFFFFFF)) {
   1575             sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
   1576             return;
   1577          }
   1578          /* else fall into the slow case */
   1579       }
   1580       /* else fall into the slow case */
   1581    }
   1582    /* ------------ END semi-fast cases ------------ */
   1583 
   1584    tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
   1585 
   1586    /* Dump vbytes in memory, iterating from least to most significant
   1587       byte.  At the same time establish addressability of the location. */
   1588    for (i = 0; i < szB; i++) {
   1589       PROF_EVENT(MCPE_STOREVN_SLOW_LOOP);
   1590       ai     = a + byte_offset_w(szB, bigendian, i);
   1591       vbits8 = vbytes & 0xff;
   1592       ok     = set_vbits8(ai, vbits8);
   1593       if (!ok) n_addrs_bad++;
   1594       vbytes >>= 8;
   1595    }
   1596 
   1597    /* If an address error has happened, report it. */
   1598    if (n_addrs_bad > 0)
   1599       MC_(record_address_error)( VG_(get_running_tid)(), a, szB, True );
   1600 }
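
        /* A sketch of how stored V bits map onto the shadow encodings
           (illustrative only; the exact encodings live in set_vbits8 and
           friends).  Given V_BIT_DEFINED == 0, storing
           vbytes == V_BITS64_DEFINED to an 8-aligned address takes the
           16-bit fast path above (one write of VA_BITS16_DEFINED), whereas
           a value such as vbytes == 0x00000000000000F0ULL (low byte only
           partially defined) falls through to the byte loop, where
           set_vbits8 has to record that byte's individual V bits,
           presumably via the sec-V-bits table mentioned elsewhere in this
           file. */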
   1601 
   1602 
   1603 /*------------------------------------------------------------*/
   1604 /*--- Setting permissions over address ranges.             ---*/
   1605 /*------------------------------------------------------------*/
   1606 
   1607 static void set_address_range_perms ( Addr a, SizeT lenT, UWord vabits16,
   1608                                       UWord dsm_num )
   1609 {
   1610    UWord    sm_off, sm_off16;
   1611    UWord    vabits2 = vabits16 & 0x3;
   1612    SizeT    lenA, lenB, len_to_next_secmap;
   1613    Addr     aNext;
   1614    SecMap*  sm;
   1615    SecMap** sm_ptr;
   1616    SecMap*  example_dsm;
   1617 
   1618    PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS);
   1619 
   1620    /* Check the V+A bits make sense. */
   1621    tl_assert(VA_BITS16_NOACCESS  == vabits16 ||
   1622              VA_BITS16_UNDEFINED == vabits16 ||
   1623              VA_BITS16_DEFINED   == vabits16);
   1624 
   1625    // This code should never write PDBs;  ensure this.  (See comment above
   1626    // set_vabits2().)
   1627    tl_assert(VA_BITS2_PARTDEFINED != vabits2);
   1628 
   1629    if (lenT == 0)
   1630       return;
   1631 
   1632    if (lenT > 256 * 1024 * 1024) {
   1633       if (VG_(clo_verbosity) > 0 && !VG_(clo_xml)) {
   1634          const HChar* s = "unknown???";
   1635          if (vabits16 == VA_BITS16_NOACCESS ) s = "noaccess";
   1636          if (vabits16 == VA_BITS16_UNDEFINED) s = "undefined";
   1637          if (vabits16 == VA_BITS16_DEFINED  ) s = "defined";
   1638          VG_(message)(Vg_UserMsg, "Warning: set address range perms: "
   1639                                   "large range [0x%lx, 0x%lx) (%s)\n",
   1640                                   a, a + lenT, s);
   1641       }
   1642    }
   1643 
   1644 #ifndef PERF_FAST_SARP
   1645    /*------------------ debug-only case ------------------ */
   1646    {
   1647       // Endianness doesn't matter here because all bytes are being set to
   1648       // the same value.
   1649       // Nb: We don't have to worry about updating the sec-V-bits table
   1650       // after these set_vabits2() calls because this code never writes
   1651       // VA_BITS2_PARTDEFINED values.
   1652       SizeT i;
   1653       for (i = 0; i < lenT; i++) {
   1654          set_vabits2(a + i, vabits2);
   1655       }
   1656       return;
   1657    }
   1658 #endif
   1659 
   1660    /*------------------ standard handling ------------------ */
   1661 
   1662    /* Get the distinguished secondary that we might want
   1663       to use (part of the space-compression scheme). */
   1664    example_dsm = &sm_distinguished[dsm_num];
   1665 
   1666    // We have to handle ranges covering various combinations of partial and
   1667    // whole sec-maps.  Here is how parts 1, 2 and 3 are used in each case.
   1668    // Cases marked with a '*' are common.
   1669    //
   1670    //   TYPE                                             PARTS USED
   1671    //   ----                                             ----------
   1672    // * one partial sec-map                  (p)         1
   1673    // - one whole sec-map                    (P)         2
   1674    //
   1675    // * two partial sec-maps                 (pp)        1,3
   1676    // - one partial, one whole sec-map       (pP)        1,2
   1677    // - one whole, one partial sec-map       (Pp)        2,3
   1678    // - two whole sec-maps                   (PP)        2,2
   1679    //
   1680    // * one partial, one whole, one partial  (pPp)       1,2,3
   1681    // - one partial, two whole               (pPP)       1,2,2
   1682    // - two whole, one partial               (PPp)       2,2,3
   1683    // - three whole                          (PPP)       2,2,2
   1684    //
   1685    // * one partial, N-2 whole, one partial  (pP...Pp)   1,2...2,3
   1686    // - one partial, N-1 whole               (pP...PP)   1,2...2,2
   1687    // - N-1 whole, one partial               (PP...Pp)   2,2...2,3
   1688    // - N whole                              (PP...PP)   2,2...2,2
   1689 
   1690    // Break up total length (lenT) into two parts:  length in the first
   1691    // sec-map (lenA), and the rest (lenB);   lenT == lenA + lenB.
   1692    aNext = start_of_this_sm(a) + SM_SIZE;
   1693    len_to_next_secmap = aNext - a;
   1694    if ( lenT <= len_to_next_secmap ) {
   1695       // Range entirely within one sec-map.  Covers almost all cases.
   1696       PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_SINGLE_SECMAP);
   1697       lenA = lenT;
   1698       lenB = 0;
   1699    } else if (is_start_of_sm(a)) {
   1700       // Range spans at least one whole sec-map, and starts at the beginning
   1701       // of a sec-map; skip to Part 2.
   1702       PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_STARTOF_SECMAP);
   1703       lenA = 0;
   1704       lenB = lenT;
   1705       goto part2;
   1706    } else {
   1707       // Range spans two or more sec-maps, first one is partial.
   1708       PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_MULTIPLE_SECMAPS);
   1709       lenA = len_to_next_secmap;
   1710       lenB = lenT - lenA;
   1711    }
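
           // Worked example (illustrative, assuming SM_SIZE == 64KB as per
           // the "64KB steps" comment below): a == 0x10F000, lenT == 200000.
           // start_of_this_sm(a) == 0x100000, so aNext == 0x110000 and
           // len_to_next_secmap == 0x1000 (4096).  Hence lenA == 4096 and
           // lenB == 195904.  Part 2 will then consume two whole sec-maps
           // (131072 bytes), leaving 64832 bytes for Part 3.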
   1712 
   1713    //------------------------------------------------------------------------
   1714    // Part 1: Deal with the first sec_map.  Most of the time the range will be
   1715    // entirely within a sec_map and this part alone will suffice.  Also,
   1716    // doing it this way lets us avoid repeatedly testing for the crossing of
   1717    // a sec-map boundary within these loops.
   1718    //------------------------------------------------------------------------
   1719 
   1720    // If it's distinguished, make it undistinguished if necessary.
   1721    sm_ptr = get_secmap_ptr(a);
   1722    if (is_distinguished_sm(*sm_ptr)) {
   1723       if (*sm_ptr == example_dsm) {
   1724          // Sec-map already has the V+A bits that we want, so skip.
   1725          PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1_QUICK);
   1726          a    = aNext;
   1727          lenA = 0;
   1728       } else {
   1729          PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1);
   1730          *sm_ptr = copy_for_writing(*sm_ptr);
   1731       }
   1732    }
   1733    sm = *sm_ptr;
   1734 
   1735    // 1 byte steps
   1736    while (True) {
   1737       if (VG_IS_8_ALIGNED(a)) break;
   1738       if (lenA < 1)           break;
   1739       PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1A);
   1740       sm_off = SM_OFF(a);
   1741       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
   1742       a    += 1;
   1743       lenA -= 1;
   1744    }
   1745    // 8-aligned, 8 byte steps
   1746    while (True) {
   1747       if (lenA < 8) break;
   1748       PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8A);
   1749       sm_off16 = SM_OFF_16(a);
   1750       sm->vabits16[sm_off16] = vabits16;
   1751       a    += 8;
   1752       lenA -= 8;
   1753    }
   1754    // 1 byte steps
   1755    while (True) {
   1756       if (lenA < 1) break;
   1757       PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1B);
   1758       sm_off = SM_OFF(a);
   1759       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
   1760       a    += 1;
   1761       lenA -= 1;
   1762    }
   1763 
   1764    // We've finished the first sec-map.  Is that it?
   1765    if (lenB == 0)
   1766       return;
   1767 
   1768    //------------------------------------------------------------------------
   1769    // Part 2: Fast-set entire sec-maps at a time.
   1770    //------------------------------------------------------------------------
   1771   part2:
   1772    // 64KB-aligned, 64KB steps.
   1773    // Nb: we can reach here with lenB < SM_SIZE
   1774    tl_assert(0 == lenA);
   1775    while (True) {
   1776       if (lenB < SM_SIZE) break;
   1777       tl_assert(is_start_of_sm(a));
   1778       PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K);
   1779       sm_ptr = get_secmap_ptr(a);
   1780       if (!is_distinguished_sm(*sm_ptr)) {
   1781          PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K_FREE_DIST_SM);
   1782          // Free the non-distinguished sec-map that we're replacing.  This
   1783          // case happens moderately often, enough to be worthwhile.
   1784          SysRes sres = VG_(am_munmap_valgrind)((Addr)*sm_ptr, sizeof(SecMap));
   1785          tl_assert2(! sr_isError(sres), "SecMap valgrind munmap failure\n");
   1786       }
   1787       update_SM_counts(*sm_ptr, example_dsm);
   1788       // Make the sec-map entry point to the example DSM
   1789       *sm_ptr = example_dsm;
   1790       lenB -= SM_SIZE;
   1791       a    += SM_SIZE;
   1792    }
   1793 
   1794    // We've finished the whole sec-maps.  Is that it?
   1795    if (lenB == 0)
   1796       return;
   1797 
   1798    //------------------------------------------------------------------------
   1799    // Part 3: Finish off the final partial sec-map, if necessary.
   1800    //------------------------------------------------------------------------
   1801 
   1802    tl_assert(is_start_of_sm(a) && lenB < SM_SIZE);
   1803 
   1804    // If it's distinguished, make it undistinguished if necessary.
   1805    sm_ptr = get_secmap_ptr(a);
   1806    if (is_distinguished_sm(*sm_ptr)) {
   1807       if (*sm_ptr == example_dsm) {
   1808          // Sec-map already has the V+A bits that we want, so stop.
   1809          PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2_QUICK);
   1810          return;
   1811       } else {
   1812          PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2);
   1813          *sm_ptr = copy_for_writing(*sm_ptr);
   1814       }
   1815    }
   1816    sm = *sm_ptr;
   1817 
   1818    // 8-aligned, 8 byte steps
   1819    while (True) {
   1820       if (lenB < 8) break;
   1821       PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8B);
   1822       sm_off16 = SM_OFF_16(a);
   1823       sm->vabits16[sm_off16] = vabits16;
   1824       a    += 8;
   1825       lenB -= 8;
   1826    }
   1827    // 1 byte steps
   1828    while (True) {
   1829       if (lenB < 1) return;
   1830       PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1C);
   1831       sm_off = SM_OFF(a);
   1832       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
   1833       a    += 1;
   1834       lenB -= 1;
   1835    }
   1836 }
   1837 
   1838 
   1839 /* --- Set permissions for arbitrary address ranges --- */
   1840 
   1841 void MC_(make_mem_noaccess) ( Addr a, SizeT len )
   1842 {
   1843    PROF_EVENT(MCPE_MAKE_MEM_NOACCESS);
   1844    DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a, len);
   1845    set_address_range_perms ( a, len, VA_BITS16_NOACCESS, SM_DIST_NOACCESS );
   1846    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
   1847       ocache_sarp_Clear_Origins ( a, len );
   1848 }
   1849 
   1850 static void make_mem_undefined ( Addr a, SizeT len )
   1851 {
   1852    PROF_EVENT(MCPE_MAKE_MEM_UNDEFINED);
   1853    DEBUG("make_mem_undefined(%p, %lu)\n", a, len);
   1854    set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
   1855 }
   1856 
   1857 void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag )
   1858 {
   1859    PROF_EVENT(MCPE_MAKE_MEM_UNDEFINED_W_OTAG);
   1860    DEBUG("MC_(make_mem_undefined_w_otag)(%p, %lu)\n", a, len);
   1861    set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
   1862    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
   1863       ocache_sarp_Set_Origins ( a, len, otag );
   1864 }
   1865 
   1866 static
   1867 void make_mem_undefined_w_tid_and_okind ( Addr a, SizeT len,
   1868                                           ThreadId tid, UInt okind )
   1869 {
   1870    UInt        ecu;
   1871    ExeContext* here;
   1872    /* VG_(record_ExeContext) checks for validity of tid, and asserts
   1873       if it is invalid.  So no need to do it here. */
   1874    tl_assert(okind <= 3);
   1875    here = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
   1876    tl_assert(here);
   1877    ecu = VG_(get_ECU_from_ExeContext)(here);
   1878    tl_assert(VG_(is_plausible_ECU)(ecu));
   1879    MC_(make_mem_undefined_w_otag) ( a, len, ecu | okind );
   1880 }
   1881 
   1882 static
   1883 void mc_new_mem_w_tid_make_ECU  ( Addr a, SizeT len, ThreadId tid )
   1884 {
   1885    make_mem_undefined_w_tid_and_okind ( a, len, tid, MC_OKIND_UNKNOWN );
   1886 }
   1887 
   1888 static
   1889 void mc_new_mem_w_tid_no_ECU  ( Addr a, SizeT len, ThreadId tid )
   1890 {
   1891    MC_(make_mem_undefined_w_otag) ( a, len, MC_OKIND_UNKNOWN );
   1892 }
   1893 
   1894 void MC_(make_mem_defined) ( Addr a, SizeT len )
   1895 {
   1896    PROF_EVENT(MCPE_MAKE_MEM_DEFINED);
   1897    DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a, len);
   1898    set_address_range_perms ( a, len, VA_BITS16_DEFINED, SM_DIST_DEFINED );
   1899    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
   1900       ocache_sarp_Clear_Origins ( a, len );
   1901 }
   1902 
   1903 __attribute__((unused))
   1904 static void make_mem_defined_w_tid ( Addr a, SizeT len, ThreadId tid )
   1905 {
   1906    MC_(make_mem_defined)(a, len);
   1907 }
   1908 
   1909 /* For each byte in [a,a+len), if the byte is addressable, make it be
   1910    defined, but if it isn't addressable, leave it alone.  In other
   1911    words, a version of MC_(make_mem_defined) that doesn't mess with
   1912    addressability.  Low-performance implementation. */
   1913 static void make_mem_defined_if_addressable ( Addr a, SizeT len )
   1914 {
   1915    SizeT i;
   1916    UChar vabits2;
   1917    DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a, (ULong)len);
   1918    for (i = 0; i < len; i++) {
   1919       vabits2 = get_vabits2( a+i );
   1920       if (LIKELY(VA_BITS2_NOACCESS != vabits2)) {
   1921          set_vabits2(a+i, VA_BITS2_DEFINED);
   1922          if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
   1923             MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
   1924          }
   1925       }
   1926    }
   1927 }
   1928 
   1929 /* Similarly (needed for mprotect handling ..) */
   1930 static void make_mem_defined_if_noaccess ( Addr a, SizeT len )
   1931 {
   1932    SizeT i;
   1933    UChar vabits2;
   1934    DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a, (ULong)len);
   1935    for (i = 0; i < len; i++) {
   1936       vabits2 = get_vabits2( a+i );
   1937       if (LIKELY(VA_BITS2_NOACCESS == vabits2)) {
   1938          set_vabits2(a+i, VA_BITS2_DEFINED);
   1939          if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
   1940             MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
   1941          }
   1942       }
   1943    }
   1944 }
   1945 
   1946 /* --- Block-copy permissions (needed for implementing realloc() and
   1947        sys_mremap). --- */
   1948 
   1949 void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT len )
   1950 {
   1951    SizeT i, j;
   1952    UChar vabits2, vabits8;
   1953    Bool  aligned, nooverlap;
   1954 
   1955    DEBUG("MC_(copy_address_range_state)\n");
   1956    PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE);
   1957 
   1958    if (len == 0 || src == dst)
   1959       return;
   1960 
   1961    aligned   = VG_IS_4_ALIGNED(src) && VG_IS_4_ALIGNED(dst);
   1962    nooverlap = src+len <= dst || dst+len <= src;
   1963 
   1964    if (nooverlap && aligned) {
   1965 
   1966       /* Vectorised fast case, when no overlap and suitably aligned */
   1967       /* vector loop */
   1968       i = 0;
   1969       while (len >= 4) {
   1970          vabits8 = get_vabits8_for_aligned_word32( src+i );
   1971          set_vabits8_for_aligned_word32( dst+i, vabits8 );
   1972          if (LIKELY(VA_BITS8_DEFINED == vabits8
   1973                             || VA_BITS8_UNDEFINED == vabits8
   1974                             || VA_BITS8_NOACCESS == vabits8)) {
   1975             /* do nothing */
   1976          } else {
   1977             /* have to copy secondary map info */
   1978             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+0 ))
   1979                set_sec_vbits8( dst+i+0, get_sec_vbits8( src+i+0 ) );
   1980             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+1 ))
   1981                set_sec_vbits8( dst+i+1, get_sec_vbits8( src+i+1 ) );
   1982             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+2 ))
   1983                set_sec_vbits8( dst+i+2, get_sec_vbits8( src+i+2 ) );
   1984             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+3 ))
   1985                set_sec_vbits8( dst+i+3, get_sec_vbits8( src+i+3 ) );
   1986          }
   1987          i += 4;
   1988          len -= 4;
   1989       }
   1990       /* fixup loop */
   1991       while (len >= 1) {
   1992          vabits2 = get_vabits2( src+i );
   1993          set_vabits2( dst+i, vabits2 );
   1994          if (VA_BITS2_PARTDEFINED == vabits2) {
   1995             set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
   1996          }
   1997          i++;
   1998          len--;
   1999       }
   2000 
   2001    } else {
   2002 
   2003       /* We have to do things the slow way */
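              /* Copy direction matters when the ranges overlap: with
                 src < dst we copy from the highest address downwards, so
                 that source bytes are read before the copy overwrites
                 them; with src > dst we copy upwards for the same reason
                 (the usual memmove-style argument). */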
   2004       if (src < dst) {
   2005          for (i = 0, j = len-1; i < len; i++, j--) {
   2006             PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE_LOOP1);
   2007             vabits2 = get_vabits2( src+j );
   2008             set_vabits2( dst+j, vabits2 );
   2009             if (VA_BITS2_PARTDEFINED == vabits2) {
   2010                set_sec_vbits8( dst+j, get_sec_vbits8( src+j ) );
   2011             }
   2012          }
   2013       }
   2014 
   2015       if (src > dst) {
   2016          for (i = 0; i < len; i++) {
   2017             PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE_LOOP2);
   2018             vabits2 = get_vabits2( src+i );
   2019             set_vabits2( dst+i, vabits2 );
   2020             if (VA_BITS2_PARTDEFINED == vabits2) {
   2021                set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
   2022             }
   2023          }
   2024       }
   2025    }
   2026 
   2027 }
   2028 
   2029 
   2030 /*------------------------------------------------------------*/
   2031 /*--- Origin tracking stuff - cache basics                 ---*/
   2032 /*------------------------------------------------------------*/
   2033 
   2034 /* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
   2035    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   2036 
   2037    Note that this implementation draws inspiration from the "origin
   2038    tracking by value piggybacking" scheme described in "Tracking Bad
   2039    Apples: Reporting the Origin of Null and Undefined Value Errors"
   2040    (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer,
   2041    Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is
   2042    implemented completely differently.
   2043 
   2044    Origin tags and ECUs -- about the shadow values
   2045    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   2046 
   2047    This implementation tracks the defining point of all uninitialised
   2048    values using so called "origin tags", which are 32-bit integers,
   2049    rather than using the values themselves to encode the origins.  The
   2050    latter, so-called "value piggybacking", is what the OOPSLA07 paper
   2051    describes.
   2052 
   2053    Origin tags, as tracked by the machinery below, are 32-bit unsigned
   2054    ints (UInts), regardless of the machine's word size.  Each tag
   2055    comprises an upper 30-bit ECU field and a lower 2-bit
   2056    'kind' field.  The ECU field is a number given out by m_execontext
   2057    and has a 1-1 mapping with ExeContext*s.  An ECU can be used
   2058    directly as an origin tag (otag), but in fact we want to put
   2059    directly as an origin tag (otag), but in fact we put additional
   2060    information in the 'kind' field to indicate roughly where the
   2061    for the user -- it has no other purpose.  In summary:
   2062 
   2063    * Both ECUs and origin tags are represented as 32-bit words
   2064 
   2065    * m_execontext and the core-tool interface deal purely in ECUs.
   2066      They have no knowledge of origin tags - that is a purely
   2067      Memcheck-internal matter.
   2068 
   2069    * all valid ECUs have the lowest 2 bits zero and at least
   2070      one of the upper 30 bits nonzero (see VG_(is_plausible_ECU))
   2071 
   2072    * to convert from an ECU to an otag, OR in one of the MC_OKIND_
   2073      constants defined in mc_include.h.
   2074 
   2075    * to convert an otag back to an ECU, AND it with ~3
   2076 
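   As a small illustration (a sketch only; MC_OKIND_HEAP is taken here to
   be one of the MC_OKIND_ constants referred to above):

      UInt otag = ecu | MC_OKIND_HEAP;   // ECU -> otag for a heap origin
      UInt ecu2 = otag & ~3u;            // otag -> ECU; ecu2 == ecu again

   This works because valid ECUs have their lowest 2 bits zero and the
   MC_OKIND_ values occupy only those 2 bits.
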
   2077    One important fact is that no valid otag is zero.  A zero otag is
   2078    used by the implementation to indicate "no origin", which could
   2079    mean that either the value is defined, or it is undefined but the
   2080    implementation somehow managed to lose the origin.
   2081 
   2082    The ECU used for memory created by malloc etc is derived from the
   2083    stack trace at the time the malloc etc happens.  This means the
   2084    mechanism can show the exact allocation point for heap-created
   2085    uninitialised values.
   2086 
   2087    In contrast, it is simply too expensive to create a complete
   2088    backtrace for each stack allocation.  Therefore we merely use a
   2089    depth-1 backtrace for stack allocations, which can be done once at
   2090    translation time, rather than N times at run time.  The result of
   2091    this is that, for stack created uninitialised values, Memcheck can
   2092    only show the allocating function, and not what called it.
   2093    Furthermore, compilers tend to move the stack pointer just once at
   2094    the start of the function, to allocate all locals, and so in fact
   2095    the stack origin almost always simply points to the opening brace
   2096    of the function.  Net result is, for stack origins, the mechanism
   2097    can tell you in which function the undefined value was created, but
   2098    that's all.  Users will need to carefully check all locals in the
   2099    specified function.
   2100 
   2101    Shadowing registers and memory
   2102    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   2103 
   2104    Memory is shadowed using a two level cache structure (ocacheL1 and
   2105    ocacheL2).  Memory references are first directed to ocacheL1.  This
   2106    is a traditional 2-way set associative cache with 32-byte lines and
   2107    approximate LRU replacement within each set.
   2108 
   2109    A naive implementation would require storing one 32 bit otag for
   2110    each byte of memory covered, a 4:1 space overhead.  Instead, there
   2111    is one otag for every 4 bytes of memory covered, plus a 4-bit mask
   2112    that shows which of the 4 bytes have that shadow value and which
   2113    have a shadow value of zero (indicating no origin).  Hence a lot of
   2114    space is saved, but the cost is that only one different origin per
   2115    4 bytes of address space can be represented.  This is a source of
   2116    imprecision, but how much of a problem it really is remains to be
   2117    seen.
   2118 
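
   Concretely (a sketch; the real OCacheLine structure appears further
   down), each aligned 4-byte group is summarised by one UInt otag plus a
   4-bit descriptor.  For instance, a descriptor of 0x3 would mean, under
   the byte-to-bit mapping used by the implementation, that two of the
   four bytes carry the group's otag while the other two have no origin;
   if a differently-originated undefined byte is then written into the
   same group, the single otag slot means one of the two origins must be
   dropped.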
   2119    A cache line that contains all zeroes ("no origins") contains no
   2120    useful information, and can be ejected from the L1 cache "for
   2121    free", in the sense that a read miss on the L1 causes a line of
   2122    zeroes to be installed.  However, ejecting a line containing
   2123    nonzeroes risks losing origin information permanently.  In order to
   2124    prevent such lossage, ejected nonzero lines are placed in a
   2125    secondary cache (ocacheL2), which is an OSet (AVL tree) of cache
   2126    lines.  This can grow arbitrarily large, and so should ensure that
   2127    Memcheck runs out of memory in preference to losing useful origin
   2128    info due to cache size limitations.
   2129 
   2130    Shadowing registers is a bit tricky, because the shadow values are
   2131    32 bits, regardless of the size of the register.  That gives a
   2132    problem for registers smaller than 32 bits.  The solution is to
   2133    find spaces in the guest state that are unused, and use those to
   2134    shadow guest state fragments smaller than 32 bits.  For example, on
   2135    ppc32/64, each vector register is 16 bytes long.  If 4 bytes of the
   2136    shadow are allocated for the register's otag, then there are still
   2137    12 bytes left over which could be used to shadow 3 other values.
   2138 
   2139    This implies there is some non-obvious mapping from guest state
   2140    (start,length) pairs to the relevant shadow offset (for the origin
   2141    tags).  And it is unfortunately guest-architecture specific.  The
   2142    mapping is contained in mc_machine.c, which is quite lengthy but
   2143    straightforward.
   2144 
   2145    Instrumenting the IR
   2146    ~~~~~~~~~~~~~~~~~~~~
   2147 
   2148    Instrumentation is largely straightforward, and done by the
   2149    functions schemeE and schemeS in mc_translate.c.  These generate
   2150    code for handling the origin tags of expressions (E) and statements
   2151    (S) respectively.  The rather strange names are a reference to the
   2152    "compilation schemes" shown in Simon Peyton Jones' book "The
   2153    Implementation of Functional Programming Languages" (Prentice Hall,
   2154    1987, see
   2155    http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm).
   2156 
   2157    schemeS merely arranges to move shadow values around the guest
   2158    state to track the incoming IR.  schemeE is largely trivial too.
   2159    The only significant point is how to compute the otag corresponding
   2160    to binary (or ternary, quaternary, etc) operator applications.  The
   2161    rule is simple: just take whichever value is larger (32-bit
   2162    unsigned max).  Constants get the special value zero.  Hence this
   2163    rule always propagates a nonzero (known) otag in preference to a
   2164    zero (unknown, or more likely, value-is-defined) tag, as we want.
   2165    If two different undefined values are inputs to a binary operator
   2166    application, then which is propagated is arbitrary, but that
   2167    doesn't matter, since the program is erroneous in using either of
   2168    the values, and so there's no point in attempting to propagate
   2169    both.
   2170 
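
   As a tiny illustration (a sketch only): for a binary operation
   z = op(x,y), the instrumentation computes, in effect,

      otag(z) = Max32U( otag(x), otag(y) );

   so if otag(x) == 0 (x defined or constant) and otag(y) == 0x1230
   (y undefined, origin known), then z inherits 0x1230.  If both inputs
   carry nonzero tags, the numerically larger one wins, which is
   arbitrary but harmless for the reasons given above.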
   2171    Since constants are abstracted to (otag) zero, much of the
   2172    instrumentation code can be folded out without difficulty by the
   2173    generic post-instrumentation IR cleanup pass, using these rules:
   2174    Max32U(0,x) -> x, Max32U(x,0) -> x, and Max32U(x,y) where x and y are
   2175    constants is evaluated at JIT time, followed by the resulting dead code
   2176    removal.  In practice this causes surprisingly few Max32Us to
   2177    survive through to backend code generation.
   2178 
   2179    Integration with the V-bits machinery
   2180    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   2181 
   2182    This is again largely straightforward.  Mostly the otag and V bits
   2183    stuff are independent.  The only point of interaction is when the V
   2184    bits instrumenter creates a call to a helper function to report an
   2185    uninitialised value error -- in that case it must first use schemeE
   2186    to get hold of the origin tag expression for the value, and pass
   2187    that to the helper too.
   2188 
   2189    There is the usual stuff to do with setting address range
   2190    permissions.  When memory is painted undefined, we must also know
   2191    the origin tag to paint with, which involves some tedious plumbing,
   2192    particularly to do with the fast case stack handlers.  When memory
   2193    is painted defined or noaccess then the origin tags must be forced
   2194    to zero.
   2195 
   2196    One of the goals of the implementation was to ensure that the
   2197    non-origin tracking mode isn't slowed down at all.  To do this,
   2198    various functions to do with memory permissions setting (again,
   2199    mostly pertaining to the stack) are duplicated for the with- and
   2200    without-otag case.
   2201 
   2202    Dealing with stack redzones, and the NIA cache
   2203    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   2204 
   2205    This is one of the few non-obvious parts of the implementation.
   2206 
   2207    Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small
   2208    reserved area below the stack pointer, that can be used as scratch
   2209    space by compiler generated code for functions.  In the Memcheck
   2210    sources this is referred to as the "stack redzone".  The important
   2211    thing here is that such redzones are considered volatile across
   2212    function calls and returns.  So Memcheck takes care to mark them as
   2213    undefined for each call and return, on the afflicted platforms.
   2214    Past experience shows this is essential in order to get reliable
   2215    messages about uninitialised values that come from the stack.
   2216 
   2217    So the question is, when we paint a redzone undefined, what origin
   2218    tag should we use for it?  Consider a function f() calling g().  If
   2219    we paint the redzone using an otag derived from the ExeContext of
   2220    the CALL/BL instruction in f, then any errors in g causing it to
   2221    use uninitialised values that happen to lie in the redzone, will be
   2222    reported as having their origin in f.  Which is highly confusing.
   2223 
   2224    The same applies for returns: if, on a return, we paint the redzone
   2225    using an origin tag derived from the ExeContext of the RET/BLR
   2226    instruction in g, then any later errors in f causing it to use
   2227    uninitialised values in the redzone, will be reported as having
   2228    their origin in g.  Which is just as confusing.
   2229 
   2230    To do it right, in both cases we need to use an origin tag which
   2231    pertains to the instruction which dynamically follows the CALL/BL
   2232    or RET/BLR.  In short, one derived from the NIA - the "next
   2233    instruction address".
   2234 
   2235    To make this work, Memcheck's redzone-painting helper,
   2236    MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the
   2237    NIA.  It converts the NIA to a 1-element ExeContext, and uses that
   2238    ExeContext's ECU as the basis for the otag used to paint the
   2239    redzone.  The expensive part of this is converting an NIA into an
   2240    ECU, since this happens once for every call and every return.  So
   2241    we use a simple 511-line, 2-way set associative cache
   2242    (nia_to_ecu_cache) to cache the mappings, and that knocks most of
   2243    the cost out.
   2244 
   2245    Further background comments
   2246    ~~~~~~~~~~~~~~~~~~~~~~~~~~~
   2247 
   2248    > Question: why is otag a UInt?  Wouldn't a UWord be better?  Isn't
   2249    > it really just the address of the relevant ExeContext?
   2250 
   2251    Well, it's not the address, but a value which has a 1-1 mapping
   2252    with ExeContexts, and is guaranteed not to be zero, since zero
   2253    denotes (to memcheck) "unknown origin or defined value".  So these
   2254    UInts are just numbers starting at 4 and incrementing by 4; each
   2255    ExeContext is given a number when it is created.  (*** NOTE this
   2256    confuses otags and ECUs; see comments above ***).
   2257 
   2258    Making these otags 32-bit regardless of the machine's word size
   2259    makes the 64-bit implementation easier (next para).  And it doesn't
   2260    really limit us in any way, since for the tags to overflow would
   2261    require that the program somehow caused 2^30-1 different
   2262    ExeContexts to be created, in which case it is probably in deep
   2263    trouble.  Not to mention V will have soaked up many tens of
   2264    gigabytes of memory merely to store them all.
   2265 
   2266    So having 64-bit origins doesn't really buy you anything, and has
   2267    the following downsides:
   2268 
   2269    Suppose that instead, an otag is a UWord.  This would mean that, on
   2270    a 64-bit target,
   2271 
   2272    1. It becomes hard to shadow any element of guest state which is
   2273       smaller than 8 bytes.  To do so means you'd need to find some
   2274       8-byte-sized hole in the guest state which you don't want to
   2275       shadow, and use that instead to hold the otag.  On ppc64, the
   2276       condition code register(s) are split into 20 UChar sized pieces,
   2277       all of which need to be tracked (guest_XER_SO .. guest_CR7_0)
   2278       and so that would entail finding 160 bytes somewhere else in the
   2279       guest state.
   2280 
   2281       Even on x86, I want to track origins for %AH .. %DH (bits 15:8
   2282       of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of
   2283       same) and so I had to look for 4 untracked otag-sized areas in
   2284       the guest state to make that possible.
   2285 
   2286       The same problem exists of course when origin tags are only 32
   2287       bits, but it's less extreme.
   2288 
   2289    2. (More compelling) it doubles the size of the origin shadow
   2290       memory.  Given that the shadow memory is organised as a fixed
   2291       size cache, and that accuracy of tracking is limited by origins
   2292       falling out the cache due to space conflicts, this isn't good.
   2293 
   2294    > Another question: is the origin tracking perfect, or are there
   2295    > cases where it fails to determine an origin?
   2296 
   2297    It is imperfect for at least the following reasons, and
   2298    probably more:
   2299 
   2300    * Insufficient capacity in the origin cache.  When a line is
   2301      evicted from the cache it is gone forever, and so subsequent
   2302      queries for the line produce zero, indicating no origin
   2303      information.  Interestingly, a line containing all zeroes can be
   2304      evicted "free" from the cache, since it contains no useful
   2305      information, so there is scope perhaps for some cleverer cache
   2306      management schemes.  (*** NOTE, with the introduction of the
   2307      second level origin tag cache, ocacheL2, this is no longer a
   2308      problem. ***)
   2309 
   2310    * The origin cache only stores one otag per 32-bits of address
   2311      space, plus 4 bits indicating which of the 4 bytes has that tag
   2312      and which are considered defined.  The result is that if two
   2313      undefined bytes in the same word are stored in memory, the first
   2314      stored byte's origin will be lost and replaced by the origin for
   2315      the second byte.
   2316 
   2317    * Nonzero origin tags for defined values.  Consider a binary
   2318      operator application op(x,y).  Suppose y is undefined (and so has
   2319      a valid nonzero origin tag), and x is defined, but erroneously
   2320      has a nonzero origin tag (defined values should have tag zero).
   2321      If the erroneous tag has a numeric value greater than y's tag,
   2322      then the rule for propagating origin tags through binary
   2323      operations, which is simply to take the unsigned max of the two
   2324      tags, will erroneously propagate x's tag rather than y's.
   2325 
   2326    * Some obscure uses of x86/amd64 byte registers can cause lossage
   2327      or confusion of origins.  %AH .. %DH are treated as different
   2328      from, and unrelated to, their parent registers, %EAX .. %EDX.
   2329      So some weird sequences like
   2330 
   2331         movb undefined-value, %AH
   2332         movb defined-value, %AL
   2333         .. use %AX or %EAX ..
   2334 
   2335      will cause the origin attributed to %AH to be ignored, since %AL,
   2336      %AX, %EAX are treated as the same register, and %AH as a
   2337      completely separate one.
   2338 
   2339    But having said all that, it actually seems to work fairly well in
   2340    practice.
   2341 */
   2342 
   2343 static UWord stats_ocacheL1_find           = 0;
   2344 static UWord stats_ocacheL1_found_at_1     = 0;
   2345 static UWord stats_ocacheL1_found_at_N     = 0;
   2346 static UWord stats_ocacheL1_misses         = 0;
   2347 static UWord stats_ocacheL1_lossage        = 0;
   2348 static UWord stats_ocacheL1_movefwds       = 0;
   2349 
   2350 static UWord stats__ocacheL2_refs          = 0;
   2351 static UWord stats__ocacheL2_misses        = 0;
   2352 static UWord stats__ocacheL2_n_nodes_max   = 0;
   2353 
   2354 /* Cache of 32-bit values, one every 32 bits of address space */
   2355 
   2356 #define OC_BITS_PER_LINE 5
   2357 #define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2))
   2358 
   2359 static INLINE UWord oc_line_offset ( Addr a ) {
   2360    return (a >> 2) & (OC_W32S_PER_LINE - 1);
   2361 }
   2362 static INLINE Bool is_valid_oc_tag ( Addr tag ) {
   2363    return 0 == (tag & ((1 << OC_BITS_PER_LINE) - 1));
   2364 }
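
        /* Example of the decomposition (with OC_BITS_PER_LINE == 5, so a
           line covers 32 bytes split into OC_W32S_PER_LINE == 8 groups of
           4 bytes): for a == 0x1003A7, the line tag is a & ~31 == 0x1003A0,
           the byte lies at offset 7 within the line, and
           oc_line_offset(a) == 1, i.e. its otag lives in w32[1], which
           covers bytes 0x1003A4 .. 0x1003A7. */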
   2365 
   2366 #define OC_LINES_PER_SET 2
   2367 
   2368 #define OC_N_SET_BITS    20
   2369 #define OC_N_SETS        (1 << OC_N_SET_BITS)
   2370 
   2371 /* These settings give:
   2372    64 bit host: ocache:  100,663,296 sizeB    67,108,864 useful
   2373    32 bit host: ocache:   92,274,688 sizeB    67,108,864 useful
   2374 */
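
        /* Where those figures come from (a quick check of the arithmetic):
           there are OC_N_SETS * OC_LINES_PER_SET = 2^20 * 2 = 2,097,152
           lines.  Each line carries 32 bytes of otag payload (8 UInts, the
           "useful" part: 2,097,152 * 32 = 67,108,864) plus 8 descr bytes
           and an Addr tag, giving sizeof(OCacheLine) == 48 on a 64-bit
           host (2,097,152 * 48 = 100,663,296) and 44 on a 32-bit host
           (2,097,152 * 44 = 92,274,688), assuming no padding beyond
           natural alignment. */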
   2375 
   2376 #define OC_MOVE_FORWARDS_EVERY_BITS 7
   2377 
   2378 
   2379 typedef
   2380    struct {
   2381       Addr  tag;
   2382       UInt  w32[OC_W32S_PER_LINE];
   2383       UChar descr[OC_W32S_PER_LINE];
   2384    }
   2385    OCacheLine;
   2386 
   2387 /* Classify and also sanity-check 'line'.  Return 'e' (empty) if not
   2388    in use, 'n' (nonzero) if it contains at least one valid origin tag,
   2389    and 'z' if all the represented tags are zero. */
   2390 static UChar classify_OCacheLine ( OCacheLine* line )
   2391 {
   2392    UWord i;
   2393    if (line->tag == 1/*invalid*/)
   2394       return 'e'; /* EMPTY */
   2395    tl_assert(is_valid_oc_tag(line->tag));
   2396    for (i = 0; i < OC_W32S_PER_LINE; i++) {
   2397       tl_assert(0 == ((~0xF) & line->descr[i]));
   2398       if (line->w32[i] > 0 && line->descr[i] > 0)
   2399          return 'n'; /* NONZERO - contains useful info */
   2400    }
   2401    return 'z'; /* ZERO - no useful info */
   2402 }
   2403 
   2404 typedef
   2405    struct {
   2406       OCacheLine line[OC_LINES_PER_SET];
   2407    }
   2408    OCacheSet;
   2409 
   2410 typedef
   2411    struct {
   2412       OCacheSet set[OC_N_SETS];
   2413    }
   2414    OCache;
   2415 
   2416 static OCache* ocacheL1 = NULL;
   2417 static UWord   ocacheL1_event_ctr = 0;
   2418 
   2419 static void init_ocacheL2 ( void ); /* fwds */
   2420 static void init_OCache ( void )
   2421 {
   2422    UWord line, set;
   2423    tl_assert(MC_(clo_mc_level) >= 3);
   2424    tl_assert(ocacheL1 == NULL);
   2425    ocacheL1 = VG_(am_shadow_alloc)(sizeof(OCache));
   2426    if (ocacheL1 == NULL) {
   2427       VG_(out_of_memory_NORETURN)( "memcheck:allocating ocacheL1",
   2428                                    sizeof(OCache) );
   2429    }
   2430    tl_assert(ocacheL1 != NULL);
   2431    for (set = 0; set < OC_N_SETS; set++) {
   2432       for (line = 0; line < OC_LINES_PER_SET; line++) {
   2433          ocacheL1->set[set].line[line].tag = 1/*invalid*/;
   2434       }
   2435    }
   2436    init_ocacheL2();
   2437 }
   2438 
   2439 static void moveLineForwards ( OCacheSet* set, UWord lineno )
   2440 {
   2441    OCacheLine tmp;
   2442    stats_ocacheL1_movefwds++;
   2443    tl_assert(lineno > 0 && lineno < OC_LINES_PER_SET);
   2444    tmp = set->line[lineno-1];
   2445    set->line[lineno-1] = set->line[lineno];
   2446    set->line[lineno] = tmp;
   2447 }
   2448 
   2449 static void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) {
   2450    UWord i;
   2451    for (i = 0; i < OC_W32S_PER_LINE; i++) {
   2452       line->w32[i] = 0; /* NO ORIGIN */
   2453       line->descr[i] = 0; /* REALLY REALLY NO ORIGIN! */
   2454    }
   2455    line->tag = tag;
   2456 }
   2457 
   2458 //////////////////////////////////////////////////////////////
   2459 //// OCache backing store
   2460 
   2461 static OSet* ocacheL2 = NULL;
   2462 
   2463 static void* ocacheL2_malloc ( const HChar* cc, SizeT szB ) {
   2464    return VG_(malloc)(cc, szB);
   2465 }
   2466 static void ocacheL2_free ( void* v ) {
   2467    VG_(free)( v );
   2468 }
   2469 
   2470 /* Stats: # nodes currently in tree */
   2471 static UWord stats__ocacheL2_n_nodes = 0;
   2472 
   2473 static void init_ocacheL2 ( void )
   2474 {
   2475    tl_assert(!ocacheL2);
   2476    tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */
   2477    tl_assert(0 == offsetof(OCacheLine,tag));
   2478    ocacheL2
   2479       = VG_(OSetGen_Create)( offsetof(OCacheLine,tag),
   2480                              NULL, /* fast cmp */
   2481                              ocacheL2_malloc, "mc.ioL2", ocacheL2_free);
   2482    stats__ocacheL2_n_nodes = 0;
   2483 }
   2484 
   2485 /* Find line with the given tag in the tree, or NULL if not found. */
   2486 static OCacheLine* ocacheL2_find_tag ( Addr tag )
   2487 {
   2488    OCacheLine* line;
   2489    tl_assert(is_valid_oc_tag(tag));
   2490    stats__ocacheL2_refs++;
   2491    line = VG_(OSetGen_Lookup)( ocacheL2, &tag );
   2492    return line;
   2493 }
   2494 
   2495 /* Delete the line with the given tag from the tree, if it is present, and
   2496    free up the associated memory. */
   2497 static void ocacheL2_del_tag ( Addr tag )
   2498 {
   2499    OCacheLine* line;
   2500    tl_assert(is_valid_oc_tag(tag));
   2501    stats__ocacheL2_refs++;
   2502    line = VG_(OSetGen_Remove)( ocacheL2, &tag );
   2503    if (line) {
   2504       VG_(OSetGen_FreeNode)(ocacheL2, line);
   2505       tl_assert(stats__ocacheL2_n_nodes > 0);
   2506       stats__ocacheL2_n_nodes--;
   2507    }
   2508 }
   2509 
   2510 /* Add a copy of the given line to the tree.  It must not already be
   2511    present. */
   2512 static void ocacheL2_add_line ( OCacheLine* line )
   2513 {
   2514    OCacheLine* copy;
   2515    tl_assert(is_valid_oc_tag(line->tag));
   2516    copy = VG_(OSetGen_AllocNode)( ocacheL2, sizeof(OCacheLine) );
   2517    *copy = *line;
   2518    stats__ocacheL2_refs++;
   2519    VG_(OSetGen_Insert)( ocacheL2, copy );
   2520    stats__ocacheL2_n_nodes++;
   2521    if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max)
   2522       stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes;
   2523 }
   2524 
   2525 ////
   2526 //////////////////////////////////////////////////////////////
   2527 
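/* Slow-path lookup in the L1 origin cache.  The caller (find_OCacheLine)
   has already checked line 0 of the relevant set, so this scans lines
   1 .. OC_LINES_PER_SET-1.  On a hit, the matched line is occasionally
   swapped one slot towards the front of its set (a cheap pseudo-LRU,
   rate-limited by OC_MOVE_FORWARDS_EVERY_BITS).  On a miss, the line in
   the last slot is evicted -- copied to the L2 backing tree if it holds
   any useful origins -- and the slot is refilled from L2, or zeroed if
   L2 doesn't hold the tag either. */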
   2528 __attribute__((noinline))
   2529 static OCacheLine* find_OCacheLine_SLOW ( Addr a )
   2530 {
   2531    OCacheLine *victim, *inL2;
   2532    UChar c;
   2533    UWord line;
   2534    UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
   2535    UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
   2536    UWord tag     = a & tagmask;
   2537    tl_assert(setno >= 0 && setno < OC_N_SETS);
   2538 
    2539    /* we already tried line == 0, so start the search at line 1. */
   2540    for (line = 1; line < OC_LINES_PER_SET; line++) {
   2541       if (ocacheL1->set[setno].line[line].tag == tag) {
   2542          if (line == 1) {
   2543             stats_ocacheL1_found_at_1++;
   2544          } else {
   2545             stats_ocacheL1_found_at_N++;
   2546          }
   2547          if (UNLIKELY(0 == (ocacheL1_event_ctr++
   2548                             & ((1<<OC_MOVE_FORWARDS_EVERY_BITS)-1)))) {
   2549             moveLineForwards( &ocacheL1->set[setno], line );
   2550             line--;
   2551          }
   2552          return &ocacheL1->set[setno].line[line];
   2553       }
   2554    }
   2555 
   2556    /* A miss.  Use the last slot.  Implicitly this means we're
   2557       ejecting the line in the last slot. */
   2558    stats_ocacheL1_misses++;
   2559    tl_assert(line == OC_LINES_PER_SET);
   2560    line--;
   2561    tl_assert(line > 0);
   2562 
   2563    /* First, move the to-be-ejected line to the L2 cache. */
   2564    victim = &ocacheL1->set[setno].line[line];
   2565    c = classify_OCacheLine(victim);
   2566    switch (c) {
   2567       case 'e':
   2568          /* the line is empty (has invalid tag); ignore it. */
   2569          break;
   2570       case 'z':
   2571          /* line contains zeroes.  We must ensure the backing store is
   2572             updated accordingly, either by copying the line there
   2573             verbatim, or by ensuring it isn't present there.  We
    2574          choose the latter on the basis that it reduces the size of
   2575             the backing store. */
   2576          ocacheL2_del_tag( victim->tag );
   2577          break;
   2578       case 'n':
   2579          /* line contains at least one real, useful origin.  Copy it
   2580             to the backing store. */
   2581          stats_ocacheL1_lossage++;
   2582          inL2 = ocacheL2_find_tag( victim->tag );
   2583          if (inL2) {
   2584             *inL2 = *victim;
   2585          } else {
   2586             ocacheL2_add_line( victim );
   2587          }
   2588          break;
   2589       default:
   2590          tl_assert(0);
   2591    }
   2592 
   2593    /* Now we must reload the L1 cache from the backing tree, if
   2594       possible. */
   2595    tl_assert(tag != victim->tag); /* stay sane */
   2596    inL2 = ocacheL2_find_tag( tag );
   2597    if (inL2) {
   2598       /* We're in luck.  It's in the L2. */
   2599       ocacheL1->set[setno].line[line] = *inL2;
   2600    } else {
   2601       /* Missed at both levels of the cache hierarchy.  We have to
   2602          declare it as full of zeroes (unknown origins). */
   2603       stats__ocacheL2_misses++;
   2604       zeroise_OCacheLine( &ocacheL1->set[setno].line[line], tag );
   2605    }
   2606 
    2607    /* Move it one slot forwards */
   2608    moveLineForwards( &ocacheL1->set[setno], line );
   2609    line--;
   2610 
   2611    return &ocacheL1->set[setno].line[line];
   2612 }
   2613 
   2614 static INLINE OCacheLine* find_OCacheLine ( Addr a )
   2615 {
   2616    UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
   2617    UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
   2618    UWord tag     = a & tagmask;
   2619 
   2620    stats_ocacheL1_find++;
   2621 
   2622    if (OC_ENABLE_ASSERTIONS) {
   2623       tl_assert(setno >= 0 && setno < OC_N_SETS);
   2624       tl_assert(0 == (tag & (4 * OC_W32S_PER_LINE - 1)));
   2625    }
   2626 
   2627    if (LIKELY(ocacheL1->set[setno].line[0].tag == tag)) {
   2628       return &ocacheL1->set[setno].line[0];
   2629    }
   2630 
   2631    return find_OCacheLine_SLOW( a );
   2632 }
   2633 
   2634 static INLINE void set_aligned_word64_Origin_to_undef ( Addr a, UInt otag )
   2635 {
   2636    //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
   2637    //// Set the origins for a+0 .. a+7
   2638    { OCacheLine* line;
   2639      UWord lineoff = oc_line_offset(a);
   2640      if (OC_ENABLE_ASSERTIONS) {
   2641         tl_assert(lineoff >= 0
   2642                   && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
   2643      }
   2644      line = find_OCacheLine( a );
   2645      line->descr[lineoff+0] = 0xF;
   2646      line->descr[lineoff+1] = 0xF;
   2647      line->w32[lineoff+0]   = otag;
   2648      line->w32[lineoff+1]   = otag;
   2649    }
   2650    //// END inlined, specialised version of MC_(helperc_b_store8)
   2651 }
   2652 
   2653 
   2654 /*------------------------------------------------------------*/
   2655 /*--- Aligned fast case permission setters,                ---*/
   2656 /*--- for dealing with stacks                              ---*/
   2657 /*------------------------------------------------------------*/
   2658 
   2659 /*--------------------- 32-bit ---------------------*/
   2660 
   2661 /* Nb: by "aligned" here we mean 4-byte aligned */
   2662 
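/* Note on the PERF_FAST_STACK2 fast paths below: each byte of
   sm->vabits8 holds the V+A state for four bytes of address space
   (two bits per byte), so a single byte store covers the whole 4-byte
   word; the 64-bit variants further down use a single vabits16
   halfword in the same way. */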
   2663 static INLINE void make_aligned_word32_undefined ( Addr a )
   2664 {
   2665   PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_UNDEFINED);
   2666 
   2667 #ifndef PERF_FAST_STACK2
   2668    make_mem_undefined(a, 4);
   2669 #else
   2670    {
   2671       UWord   sm_off;
   2672       SecMap* sm;
   2673 
   2674       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
   2675          PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_UNDEFINED_SLOW);
   2676          make_mem_undefined(a, 4);
   2677          return;
   2678       }
   2679 
   2680       sm                  = get_secmap_for_writing_low(a);
   2681       sm_off              = SM_OFF(a);
   2682       sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
   2683    }
   2684 #endif
   2685 }
   2686 
   2687 static INLINE
   2688 void make_aligned_word32_undefined_w_otag ( Addr a, UInt otag )
   2689 {
   2690    make_aligned_word32_undefined(a);
   2691    //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
   2692    //// Set the origins for a+0 .. a+3
   2693    { OCacheLine* line;
   2694      UWord lineoff = oc_line_offset(a);
   2695      if (OC_ENABLE_ASSERTIONS) {
   2696         tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   2697      }
   2698      line = find_OCacheLine( a );
   2699      line->descr[lineoff] = 0xF;
   2700      line->w32[lineoff]   = otag;
   2701    }
   2702    //// END inlined, specialised version of MC_(helperc_b_store4)
   2703 }
   2704 
   2705 static INLINE
   2706 void make_aligned_word32_noaccess ( Addr a )
   2707 {
   2708    PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_NOACCESS);
   2709 
   2710 #ifndef PERF_FAST_STACK2
   2711    MC_(make_mem_noaccess)(a, 4);
   2712 #else
   2713    {
   2714       UWord   sm_off;
   2715       SecMap* sm;
   2716 
   2717       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
   2718          PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_NOACCESS_SLOW);
   2719          MC_(make_mem_noaccess)(a, 4);
   2720          return;
   2721       }
   2722 
   2723       sm                  = get_secmap_for_writing_low(a);
   2724       sm_off              = SM_OFF(a);
   2725       sm->vabits8[sm_off] = VA_BITS8_NOACCESS;
   2726 
   2727       //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
    2728       //// Clear the origins for a+0 .. a+3.
   2729       if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
   2730          OCacheLine* line;
   2731          UWord lineoff = oc_line_offset(a);
   2732          if (OC_ENABLE_ASSERTIONS) {
   2733             tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   2734          }
   2735          line = find_OCacheLine( a );
   2736          line->descr[lineoff] = 0;
   2737       }
   2738       //// END inlined, specialised version of MC_(helperc_b_store4)
   2739    }
   2740 #endif
   2741 }
   2742 
   2743 /*--------------------- 64-bit ---------------------*/
   2744 
   2745 /* Nb: by "aligned" here we mean 8-byte aligned */
   2746 
   2747 static INLINE void make_aligned_word64_undefined ( Addr a )
   2748 {
   2749    PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_UNDEFINED);
   2750 
   2751 #ifndef PERF_FAST_STACK2
   2752    make_mem_undefined(a, 8);
   2753 #else
   2754    {
   2755       UWord   sm_off16;
   2756       SecMap* sm;
   2757 
   2758       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
   2759          PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_UNDEFINED_SLOW);
   2760          make_mem_undefined(a, 8);
   2761          return;
   2762       }
   2763 
   2764       sm       = get_secmap_for_writing_low(a);
   2765       sm_off16 = SM_OFF_16(a);
   2766       sm->vabits16[sm_off16] = VA_BITS16_UNDEFINED;
   2767    }
   2768 #endif
   2769 }
   2770 
   2771 static INLINE
   2772 void make_aligned_word64_undefined_w_otag ( Addr a, UInt otag )
   2773 {
   2774    make_aligned_word64_undefined(a);
   2775    //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
   2776    //// Set the origins for a+0 .. a+7
   2777    { OCacheLine* line;
   2778      UWord lineoff = oc_line_offset(a);
   2779      tl_assert(lineoff >= 0
   2780                && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
   2781      line = find_OCacheLine( a );
   2782      line->descr[lineoff+0] = 0xF;
   2783      line->descr[lineoff+1] = 0xF;
   2784      line->w32[lineoff+0]   = otag;
   2785      line->w32[lineoff+1]   = otag;
   2786    }
   2787    //// END inlined, specialised version of MC_(helperc_b_store8)
   2788 }
   2789 
   2790 static INLINE
   2791 void make_aligned_word64_noaccess ( Addr a )
   2792 {
   2793    PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_NOACCESS);
   2794 
   2795 #ifndef PERF_FAST_STACK2
   2796    MC_(make_mem_noaccess)(a, 8);
   2797 #else
   2798    {
   2799       UWord   sm_off16;
   2800       SecMap* sm;
   2801 
   2802       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
   2803          PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_NOACCESS_SLOW);
   2804          MC_(make_mem_noaccess)(a, 8);
   2805          return;
   2806       }
   2807 
   2808       sm       = get_secmap_for_writing_low(a);
   2809       sm_off16 = SM_OFF_16(a);
   2810       sm->vabits16[sm_off16] = VA_BITS16_NOACCESS;
   2811 
   2812       //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
   2813       //// Clear the origins for a+0 .. a+7.
   2814       if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
   2815          OCacheLine* line;
   2816          UWord lineoff = oc_line_offset(a);
   2817          tl_assert(lineoff >= 0
   2818                    && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
   2819          line = find_OCacheLine( a );
   2820          line->descr[lineoff+0] = 0;
   2821          line->descr[lineoff+1] = 0;
   2822       }
   2823       //// END inlined, specialised version of MC_(helperc_b_store8)
   2824    }
   2825 #endif
   2826 }
   2827 
   2828 
   2829 /*------------------------------------------------------------*/
   2830 /*--- Stack pointer adjustment                             ---*/
   2831 /*------------------------------------------------------------*/
   2832 
   2833 #ifdef PERF_FAST_STACK
   2834 #  define MAYBE_USED
   2835 #else
   2836 #  define MAYBE_USED __attribute__((unused))
   2837 #endif
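
/* When PERF_FAST_STACK is disabled, the size-specialised stack handlers
   below may go unreferenced, so MAYBE_USED marks them to suppress
   unused-function warnings. */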
   2838 
   2839 /*--------------- adjustment by 4 bytes ---------------*/
   2840 
   2841 MAYBE_USED
   2842 static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP, UInt ecu)
   2843 {
   2844    UInt otag = ecu | MC_OKIND_STACK;
   2845    PROF_EVENT(MCPE_NEW_MEM_STACK_4);
   2846    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2847       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
   2848    } else {
   2849       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 4, otag );
   2850    }
   2851 }
   2852 
   2853 MAYBE_USED
   2854 static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP)
   2855 {
   2856    PROF_EVENT(MCPE_NEW_MEM_STACK_4);
   2857    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2858       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2859    } else {
   2860       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 4 );
   2861    }
   2862 }
   2863 
   2864 MAYBE_USED
   2865 static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP)
   2866 {
   2867    PROF_EVENT(MCPE_DIE_MEM_STACK_4);
   2868    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2869       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
   2870    } else {
   2871       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 );
   2872    }
   2873 }
   2874 
   2875 /*--------------- adjustment by 8 bytes ---------------*/
   2876 
   2877 MAYBE_USED
   2878 static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP, UInt ecu)
   2879 {
   2880    UInt otag = ecu | MC_OKIND_STACK;
   2881    PROF_EVENT(MCPE_NEW_MEM_STACK_8);
   2882    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2883       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
   2884    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2885       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
   2886       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
   2887    } else {
   2888       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 8, otag );
   2889    }
   2890 }
   2891 
   2892 MAYBE_USED
   2893 static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP)
   2894 {
   2895    PROF_EVENT(MCPE_NEW_MEM_STACK_8);
   2896    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2897       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2898    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2899       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2900       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
   2901    } else {
   2902       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 8 );
   2903    }
   2904 }
   2905 
   2906 MAYBE_USED
   2907 static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP)
   2908 {
   2909    PROF_EVENT(MCPE_DIE_MEM_STACK_8);
   2910    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2911       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
   2912    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2913       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
   2914       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
   2915    } else {
   2916       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 );
   2917    }
   2918 }
   2919 
   2920 /*--------------- adjustment by 12 bytes ---------------*/
   2921 
   2922 MAYBE_USED
   2923 static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP, UInt ecu)
   2924 {
   2925    UInt otag = ecu | MC_OKIND_STACK;
   2926    PROF_EVENT(MCPE_NEW_MEM_STACK_12);
   2927    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2928       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
   2929       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
   2930    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2931       /* from previous test we don't have 8-alignment at offset +0,
   2932          hence must have 8 alignment at offsets +4/-4.  Hence safe to
    2933          do 4 at +0 and then 8 at +4. */
   2934       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
   2935       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
   2936    } else {
   2937       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 12, otag );
   2938    }
   2939 }
   2940 
   2941 MAYBE_USED
   2942 static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP)
   2943 {
   2944    PROF_EVENT(MCPE_NEW_MEM_STACK_12);
   2945    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2946       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2947       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   2948    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2949       /* from previous test we don't have 8-alignment at offset +0,
   2950          hence must have 8 alignment at offsets +4/-4.  Hence safe to
    2951          do 4 at +0 and then 8 at +4. */
   2952       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2953       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
   2954    } else {
   2955       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 12 );
   2956    }
   2957 }
   2958 
   2959 MAYBE_USED
   2960 static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP)
   2961 {
   2962    PROF_EVENT(MCPE_DIE_MEM_STACK_12);
   2963    /* Note the -12 in the test */
   2964    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP-12 )) {
   2965       /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at
   2966          -4. */
   2967       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
   2968       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
   2969    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2970       /* We have 4-alignment at +0, but we don't have 8-alignment at
   2971          -12.  So we must have 8-alignment at -8.  Hence do 4 at -12
   2972          and then 8 at -8. */
   2973       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
   2974       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
   2975    } else {
   2976       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 );
   2977    }
   2978 }
   2979 
   2980 /*--------------- adjustment by 16 bytes ---------------*/
   2981 
   2982 MAYBE_USED
   2983 static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP, UInt ecu)
   2984 {
   2985    UInt otag = ecu | MC_OKIND_STACK;
   2986    PROF_EVENT(MCPE_NEW_MEM_STACK_16);
   2987    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2988       /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
   2989       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
   2990       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
   2991    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2992       /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
   2993          Hence do 4 at +0, 8 at +4, 4 at +12. */
   2994       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   2995       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
   2996       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
   2997    } else {
   2998       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 16, otag );
   2999    }
   3000 }
   3001 
   3002 MAYBE_USED
   3003 static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP)
   3004 {
   3005    PROF_EVENT(MCPE_NEW_MEM_STACK_16);
   3006    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3007       /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
   3008       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   3009       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   3010    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3011       /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
   3012          Hence do 4 at +0, 8 at +4, 4 at +12. */
   3013       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   3014       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4  );
   3015       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
   3016    } else {
   3017       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 16 );
   3018    }
   3019 }
   3020 
   3021 MAYBE_USED
   3022 static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP)
   3023 {
   3024    PROF_EVENT(MCPE_DIE_MEM_STACK_16);
   3025    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3026       /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */
   3027       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   3028       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
   3029    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3030       /* 8 alignment must be at -12.  Do 4 at -16, 8 at -12, 4 at -4. */
   3031       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   3032       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
   3033       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
   3034    } else {
   3035       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 );
   3036    }
   3037 }
   3038 
   3039 /*--------------- adjustment by 32 bytes ---------------*/
   3040 
   3041 MAYBE_USED
   3042 static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP, UInt ecu)
   3043 {
   3044    UInt otag = ecu | MC_OKIND_STACK;
   3045    PROF_EVENT(MCPE_NEW_MEM_STACK_32);
   3046    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3047       /* Straightforward */
   3048       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   3049       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
   3050       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
   3051       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
   3052    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3053       /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
   3054          +0,+28. */
   3055       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   3056       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
   3057       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
   3058       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+20, otag );
   3059       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+28, otag );
   3060    } else {
   3061       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 32, otag );
   3062    }
   3063 }
   3064 
   3065 MAYBE_USED
   3066 static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP)
   3067 {
   3068    PROF_EVENT(MCPE_NEW_MEM_STACK_32);
   3069    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3070       /* Straightforward */
   3071       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   3072       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   3073       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   3074       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   3075    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3076       /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
   3077          +0,+28. */
   3078       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   3079       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
   3080       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
   3081       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+20 );
   3082       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+28 );
   3083    } else {
   3084       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 32 );
   3085    }
   3086 }
   3087 
   3088 MAYBE_USED
   3089 static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP)
   3090 {
   3091    PROF_EVENT(MCPE_DIE_MEM_STACK_32);
   3092    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3093       /* Straightforward */
   3094       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   3095       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   3096       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   3097       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   3098    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3099       /* 8 alignment must be at -4 etc.  Hence do 8 at -12,-20,-28 and
   3100          4 at -32,-4. */
   3101       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   3102       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-28 );
   3103       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-20 );
   3104       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
   3105       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
   3106    } else {
   3107       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 );
   3108    }
   3109 }
   3110 
   3111 /*--------------- adjustment by 112 bytes ---------------*/
   3112 
   3113 MAYBE_USED
   3114 static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP, UInt ecu)
   3115 {
   3116    UInt otag = ecu | MC_OKIND_STACK;
   3117    PROF_EVENT(MCPE_NEW_MEM_STACK_112);
   3118    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3119       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   3120       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
   3121       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
   3122       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
   3123       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
   3124       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
   3125       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
   3126       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
   3127       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
   3128       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
   3129       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
   3130       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
   3131       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
   3132       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
   3133    } else {
   3134       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 112, otag );
   3135    }
   3136 }
   3137 
   3138 MAYBE_USED
   3139 static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP)
   3140 {
   3141    PROF_EVENT(MCPE_NEW_MEM_STACK_112);
   3142    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3143       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   3144       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   3145       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   3146       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   3147       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
   3148       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
   3149       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
   3150       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
   3151       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
   3152       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
   3153       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
   3154       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
   3155       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
   3156       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
   3157    } else {
   3158       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 112 );
   3159    }
   3160 }
   3161 
   3162 MAYBE_USED
   3163 static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP)
   3164 {
   3165    PROF_EVENT(MCPE_DIE_MEM_STACK_112);
   3166    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3167       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
   3168       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
   3169       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
   3170       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
   3171       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
   3172       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
   3173       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
   3174       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
   3175       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
   3176       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
   3177       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   3178       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   3179       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   3180       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   3181    } else {
   3182       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-112, 112 );
   3183    }
   3184 }
   3185 
   3186 /*--------------- adjustment by 128 bytes ---------------*/
   3187 
   3188 MAYBE_USED
   3189 static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP, UInt ecu)
   3190 {
   3191    UInt otag = ecu | MC_OKIND_STACK;
   3192    PROF_EVENT(MCPE_NEW_MEM_STACK_128);
   3193    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3194       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   3195       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
   3196       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
   3197       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
   3198       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
   3199       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
   3200       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
   3201       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
   3202       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
   3203       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
   3204       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
   3205       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
   3206       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
   3207       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
   3208       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
   3209       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
   3210    } else {
   3211       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 128, otag );
   3212    }
   3213 }
   3214 
   3215 MAYBE_USED
   3216 static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP)
   3217 {
   3218    PROF_EVENT(MCPE_NEW_MEM_STACK_128);
   3219    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3220       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   3221       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   3222       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   3223       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   3224       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
   3225       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
   3226       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
   3227       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
   3228       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
   3229       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
   3230       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
   3231       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
   3232       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
   3233       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
   3234       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
   3235       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
   3236    } else {
   3237       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 128 );
   3238    }
   3239 }
   3240 
   3241 MAYBE_USED
   3242 static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP)
   3243 {
   3244    PROF_EVENT(MCPE_DIE_MEM_STACK_128);
   3245    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3246       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
   3247       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
   3248       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
   3249       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
   3250       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
   3251       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
   3252       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
   3253       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
   3254       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
   3255       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
   3256       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
   3257       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
   3258       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   3259       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   3260       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   3261       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   3262    } else {
   3263       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-128, 128 );
   3264    }
   3265 }
   3266 
   3267 /*--------------- adjustment by 144 bytes ---------------*/
   3268 
   3269 MAYBE_USED
   3270 static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP, UInt ecu)
   3271 {
   3272    UInt otag = ecu | MC_OKIND_STACK;
   3273    PROF_EVENT(MCPE_NEW_MEM_STACK_144);
   3274    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3275       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
   3276       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
   3277       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
   3278       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
   3279       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
   3280       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
   3281       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
   3282       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
   3283       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
   3284       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
   3285       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
   3286       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
   3287       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
   3288       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
   3289       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
   3290       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
   3291       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
   3292       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
   3293    } else {
   3294       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 144, otag );
   3295    }
   3296 }
   3297 
   3298 MAYBE_USED
   3299 static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP)
   3300 {
   3301    PROF_EVENT(MCPE_NEW_MEM_STACK_144);
   3302    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3303       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   3304       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   3305       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   3306       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   3307       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
   3308       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
   3309       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
   3310       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
   3311       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
   3312       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
   3313       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
   3314       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
   3315       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
   3316       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
   3317       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
   3318       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
   3319       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
   3320       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
   3321    } else {
   3322       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 144 );
   3323    }
   3324 }
   3325 
   3326 MAYBE_USED
   3327 static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP)
   3328 {
   3329    PROF_EVENT(MCPE_DIE_MEM_STACK_144);
   3330    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3331       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
   3332       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
   3333       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
   3334       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
   3335       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
   3336       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
   3337       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
   3338       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
   3339       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
   3340       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
   3341       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
   3342       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
   3343       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
   3344       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
   3345       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   3346       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   3347       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   3348       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   3349    } else {
   3350       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-144, 144 );
   3351    }
   3352 }
   3353 
   3354 /*--------------- adjustment by 160 bytes ---------------*/
   3355 
   3356 MAYBE_USED
   3357 static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP, UInt ecu)
   3358 {
   3359    UInt otag = ecu | MC_OKIND_STACK;
   3360    PROF_EVENT(MCPE_NEW_MEM_STACK_160);
   3361    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3362       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
   3363       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
   3364       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
   3365       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
   3366       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
   3367       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
   3368       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
   3369       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
   3370       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
   3371       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
   3372       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
   3373       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
   3374       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
   3375       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
   3376       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
   3377       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
   3378       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
   3379       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
   3380       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+144, otag );
   3381       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+152, otag );
   3382    } else {
   3383       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 160, otag );
   3384    }
   3385 }
   3386 
   3387 MAYBE_USED
   3388 static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP)
   3389 {
   3390    PROF_EVENT(MCPE_NEW_MEM_STACK_160);
   3391    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3392       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   3393       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   3394       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   3395       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   3396       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
   3397       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
   3398       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
   3399       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
   3400       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
   3401       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
   3402       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
   3403       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
   3404       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
   3405       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
   3406       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
   3407       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
   3408       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
   3409       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
   3410       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+144 );
   3411       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+152 );
   3412    } else {
   3413       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 160 );
   3414    }
   3415 }
   3416 
   3417 MAYBE_USED
   3418 static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP)
   3419 {
   3420    PROF_EVENT(MCPE_DIE_MEM_STACK_160);
   3421    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3422       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-160);
   3423       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-152);
   3424       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
   3425       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
   3426       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
   3427       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
   3428       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
   3429       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
   3430       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
   3431       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
   3432       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
   3433       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
   3434       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
   3435       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
   3436       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
   3437       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
   3438       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   3439       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   3440       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   3441       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   3442    } else {
   3443       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-160, 160 );
   3444    }
   3445 }
   3446 
   3447 /*--------------- adjustment by N bytes ---------------*/
   3448 
   3449 static void mc_new_mem_stack_w_ECU ( Addr a, SizeT len, UInt ecu )
   3450 {
   3451    UInt otag = ecu | MC_OKIND_STACK;
   3452    PROF_EVENT(MCPE_NEW_MEM_STACK);
   3453    MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + a, len, otag );
   3454 }
   3455 
   3456 static void mc_new_mem_stack ( Addr a, SizeT len )
   3457 {
   3458    PROF_EVENT(MCPE_NEW_MEM_STACK);
   3459    make_mem_undefined ( -VG_STACK_REDZONE_SZB + a, len );
   3460 }
   3461 
   3462 static void mc_die_mem_stack ( Addr a, SizeT len )
   3463 {
   3464    PROF_EVENT(MCPE_DIE_MEM_STACK);
   3465    MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + a, len );
   3466 }
   3467 
   3468 
   3469 /* The AMD64 ABI says:
   3470 
   3471    "The 128-byte area beyond the location pointed to by %rsp is considered
   3472     to be reserved and shall not be modified by signal or interrupt
   3473     handlers.  Therefore, functions may use this area for temporary data
   3474     that is not needed across function calls.  In particular, leaf functions
   3475     may use this area for their entire stack frame, rather than adjusting
   3476     the stack pointer in the prologue and epilogue.  This area is known as
   3477     red zone [sic]."
   3478 
   3479    So after any call or return we need to mark this redzone as containing
   3480    undefined values.
   3481 
   3482    Consider this:  we're in function f.  f calls g.  g moves rsp down
   3483    modestly (say 16 bytes) and writes stuff all over the red zone, making it
   3484    defined.  g returns.  f is buggy and reads from parts of the red zone
   3485    that it didn't write on.  But because g filled that area in, f is going
   3486    to be picking up defined V bits and so any errors from reading bits of
   3487    the red zone it didn't write, will be missed.  The only solution I could
    3488    the red zone it didn't write will be missed.  The only solution I could
   3489 
   3490    This is in accordance with the ABI, which makes it clear the redzone
   3491    is volatile across function calls.
   3492 
   3493    The problem occurs the other way round too: f could fill the RZ up
   3494    with defined values and g could mistakenly read them.  So the RZ
   3495    also needs to be nuked on function calls.
   3496 */
   3497 
   3498 
   3499 /* Here's a simple cache to hold nia -> ECU mappings.  It could be
   3500    improved so as to have a lower miss rate. */
   3501 
   3502 static UWord stats__nia_cache_queries = 0;
   3503 static UWord stats__nia_cache_misses  = 0;
   3504 
   3505 typedef
   3506    struct { UWord nia0; UWord ecu0;   /* nia0 maps to ecu0 */
   3507             UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */
   3508    WCacheEnt;
   3509 
   3510 #define N_NIA_TO_ECU_CACHE 511
   3511 
   3512 static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE];
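
/* A slot is selected by nia % N_NIA_TO_ECU_CACHE.  Each slot holds two
   (nia, ecu) pairs, most recently used first: convert_nia_to_ecu below
   promotes the second pair to the front on a hit on it, and pushes the
   first pair down to make room on a miss. */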
   3513 
   3514 static void init_nia_to_ecu_cache ( void )
   3515 {
   3516    UWord       i;
   3517    Addr        zero_addr = 0;
   3518    ExeContext* zero_ec;
   3519    UInt        zero_ecu;
    3520    /* Fill all the slots with an entry for address zero and its
    3521       corresponding ECU, so that the cache starts out full of valid
    3522       data. */
   3523    zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr);
   3524    tl_assert(zero_ec);
   3525    zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec);
   3526    tl_assert(VG_(is_plausible_ECU)(zero_ecu));
   3527    for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) {
   3528       nia_to_ecu_cache[i].nia0 = zero_addr;
   3529       nia_to_ecu_cache[i].ecu0 = zero_ecu;
   3530       nia_to_ecu_cache[i].nia1 = zero_addr;
   3531       nia_to_ecu_cache[i].ecu1 = zero_ecu;
   3532    }
   3533 }
   3534 
   3535 static inline UInt convert_nia_to_ecu ( Addr nia )
   3536 {
   3537    UWord i;
   3538    UInt        ecu;
   3539    ExeContext* ec;
   3540 
   3541    tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) );
   3542 
   3543    stats__nia_cache_queries++;
   3544    i = nia % N_NIA_TO_ECU_CACHE;
   3545    tl_assert(i >= 0 && i < N_NIA_TO_ECU_CACHE);
   3546 
   3547    if (LIKELY( nia_to_ecu_cache[i].nia0 == nia ))
   3548       return nia_to_ecu_cache[i].ecu0;
   3549 
   3550    if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) {
   3551 #     define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; }
   3552       SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 );
   3553       SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 );
   3554 #     undef SWAP
   3555       return nia_to_ecu_cache[i].ecu0;
   3556    }
   3557 
   3558    stats__nia_cache_misses++;
   3559    ec = VG_(make_depth_1_ExeContext_from_Addr)(nia);
   3560    tl_assert(ec);
   3561    ecu = VG_(get_ECU_from_ExeContext)(ec);
   3562    tl_assert(VG_(is_plausible_ECU)(ecu));
   3563 
   3564    nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0;
   3565    nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0;
   3566 
   3567    nia_to_ecu_cache[i].nia0 = nia;
   3568    nia_to_ecu_cache[i].ecu0 = (UWord)ecu;
   3569    return ecu;
   3570 }
   3571 
   3572 
    3573 /* This marks the stack as addressable but undefined, after a call or
    3574    return for a target that has an ABI-defined stack redzone.  It
   3575    happens quite a lot and needs to be fast.  This is the version for
   3576    origin tracking.  The non-origin-tracking version is below. */
   3577 VG_REGPARM(3)
   3578 void MC_(helperc_MAKE_STACK_UNINIT_w_o) ( Addr base, UWord len, Addr nia )
   3579 {
   3580    PROF_EVENT(MCPE_MAKE_STACK_UNINIT_W_O);
   3581    if (0)
   3582       VG_(printf)("helperc_MAKE_STACK_UNINIT_w_o (%#lx,%lu,nia=%#lx)\n",
   3583                   base, len, nia );
   3584 
   3585    UInt ecu = convert_nia_to_ecu ( nia );
   3586    tl_assert(VG_(is_plausible_ECU)(ecu));
   3587 
   3588    UInt otag = ecu | MC_OKIND_STACK;
   3589 
   3590 #  if 0
   3591    /* Slow(ish) version, which is fairly easily seen to be correct.
   3592    */
   3593    if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
   3594       make_aligned_word64_undefined_w_otag(base +   0, otag);
   3595       make_aligned_word64_undefined_w_otag(base +   8, otag);
   3596       make_aligned_word64_undefined_w_otag(base +  16, otag);
   3597       make_aligned_word64_undefined_w_otag(base +  24, otag);
   3598 
   3599       make_aligned_word64_undefined_w_otag(base +  32, otag);
   3600       make_aligned_word64_undefined_w_otag(base +  40, otag);
   3601       make_aligned_word64_undefined_w_otag(base +  48, otag);
   3602       make_aligned_word64_undefined_w_otag(base +  56, otag);
   3603 
   3604       make_aligned_word64_undefined_w_otag(base +  64, otag);
   3605       make_aligned_word64_undefined_w_otag(base +  72, otag);
   3606       make_aligned_word64_undefined_w_otag(base +  80, otag);
   3607       make_aligned_word64_undefined_w_otag(base +  88, otag);
   3608 
   3609       make_aligned_word64_undefined_w_otag(base +  96, otag);
   3610       make_aligned_word64_undefined_w_otag(base + 104, otag);
   3611       make_aligned_word64_undefined_w_otag(base + 112, otag);
   3612       make_aligned_word64_undefined_w_otag(base + 120, otag);
   3613    } else {
   3614       MC_(make_mem_undefined_w_otag)(base, len, otag);
   3615    }
   3616 #  endif
   3617 
   3618    /* Idea is: go fast when
   3619          * 8-aligned and length is 128
   3620          * the sm is available in the main primary map
    3621          * the address range falls entirely within a single secondary map
   3622       If all those conditions hold, just update the V+A bits by writing
   3623       directly into the vabits array.  (If the sm was distinguished, this
   3624       will make a copy and then write to it.)
   3625    */
   3626    if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
   3627       /* Now we know the address range is suitably sized and aligned. */
   3628       UWord a_lo = (UWord)(base);
   3629       UWord a_hi = (UWord)(base + 128 - 1);
   3630       tl_assert(a_lo < a_hi);             // paranoia: detect overflow
   3631       if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
   3632          /* Now we know the entire range is within the main primary map. */
   3633          UWord pm_off_lo = get_primary_map_low_offset(a_lo);
   3634          UWord pm_off_hi = get_primary_map_low_offset(a_hi);
   3635          if (LIKELY(pm_off_lo == pm_off_hi)) {
   3636            /* Now we know that the entire address range falls within a
   3637               single secondary map, and that that secondary 'lives' in
   3638               the main primary map. */
   3639             SecMap* sm      = get_secmap_for_writing_low(a_lo);
   3640             UWord   v_off16 = SM_OFF_16(a_lo);
   3641             UShort* p       = &sm->vabits16[v_off16];
   3642             p[ 0] = VA_BITS16_UNDEFINED;
   3643             p[ 1] = VA_BITS16_UNDEFINED;
   3644             p[ 2] = VA_BITS16_UNDEFINED;
   3645             p[ 3] = VA_BITS16_UNDEFINED;
   3646             p[ 4] = VA_BITS16_UNDEFINED;
   3647             p[ 5] = VA_BITS16_UNDEFINED;
   3648             p[ 6] = VA_BITS16_UNDEFINED;
   3649             p[ 7] = VA_BITS16_UNDEFINED;
   3650             p[ 8] = VA_BITS16_UNDEFINED;
   3651             p[ 9] = VA_BITS16_UNDEFINED;
   3652             p[10] = VA_BITS16_UNDEFINED;
   3653             p[11] = VA_BITS16_UNDEFINED;
   3654             p[12] = VA_BITS16_UNDEFINED;
   3655             p[13] = VA_BITS16_UNDEFINED;
   3656             p[14] = VA_BITS16_UNDEFINED;
   3657             p[15] = VA_BITS16_UNDEFINED;
   3658             set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
   3659             set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
   3660             set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
   3661             set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
   3662             set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
   3663             set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
   3664             set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
   3665             set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
   3666             set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
   3667             set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
   3668             set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
   3669             set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
   3670             set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
   3671             set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
   3672             set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
   3673             set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
   3674             return;
   3675          }
   3676       }
   3677    }
   3678 
   3679    /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
   3680    if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
   3681       /* Now we know the address range is suitably sized and aligned. */
   3682       UWord a_lo = (UWord)(base);
   3683       UWord a_hi = (UWord)(base + 288 - 1);
   3684       tl_assert(a_lo < a_hi);             // paranoia: detect overflow
   3685       if (a_hi <= MAX_PRIMARY_ADDRESS) {
   3686          UWord pm_off_lo = get_primary_map_low_offset(a_lo);
   3687          UWord pm_off_hi = get_primary_map_low_offset(a_hi);
   3688          if (LIKELY(pm_off_lo == pm_off_hi)) {
   3689            /* Now we know that the entire address range falls within a
   3690               single secondary map, and that that secondary 'lives' in
   3691               the main primary map. */
   3692             SecMap* sm      = get_secmap_for_writing_low(a_lo);
   3693             UWord   v_off16 = SM_OFF_16(a_lo);
   3694             UShort* p       = &sm->vabits16[v_off16];
   3695             p[ 0] = VA_BITS16_UNDEFINED;
   3696             p[ 1] = VA_BITS16_UNDEFINED;
   3697             p[ 2] = VA_BITS16_UNDEFINED;
   3698             p[ 3] = VA_BITS16_UNDEFINED;
   3699             p[ 4] = VA_BITS16_UNDEFINED;
   3700             p[ 5] = VA_BITS16_UNDEFINED;
   3701             p[ 6] = VA_BITS16_UNDEFINED;
   3702             p[ 7] = VA_BITS16_UNDEFINED;
   3703             p[ 8] = VA_BITS16_UNDEFINED;
   3704             p[ 9] = VA_BITS16_UNDEFINED;
   3705             p[10] = VA_BITS16_UNDEFINED;
   3706             p[11] = VA_BITS16_UNDEFINED;
   3707             p[12] = VA_BITS16_UNDEFINED;
   3708             p[13] = VA_BITS16_UNDEFINED;
   3709             p[14] = VA_BITS16_UNDEFINED;
   3710             p[15] = VA_BITS16_UNDEFINED;
   3711             p[16] = VA_BITS16_UNDEFINED;
   3712             p[17] = VA_BITS16_UNDEFINED;
   3713             p[18] = VA_BITS16_UNDEFINED;
   3714             p[19] = VA_BITS16_UNDEFINED;
   3715             p[20] = VA_BITS16_UNDEFINED;
   3716             p[21] = VA_BITS16_UNDEFINED;
   3717             p[22] = VA_BITS16_UNDEFINED;
   3718             p[23] = VA_BITS16_UNDEFINED;
   3719             p[24] = VA_BITS16_UNDEFINED;
   3720             p[25] = VA_BITS16_UNDEFINED;
   3721             p[26] = VA_BITS16_UNDEFINED;
   3722             p[27] = VA_BITS16_UNDEFINED;
   3723             p[28] = VA_BITS16_UNDEFINED;
   3724             p[29] = VA_BITS16_UNDEFINED;
   3725             p[30] = VA_BITS16_UNDEFINED;
   3726             p[31] = VA_BITS16_UNDEFINED;
   3727             p[32] = VA_BITS16_UNDEFINED;
   3728             p[33] = VA_BITS16_UNDEFINED;
   3729             p[34] = VA_BITS16_UNDEFINED;
   3730             p[35] = VA_BITS16_UNDEFINED;
   3731             set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
   3732             set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
   3733             set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
   3734             set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
   3735             set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
   3736             set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
   3737             set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
   3738             set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
   3739             set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
   3740             set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
   3741             set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
   3742             set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
   3743             set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
   3744             set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
   3745             set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
   3746             set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
   3747             set_aligned_word64_Origin_to_undef( base + 8 * 16, otag );
   3748             set_aligned_word64_Origin_to_undef( base + 8 * 17, otag );
   3749             set_aligned_word64_Origin_to_undef( base + 8 * 18, otag );
   3750             set_aligned_word64_Origin_to_undef( base + 8 * 19, otag );
   3751             set_aligned_word64_Origin_to_undef( base + 8 * 20, otag );
   3752             set_aligned_word64_Origin_to_undef( base + 8 * 21, otag );
   3753             set_aligned_word64_Origin_to_undef( base + 8 * 22, otag );
   3754             set_aligned_word64_Origin_to_undef( base + 8 * 23, otag );
   3755             set_aligned_word64_Origin_to_undef( base + 8 * 24, otag );
   3756             set_aligned_word64_Origin_to_undef( base + 8 * 25, otag );
   3757             set_aligned_word64_Origin_to_undef( base + 8 * 26, otag );
   3758             set_aligned_word64_Origin_to_undef( base + 8 * 27, otag );
   3759             set_aligned_word64_Origin_to_undef( base + 8 * 28, otag );
   3760             set_aligned_word64_Origin_to_undef( base + 8 * 29, otag );
   3761             set_aligned_word64_Origin_to_undef( base + 8 * 30, otag );
   3762             set_aligned_word64_Origin_to_undef( base + 8 * 31, otag );
   3763             set_aligned_word64_Origin_to_undef( base + 8 * 32, otag );
   3764             set_aligned_word64_Origin_to_undef( base + 8 * 33, otag );
   3765             set_aligned_word64_Origin_to_undef( base + 8 * 34, otag );
   3766             set_aligned_word64_Origin_to_undef( base + 8 * 35, otag );
   3767             return;
   3768          }
   3769       }
   3770    }
   3771 
   3772    /* else fall into slow case */
   3773    MC_(make_mem_undefined_w_otag)(base, len, otag);
   3774 }
   3775 
   3776 
   3777 /* This is a version of MC_(helperc_MAKE_STACK_UNINIT_w_o) that is
   3778    specialised for the non-origin-tracking case. */
   3779 VG_REGPARM(2)
   3780 void MC_(helperc_MAKE_STACK_UNINIT_no_o) ( Addr base, UWord len )
   3781 {
   3782    PROF_EVENT(MCPE_MAKE_STACK_UNINIT_NO_O);
   3783    if (0)
   3784       VG_(printf)("helperc_MAKE_STACK_UNINIT_no_o (%#lx,%lu)\n",
   3785                   base, len );
   3786 
   3787 #  if 0
   3788    /* Slow(ish) version, which is fairly easily seen to be correct.
   3789    */
   3790    if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
   3791       make_aligned_word64_undefined(base +   0);
   3792       make_aligned_word64_undefined(base +   8);
   3793       make_aligned_word64_undefined(base +  16);
   3794       make_aligned_word64_undefined(base +  24);
   3795 
   3796       make_aligned_word64_undefined(base +  32);
   3797       make_aligned_word64_undefined(base +  40);
   3798       make_aligned_word64_undefined(base +  48);
   3799       make_aligned_word64_undefined(base +  56);
   3800 
   3801       make_aligned_word64_undefined(base +  64);
   3802       make_aligned_word64_undefined(base +  72);
   3803       make_aligned_word64_undefined(base +  80);
   3804       make_aligned_word64_undefined(base +  88);
   3805 
   3806       make_aligned_word64_undefined(base +  96);
   3807       make_aligned_word64_undefined(base + 104);
   3808       make_aligned_word64_undefined(base + 112);
   3809       make_aligned_word64_undefined(base + 120);
   3810    } else {
   3811       make_mem_undefined(base, len);
   3812    }
   3813 #  endif
   3814 
   3815    /* Idea is: go fast when
   3816          * 8-aligned and length is 128
   3817          * the sm is available in the main primary map
    3818          * the address range falls entirely within a single secondary map
   3819       If all those conditions hold, just update the V+A bits by writing
   3820       directly into the vabits array.  (If the sm was distinguished, this
   3821       will make a copy and then write to it.)
   3822    */
   3823    if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
   3824       /* Now we know the address range is suitably sized and aligned. */
   3825       UWord a_lo = (UWord)(base);
   3826       UWord a_hi = (UWord)(base + 128 - 1);
   3827       tl_assert(a_lo < a_hi);             // paranoia: detect overflow
   3828       if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
   3829          /* Now we know the entire range is within the main primary map. */
   3830          UWord pm_off_lo = get_primary_map_low_offset(a_lo);
   3831          UWord pm_off_hi = get_primary_map_low_offset(a_hi);
   3832          if (LIKELY(pm_off_lo == pm_off_hi)) {
   3833            /* Now we know that the entire address range falls within a
   3834               single secondary map, and that that secondary 'lives' in
   3835               the main primary map. */
   3836             SecMap* sm      = get_secmap_for_writing_low(a_lo);
   3837             UWord   v_off16 = SM_OFF_16(a_lo);
   3838             UShort* p       = &sm->vabits16[v_off16];
   3839             p[ 0] = VA_BITS16_UNDEFINED;
   3840             p[ 1] = VA_BITS16_UNDEFINED;
   3841             p[ 2] = VA_BITS16_UNDEFINED;
   3842             p[ 3] = VA_BITS16_UNDEFINED;
   3843             p[ 4] = VA_BITS16_UNDEFINED;
   3844             p[ 5] = VA_BITS16_UNDEFINED;
   3845             p[ 6] = VA_BITS16_UNDEFINED;
   3846             p[ 7] = VA_BITS16_UNDEFINED;
   3847             p[ 8] = VA_BITS16_UNDEFINED;
   3848             p[ 9] = VA_BITS16_UNDEFINED;
   3849             p[10] = VA_BITS16_UNDEFINED;
   3850             p[11] = VA_BITS16_UNDEFINED;
   3851             p[12] = VA_BITS16_UNDEFINED;
   3852             p[13] = VA_BITS16_UNDEFINED;
   3853             p[14] = VA_BITS16_UNDEFINED;
   3854             p[15] = VA_BITS16_UNDEFINED;
   3855             return;
   3856          }
   3857       }
   3858    }
   3859 
   3860    /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
   3861    if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
   3862       /* Now we know the address range is suitably sized and aligned. */
   3863       UWord a_lo = (UWord)(base);
   3864       UWord a_hi = (UWord)(base + 288 - 1);
   3865       tl_assert(a_lo < a_hi);             // paranoia: detect overflow
   3866       if (a_hi <= MAX_PRIMARY_ADDRESS) {
   3867          UWord pm_off_lo = get_primary_map_low_offset(a_lo);
   3868          UWord pm_off_hi = get_primary_map_low_offset(a_hi);
   3869          if (LIKELY(pm_off_lo == pm_off_hi)) {
   3870            /* Now we know that the entire address range falls within a
   3871               single secondary map, and that that secondary 'lives' in
   3872               the main primary map. */
   3873             SecMap* sm      = get_secmap_for_writing_low(a_lo);
   3874             UWord   v_off16 = SM_OFF_16(a_lo);
   3875             UShort* p       = &sm->vabits16[v_off16];
   3876             p[ 0] = VA_BITS16_UNDEFINED;
   3877             p[ 1] = VA_BITS16_UNDEFINED;
   3878             p[ 2] = VA_BITS16_UNDEFINED;
   3879             p[ 3] = VA_BITS16_UNDEFINED;
   3880             p[ 4] = VA_BITS16_UNDEFINED;
   3881             p[ 5] = VA_BITS16_UNDEFINED;
   3882             p[ 6] = VA_BITS16_UNDEFINED;
   3883             p[ 7] = VA_BITS16_UNDEFINED;
   3884             p[ 8] = VA_BITS16_UNDEFINED;
   3885             p[ 9] = VA_BITS16_UNDEFINED;
   3886             p[10] = VA_BITS16_UNDEFINED;
   3887             p[11] = VA_BITS16_UNDEFINED;
   3888             p[12] = VA_BITS16_UNDEFINED;
   3889             p[13] = VA_BITS16_UNDEFINED;
   3890             p[14] = VA_BITS16_UNDEFINED;
   3891             p[15] = VA_BITS16_UNDEFINED;
   3892             p[16] = VA_BITS16_UNDEFINED;
   3893             p[17] = VA_BITS16_UNDEFINED;
   3894             p[18] = VA_BITS16_UNDEFINED;
   3895             p[19] = VA_BITS16_UNDEFINED;
   3896             p[20] = VA_BITS16_UNDEFINED;
   3897             p[21] = VA_BITS16_UNDEFINED;
   3898             p[22] = VA_BITS16_UNDEFINED;
   3899             p[23] = VA_BITS16_UNDEFINED;
   3900             p[24] = VA_BITS16_UNDEFINED;
   3901             p[25] = VA_BITS16_UNDEFINED;
   3902             p[26] = VA_BITS16_UNDEFINED;
   3903             p[27] = VA_BITS16_UNDEFINED;
   3904             p[28] = VA_BITS16_UNDEFINED;
   3905             p[29] = VA_BITS16_UNDEFINED;
   3906             p[30] = VA_BITS16_UNDEFINED;
   3907             p[31] = VA_BITS16_UNDEFINED;
   3908             p[32] = VA_BITS16_UNDEFINED;
   3909             p[33] = VA_BITS16_UNDEFINED;
   3910             p[34] = VA_BITS16_UNDEFINED;
   3911             p[35] = VA_BITS16_UNDEFINED;
   3912             return;
   3913          }
   3914       }
   3915    }
   3916 
   3917    /* else fall into slow case */
   3918    make_mem_undefined(base, len);
   3919 }
   3920 
   3921 
   3922 /* And this is an even more specialised case, for the case where there
   3923    is no origin tracking, and the length is 128. */
   3924 VG_REGPARM(1)
   3925 void MC_(helperc_MAKE_STACK_UNINIT_128_no_o) ( Addr base )
   3926 {
   3927    PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O);
   3928    if (0)
   3929       VG_(printf)("helperc_MAKE_STACK_UNINIT_128_no_o (%#lx)\n", base );
   3930 
   3931 #  if 0
   3932    /* Slow(ish) version, which is fairly easily seen to be correct.
   3933    */
   3934    if (LIKELY( VG_IS_8_ALIGNED(base) )) {
   3935       make_aligned_word64_undefined(base +   0);
   3936       make_aligned_word64_undefined(base +   8);
   3937       make_aligned_word64_undefined(base +  16);
   3938       make_aligned_word64_undefined(base +  24);
   3939 
   3940       make_aligned_word64_undefined(base +  32);
   3941       make_aligned_word64_undefined(base +  40);
   3942       make_aligned_word64_undefined(base +  48);
   3943       make_aligned_word64_undefined(base +  56);
   3944 
   3945       make_aligned_word64_undefined(base +  64);
   3946       make_aligned_word64_undefined(base +  72);
   3947       make_aligned_word64_undefined(base +  80);
   3948       make_aligned_word64_undefined(base +  88);
   3949 
   3950       make_aligned_word64_undefined(base +  96);
   3951       make_aligned_word64_undefined(base + 104);
   3952       make_aligned_word64_undefined(base + 112);
   3953       make_aligned_word64_undefined(base + 120);
   3954    } else {
   3955       make_mem_undefined(base, 128);
   3956    }
   3957 #  endif
   3958 
   3959    /* Idea is: go fast when
   3960          * 16-aligned and length is 128
   3961          * the sm is available in the main primary map
    3962          * the address range falls entirely within a single secondary map
   3963       If all those conditions hold, just update the V+A bits by writing
   3964       directly into the vabits array.  (If the sm was distinguished, this
   3965       will make a copy and then write to it.)
   3966 
   3967       Typically this applies to amd64 'ret' instructions, since RSP is
   3968       16-aligned (0 % 16) after the instruction (per the amd64-ELF ABI).
   3969    */
   3970    if (LIKELY( VG_IS_16_ALIGNED(base) )) {
   3971       /* Now we know the address range is suitably sized and aligned. */
   3972       UWord a_lo = (UWord)(base);
   3973       UWord a_hi = (UWord)(base + 128 - 1);
   3974       /* FIXME: come up with a sane story on the wraparound case
    3975          (which of course cannot happen, but still..) */
   3976       /* tl_assert(a_lo < a_hi); */            // paranoia: detect overflow
   3977       if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
   3978          /* Now we know the entire range is within the main primary map. */
   3979          UWord pm_off_lo = get_primary_map_low_offset(a_lo);
   3980          UWord pm_off_hi = get_primary_map_low_offset(a_hi);
   3981          if (LIKELY(pm_off_lo == pm_off_hi)) {
   3982            /* Now we know that the entire address range falls within a
   3983               single secondary map, and that that secondary 'lives' in
   3984               the main primary map. */
   3985             PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_16);
   3986             SecMap* sm    = get_secmap_for_writing_low(a_lo);
   3987             UWord   v_off = SM_OFF(a_lo);
   3988             UInt*   w32   = ASSUME_ALIGNED(UInt*, &sm->vabits8[v_off]);
   3989             w32[ 0] = VA_BITS32_UNDEFINED;
   3990             w32[ 1] = VA_BITS32_UNDEFINED;
   3991             w32[ 2] = VA_BITS32_UNDEFINED;
   3992             w32[ 3] = VA_BITS32_UNDEFINED;
   3993             w32[ 4] = VA_BITS32_UNDEFINED;
   3994             w32[ 5] = VA_BITS32_UNDEFINED;
   3995             w32[ 6] = VA_BITS32_UNDEFINED;
   3996             w32[ 7] = VA_BITS32_UNDEFINED;
   3997             return;
   3998          }
   3999       }
   4000    }
   4001 
   4002    /* The same, but for when base is 8 % 16, which is the situation
   4003       with RSP for amd64-ELF immediately after call instructions.
   4004    */
   4005    if (LIKELY( VG_IS_16_ALIGNED(base+8) )) { // restricts to 8 aligned
   4006       /* Now we know the address range is suitably sized and aligned. */
   4007       UWord a_lo = (UWord)(base);
   4008       UWord a_hi = (UWord)(base + 128 - 1);
   4009       /* FIXME: come up with a sane story on the wraparound case
    4010          (which of course cannot happen, but still..) */
   4011       /* tl_assert(a_lo < a_hi); */            // paranoia: detect overflow
   4012       if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
   4013          /* Now we know the entire range is within the main primary map. */
   4014          UWord pm_off_lo = get_primary_map_low_offset(a_lo);
   4015          UWord pm_off_hi = get_primary_map_low_offset(a_hi);
   4016          if (LIKELY(pm_off_lo == pm_off_hi)) {
   4017             PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_8);
   4018            /* Now we know that the entire address range falls within a
   4019               single secondary map, and that that secondary 'lives' in
   4020               the main primary map. */
   4021             SecMap* sm      = get_secmap_for_writing_low(a_lo);
   4022             UWord   v_off16 = SM_OFF_16(a_lo);
   4023             UShort* w16     = &sm->vabits16[v_off16];
   4024             UInt*   w32     = ASSUME_ALIGNED(UInt*, &w16[1]);
   4025             /* The following assertion is commented out for obvious
   4026                performance reasons, but was verified as valid when
   4027                running the entire testsuite and also Firefox. */
   4028             /* tl_assert(VG_IS_4_ALIGNED(w32)); */
   4029             w16[ 0] = VA_BITS16_UNDEFINED; // w16[0]
   4030             w32[ 0] = VA_BITS32_UNDEFINED; // w16[1,2]
   4031             w32[ 1] = VA_BITS32_UNDEFINED; // w16[3,4]
   4032             w32[ 2] = VA_BITS32_UNDEFINED; // w16[5,6]
   4033             w32[ 3] = VA_BITS32_UNDEFINED; // w16[7,8]
   4034             w32[ 4] = VA_BITS32_UNDEFINED; // w16[9,10]
   4035             w32[ 5] = VA_BITS32_UNDEFINED; // w16[11,12]
   4036             w32[ 6] = VA_BITS32_UNDEFINED; // w16[13,14]
   4037             w16[15] = VA_BITS16_UNDEFINED; // w16[15]
   4038             return;
   4039          }
   4040       }
   4041    }
   4042 
   4043    /* else fall into slow case */
   4044    PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_SLOWCASE);
   4045    make_mem_undefined(base, 128);
   4046 }
   4047 
   4048 
   4049 /*------------------------------------------------------------*/
   4050 /*--- Checking memory                                      ---*/
   4051 /*------------------------------------------------------------*/
   4052 
   4053 typedef
   4054    enum {
   4055       MC_Ok = 5,
   4056       MC_AddrErr = 6,
   4057       MC_ValueErr = 7
   4058    }
   4059    MC_ReadResult;
   4060 
   4061 
   4062 /* Check permissions for address range.  If inadequate permissions
   4063    exist, *bad_addr is set to the offending address, so the caller can
   4064    know what it is. */
   4065 
    4066 /* Returns True if [a .. a+len) is not addressable.  Otherwise,
   4067    returns False, and if bad_addr is non-NULL, sets *bad_addr to
   4068    indicate the lowest failing address.  Functions below are
   4069    similar. */
   4070 Bool MC_(check_mem_is_noaccess) ( Addr a, SizeT len, Addr* bad_addr )
   4071 {
   4072    SizeT i;
   4073    UWord vabits2;
   4074 
   4075    PROF_EVENT(MCPE_CHECK_MEM_IS_NOACCESS);
   4076    for (i = 0; i < len; i++) {
   4077       PROF_EVENT(MCPE_CHECK_MEM_IS_NOACCESS_LOOP);
   4078       vabits2 = get_vabits2(a);
   4079       if (VA_BITS2_NOACCESS != vabits2) {
   4080          if (bad_addr != NULL) *bad_addr = a;
   4081          return False;
   4082       }
   4083       a++;
   4084    }
   4085    return True;
   4086 }
   4087 
   4088 static Bool is_mem_addressable ( Addr a, SizeT len,
   4089                                  /*OUT*/Addr* bad_addr )
   4090 {
   4091    SizeT i;
   4092    UWord vabits2;
   4093 
   4094    PROF_EVENT(MCPE_IS_MEM_ADDRESSABLE);
   4095    for (i = 0; i < len; i++) {
   4096       PROF_EVENT(MCPE_IS_MEM_ADDRESSABLE_LOOP);
   4097       vabits2 = get_vabits2(a);
   4098       if (VA_BITS2_NOACCESS == vabits2) {
   4099          if (bad_addr != NULL) *bad_addr = a;
   4100          return False;
   4101       }
   4102       a++;
   4103    }
   4104    return True;
   4105 }
   4106 
   4107 static MC_ReadResult is_mem_defined ( Addr a, SizeT len,
   4108                                       /*OUT*/Addr* bad_addr,
   4109                                       /*OUT*/UInt* otag )
   4110 {
   4111    SizeT i;
   4112    UWord vabits2;
   4113 
   4114    PROF_EVENT(MCPE_IS_MEM_DEFINED);
   4115    DEBUG("is_mem_defined\n");
   4116 
   4117    if (otag)     *otag = 0;
   4118    if (bad_addr) *bad_addr = 0;
   4119    for (i = 0; i < len; i++) {
   4120       PROF_EVENT(MCPE_IS_MEM_DEFINED_LOOP);
   4121       vabits2 = get_vabits2(a);
   4122       if (VA_BITS2_DEFINED != vabits2) {
   4123          // Error!  Nb: Report addressability errors in preference to
    4124          // definedness errors.  And don't report definedness errors unless
   4125          // --undef-value-errors=yes.
   4126          if (bad_addr) {
   4127             *bad_addr = a;
   4128          }
   4129          if (VA_BITS2_NOACCESS == vabits2) {
   4130             return MC_AddrErr;
   4131          }
   4132          if (MC_(clo_mc_level) >= 2) {
   4133             if (otag && MC_(clo_mc_level) == 3) {
   4134                *otag = MC_(helperc_b_load1)( a );
   4135             }
   4136             return MC_ValueErr;
   4137          }
   4138       }
   4139       a++;
   4140    }
   4141    return MC_Ok;
   4142 }
   4143 
   4144 
    4145 /* Like is_mem_defined but doesn't give up at the first uninitialised
    4146    byte -- the entire range is always checked.  This is important for
    4147    detecting errors in the case where a checked range strays into
    4148    invalid memory, but that fact is missed by the ordinary
    4149    is_mem_defined(), because an undefined section precedes the
    4150    out-of-range section, possibly as a result of an alignment hole in
    4151    the checked data.  This version always checks the entire range and
    4152    can report both a definedness and an addressability error, if
    4153    necessary. */
   4154 static void is_mem_defined_comprehensive (
   4155                Addr a, SizeT len,
   4156                /*OUT*/Bool* errorV,    /* is there a definedness err? */
   4157                /*OUT*/Addr* bad_addrV, /* if so where? */
   4158                /*OUT*/UInt* otagV,     /* and what's its otag? */
   4159                /*OUT*/Bool* errorA,    /* is there an addressability err? */
   4160                /*OUT*/Addr* bad_addrA  /* if so where? */
   4161             )
   4162 {
   4163    SizeT i;
   4164    UWord vabits2;
   4165    Bool  already_saw_errV = False;
   4166 
   4167    PROF_EVENT(MCPE_IS_MEM_DEFINED_COMPREHENSIVE);
   4168    DEBUG("is_mem_defined_comprehensive\n");
   4169 
   4170    tl_assert(!(*errorV || *errorA));
   4171 
   4172    for (i = 0; i < len; i++) {
   4173       PROF_EVENT(MCPE_IS_MEM_DEFINED_COMPREHENSIVE_LOOP);
   4174       vabits2 = get_vabits2(a);
   4175       switch (vabits2) {
   4176          case VA_BITS2_DEFINED:
   4177             a++;
   4178             break;
   4179          case VA_BITS2_UNDEFINED:
   4180          case VA_BITS2_PARTDEFINED:
   4181             if (!already_saw_errV) {
   4182                *errorV    = True;
   4183                *bad_addrV = a;
   4184                if (MC_(clo_mc_level) == 3) {
   4185                   *otagV = MC_(helperc_b_load1)( a );
   4186                } else {
   4187                   *otagV = 0;
   4188                }
   4189                already_saw_errV = True;
   4190             }
   4191             a++; /* keep going */
   4192             break;
   4193          case VA_BITS2_NOACCESS:
   4194             *errorA    = True;
   4195             *bad_addrA = a;
   4196             return; /* give up now. */
   4197          default:
   4198             tl_assert(0);
   4199       }
   4200    }
   4201 }
   4202 
   4203 
    4204 /* Check a zero-terminated ASCII string.  Tricky -- we don't want to
    4205    examine the actual bytes to find the end until we're sure it is
    4206    safe to do so. */
    4207 
    4208 static MC_ReadResult mc_is_defined_asciiz ( Addr a, Addr* bad_addr, UInt* otag )
   4209 {
   4210    UWord vabits2;
   4211 
   4212    PROF_EVENT(MCPE_IS_DEFINED_ASCIIZ);
   4213    DEBUG("mc_is_defined_asciiz\n");
   4214 
   4215    if (otag)     *otag = 0;
   4216    if (bad_addr) *bad_addr = 0;
   4217    while (True) {
   4218       PROF_EVENT(MCPE_IS_DEFINED_ASCIIZ_LOOP);
   4219       vabits2 = get_vabits2(a);
   4220       if (VA_BITS2_DEFINED != vabits2) {
   4221          // Error!  Nb: Report addressability errors in preference to
    4222          // definedness errors.  And don't report definedness errors unless
   4223          // --undef-value-errors=yes.
   4224          if (bad_addr) {
   4225             *bad_addr = a;
   4226          }
   4227          if (VA_BITS2_NOACCESS == vabits2) {
   4228             return MC_AddrErr;
   4229          }
   4230          if (MC_(clo_mc_level) >= 2) {
   4231             if (otag && MC_(clo_mc_level) == 3) {
   4232                *otag = MC_(helperc_b_load1)( a );
   4233             }
   4234             return MC_ValueErr;
   4235          }
   4236       }
   4237       /* Ok, a is safe to read. */
   4238       if (* ((UChar*)a) == 0) {
   4239          return MC_Ok;
   4240       }
   4241       a++;
   4242    }
   4243 }
   4244 
   4245 
   4246 /*------------------------------------------------------------*/
   4247 /*--- Memory event handlers                                ---*/
   4248 /*------------------------------------------------------------*/
   4249 
   4250 static
   4251 void check_mem_is_addressable ( CorePart part, ThreadId tid, const HChar* s,
   4252                                 Addr base, SizeT size )
   4253 {
   4254    Addr bad_addr;
   4255    Bool ok = is_mem_addressable ( base, size, &bad_addr );
   4256 
   4257    if (!ok) {
   4258       switch (part) {
   4259       case Vg_CoreSysCall:
   4260          MC_(record_memparam_error) ( tid, bad_addr,
   4261                                       /*isAddrErr*/True, s, 0/*otag*/ );
   4262          break;
   4263 
   4264       case Vg_CoreSignal:
   4265          MC_(record_core_mem_error)( tid, s );
   4266          break;
   4267 
   4268       default:
   4269          VG_(tool_panic)("check_mem_is_addressable: unexpected CorePart");
   4270       }
   4271    }
   4272 }
   4273 
   4274 static
   4275 void check_mem_is_defined ( CorePart part, ThreadId tid, const HChar* s,
   4276                             Addr base, SizeT size )
   4277 {
   4278    UInt otag = 0;
   4279    Addr bad_addr;
   4280    MC_ReadResult res = is_mem_defined ( base, size, &bad_addr, &otag );
   4281 
   4282    if (MC_Ok != res) {
   4283       Bool isAddrErr = ( MC_AddrErr == res ? True : False );
   4284 
   4285       switch (part) {
   4286       case Vg_CoreSysCall:
   4287          MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
   4288                                       isAddrErr ? 0 : otag );
   4289          break;
   4290 
   4291       case Vg_CoreSysCallArgInMem:
   4292          MC_(record_regparam_error) ( tid, s, otag );
   4293          break;
   4294 
   4295       /* If we're being asked to jump to a silly address, record an error
   4296          message before potentially crashing the entire system. */
   4297       case Vg_CoreTranslate:
   4298          MC_(record_jump_error)( tid, bad_addr );
   4299          break;
   4300 
   4301       default:
   4302          VG_(tool_panic)("check_mem_is_defined: unexpected CorePart");
   4303       }
   4304    }
   4305 }
   4306 
   4307 static
   4308 void check_mem_is_defined_asciiz ( CorePart part, ThreadId tid,
   4309                                    const HChar* s, Addr str )
   4310 {
   4311    MC_ReadResult res;
   4312    Addr bad_addr = 0;   // shut GCC up
   4313    UInt otag = 0;
   4314 
   4315    tl_assert(part == Vg_CoreSysCall);
   4316    res = mc_is_defined_asciiz ( (Addr)str, &bad_addr, &otag );
   4317    if (MC_Ok != res) {
   4318       Bool isAddrErr = ( MC_AddrErr == res ? True : False );
   4319       MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
   4320                                    isAddrErr ? 0 : otag );
   4321    }
   4322 }
   4323 
   4324 /* Handling of mmap and mprotect is not as simple as it seems.
   4325 
   4326    The underlying semantics are that memory obtained from mmap is
   4327    always initialised, but may be inaccessible.  And changes to the
   4328    protection of memory do not change its contents and hence not its
   4329    definedness state.  Problem is we can't model
   4330    inaccessible-but-with-some-definedness state; once we mark memory
   4331    as inaccessible we lose all info about definedness, and so can't
   4332    restore that if it is later made accessible again.
   4333 
   4334    One obvious thing to do is this:
   4335 
   4336       mmap/mprotect NONE  -> noaccess
   4337       mmap/mprotect other -> defined
   4338 
   4339    The problem case here is: taking accessible memory, writing
   4340    uninitialised data to it, mprotecting it NONE and later mprotecting
   4341    it back to some accessible state causes the undefinedness to be
   4342    lost.
   4343 
   4344    A better proposal is:
   4345 
   4346      (1) mmap NONE       ->  make noaccess
   4347      (2) mmap other      ->  make defined
   4348 
   4349      (3) mprotect NONE   ->  # no change
   4350      (4) mprotect other  ->  change any "noaccess" to "defined"
   4351 
   4352    (2) is OK because memory newly obtained from mmap really is defined
   4353        (zeroed out by the kernel -- doing anything else would
   4354        constitute a massive security hole.)
   4355 
   4356    (1) is OK because the only way to make the memory usable is via
   4357        (4), in which case we also wind up correctly marking it all as
   4358        defined.
   4359 
    4360    (3) is the weak case.  We choose not to change memory state
   4361        (presumably the range is in some mixture of "defined" and
   4362        "undefined", viz, accessible but with arbitrary V bits).  Doing
   4363        nothing means we retain the V bits, so that if the memory is
   4364        later mprotected "other", the V bits remain unchanged, so there
   4365        can be no false negatives.  The bad effect is that if there's
   4366        an access in the area, then MC cannot warn; but at least we'll
   4367        get a SEGV to show, so it's better than nothing.
   4368 
   4369    Consider the sequence (3) followed by (4).  Any memory that was
   4370    "defined" or "undefined" previously retains its state (as
   4371    required).  Any memory that was "noaccess" before can only have
   4372    been made that way by (1), and so it's OK to change it to
   4373    "defined".
   4374 
   4375    See https://bugs.kde.org/show_bug.cgi?id=205541
   4376    and https://bugs.kde.org/show_bug.cgi?id=210268
   4377 */
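
/* A minimal illustration of rules (1)-(4), assuming an ordinary POSIX
   mmap/mprotect sequence (purely illustrative, not from the original
   sources):

      p = mmap(NULL, len, PROT_READ|PROT_WRITE, ...);  // (2): defined
      // ... some bytes of p later become undefined, e.g. by copying
      // uninitialised heap data into them ...
      mprotect(p, len, PROT_NONE);                 // (3): V bits kept
      mprotect(p, len, PROT_READ|PROT_WRITE);      // (4): any "noaccess"
                                                   //      becomes "defined"

   Because (3) left the V bits alone, the bytes that were undefined
   before the PROT_NONE step are still reported as undefined after the
   final mprotect, rather than being silently promoted to "defined". */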
   4378 static
   4379 void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx,
   4380                        ULong di_handle )
   4381 {
   4382    if (rr || ww || xx) {
   4383       /* (2) mmap/mprotect other -> defined */
   4384       MC_(make_mem_defined)(a, len);
   4385    } else {
   4386       /* (1) mmap/mprotect NONE  -> noaccess */
   4387       MC_(make_mem_noaccess)(a, len);
   4388    }
   4389 }
   4390 
   4391 static
   4392 void mc_new_mem_mprotect ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx )
   4393 {
   4394    if (rr || ww || xx) {
   4395       /* (4) mprotect other  ->  change any "noaccess" to "defined" */
   4396       make_mem_defined_if_noaccess(a, len);
   4397    } else {
   4398       /* (3) mprotect NONE   ->  # no change */
   4399       /* do nothing */
   4400    }
   4401 }
   4402 
   4403 
   4404 static
   4405 void mc_new_mem_startup( Addr a, SizeT len,
   4406                          Bool rr, Bool ww, Bool xx, ULong di_handle )
   4407 {
   4408    // Because code is defined, initialised variables get put in the data
   4409    // segment and are defined, and uninitialised variables get put in the
   4410    // bss segment and are auto-zeroed (and so defined).
   4411    //
   4412    // It's possible that there will be padding between global variables.
   4413    // This will also be auto-zeroed, and marked as defined by Memcheck.  If
   4414    // a program uses it, Memcheck will not complain.  This is arguably a
   4415    // false negative, but it's a grey area -- the behaviour is defined (the
   4416    // padding is zeroed) but it's probably not what the user intended.  And
   4417    // we can't avoid it.
   4418    //
   4419    // Note: we generally ignore RWX permissions, because we can't track them
   4420    // without requiring more than one A bit which would slow things down a
   4421    // lot.  But on Darwin the 0th page is mapped but !R and !W and !X.
   4422    // So we mark any such pages as "unaddressable".
   4423    DEBUG("mc_new_mem_startup(%#lx, %llu, rr=%u, ww=%u, xx=%u)\n",
   4424          a, (ULong)len, rr, ww, xx);
   4425    mc_new_mem_mmap(a, len, rr, ww, xx, di_handle);
   4426 }
   4427 
   4428 static
   4429 void mc_post_mem_write(CorePart part, ThreadId tid, Addr a, SizeT len)
   4430 {
   4431    MC_(make_mem_defined)(a, len);
   4432 }
   4433 
   4434 
   4435 /*------------------------------------------------------------*/
   4436 /*--- Register event handlers                              ---*/
   4437 /*------------------------------------------------------------*/
   4438 
   4439 /* Try and get a nonzero origin for the guest state section of thread
   4440    tid characterised by (offset,size).  Return 0 if nothing to show
   4441    for it. */
   4442 static UInt mb_get_origin_for_guest_offset ( ThreadId tid,
   4443                                              Int offset, SizeT size )
   4444 {
   4445    Int   sh2off;
   4446    UInt  area[3];
   4447    UInt  otag;
   4448    sh2off = MC_(get_otrack_shadow_offset)( offset, size );
   4449    if (sh2off == -1)
   4450       return 0;  /* This piece of guest state is not tracked */
   4451    tl_assert(sh2off >= 0);
   4452    tl_assert(0 == (sh2off % 4));
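   /* The two constants written either side of area[1] act as
      sentinels: the asserts after the call check that
      VG_(get_shadow_regs_area) wrote exactly the 4-byte otag in the
      middle and did not touch the words on either side. */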
   4453    area[0] = 0x31313131;
   4454    area[2] = 0x27272727;
   4455    VG_(get_shadow_regs_area)( tid, (UChar *)&area[1], 2/*shadowno*/,sh2off,4 );
   4456    tl_assert(area[0] == 0x31313131);
   4457    tl_assert(area[2] == 0x27272727);
   4458    otag = area[1];
   4459    return otag;
   4460 }
   4461 
   4462 
   4463 /* When some chunk of guest state is written, mark the corresponding
   4464    shadow area as valid.  This is used to initialise arbitrarily large
   4465    chunks of guest state, hence the _SIZE value, which has to be as
   4466    big as the biggest guest state.
   4467 */
   4468 static void mc_post_reg_write ( CorePart part, ThreadId tid,
   4469                                 PtrdiffT offset, SizeT size)
   4470 {
   4471 #  define MAX_REG_WRITE_SIZE 1728
   4472    UChar area[MAX_REG_WRITE_SIZE];
   4473    tl_assert(size <= MAX_REG_WRITE_SIZE);
   4474    VG_(memset)(area, V_BITS8_DEFINED, size);
   4475    VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/,offset,size, area );
   4476 #  undef MAX_REG_WRITE_SIZE
   4477 }
   4478 
   4479 static
   4480 void mc_post_reg_write_clientcall ( ThreadId tid,
   4481                                     PtrdiffT offset, SizeT size, Addr f)
   4482 {
   4483    mc_post_reg_write(/*dummy*/0, tid, offset, size);
   4484 }
   4485 
   4486 /* Look at the definedness of the guest's shadow state for
   4487    [offset, offset+len).  If any part of that is undefined, record
   4488    a parameter error.
   4489 */
   4490 static void mc_pre_reg_read ( CorePart part, ThreadId tid, const HChar* s,
   4491                               PtrdiffT offset, SizeT size)
   4492 {
   4493    Int   i;
   4494    Bool  bad;
   4495    UInt  otag;
   4496 
   4497    UChar area[16];
   4498    tl_assert(size <= 16);
   4499 
   4500    VG_(get_shadow_regs_area)( tid, area, 1/*shadowNo*/,offset,size );
   4501 
   4502    bad = False;
   4503    for (i = 0; i < size; i++) {
   4504       if (area[i] != V_BITS8_DEFINED) {
   4505          bad = True;
   4506          break;
   4507       }
   4508    }
   4509 
   4510    if (!bad)
   4511       return;
   4512 
   4513    /* We've found some undefinedness.  See if we can also find an
   4514       origin for it. */
   4515    otag = mb_get_origin_for_guest_offset( tid, offset, size );
   4516    MC_(record_regparam_error) ( tid, s, otag );
   4517 }
   4518 
   4519 
   4520 /*------------------------------------------------------------*/
   4521 /*--- Register-memory event handlers                       ---*/
   4522 /*------------------------------------------------------------*/
   4523 
   4524 static void mc_copy_mem_to_reg ( CorePart part, ThreadId tid, Addr a,
   4525                                  PtrdiffT guest_state_offset, SizeT size )
   4526 {
   4527    SizeT i;
   4528    UChar vbits8;
   4529    Int offset;
   4530    UInt d32;
   4531 
   4532    /* Slow loop. */
   4533    for (i = 0; i < size; i++) {
   4534       get_vbits8( a+i, &vbits8 );
   4535       VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/, guest_state_offset+i,
   4536                                  1, &vbits8 );
   4537    }
   4538 
   4539    if (MC_(clo_mc_level) != 3)
   4540       return;
   4541 
   4542    /* Track origins. */
   4543    offset = MC_(get_otrack_shadow_offset)( guest_state_offset, size );
   4544    if (offset == -1)
   4545       return;
   4546 
   4547    switch (size) {
   4548    case 1:
   4549       d32 = MC_(helperc_b_load1)( a );
   4550       break;
   4551    case 2:
   4552       d32 = MC_(helperc_b_load2)( a );
   4553       break;
   4554    case 4:
   4555       d32 = MC_(helperc_b_load4)( a );
   4556       break;
   4557    case 8:
   4558       d32 = MC_(helperc_b_load8)( a );
   4559       break;
   4560    case 16:
   4561       d32 = MC_(helperc_b_load16)( a );
   4562       break;
   4563    case 32:
   4564       d32 = MC_(helperc_b_load32)( a );
   4565       break;
   4566    default:
   4567       tl_assert(0);
   4568    }
   4569 
   4570    VG_(set_shadow_regs_area)( tid, 2/*shadowNo*/, offset, 4, (UChar*)&d32 );
   4571 }
   4572 
   4573 static void mc_copy_reg_to_mem ( CorePart part, ThreadId tid,
   4574                                  PtrdiffT guest_state_offset, Addr a,
   4575                                  SizeT size )
   4576 {
   4577    SizeT i;
   4578    UChar vbits8;
   4579    Int offset;
   4580    UInt d32;
   4581 
   4582    /* Slow loop. */
   4583    for (i = 0; i < size; i++) {
   4584       VG_(get_shadow_regs_area)( tid, &vbits8, 1/*shadowNo*/,
   4585                                  guest_state_offset+i, 1 );
   4586       set_vbits8( a+i, vbits8 );
   4587    }
   4588 
   4589    if (MC_(clo_mc_level) != 3)
   4590       return;
   4591 
   4592    /* Track origins. */
   4593    offset = MC_(get_otrack_shadow_offset)( guest_state_offset, size );
   4594    if (offset == -1)
   4595       return;
   4596 
   4597    VG_(get_shadow_regs_area)( tid, (UChar*)&d32, 2/*shadowNo*/, offset, 4 );
   4598    switch (size) {
   4599    case 1:
   4600       MC_(helperc_b_store1)( a, d32 );
   4601       break;
   4602    case 2:
   4603       MC_(helperc_b_store2)( a, d32 );
   4604       break;
   4605    case 4:
   4606       MC_(helperc_b_store4)( a, d32 );
   4607       break;
   4608    case 8:
   4609       MC_(helperc_b_store8)( a, d32 );
   4610       break;
   4611    case 16:
   4612       MC_(helperc_b_store16)( a, d32 );
   4613       break;
   4614    case 32:
   4615       MC_(helperc_b_store32)( a, d32 );
   4616       break;
   4617    default:
   4618       tl_assert(0);
   4619    }
   4620 }
   4621 
   4622 
   4623 /*------------------------------------------------------------*/
   4624 /*--- Some static assertions                               ---*/
   4625 /*------------------------------------------------------------*/
   4626 
   4627 /* The handwritten assembly helpers below have baked-in assumptions
   4628    about various constant values.  These assertions attempt to make
   4629    that a bit safer by checking those values and flagging changes that
   4630    would make the assembly invalid.  Not perfect but it's better than
   4631    nothing. */
   4632 
   4633 STATIC_ASSERT(SM_CHUNKS * 4 == 65536);
   4634 
   4635 STATIC_ASSERT(VA_BITS8_DEFINED   == 0xAA);
   4636 STATIC_ASSERT(VA_BITS8_UNDEFINED == 0x55);
   4637 
   4638 STATIC_ASSERT(V_BITS32_DEFINED   == 0x00000000);
   4639 STATIC_ASSERT(V_BITS32_UNDEFINED == 0xFFFFFFFF);
   4640 
   4641 STATIC_ASSERT(VA_BITS4_DEFINED == 0xA);
   4642 STATIC_ASSERT(VA_BITS4_UNDEFINED == 0x5);
   4643 
   4644 STATIC_ASSERT(V_BITS16_DEFINED == 0x0000);
   4645 STATIC_ASSERT(V_BITS16_UNDEFINED == 0xFFFF);
   4646 
   4647 STATIC_ASSERT(VA_BITS2_DEFINED == 2);
   4648 STATIC_ASSERT(VA_BITS2_UNDEFINED == 1);
   4649 
   4650 STATIC_ASSERT(V_BITS8_DEFINED == 0x00);
   4651 STATIC_ASSERT(V_BITS8_UNDEFINED == 0xFF);
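
/* For reference (a derivation from the assertions above, not new
   information): each byte of client memory is described by a 2-bit VA
   code, with VA_BITS2_DEFINED == 2 (binary 10) and VA_BITS2_UNDEFINED
   == 1 (binary 01).  Packing eight such codes into one 16-bit vabits16
   entry therefore gives 0b1010..10 == 0xAAAA for "all 8 bytes defined"
   and 0b0101..01 == 0x5555 for "all 8 bytes undefined", which are
   exactly the immediates the handwritten helpers below compare
   against. */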
   4652 
   4653 
   4654 /*------------------------------------------------------------*/
   4655 /*--- Functions called directly from generated code:       ---*/
   4656 /*--- Load/store handlers.                                 ---*/
   4657 /*------------------------------------------------------------*/
   4658 
   4659 /* Types:  LOADV32, LOADV16, LOADV8 are:
   4660                UWord fn ( Addr a )
   4661    so they return 32-bits on 32-bit machines and 64-bits on
   4662    64-bit machines.  Addr has the same size as a host word.
   4663 
   4664    LOADV64 is always  ULong fn ( Addr a )
   4665 
   4666    Similarly for STOREV8, STOREV16, STOREV32, the supplied vbits
   4667    are a UWord, and for STOREV64 they are a ULong.
   4668 */
   4669 
   4670 /* If any part of '_a' indicated by the mask is 1, either '_a' is not
   4671    naturally '_sz/8'-aligned, or it exceeds the range covered by the
   4672    primary map.  This is all very tricky (and important!), so let's
   4673    work through the maths by hand (below), *and* assert for these
   4674    values at startup. */
   4675 #define MASK(_szInBytes) \
   4676    ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) )
   4677 
   4678 /* MASK only exists so as to define this macro. */
   4679 #define UNALIGNED_OR_HIGH(_a,_szInBits) \
   4680    ((_a) & MASK((_szInBits>>3)))
   4681 
   4682 /* On a 32-bit machine:
   4683 
   4684    N_PRIMARY_BITS          == 16, so
   4685    N_PRIMARY_MAP           == 0x10000, so
   4686    N_PRIMARY_MAP-1         == 0xFFFF, so
   4687    (N_PRIMARY_MAP-1) << 16 == 0xFFFF0000, and so
   4688 
   4689    MASK(1) = ~ ( (0x10000 - 1) | 0xFFFF0000 )
   4690            = ~ ( 0xFFFF | 0xFFFF0000 )
   4691            = ~ 0xFFFF'FFFF
   4692            = 0
   4693 
   4694    MASK(2) = ~ ( (0x10000 - 2) | 0xFFFF0000 )
   4695            = ~ ( 0xFFFE | 0xFFFF0000 )
   4696            = ~ 0xFFFF'FFFE
   4697            = 1
   4698 
   4699    MASK(4) = ~ ( (0x10000 - 4) | 0xFFFF0000 )
   4700            = ~ ( 0xFFFC | 0xFFFF0000 )
   4701            = ~ 0xFFFF'FFFC
   4702            = 3
   4703 
   4704    MASK(8) = ~ ( (0x10000 - 8) | 0xFFFF0000 )
   4705            = ~ ( 0xFFF8 | 0xFFFF0000 )
   4706            = ~ 0xFFFF'FFF8
   4707            = 7
   4708 
   4709    Hence in the 32-bit case, "a & MASK(1/2/4/8)" is a nonzero value
   4710    precisely when a is not 1/2/4/8-bytes aligned.  And obviously, for
   4711    the 1-byte alignment case, it is always a zero value, since MASK(1)
   4712    is zero.  All as expected.
   4713 
   4714    On a 64-bit machine, it's more complex, since we're testing
   4715    simultaneously for misalignment and for the address being at or
   4716    above 64G:
   4717 
   4718    N_PRIMARY_BITS          == 20, so
   4719    N_PRIMARY_MAP           == 0x100000, so
   4720    N_PRIMARY_MAP-1         == 0xFFFFF, so
   4721    (N_PRIMARY_MAP-1) << 16 == 0xF'FFFF'0000, and so
   4722 
   4723    MASK(1) = ~ ( (0x10000 - 1) | 0xF'FFFF'0000 )
   4724            = ~ ( 0xFFFF | 0xF'FFFF'0000 )
   4725            = ~ 0xF'FFFF'FFFF
   4726            = 0xFFFF'FFF0'0000'0000
   4727 
   4728    MASK(2) = ~ ( (0x10000 - 2) | 0xF'FFFF'0000 )
   4729            = ~ ( 0xFFFE | 0xF'FFFF'0000 )
   4730            = ~ 0xF'FFFF'FFFE
   4731            = 0xFFFF'FFF0'0000'0001
   4732 
   4733    MASK(4) = ~ ( (0x10000 - 4) | 0xF'FFFF'0000 )
   4734            = ~ ( 0xFFFC | 0xF'FFFF'0000 )
   4735            = ~ 0xF'FFFF'FFFC
   4736            = 0xFFFF'FFF0'0000'0003
   4737 
   4738    MASK(8) = ~ ( (0x10000 - 8) | 0xF'FFFF'0000 )
   4739            = ~ ( 0xFFF8 | 0xF'FFFF'0000 )
   4740            = ~ 0xF'FFFF'FFF8
   4741            = 0xFFFF'FFF0'0000'0007
   4742 */
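
/* A quick worked example for the 64-bit case (the address is invented
   purely for illustration): take a == 0x1234'5678'9ABC'DEF1 and an
   8-byte access.  Then

   a & MASK(8) == 0x1234'5678'9ABC'DEF1 & 0xFFFF'FFF0'0000'0007
               == 0x1234'5670'0000'0001

   which is nonzero, so UNALIGNED_OR_HIGH(a,64) sends the access to the
   slow path -- correctly, since that address is both misaligned and
   well above the 64G range covered by the primary map. */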
   4743 
   4744 /*------------------------------------------------------------*/
   4745 /*--- LOADV256 and LOADV128                                ---*/
   4746 /*------------------------------------------------------------*/
   4747 
   4748 static INLINE
   4749 void mc_LOADV_128_or_256 ( /*OUT*/ULong* res,
   4750                            Addr a, SizeT nBits, Bool isBigEndian )
   4751 {
   4752    PROF_EVENT(MCPE_LOADV_128_OR_256);
   4753 
   4754 #ifndef PERF_FAST_LOADV
   4755    mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
   4756    return;
   4757 #else
   4758    {
   4759       UWord   sm_off16, vabits16, j;
   4760       UWord   nBytes  = nBits / 8;
   4761       UWord   nULongs = nBytes / 8;
   4762       SecMap* sm;
   4763 
   4764       if (UNLIKELY( UNALIGNED_OR_HIGH(a,nBits) )) {
   4765          PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW1);
   4766          mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
   4767          return;
   4768       }
   4769 
   4770       /* Handle common cases quickly: a (and a+8 and a+16 etc.) is
    4771          suitably aligned, is mapped, and addressable. */
   4772       for (j = 0; j < nULongs; j++) {
   4773          sm       = get_secmap_for_reading_low(a + 8*j);
   4774          sm_off16 = SM_OFF_16(a + 8*j);
   4775          vabits16 = sm->vabits16[sm_off16];
   4776 
   4777          // Convert V bits from compact memory form to expanded
   4778          // register form.
   4779          if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
   4780             res[j] = V_BITS64_DEFINED;
   4781          } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
   4782             res[j] = V_BITS64_UNDEFINED;
   4783          } else {
    4784             /* Slow case: some block of 8 bytes is not all-defined or
   4785                all-undefined. */
   4786             PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW2);
   4787             mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
   4788             return;
   4789          }
   4790       }
   4791       return;
   4792    }
   4793 #endif
   4794 }
   4795 
   4796 VG_REGPARM(2) void MC_(helperc_LOADV256be) ( /*OUT*/V256* res, Addr a )
   4797 {
   4798    mc_LOADV_128_or_256(&res->w64[0], a, 256, True);
   4799 }
   4800 VG_REGPARM(2) void MC_(helperc_LOADV256le) ( /*OUT*/V256* res, Addr a )
   4801 {
   4802    mc_LOADV_128_or_256(&res->w64[0], a, 256, False);
   4803 }
   4804 
   4805 VG_REGPARM(2) void MC_(helperc_LOADV128be) ( /*OUT*/V128* res, Addr a )
   4806 {
   4807    mc_LOADV_128_or_256(&res->w64[0], a, 128, True);
   4808 }
   4809 VG_REGPARM(2) void MC_(helperc_LOADV128le) ( /*OUT*/V128* res, Addr a )
   4810 {
   4811    mc_LOADV_128_or_256(&res->w64[0], a, 128, False);
   4812 }
   4813 
   4814 /*------------------------------------------------------------*/
   4815 /*--- LOADV64                                              ---*/
   4816 /*------------------------------------------------------------*/
   4817 
   4818 static INLINE
   4819 ULong mc_LOADV64 ( Addr a, Bool isBigEndian )
   4820 {
   4821    PROF_EVENT(MCPE_LOADV64);
   4822 
   4823 #ifndef PERF_FAST_LOADV
   4824    return mc_LOADVn_slow( a, 64, isBigEndian );
   4825 #else
   4826    {
   4827       UWord   sm_off16, vabits16;
   4828       SecMap* sm;
   4829 
   4830       if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
   4831          PROF_EVENT(MCPE_LOADV64_SLOW1);
   4832          return (ULong)mc_LOADVn_slow( a, 64, isBigEndian );
   4833       }
   4834 
   4835       sm       = get_secmap_for_reading_low(a);
   4836       sm_off16 = SM_OFF_16(a);
   4837       vabits16 = sm->vabits16[sm_off16];
   4838 
   4839       // Handle common case quickly: a is suitably aligned, is mapped, and
    4840       // addressable.
   4841       // Convert V bits from compact memory form to expanded register form.
   4842       if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
   4843          return V_BITS64_DEFINED;
   4844       } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
   4845          return V_BITS64_UNDEFINED;
   4846       } else {
   4847          /* Slow case: the 8 bytes are not all-defined or all-undefined. */
   4848          PROF_EVENT(MCPE_LOADV64_SLOW2);
   4849          return mc_LOADVn_slow( a, 64, isBigEndian );
   4850       }
   4851    }
   4852 #endif
   4853 }
   4854 
   4855 // Generic for all platforms
   4856 VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr a )
   4857 {
   4858    return mc_LOADV64(a, True);
   4859 }
   4860 
   4861 // Non-generic assembly for arm32-linux
   4862 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
   4863     && defined(VGP_arm_linux)
   4864 __asm__( /* Derived from the 32 bit assembly helper */
   4865 ".text                                  \n"
   4866 ".align 2                               \n"
   4867 ".global vgMemCheck_helperc_LOADV64le   \n"
   4868 ".type   vgMemCheck_helperc_LOADV64le, %function \n"
   4869 "vgMemCheck_helperc_LOADV64le:          \n"
   4870 "      tst    r0, #7                    \n"
   4871 "      movw   r3, #:lower16:primary_map \n"
   4872 "      bne    .LLV64LEc4                \n" // if misaligned
   4873 "      lsr    r2, r0, #16               \n"
   4874 "      movt   r3, #:upper16:primary_map \n"
   4875 "      ldr    r2, [r3, r2, lsl #2]      \n"
   4876 "      uxth   r1, r0                    \n" // r1 is 0-(16)-0 X-(13)-X 000
   4877 "      movw   r3, #0xAAAA               \n"
   4878 "      lsr    r1, r1, #2                \n" // r1 is 0-(16)-0 00 X-(13)-X 0
   4879 "      ldrh   r1, [r2, r1]              \n"
   4880 "      cmp    r1, r3                    \n" // 0xAAAA == VA_BITS16_DEFINED
   4881 "      bne    .LLV64LEc0                \n" // if !all_defined
   4882 "      mov    r1, #0x0                  \n" // 0x0 == V_BITS32_DEFINED
   4883 "      mov    r0, #0x0                  \n" // 0x0 == V_BITS32_DEFINED
   4884 "      bx     lr                        \n"
   4885 ".LLV64LEc0:                            \n"
   4886 "      movw   r3, #0x5555               \n"
   4887 "      cmp    r1, r3                    \n" // 0x5555 == VA_BITS16_UNDEFINED
   4888 "      bne    .LLV64LEc4                \n" // if !all_undefined
   4889 "      mov    r1, #0xFFFFFFFF           \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
   4890 "      mov    r0, #0xFFFFFFFF           \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
   4891 "      bx     lr                        \n"
   4892 ".LLV64LEc4:                            \n"
   4893 "      push   {r4, lr}                  \n"
   4894 "      mov    r2, #0                    \n"
   4895 "      mov    r1, #64                   \n"
   4896 "      bl     mc_LOADVn_slow            \n"
   4897 "      pop    {r4, pc}                  \n"
   4898 ".size vgMemCheck_helperc_LOADV64le, .-vgMemCheck_helperc_LOADV64le \n"
   4899 ".previous\n"
   4900 );
   4901 
   4902 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
   4903       && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
   4904 __asm__(
   4905 ".text\n"
   4906 ".align 16\n"
   4907 ".global vgMemCheck_helperc_LOADV64le\n"
   4908 ".type   vgMemCheck_helperc_LOADV64le, @function\n"
   4909 "vgMemCheck_helperc_LOADV64le:\n"
   4910 "      test   $0x7,  %eax\n"
   4911 "      jne    .LLV64LE2\n"          /* jump if not aligned */
   4912 "      mov    %eax,  %ecx\n"
   4913 "      movzwl %ax,   %edx\n"
   4914 "      shr    $0x10, %ecx\n"
   4915 "      mov    primary_map(,%ecx,4), %ecx\n"
   4916 "      shr    $0x3,  %edx\n"
   4917 "      movzwl (%ecx,%edx,2), %edx\n"
   4918 "      cmp    $0xaaaa, %edx\n"
   4919 "      jne    .LLV64LE1\n"          /* jump if not all defined */
   4920 "      xor    %eax, %eax\n"         /* return 0 in edx:eax */
   4921 "      xor    %edx, %edx\n"
   4922 "      ret\n"
   4923 ".LLV64LE1:\n"
   4924 "      cmp    $0x5555, %edx\n"
   4925 "      jne    .LLV64LE2\n"         /* jump if not all undefined */
   4926 "      or     $0xffffffff, %eax\n" /* else return all bits set in edx:eax */
   4927 "      or     $0xffffffff, %edx\n"
   4928 "      ret\n"
   4929 ".LLV64LE2:\n"
   4930 "      xor    %ecx,  %ecx\n"  /* tail call to mc_LOADVn_slow(a, 64, 0) */
   4931 "      mov    $64,   %edx\n"
   4932 "      jmp    mc_LOADVn_slow\n"
   4933 ".size vgMemCheck_helperc_LOADV64le, .-vgMemCheck_helperc_LOADV64le\n"
   4934 ".previous\n"
   4935 );
   4936 
   4937 #else
   4938 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
   4939 VG_REGPARM(1) ULong MC_(helperc_LOADV64le) ( Addr a )
   4940 {
   4941    return mc_LOADV64(a, False);
   4942 }
   4943 #endif
   4944 
   4945 /*------------------------------------------------------------*/
   4946 /*--- STOREV64                                             ---*/
   4947 /*------------------------------------------------------------*/
   4948 
   4949 static INLINE
   4950 void mc_STOREV64 ( Addr a, ULong vbits64, Bool isBigEndian )
   4951 {
   4952    PROF_EVENT(MCPE_STOREV64);
   4953 
   4954 #ifndef PERF_FAST_STOREV
   4955    // XXX: this slow case seems to be marginally faster than the fast case!
   4956    // Investigate further.
   4957    mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
   4958 #else
   4959    {
   4960       UWord   sm_off16, vabits16;
   4961       SecMap* sm;
   4962 
   4963       if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
   4964          PROF_EVENT(MCPE_STOREV64_SLOW1);
   4965          mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
   4966          return;
   4967       }
   4968 
   4969       sm       = get_secmap_for_reading_low(a);
   4970       sm_off16 = SM_OFF_16(a);
   4971       vabits16 = sm->vabits16[sm_off16];
   4972 
   4973       // To understand the below cleverness, see the extensive comments
   4974       // in MC_(helperc_STOREV8).
   4975       if (LIKELY(V_BITS64_DEFINED == vbits64)) {
   4976          if (LIKELY(vabits16 == (UShort)VA_BITS16_DEFINED)) {
   4977             return;
   4978          }
   4979          if (!is_distinguished_sm(sm) && VA_BITS16_UNDEFINED == vabits16) {
   4980             sm->vabits16[sm_off16] = VA_BITS16_DEFINED;
   4981             return;
   4982          }
   4983          PROF_EVENT(MCPE_STOREV64_SLOW2);
   4984          mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
   4985          return;
   4986       }
   4987       if (V_BITS64_UNDEFINED == vbits64) {
   4988          if (vabits16 == (UShort)VA_BITS16_UNDEFINED) {
   4989             return;
   4990          }
   4991          if (!is_distinguished_sm(sm) && VA_BITS16_DEFINED == vabits16) {
   4992             sm->vabits16[sm_off16] = VA_BITS16_UNDEFINED;
   4993             return;
   4994          }
   4995          PROF_EVENT(MCPE_STOREV64_SLOW3);
   4996          mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
   4997          return;
   4998       }
   4999 
   5000       PROF_EVENT(MCPE_STOREV64_SLOW4);
   5001       mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
   5002    }
   5003 #endif
   5004 }
   5005 
   5006 VG_REGPARM(1) void MC_(helperc_STOREV64be) ( Addr a, ULong vbits64 )
   5007 {
   5008    mc_STOREV64(a, vbits64, True);
   5009 }
   5010 VG_REGPARM(1) void MC_(helperc_STOREV64le) ( Addr a, ULong vbits64 )
   5011 {
   5012    mc_STOREV64(a, vbits64, False);
   5013 }
   5014 
   5015 /*------------------------------------------------------------*/
   5016 /*--- LOADV32                                              ---*/
   5017 /*------------------------------------------------------------*/
   5018 
   5019 static INLINE
   5020 UWord mc_LOADV32 ( Addr a, Bool isBigEndian )
   5021 {
   5022    PROF_EVENT(MCPE_LOADV32);
   5023 
   5024 #ifndef PERF_FAST_LOADV
   5025    return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
   5026 #else
   5027    {
   5028       UWord   sm_off, vabits8;
   5029       SecMap* sm;
   5030 
   5031       if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
   5032          PROF_EVENT(MCPE_LOADV32_SLOW1);
   5033          return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
   5034       }
   5035 
   5036       sm      = get_secmap_for_reading_low(a);
   5037       sm_off  = SM_OFF(a);
   5038       vabits8 = sm->vabits8[sm_off];
   5039 
   5040       // Handle common case quickly: a is suitably aligned, is mapped, and the
   5041       // entire word32 it lives in is addressable.
   5042       // Convert V bits from compact memory form to expanded register form.
   5043       // For 64-bit platforms, set the high 32 bits of retval to 1 (undefined).
   5044       // Almost certainly not necessary, but be paranoid.
   5045       if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
   5046          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
   5047       } else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) {
   5048          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
   5049       } else {
   5050          /* Slow case: the 4 bytes are not all-defined or all-undefined. */
   5051          PROF_EVENT(MCPE_LOADV32_SLOW2);
   5052          return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
   5053       }
   5054    }
   5055 #endif
   5056 }
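
/* A compiled-out sketch of why the "be paranoid" trick above is harmless:
   the 32-bit V value sits in the low half of the returned UWord, so
   truncating to 32 bits recovers it exactly, while any accidental use of
   the planted high half reads as "undefined" rather than spuriously
   "defined".  On 32-bit hosts the planted constant simply truncates to
   zero.  Illustrative only; it uses just the constants referenced above. */
#if 0
static void sketch_loadv32_high_bits_are_safe ( void )
{
   UWord r = (UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED;
   tl_assert( (UInt)r == V_BITS32_DEFINED );
   r = (UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED;
   tl_assert( (UInt)r == V_BITS32_UNDEFINED );
}
#endif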
   5057 
   5058 // Generic for all platforms
   5059 VG_REGPARM(1) UWord MC_(helperc_LOADV32be) ( Addr a )
   5060 {
   5061    return mc_LOADV32(a, True);
   5062 }
   5063 
   5064 // Non-generic assembly for arm32-linux
   5065 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
   5066     && defined(VGP_arm_linux)
   5067 __asm__( /* Derived from NCode template */
   5068 ".text                                  \n"
   5069 ".align 2                               \n"
   5070 ".global vgMemCheck_helperc_LOADV32le   \n"
   5071 ".type   vgMemCheck_helperc_LOADV32le, %function \n"
   5072 "vgMemCheck_helperc_LOADV32le:          \n"
   5073 "      tst    r0, #3                    \n" // 1
   5074 "      movw   r3, #:lower16:primary_map \n" // 1
   5075 "      bne    .LLV32LEc4                \n" // 2  if misaligned
   5076 "      lsr    r2, r0, #16               \n" // 3
   5077 "      movt   r3, #:upper16:primary_map \n" // 3
   5078 "      ldr    r2, [r3, r2, lsl #2]      \n" // 4
   5079 "      uxth   r1, r0                    \n" // 4
   5080 "      ldrb   r1, [r2, r1, lsr #2]      \n" // 5
   5081 "      cmp    r1, #0xAA                 \n" // 6  0xAA == VA_BITS8_DEFINED
   5082 "      bne    .LLV32LEc0                \n" // 7  if !all_defined
   5083 "      mov    r0, #0x0                  \n" // 8  0x0 == V_BITS32_DEFINED
   5084 "      bx     lr                        \n" // 9
   5085 ".LLV32LEc0:                            \n"
   5086 "      cmp    r1, #0x55                 \n" // 0x55 == VA_BITS8_UNDEFINED
   5087 "      bne    .LLV32LEc4                \n" // if !all_undefined
   5088 "      mov    r0, #0xFFFFFFFF           \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
   5089 "      bx     lr                        \n"
   5090 ".LLV32LEc4:                            \n"
   5091 "      push   {r4, lr}                  \n"
   5092 "      mov    r2, #0                    \n"
   5093 "      mov    r1, #32                   \n"
   5094 "      bl     mc_LOADVn_slow            \n"
   5095 "      pop    {r4, pc}                  \n"
   5096 ".size vgMemCheck_helperc_LOADV32le, .-vgMemCheck_helperc_LOADV32le \n"
   5097 ".previous\n"
   5098 );
   5099 
   5100 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
   5101       && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
   5102 __asm__(
   5103 ".text\n"
   5104 ".align 16\n"
   5105 ".global vgMemCheck_helperc_LOADV32le\n"
   5106 ".type   vgMemCheck_helperc_LOADV32le, @function\n"
   5107 "vgMemCheck_helperc_LOADV32le:\n"
   5108 "      test   $0x3,  %eax\n"
   5109 "      jnz    .LLV32LE2\n"         /* jump if misaligned */
   5110 "      mov    %eax,  %edx\n"
   5111 "      shr    $16,   %edx\n"
   5112 "      mov    primary_map(,%edx,4), %ecx\n"
   5113 "      movzwl %ax,   %edx\n"
   5114 "      shr    $2,    %edx\n"
   5115 "      movzbl (%ecx,%edx,1), %edx\n"
   5116 "      cmp    $0xaa, %edx\n"       /* compare to VA_BITS8_DEFINED */
   5117 "      jne    .LLV32LE1\n"         /* jump if not completely defined */
   5118 "      xor    %eax,  %eax\n"       /* else return V_BITS32_DEFINED */
   5119 "      ret\n"
   5120 ".LLV32LE1:\n"
   5121 "      cmp    $0x55, %edx\n"       /* compare to VA_BITS8_UNDEFINED */
   5122 "      jne    .LLV32LE2\n"         /* jump if not completely undefined */
   5123 "      or     $0xffffffff, %eax\n" /* else return V_BITS32_UNDEFINED */
   5124 "      ret\n"
   5125 ".LLV32LE2:\n"
   5126 "      xor    %ecx,  %ecx\n"       /* tail call mc_LOADVn_slow(a, 32, 0) */
   5127 "      mov    $32,   %edx\n"
   5128 "      jmp    mc_LOADVn_slow\n"
   5129 ".size vgMemCheck_helperc_LOADV32le, .-vgMemCheck_helperc_LOADV32le\n"
   5130 ".previous\n"
   5131 );
   5132 
   5133 #else
   5134 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
   5135 VG_REGPARM(1) UWord MC_(helperc_LOADV32le) ( Addr a )
   5136 {
   5137    return mc_LOADV32(a, False);
   5138 }
   5139 #endif
   5140 
   5141 /*------------------------------------------------------------*/
   5142 /*--- STOREV32                                             ---*/
   5143 /*------------------------------------------------------------*/
   5144 
   5145 static INLINE
   5146 void mc_STOREV32 ( Addr a, UWord vbits32, Bool isBigEndian )
   5147 {
   5148    PROF_EVENT(MCPE_STOREV32);
   5149 
   5150 #ifndef PERF_FAST_STOREV
   5151    mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   5152 #else
   5153    {
   5154       UWord   sm_off, vabits8;
   5155       SecMap* sm;
   5156 
   5157       if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
   5158          PROF_EVENT(MCPE_STOREV32_SLOW1);
   5159          mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   5160          return;
   5161       }
   5162 
   5163       sm      = get_secmap_for_reading_low(a);
   5164       sm_off  = SM_OFF(a);
   5165       vabits8 = sm->vabits8[sm_off];
   5166 
   5167       // To understand the below cleverness, see the extensive comments
   5168       // in MC_(helperc_STOREV8).
   5169       if (LIKELY(V_BITS32_DEFINED == vbits32)) {
   5170          if (LIKELY(vabits8 == (UInt)VA_BITS8_DEFINED)) {
   5171             return;
   5172          }
   5173          if (!is_distinguished_sm(sm)  && VA_BITS8_UNDEFINED == vabits8) {
   5174             sm->vabits8[sm_off] = (UInt)VA_BITS8_DEFINED;
   5175             return;
   5176          }
   5177          PROF_EVENT(MCPE_STOREV32_SLOW2);
   5178          mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   5179          return;
   5180       }
   5181       if (V_BITS32_UNDEFINED == vbits32) {
   5182          if (vabits8 == (UInt)VA_BITS8_UNDEFINED) {
   5183             return;
   5184          }
   5185          if (!is_distinguished_sm(sm) && VA_BITS8_DEFINED == vabits8) {
   5186             sm->vabits8[sm_off] = (UInt)VA_BITS8_UNDEFINED;
   5187             return;
   5188          }
   5189          PROF_EVENT(MCPE_STOREV32_SLOW3);
   5190          mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   5191          return;
   5192       }
   5193 
   5194       PROF_EVENT(MCPE_STOREV32_SLOW4);
   5195       mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   5196    }
   5197 #endif
   5198 }
   5199 
   5200 VG_REGPARM(2) void MC_(helperc_STOREV32be) ( Addr a, UWord vbits32 )
   5201 {
   5202    mc_STOREV32(a, vbits32, True);
   5203 }
   5204 VG_REGPARM(2) void MC_(helperc_STOREV32le) ( Addr a, UWord vbits32 )
   5205 {
   5206    mc_STOREV32(a, vbits32, False);
   5207 }
   5208 
   5209 /*------------------------------------------------------------*/
   5210 /*--- LOADV16                                              ---*/
   5211 /*------------------------------------------------------------*/
   5212 
   5213 static INLINE
   5214 UWord mc_LOADV16 ( Addr a, Bool isBigEndian )
   5215 {
   5216    PROF_EVENT(MCPE_LOADV16);
   5217 
   5218 #ifndef PERF_FAST_LOADV
   5219    return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
   5220 #else
   5221    {
   5222       UWord   sm_off, vabits8;
   5223       SecMap* sm;
   5224 
   5225       if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
   5226          PROF_EVENT(MCPE_LOADV16_SLOW1);
   5227          return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
   5228       }
   5229 
   5230       sm      = get_secmap_for_reading_low(a);
   5231       sm_off  = SM_OFF(a);
   5232       vabits8 = sm->vabits8[sm_off];
   5233       // Handle common case quickly: a is suitably aligned, is mapped, and is
   5234       // addressable.
   5235       // Convert V bits from compact memory form to expanded register form
   5236       if      (LIKELY(vabits8 == VA_BITS8_DEFINED  )) { return V_BITS16_DEFINED;   }
   5237       else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS16_UNDEFINED; }
   5238       else {
   5239       // The 4 (yes, 4) bytes are not all-defined or all-undefined; check
   5240       // the two bytes being loaded.
   5241          UChar vabits4 = extract_vabits4_from_vabits8(a, vabits8);
   5242          if      (vabits4 == VA_BITS4_DEFINED  ) { return V_BITS16_DEFINED;   }
   5243          else if (vabits4 == VA_BITS4_UNDEFINED) { return V_BITS16_UNDEFINED; }
   5244          else {
   5245             /* Slow case: the two bytes are not all-defined or all-undefined. */
   5246             PROF_EVENT(MCPE_LOADV16_SLOW2);
   5247             return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
   5248          }
   5249       }
   5250    }
   5251 #endif
   5252 }
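
/* A compiled-out sketch of the fall-back check above, which narrows a
   mixed 4-byte group down to the two bytes actually being loaded.  It
   assumes extract_vabits4_from_vabits8 selects the nibble at bit position
   2*(a & 2), as the assembly helpers below spell out; illustrative only. */
#if 0
static void sketch_loadv16_subword_check ( void )
{
   UChar vabits8 = 0xA5;   /* bytes 0,1 undefined; bytes 2,3 defined */
   tl_assert( extract_vabits4_from_vabits8( (Addr)0, vabits8 )
              == VA_BITS4_UNDEFINED );
   tl_assert( extract_vabits4_from_vabits8( (Addr)2, vabits8 )
              == VA_BITS4_DEFINED );
}
#endif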
   5253 
   5254 // Generic for all platforms
   5255 VG_REGPARM(1) UWord MC_(helperc_LOADV16be) ( Addr a )
   5256 {
   5257    return mc_LOADV16(a, True);
   5258 }
   5259 
   5260 // Non-generic assembly for arm32-linux
   5261 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
   5262     && defined(VGP_arm_linux)
   5263 __asm__( /* Derived from NCode template */
   5264 ".text                                  \n"
   5265 ".align 2                               \n"
   5266 ".global vgMemCheck_helperc_LOADV16le   \n"
   5267 ".type   vgMemCheck_helperc_LOADV16le, %function \n"
   5268 "vgMemCheck_helperc_LOADV16le:          \n" //
   5269 "      tst    r0, #1                    \n" //
   5270 "      bne    .LLV16LEc12               \n" // if misaligned
   5271 "      lsr    r2, r0, #16               \n" // r2 = pri-map-ix
   5272 "      movw   r3, #:lower16:primary_map \n" //
   5273 "      uxth   r1, r0                    \n" // r1 = sec-map-offB
   5274 "      movt   r3, #:upper16:primary_map \n" //
   5275 "      ldr    r2, [r3, r2, lsl #2]      \n" // r2 = sec-map
   5276 "      ldrb   r1, [r2, r1, lsr #2]      \n" // r1 = sec-map-VABITS8
   5277 "      cmp    r1, #0xAA                 \n" // r1 == VA_BITS8_DEFINED?
   5278 "      bne    .LLV16LEc0                \n" // no, goto .LLV16LEc0
   5279 ".LLV16LEh9:                            \n" //
   5280 "      mov    r0, #0xFFFFFFFF           \n" //
   5281 "      lsl    r0, r0, #16               \n" // V_BITS16_DEFINED | top16safe
   5282 "      bx     lr                        \n" //
   5283 ".LLV16LEc0:                            \n" //
   5284 "      cmp    r1, #0x55                 \n" // VA_BITS8_UNDEFINED
   5285 "      bne    .LLV16LEc4                \n" //
   5286 ".LLV16LEc2:                            \n" //
   5287 "      mov    r0, #0xFFFFFFFF           \n" // V_BITS16_UNDEFINED | top16safe
   5288 "      bx     lr                        \n" //
   5289 ".LLV16LEc4:                            \n" //
   5290        // r1 holds sec-map-VABITS8.  r0 holds the address and is 2-aligned.
   5291        // Extract the relevant 4 bits and inspect.
   5292 "      and    r2, r0, #2       \n" // addr & 2
   5293 "      add    r2, r2, r2       \n" // 2 * (addr & 2)
   5294 "      lsr    r1, r1, r2       \n" // sec-map-VABITS8 >> (2 * (addr & 2))
   5295 "      and    r1, r1, #15      \n" // (sec-map-VABITS8 >> (2 * (addr & 2))) & 15
   5296 
   5297 "      cmp    r1, #0xA                  \n" // VA_BITS4_DEFINED
   5298 "      beq    .LLV16LEh9                \n" //
   5299 
   5300 "      cmp    r1, #0x5                  \n" // VA_BITS4_UNDEFINED
   5301 "      beq    .LLV16LEc2                \n" //
   5302 
   5303 ".LLV16LEc12:                           \n" //
   5304 "      push   {r4, lr}                  \n" //
   5305 "      mov    r2, #0                    \n" //
   5306 "      mov    r1, #16                   \n" //
   5307 "      bl     mc_LOADVn_slow            \n" //
   5308 "      pop    {r4, pc}                  \n" //
   5309 ".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n"
   5310 ".previous\n"
   5311 );
   5312 
   5313 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
   5314       && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
   5315 __asm__(
   5316 ".text\n"
   5317 ".align 16\n"
   5318 ".global vgMemCheck_helperc_LOADV16le\n"
   5319 ".type   vgMemCheck_helperc_LOADV16le, @function\n"
   5320 "vgMemCheck_helperc_LOADV16le:\n"
   5321 "      test   $0x1,  %eax\n"
   5322 "      jne    .LLV16LE5\n"          /* jump if not aligned */
   5323 "      mov    %eax,  %edx\n"
   5324 "      shr    $0x10, %edx\n"
   5325 "      mov    primary_map(,%edx,4), %ecx\n"
   5326 "      movzwl %ax,   %edx\n"
   5327 "      shr    $0x2,  %edx\n"
   5328 "      movzbl (%ecx,%edx,1), %edx\n"/* edx = VA bits for 32bit */
   5329 "      cmp    $0xaa, %edx\n"        /* compare to VA_BITS8_DEFINED */
   5330 "      jne    .LLV16LE2\n"          /* jump if not all 32bits defined */
   5331 ".LLV16LE1:\n"
   5332 "      mov    $0xffff0000,%eax\n"   /* V_BITS16_DEFINED | top16safe */
   5333 "      ret\n"
   5334 ".LLV16LE2:\n"
   5335 "      cmp    $0x55, %edx\n"        /* compare to VA_BITS8_UNDEFINED */
   5336 "      jne    .LLV16LE4\n"          /* jump if not all 32bits undefined */
   5337 ".LLV16LE3:\n"
   5338 "      or     $0xffffffff,%eax\n"   /* V_BITS16_UNDEFINED | top16safe */
   5339 "      ret\n"
   5340 ".LLV16LE4:\n"
   5341 "      mov    %eax,  %ecx\n"
   5342 "      and    $0x2,  %ecx\n"
   5343 "      add    %ecx,  %ecx\n"
   5344 "      sar    %cl,   %edx\n"
   5345 "      and    $0xf,  %edx\n"
   5346 "      cmp    $0xa,  %edx\n"
   5347 "      je     .LLV16LE1\n"          /* jump if all 16bits are defined */
   5348 "      cmp    $0x5,  %edx\n"
   5349 "      je     .LLV16LE3\n"          /* jump if all 16bits are undefined */
   5350 ".LLV16LE5:\n"
   5351 "      xor    %ecx,  %ecx\n"        /* tail call mc_LOADVn_slow(a, 16, 0) */
   5352 "      mov    $16,   %edx\n"
   5353 "      jmp    mc_LOADVn_slow\n"
   5354 ".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n"
   5355 ".previous\n"
   5356 );
   5357 
   5358 #else
   5359 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
   5360 VG_REGPARM(1) UWord MC_(helperc_LOADV16le) ( Addr a )
   5361 {
   5362    return mc_LOADV16(a, False);
   5363 }
   5364 #endif
   5365 
   5366 /*------------------------------------------------------------*/
   5367 /*--- STOREV16                                             ---*/
   5368 /*------------------------------------------------------------*/
   5369 
   5370 /* True if the vabits4 in vabits8 indicate a and a+1 are accessible. */
   5371 static INLINE
   5372 Bool accessible_vabits4_in_vabits8 ( Addr a, UChar vabits8 )
   5373 {
   5374    UInt shift;
   5375    tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
   5376    shift = (a & 2) << 1;               // shift by 0 or 4
   5377    vabits8 >>= shift;                  // shift the four bits to the bottom
   5378    // check 2 x vabits2 != VA_BITS2_NOACCESS
   5379    return ((0x3 & vabits8) != VA_BITS2_NOACCESS)
   5380       &&  ((0xc & vabits8) != VA_BITS2_NOACCESS << 2);
   5381 }
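
/* A compiled-out sketch of what the predicate above accepts and rejects:
   it only requires that neither of the two bytes is no-access, so
   partially defined bytes still count as accessible.  That is exactly the
   "direct mod" precondition mc_STOREV16 needs below.  Illustrative only;
   it uses only the function and constants defined above. */
#if 0
static void sketch_accessible_vabits4 ( void )
{
   UChar vabits8 = 0xA8;   /* byte 0 no-access; bytes 1..3 defined */
   tl_assert( !accessible_vabits4_in_vabits8( (Addr)0, vabits8 ) );
   tl_assert(  accessible_vabits4_in_vabits8( (Addr)2, vabits8 ) );

   vabits8 = 0xAB;         /* byte 0 partially defined; bytes 1..3 defined */
   tl_assert(  accessible_vabits4_in_vabits8( (Addr)0, vabits8 ) );
}
#endif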
   5382 
   5383 static INLINE
   5384 void mc_STOREV16 ( Addr a, UWord vbits16, Bool isBigEndian )
   5385 {
   5386    PROF_EVENT(MCPE_STOREV16);
   5387 
   5388 #ifndef PERF_FAST_STOREV
   5389    mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
   5390 #else
   5391    {
   5392       UWord   sm_off, vabits8;
   5393       SecMap* sm;
   5394 
   5395       if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
   5396          PROF_EVENT(MCPE_STOREV16_SLOW1);
   5397          mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
   5398          return;
   5399       }
   5400 
   5401       sm      = get_secmap_for_reading_low(a);
   5402       sm_off  = SM_OFF(a);
   5403       vabits8 = sm->vabits8[sm_off];
   5404 
   5405       // To understand the below cleverness, see the extensive comments
   5406       // in MC_(helperc_STOREV8).
   5407       if (LIKELY(V_BITS16_DEFINED == vbits16)) {
   5408          if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
   5409             return;
   5410          }
   5411          if (!is_distinguished_sm(sm)
   5412              && accessible_vabits4_in_vabits8(a, vabits8)) {
   5413             insert_vabits4_into_vabits8( a, VA_BITS4_DEFINED,
   5414                                          &(sm->vabits8[sm_off]) );
   5415             return;
   5416          }
   5417          PROF_EVENT(MCPE_STOREV16_SLOW2);
   5418          mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
                 return;
   5419       }
   5420       if (V_BITS16_UNDEFINED == vbits16) {
   5421          if (vabits8 == VA_BITS8_UNDEFINED) {
   5422             return;
   5423          }
   5424          if (!is_distinguished_sm(sm)
   5425              && accessible_vabits4_in_vabits8(a, vabits8)) {
   5426             insert_vabits4_into_vabits8( a, VA_BITS4_UNDEFINED,
   5427                                          &(sm->vabits8[sm_off]) );
   5428             return;
   5429          }
   5430          PROF_EVENT(MCPE_STOREV16_SLOW3);
   5431          mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
   5432          return;
   5433       }
   5434 
   5435       PROF_EVENT(MCPE_STOREV16_SLOW4);
   5436       mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
   5437    }
   5438 #endif
   5439 }
   5440 
   5441 
   5442 VG_REGPARM(2) void MC_(helperc_STOREV16be) ( Addr a, UWord vbits16 )
   5443 {
   5444    mc_STOREV16(a, vbits16, True);
   5445 }
   5446 VG_REGPARM(2) void MC_(helperc_STOREV16le) ( Addr a, UWord vbits16 )
   5447 {
   5448    mc_STOREV16(a, vbits16, False);
   5449 }
   5450 
   5451 /*------------------------------------------------------------*/
   5452 /*--- LOADV8                                               ---*/
   5453 /*------------------------------------------------------------*/
   5454 
   5455 /* Note: endianness is irrelevant for size == 1 */
   5456 
   5457 // Non-generic assembly for arm32-linux
   5458 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
   5459     && defined(VGP_arm_linux)
   5460 __asm__( /* Derived from NCode template */
   5461 ".text                                  \n"
   5462 ".align 2                               \n"
   5463 ".global vgMemCheck_helperc_LOADV8      \n"
   5464 ".type   vgMemCheck_helperc_LOADV8, %function \n"
   5465 "vgMemCheck_helperc_LOADV8:             \n" //
   5466 "      lsr    r2, r0, #16               \n" // r2 = pri-map-ix
   5467 "      movw   r3, #:lower16:primary_map \n" //
   5468 "      uxth   r1, r0                    \n" // r1 = sec-map-offB
   5469 "      movt   r3, #:upper16:primary_map \n" //
   5470 "      ldr    r2, [r3, r2, lsl #2]      \n" // r2 = sec-map
   5471 "      ldrb   r1, [r2, r1, lsr #2]      \n" // r1 = sec-map-VABITS8
   5472 "      cmp    r1, #0xAA                 \n" // r1 == VA_BITS8_DEFINED?
   5473 "      bne    .LLV8c0                   \n" // no, goto .LLV8c0
   5474 ".LLV8h9:                               \n" //
   5475 "      mov    r0, #0xFFFFFF00           \n" // V_BITS8_DEFINED | top24safe
   5476 "      bx     lr                        \n" //
   5477 ".LLV8c0:                               \n" //
   5478 "      cmp    r1, #0x55                 \n" // VA_BITS8_UNDEFINED
   5479 "      bne    .LLV8c4                   \n" //
   5480 ".LLV8c2:                               \n" //
   5481 "      mov    r0, #0xFFFFFFFF           \n" // V_BITS8_UNDEFINED | top24safe
   5482 "      bx     lr                        \n" //
   5483 ".LLV8c4:                               \n" //
   5484        // r1 holds sec-map-VABITS8
   5485        // r0 holds the address.  Extract the relevant 2 bits and inspect.
   5486 "      and    r2, r0, #3       \n" // addr & 3
   5487 "      add    r2, r2, r2       \n" // 2 * (addr & 3)
   5488 "      lsr    r1, r1, r2       \n" // sec-map-VABITS8 >> (2 * (addr & 3))
   5489 "      and    r1, r1, #3       \n" // (sec-map-VABITS8 >> (2 * (addr & 3))) & 3
   5490 
   5491 "      cmp    r1, #2                    \n" // VA_BITS2_DEFINED
   5492 "      beq    .LLV8h9                   \n" //
   5493 
   5494 "      cmp    r1, #1                    \n" // VA_BITS2_UNDEFINED
   5495 "      beq    .LLV8c2                   \n" //
   5496 
   5497 "      push   {r4, lr}                  \n" //
   5498 "      mov    r2, #0                    \n" //
   5499 "      mov    r1, #8                    \n" //
   5500 "      bl     mc_LOADVn_slow            \n" //
   5501 "      pop    {r4, pc}                  \n" //
   5502 ".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8 \n"
   5503 ".previous\n"
   5504 );
   5505 
   5506 /* Non-generic assembly for x86-linux */
   5507 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
   5508       && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
   5509 __asm__(
   5510 ".text\n"
   5511 ".align 16\n"
   5512 ".global vgMemCheck_helperc_LOADV8\n"
   5513 ".type   vgMemCheck_helperc_LOADV8, @function\n"
   5514 "vgMemCheck_helperc_LOADV8:\n"
   5515 "      mov    %eax,  %edx\n"
   5516 "      shr    $0x10, %edx\n"
   5517 "      mov    primary_map(,%edx,4), %ecx\n"
   5518 "      movzwl %ax,   %edx\n"
   5519 "      shr    $0x2,  %edx\n"
   5520 "      movzbl (%ecx,%edx,1), %edx\n"/* edx = VA bits for 32bit */
   5521 "      cmp    $0xaa, %edx\n"        /* compare to VA_BITS8_DEFINED? */
   5522 "      jne    .LLV8LE2\n"           /* jump if not defined */
   5523 ".LLV8LE1:\n"
   5524 "      mov    $0xffffff00, %eax\n"  /* V_BITS8_DEFINED | top24safe */
   5525 "      ret\n"
   5526 ".LLV8LE2:\n"
   5527 "      cmp    $0x55, %edx\n"        /* compare to VA_BITS8_UNDEFINED */
   5528 "      jne    .LLV8LE4\n"           /* jump if not all 32bits are undefined */
   5529 ".LLV8LE3:\n"
   5530 "      or     $0xffffffff, %eax\n"  /* V_BITS8_UNDEFINED | top24safe */
   5531 "      ret\n"
   5532 ".LLV8LE4:\n"
   5533 "      mov    %eax,  %ecx\n"
   5534 "      and    $0x3,  %ecx\n"
   5535 "      add    %ecx,  %ecx\n"
   5536 "      sar    %cl,   %edx\n"
   5537 "      and    $0x3,  %edx\n"
   5538 "      cmp    $0x2,  %edx\n"
   5539 "      je     .LLV8LE1\n"           /* jump if all 8bits are defined */
   5540 "      cmp    $0x1,  %edx\n"
   5541 "      je     .LLV8LE3\n"           /* jump if all 8bits are undefined */
   5542 "      xor    %ecx,  %ecx\n"        /* tail call to mc_LOADVn_slow(a, 8, 0) */
   5543 "      mov    $0x8,  %edx\n"
   5544 "      jmp    mc_LOADVn_slow\n"
   5545 ".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8\n"
   5546 ".previous\n"
   5547 );
   5548 
   5549 #else
   5550 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
   5551 VG_REGPARM(1)
   5552 UWord MC_(helperc_LOADV8) ( Addr a )
   5553 {
   5554    PROF_EVENT(MCPE_LOADV8);
   5555 
   5556 #ifndef PERF_FAST_LOADV
   5557    return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
   5558 #else
   5559    {
   5560       UWord   sm_off, vabits8;
   5561       SecMap* sm;
   5562 
   5563       if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
   5564          PROF_EVENT(MCPE_LOADV8_SLOW1);
   5565          return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
   5566       }
   5567 
   5568       sm      = get_secmap_for_reading_low(a);
   5569       sm_off  = SM_OFF(a);
   5570       vabits8 = sm->vabits8[sm_off];
   5571       // Convert V bits from compact memory form to expanded register form
   5572       // Handle common case quickly: a is mapped, and the entire
   5573       // word32 it lives in is addressable.
   5574       if      (LIKELY(vabits8 == VA_BITS8_DEFINED  )) { return V_BITS8_DEFINED;   }
   5575       else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS8_UNDEFINED; }
   5576       else {
   5577          // The 4 (yes, 4) bytes are not all-defined or all-undefined; check
   5578          // the single byte being loaded.
   5579          UChar vabits2 = extract_vabits2_from_vabits8(a, vabits8);
   5580          if      (vabits2 == VA_BITS2_DEFINED  ) { return V_BITS8_DEFINED;   }
   5581          else if (vabits2 == VA_BITS2_UNDEFINED) { return V_BITS8_UNDEFINED; }
   5582          else {
   5583             /* Slow case: the byte is not all-defined or all-undefined. */
   5584             PROF_EVENT(MCPE_LOADV8_SLOW2);
   5585             return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
   5586          }
   5587       }
   5588    }
   5589 #endif
   5590 }
   5591 #endif
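
/* A compiled-out sketch of the single-byte fall-back used by LOADV8: each
   byte owns the two VA bits at position 2*(a & 3) within its group's
   vabits8, so one byte can still be reported as cleanly defined or
   undefined even when its neighbours are not.  The bit selection assumed
   here is the one the assembly variants above implement; illustrative
   only. */
#if 0
static void sketch_loadv8_byte_check ( void )
{
   UChar vabits8 = 0x9A;   /* bytes 0,1,3 defined; byte 2 undefined */
   tl_assert( extract_vabits2_from_vabits8( (Addr)1, vabits8 )
              == VA_BITS2_DEFINED );
   tl_assert( extract_vabits2_from_vabits8( (Addr)2, vabits8 )
              == VA_BITS2_UNDEFINED );
}
#endif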
   5592 
   5593 /*------------------------------------------------------------*/
   5594 /*--- STOREV8                                              ---*/
   5595 /*------------------------------------------------------------*/
   5596 
   5597 VG_REGPARM(2)
   5598 void MC_(helperc_STOREV8) ( Addr a, UWord vbits8 )
   5599 {
   5600    PROF_EVENT(MCPE_STOREV8);
   5601 
   5602 #ifndef PERF_FAST_STOREV
   5603    mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
   5604 #else
   5605    {
   5606       UWord   sm_off, vabits8;
   5607       SecMap* sm;
   5608 
   5609       if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
   5610          PROF_EVENT(MCPE_STOREV8_SLOW1);
   5611          mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
   5612          return;
   5613       }
   5614 
   5615       sm      = get_secmap_for_reading_low(a);
   5616       sm_off  = SM_OFF(a);
   5617       vabits8 = sm->vabits8[sm_off];
   5618 
   5619       // Clevernesses to speed up storing V bits.
   5620       // The 64/32/16 bit cases also have similar clevernesses, but they
   5621       // work a little differently from the code below.
   5622       //
   5623       // Cleverness 1:  sometimes we don't have to write the shadow memory at
   5624       // all, if we can tell that what we want to write is the same as what is
   5625       // already there. These cases are marked below as "defined on defined" and
   5626       // "undefined on undefined".
   5627       //
   5628       // Cleverness 2:
   5629       // We also avoid calling mc_STOREVn_slow if the V bits can be written
   5630       // directly in the secondary map. V bits can be written directly
   5631       // if 4 conditions hold:
   5632       //   * The address for which V bits are written is naturally aligned
   5633       //        on 1 byte  for STOREV8 (this is always true)
   5634       //        on 2 bytes for STOREV16
   5635       //        on 4 bytes for STOREV32
   5636       //        on 8 bytes for STOREV64.
   5637       //   * V bits being written are either fully defined or fully undefined.
   5638       //     (for partially defined V bits, V bits cannot be directly written,
   5639       //      as the secondary vbits table must be maintained).
   5640       //   * the secmap is not distinguished (distinguished maps cannot be
   5641       //     modified).
   5642       //   * the memory corresponding to the V bits being written is
   5643       //     accessible (if one or more bytes are not accessible,
   5644       //     we must call mc_STOREVn_slow in order to report accessibility
   5645       //     errors).
   5646       //     Note that for STOREV32 and STOREV64, it is too expensive
   5647       //     to verify the accessibility of each byte for the benefit it
   5648       //     brings. Instead, a quicker check is done by comparing to
   5649       //     VA_BITS(8|16)_(UN)DEFINED. This guarantees accessibility,
   5650       //     but misses some opportunities for direct modification.
   5651       //     Checking the accessibility of each byte was measured with
   5652       //     STOREV32 + the perf tests, and it slowed down all perf tests.
   5653       // The cases corresponding to cleverness 2 are marked below as
   5654       // "direct mod".  (See the sketch after this function.)
   5655       if (LIKELY(V_BITS8_DEFINED == vbits8)) {
   5656          if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
   5657             return; // defined on defined
   5658          }
   5659          if (!is_distinguished_sm(sm)
   5660              && VA_BITS2_NOACCESS != extract_vabits2_from_vabits8(a, vabits8)) {
   5661             // direct mod
   5662             insert_vabits2_into_vabits8( a, VA_BITS2_DEFINED,
   5663                                          &(sm->vabits8[sm_off]) );
   5664             return;
   5665          }
   5666          PROF_EVENT(MCPE_STOREV8_SLOW2);
   5667          mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
   5668          return;
   5669       }
   5670       if (V_BITS8_UNDEFINED == vbits8) {
   5671          if (vabits8 == VA_BITS8_UNDEFINED) {
   5672             return; // undefined on undefined
   5673          }
   5674          if (!is_distinguished_sm(sm)
   5675              && (VA_BITS2_NOACCESS
   5676                  != extract_vabits2_from_vabits8(a, vabits8))) {
   5677             // direct mod
   5678             insert_vabits2_into_vabits8( a, VA_BITS2_UNDEFINED,
   5679                                          &(sm->vabits8[sm_off]) );
   5680             return;
   5681          }
   5682          PROF_EVENT(MCPE_STOREV8_SLOW3);
   5683          mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
   5684          return;
   5685       }
   5686 
   5687       // Partially defined word
   5688       PROF_EVENT(MCPE_STOREV8_SLOW4);
   5689       mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
   5690    }
   5691 #endif
   5692 }
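
/* A compiled-out sketch of the "direct mod" case described in the comments
   above: storing a fully defined byte into a currently-undefined byte only
   needs the two relevant VA bits rewritten in place, with no call to
   mc_STOREVn_slow and no secondary V bit table entry.  It assumes
   insert_vabits2_into_vabits8 performs a masked two-bit update at bit
   position 2*(a & 3), as its uses above imply; illustrative only. */
#if 0
static void sketch_storev8_direct_mod ( void )
{
   UChar vabits8 = 0x9A;   /* bytes 0,1,3 defined; byte 2 undefined */
   insert_vabits2_into_vabits8( (Addr)2, VA_BITS2_DEFINED, &vabits8 );
   tl_assert( vabits8 == VA_BITS8_DEFINED );   /* 0x9A -> 0xAA */
   /* A partially defined value (e.g. vbits8 == 0x0F) cannot be encoded in
      two VA bits, so the real code must take mc_STOREVn_slow instead. */
}
#endif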
   5693 
   5694 
   5695 /*------------------------------------------------------------*/
   5696 /*--- Functions called directly from generated code:       ---*/
   5697 /*--- Value-check failure handlers.                        ---*/
   5698 /*------------------------------------------------------------*/
   5699 
   5700 /* Call these ones when an origin is available ... */
   5701 VG_REGPARM(1)
   5702 void MC_(helperc_value_check0_fail_w_o) ( UWord origin ) {
   5703    MC_(record_cond_error) ( VG_(get_running_tid)(), (UInt)origin );
   5704 }
   5705 
   5706 VG_REGPARM(1)
   5707 void MC_(helperc_value_check1_fail_w_o) ( UWord origin ) {
   5708    MC_(record_value_error) ( VG_(get_running_tid)(), 1, (UInt)origin );
   5709 }
   5710 
   5711 VG_REGPARM(1)
   5712 void MC_(helperc_value_check4_fail_w_o) ( UWord origin ) {
   5713    MC_(record_value_error) ( VG_(get_running_tid)(), 4, (UInt)origin );
   5714 }
   5715 
   5716 VG_REGPARM(1)
   5717 void MC_(helperc_value_check8_fail_w_o) ( UWord origin ) {
   5718    MC_(record_value_error) ( VG_(get_running_tid)(), 8, (UInt)origin );
   5719 }
   5720 
   5721 VG_REGPARM(2)
   5722 void MC_(helperc_value_checkN_fail_w_o) ( HWord sz, UWord origin ) {
   5723    MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, (UInt)origin );
   5724 }
   5725 
   5726 /* ... and these when an origin isn't available. */
   5727 
   5728 VG_REGPARM(0)
   5729 void MC_(helperc_value_check0_fail_no_o) ( void ) {
   5730    MC_(record_cond_error) ( VG_(get_running_tid)(), 0/*origin*/ );
   5731 }
   5732 
   5733 VG_REGPARM(0)
   5734 void MC_(helperc_value_check1_fail_no_o) ( void ) {
   5735    MC_(record_value_error) ( VG_(get_running_tid)(), 1, 0/*origin*/ );
   5736 }
   5737 
   5738 VG_REGPARM(0)
   5739 void MC_(helperc_value_check4_fail_no_o) ( void ) {
   5740    MC_(record_value_error) ( VG_(get_running_tid)(), 4, 0/*origin*/ );
   5741 }
   5742 
   5743 VG_REGPARM(0)
   5744 void MC_(helperc_value_check8_fail_no_o) ( void ) {
   5745    MC_(record_value_error) ( VG_(get_running_tid)(), 8, 0/*origin*/ );
   5746 }
   5747 
   5748 VG_REGPARM(1)
   5749 void MC_(helperc_value_checkN_fail_no_o) ( HWord sz ) {
   5750    MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, 0/*origin*/ );
   5751 }
   5752 
   5753 
   5754 /*------------------------------------------------------------*/
   5755 /*--- Metadata get/set functions, for client requests.     ---*/
   5756 /*------------------------------------------------------------*/
   5757 
   5758 // Nb: this expands the V+A bits out into register-form V bits, even though
   5759 // they're in memory.  This is for backward compatibility, and because it's
   5760 // probably what the user wants.
   5761 
   5762 /* Copy Vbits from/to address 'a'. Returns: 1 == OK, 2 == alignment
   5763    error [no longer used], 3 == addressing error. */
   5764 /* Nb: We used to issue various definedness/addressability errors from here,
   5765    but we took them out because they ranged from not-very-helpful to
   5766    downright annoying, and they complicated the error data structures. */
   5767 static Int mc_get_or_set_vbits_for_client (
   5768    Addr a,
   5769    Addr vbits,
   5770    SizeT szB,
   5771    Bool setting, /* True <=> set vbits,  False <=> get vbits */
   5772    Bool is_client_request /* True <=> real user request
   5773                              False <=> internal call from gdbserver */
   5774 )
   5775 {
   5776    SizeT i;
   5777    Bool  ok;
   5778    UChar vbits8;
   5779 
   5780    /* Check that arrays are addressable before doing any getting/setting.
   5781       The vbits array is checked only for a real user request. */
   5782    for (i = 0; i < szB; i++) {
   5783       if (VA_BITS2_NOACCESS == get_vabits2(a + i) ||
   5784           (is_client_request && VA_BITS2_NOACCESS == get_vabits2(vbits + i))) {
   5785          return 3;
   5786       }
   5787    }
   5788 
   5789    /* Do the copy */
   5790    if (setting) {
   5791       /* setting */
   5792       for (i = 0; i < szB; i++) {
   5793          ok = set_vbits8(a + i, ((UChar*)vbits)[i]);
   5794          tl_assert(ok);
   5795       }
   5796    } else {
   5797       /* getting */
   5798       for (i = 0; i < szB; i++) {
   5799          ok = get_vbits8(a + i, &vbits8);
   5800          tl_assert(ok);
   5801          ((UChar*)vbits)[i] = vbits8;
   5802       }
   5803       if (is_client_request)
   5804         // The bytes in vbits[] have now been set, so mark them as such.
   5805         MC_(make_mem_defined)(vbits, szB);
   5806    }
   5807 
   5808    return 1;
   5809 }
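
/* A compiled-out sketch of how the result codes above surface to client
   programs, assuming the VALGRIND_GET_VBITS / VALGRIND_SET_VBITS request
   macros from memcheck.h (the client-request path that ends up in this
   function).  One shadow byte is exchanged per data byte, in the expanded
   register-style form described above.  Illustrative only; not part of the
   tool itself. */
#if 0
static void sketch_client_vbits_roundtrip ( char* buf /* 16 addressable bytes */ )
{
   unsigned char vbits[16];
   int res = VALGRIND_GET_VBITS(buf, vbits, 16);  /* 1 == OK, 3 == bad address */
   if (res == 1) {
      vbits[0] = 0xFF;                            /* mark buf[0] as undefined  */
      (void) VALGRIND_SET_VBITS(buf, vbits, 16);  /* push the V bits back      */
   }
}
#endif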
   5810 
   5811 
   5812 /*------------------------------------------------------------*/
   5813 /*--- Detecting leaked (unreachable) malloc'd blocks.      ---*/
   5814 /*------------------------------------------------------------*/
   5815 
   5816 /* For the memory leak detector, say whether an entire 64k chunk of
   5817    address space is possibly in use, or not.  If in doubt return
   5818    True.
   5819 */
   5820 Bool MC_(is_within_valid_secondary) ( Addr a )
   5821 {
   5822    SecMap* sm = maybe_get_secmap_for ( a );
   5823    if (sm == NULL || sm == &sm_distinguished[SM_DIST_NOACCESS]) {
   5824       /* Definitely not in use. */
   5825       return False;
   5826    } else {
   5827       return True;
   5828    }
   5829 }
   5830 
   5831 
   5832 /* For the memory leak detector, say whether or not a given word
   5833    address is to be regarded as valid. */
   5834 Bool MC_(is_valid_aligned_word) ( Addr a )
   5835 {
   5836    tl_assert(sizeof(UWord) == 4 || sizeof(UWord) == 8);
   5837    tl_assert(VG_IS_WORD_ALIGNED(a));
   5838    if (get_vabits8_for_aligned_word32 (a) != VA_BITS8_DEFINED)
   5839       return False;
   5840    if (sizeof(UWord) == 8) {
   5841       if (get_vabits8_for_aligned_word32 (a + 4) != VA_BITS8_DEFINED)
   5842          return False;
   5843    }
   5844    if (UNLIKELY(MC_(in_ignored_range)(a)))
   5845       return False;
   5846    else
   5847       return True;
   5848 }
   5849 
   5850 
   5851 /*------------------------------------------------------------*/
   5852 /*--- Initialisation                                       ---*/
   5853 /*------------------------------------------------------------*/
   5854 
   5855 static void init_shadow_memory ( void )
   5856 {
   5857    Int     i;
   5858    SecMap* sm;
   5859 
   5860    tl_assert(V_BIT_UNDEFINED   == 1);
   5861    tl_assert(V_BIT_DEFINED     == 0);
   5862    tl_assert(V_BITS8_UNDEFINED == 0xFF);
   5863    tl_assert(V_BITS8_DEFINED   == 0);
   5864 
   5865    /* Build the 3 distinguished secondaries */
   5866    sm = &sm_distinguished[SM_DIST_NOACCESS];
   5867    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_NOACCESS;
   5868 
   5869    sm = &sm_distinguished[SM_DIST_UNDEFINED];
   5870    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_UNDEFINED;
   5871 
   5872    sm = &sm_distinguished[SM_DIST_DEFINED];
   5873    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_DEFINED;
   5874 
   5875    /* Set up the primary map. */
   5876    /* These entries gradually get overwritten as the used address
   5877       space expands. */
   5878    for (i = 0; i < N_PRIMARY_MAP; i++)
   5879       primary_map[i] = &sm_distinguished[SM_DIST_NOACCESS];
   5880 
   5881    /* Auxiliary primary maps */
   5882    init_auxmap_L1_L2();
   5883 
   5884    /* auxmap_size = auxmap_used = 0;
   5885       no ... these are statically initialised */
   5886 
   5887    /* Secondary V bit table */
   5888    secVBitTable = createSecVBitTable();
   5889 }
   5890 
   5891 
   5892 /*------------------------------------------------------------*/
   5893 /*--- Sanity check machinery (permanently engaged)         ---*/
   5894 /*------------------------------------------------------------*/
   5895 
   5896 static Bool mc_cheap_sanity_check ( void )
   5897 {
   5898    n_sanity_cheap++;
   5899    PROF_EVENT(MCPE_CHEAP_SANITY_CHECK);
   5900    /* Check for sane operating level */
   5901    if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
   5902       return False;
   5903    /* nothing else useful we can rapidly check */
   5904    return True;
   5905 }
   5906 
   5907 static Bool mc_expensive_sanity_check ( void )
   5908 {
   5909    Int     i;
   5910    Word    n_secmaps_found;
   5911    SecMap* sm;
   5912    const HChar*  errmsg;
   5913    Bool    bad = False;
   5914 
   5915    if (0) VG_(printf)("expensive sanity check\n");
   5916    if (0) return True;
   5917 
   5918    n_sanity_expensive++;
   5919    PROF_EVENT(MCPE_EXPENSIVE_SANITY_CHECK);
   5920 
   5921    /* Check for sane operating level */
   5922    if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
   5923       return False;
   5924 
   5925    /* Check that the 3 distinguished SMs are still as they should be. */
   5926 
   5927    /* Check noaccess DSM. */
   5928    sm = &sm_distinguished[SM_DIST_NOACCESS];
   5929    for (i = 0; i < SM_CHUNKS; i++)
   5930       if (sm->vabits8[i] != VA_BITS8_NOACCESS)
   5931          bad = True;
   5932 
   5933    /* Check undefined DSM. */
   5934    sm = &sm_distinguished[SM_DIST_UNDEFINED];
   5935    for (i = 0; i < SM_CHUNKS; i++)
   5936       if (sm->vabits8[i] != VA_BITS8_UNDEFINED)
   5937          bad = True;
   5938 
   5939    /* Check defined DSM. */
   5940    sm = &sm_distinguished[SM_DIST_DEFINED];
   5941    for (i = 0; i < SM_CHUNKS; i++)
   5942       if (sm->vabits8[i] != VA_BITS8_DEFINED)
   5943          bad = True;
   5944 
   5945    if (bad) {
   5946       VG_(printf)("memcheck expensive sanity: "
   5947                   "distinguished_secondaries have changed\n");
   5948       return False;
   5949    }
   5950 
   5951    /* If we're not checking for undefined value errors, the secondary V bit
   5952     * table should be empty. */
   5953    if (MC_(clo_mc_level) == 1) {
   5954       if (0 != VG_(OSetGen_Size)(secVBitTable))
   5955          return False;
   5956    }
   5957 
   5958    /* check the auxiliary maps, very thoroughly */
   5959    n_secmaps_found = 0;
   5960    errmsg = check_auxmap_L1_L2_sanity( &n_secmaps_found );
   5961    if (errmsg) {
   5962       VG_(printf)("memcheck expensive sanity, auxmaps:\n\t%s", errmsg);
   5963       return False;
   5964    }
   5965 
   5966    /* n_secmaps_found is now the number referred to by the auxiliary
   5967       primary map.  Now add on the ones referred to by the main
   5968       primary map. */
   5969    for (i = 0; i < N_PRIMARY_MAP; i++) {
   5970       if (primary_map[i] == NULL) {
   5971          bad = True;
   5972       } else {
   5973          if (!is_distinguished_sm(primary_map[i]))
   5974             n_secmaps_found++;
   5975       }
   5976    }
   5977 
   5978    /* check that the number of secmaps issued matches the number that
   5979       are reachable (iow, no secmap leaks) */
   5980    if (n_secmaps_found != (n_issued_SMs - n_deissued_SMs))
   5981       bad = True;
   5982 
   5983    if (bad) {
   5984       VG_(printf)("memcheck expensive sanity: "
   5985                   "apparent secmap leakage\n");
   5986       return False;
   5987    }
   5988 
   5989    if (bad) {
   5990       VG_(printf)("memcheck expensive sanity: "
   5991                   "auxmap covers wrong address space\n");
   5992       return False;
   5993    }
   5994 
   5995    /* there is only one pointer to each secmap (expensive) */
   5996 
   5997    return True;
   5998 }
   5999 
   6000 /*------------------------------------------------------------*/
   6001 /*--- Command line args                                    ---*/
   6002 /*------------------------------------------------------------*/
   6003 
   6004 /* 31 Aug 2015: Vectorised code is now so widespread that
   6005    --partial-loads-ok needs to be enabled by default on all platforms.
   6006    Not doing so causes lots of false errors. */
   6007 Bool          MC_(clo_partial_loads_ok)       = True;
   6008 Long          MC_(clo_freelist_vol)           = 20*1000*1000LL;
   6009 Long          MC_(clo_freelist_big_blocks)    =  1*1000*1000LL;
   6010 LeakCheckMode MC_(clo_leak_check)             = LC_Summary;
   6011 VgRes         MC_(clo_leak_resolution)        = Vg_HighRes;
   6012 UInt          MC_(clo_show_leak_kinds)        = R2S(Possible) | R2S(Unreached);
   6013 UInt          MC_(clo_error_for_leak_kinds)   = R2S(Possible) | R2S(Unreached);
   6014 UInt          MC_(clo_leak_check_heuristics)  =   H2S(LchStdString)
   6015                                                 | H2S( LchLength64)
   6016                                                 | H2S( LchNewArray)
   6017                                                 | H2S( LchMultipleInheritance);
   6018 Bool          MC_(clo_xtree_leak)             = False;
   6019 const HChar*  MC_(clo_xtree_leak_file) = "xtleak.kcg.%p";
   6020 Bool          MC_(clo_workaround_gcc296_bugs) = False;
   6021 Int           MC_(clo_malloc_fill)            = -1;
   6022 Int           MC_(clo_free_fill)              = -1;
   6023 KeepStacktraces MC_(clo_keep_stacktraces)     = KS_alloc_and_free;
   6024 Int           MC_(clo_mc_level)               = 2;
   6025 Bool          MC_(clo_show_mismatched_frees)  = True;
   6026 Bool          MC_(clo_expensive_definedness_checks) = False;
   6027 Bool          MC_(clo_ignore_range_below_sp)               = False;
   6028 UInt          MC_(clo_ignore_range_below_sp__first_offset) = 0;
   6029 UInt          MC_(clo_ignore_range_below_sp__last_offset)  = 0;
   6030 
   6031 static const HChar * MC_(parse_leak_heuristics_tokens) =
   6032    "-,stdstring,length64,newarray,multipleinheritance";
   6033 /* The first heuristic value (LchNone) has no keyword, as this is
   6034    a fake heuristic used to collect the blocks found without any
   6035    heuristic. */
   6036 
   6037 static Bool mc_process_cmd_line_options(const HChar* arg)
   6038 {
   6039    const HChar* tmp_str;
   6040    Int   tmp_show;
   6041 
   6042    tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
   6043 
   6044    /* Set MC_(clo_mc_level):
   6045          1 = A bit tracking only
   6046          2 = A and V bit tracking, but no V bit origins
   6047          3 = A and V bit tracking, and V bit origins
   6048 
   6049       Do this by inspecting --undef-value-errors= and
   6050       --track-origins=.  Reject the case --undef-value-errors=no
   6051       --track-origins=yes as meaningless.
   6052    */
   6053    if (0 == VG_(strcmp)(arg, "--undef-value-errors=no")) {
   6054       if (MC_(clo_mc_level) == 3) {
   6055          goto bad_level;
   6056       } else {
   6057          MC_(clo_mc_level) = 1;
   6058          return True;
   6059       }
   6060    }
   6061    if (0 == VG_(strcmp)(arg, "--undef-value-errors=yes")) {
   6062       if (MC_(clo_mc_level) == 1)
   6063          MC_(clo_mc_level) = 2;
   6064       return True;
   6065    }
   6066    if (0 == VG_(strcmp)(arg, "--track-origins=no")) {
   6067       if (MC_(clo_mc_level) == 3)
   6068          MC_(clo_mc_level) = 2;
   6069       return True;
   6070    }
   6071    if (0 == VG_(strcmp)(arg, "--track-origins=yes")) {
   6072       if (MC_(clo_mc_level) == 1) {
   6073          goto bad_level;
   6074       } else {
   6075          MC_(clo_mc_level) = 3;
   6076          return True;
   6077       }
   6078    }
   6079 
   6080         if VG_BOOL_CLO(arg, "--partial-loads-ok", MC_(clo_partial_loads_ok)) {}
   6081    else if VG_USET_CLO(arg, "--errors-for-leak-kinds",
   6082                        MC_(parse_leak_kinds_tokens),
   6083                        MC_(clo_error_for_leak_kinds)) {}
   6084    else if VG_USET_CLO(arg, "--show-leak-kinds",
   6085                        MC_(parse_leak_kinds_tokens),
   6086                        MC_(clo_show_leak_kinds)) {}
   6087    else if VG_USET_CLO(arg, "--leak-check-heuristics",
   6088                        MC_(parse_leak_heuristics_tokens),
   6089                        MC_(clo_leak_check_heuristics)) {}
   6090    else if (VG_BOOL_CLO(arg, "--show-reachable", tmp_show)) {
   6091       if (tmp_show) {
   6092          MC_(clo_show_leak_kinds) = MC_(all_Reachedness)();
   6093       } else {
   6094          MC_(clo_show_leak_kinds) &= ~R2S(Reachable);
   6095       }
   6096    }
   6097    else if VG_BOOL_CLO(arg, "--show-possibly-lost", tmp_show) {
   6098       if (tmp_show) {
   6099          MC_(clo_show_leak_kinds) |= R2S(Possible);
   6100       } else {
   6101          MC_(clo_show_leak_kinds) &= ~R2S(Possible);
   6102       }
   6103    }
   6104    else if VG_BOOL_CLO(arg, "--workaround-gcc296-bugs",
   6105                                             MC_(clo_workaround_gcc296_bugs)) {}
   6106 
   6107    else if VG_BINT_CLO(arg, "--freelist-vol",  MC_(clo_freelist_vol),
   6108                                                0, 10*1000*1000*1000LL) {}
   6109 
   6110    else if VG_BINT_CLO(arg, "--freelist-big-blocks",
   6111                        MC_(clo_freelist_big_blocks),
   6112                        0, 10*1000*1000*1000LL) {}
   6113 
   6114    else if VG_XACT_CLO(arg, "--leak-check=no",
   6115                             MC_(clo_leak_check), LC_Off) {}
   6116    else if VG_XACT_CLO(arg, "--leak-check=summary",
   6117                             MC_(clo_leak_check), LC_Summary) {}
   6118    else if VG_XACT_CLO(arg, "--leak-check=yes",
   6119                             MC_(clo_leak_check), LC_Full) {}
   6120    else if VG_XACT_CLO(arg, "--leak-check=full",
   6121                             MC_(clo_leak_check), LC_Full) {}
   6122 
   6123    else if VG_XACT_CLO(arg, "--leak-resolution=low",
   6124                             MC_(clo_leak_resolution), Vg_LowRes) {}
   6125    else if VG_XACT_CLO(arg, "--leak-resolution=med",
   6126                             MC_(clo_leak_resolution), Vg_MedRes) {}
   6127    else if VG_XACT_CLO(arg, "--leak-resolution=high",
   6128                             MC_(clo_leak_resolution), Vg_HighRes) {}
   6129 
   6130    else if VG_STR_CLO(arg, "--ignore-ranges", tmp_str) {
   6131       Bool ok = parse_ignore_ranges(tmp_str);
   6132       if (!ok) {
   6133          VG_(message)(Vg_DebugMsg,
   6134             "ERROR: --ignore-ranges: "
   6135             "invalid syntax, or end <= start in range\n");
   6136          return False;
   6137       }
   6138       if (gIgnoredAddressRanges) {
   6139          UInt i;
   6140          for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
   6141             UWord val     = IAR_INVALID;
   6142             UWord key_min = ~(UWord)0;
   6143             UWord key_max = (UWord)0;
   6144             VG_(indexRangeMap)( &key_min, &key_max, &val,
   6145                                 gIgnoredAddressRanges, i );
   6146             tl_assert(key_min <= key_max);
   6147             UWord limit = 0x4000000; /* 64M - entirely arbitrary limit */
   6148             if (key_max - key_min > limit && val == IAR_CommandLine) {
   6149                VG_(message)(Vg_DebugMsg,
   6150                   "ERROR: --ignore-ranges: suspiciously large range:\n");
   6151                VG_(message)(Vg_DebugMsg,
   6152                    "       0x%lx-0x%lx (size %lu)\n", key_min, key_max,
   6153                    key_max - key_min + 1);
   6154                return False;
   6155             }
   6156          }
   6157       }
   6158    }
   6159 
   6160    else if VG_STR_CLO(arg, "--ignore-range-below-sp", tmp_str) {
   6161       /* This seems at first a bit weird, but: in order to imply
   6162          a non-wrapped-around address range, the first offset needs to be
   6163          larger than the second one.  For example
   6164             --ignore-range-below-sp=8192,8189
   6165          would cause accesses in the range [SP-8192, SP-8189] to be
   6166          ignored. */
   6167       UInt offs1 = 0, offs2 = 0;
   6168       Bool ok = parse_UInt_pair(&tmp_str, &offs1, &offs2);
   6169       // Ensure we used all the text after the '=' sign.
   6170       if (ok && *tmp_str != 0) ok = False;
   6171       if (!ok) {
   6172          VG_(message)(Vg_DebugMsg,
   6173                       "ERROR: --ignore-range-below-sp: invalid syntax. "
   6174                       " Expected \"...=decimalnumber-decimalnumber\".\n");
   6175          return False;
   6176       }
   6177       if (offs1 > 1000*1000 /*arbitrary*/ || offs2 > 1000*1000 /*ditto*/) {
   6178          VG_(message)(Vg_DebugMsg,
   6179                       "ERROR: --ignore-range-below-sp: suspiciously large "
   6180                       "offset(s): %u and %u\n", offs1, offs2);
   6181          return False;
   6182       }
   6183       if (offs1 <= offs2) {
   6184          VG_(message)(Vg_DebugMsg,
   6185                       "ERROR: --ignore-range-below-sp: invalid offsets "
   6186                       "(the first must be larger): %u and %u\n", offs1, offs2);
   6187          return False;
   6188       }
   6189       tl_assert(offs1 > offs2);
   6190       if (offs1 - offs2 > 4096 /*arbitrary*/) {
   6191          VG_(message)(Vg_DebugMsg,
   6192                       "ERROR: --ignore-range-below-sp: suspiciously large "
   6193                       "range: %u-%u (size %u)\n", offs1, offs2, offs1 - offs2);
   6194          return False;
   6195       }
   6196       MC_(clo_ignore_range_below_sp) = True;
   6197       MC_(clo_ignore_range_below_sp__first_offset) = offs1;
   6198       MC_(clo_ignore_range_below_sp__last_offset)  = offs2;
   6199       return True;
   6200    }
   6201 
   6202    else if VG_BHEX_CLO(arg, "--malloc-fill", MC_(clo_malloc_fill), 0x00,0xFF) {}
   6203    else if VG_BHEX_CLO(arg, "--free-fill",   MC_(clo_free_fill),   0x00,0xFF) {}
   6204 
   6205    else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc",
   6206                        MC_(clo_keep_stacktraces), KS_alloc) {}
   6207    else if VG_XACT_CLO(arg, "--keep-stacktraces=free",
   6208                        MC_(clo_keep_stacktraces), KS_free) {}
   6209    else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-and-free",
   6210                        MC_(clo_keep_stacktraces), KS_alloc_and_free) {}
   6211    else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-then-free",
   6212                        MC_(clo_keep_stacktraces), KS_alloc_then_free) {}
   6213    else if VG_XACT_CLO(arg, "--keep-stacktraces=none",
   6214                        MC_(clo_keep_stacktraces), KS_none) {}
   6215 
   6216    else if VG_BOOL_CLO(arg, "--show-mismatched-frees",
   6217                        MC_(clo_show_mismatched_frees)) {}
   6218    else if VG_BOOL_CLO(arg, "--expensive-definedness-checks",
   6219                        MC_(clo_expensive_definedness_checks)) {}
   6220 
   6221    else if VG_BOOL_CLO(arg, "--xtree-leak",
   6222                        MC_(clo_xtree_leak)) {}
   6223    else if VG_STR_CLO (arg, "--xtree-leak-file",
   6224                        MC_(clo_xtree_leak_file)) {}
   6225 
   6226    else
   6227       return VG_(replacement_malloc_process_cmd_line_option)(arg);
   6228 
   6229    return True;
   6230 
   6231 
   6232   bad_level:
   6233    VG_(fmsg_bad_option)(arg,
   6234       "--track-origins=yes has no effect when --undef-value-errors=no.\n");
   6235 }
   6236 
   6237 static void mc_print_usage(void)
   6238 {
   6239    VG_(printf)(
   6240 "    --leak-check=no|summary|full     search for memory leaks at exit?  [summary]\n"
   6241 "    --leak-resolution=low|med|high   differentiation of leak stack traces [high]\n"
   6242 "    --show-leak-kinds=kind1,kind2,.. which leak kinds to show?\n"
   6243 "                                            [definite,possible]\n"
   6244 "    --errors-for-leak-kinds=kind1,kind2,..  which leak kinds are errors?\n"
   6245 "                                            [definite,possible]\n"
   6246 "        where kind is one of:\n"
   6247 "          definite indirect possible reachable all none\n"
   6248 "    --leak-check-heuristics=heur1,heur2,... which heuristics to use for\n"
    6249 "        reducing leak search false positives [all]\n"
   6250 "        where heur is one of:\n"
   6251 "          stdstring length64 newarray multipleinheritance all none\n"
   6252 "    --show-reachable=yes             same as --show-leak-kinds=all\n"
   6253 "    --show-reachable=no --show-possibly-lost=yes\n"
   6254 "                                     same as --show-leak-kinds=definite,possible\n"
   6255 "    --show-reachable=no --show-possibly-lost=no\n"
   6256 "                                     same as --show-leak-kinds=definite\n"
   6257 "    --xtree-leak=no|yes              output leak result in xtree format? [no]\n"
   6258 "    --xtree-leak-file=<file>         xtree leak report file [xtleak.kcg.%%p]\n"
   6259 "    --undef-value-errors=no|yes      check for undefined value errors [yes]\n"
   6260 "    --track-origins=no|yes           show origins of undefined values? [no]\n"
   6261 "    --partial-loads-ok=no|yes        too hard to explain here; see manual [yes]\n"
   6262 "    --expensive-definedness-checks=no|yes\n"
   6263 "                                     Use extra-precise definedness tracking [no]\n"
   6264 "    --freelist-vol=<number>          volume of freed blocks queue     [20000000]\n"
    6265 "    --freelist-big-blocks=<number>   releases first the blocks with size >= [1000000]\n"
    6266 "    --workaround-gcc296-bugs=no|yes  self-explanatory [no].  Deprecated.\n"
   6267 "                                     Use --ignore-range-below-sp instead.\n"
   6268 "    --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS]   assume given addresses are OK\n"
   6269 "    --ignore-range-below-sp=<number>-<number>  do not report errors for\n"
   6270 "                                     accesses at the given offsets below SP\n"
   6271 "    --malloc-fill=<hexnumber>        fill malloc'd areas with given value\n"
   6272 "    --free-fill=<hexnumber>          fill free'd areas with given value\n"
   6273 "    --keep-stacktraces=alloc|free|alloc-and-free|alloc-then-free|none\n"
   6274 "        stack trace(s) to keep for malloc'd/free'd areas       [alloc-and-free]\n"
   6275 "    --show-mismatched-frees=no|yes   show frees that don't match the allocator? [yes]\n"
   6276    );
   6277 }
   6278 
   6279 static void mc_print_debug_usage(void)
   6280 {
   6281    VG_(printf)(
   6282 "    (none)\n"
   6283    );
   6284 }
   6285 
   6286 
   6287 /*------------------------------------------------------------*/
   6288 /*--- Client blocks                                        ---*/
   6289 /*------------------------------------------------------------*/
   6290 
   6291 /* Client block management:
   6292 
   6293    This is managed as an expanding array of client block descriptors.
   6294    Indices of live descriptors are issued to the client, so it can ask
   6295    to free them later.  Therefore we cannot slide live entries down
   6296    over dead ones.  Instead we must use free/inuse flags and scan for
   6297    an empty slot at allocation time.  This in turn means allocation is
   6298    relatively expensive, so we hope this does not happen too often.
   6299 
   6300    An unused block has start == size == 0
   6301 */
   6302 
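         /* Client-side sketch (illustration only; buf and id are made-up
            names): descriptors are obtained and released via the
            VALGRIND_CREATE_BLOCK / VALGRIND_DISCARD client requests handled
            further below, roughly:

               int id = VALGRIND_CREATE_BLOCK(buf, sizeof(buf), "my buffer");
               ...
               VALGRIND_DISCARD(id);

            The handle returned is the index of the descriptor slot issued by
            alloc_client_block() below. */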
   6303 /* type CGenBlock is defined in mc_include.h */
   6304 
   6305 /* This subsystem is self-initialising. */
   6306 static UWord      cgb_size = 0;
   6307 static UWord      cgb_used = 0;
   6308 static CGenBlock* cgbs     = NULL;
   6309 
   6310 /* Stats for this subsystem. */
   6311 static ULong cgb_used_MAX = 0;   /* Max in use. */
   6312 static ULong cgb_allocs   = 0;   /* Number of allocs. */
   6313 static ULong cgb_discards = 0;   /* Number of discards. */
   6314 static ULong cgb_search   = 0;   /* Number of searches. */
   6315 
   6316 
   6317 /* Get access to the client block array. */
   6318 void MC_(get_ClientBlock_array)( /*OUT*/CGenBlock** blocks,
   6319                                  /*OUT*/UWord* nBlocks )
   6320 {
   6321    *blocks  = cgbs;
   6322    *nBlocks = cgb_used;
   6323 }
   6324 
   6325 
   6326 static
   6327 Int alloc_client_block ( void )
   6328 {
   6329    UWord      i, sz_new;
   6330    CGenBlock* cgbs_new;
   6331 
   6332    cgb_allocs++;
   6333 
   6334    for (i = 0; i < cgb_used; i++) {
   6335       cgb_search++;
   6336       if (cgbs[i].start == 0 && cgbs[i].size == 0)
   6337          return i;
   6338    }
   6339 
   6340    /* Not found.  Try to allocate one at the end. */
   6341    if (cgb_used < cgb_size) {
   6342       cgb_used++;
   6343       return cgb_used-1;
   6344    }
   6345 
   6346    /* Ok, we have to allocate a new one. */
   6347    tl_assert(cgb_used == cgb_size);
   6348    sz_new = (cgbs == NULL) ? 10 : (2 * cgb_size);
   6349 
   6350    cgbs_new = VG_(malloc)( "mc.acb.1", sz_new * sizeof(CGenBlock) );
   6351    for (i = 0; i < cgb_used; i++)
   6352       cgbs_new[i] = cgbs[i];
   6353 
   6354    if (cgbs != NULL)
   6355       VG_(free)( cgbs );
   6356    cgbs = cgbs_new;
   6357 
   6358    cgb_size = sz_new;
   6359    cgb_used++;
   6360    if (cgb_used > cgb_used_MAX)
   6361       cgb_used_MAX = cgb_used;
   6362    return cgb_used-1;
   6363 }
   6364 
   6365 
   6366 static void show_client_block_stats ( void )
   6367 {
   6368    VG_(message)(Vg_DebugMsg,
   6369       "general CBs: %llu allocs, %llu discards, %llu maxinuse, %llu search\n",
   6370       cgb_allocs, cgb_discards, cgb_used_MAX, cgb_search
   6371    );
   6372 }
   6373 static void print_monitor_help ( void )
   6374 {
   6375    VG_(gdb_printf)
   6376       (
   6377 "\n"
   6378 "memcheck monitor commands:\n"
   6379 "  xb <addr> [<len>]\n"
   6380 "        prints validity bits for <len> (or 1) bytes at <addr>\n"
   6381 "            bit values 0 = valid, 1 = invalid, __ = unaddressable byte\n"
    6382 "        Then prints the byte values below the corresponding validity bits\n"
   6383 "        in a layout similar to the gdb command 'x /<len>xb <addr>'\n"
   6384 "        Example: xb 0x8049c78 10\n"
   6385 "  get_vbits <addr> [<len>]\n"
    6386 "        Similar to xb, but only prints the validity bytes, in groups of 4.\n"
   6387 "  make_memory [noaccess|undefined\n"
   6388 "                     |defined|Definedifaddressable] <addr> [<len>]\n"
   6389 "        mark <len> (or 1) bytes at <addr> with the given accessibility\n"
   6390 "  check_memory [addressable|defined] <addr> [<len>]\n"
   6391 "        check that <len> (or 1) bytes at <addr> have the given accessibility\n"
   6392 "            and outputs a description of <addr>\n"
   6393 "  leak_check [full*|summary|xtleak]\n"
   6394 "                [kinds kind1,kind2,...|reachable|possibleleak*|definiteleak]\n"
   6395 "                [heuristics heur1,heur2,...]\n"
   6396 "                [increased*|changed|any]\n"
   6397 "                [unlimited*|limited <max_loss_records_output>]\n"
   6398 "            * = defaults\n"
   6399 "         xtleak produces an xtree full leak result in xtleak.kcg.%%p.%%n\n"
   6400 "       where kind is one of:\n"
   6401 "         definite indirect possible reachable all none\n"
   6402 "       where heur is one of:\n"
   6403 "         stdstring length64 newarray multipleinheritance all none*\n"
   6404 "       Examples: leak_check\n"
   6405 "                 leak_check summary any\n"
   6406 "                 leak_check full kinds indirect,possible\n"
   6407 "                 leak_check full reachable any limited 100\n"
   6408 "  block_list <loss_record_nr>|<loss_record_nr_from>..<loss_record_nr_to>\n"
   6409 "                [unlimited*|limited <max_blocks>]\n"
   6410 "                [heuristics heur1,heur2,...]\n"
   6411 "        after a leak search, shows the list of blocks of <loss_record_nr>\n"
   6412 "        (or of the range <loss_record_nr_from>..<loss_record_nr_to>).\n"
   6413 "        With heuristics, only shows the blocks found via heur1,heur2,...\n"
   6414 "            * = defaults\n"
   6415 "  who_points_at <addr> [<len>]\n"
   6416 "        shows places pointing inside <len> (default 1) bytes at <addr>\n"
   6417 "        (with len 1, only shows \"start pointers\" pointing exactly to <addr>,\n"
   6418 "         with len > 1, will also show \"interior pointers\")\n"
   6419 "  xtmemory [<filename>]\n"
   6420 "        dump xtree memory profile in <filename> (default xtmemory.kcg.%%p.%%n)\n"
   6421 "\n");
   6422 }
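
         /* These commands are normally issued from gdb via the Valgrind
            gdbserver, or from a shell with the standalone vgdb utility,
            for example (illustration only):

               (gdb) monitor xb 0x8049c78 10
               (gdb) monitor leak_check full reachable any

               $ vgdb "leak_check full reachable any"
         */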
   6423 
   6424 /* Print szB bytes at address, with a format similar to the gdb command
   6425    x /<szB>xb address.
   6426    res[i] == 1 indicates the corresponding byte is addressable. */
   6427 static void gdb_xb (Addr address, SizeT szB, Int res[])
   6428 {
   6429    UInt i;
   6430 
   6431    for (i = 0; i < szB; i++) {
   6432       UInt bnr = i % 8;
   6433       if (bnr == 0) {
   6434          if (i != 0)
   6435             VG_(printf) ("\n"); // Terminate previous line
   6436          VG_(printf) ("%p:", (void*)(address+i));
   6437       }
   6438       if (res[i] == 1)
   6439          VG_(printf) ("\t0x%02x", *(UChar*)(address+i));
   6440       else
   6441          VG_(printf) ("\t0x??");
   6442    }
   6443    VG_(printf) ("\n"); // Terminate previous line
   6444 }
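
         /* For example, a fully addressable 8-byte line comes out roughly as
            (illustrative address and data, fields tab-separated):
               0x8049c78:  0x61  0x62  0x63  0x00  0x7f  0x00  0x41  0x42
            with "0x??" printed instead for bytes that are not addressable. */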
   6445 
   6446 
    6447 /* Returns the address of the next non-space character,
    6448    or the address of the string terminator. */
   6449 static HChar* next_non_space (HChar *s)
   6450 {
   6451    while (*s && *s == ' ')
   6452       s++;
   6453    return s;
   6454 }
   6455 
    6456 /* Parse an integer slice, i.e. a single integer or a range of integers.
   6457    Syntax is:
   6458        <integer>[..<integer> ]
   6459    (spaces are allowed before and/or after ..).
   6460    Return True if range correctly parsed, False otherwise. */
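         /* For example (illustration only): "42" yields from == to == 42,
            while "5..10" and "5 .. 10" both yield from == 5, to == 10. */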
   6461 static Bool VG_(parse_slice) (HChar* s, HChar** saveptr,
   6462                               UInt *from, UInt *to)
   6463 {
   6464    HChar* wl;
   6465    HChar *endptr;
    6466    endptr = NULL;
   6467    wl = VG_(strtok_r) (s, " ", saveptr);
   6468 
   6469    /* slice must start with an integer. */
   6470    if (wl == NULL) {
   6471       VG_(gdb_printf) ("expecting integer or slice <from>..<to>\n");
   6472       return False;
   6473    }
   6474    *from = VG_(strtoull10) (wl, &endptr);
   6475    if (endptr == wl) {
   6476       VG_(gdb_printf) ("invalid integer or slice <from>..<to>\n");
   6477       return False;
   6478    }
   6479 
   6480    if (*endptr == '\0' && *next_non_space(*saveptr) != '.') {
   6481       /* wl token is an integer terminating the string
   6482          or else next token does not start with .
   6483          In both cases, the slice is a single integer. */
   6484       *to = *from;
   6485       return True;
   6486    }
   6487 
   6488    if (*endptr == '\0') {
   6489       // iii ..    => get the next token
   6490       wl =  VG_(strtok_r) (NULL, " .", saveptr);
   6491    } else {
   6492       // It must be iii..
    6493       if (*endptr != '.' || *(endptr+1) != '.') {
   6494          VG_(gdb_printf) ("expecting slice <from>..<to>\n");
   6495          return False;
   6496       }
   6497       if ( *(endptr+2) == ' ') {
   6498          // It must be iii.. jjj  => get the next token
   6499          wl =  VG_(strtok_r) (NULL, " .", saveptr);
   6500       } else {
   6501          // It must be iii..jjj
   6502          wl = endptr+2;
   6503       }
   6504    }
   6505 
   6506    *to = VG_(strtoull10) (wl, &endptr);
   6507    if (*endptr != '\0') {
   6508       VG_(gdb_printf) ("missing/wrong 'to' of slice <from>..<to>\n");
   6509       return False;
   6510    }
   6511 
   6512    if (*from > *to) {
   6513       VG_(gdb_printf) ("<from> cannot be bigger than <to> "
   6514                        "in slice <from>..<to>\n");
   6515       return False;
   6516    }
   6517 
   6518    return True;
   6519 }
   6520 
   6521 /* return True if request recognised, False otherwise */
   6522 static Bool handle_gdb_monitor_command (ThreadId tid, HChar *req)
   6523 {
   6524    HChar* wcmd;
   6525    HChar s[VG_(strlen)(req) + 1]; /* copy for strtok_r */
   6526    HChar *ssaveptr;
   6527 
   6528    VG_(strcpy) (s, req);
   6529 
   6530    wcmd = VG_(strtok_r) (s, " ", &ssaveptr);
   6531    /* NB: if possible, avoid introducing a new command below which
   6532       starts with the same first letter(s) as an already existing
   6533       command. This ensures a shorter abbreviation for the user. */
   6534    switch (VG_(keyword_id)
   6535            ("help get_vbits leak_check make_memory check_memory "
   6536             "block_list who_points_at xb xtmemory",
   6537             wcmd, kwd_report_duplicated_matches)) {
   6538    case -2: /* multiple matches */
   6539       return True;
   6540    case -1: /* not found */
   6541       return False;
   6542    case  0: /* help */
   6543       print_monitor_help();
   6544       return True;
   6545    case  1: { /* get_vbits */
   6546       Addr address;
   6547       SizeT szB = 1;
   6548       if (VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) {
   6549          UChar vbits;
   6550          Int i;
   6551          Int unaddressable = 0;
   6552          for (i = 0; i < szB; i++) {
   6553             Int res = mc_get_or_set_vbits_for_client
   6554                (address+i, (Addr) &vbits, 1,
   6555                 False, /* get them */
   6556                 False  /* is client request */ );
    6557             /* about to print the first byte of a new line: print a \n first. */
   6558             if ((i % 32) == 0 && i != 0)
   6559                VG_(printf) ("\n");
    6560             /* about to start the next block of 4: print a space first. */
   6561             else if ((i % 4) == 0 && i != 0)
   6562                VG_(printf) (" ");
   6563             if (res == 1) {
   6564                VG_(printf) ("%02x", vbits);
   6565             } else {
   6566                tl_assert(3 == res);
   6567                unaddressable++;
   6568                VG_(printf) ("__");
   6569             }
   6570          }
   6571          VG_(printf) ("\n");
   6572          if (unaddressable) {
   6573             VG_(printf)
   6574                ("Address %p len %lu has %d bytes unaddressable\n",
   6575                 (void *)address, szB, unaddressable);
   6576          }
   6577       }
   6578       return True;
   6579    }
   6580    case  2: { /* leak_check */
   6581       Int err = 0;
   6582       LeakCheckParams lcp;
   6583       HChar* xt_filename = NULL;
   6584       HChar* kw;
   6585 
   6586       lcp.mode               = LC_Full;
   6587       lcp.show_leak_kinds    = R2S(Possible) | R2S(Unreached);
   6588       lcp.errors_for_leak_kinds = 0; // no errors for interactive leak search.
   6589       lcp.heuristics         = 0;
   6590       lcp.deltamode          = LCD_Increased;
   6591       lcp.max_loss_records_output = 999999999;
   6592       lcp.requested_by_monitor_command = True;
   6593       lcp.xt_filename = NULL;
   6594 
   6595       for (kw = VG_(strtok_r) (NULL, " ", &ssaveptr);
   6596            kw != NULL;
   6597            kw = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
   6598          switch (VG_(keyword_id)
   6599                  ("full summary xtleak "
   6600                   "kinds reachable possibleleak definiteleak "
   6601                   "heuristics "
   6602                   "increased changed any "
   6603                   "unlimited limited ",
   6604                   kw, kwd_report_all)) {
   6605          case -2: err++; break;
   6606          case -1: err++; break;
   6607          case  0: /* full */
   6608             lcp.mode = LC_Full; break;
   6609          case  1: /* summary */
   6610             lcp.mode = LC_Summary; break;
   6611          case  2: /* xtleak */
   6612             lcp.mode = LC_Full;
   6613             xt_filename
   6614                = VG_(expand_file_name)("--xtleak-mc_main.c",
   6615                                        "xtleak.kcg.%p.%n");
   6616             lcp.xt_filename = xt_filename;
   6617             break;
   6618          case  3: { /* kinds */
   6619             wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
   6620             if (wcmd == NULL
   6621                 || !VG_(parse_enum_set)(MC_(parse_leak_kinds_tokens),
   6622                                         True/*allow_all*/,
   6623                                         wcmd,
   6624                                         &lcp.show_leak_kinds)) {
   6625                VG_(gdb_printf) ("missing or malformed leak kinds set\n");
   6626                err++;
   6627             }
   6628             break;
   6629          }
   6630          case  4: /* reachable */
   6631             lcp.show_leak_kinds = MC_(all_Reachedness)();
   6632             break;
   6633          case  5: /* possibleleak */
   6634             lcp.show_leak_kinds
   6635                = R2S(Possible) | R2S(IndirectLeak) | R2S(Unreached);
   6636             break;
   6637          case  6: /* definiteleak */
   6638             lcp.show_leak_kinds = R2S(Unreached);
   6639             break;
   6640          case  7: { /* heuristics */
   6641             wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
   6642             if (wcmd == NULL
   6643                 || !VG_(parse_enum_set)(MC_(parse_leak_heuristics_tokens),
   6644                                         True,/*allow_all*/
   6645                                         wcmd,
   6646                                         &lcp.heuristics)) {
   6647                VG_(gdb_printf) ("missing or malformed heuristics set\n");
   6648                err++;
   6649             }
   6650             break;
   6651          }
   6652          case  8: /* increased */
   6653             lcp.deltamode = LCD_Increased; break;
   6654          case  9: /* changed */
   6655             lcp.deltamode = LCD_Changed; break;
   6656          case 10: /* any */
   6657             lcp.deltamode = LCD_Any; break;
   6658          case 11: /* unlimited */
   6659             lcp.max_loss_records_output = 999999999; break;
   6660          case 12: { /* limited */
   6661             Int int_value;
   6662             const HChar* endptr;
   6663 
   6664             wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
   6665             if (wcmd == NULL) {
   6666                int_value = 0;
   6667                endptr = "empty"; /* to report an error below */
   6668             } else {
   6669                HChar *the_end;
   6670                int_value = VG_(strtoll10) (wcmd, &the_end);
   6671                endptr = the_end;
   6672             }
   6673             if (*endptr != '\0')
   6674                VG_(gdb_printf) ("missing or malformed integer value\n");
   6675             else if (int_value > 0)
   6676                lcp.max_loss_records_output = (UInt) int_value;
   6677             else
   6678                VG_(gdb_printf) ("max_loss_records_output must be >= 1,"
   6679                                 " got %d\n", int_value);
   6680             break;
   6681          }
   6682          default:
   6683             tl_assert (0);
   6684          }
   6685       }
   6686       if (!err)
   6687          MC_(detect_memory_leaks)(tid, &lcp);
   6688       if (xt_filename != NULL)
   6689          VG_(free)(xt_filename);
   6690       return True;
   6691    }
   6692 
   6693    case  3: { /* make_memory */
   6694       Addr address;
   6695       SizeT szB = 1;
   6696       Int kwdid = VG_(keyword_id)
   6697          ("noaccess undefined defined Definedifaddressable",
   6698           VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
   6699       if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
   6700          return True;
   6701       switch (kwdid) {
   6702       case -2: break;
   6703       case -1: break;
   6704       case  0: MC_(make_mem_noaccess) (address, szB); break;
   6705       case  1: make_mem_undefined_w_tid_and_okind ( address, szB, tid,
   6706                                                     MC_OKIND_USER ); break;
   6707       case  2: MC_(make_mem_defined) ( address, szB ); break;
    6708       case  3: make_mem_defined_if_addressable ( address, szB ); break;
   6709       default: tl_assert(0);
   6710       }
   6711       return True;
   6712    }
   6713 
   6714    case  4: { /* check_memory */
   6715       Addr address;
   6716       SizeT szB = 1;
   6717       Addr bad_addr;
   6718       UInt okind;
   6719       const HChar* src;
   6720       UInt otag;
   6721       UInt ecu;
   6722       ExeContext* origin_ec;
   6723       MC_ReadResult res;
   6724 
   6725       Int kwdid = VG_(keyword_id)
   6726          ("addressable defined",
   6727           VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
   6728       if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
   6729          return True;
   6730       switch (kwdid) {
   6731       case -2: break;
   6732       case -1: break;
   6733       case  0: /* addressable */
   6734          if (is_mem_addressable ( address, szB, &bad_addr ))
   6735             VG_(printf) ("Address %p len %lu addressable\n",
   6736                              (void *)address, szB);
   6737          else
   6738             VG_(printf)
   6739                ("Address %p len %lu not addressable:\nbad address %p\n",
   6740                 (void *)address, szB, (void *) bad_addr);
   6741          MC_(pp_describe_addr) (address);
   6742          break;
   6743       case  1: /* defined */
   6744          res = is_mem_defined ( address, szB, &bad_addr, &otag );
   6745          if (MC_AddrErr == res)
   6746             VG_(printf)
   6747                ("Address %p len %lu not addressable:\nbad address %p\n",
   6748                 (void *)address, szB, (void *) bad_addr);
   6749          else if (MC_ValueErr == res) {
   6750             okind = otag & 3;
   6751             switch (okind) {
   6752             case MC_OKIND_STACK:
   6753                src = " was created by a stack allocation"; break;
   6754             case MC_OKIND_HEAP:
   6755                src = " was created by a heap allocation"; break;
   6756             case MC_OKIND_USER:
   6757                src = " was created by a client request"; break;
   6758             case MC_OKIND_UNKNOWN:
   6759                src = ""; break;
   6760             default: tl_assert(0);
   6761             }
   6762             VG_(printf)
   6763                ("Address %p len %lu not defined:\n"
   6764                 "Uninitialised value at %p%s\n",
   6765                 (void *)address, szB, (void *) bad_addr, src);
   6766             ecu = otag & ~3;
   6767             if (VG_(is_plausible_ECU)(ecu)) {
   6768                origin_ec = VG_(get_ExeContext_from_ECU)( ecu );
   6769                VG_(pp_ExeContext)( origin_ec );
   6770             }
   6771          }
   6772          else
   6773             VG_(printf) ("Address %p len %lu defined\n",
   6774                          (void *)address, szB);
   6775          MC_(pp_describe_addr) (address);
   6776          break;
   6777       default: tl_assert(0);
   6778       }
   6779       return True;
   6780    }
   6781 
   6782    case  5: { /* block_list */
   6783       HChar* wl;
   6784       HChar *the_end;
   6785       UInt lr_nr_from = 0;
   6786       UInt lr_nr_to = 0;
   6787 
   6788       if (VG_(parse_slice) (NULL, &ssaveptr, &lr_nr_from, &lr_nr_to)) {
   6789          UInt limit_blocks = 999999999;
   6790          Int int_value;
   6791          UInt heuristics = 0;
   6792 
   6793          for (wl = VG_(strtok_r) (NULL, " ", &ssaveptr);
   6794               wl != NULL;
   6795               wl = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
   6796             switch (VG_(keyword_id) ("unlimited limited heuristics ",
   6797                                      wl,  kwd_report_all)) {
   6798             case -2: return True;
   6799             case -1: return True;
   6800             case  0: /* unlimited */
   6801                limit_blocks = 999999999; break;
   6802             case  1: /* limited */
   6803                wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
   6804                if (wcmd == NULL) {
   6805                   VG_(gdb_printf) ("missing integer value\n");
   6806                   return True;
   6807                }
   6808                int_value = VG_(strtoll10) (wcmd, &the_end);
   6809                if (*the_end != '\0') {
   6810                   VG_(gdb_printf) ("malformed integer value\n");
   6811                   return True;
   6812                }
   6813                if (int_value <= 0) {
   6814                   VG_(gdb_printf) ("max_blocks must be >= 1,"
   6815                                    " got %d\n", int_value);
   6816                   return True;
   6817                }
   6818                limit_blocks = (UInt) int_value;
   6819                break;
   6820             case  2: /* heuristics */
   6821                wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
   6822                if (wcmd == NULL
   6823                    || !VG_(parse_enum_set)(MC_(parse_leak_heuristics_tokens),
   6824                                            True,/*allow_all*/
   6825                                            wcmd,
   6826                                            &heuristics)) {
   6827                   VG_(gdb_printf) ("missing or malformed heuristics set\n");
   6828                   return True;
   6829                }
   6830                break;
   6831             default:
   6832                tl_assert (0);
   6833             }
   6834          }
    6835          /* subtract 1 from lr_nr_from/lr_nr_to as what is shown to the user
   6836             is 1 more than the index in lr_array. */
   6837          if (lr_nr_from == 0 || ! MC_(print_block_list) (lr_nr_from-1,
   6838                                                          lr_nr_to-1,
   6839                                                          limit_blocks,
   6840                                                          heuristics))
   6841             VG_(gdb_printf) ("invalid loss record nr\n");
   6842       }
   6843       return True;
   6844    }
   6845 
   6846    case  6: { /* who_points_at */
   6847       Addr address;
   6848       SizeT szB = 1;
   6849 
   6850       if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
   6851          return True;
   6852       if (address == (Addr) 0) {
   6853          VG_(gdb_printf) ("Cannot search who points at 0x0\n");
   6854          return True;
   6855       }
   6856       MC_(who_points_at) (address, szB);
   6857       return True;
   6858    }
   6859 
   6860    case  7: { /* xb */
   6861       Addr address;
   6862       SizeT szB = 1;
   6863       if (VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) {
   6864          UChar vbits[8];
   6865          Int res[8];
   6866          Int i;
   6867          Int unaddressable = 0;
   6868          for (i = 0; i < szB; i++) {
   6869             Int bnr = i % 8;
   6870             res[bnr] = mc_get_or_set_vbits_for_client
   6871                (address+i, (Addr) &vbits[bnr], 1,
   6872                 False, /* get them */
   6873                 False  /* is client request */ );
    6874             /* We are about to print the first vabits of a new line.
    6875                If needed, terminate the previous line by printing a line with
    6876                the address and the data. */
   6877             if (bnr == 0) {
   6878                if (i != 0) {
   6879                   VG_(printf) ("\n");
   6880                   gdb_xb (address + i - 8, 8, res);
   6881                }
   6882                VG_(printf) ("\t"); // To align VABITS with gdb_xb layout
   6883             }
   6884             if (res[bnr] == 1) {
   6885                VG_(printf) ("\t  %02x", vbits[bnr]);
   6886             } else {
   6887                tl_assert(3 == res[bnr]);
   6888                unaddressable++;
   6889                VG_(printf) ("\t  __");
   6890             }
   6891          }
   6892          VG_(printf) ("\n");
   6893          if (szB % 8 == 0 && szB > 0)
   6894             gdb_xb (address + szB - 8, 8, res);
   6895          else
   6896             gdb_xb (address + szB - szB % 8, szB % 8, res);
   6897          if (unaddressable) {
   6898             VG_(printf)
   6899                ("Address %p len %lu has %d bytes unaddressable\n",
   6900                 (void *)address, szB, unaddressable);
   6901          }
   6902       }
   6903       return True;
   6904    }
   6905 
   6906    case  8: { /* xtmemory */
   6907       HChar* filename;
   6908       filename = VG_(strtok_r) (NULL, " ", &ssaveptr);
   6909       MC_(xtmemory_report)(filename, False);
   6910       return True;
   6911    }
   6912 
   6913    default:
   6914       tl_assert(0);
   6915       return False;
   6916    }
   6917 }
   6918 
   6919 /*------------------------------------------------------------*/
   6920 /*--- Client requests                                      ---*/
   6921 /*------------------------------------------------------------*/
   6922 
   6923 static Bool mc_handle_client_request ( ThreadId tid, UWord* arg, UWord* ret )
   6924 {
   6925    Int   i;
   6926    Addr  bad_addr;
   6927 
   6928    if (!VG_IS_TOOL_USERREQ('M','C',arg[0])
   6929        && VG_USERREQ__MALLOCLIKE_BLOCK != arg[0]
   6930        && VG_USERREQ__RESIZEINPLACE_BLOCK != arg[0]
   6931        && VG_USERREQ__FREELIKE_BLOCK   != arg[0]
   6932        && VG_USERREQ__CREATE_MEMPOOL   != arg[0]
   6933        && VG_USERREQ__DESTROY_MEMPOOL  != arg[0]
   6934        && VG_USERREQ__MEMPOOL_ALLOC    != arg[0]
   6935        && VG_USERREQ__MEMPOOL_FREE     != arg[0]
   6936        && VG_USERREQ__MEMPOOL_TRIM     != arg[0]
   6937        && VG_USERREQ__MOVE_MEMPOOL     != arg[0]
   6938        && VG_USERREQ__MEMPOOL_CHANGE   != arg[0]
   6939        && VG_USERREQ__MEMPOOL_EXISTS   != arg[0]
   6940        && VG_USERREQ__GDB_MONITOR_COMMAND   != arg[0]
   6941        && VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0]
   6942        && VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0])
   6943       return False;
   6944 
   6945    switch (arg[0]) {
   6946       case VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE: {
   6947          Bool ok = is_mem_addressable ( arg[1], arg[2], &bad_addr );
   6948          if (!ok)
   6949             MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 );
   6950          *ret = ok ? (UWord)NULL : bad_addr;
   6951          break;
   6952       }
   6953 
   6954       case VG_USERREQ__CHECK_MEM_IS_DEFINED: {
   6955          Bool errorV    = False;
   6956          Addr bad_addrV = 0;
   6957          UInt otagV     = 0;
   6958          Bool errorA    = False;
   6959          Addr bad_addrA = 0;
   6960          is_mem_defined_comprehensive(
   6961             arg[1], arg[2],
   6962             &errorV, &bad_addrV, &otagV, &errorA, &bad_addrA
   6963          );
   6964          if (errorV) {
   6965             MC_(record_user_error) ( tid, bad_addrV,
   6966                                      /*isAddrErr*/False, otagV );
   6967          }
   6968          if (errorA) {
   6969             MC_(record_user_error) ( tid, bad_addrA,
   6970                                      /*isAddrErr*/True, 0 );
   6971          }
   6972          /* Return the lower of the two erring addresses, if any. */
   6973          *ret = 0;
   6974          if (errorV && !errorA) {
   6975             *ret = bad_addrV;
   6976          }
   6977          if (!errorV && errorA) {
   6978             *ret = bad_addrA;
   6979          }
   6980          if (errorV && errorA) {
   6981             *ret = bad_addrV < bad_addrA ? bad_addrV : bad_addrA;
   6982          }
   6983          break;
   6984       }
   6985 
   6986       case VG_USERREQ__DO_LEAK_CHECK: {
   6987          LeakCheckParams lcp;
   6988 
   6989          if (arg[1] == 0)
   6990             lcp.mode = LC_Full;
   6991          else if (arg[1] == 1)
   6992             lcp.mode = LC_Summary;
   6993          else {
   6994             VG_(message)(Vg_UserMsg,
   6995                          "Warning: unknown memcheck leak search mode\n");
   6996             lcp.mode = LC_Full;
   6997          }
   6998 
   6999          lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
   7000          lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
   7001          lcp.heuristics = MC_(clo_leak_check_heuristics);
   7002 
   7003          if (arg[2] == 0)
   7004             lcp.deltamode = LCD_Any;
   7005          else if (arg[2] == 1)
   7006             lcp.deltamode = LCD_Increased;
   7007          else if (arg[2] == 2)
   7008             lcp.deltamode = LCD_Changed;
   7009          else {
   7010             VG_(message)
   7011                (Vg_UserMsg,
   7012                 "Warning: unknown memcheck leak search deltamode\n");
   7013             lcp.deltamode = LCD_Any;
   7014          }
   7015          lcp.max_loss_records_output = 999999999;
   7016          lcp.requested_by_monitor_command = False;
   7017          lcp.xt_filename = NULL;
   7018 
   7019          MC_(detect_memory_leaks)(tid, &lcp);
   7020          *ret = 0; /* return value is meaningless */
   7021          break;
   7022       }
   7023 
   7024       case VG_USERREQ__MAKE_MEM_NOACCESS:
   7025          MC_(make_mem_noaccess) ( arg[1], arg[2] );
   7026          *ret = -1;
   7027          break;
   7028 
   7029       case VG_USERREQ__MAKE_MEM_UNDEFINED:
   7030          make_mem_undefined_w_tid_and_okind ( arg[1], arg[2], tid,
   7031                                               MC_OKIND_USER );
   7032          *ret = -1;
   7033          break;
   7034 
   7035       case VG_USERREQ__MAKE_MEM_DEFINED:
   7036          MC_(make_mem_defined) ( arg[1], arg[2] );
   7037          *ret = -1;
   7038          break;
   7039 
   7040       case VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE:
   7041          make_mem_defined_if_addressable ( arg[1], arg[2] );
   7042          *ret = -1;
   7043          break;
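
               /* Client-side, the four requests above correspond to the
                  memcheck.h macros (illustration only; p and n stand for the
                  client's address and length):
                     VALGRIND_MAKE_MEM_NOACCESS(p, n);
                     VALGRIND_MAKE_MEM_UNDEFINED(p, n);
                     VALGRIND_MAKE_MEM_DEFINED(p, n);
                     VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(p, n); */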
   7044 
   7045       case VG_USERREQ__CREATE_BLOCK: /* describe a block */
   7046          if (arg[1] != 0 && arg[2] != 0) {
   7047             i = alloc_client_block();
   7048             /* VG_(printf)("allocated %d %p\n", i, cgbs); */
   7049             cgbs[i].start = arg[1];
   7050             cgbs[i].size  = arg[2];
   7051             cgbs[i].desc  = VG_(strdup)("mc.mhcr.1", (HChar *)arg[3]);
   7052             cgbs[i].where = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ );
   7053             *ret = i;
   7054          } else
   7055             *ret = -1;
   7056          break;
   7057 
   7058       case VG_USERREQ__DISCARD: /* discard */
   7059          if (cgbs == NULL
   7060              || arg[2] >= cgb_used ||
   7061              (cgbs[arg[2]].start == 0 && cgbs[arg[2]].size == 0)) {
   7062             *ret = 1;
   7063          } else {
   7064             tl_assert(arg[2] >= 0 && arg[2] < cgb_used);
   7065             cgbs[arg[2]].start = cgbs[arg[2]].size = 0;
   7066             VG_(free)(cgbs[arg[2]].desc);
   7067             cgb_discards++;
   7068             *ret = 0;
   7069          }
   7070          break;
   7071 
   7072       case VG_USERREQ__GET_VBITS:
   7073          *ret = mc_get_or_set_vbits_for_client
   7074                    ( arg[1], arg[2], arg[3],
   7075                      False /* get them */,
   7076                      True /* is client request */ );
   7077          break;
   7078 
   7079       case VG_USERREQ__SET_VBITS:
   7080          *ret = mc_get_or_set_vbits_for_client
   7081                    ( arg[1], arg[2], arg[3],
   7082                      True /* set them */,
   7083                      True /* is client request */ );
   7084          break;
   7085 
   7086       case VG_USERREQ__COUNT_LEAKS: { /* count leaked bytes */
   7087          UWord** argp = (UWord**)arg;
   7088          // MC_(bytes_leaked) et al were set by the last leak check (or zero
   7089          // if no prior leak checks performed).
   7090          *argp[1] = MC_(bytes_leaked) + MC_(bytes_indirect);
   7091          *argp[2] = MC_(bytes_dubious);
   7092          *argp[3] = MC_(bytes_reachable);
   7093          *argp[4] = MC_(bytes_suppressed);
   7094          // there is no argp[5]
   7095          //*argp[5] = MC_(bytes_indirect);
   7096          // XXX need to make *argp[1-4] defined;  currently done in the
   7097          // VALGRIND_COUNT_LEAKS_MACRO by initialising them to zero.
   7098          *ret = 0;
   7099          return True;
   7100       }
   7101       case VG_USERREQ__COUNT_LEAK_BLOCKS: { /* count leaked blocks */
   7102          UWord** argp = (UWord**)arg;
   7103          // MC_(blocks_leaked) et al were set by the last leak check (or zero
   7104          // if no prior leak checks performed).
   7105          *argp[1] = MC_(blocks_leaked) + MC_(blocks_indirect);
   7106          *argp[2] = MC_(blocks_dubious);
   7107          *argp[3] = MC_(blocks_reachable);
   7108          *argp[4] = MC_(blocks_suppressed);
   7109          // there is no argp[5]
   7110          //*argp[5] = MC_(blocks_indirect);
   7111          // XXX need to make *argp[1-4] defined;  currently done in the
   7112          // VALGRIND_COUNT_LEAK_BLOCKS_MACRO by initialising them to zero.
   7113          *ret = 0;
   7114          return True;
   7115       }
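               /* Client-side, the two counting requests above are normally
                  issued via the memcheck.h macros, e.g. (illustration only):

                     unsigned long leaked, dubious, reachable, suppressed;
                     VALGRIND_DO_LEAK_CHECK;
                     VALGRIND_COUNT_LEAKS(leaked, dubious, reachable, suppressed);

                  The macros pre-initialise their output arguments to zero, as
                  noted above. */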
   7116       case VG_USERREQ__MALLOCLIKE_BLOCK: {
   7117          Addr p         = (Addr)arg[1];
   7118          SizeT sizeB    =       arg[2];
   7119          UInt rzB       =       arg[3];
   7120          Bool is_zeroed = (Bool)arg[4];
   7121 
   7122          MC_(new_block) ( tid, p, sizeB, /*ignored*/0, is_zeroed,
   7123                           MC_AllocCustom, MC_(malloc_list) );
   7124          if (rzB > 0) {
   7125             MC_(make_mem_noaccess) ( p - rzB, rzB);
   7126             MC_(make_mem_noaccess) ( p + sizeB, rzB);
   7127          }
   7128          return True;
   7129       }
   7130       case VG_USERREQ__RESIZEINPLACE_BLOCK: {
   7131          Addr p         = (Addr)arg[1];
   7132          SizeT oldSizeB =       arg[2];
   7133          SizeT newSizeB =       arg[3];
   7134          UInt rzB       =       arg[4];
   7135 
   7136          MC_(handle_resizeInPlace) ( tid, p, oldSizeB, newSizeB, rzB );
   7137          return True;
   7138       }
   7139       case VG_USERREQ__FREELIKE_BLOCK: {
   7140          Addr p         = (Addr)arg[1];
   7141          UInt rzB       =       arg[2];
   7142 
   7143          MC_(handle_free) ( tid, p, rzB, MC_AllocCustom );
   7144          return True;
   7145       }
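
               /* Client-side sketch of how the MALLOCLIKE / RESIZEINPLACE /
                  FREELIKE requests are typically driven by a custom allocator
                  (illustration only; my_pool_alloc and my_pool_free are
                  made-up names):

                     void* p = my_pool_alloc(pool, 100);
                     VALGRIND_MALLOCLIKE_BLOCK(p, 100, 0, 0);  // rzB 0, not zeroed
                     ...
                     VALGRIND_FREELIKE_BLOCK(p, 0);            // rzB 0
                     my_pool_free(pool, p);
               */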
   7146 
   7147       case _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR: {
   7148          HChar* s  = (HChar*)arg[1];
   7149          Addr  dst = (Addr) arg[2];
   7150          Addr  src = (Addr) arg[3];
   7151          SizeT len = (SizeT)arg[4];
   7152          MC_(record_overlap_error)(tid, s, src, dst, len);
   7153          return True;
   7154       }
   7155 
   7156       case VG_USERREQ__CREATE_MEMPOOL: {
   7157          Addr pool      = (Addr)arg[1];
   7158          UInt rzB       =       arg[2];
   7159          Bool is_zeroed = (Bool)arg[3];
   7160          UInt flags     =       arg[4];
   7161 
    7162          // The create_mempool function does not take the raw mempool flags;
    7163          // pass them as individual booleans.
   7164          MC_(create_mempool) ( pool, rzB, is_zeroed,
   7165                                (flags & VALGRIND_MEMPOOL_AUTO_FREE),
   7166                                (flags & VALGRIND_MEMPOOL_METAPOOL) );
   7167          return True;
   7168       }
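
               /* The mempool requests in this group mirror the client-side
                  macros in valgrind.h; a minimal illustrative sequence (pool
                  and chunk are made-up names):

                     VALGRIND_CREATE_MEMPOOL(pool, 0, 0);
                     VALGRIND_MEMPOOL_ALLOC(pool, chunk, 64);
                     VALGRIND_MEMPOOL_FREE(pool, chunk);
                     VALGRIND_DESTROY_MEMPOOL(pool);
               */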
   7169 
   7170       case VG_USERREQ__DESTROY_MEMPOOL: {
   7171          Addr pool      = (Addr)arg[1];
   7172 
   7173          MC_(destroy_mempool) ( pool );
   7174          return True;
   7175       }
   7176 
   7177       case VG_USERREQ__MEMPOOL_ALLOC: {
   7178          Addr pool      = (Addr)arg[1];
   7179          Addr addr      = (Addr)arg[2];
   7180          UInt size      =       arg[3];
   7181 
   7182          MC_(mempool_alloc) ( tid, pool, addr, size );
   7183          return True;
   7184       }
   7185 
   7186       case VG_USERREQ__MEMPOOL_FREE: {
   7187          Addr pool      = (Addr)arg[1];
   7188          Addr addr      = (Addr)arg[2];
   7189 
   7190          MC_(mempool_free) ( pool, addr );
   7191          return True;
   7192       }
   7193 
   7194       case VG_USERREQ__MEMPOOL_TRIM: {
   7195          Addr pool      = (Addr)arg[1];
   7196          Addr addr      = (Addr)arg[2];
   7197          UInt size      =       arg[3];
   7198 
   7199          MC_(mempool_trim) ( pool, addr, size );
   7200          return True;
   7201       }
   7202 
   7203       case VG_USERREQ__MOVE_MEMPOOL: {
   7204          Addr poolA     = (Addr)arg[1];
   7205          Addr poolB     = (Addr)arg[2];
   7206 
   7207          MC_(move_mempool) ( poolA, poolB );
   7208          return True;
   7209       }
   7210 
   7211       case VG_USERREQ__MEMPOOL_CHANGE: {
   7212          Addr pool      = (Addr)arg[1];
   7213          Addr addrA     = (Addr)arg[2];
   7214          Addr addrB     = (Addr)arg[3];
   7215          UInt size      =       arg[4];
   7216 
   7217          MC_(mempool_change) ( pool, addrA, addrB, size );
   7218          return True;
   7219       }
   7220 
   7221       case VG_USERREQ__MEMPOOL_EXISTS: {
   7222          Addr pool      = (Addr)arg[1];
   7223 
   7224          *ret = (UWord) MC_(mempool_exists) ( pool );
    7225          return True;
   7226       }
   7227 
   7228       case VG_USERREQ__GDB_MONITOR_COMMAND: {
   7229          Bool handled = handle_gdb_monitor_command (tid, (HChar*)arg[1]);
   7230          if (handled)
   7231             *ret = 1;
   7232          else
   7233             *ret = 0;
   7234          return handled;
   7235       }
   7236 
   7237       case VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE:
   7238       case VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE: {
   7239          Bool addRange
   7240             = arg[0] == VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE;
   7241          Bool ok
   7242             = modify_ignore_ranges(addRange, arg[1], arg[2]);
   7243          *ret = ok ? 1 : 0;
   7244          return True;
   7245       }
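
               /* Client-side these correspond to the memcheck.h macros
                  VALGRIND_DISABLE_ADDR_ERROR_REPORTING_IN_RANGE and
                  VALGRIND_ENABLE_ADDR_ERROR_REPORTING_IN_RANGE, e.g.
                  (illustration only):

                     VALGRIND_DISABLE_ADDR_ERROR_REPORTING_IN_RANGE(buf, 4096);
                     ... accesses in [buf, buf+4096) raise no addressing errors ...
                     VALGRIND_ENABLE_ADDR_ERROR_REPORTING_IN_RANGE(buf, 4096);
               */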
   7246 
   7247       default:
   7248          VG_(message)(
   7249             Vg_UserMsg,
   7250             "Warning: unknown memcheck client request code %llx\n",
   7251             (ULong)arg[0]
   7252          );
   7253          return False;
   7254    }
   7255    return True;
   7256 }
   7257 
   7258 
   7259 /*------------------------------------------------------------*/
   7260 /*--- Crude profiling machinery.                           ---*/
   7261 /*------------------------------------------------------------*/
   7262 
   7263 // We track a number of interesting events (using PROF_EVENT)
   7264 // if MC_PROFILE_MEMORY is defined.
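         // MC_PROFILE_MEMORY is off by default; it is expected to be enabled
         // at build time (typically by defining it in mc_include.h), after
         // which done_prof_mem() below prints the counters at exit.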
   7265 
   7266 #ifdef MC_PROFILE_MEMORY
   7267 
   7268 ULong  MC_(event_ctr)[MCPE_LAST];
   7269 
   7270 /* Event counter names. Use the name of the function that increases the
    7271    event counter. Drop any MC_() and mc_ prefixes. */
   7272 static const HChar* MC_(event_ctr_name)[MCPE_LAST] = {
   7273    [MCPE_LOADVN_SLOW] = "LOADVn_slow",
   7274    [MCPE_LOADVN_SLOW_LOOP] = "LOADVn_slow_loop",
   7275    [MCPE_STOREVN_SLOW] = "STOREVn_slow",
   7276    [MCPE_STOREVN_SLOW_LOOP] = "STOREVn_slow(loop)",
   7277    [MCPE_MAKE_ALIGNED_WORD32_UNDEFINED] = "make_aligned_word32_undefined",
   7278    [MCPE_MAKE_ALIGNED_WORD32_UNDEFINED_SLOW] =
   7279         "make_aligned_word32_undefined_slow",
   7280    [MCPE_MAKE_ALIGNED_WORD64_UNDEFINED] = "make_aligned_word64_undefined",
   7281    [MCPE_MAKE_ALIGNED_WORD64_UNDEFINED_SLOW] =
   7282         "make_aligned_word64_undefined_slow",
   7283    [MCPE_MAKE_ALIGNED_WORD32_NOACCESS] = "make_aligned_word32_noaccess",
   7284    [MCPE_MAKE_ALIGNED_WORD32_NOACCESS_SLOW] =
   7285          "make_aligned_word32_noaccess_slow",
   7286    [MCPE_MAKE_ALIGNED_WORD64_NOACCESS] = "make_aligned_word64_noaccess",
   7287    [MCPE_MAKE_ALIGNED_WORD64_NOACCESS_SLOW] =
   7288         "make_aligned_word64_noaccess_slow",
   7289    [MCPE_MAKE_MEM_NOACCESS] = "make_mem_noaccess",
   7290    [MCPE_MAKE_MEM_UNDEFINED] = "make_mem_undefined",
   7291    [MCPE_MAKE_MEM_UNDEFINED_W_OTAG] = "make_mem_undefined_w_otag",
   7292    [MCPE_MAKE_MEM_DEFINED] = "make_mem_defined",
   7293    [MCPE_CHEAP_SANITY_CHECK] = "cheap_sanity_check",
   7294    [MCPE_EXPENSIVE_SANITY_CHECK] = "expensive_sanity_check",
   7295    [MCPE_COPY_ADDRESS_RANGE_STATE] = "copy_address_range_state",
   7296    [MCPE_COPY_ADDRESS_RANGE_STATE_LOOP1] = "copy_address_range_state(loop1)",
   7297    [MCPE_COPY_ADDRESS_RANGE_STATE_LOOP2] = "copy_address_range_state(loop2)",
   7298    [MCPE_CHECK_MEM_IS_NOACCESS] = "check_mem_is_noaccess",
   7299    [MCPE_CHECK_MEM_IS_NOACCESS_LOOP] = "check_mem_is_noaccess(loop)",
   7300    [MCPE_IS_MEM_ADDRESSABLE] = "is_mem_addressable",
   7301    [MCPE_IS_MEM_ADDRESSABLE_LOOP] = "is_mem_addressable(loop)",
   7302    [MCPE_IS_MEM_DEFINED] = "is_mem_defined",
   7303    [MCPE_IS_MEM_DEFINED_LOOP] = "is_mem_defined(loop)",
   7304    [MCPE_IS_MEM_DEFINED_COMPREHENSIVE] = "is_mem_defined_comprehensive",
   7305    [MCPE_IS_MEM_DEFINED_COMPREHENSIVE_LOOP] =
   7306         "is_mem_defined_comprehensive(loop)",
   7307    [MCPE_IS_DEFINED_ASCIIZ] = "is_defined_asciiz",
   7308    [MCPE_IS_DEFINED_ASCIIZ_LOOP] = "is_defined_asciiz(loop)",
   7309    [MCPE_FIND_CHUNK_FOR_OLD] = "find_chunk_for_OLD",
   7310    [MCPE_FIND_CHUNK_FOR_OLD_LOOP] = "find_chunk_for_OLD(loop)",
   7311    [MCPE_SET_ADDRESS_RANGE_PERMS] = "set_address_range_perms",
   7312    [MCPE_SET_ADDRESS_RANGE_PERMS_SINGLE_SECMAP] =
   7313         "set_address_range_perms(single-secmap)",
   7314    [MCPE_SET_ADDRESS_RANGE_PERMS_STARTOF_SECMAP] =
   7315         "set_address_range_perms(startof-secmap)",
   7316    [MCPE_SET_ADDRESS_RANGE_PERMS_MULTIPLE_SECMAPS] =
    7317         "set_address_range_perms(multiple-secmaps)",
   7318    [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1] =
   7319         "set_address_range_perms(dist-sm1)",
   7320    [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2] =
   7321         "set_address_range_perms(dist-sm2)",
   7322    [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1_QUICK] =
   7323         "set_address_range_perms(dist-sm1-quick)",
   7324    [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2_QUICK] =
   7325         "set_address_range_perms(dist-sm2-quick)",
   7326    [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1A] = "set_address_range_perms(loop1a)",
   7327    [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1B] = "set_address_range_perms(loop1b)",
   7328    [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1C] = "set_address_range_perms(loop1c)",
   7329    [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8A] = "set_address_range_perms(loop8a)",
   7330    [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8B] = "set_address_range_perms(loop8b)",
   7331    [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K] = "set_address_range_perms(loop64K)",
   7332    [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K_FREE_DIST_SM] =
   7333         "set_address_range_perms(loop64K-free-dist-sm)",
   7334    [MCPE_LOADV_128_OR_256_SLOW_LOOP] = "LOADV_128_or_256_slow(loop)",
   7335    [MCPE_LOADV_128_OR_256]       = "LOADV_128_or_256",
   7336    [MCPE_LOADV_128_OR_256_SLOW1] = "LOADV_128_or_256-slow1",
   7337    [MCPE_LOADV_128_OR_256_SLOW2] = "LOADV_128_or_256-slow2",
   7338    [MCPE_LOADV64]        = "LOADV64",
   7339    [MCPE_LOADV64_SLOW1]  = "LOADV64-slow1",
   7340    [MCPE_LOADV64_SLOW2]  = "LOADV64-slow2",
   7341    [MCPE_STOREV64]       = "STOREV64",
   7342    [MCPE_STOREV64_SLOW1] = "STOREV64-slow1",
   7343    [MCPE_STOREV64_SLOW2] = "STOREV64-slow2",
   7344    [MCPE_STOREV64_SLOW3] = "STOREV64-slow3",
   7345    [MCPE_STOREV64_SLOW4] = "STOREV64-slow4",
   7346    [MCPE_LOADV32]        = "LOADV32",
   7347    [MCPE_LOADV32_SLOW1]  = "LOADV32-slow1",
   7348    [MCPE_LOADV32_SLOW2]  = "LOADV32-slow2",
   7349    [MCPE_STOREV32]       = "STOREV32",
   7350    [MCPE_STOREV32_SLOW1] = "STOREV32-slow1",
   7351    [MCPE_STOREV32_SLOW2] = "STOREV32-slow2",
   7352    [MCPE_STOREV32_SLOW3] = "STOREV32-slow3",
   7353    [MCPE_STOREV32_SLOW4] = "STOREV32-slow4",
   7354    [MCPE_LOADV16]        = "LOADV16",
   7355    [MCPE_LOADV16_SLOW1]  = "LOADV16-slow1",
   7356    [MCPE_LOADV16_SLOW2]  = "LOADV16-slow2",
   7357    [MCPE_STOREV16]       = "STOREV16",
   7358    [MCPE_STOREV16_SLOW1] = "STOREV16-slow1",
   7359    [MCPE_STOREV16_SLOW2] = "STOREV16-slow2",
   7360    [MCPE_STOREV16_SLOW3] = "STOREV16-slow3",
   7361    [MCPE_STOREV16_SLOW4] = "STOREV16-slow4",
   7362    [MCPE_LOADV8]         = "LOADV8",
   7363    [MCPE_LOADV8_SLOW1]   = "LOADV8-slow1",
   7364    [MCPE_LOADV8_SLOW2]   = "LOADV8-slow2",
   7365    [MCPE_STOREV8]        = "STOREV8",
   7366    [MCPE_STOREV8_SLOW1]  = "STOREV8-slow1",
   7367    [MCPE_STOREV8_SLOW2]  = "STOREV8-slow2",
   7368    [MCPE_STOREV8_SLOW3]  = "STOREV8-slow3",
   7369    [MCPE_STOREV8_SLOW4]  = "STOREV8-slow4",
   7370    [MCPE_NEW_MEM_STACK_4]   = "new_mem_stack_4",
   7371    [MCPE_NEW_MEM_STACK_8]   = "new_mem_stack_8",
   7372    [MCPE_NEW_MEM_STACK_12]  = "new_mem_stack_12",
   7373    [MCPE_NEW_MEM_STACK_16]  = "new_mem_stack_16",
   7374    [MCPE_NEW_MEM_STACK_32]  = "new_mem_stack_32",
   7375    [MCPE_NEW_MEM_STACK_112] = "new_mem_stack_112",
   7376    [MCPE_NEW_MEM_STACK_128] = "new_mem_stack_128",
   7377    [MCPE_NEW_MEM_STACK_144] = "new_mem_stack_144",
   7378    [MCPE_NEW_MEM_STACK_160] = "new_mem_stack_160",
   7379    [MCPE_DIE_MEM_STACK_4]   = "die_mem_stack_4",
   7380    [MCPE_DIE_MEM_STACK_8]   = "die_mem_stack_8",
   7381    [MCPE_DIE_MEM_STACK_12]  = "die_mem_stack_12",
   7382    [MCPE_DIE_MEM_STACK_16]  = "die_mem_stack_16",
   7383    [MCPE_DIE_MEM_STACK_32]  = "die_mem_stack_32",
   7384    [MCPE_DIE_MEM_STACK_112] = "die_mem_stack_112",
   7385    [MCPE_DIE_MEM_STACK_128] = "die_mem_stack_128",
   7386    [MCPE_DIE_MEM_STACK_144] = "die_mem_stack_144",
   7387    [MCPE_DIE_MEM_STACK_160] = "die_mem_stack_160",
   7388    [MCPE_NEW_MEM_STACK]     = "new_mem_stack",
   7389    [MCPE_DIE_MEM_STACK]     = "die_mem_stack",
   7390    [MCPE_MAKE_STACK_UNINIT_W_O]      = "MAKE_STACK_UNINIT_w_o",
   7391    [MCPE_MAKE_STACK_UNINIT_NO_O]     = "MAKE_STACK_UNINIT_no_o",
   7392    [MCPE_MAKE_STACK_UNINIT_128_NO_O] = "MAKE_STACK_UNINIT_128_no_o",
   7393    [MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_16]
   7394                                      = "MAKE_STACK_UNINIT_128_no_o_aligned_16",
   7395    [MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_8]
   7396                                      = "MAKE_STACK_UNINIT_128_no_o_aligned_8",
   7397    [MCPE_MAKE_STACK_UNINIT_128_NO_O_SLOWCASE]
   7398                                      = "MAKE_STACK_UNINIT_128_no_o_slowcase",
   7399 };
   7400 
   7401 static void init_prof_mem ( void )
   7402 {
   7403    Int i, name_count = 0;
   7404 
   7405    for (i = 0; i < MCPE_LAST; i++) {
   7406       MC_(event_ctr)[i] = 0;
   7407       if (MC_(event_ctr_name)[i] != NULL)
   7408          ++name_count;
   7409    }
   7410 
   7411    /* Make sure every profiling event has a name */
   7412    tl_assert(name_count == MCPE_LAST);
   7413 }
   7414 
   7415 static void done_prof_mem ( void )
   7416 {
   7417    Int  i, n;
   7418    Bool spaced = False;
   7419    for (i = n = 0; i < MCPE_LAST; i++) {
   7420       if (!spaced && (n % 10) == 0) {
   7421          VG_(printf)("\n");
   7422          spaced = True;
   7423       }
   7424       if (MC_(event_ctr)[i] > 0) {
   7425          spaced = False;
   7426          ++n;
   7427          VG_(printf)( "prof mem event %3d: %11llu   %s\n",
   7428                       i, MC_(event_ctr)[i],
   7429                       MC_(event_ctr_name)[i]);
   7430       }
   7431    }
   7432 }
   7433 
   7434 #else
   7435 
   7436 static void init_prof_mem ( void ) { }
   7437 static void done_prof_mem ( void ) { }
   7438 
   7439 #endif
   7440 
   7441 
   7442 /*------------------------------------------------------------*/
   7443 /*--- Origin tracking stuff                                ---*/
   7444 /*------------------------------------------------------------*/
   7445 
   7446 /*--------------------------------------------*/
   7447 /*--- Origin tracking: load handlers       ---*/
   7448 /*--------------------------------------------*/
   7449 
   7450 static INLINE UInt merge_origins ( UInt or1, UInt or2 ) {
   7451    return or1 > or2 ? or1 : or2;
   7452 }
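
         /* Taking the numerically larger tag is just a cheap, deterministic
            way of keeping one of the two origins; in particular a tag of 0
            (meaning "no origin information") never wins over a real origin.
            Illustration only:
               merge_origins(0,      0x1234) == 0x1234
               merge_origins(0x1234, 0x5678) == 0x5678
         */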
   7453 
   7454 UWord VG_REGPARM(1) MC_(helperc_b_load1)( Addr a ) {
   7455    OCacheLine* line;
   7456    UChar descr;
   7457    UWord lineoff = oc_line_offset(a);
   7458    UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
   7459 
   7460    if (OC_ENABLE_ASSERTIONS) {
   7461       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   7462    }
   7463 
   7464    line = find_OCacheLine( a );
   7465 
   7466    descr = line->descr[lineoff];
   7467    if (OC_ENABLE_ASSERTIONS) {
   7468       tl_assert(descr < 0x10);
   7469    }
   7470 
   7471    if (LIKELY(0 == (descr & (1 << byteoff))))  {
   7472       return 0;
   7473    } else {
   7474       return line->w32[lineoff];
   7475    }
   7476 }
   7477 
   7478 UWord VG_REGPARM(1) MC_(helperc_b_load2)( Addr a ) {
   7479    OCacheLine* line;
   7480    UChar descr;
   7481    UWord lineoff, byteoff;
   7482 
   7483    if (UNLIKELY(a & 1)) {
   7484       /* Handle misaligned case, slowly. */
   7485       UInt oLo   = (UInt)MC_(helperc_b_load1)( a + 0 );
   7486       UInt oHi   = (UInt)MC_(helperc_b_load1)( a + 1 );
   7487       return merge_origins(oLo, oHi);
   7488    }
   7489 
   7490    lineoff = oc_line_offset(a);
   7491    byteoff = a & 3; /* 0 or 2 */
   7492 
   7493    if (OC_ENABLE_ASSERTIONS) {
   7494       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   7495    }
   7496    line = find_OCacheLine( a );
   7497 
   7498    descr = line->descr[lineoff];
   7499    if (OC_ENABLE_ASSERTIONS) {
   7500       tl_assert(descr < 0x10);
   7501    }
   7502 
   7503    if (LIKELY(0 == (descr & (3 << byteoff)))) {
   7504       return 0;
   7505    } else {
   7506       return line->w32[lineoff];
   7507    }
   7508 }
   7509 
   7510 UWord VG_REGPARM(1) MC_(helperc_b_load4)( Addr a ) {
   7511    OCacheLine* line;
   7512    UChar descr;
   7513    UWord lineoff;
   7514 
   7515    if (UNLIKELY(a & 3)) {
   7516       /* Handle misaligned case, slowly. */
   7517       UInt oLo   = (UInt)MC_(helperc_b_load2)( a + 0 );
   7518       UInt oHi   = (UInt)MC_(helperc_b_load2)( a + 2 );
   7519       return merge_origins(oLo, oHi);
   7520    }
   7521 
   7522    lineoff = oc_line_offset(a);
   7523    if (OC_ENABLE_ASSERTIONS) {
   7524       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   7525    }
   7526 
   7527    line = find_OCacheLine( a );
   7528 
   7529    descr = line->descr[lineoff];
   7530    if (OC_ENABLE_ASSERTIONS) {
   7531       tl_assert(descr < 0x10);
   7532    }
   7533 
   7534    if (LIKELY(0 == descr)) {
   7535       return 0;
   7536    } else {
   7537       return line->w32[lineoff];
   7538    }
   7539 }
   7540 
   7541 UWord VG_REGPARM(1) MC_(helperc_b_load8)( Addr a ) {
   7542    OCacheLine* line;
   7543    UChar descrLo, descrHi, descr;
   7544    UWord lineoff;
   7545 
   7546    if (UNLIKELY(a & 7)) {
   7547       /* Handle misaligned case, slowly. */
   7548       UInt oLo   = (UInt)MC_(helperc_b_load4)( a + 0 );
   7549       UInt oHi   = (UInt)MC_(helperc_b_load4)( a + 4 );
   7550       return merge_origins(oLo, oHi);
   7551    }
   7552 
   7553    lineoff = oc_line_offset(a);
   7554    if (OC_ENABLE_ASSERTIONS) {
    7555       tl_assert(lineoff == (lineoff & 6)); /* 0, 2, 4 or 6, since 8-aligned */
   7556    }
   7557 
   7558    line = find_OCacheLine( a );
   7559 
   7560    descrLo = line->descr[lineoff + 0];
   7561    descrHi = line->descr[lineoff + 1];
   7562    descr   = descrLo | descrHi;
   7563    if (OC_ENABLE_ASSERTIONS) {
   7564       tl_assert(descr < 0x10);
   7565    }
   7566 
   7567    if (LIKELY(0 == descr)) {
   7568       return 0; /* both 32-bit chunks are defined */
   7569    } else {
   7570       UInt oLo = descrLo == 0 ? 0 : line->w32[lineoff + 0];
   7571       UInt oHi = descrHi == 0 ? 0 : line->w32[lineoff + 1];
   7572       return merge_origins(oLo, oHi);
   7573    }
   7574 }
   7575 
   7576 UWord VG_REGPARM(1) MC_(helperc_b_load16)( Addr a ) {
   7577    UInt oLo   = (UInt)MC_(helperc_b_load8)( a + 0 );
   7578    UInt oHi   = (UInt)MC_(helperc_b_load8)( a + 8 );
   7579    UInt oBoth = merge_origins(oLo, oHi);
   7580    return (UWord)oBoth;
   7581 }
   7582 
   7583 UWord VG_REGPARM(1) MC_(helperc_b_load32)( Addr a ) {
   7584    UInt oQ0   = (UInt)MC_(helperc_b_load8)( a + 0 );
   7585    UInt oQ1   = (UInt)MC_(helperc_b_load8)( a + 8 );
   7586    UInt oQ2   = (UInt)MC_(helperc_b_load8)( a + 16 );
   7587    UInt oQ3   = (UInt)MC_(helperc_b_load8)( a + 24 );
   7588    UInt oAll  = merge_origins(merge_origins(oQ0, oQ1),
   7589                               merge_origins(oQ2, oQ3));
   7590    return (UWord)oAll;
   7591 }
   7592 
   7593 
   7594 /*--------------------------------------------*/
   7595 /*--- Origin tracking: store handlers      ---*/
   7596 /*--------------------------------------------*/
   7597 
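         /* A store records the origin tag d32 for the bytes it covers: a
            non-zero tag sets the relevant descr bits and overwrites the shared
            w32 slot, while a zero tag clears those bits ("no origin recorded").
            Because each 4-byte group shares one tag, a narrow store also
            replaces the tag previously held for neighbouring bytes in the same
            group; that coarseness trades a little precision for speed. */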
   7598 void VG_REGPARM(2) MC_(helperc_b_store1)( Addr a, UWord d32 ) {
   7599    OCacheLine* line;
   7600    UWord lineoff = oc_line_offset(a);
   7601    UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
   7602 
   7603    if (OC_ENABLE_ASSERTIONS) {
   7604       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   7605    }
   7606 
   7607    line = find_OCacheLine( a );
   7608 
   7609    if (d32 == 0) {
   7610       line->descr[lineoff] &= ~(1 << byteoff);
   7611    } else {
   7612       line->descr[lineoff] |= (1 << byteoff);
   7613       line->w32[lineoff] = d32;
   7614    }
   7615 }
   7616 
   7617 void VG_REGPARM(2) MC_(helperc_b_store2)( Addr a, UWord d32 ) {
   7618    OCacheLine* line;
   7619    UWord lineoff, byteoff;
   7620 
   7621    if (UNLIKELY(a & 1)) {
   7622       /* Handle misaligned case, slowly. */
   7623       MC_(helperc_b_store1)( a + 0, d32 );
   7624       MC_(helperc_b_store1)( a + 1, d32 );
   7625       return;
   7626    }
   7627 
   7628    lineoff = oc_line_offset(a);
   7629    byteoff = a & 3; /* 0 or 2 */
   7630 
   7631    if (OC_ENABLE_ASSERTIONS) {
   7632       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   7633    }
   7634 
   7635    line = find_OCacheLine( a );
   7636 
   7637    if (d32 == 0) {
   7638       line->descr[lineoff] &= ~(3 << byteoff);
   7639    } else {
   7640       line->descr[lineoff] |= (3 << byteoff);
   7641       line->w32[lineoff] = d32;
   7642    }
   7643 }
   7644 
   7645 void VG_REGPARM(2) MC_(helperc_b_store4)( Addr a, UWord d32 ) {
   7646    OCacheLine* line;
   7647    UWord lineoff;
   7648 
   7649    if (UNLIKELY(a & 3)) {
   7650       /* Handle misaligned case, slowly. */
   7651       MC_(helperc_b_store2)( a + 0, d32 );
   7652       MC_(helperc_b_store2)( a + 2, d32 );
   7653       return;
   7654    }
   7655 
   7656    lineoff = oc_line_offset(a);
   7657    if (OC_ENABLE_ASSERTIONS) {
   7658       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   7659    }
   7660 
   7661    line = find_OCacheLine( a );
   7662 
   7663    if (d32 == 0) {
   7664       line->descr[lineoff] = 0;
   7665    } else {
   7666       line->descr[lineoff] = 0xF;
   7667       line->w32[lineoff] = d32;
   7668    }
   7669 }
   7670 
   7671 void VG_REGPARM(2) MC_(helperc_b_store8)( Addr a, UWord d32 ) {
   7672    OCacheLine* line;
   7673    UWord lineoff;
   7674 
   7675    if (UNLIKELY(a & 7)) {
   7676       /* Handle misaligned case, slowly. */
   7677       MC_(helperc_b_store4)( a + 0, d32 );
   7678       MC_(helperc_b_store4)( a + 4, d32 );
   7679       return;
   7680    }
   7681 
   7682    lineoff = oc_line_offset(a);
   7683    if (OC_ENABLE_ASSERTIONS) {
    7684       tl_assert(lineoff == (lineoff & 6)); /* 0, 2, 4 or 6, since 8-aligned */
   7685    }
   7686 
   7687    line = find_OCacheLine( a );
   7688 
   7689    if (d32 == 0) {
   7690       line->descr[lineoff + 0] = 0;
   7691       line->descr[lineoff + 1] = 0;
   7692    } else {
   7693       line->descr[lineoff + 0] = 0xF;
   7694       line->descr[lineoff + 1] = 0xF;
   7695       line->w32[lineoff + 0] = d32;
   7696       line->w32[lineoff + 1] = d32;
   7697    }
   7698 }
   7699 
   7700 void VG_REGPARM(2) MC_(helperc_b_store16)( Addr a, UWord d32 ) {
   7701    MC_(helperc_b_store8)( a + 0, d32 );
   7702    MC_(helperc_b_store8)( a + 8, d32 );
   7703 }
   7704 
   7705 void VG_REGPARM(2) MC_(helperc_b_store32)( Addr a, UWord d32 ) {
   7706    MC_(helperc_b_store8)( a +  0, d32 );
   7707    MC_(helperc_b_store8)( a +  8, d32 );
   7708    MC_(helperc_b_store8)( a + 16, d32 );
   7709    MC_(helperc_b_store8)( a + 24, d32 );
   7710 }
   7711 
   7712 
   7713 /*--------------------------------------------*/
   7714 /*--- Origin tracking: sarp handlers       ---*/
   7715 /*--------------------------------------------*/
   7716 
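         /* Both routines below follow the same pattern: peel off a leading
            1-byte and/or 2-byte fragment until the address is 4-aligned,
            handle the bulk with 4-byte stores, and finish with a trailing
            2-byte and/or 1-byte fragment. */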
   7717 __attribute__((noinline))
   7718 static void ocache_sarp_Set_Origins ( Addr a, UWord len, UInt otag ) {
   7719    if ((a & 1) && len >= 1) {
   7720       MC_(helperc_b_store1)( a, otag );
   7721       a++;
   7722       len--;
   7723    }
   7724    if ((a & 2) && len >= 2) {
   7725       MC_(helperc_b_store2)( a, otag );
   7726       a += 2;
   7727       len -= 2;
   7728    }
   7729    if (len >= 4)
   7730       tl_assert(0 == (a & 3));
   7731    while (len >= 4) {
   7732       MC_(helperc_b_store4)( a, otag );
   7733       a += 4;
   7734       len -= 4;
   7735    }
   7736    if (len >= 2) {
   7737       MC_(helperc_b_store2)( a, otag );
   7738       a += 2;
   7739       len -= 2;
   7740    }
   7741    if (len >= 1) {
   7742       MC_(helperc_b_store1)( a, otag );
   7743       //a++;
   7744       len--;
   7745    }
   7746    tl_assert(len == 0);
   7747 }
   7748 
   7749 __attribute__((noinline))
   7750 static void ocache_sarp_Clear_Origins ( Addr a, UWord len ) {
   7751    if ((a & 1) && len >= 1) {
   7752       MC_(helperc_b_store1)( a, 0 );
   7753       a++;
   7754       len--;
   7755    }
   7756    if ((a & 2) && len >= 2) {
   7757       MC_(helperc_b_store2)( a, 0 );
   7758       a += 2;
   7759       len -= 2;
   7760    }
   7761    if (len >= 4)
   7762       tl_assert(0 == (a & 3));
   7763    while (len >= 4) {
   7764       MC_(helperc_b_store4)( a, 0 );
   7765       a += 4;
   7766       len -= 4;
   7767    }
   7768    if (len >= 2) {
   7769       MC_(helperc_b_store2)( a, 0 );
   7770       a += 2;
   7771       len -= 2;
   7772    }
   7773    if (len >= 1) {
   7774       MC_(helperc_b_store1)( a, 0 );
   7775       //a++;
   7776       len--;
   7777    }
   7778    tl_assert(len == 0);
   7779 }
   7780 
   7781 
   7782 /*------------------------------------------------------------*/
   7783 /*--- Setup and finalisation                               ---*/
   7784 /*------------------------------------------------------------*/
   7785 
   7786 static void mc_post_clo_init ( void )
   7787 {
   7788    /* If we've been asked to emit XML, mash around various other
   7789       options so as to constrain the output somewhat. */
   7790    if (VG_(clo_xml)) {
   7791       /* Extract as much info as possible from the leak checker. */
   7792       MC_(clo_leak_check) = LC_Full;
   7793    }
   7794 
   7795    if (MC_(clo_freelist_big_blocks) >= MC_(clo_freelist_vol)
   7796        && VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
   7797       VG_(message)(Vg_UserMsg,
   7798                    "Warning: --freelist-big-blocks value %lld has no effect\n"
    7799                    "as it is >= the --freelist-vol value %lld\n",
   7800                    MC_(clo_freelist_big_blocks),
   7801                    MC_(clo_freelist_vol));
   7802    }
   7803 
   7804    if (MC_(clo_workaround_gcc296_bugs)
   7805        && VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
   7806       VG_(umsg)(
   7807          "Warning: --workaround-gcc296-bugs=yes is deprecated.\n"
   7808          "Warning: Instead use: --ignore-range-below-sp=1024-1\n"
   7809          "\n"
   7810       );
   7811    }
   7812 
   7813    tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
   7814 
   7815    if (MC_(clo_mc_level) == 3) {
   7816       /* We're doing origin tracking. */
   7817 #     ifdef PERF_FAST_STACK
   7818       VG_(track_new_mem_stack_4_w_ECU)   ( mc_new_mem_stack_4_w_ECU   );
   7819       VG_(track_new_mem_stack_8_w_ECU)   ( mc_new_mem_stack_8_w_ECU   );
   7820       VG_(track_new_mem_stack_12_w_ECU)  ( mc_new_mem_stack_12_w_ECU  );
   7821       VG_(track_new_mem_stack_16_w_ECU)  ( mc_new_mem_stack_16_w_ECU  );
   7822       VG_(track_new_mem_stack_32_w_ECU)  ( mc_new_mem_stack_32_w_ECU  );
   7823       VG_(track_new_mem_stack_112_w_ECU) ( mc_new_mem_stack_112_w_ECU );
   7824       VG_(track_new_mem_stack_128_w_ECU) ( mc_new_mem_stack_128_w_ECU );
   7825       VG_(track_new_mem_stack_144_w_ECU) ( mc_new_mem_stack_144_w_ECU );
   7826       VG_(track_new_mem_stack_160_w_ECU) ( mc_new_mem_stack_160_w_ECU );
   7827 #     endif
   7828       VG_(track_new_mem_stack_w_ECU)     ( mc_new_mem_stack_w_ECU     );
   7829       VG_(track_new_mem_stack_signal)    ( mc_new_mem_w_tid_make_ECU );
   7830    } else {
   7831       /* Not doing origin tracking */
   7832 #     ifdef PERF_FAST_STACK
   7833       VG_(track_new_mem_stack_4)   ( mc_new_mem_stack_4   );
   7834       VG_(track_new_mem_stack_8)   ( mc_new_mem_stack_8   );
   7835       VG_(track_new_mem_stack_12)  ( mc_new_mem_stack_12  );
   7836       VG_(track_new_mem_stack_16)  ( mc_new_mem_stack_16  );
   7837       VG_(track_new_mem_stack_32)  ( mc_new_mem_stack_32  );
   7838       VG_(track_new_mem_stack_112) ( mc_new_mem_stack_112 );
   7839       VG_(track_new_mem_stack_128) ( mc_new_mem_stack_128 );
   7840       VG_(track_new_mem_stack_144) ( mc_new_mem_stack_144 );
   7841       VG_(track_new_mem_stack_160) ( mc_new_mem_stack_160 );
   7842 #     endif
   7843       VG_(track_new_mem_stack)     ( mc_new_mem_stack     );
   7844       VG_(track_new_mem_stack_signal) ( mc_new_mem_w_tid_no_ECU );
   7845    }
   7846 
   7847    // We assume that brk()/sbrk() does not initialise new memory.  Is this
   7848    // accurate?  John Reiser says:
   7849    //
   7850    //   0) sbrk() can *decrease* process address space.  No zero fill is done
   7851    //   for a decrease, not even the fragment on the high end of the last page
   7852    //   that is beyond the new highest address.  For maximum safety and
   7853    //   portability, then the bytes in the last page that reside above [the
   7854    //   new] sbrk(0) should be considered to be uninitialized, but in practice
   7855    //   it is exceedingly likely that they will retain their previous
   7856    //   contents.
   7857    //
   7858    //   1) If an increase is large enough to require new whole pages, then
   7859    //   those new whole pages (like all new pages) are zero-filled by the
   7860    //   operating system.  So if sbrk(0) already is page aligned, then
   7861    //   sbrk(PAGE_SIZE) *does* zero-fill the new memory.
   7862    //
   7863    //   2) Any increase that lies within an existing allocated page is not
   7864    //   changed.  So if (x = sbrk(0)) is not page aligned, then
   7865    //   sbrk(PAGE_SIZE) yields ((PAGE_SIZE -1) & -x) bytes which keep their
   7866    //   existing contents, and an additional PAGE_SIZE bytes which are zeroed.
   7867    //   ((PAGE_SIZE -1) & x) of them are "covered" by the sbrk(), and the rest
   7868    //   of them come along for the ride because the operating system deals
   7869    //   only in whole pages.  Again, for maximum safety and portability, then
   7870    //   anything that lives above [the new] sbrk(0) should be considered
   7871    //   uninitialized, but in practice will retain previous contents [zero in
    7872    //   this case.]
   7873    //
   7874    // In short:
   7875    //
   7876    //   A key property of sbrk/brk is that new whole pages that are supplied
   7877    //   by the operating system *do* get initialized to zero.
   7878    //
   7879    // As for the portability of all this:
   7880    //
   7881    //   sbrk and brk are not POSIX.  However, any system that is a derivative
    7882    //   of *nix has sbrk and brk because too much software (such as
    7883    //   the Bourne shell) relies on the traditional memory map (.text,
   7884    //   .data+.bss, stack) and the existence of sbrk/brk.
   7885    //
   7886    // So we should arguably observe all this.  However:
   7887    // - The current inaccuracy has caused maybe one complaint in seven years(?)
   7888    // - Relying on the zeroed-ness of whole brk'd pages is pretty grotty... I
   7889    //   doubt most programmers know the above information.
   7890    // So I'm not terribly unhappy with marking it as undefined. --njn.
   7891    //
   7892    // [More:  I think most of what John said only applies to sbrk().  It seems
   7893    // that brk() always deals in whole pages.  And since this event deals
   7894    // directly with brk(), not with sbrk(), perhaps it would be reasonable to
   7895    // just mark all memory it allocates as defined.]
   7896    //
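            //
            // A minimal illustration of points (1) and (2), assuming a
            // Unix-like libc that provides sbrk() (this sketch is not part
            // of Memcheck itself):
            //
            //    #include <unistd.h>
            //    char* p = sbrk(4096);  /* old break; the new space is p..p+4095 */
            //    /* Bytes of that range lying in pages newly supplied by the
            //       kernel read as zero; bytes within the page that already
            //       contained the old break keep their previous contents. */
            //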
   7897 #  if !defined(VGO_solaris)
   7898    if (MC_(clo_mc_level) == 3)
   7899       VG_(track_new_mem_brk)         ( mc_new_mem_w_tid_make_ECU );
   7900    else
   7901       VG_(track_new_mem_brk)         ( mc_new_mem_w_tid_no_ECU );
   7902 #  else
   7903    // On Solaris, brk memory has to be marked as defined, otherwise we get
   7904    // many false positives.
   7905    VG_(track_new_mem_brk)         ( make_mem_defined_w_tid );
   7906 #  endif
   7907 
   7908    /* This origin tracking cache is huge (~100M), so only initialise
   7909       if we need it. */
   7910    if (MC_(clo_mc_level) >= 3) {
   7911       init_OCache();
   7912       tl_assert(ocacheL1 != NULL);
   7913       tl_assert(ocacheL2 != NULL);
   7914    } else {
   7915       tl_assert(ocacheL1 == NULL);
   7916       tl_assert(ocacheL2 == NULL);
   7917    }
   7918 
   7919    MC_(chunk_poolalloc) = VG_(newPA)
   7920       (sizeof(MC_Chunk) + MC_(n_where_pointers)() * sizeof(ExeContext*),
   7921        1000,
   7922        VG_(malloc),
   7923        "mc.cMC.1 (MC_Chunk pools)",
   7924        VG_(free));
   7925 
   7926    /* Do not check definedness of guest state if --undef-value-errors=no */
   7927    if (MC_(clo_mc_level) >= 2)
   7928       VG_(track_pre_reg_read) ( mc_pre_reg_read );
   7929 
   7930    if (VG_(clo_xtree_memory) == Vg_XTMemory_Full) {
   7931       if (MC_(clo_keep_stacktraces) == KS_none
   7932           || MC_(clo_keep_stacktraces) == KS_free)
   7933          VG_(fmsg_bad_option)("--keep-stacktraces",
   7934                               "To use --xtree-memory=full, you must"
   7935                               " keep at least the alloc stacktrace\n");
   7936       // Activate full xtree memory profiling.
   7937       VG_(XTMemory_Full_init)(VG_(XT_filter_1top_and_maybe_below_main));
   7938    }
   7939 
   7940 }
   7941 
   7942 static void print_SM_info(const HChar* type, Int n_SMs)
   7943 {
   7944    VG_(message)(Vg_DebugMsg,
   7945       " memcheck: SMs: %s = %d (%luk, %luM)\n",
   7946       type,
   7947       n_SMs,
   7948       n_SMs * sizeof(SecMap) / 1024UL,
   7949       n_SMs * sizeof(SecMap) / (1024 * 1024UL) );
   7950 }
   7951 
   7952 static void mc_print_stats (void)
   7953 {
   7954    SizeT max_secVBit_szB, max_SMs_szB, max_shmem_szB;
   7955 
   7956    VG_(message)(Vg_DebugMsg, " memcheck: freelist: vol %lld length %lld\n",
   7957                 VG_(free_queue_volume), VG_(free_queue_length));
   7958    VG_(message)(Vg_DebugMsg,
   7959       " memcheck: sanity checks: %d cheap, %d expensive\n",
   7960       n_sanity_cheap, n_sanity_expensive );
   7961    VG_(message)(Vg_DebugMsg,
   7962       " memcheck: auxmaps: %llu auxmap entries (%lluk, %lluM) in use\n",
   7963       n_auxmap_L2_nodes,
   7964       n_auxmap_L2_nodes * 64,
   7965       n_auxmap_L2_nodes / 16 );
   7966    VG_(message)(Vg_DebugMsg,
   7967       " memcheck: auxmaps_L1: %llu searches, %llu cmps, ratio %llu:10\n",
   7968       n_auxmap_L1_searches, n_auxmap_L1_cmps,
   7969       (10ULL * n_auxmap_L1_cmps)
   7970          / (n_auxmap_L1_searches ? n_auxmap_L1_searches : 1)
   7971    );
   7972    VG_(message)(Vg_DebugMsg,
   7973       " memcheck: auxmaps_L2: %llu searches, %llu nodes\n",
   7974       n_auxmap_L2_searches, n_auxmap_L2_nodes
   7975    );
   7976 
   7977    print_SM_info("n_issued     ", n_issued_SMs);
   7978    print_SM_info("n_deissued   ", n_deissued_SMs);
   7979    print_SM_info("max_noaccess ", max_noaccess_SMs);
   7980    print_SM_info("max_undefined", max_undefined_SMs);
   7981    print_SM_info("max_defined  ", max_defined_SMs);
   7982    print_SM_info("max_non_DSM  ", max_non_DSM_SMs);
   7983 
   7984    // Three DSMs, plus the non-DSM ones
   7985    max_SMs_szB = (3 + max_non_DSM_SMs) * sizeof(SecMap);
    7986    // The 3*sizeof(Word) bytes account for the AVL node metadata.
    7987    // The VG_ROUNDUP is because the OSet pool allocator will/must align
    7988    // the elements to pointer size.
   7989    // Note that the pool allocator has some additional small overhead
   7990    // which is not counted in the below.
   7991    // Hardwiring this logic sucks, but I don't see how else to do it.
   7992    max_secVBit_szB = max_secVBit_nodes *
   7993          (3*sizeof(Word) + VG_ROUNDUP(sizeof(SecVBitNode), sizeof(void*)));
   7994    max_shmem_szB   = sizeof(primary_map) + max_SMs_szB + max_secVBit_szB;
   7995 
   7996    VG_(message)(Vg_DebugMsg,
   7997       " memcheck: max sec V bit nodes:    %d (%luk, %luM)\n",
   7998       max_secVBit_nodes, max_secVBit_szB / 1024,
   7999                          max_secVBit_szB / (1024 * 1024));
   8000    VG_(message)(Vg_DebugMsg,
   8001       " memcheck: set_sec_vbits8 calls: %llu (new: %llu, updates: %llu)\n",
   8002       sec_vbits_new_nodes + sec_vbits_updates,
   8003       sec_vbits_new_nodes, sec_vbits_updates );
   8004    VG_(message)(Vg_DebugMsg,
   8005       " memcheck: max shadow mem size:   %luk, %luM\n",
   8006       max_shmem_szB / 1024, max_shmem_szB / (1024 * 1024));
   8007 
   8008    if (MC_(clo_mc_level) >= 3) {
   8009       VG_(message)(Vg_DebugMsg,
   8010                    " ocacheL1: %'12lu refs   %'12lu misses (%'lu lossage)\n",
   8011                    stats_ocacheL1_find,
   8012                    stats_ocacheL1_misses,
   8013                    stats_ocacheL1_lossage );
   8014       VG_(message)(Vg_DebugMsg,
   8015                    " ocacheL1: %'12lu at 0   %'12lu at 1\n",
   8016                    stats_ocacheL1_find - stats_ocacheL1_misses
   8017                       - stats_ocacheL1_found_at_1
   8018                       - stats_ocacheL1_found_at_N,
   8019                    stats_ocacheL1_found_at_1 );
   8020       VG_(message)(Vg_DebugMsg,
   8021                    " ocacheL1: %'12lu at 2+  %'12lu move-fwds\n",
   8022                    stats_ocacheL1_found_at_N,
   8023                    stats_ocacheL1_movefwds );
   8024       VG_(message)(Vg_DebugMsg,
   8025                    " ocacheL1: %'12lu sizeB  %'12d useful\n",
   8026                    (SizeT)sizeof(OCache),
   8027                    4 * OC_W32S_PER_LINE * OC_LINES_PER_SET * OC_N_SETS );
   8028       VG_(message)(Vg_DebugMsg,
   8029                    " ocacheL2: %'12lu refs   %'12lu misses\n",
   8030                    stats__ocacheL2_refs,
   8031                    stats__ocacheL2_misses );
   8032       VG_(message)(Vg_DebugMsg,
   8033                    " ocacheL2:    %'9lu max nodes %'9lu curr nodes\n",
   8034                    stats__ocacheL2_n_nodes_max,
   8035                    stats__ocacheL2_n_nodes );
   8036       VG_(message)(Vg_DebugMsg,
   8037                    " niacache: %'12lu refs   %'12lu misses\n",
   8038                    stats__nia_cache_queries, stats__nia_cache_misses);
   8039    } else {
   8040       tl_assert(ocacheL1 == NULL);
   8041       tl_assert(ocacheL2 == NULL);
   8042    }
   8043 }
   8044 
   8045 
   8046 static void mc_fini ( Int exitcode )
   8047 {
   8048    MC_(xtmemory_report) (VG_(clo_xtree_memory_file), True);
   8049    MC_(print_malloc_stats)();
   8050 
   8051    if (MC_(clo_leak_check) != LC_Off) {
   8052       LeakCheckParams lcp;
   8053       HChar* xt_filename = NULL;
   8054       lcp.mode = MC_(clo_leak_check);
   8055       lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
   8056       lcp.heuristics = MC_(clo_leak_check_heuristics);
   8057       lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
   8058       lcp.deltamode = LCD_Any;
   8059       lcp.max_loss_records_output = 999999999;
   8060       lcp.requested_by_monitor_command = False;
   8061       if (MC_(clo_xtree_leak)) {
   8062          xt_filename = VG_(expand_file_name)("--xtree-leak-file",
   8063                                              MC_(clo_xtree_leak_file));
   8064          lcp.xt_filename = xt_filename;
   8065          lcp.mode = LC_Full;
   8066       }
   8067       else
   8068          lcp.xt_filename = NULL;
   8069       MC_(detect_memory_leaks)(1/*bogus ThreadId*/, &lcp);
   8070       if (MC_(clo_xtree_leak))
   8071          VG_(free)(xt_filename);
   8072    } else {
   8073       if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
   8074          VG_(umsg)(
   8075             "For a detailed leak analysis, rerun with: --leak-check=full\n"
   8076             "\n"
   8077          );
   8078       }
   8079    }
   8080 
   8081    if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
   8082       VG_(message)(Vg_UserMsg,
   8083                    "For counts of detected and suppressed errors, rerun with: -v\n");
   8084    }
   8085 
   8086    if (MC_(any_value_errors) && !VG_(clo_xml) && VG_(clo_verbosity) >= 1
   8087        && MC_(clo_mc_level) == 2) {
   8088       VG_(message)(Vg_UserMsg,
   8089                    "Use --track-origins=yes to see where "
   8090                    "uninitialised values come from\n");
   8091    }
   8092 
   8093    /* Print a warning if any client-request generated ignore-ranges
   8094       still exist.  It would be reasonable to expect that a properly
   8095       written program would remove any such ranges before exiting, and
    8096       since they are a bit on the dangerous side, let's say so.  By
    8097       contrast, ranges which are specified on the command line normally
   8098       pertain to hardware mapped into the address space, and so we
   8099       can't expect the client to have got rid of them. */
   8100    if (gIgnoredAddressRanges) {
   8101       UInt i, nBad = 0;
   8102       for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
   8103          UWord val     = IAR_INVALID;
   8104          UWord key_min = ~(UWord)0;
   8105          UWord key_max = (UWord)0;
   8106          VG_(indexRangeMap)( &key_min, &key_max, &val,
   8107                              gIgnoredAddressRanges, i );
   8108          if (val != IAR_ClientReq)
   8109            continue;
   8110          /* Print the offending range.  Also, if it is the first,
   8111             print a banner before it. */
   8112          nBad++;
   8113          if (nBad == 1) {
   8114             VG_(umsg)(
   8115               "WARNING: exiting program has the following client-requested\n"
   8116               "WARNING: address error disablement range(s) still in force,\n"
   8117               "WARNING: "
   8118                  "possibly as a result of some mistake in the use of the\n"
   8119               "WARNING: "
   8120                  "VALGRIND_{DISABLE,ENABLE}_ERROR_REPORTING_IN_RANGE macros.\n"
   8121             );
   8122          }
   8123          VG_(umsg)("   [%u]  0x%016lx-0x%016lx  %s\n",
   8124                    i, key_min, key_max, showIARKind(val));
   8125       }
   8126    }
   8127 
   8128    done_prof_mem();
   8129 
   8130    if (VG_(clo_stats))
   8131       mc_print_stats();
   8132 
   8133    if (0) {
   8134       VG_(message)(Vg_DebugMsg,
   8135         "------ Valgrind's client block stats follow ---------------\n" );
   8136       show_client_block_stats();
   8137    }
   8138 }
   8139 
    8140 /* Mark the given addr/len as unaddressable, for the watchpoint
    8141    implementation.  The PointKind will be handled at access time. */
   8142 static Bool mc_mark_unaddressable_for_watchpoint (PointKind kind, Bool insert,
   8143                                                   Addr addr, SizeT len)
   8144 {
    8145    /* GDBTD: this is somewhat fishy.  We should probably save the previous
    8146       accessibility and definedness in gdbserver so as to allow restoring them
    8147       properly.  Currently, we assume that the user only watches things
    8148       which are properly addressable and defined. */
   8149    if (insert)
   8150       MC_(make_mem_noaccess) (addr, len);
   8151    else
   8152       MC_(make_mem_defined)  (addr, len);
   8153    return True;
   8154 }
   8155 
   8156 static void mc_pre_clo_init(void)
   8157 {
   8158    VG_(details_name)            ("Memcheck");
   8159    VG_(details_version)         (NULL);
   8160    VG_(details_description)     ("a memory error detector");
   8161    VG_(details_copyright_author)(
   8162       "Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.");
   8163    VG_(details_bug_reports_to)  (VG_BUGS_TO);
   8164    VG_(details_avg_translation_sizeB) ( 640 );
   8165 
   8166    VG_(basic_tool_funcs)          (mc_post_clo_init,
   8167                                    MC_(instrument),
   8168                                    mc_fini);
   8169 
   8170    VG_(needs_final_IR_tidy_pass)  ( MC_(final_tidy) );
   8171 
   8172 
   8173    VG_(needs_core_errors)         ();
   8174    VG_(needs_tool_errors)         (MC_(eq_Error),
   8175                                    MC_(before_pp_Error),
   8176                                    MC_(pp_Error),
   8177                                    True,/*show TIDs for errors*/
   8178                                    MC_(update_Error_extra),
   8179                                    MC_(is_recognised_suppression),
   8180                                    MC_(read_extra_suppression_info),
   8181                                    MC_(error_matches_suppression),
   8182                                    MC_(get_error_name),
   8183                                    MC_(get_extra_suppression_info),
   8184                                    MC_(print_extra_suppression_use),
   8185                                    MC_(update_extra_suppression_use));
   8186    VG_(needs_libc_freeres)        ();
   8187    VG_(needs_cxx_freeres)         ();
   8188    VG_(needs_command_line_options)(mc_process_cmd_line_options,
   8189                                    mc_print_usage,
   8190                                    mc_print_debug_usage);
   8191    VG_(needs_client_requests)     (mc_handle_client_request);
   8192    VG_(needs_sanity_checks)       (mc_cheap_sanity_check,
   8193                                    mc_expensive_sanity_check);
   8194    VG_(needs_print_stats)         (mc_print_stats);
   8195    VG_(needs_info_location)       (MC_(pp_describe_addr));
   8196    VG_(needs_malloc_replacement)  (MC_(malloc),
   8197                                    MC_(__builtin_new),
   8198                                    MC_(__builtin_vec_new),
   8199                                    MC_(memalign),
   8200                                    MC_(calloc),
   8201                                    MC_(free),
   8202                                    MC_(__builtin_delete),
   8203                                    MC_(__builtin_vec_delete),
   8204                                    MC_(realloc),
   8205                                    MC_(malloc_usable_size),
   8206                                    MC_MALLOC_DEFAULT_REDZONE_SZB );
   8207    MC_(Malloc_Redzone_SzB) = VG_(malloc_effective_client_redzone_size)();
   8208 
   8209    VG_(needs_xml_output)          ();
   8210 
   8211    VG_(track_new_mem_startup)     ( mc_new_mem_startup );
   8212 
   8213    // Handling of mmap and mprotect isn't simple (well, it is simple,
    8214    // but the justification isn't).  See comments above, just prior to
   8215    // mc_new_mem_mmap.
   8216    VG_(track_new_mem_mmap)        ( mc_new_mem_mmap );
   8217    VG_(track_change_mem_mprotect) ( mc_new_mem_mprotect );
   8218 
   8219    VG_(track_copy_mem_remap)      ( MC_(copy_address_range_state) );
   8220 
   8221    VG_(track_die_mem_stack_signal)( MC_(make_mem_noaccess) );
   8222    VG_(track_die_mem_brk)         ( MC_(make_mem_noaccess) );
   8223    VG_(track_die_mem_munmap)      ( MC_(make_mem_noaccess) );
   8224 
   8225    /* Defer the specification of the new_mem_stack functions to the
   8226       post_clo_init function, since we need to first parse the command
   8227       line before deciding which set to use. */
   8228 
   8229 #  ifdef PERF_FAST_STACK
   8230    VG_(track_die_mem_stack_4)     ( mc_die_mem_stack_4   );
   8231    VG_(track_die_mem_stack_8)     ( mc_die_mem_stack_8   );
   8232    VG_(track_die_mem_stack_12)    ( mc_die_mem_stack_12  );
   8233    VG_(track_die_mem_stack_16)    ( mc_die_mem_stack_16  );
   8234    VG_(track_die_mem_stack_32)    ( mc_die_mem_stack_32  );
   8235    VG_(track_die_mem_stack_112)   ( mc_die_mem_stack_112 );
   8236    VG_(track_die_mem_stack_128)   ( mc_die_mem_stack_128 );
   8237    VG_(track_die_mem_stack_144)   ( mc_die_mem_stack_144 );
   8238    VG_(track_die_mem_stack_160)   ( mc_die_mem_stack_160 );
   8239 #  endif
   8240    VG_(track_die_mem_stack)       ( mc_die_mem_stack     );
   8241 
   8242    VG_(track_ban_mem_stack)       ( MC_(make_mem_noaccess) );
   8243 
   8244    VG_(track_pre_mem_read)        ( check_mem_is_defined );
   8245    VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
   8246    VG_(track_pre_mem_write)       ( check_mem_is_addressable );
   8247    VG_(track_post_mem_write)      ( mc_post_mem_write );
   8248 
   8249    VG_(track_post_reg_write)                  ( mc_post_reg_write );
   8250    VG_(track_post_reg_write_clientcall_return)( mc_post_reg_write_clientcall );
   8251 
   8252    if (MC_(clo_mc_level) >= 2) {
   8253       VG_(track_copy_mem_to_reg)  ( mc_copy_mem_to_reg );
   8254       VG_(track_copy_reg_to_mem)  ( mc_copy_reg_to_mem );
   8255    }
   8256 
   8257    VG_(needs_watchpoint)          ( mc_mark_unaddressable_for_watchpoint );
   8258 
   8259    init_shadow_memory();
   8260    // MC_(chunk_poolalloc) must be allocated in post_clo_init
   8261    tl_assert(MC_(chunk_poolalloc) == NULL);
   8262    MC_(malloc_list)  = VG_(HT_construct)( "MC_(malloc_list)" );
   8263    MC_(mempool_list) = VG_(HT_construct)( "MC_(mempool_list)" );
   8264    init_prof_mem();
   8265 
   8266    tl_assert( mc_expensive_sanity_check() );
   8267 
   8268    // {LOADV,STOREV}[8421] will all fail horribly if this isn't true.
   8269    tl_assert(sizeof(UWord) == sizeof(Addr));
   8270    // Call me paranoid.  I don't care.
   8271    tl_assert(sizeof(void*) == sizeof(Addr));
   8272 
   8273    // BYTES_PER_SEC_VBIT_NODE must be a power of two.
   8274    tl_assert(-1 != VG_(log2)(BYTES_PER_SEC_VBIT_NODE));
   8275 
   8276    /* This is small.  Always initialise it. */
   8277    init_nia_to_ecu_cache();
   8278 
   8279    /* We can't initialise ocacheL1/ocacheL2 yet, since we don't know
   8280       if we need to, since the command line args haven't been
   8281       processed yet.  Hence defer it to mc_post_clo_init. */
   8282    tl_assert(ocacheL1 == NULL);
   8283    tl_assert(ocacheL2 == NULL);
   8284 
   8285    /* Check some important stuff.  See extensive comments above
   8286       re UNALIGNED_OR_HIGH for background. */
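            /* Roughly: MASK(n) has a bit set for every address bit that must be
               zero for an aligned, in-range n-byte access, i.e. the low bits
               that would indicate misalignment plus the high bits that would
               place the address above MAX_PRIMARY_ADDRESS.  The assertions
               below pin down the expected values for 32- and 64-bit hosts. */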
   8287 #  if VG_WORDSIZE == 4
   8288    tl_assert(sizeof(void*) == 4);
   8289    tl_assert(sizeof(Addr)  == 4);
   8290    tl_assert(sizeof(UWord) == 4);
   8291    tl_assert(sizeof(Word)  == 4);
   8292    tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFUL);
   8293    tl_assert(MASK(1) == 0UL);
   8294    tl_assert(MASK(2) == 1UL);
   8295    tl_assert(MASK(4) == 3UL);
   8296    tl_assert(MASK(8) == 7UL);
   8297 #  else
   8298    tl_assert(VG_WORDSIZE == 8);
   8299    tl_assert(sizeof(void*) == 8);
   8300    tl_assert(sizeof(Addr)  == 8);
   8301    tl_assert(sizeof(UWord) == 8);
   8302    tl_assert(sizeof(Word)  == 8);
   8303    tl_assert(MAX_PRIMARY_ADDRESS == 0x1FFFFFFFFFULL);
   8304    tl_assert(MASK(1) == 0xFFFFFFE000000000ULL);
   8305    tl_assert(MASK(2) == 0xFFFFFFE000000001ULL);
   8306    tl_assert(MASK(4) == 0xFFFFFFE000000003ULL);
   8307    tl_assert(MASK(8) == 0xFFFFFFE000000007ULL);
   8308 #  endif
   8309 
   8310    /* Check some assertions to do with the instrumentation machinery. */
   8311    MC_(do_instrumentation_startup_checks)();
   8312 }
   8313 
   8314 STATIC_ASSERT(sizeof(UWord) == sizeof(SizeT));
   8315 
   8316 VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init)
   8317 
   8318 /*--------------------------------------------------------------------*/
   8319 /*--- end                                                mc_main.c ---*/
   8320 /*--------------------------------------------------------------------*/
   8321