      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- MemCheck: Maintain bitmaps of memory, tracking the           ---*/
      4 /*--- accessibility (A) and validity (V) status of each byte.      ---*/
      5 /*---                                                    mc_main.c ---*/
      6 /*--------------------------------------------------------------------*/
      7 
      8 /*
      9    This file is part of MemCheck, a heavyweight Valgrind tool for
     10    detecting memory errors.
     11 
     12    Copyright (C) 2000-2012 Julian Seward
     13       jseward (at) acm.org
     14 
     15    This program is free software; you can redistribute it and/or
     16    modify it under the terms of the GNU General Public License as
     17    published by the Free Software Foundation; either version 2 of the
     18    License, or (at your option) any later version.
     19 
     20    This program is distributed in the hope that it will be useful, but
     21    WITHOUT ANY WARRANTY; without even the implied warranty of
     22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     23    General Public License for more details.
     24 
     25    You should have received a copy of the GNU General Public License
     26    along with this program; if not, write to the Free Software
     27    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     28    02111-1307, USA.
     29 
     30    The GNU General Public License is contained in the file COPYING.
     31 */
     32 
     33 #include "pub_tool_basics.h"
     34 #include "pub_tool_aspacemgr.h"
     35 #include "pub_tool_gdbserver.h"
     36 #include "pub_tool_poolalloc.h"
     37 #include "pub_tool_hashtable.h"     // For mc_include.h
     38 #include "pub_tool_libcbase.h"
     39 #include "pub_tool_libcassert.h"
     40 #include "pub_tool_libcprint.h"
     41 #include "pub_tool_machine.h"
     42 #include "pub_tool_mallocfree.h"
     43 #include "pub_tool_options.h"
     44 #include "pub_tool_oset.h"
     45 #include "pub_tool_replacemalloc.h"
     46 #include "pub_tool_tooliface.h"
     47 #include "pub_tool_threadstate.h"
     48 
     49 #include "mc_include.h"
     50 #include "memcheck.h"   /* for client requests */
     51 
     52 
     53 /* Set to 1 to do a little more sanity checking */
     54 #define VG_DEBUG_MEMORY 0
     55 
     56 #define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)
     57 
     58 static void ocache_sarp_Set_Origins ( Addr, UWord, UInt ); /* fwds */
     59 static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */
     60 
     61 
     62 /*------------------------------------------------------------*/
     63 /*--- Fast-case knobs                                      ---*/
     64 /*------------------------------------------------------------*/
     65 
     66 // Comment these out to disable the fast cases (don't just set them to zero).
     67 
     68 #define PERF_FAST_LOADV    1
     69 #define PERF_FAST_STOREV   1
     70 
     71 #define PERF_FAST_SARP     1
     72 
     73 #define PERF_FAST_STACK    1
     74 #define PERF_FAST_STACK2   1
     75 
     76 /* Change this to 1 to enable assertions on origin tracking cache fast
     77    paths */
     78 #define OC_ENABLE_ASSERTIONS 0
     79 
     80 
     81 /*------------------------------------------------------------*/
     82 /*--- Comments on the origin tracking implementation       ---*/
     83 /*------------------------------------------------------------*/
     84 
     85 /* See detailed comment entitled
     86    AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
     87    which is contained further on in this file. */
     88 
     89 
     90 /*------------------------------------------------------------*/
     91 /*--- V bits and A bits                                    ---*/
     92 /*------------------------------------------------------------*/
     93 
     94 /* Conceptually, every byte value has 8 V bits, which track whether Memcheck
     95    thinks the corresponding value bit is defined.  And every memory byte
     96    has an A bit, which tracks whether Memcheck thinks the program can access
     97    it safely (ie. it's mapped, and has at least one of the RWX permission bits
     98    set).  So every N-bit register is shadowed with N V bits, and every memory
     99    byte is shadowed with 8 V bits and one A bit.
    100 
    101    In the implementation, we use two forms of compression (compressed V bits
    102    and distinguished secondary maps) to avoid the 9-bit-per-byte overhead
    103    for memory.
    104 
    105    Memcheck also tracks extra information about each heap block that is
    106    allocated, for detecting memory leaks and other purposes.
    107 */
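/* Illustrative example (not part of the implementation): with the V bit
   convention used later in this file (V_BIT_DEFINED == 0,
   V_BIT_UNDEFINED == 1), a byte whose low four bits have been written but
   whose high four bits are still uninitialised carries the shadow value
   0xF0 -- ones for the undefined top nibble, zeroes for the defined
   bottom nibble. */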
    108 
    109 /*------------------------------------------------------------*/
    110 /*--- Basic A/V bitmap representation.                     ---*/
    111 /*------------------------------------------------------------*/
    112 
    113 /* All reads and writes are checked against a memory map (a.k.a. shadow
    114    memory), which records the state of all memory in the process.
    115 
    116    On 32-bit machines the memory map is organised as follows.
    117    The top 16 bits of an address are used to index into a top-level
    118    map table, containing 65536 entries.  Each entry is a pointer to a
    119    second-level map, which records the accessibility and validity
    120    permissions for the 65536 bytes indexed by the lower 16 bits of the
    121    address.  Each byte is represented by two bits (details are below).  So
    122    each second-level map contains 16384 bytes.  This two-level arrangement
    123    conveniently divides the 4G address space into 64k lumps, each of size
    124    64k bytes.
    125 
    126    All entries in the primary (top-level) map must point to a valid
    127    secondary (second-level) map.  Since many of the 64kB chunks will
    128    have the same status for every bit -- ie. noaccess (for unused
    129    address space) or entirely addressable and defined (for code segments) --
    130    there are three distinguished secondary maps, which indicate 'noaccess',
    131    'undefined' and 'defined'.  For these uniform 64kB chunks, the primary
    132    map entry points to the relevant distinguished map.  In practice,
    133    typically more than half of the addressable memory is represented with
    134    the 'undefined' or 'defined' distinguished secondary map, so it gives a
    135    good saving.  It also lets us set the V+A bits of large address regions
    136    quickly in set_address_range_perms().
    137 
    138    On 64-bit machines it's more complicated.  If we followed the same basic
    139    scheme we'd have a four-level table which would require too many memory
    140    accesses.  So instead the top-level map table has 2^19 entries (indexed
    141    using bits 16..34 of the address);  this covers the bottom 32GB.  Any
    142    accesses above 32GB are handled with a slow, sparse auxiliary table.
    143    Valgrind's address space manager tries very hard to keep things below
    144    this 32GB barrier so that performance doesn't suffer too much.
    145 
    146    Note that this file has a lot of different functions for reading and
    147    writing shadow memory.  Only a couple are strictly necessary (eg.
    148    get_vabits2 and set_vabits2); most are just specialised for specific
    149    common cases to improve performance.
    150 
    151    Aside: the V+A bits are less precise than they could be -- we have no way
    152    of marking memory as read-only.  It would be great if we could add an
    153    extra state VA_BITSn_READONLY.  But then we'd have 5 different states,
    154    which requires 2.3 bits to hold, and there's no way to do that elegantly
    155    -- we'd have to double up to 4 bits of metadata per byte, which doesn't
    156    seem worth it.
    157 */
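/* Illustrative sketch only -- the real lookup code appears further down
   (see get_secmap_low_ptr, SM_OFF and friends).  Roughly, on a 32-bit
   target a byte's shadow state is located like this; the block is kept
   inside #if 0 so it is never compiled. */
#if 0
static UChar sketch_get_vabits8_for ( Addr a )
{
   SecMap* sm = primary_map[ a >> 16 ];       /* top 16 bits: primary index */
   return sm->vabits8[ (a & 0xffff) >> 2 ];   /* low 16 bits select the
                                                 vabits8 chunk; each chunk
                                                 covers 4 bytes of memory */
}
#endif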
    158 
    159 /* --------------- Basic configuration --------------- */
    160 
    161 /* Only change this.  N_PRIMARY_MAP *must* be a power of 2. */
    162 
    163 #if VG_WORDSIZE == 4
    164 
    165 /* cover the entire address space */
    166 #  define N_PRIMARY_BITS  16
    167 
    168 #else
    169 
    170 /* Just handle the first 32G fast and the rest via auxiliary
    171    primaries.  If you change this, Memcheck will assert at startup.
    172    See the definition of UNALIGNED_OR_HIGH for extensive comments. */
    173 #  define N_PRIMARY_BITS  19
    174 
    175 #endif
    176 
    177 
    178 /* Do not change this. */
    179 #define N_PRIMARY_MAP  ( ((UWord)1) << N_PRIMARY_BITS)
    180 
    181 /* Do not change this. */
    182 #define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1)
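/* Worked out, these give: on 32-bit targets N_PRIMARY_MAP == 2^16, so
   MAX_PRIMARY_ADDRESS == 2^32 - 1 == 0xFFFFFFFF and the whole address
   space is covered; on 64-bit targets N_PRIMARY_MAP == 2^19, so
   MAX_PRIMARY_ADDRESS == 2^35 - 1 == 0x7FFFFFFFF, ie. the bottom 32GB. */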
    183 
    184 
    185 /* --------------- Secondary maps --------------- */
    186 
    187 // Each byte of memory conceptually has an A bit, which indicates its
    188 // addressability, and 8 V bits, which indicate its definedness.
    189 //
    190 // But because very few bytes are partially defined, we can use a nice
    191 // compression scheme to reduce the size of shadow memory.  Each byte of
    192 // memory has 2 bits which indicate its state (ie. V+A bits):
    193 //
    194 //   00:  noaccess    (unaddressable but treated as fully defined)
    195 //   01:  undefined   (addressable and fully undefined)
    196 //   10:  defined     (addressable and fully defined)
    197 //   11:  partdefined (addressable and partially defined)
    198 //
    199 // In the "partdefined" case, we use a secondary table to store the V bits.
    200 // Each entry in the secondary-V-bits table maps a byte address to its 8 V
    201 // bits.
    202 //
    203 // We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for
    204 // four bytes (32 bits) of memory are in each chunk.  Hence the name
    205 // "vabits8".  This lets us get the V+A bits for four bytes at a time
    206 // easily (without having to do any shifting and/or masking), and that is a
    207 // very common operation.  (Note that although each vabits8 chunk
    208 // is 8 bits in size, it represents 32 bits of memory.)
    209 //
    210 // The representation is "inverse" little-endian... each 4 bytes of
    211 // memory is represented by a 1 byte value, where:
    212 //
    213 // - the status of byte (a+0) is held in bits [1..0]
    214 // - the status of byte (a+1) is held in bits [3..2]
    215 // - the status of byte (a+2) is held in bits [5..4]
    216 // - the status of byte (a+3) is held in bits [7..6]
    217 //
    218 // It's "inverse" because endianness normally describes a mapping from
    219 // value bits to memory addresses;  in this case the mapping is inverted.
    220 // Ie. instead of particular value bits being held in certain addresses, in
    221 // this case certain addresses are represented by particular value bits.
    222 // See insert_vabits2_into_vabits8() for an example.
    223 //
    224 // But note that we don't compress the V bits stored in registers;  they
    225 // need to be explicit to make the shadow operations possible.  Therefore
    226 // when moving values between registers and memory we need to convert
    227 // between the expanded in-register format and the compressed in-memory
    228 // format.  This isn't so difficult, it just requires careful attention in a
    229 // few places.
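// Worked example: suppose the four memory bytes at a..a+3 are,
// respectively, defined (10b), undefined (01b), noaccess (00b) and
// defined (10b).  The vabits8 chunk describing them is then
//    (10b << 6) | (00b << 4) | (01b << 2) | (10b << 0)  ==  0x86
// ie. byte a+3's state ends up in the top two bits and byte a+0's state
// in the bottom two, as per the layout above.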
    230 
    231 // These represent eight bits of memory.
    232 #define VA_BITS2_NOACCESS     0x0      // 00b
    233 #define VA_BITS2_UNDEFINED    0x1      // 01b
    234 #define VA_BITS2_DEFINED      0x2      // 10b
    235 #define VA_BITS2_PARTDEFINED  0x3      // 11b
    236 
    237 // These represent 16 bits of memory.
    238 #define VA_BITS4_NOACCESS     0x0      // 00_00b
    239 #define VA_BITS4_UNDEFINED    0x5      // 01_01b
    240 #define VA_BITS4_DEFINED      0xa      // 10_10b
    241 
    242 // These represent 32 bits of memory.
    243 #define VA_BITS8_NOACCESS     0x00     // 00_00_00_00b
    244 #define VA_BITS8_UNDEFINED    0x55     // 01_01_01_01b
    245 #define VA_BITS8_DEFINED      0xaa     // 10_10_10_10b
    246 
    247 // These represent 64 bits of memory.
    248 #define VA_BITS16_NOACCESS    0x0000   // 00_00_00_00b x 2
    249 #define VA_BITS16_UNDEFINED   0x5555   // 01_01_01_01b x 2
    250 #define VA_BITS16_DEFINED     0xaaaa   // 10_10_10_10b x 2
    251 
    252 
    253 #define SM_CHUNKS             16384
    254 #define SM_OFF(aaa)           (((aaa) & 0xffff) >> 2)
    255 #define SM_OFF_16(aaa)        (((aaa) & 0xffff) >> 3)
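// Worked example: for a == 0x8048ab7, SM_OFF(a) == 0x8ab7 >> 2 == 0x22ad,
// selecting the vabits8 chunk that covers bytes 0x8048ab4..0x8048ab7;
// SM_OFF_16(a) == 0x8ab7 >> 3 == 0x1156, selecting the 16-bit group (two
// chunks) that covers the 8 bytes 0x8048ab0..0x8048ab7.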
    256 
    257 // Paranoia:  it's critical for performance that the requested inlining
    258 // occurs.  So try extra hard.
    259 #define INLINE    inline __attribute__((always_inline))
    260 
    261 static INLINE Addr start_of_this_sm ( Addr a ) {
    262    return (a & (~SM_MASK));
    263 }
    264 static INLINE Bool is_start_of_sm ( Addr a ) {
    265    return (start_of_this_sm(a) == a);
    266 }
    267 
    268 typedef
    269    struct {
    270       UChar vabits8[SM_CHUNKS];
    271    }
    272    SecMap;
    273 
    274 // 3 distinguished secondary maps, one for no-access, one for
    275 // accessible but undefined, and one for accessible and defined.
    276 // Distinguished secondaries may never be modified.
    277 #define SM_DIST_NOACCESS   0
    278 #define SM_DIST_UNDEFINED  1
    279 #define SM_DIST_DEFINED    2
    280 
    281 static SecMap sm_distinguished[3];
    282 
    283 static INLINE Bool is_distinguished_sm ( SecMap* sm ) {
    284    return sm >= &sm_distinguished[0] && sm <= &sm_distinguished[2];
    285 }
    286 
    287 // Forward declaration
    288 static void update_SM_counts(SecMap* oldSM, SecMap* newSM);
    289 
    290 /* dist_sm points to one of our three distinguished secondaries.  Make
    291    a copy of it so that we can write to it.
    292 */
    293 static SecMap* copy_for_writing ( SecMap* dist_sm )
    294 {
    295    SecMap* new_sm;
    296    tl_assert(dist_sm == &sm_distinguished[0]
    297           || dist_sm == &sm_distinguished[1]
    298           || dist_sm == &sm_distinguished[2]);
    299 
    300    new_sm = VG_(am_shadow_alloc)(sizeof(SecMap));
    301    if (new_sm == NULL)
    302       VG_(out_of_memory_NORETURN)( "memcheck:allocate new SecMap",
    303                                    sizeof(SecMap) );
    304    VG_(memcpy)(new_sm, dist_sm, sizeof(SecMap));
    305    update_SM_counts(dist_sm, new_sm);
    306    return new_sm;
    307 }
    308 
    309 /* --------------- Stats --------------- */
    310 
    311 static Int   n_issued_SMs      = 0;
    312 static Int   n_deissued_SMs    = 0;
    313 static Int   n_noaccess_SMs    = N_PRIMARY_MAP; // start with many noaccess DSMs
    314 static Int   n_undefined_SMs   = 0;
    315 static Int   n_defined_SMs     = 0;
    316 static Int   n_non_DSM_SMs     = 0;
    317 static Int   max_noaccess_SMs  = 0;
    318 static Int   max_undefined_SMs = 0;
    319 static Int   max_defined_SMs   = 0;
    320 static Int   max_non_DSM_SMs   = 0;
    321 
    322 /* # searches initiated in auxmap_L1, and # base cmps required */
    323 static ULong n_auxmap_L1_searches  = 0;
    324 static ULong n_auxmap_L1_cmps      = 0;
    325 /* # of searches that missed in auxmap_L1 and therefore had to
    326    be handed to auxmap_L2. And the number of nodes inserted. */
    327 static ULong n_auxmap_L2_searches  = 0;
    328 static ULong n_auxmap_L2_nodes     = 0;
    329 
    330 static Int   n_sanity_cheap     = 0;
    331 static Int   n_sanity_expensive = 0;
    332 
    333 static Int   n_secVBit_nodes   = 0;
    334 static Int   max_secVBit_nodes = 0;
    335 
    336 static void update_SM_counts(SecMap* oldSM, SecMap* newSM)
    337 {
    338    if      (oldSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs --;
    339    else if (oldSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs--;
    340    else if (oldSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  --;
    341    else                                                  { n_non_DSM_SMs  --;
    342                                                            n_deissued_SMs ++; }
    343 
    344    if      (newSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs ++;
    345    else if (newSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs++;
    346    else if (newSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  ++;
    347    else                                                  { n_non_DSM_SMs  ++;
    348                                                            n_issued_SMs   ++; }
    349 
    350    if (n_noaccess_SMs  > max_noaccess_SMs ) max_noaccess_SMs  = n_noaccess_SMs;
    351    if (n_undefined_SMs > max_undefined_SMs) max_undefined_SMs = n_undefined_SMs;
    352    if (n_defined_SMs   > max_defined_SMs  ) max_defined_SMs   = n_defined_SMs;
    353    if (n_non_DSM_SMs   > max_non_DSM_SMs  ) max_non_DSM_SMs   = n_non_DSM_SMs;
    354 }
    355 
    356 /* --------------- Primary maps --------------- */
    357 
    358 /* The main primary map.  This covers some initial part of the address
    359    space, addresses 0 .. (N_PRIMARY_MAP << 16)-1.  The rest of it is
    360    handled using the auxiliary primary map.
    361 */
    362 static SecMap* primary_map[N_PRIMARY_MAP];
    363 
    364 
    365 /* An entry in the auxiliary primary map.  base must be a 64k-aligned
    366    value, and sm points at the relevant secondary map.  As with the
    367    main primary map, the secondary may be either a real secondary, or
    368    one of the three distinguished secondaries.  DO NOT CHANGE THIS
    369    LAYOUT: the first word has to be the key for OSet fast lookups.
    370 */
    371 typedef
    372    struct {
    373       Addr    base;
    374       SecMap* sm;
    375    }
    376    AuxMapEnt;
    377 
    378 /* Tunable parameter: How big is the L1 queue? */
    379 #define N_AUXMAP_L1 24
    380 
    381 /* Tunable parameter: How far along the L1 queue to insert
    382    entries resulting from L2 lookups? */
    383 #define AUXMAP_L1_INSERT_IX 12
    384 
    385 static struct {
    386           Addr       base;
    387           AuxMapEnt* ent; // pointer to the matching auxmap_L2 node
    388        }
    389        auxmap_L1[N_AUXMAP_L1];
    390 
    391 static OSet* auxmap_L2 = NULL;
    392 
    393 static void init_auxmap_L1_L2 ( void )
    394 {
    395    Int i;
    396    for (i = 0; i < N_AUXMAP_L1; i++) {
    397       auxmap_L1[i].base = 0;
    398       auxmap_L1[i].ent  = NULL;
    399    }
    400 
    401    tl_assert(0 == offsetof(AuxMapEnt,base));
    402    tl_assert(sizeof(Addr) == sizeof(void*));
    403    auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/  offsetof(AuxMapEnt,base),
    404                                     /*fastCmp*/ NULL,
    405                                     VG_(malloc), "mc.iaLL.1", VG_(free) );
    406 }
    407 
    408 /* Check representation invariants; if OK return NULL; else a
    409    descriptive bit of text.  Also return the number of
    410    non-distinguished secondary maps referred to from the auxiliary
    411    primary maps. */
    412 
    413 static HChar* check_auxmap_L1_L2_sanity ( Word* n_secmaps_found )
    414 {
    415    Word i, j;
    416    /* On a 32-bit platform, the L2 and L1 tables should
    417       both remain empty forever.
    418 
    419       On a 64-bit platform:
    420       In the L2 table:
    421        all .base & 0xFFFF == 0
    422        all .base > MAX_PRIMARY_ADDRESS
    423       In the L1 table:
    424        all .base & 0xFFFF == 0
    425        all (.base > MAX_PRIMARY_ADDRESS
    426             .base & 0xFFFF == 0
    427             and .ent points to an AuxMapEnt with the same .base)
    428            or
    429            (.base == 0 and .ent == NULL)
    430    */
    431    *n_secmaps_found = 0;
    432    if (sizeof(void*) == 4) {
    433       /* 32-bit platform */
    434       if (VG_(OSetGen_Size)(auxmap_L2) != 0)
    435          return "32-bit: auxmap_L2 is non-empty";
    436       for (i = 0; i < N_AUXMAP_L1; i++)
    437          if (auxmap_L1[i].base != 0 || auxmap_L1[i].ent != NULL)
    438             return "32-bit: auxmap_L1 is non-empty";
    439    } else {
    440       /* 64-bit platform */
    441       UWord elems_seen = 0;
    442       AuxMapEnt *elem, *res;
    443       AuxMapEnt key;
    444       /* L2 table */
    445       VG_(OSetGen_ResetIter)(auxmap_L2);
    446       while ( (elem = VG_(OSetGen_Next)(auxmap_L2)) ) {
    447          elems_seen++;
    448          if (0 != (elem->base & (Addr)0xFFFF))
    449             return "64-bit: nonzero .base & 0xFFFF in auxmap_L2";
    450          if (elem->base <= MAX_PRIMARY_ADDRESS)
    451             return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2";
    452          if (elem->sm == NULL)
    453             return "64-bit: .sm in _L2 is NULL";
    454          if (!is_distinguished_sm(elem->sm))
    455             (*n_secmaps_found)++;
    456       }
    457       if (elems_seen != n_auxmap_L2_nodes)
    458          return "64-bit: disagreement on number of elems in _L2";
    459       /* Check L1-L2 correspondence */
    460       for (i = 0; i < N_AUXMAP_L1; i++) {
    461          if (auxmap_L1[i].base == 0 && auxmap_L1[i].ent == NULL)
    462             continue;
    463          if (0 != (auxmap_L1[i].base & (Addr)0xFFFF))
    464             return "64-bit: nonzero .base & 0xFFFF in auxmap_L1";
    465          if (auxmap_L1[i].base <= MAX_PRIMARY_ADDRESS)
    466             return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1";
    467          if (auxmap_L1[i].ent == NULL)
    468             return "64-bit: .ent is NULL in auxmap_L1";
    469          if (auxmap_L1[i].ent->base != auxmap_L1[i].base)
    470             return "64-bit: _L1 and _L2 bases are inconsistent";
    471          /* Look it up in auxmap_L2. */
    472          key.base = auxmap_L1[i].base;
    473          key.sm   = 0;
    474          res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
    475          if (res == NULL)
    476             return "64-bit: _L1 .base not found in _L2";
    477          if (res != auxmap_L1[i].ent)
    478             return "64-bit: _L1 .ent disagrees with _L2 entry";
    479       }
    480       /* Check L1 contains no duplicates */
    481       for (i = 0; i < N_AUXMAP_L1; i++) {
    482          if (auxmap_L1[i].base == 0)
    483             continue;
    484          for (j = i+1; j < N_AUXMAP_L1; j++) {
    485             if (auxmap_L1[j].base == 0)
    486                continue;
    487             if (auxmap_L1[j].base == auxmap_L1[i].base)
    488                return "64-bit: duplicate _L1 .base entries";
    489          }
    490       }
    491    }
    492    return NULL; /* ok */
    493 }
    494 
    495 static void insert_into_auxmap_L1_at ( Word rank, AuxMapEnt* ent )
    496 {
    497    Word i;
    498    tl_assert(ent);
    499    tl_assert(rank >= 0 && rank < N_AUXMAP_L1);
    500    for (i = N_AUXMAP_L1-1; i > rank; i--)
    501       auxmap_L1[i] = auxmap_L1[i-1];
    502    auxmap_L1[rank].base = ent->base;
    503    auxmap_L1[rank].ent  = ent;
    504 }
    505 
    506 static INLINE AuxMapEnt* maybe_find_in_auxmap ( Addr a )
    507 {
    508    AuxMapEnt  key;
    509    AuxMapEnt* res;
    510    Word       i;
    511 
    512    tl_assert(a > MAX_PRIMARY_ADDRESS);
    513    a &= ~(Addr)0xFFFF;
    514 
    515    /* First search the front-cache, which is a self-organising
    516       list containing the most popular entries. */
    517 
    518    if (LIKELY(auxmap_L1[0].base == a))
    519       return auxmap_L1[0].ent;
    520    if (LIKELY(auxmap_L1[1].base == a)) {
    521       Addr       t_base = auxmap_L1[0].base;
    522       AuxMapEnt* t_ent  = auxmap_L1[0].ent;
    523       auxmap_L1[0].base = auxmap_L1[1].base;
    524       auxmap_L1[0].ent  = auxmap_L1[1].ent;
    525       auxmap_L1[1].base = t_base;
    526       auxmap_L1[1].ent  = t_ent;
    527       return auxmap_L1[0].ent;
    528    }
    529 
    530    n_auxmap_L1_searches++;
    531 
    532    for (i = 0; i < N_AUXMAP_L1; i++) {
    533       if (auxmap_L1[i].base == a) {
    534          break;
    535       }
    536    }
    537    tl_assert(i >= 0 && i <= N_AUXMAP_L1);
    538 
    539    n_auxmap_L1_cmps += (ULong)(i+1);
    540 
    541    if (i < N_AUXMAP_L1) {
    542       if (i > 0) {
    543          Addr       t_base = auxmap_L1[i-1].base;
    544          AuxMapEnt* t_ent  = auxmap_L1[i-1].ent;
    545          auxmap_L1[i-1].base = auxmap_L1[i-0].base;
    546          auxmap_L1[i-1].ent  = auxmap_L1[i-0].ent;
    547          auxmap_L1[i-0].base = t_base;
    548          auxmap_L1[i-0].ent  = t_ent;
    549          i--;
    550       }
    551       return auxmap_L1[i].ent;
    552    }
    553 
    554    n_auxmap_L2_searches++;
    555 
    556    /* First see if we already have it. */
    557    key.base = a;
    558    key.sm   = 0;
    559 
    560    res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
    561    if (res)
    562       insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, res );
    563    return res;
    564 }
    565 
    566 static AuxMapEnt* find_or_alloc_in_auxmap ( Addr a )
    567 {
    568    AuxMapEnt *nyu, *res;
    569 
    570    /* First see if we already have it. */
    571    res = maybe_find_in_auxmap( a );
    572    if (LIKELY(res))
    573       return res;
    574 
    575    /* Ok, there's no entry in the auxiliary map, so we'll have
    576       to allocate one. */
    577    a &= ~(Addr)0xFFFF;
    578 
    579    nyu = (AuxMapEnt*) VG_(OSetGen_AllocNode)( auxmap_L2, sizeof(AuxMapEnt) );
    580    tl_assert(nyu);
    581    nyu->base = a;
    582    nyu->sm   = &sm_distinguished[SM_DIST_NOACCESS];
    583    VG_(OSetGen_Insert)( auxmap_L2, nyu );
    584    insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, nyu );
    585    n_auxmap_L2_nodes++;
    586    return nyu;
    587 }
    588 
    589 /* --------------- SecMap fundamentals --------------- */
    590 
    591 // In all these, 'low' means it's definitely in the main primary map,
    592 // 'high' means it's definitely in the auxiliary table.
    593 
    594 static INLINE SecMap** get_secmap_low_ptr ( Addr a )
    595 {
    596    UWord pm_off = a >> 16;
    597 #  if VG_DEBUG_MEMORY >= 1
    598    tl_assert(pm_off < N_PRIMARY_MAP);
    599 #  endif
    600    return &primary_map[ pm_off ];
    601 }
    602 
    603 static INLINE SecMap** get_secmap_high_ptr ( Addr a )
    604 {
    605    AuxMapEnt* am = find_or_alloc_in_auxmap(a);
    606    return &am->sm;
    607 }
    608 
    609 static SecMap** get_secmap_ptr ( Addr a )
    610 {
    611    return ( a <= MAX_PRIMARY_ADDRESS
    612           ? get_secmap_low_ptr(a)
    613           : get_secmap_high_ptr(a));
    614 }
    615 
    616 static INLINE SecMap* get_secmap_for_reading_low ( Addr a )
    617 {
    618    return *get_secmap_low_ptr(a);
    619 }
    620 
    621 static INLINE SecMap* get_secmap_for_reading_high ( Addr a )
    622 {
    623    return *get_secmap_high_ptr(a);
    624 }
    625 
    626 static INLINE SecMap* get_secmap_for_writing_low(Addr a)
    627 {
    628    SecMap** p = get_secmap_low_ptr(a);
    629    if (UNLIKELY(is_distinguished_sm(*p)))
    630       *p = copy_for_writing(*p);
    631    return *p;
    632 }
    633 
    634 static INLINE SecMap* get_secmap_for_writing_high ( Addr a )
    635 {
    636    SecMap** p = get_secmap_high_ptr(a);
    637    if (UNLIKELY(is_distinguished_sm(*p)))
    638       *p = copy_for_writing(*p);
    639    return *p;
    640 }
    641 
    642 /* Produce the secmap for 'a', either from the primary map or by
    643    ensuring there is an entry for it in the aux primary map.  The
    644    secmap may be a distinguished one as the caller will only want to
    645    be able to read it.
    646 */
    647 static INLINE SecMap* get_secmap_for_reading ( Addr a )
    648 {
    649    return ( a <= MAX_PRIMARY_ADDRESS
    650           ? get_secmap_for_reading_low (a)
    651           : get_secmap_for_reading_high(a) );
    652 }
    653 
    654 /* Produce the secmap for 'a', either from the primary map or by
    655    ensuring there is an entry for it in the aux primary map.  The
    656    secmap may not be a distinguished one, since the caller will want
    657    to be able to write it.  If it is a distinguished secondary, make a
    658    writable copy of it, install it, and return the copy instead.  (COW
    659    semantics).
    660 */
    661 static SecMap* get_secmap_for_writing ( Addr a )
    662 {
    663    return ( a <= MAX_PRIMARY_ADDRESS
    664           ? get_secmap_for_writing_low (a)
    665           : get_secmap_for_writing_high(a) );
    666 }
    667 
    668 /* If 'a' has a SecMap, produce it.  Else produce NULL.  But don't
    669    allocate one if one doesn't already exist.  This is used by the
    670    leak checker.
    671 */
    672 static SecMap* maybe_get_secmap_for ( Addr a )
    673 {
    674    if (a <= MAX_PRIMARY_ADDRESS) {
    675       return get_secmap_for_reading_low(a);
    676    } else {
    677       AuxMapEnt* am = maybe_find_in_auxmap(a);
    678       return am ? am->sm : NULL;
    679    }
    680 }
    681 
    682 /* --------------- Fundamental functions --------------- */
    683 
    684 static INLINE
    685 void insert_vabits2_into_vabits8 ( Addr a, UChar vabits2, UChar* vabits8 )
    686 {
    687    UInt shift =  (a & 3)  << 1;        // shift by 0, 2, 4, or 6
    688    *vabits8  &= ~(0x3     << shift);   // mask out the two old bits
    689    *vabits8  |=  (vabits2 << shift);   // mask  in the two new bits
    690 }
    691 
    692 static INLINE
    693 void insert_vabits4_into_vabits8 ( Addr a, UChar vabits4, UChar* vabits8 )
    694 {
    695    UInt shift;
    696    tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
    697    shift     =  (a & 2)   << 1;        // shift by 0 or 4
    698    *vabits8 &= ~(0xf      << shift);   // mask out the four old bits
    699    *vabits8 |=  (vabits4 << shift);    // mask  in the four new bits
    700 }
    701 
    702 static INLINE
    703 UChar extract_vabits2_from_vabits8 ( Addr a, UChar vabits8 )
    704 {
    705    UInt shift = (a & 3) << 1;          // shift by 0, 2, 4, or 6
    706    vabits8 >>= shift;                  // shift the two bits to the bottom
    707    return 0x3 & vabits8;               // mask out the rest
    708 }
    709 
    710 static INLINE
    711 UChar extract_vabits4_from_vabits8 ( Addr a, UChar vabits8 )
    712 {
    713    UInt shift;
    714    tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
    715    shift = (a & 2) << 1;               // shift by 0 or 4
    716    vabits8 >>= shift;                  // shift the four bits to the bottom
    717    return 0xf & vabits8;               // mask out the rest
    718 }
    719 
    720 // Note that these four are only used in slow cases.  The fast cases do
    721 // clever things like combine the auxmap check (in
    722 // get_secmap_{read,writ}able) with alignment checks.
    723 
    724 // *** WARNING! ***
    725 // Any time this function is called, if it is possible that vabits2
    726 // is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the
    727 // sec-V-bits table must also be set!
    728 static INLINE
    729 void set_vabits2 ( Addr a, UChar vabits2 )
    730 {
    731    SecMap* sm       = get_secmap_for_writing(a);
    732    UWord   sm_off   = SM_OFF(a);
    733    insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
    734 }
    735 
    736 static INLINE
    737 UChar get_vabits2 ( Addr a )
    738 {
    739    SecMap* sm       = get_secmap_for_reading(a);
    740    UWord   sm_off   = SM_OFF(a);
    741    UChar   vabits8  = sm->vabits8[sm_off];
    742    return extract_vabits2_from_vabits8(a, vabits8);
    743 }
    744 
    745 // *** WARNING! ***
    746 // Any time this function is called, if it is possible that any of the
    747 // 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the
    748 // corresponding entry(s) in the sec-V-bits table must also be set!
    749 static INLINE
    750 UChar get_vabits8_for_aligned_word32 ( Addr a )
    751 {
    752    SecMap* sm       = get_secmap_for_reading(a);
    753    UWord   sm_off   = SM_OFF(a);
    754    UChar   vabits8  = sm->vabits8[sm_off];
    755    return vabits8;
    756 }
    757 
    758 static INLINE
    759 void set_vabits8_for_aligned_word32 ( Addr a, UChar vabits8 )
    760 {
    761    SecMap* sm       = get_secmap_for_writing(a);
    762    UWord   sm_off   = SM_OFF(a);
    763    sm->vabits8[sm_off] = vabits8;
    764 }
    765 
    766 
    767 // Forward declarations
    768 static UWord get_sec_vbits8(Addr a);
    769 static void  set_sec_vbits8(Addr a, UWord vbits8);
    770 
    771 // Returns False if there was an addressability error.
    772 static INLINE
    773 Bool set_vbits8 ( Addr a, UChar vbits8 )
    774 {
    775    Bool  ok      = True;
    776    UChar vabits2 = get_vabits2(a);
    777    if ( VA_BITS2_NOACCESS != vabits2 ) {
    778       // Addressable.  Convert in-register format to in-memory format.
    779       // Also remove any existing sec V bit entry for the byte if no
    780       // longer necessary.
    781       if      ( V_BITS8_DEFINED   == vbits8 ) { vabits2 = VA_BITS2_DEFINED;   }
    782       else if ( V_BITS8_UNDEFINED == vbits8 ) { vabits2 = VA_BITS2_UNDEFINED; }
    783       else                                    { vabits2 = VA_BITS2_PARTDEFINED;
    784                                                 set_sec_vbits8(a, vbits8);  }
    785       set_vabits2(a, vabits2);
    786 
    787    } else {
    788       // Unaddressable!  Do nothing -- when writing to unaddressable
    789       // memory it acts as a black hole, and the V bits can never be seen
    790       // again.  So we don't have to write them at all.
    791       ok = False;
    792    }
    793    return ok;
    794 }
    795 
    796 // Returns False if there was an addressability error.  In that case, we put
    797 // all defined bits into vbits8.
    798 static INLINE
    799 Bool get_vbits8 ( Addr a, UChar* vbits8 )
    800 {
    801    Bool  ok      = True;
    802    UChar vabits2 = get_vabits2(a);
    803 
    804    // Convert the in-memory format to in-register format.
    805    if      ( VA_BITS2_DEFINED   == vabits2 ) { *vbits8 = V_BITS8_DEFINED;   }
    806    else if ( VA_BITS2_UNDEFINED == vabits2 ) { *vbits8 = V_BITS8_UNDEFINED; }
    807    else if ( VA_BITS2_NOACCESS  == vabits2 ) {
    808       *vbits8 = V_BITS8_DEFINED;    // Make V bits defined!
    809       ok = False;
    810    } else {
    811       tl_assert( VA_BITS2_PARTDEFINED == vabits2 );
    812       *vbits8 = get_sec_vbits8(a);
    813    }
    814    return ok;
    815 }
    816 
    817 
    818 /* --------------- Secondary V bit table ------------ */
    819 
    820 // This table holds the full V bit pattern for partially-defined bytes
    821 // (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow
    822 // memory.
    823 //
    824 // Note: the nodes in this table can become stale.  Eg. if you write a PDB,
    825 // then overwrite the same address with a fully defined byte, the sec-V-bit
    826 // node will not necessarily be removed.  This is because checking for
    827 // whether removal is necessary would slow down the fast paths.
    828 //
    829 // To avoid the stale nodes building up too much, we periodically (once the
    830 // table reaches a certain size) garbage collect (GC) the table by
    831 // traversing it and evicting any nodes that no longer hold any PDBs.
    832 // If more than a certain proportion of nodes survived, we increase the
    833 // table size so that GCs occur less often.
    834 //
    835 // This policy is designed to avoid bad table bloat in the worst case where
    836 // a program creates huge numbers of stale PDBs -- we would get this bloat
    837 // if we had no GC -- while handling well the case where a node becomes
    838 // stale but shortly afterwards is rewritten with a PDB and so becomes
    839 // non-stale again (which happens quite often, eg. in perf/bz2).  If we just
    840 // remove all stale nodes as soon as possible, we just end up re-adding a
    841 // lot of them again later.  The "sufficiently stale" approach avoids
    842 // this.  (If a program has many live PDBs, performance will just suck,
    843 // there's no way around that.)
    844 //
    845 // Further comments, JRS 14 Feb 2012.  It turns out that the policy of
    846 // holding on to stale entries for 2 GCs before discarding them can lead
    847 // to massive space leaks.  So we're changing to an arrangement where
    848 // lines are evicted as soon as they are observed to be stale during a
    849 // GC.  This also has a side benefit of allowing the sufficiently_stale
    850 // field to be removed from the SecVBitNode struct, reducing its size by
    851 // 8 bytes, which is a substantial space saving considering that the
    852 // struct was previously 32 or so bytes, on a 64 bit target.
    853 //
    854 // In order to try and mitigate the problem that the "sufficiently stale"
    855 // heuristic was designed to avoid, the table size is allowed to drift
    856 // up ("DRIFTUP") slowly to 80000, even if the residency is low.  This
    857 // means that nodes will exist in the table longer on average, and hopefully
    858 // will be deleted and re-added less frequently.
    859 //
    860 // The previous scaling up mechanism (now called STEPUP) is retained:
    861 // if residency exceeds 50%, the table is scaled up, although by a
    862 // factor sqrt(2) rather than 2 as before.  This effectively doubles the
    863 // frequency of GCs when there are many PDBs and reduces the tendency of
    864 // stale PDBs to reside for long periods in the table.
    865 
    866 static OSet* secVBitTable;
    867 
    868 // Stats
    869 static ULong sec_vbits_new_nodes = 0;
    870 static ULong sec_vbits_updates   = 0;
    871 
    872 // This must be a power of two;  this is checked in mc_pre_clo_init().
    873 // The size chosen here is a trade-off:  if the nodes are bigger (ie. cover
    874 // a larger address range) they take more space but we can get multiple
    875 // partially-defined bytes in one if they are close to each other, reducing
    876 // the number of total nodes.  In practice sometimes they are clustered (eg.
    877 // perf/bz2 repeatedly writes then reads more than 20,000 in a contiguous
    878 // row), but often not.  So we choose something intermediate.
    879 #define BYTES_PER_SEC_VBIT_NODE     16
    880 
    881 // We make the table bigger by a factor of STEPUP_GROWTH_FACTOR if
    882 // more than this many nodes survive a GC.
    883 #define STEPUP_SURVIVOR_PROPORTION  0.5
    884 #define STEPUP_GROWTH_FACTOR        1.414213562
    885 
    886 // If the above heuristic doesn't apply, then we may make the table
    887 // slightly bigger, by a factor of DRIFTUP_GROWTH_FACTOR, if more than
    888 // this many nodes survive a GC, _and_ the total table size does
    889 // not exceed a fixed limit.  The numbers are somewhat arbitrary, but
    890 // work tolerably well on long Firefox runs.  The scaleup ratio of 1.5%
    891 // effectively, although gradually, reduces residency and increases time
    892 // between GCs for programs with small numbers of PDBs.  The 80000 limit
    893 // effectively limits the table size to around 2MB for programs with
    894 // small numbers of PDBs, whilst giving a reasonably long lifetime to
    895 // entries, to try and reduce the costs resulting from deleting and
    896 // re-adding of entries.
    897 #define DRIFTUP_SURVIVOR_PROPORTION 0.15
    898 #define DRIFTUP_GROWTH_FACTOR       1.015
    899 #define DRIFTUP_MAX_SIZE            80000
    900 
    901 // We GC the table when it gets this many nodes in it, ie. it's effectively
    902 // the table size.  It can change.
    903 static Int  secVBitLimit = 1000;
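// Worked example of the resizing policy implemented in gcSecVBitTable()
// below: starting from the initial limit of 1000, a GC in which more than
// 500 nodes survive (STEPUP_SURVIVOR_PROPORTION) raises the limit to 1414
// (x STEPUP_GROWTH_FACTOR); failing that, if more than 150 survive
// (DRIFTUP_SURVIVOR_PROPORTION) and the limit is still below
// DRIFTUP_MAX_SIZE, it rises to just 1015 (x DRIFTUP_GROWTH_FACTOR).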
    904 
    905 // The number of GCs done, used to age sec-V-bit nodes for eviction.
    906 // Because it's unsigned, wrapping doesn't matter -- the right answer will
    907 // come out anyway.
    908 static UInt GCs_done = 0;
    909 
    910 typedef
    911    struct {
    912       Addr  a;
    913       UChar vbits8[BYTES_PER_SEC_VBIT_NODE];
    914    }
    915    SecVBitNode;
    916 
    917 static OSet* createSecVBitTable(void)
    918 {
    919    OSet* newSecVBitTable;
    920    newSecVBitTable = VG_(OSetGen_Create_With_Pool)
    921       ( offsetof(SecVBitNode, a),
    922         NULL, // use fast comparisons
    923         VG_(malloc), "mc.cSVT.1 (sec VBit table)",
    924         VG_(free),
    925         1000,
    926         sizeof(SecVBitNode));
    927    return newSecVBitTable;
    928 }
    929 
    930 static void gcSecVBitTable(void)
    931 {
    932    OSet*        secVBitTable2;
    933    SecVBitNode* n;
    934    Int          i, n_nodes = 0, n_survivors = 0;
    935 
    936    GCs_done++;
    937 
    938    // Create the new table.
    939    secVBitTable2 = createSecVBitTable();
    940 
    941    // Traverse the table, moving fresh nodes into the new table.
    942    VG_(OSetGen_ResetIter)(secVBitTable);
    943    while ( (n = VG_(OSetGen_Next)(secVBitTable)) ) {
    944       // Keep node if any of its bytes are non-stale.  Using
    945       // get_vabits2() for the lookup is not very efficient, but I don't
    946       // think it matters.
    947       for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
    948          if (VA_BITS2_PARTDEFINED == get_vabits2(n->a + i)) {
    949             // Found a non-stale byte, so keep =>
    950             // Insert a copy of the node into the new table.
    951             SecVBitNode* n2 =
    952                VG_(OSetGen_AllocNode)(secVBitTable2, sizeof(SecVBitNode));
    953             *n2 = *n;
    954             VG_(OSetGen_Insert)(secVBitTable2, n2);
    955             break;
    956          }
    957       }
    958    }
    959 
    960    // Get the before and after sizes.
    961    n_nodes     = VG_(OSetGen_Size)(secVBitTable);
    962    n_survivors = VG_(OSetGen_Size)(secVBitTable2);
    963 
    964    // Destroy the old table, and put the new one in its place.
    965    VG_(OSetGen_Destroy)(secVBitTable);
    966    secVBitTable = secVBitTable2;
    967 
    968    if (VG_(clo_verbosity) > 1) {
    969       Char percbuf[7];
    970       VG_(percentify)(n_survivors, n_nodes, 1, 6, percbuf);
    971       VG_(message)(Vg_DebugMsg, "memcheck GC: %d nodes, %d survivors (%s)\n",
    972                    n_nodes, n_survivors, percbuf);
    973    }
    974 
    975    // Increase table size if necessary.
    976    if ((Double)n_survivors
    977        > ((Double)secVBitLimit * STEPUP_SURVIVOR_PROPORTION)) {
    978       secVBitLimit = (Int)((Double)secVBitLimit * (Double)STEPUP_GROWTH_FACTOR);
    979       if (VG_(clo_verbosity) > 1)
    980          VG_(message)(Vg_DebugMsg,
    981                       "memcheck GC: %d new table size (stepup)\n",
    982                       secVBitLimit);
    983    }
    984    else
    985    if (secVBitLimit < DRIFTUP_MAX_SIZE
    986        && (Double)n_survivors
    987           > ((Double)secVBitLimit * DRIFTUP_SURVIVOR_PROPORTION)) {
    988       secVBitLimit = (Int)((Double)secVBitLimit * (Double)DRIFTUP_GROWTH_FACTOR);
    989       if (VG_(clo_verbosity) > 1)
    990          VG_(message)(Vg_DebugMsg,
    991                       "memcheck GC: %d new table size (driftup)\n",
    992                       secVBitLimit);
    993    }
    994 }
    995 
    996 static UWord get_sec_vbits8(Addr a)
    997 {
    998    Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
    999    Int          amod     = a % BYTES_PER_SEC_VBIT_NODE;
   1000    SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
   1001    UChar        vbits8;
   1002    tl_assert2(n, "get_sec_vbits8: no node for address %p (%p)\n", aAligned, a);
   1003    // Shouldn't be fully defined or fully undefined -- those cases shouldn't
   1004    // make it to the secondary V bits table.
   1005    vbits8 = n->vbits8[amod];
   1006    tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
   1007    return vbits8;
   1008 }
   1009 
   1010 static void set_sec_vbits8(Addr a, UWord vbits8)
   1011 {
   1012    Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
   1013    Int          i, amod  = a % BYTES_PER_SEC_VBIT_NODE;
   1014    SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
   1015    // Shouldn't be fully defined or fully undefined -- those cases shouldn't
   1016    // make it to the secondary V bits table.
   1017    tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
   1018    if (n) {
   1019       n->vbits8[amod] = vbits8;     // update
   1020       sec_vbits_updates++;
   1021    } else {
   1022       // Do a table GC if necessary.  Nb: do this before creating and
   1023       // inserting the new node, to avoid erroneously GC'ing the new node.
   1024       if (secVBitLimit == VG_(OSetGen_Size)(secVBitTable)) {
   1025          gcSecVBitTable();
   1026       }
   1027 
   1028       // New node:  assign the specific byte, make the rest invalid (they
   1029       // should never be read as-is, but be cautious).
   1030       n = VG_(OSetGen_AllocNode)(secVBitTable, sizeof(SecVBitNode));
   1031       n->a            = aAligned;
   1032       for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
   1033          n->vbits8[i] = V_BITS8_UNDEFINED;
   1034       }
   1035       n->vbits8[amod] = vbits8;
   1036 
   1037       // Insert the new node.
   1038       VG_(OSetGen_Insert)(secVBitTable, n);
   1039       sec_vbits_new_nodes++;
   1040 
   1041       n_secVBit_nodes = VG_(OSetGen_Size)(secVBitTable);
   1042       if (n_secVBit_nodes > max_secVBit_nodes)
   1043          max_secVBit_nodes = n_secVBit_nodes;
   1044    }
   1045 }
   1046 
   1047 /* --------------- Endianness helpers --------------- */
   1048 
   1049 /* Returns the offset in memory of the byteno-th least significant byte
   1050    in a wordszB-sized word, given the specified endianness. */
   1051 static INLINE UWord byte_offset_w ( UWord wordszB, Bool bigendian,
   1052                                     UWord byteno ) {
   1053    return bigendian ? (wordszB-1-byteno) : byteno;
   1054 }
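/* For example, for a 4-byte word: byte_offset_w(4, False, 0) == 0 and
   byte_offset_w(4, True, 0) == 3 -- the least significant byte sits at
   the lowest address on a little-endian target and at the highest
   address on a big-endian one. */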
   1055 
   1056 
   1057 /* --------------- Ignored address ranges --------------- */
   1058 
   1059 #define M_IGNORE_RANGES 4
   1060 
   1061 typedef
   1062    struct {
   1063       Int  used;
   1064       Addr start[M_IGNORE_RANGES];
   1065       Addr end[M_IGNORE_RANGES];
   1066    }
   1067    IgnoreRanges;
   1068 
   1069 static IgnoreRanges ignoreRanges;
   1070 
   1071 INLINE Bool MC_(in_ignored_range) ( Addr a )
   1072 {
   1073    Int i;
   1074    if (LIKELY(ignoreRanges.used == 0))
   1075       return False;
   1076    for (i = 0; i < ignoreRanges.used; i++) {
   1077       if (a >= ignoreRanges.start[i] && a < ignoreRanges.end[i])
   1078          return True;
   1079    }
   1080    return False;
   1081 }
   1082 
   1083 /* Parse two Addr separated by a dash, or fail. */
   1084 
   1085 static Bool parse_range ( UChar** ppc, Addr* result1, Addr* result2 )
   1086 {
   1087    Bool ok = VG_(parse_Addr) (ppc, result1);
   1088    if (!ok)
   1089       return False;
   1090    if (**ppc != '-')
   1091       return False;
   1092    (*ppc)++;
   1093    ok = VG_(parse_Addr) (ppc, result2);
   1094    if (!ok)
   1095       return False;
   1096    return True;
   1097 }
   1098 
   1099 /* Parse a set of ranges separated by commas into 'ignoreRanges', or
   1100    fail. */
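/* For example, the string "0x11220000-0x11230000,0x22330000-0x22340000"
   would give two ignored ranges.  At most M_IGNORE_RANGES (4) ranges are
   accepted, and any malformed input causes the whole parse to fail. */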
   1101 
   1102 static Bool parse_ignore_ranges ( UChar* str0 )
   1103 {
   1104    Addr start, end;
   1105    Bool ok;
   1106    UChar*  str = str0;
   1107    UChar** ppc = &str;
   1108    ignoreRanges.used = 0;
   1109    while (1) {
   1110       ok = parse_range(ppc, &start, &end);
   1111       if (!ok)
   1112          return False;
   1113       if (ignoreRanges.used >= M_IGNORE_RANGES)
   1114          return False;
   1115       ignoreRanges.start[ignoreRanges.used] = start;
   1116       ignoreRanges.end[ignoreRanges.used] = end;
   1117       ignoreRanges.used++;
   1118       if (**ppc == 0)
   1119          return True;
   1120       if (**ppc != ',')
   1121          return False;
   1122       (*ppc)++;
   1123    }
   1124    /*NOTREACHED*/
   1125    return False;
   1126 }
   1127 
   1128 
   1129 /* --------------- Load/store slow cases. --------------- */
   1130 
   1131 static
   1132 __attribute__((noinline))
   1133 ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
   1134 {
   1135    PROF_EVENT(30, "mc_LOADVn_slow");
   1136 
   1137    /* ------------ BEGIN semi-fast cases ------------ */
   1138    /* These deal quickly-ish with the common auxiliary primary map
   1139       cases on 64-bit platforms.  They are merely a speedup hack and can be
   1140       omitted without loss of correctness/functionality.  Note that in
   1141       both cases the "sizeof(void*) == 8" causes these cases to be
   1142       folded out by compilers on 32-bit platforms.  These are derived
   1143       from LOADV64 and LOADV32.
   1144    */
   1145    if (LIKELY(sizeof(void*) == 8
   1146                       && nBits == 64 && VG_IS_8_ALIGNED(a))) {
   1147       SecMap* sm       = get_secmap_for_reading(a);
   1148       UWord   sm_off16 = SM_OFF_16(a);
   1149       UWord   vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
   1150       if (LIKELY(vabits16 == VA_BITS16_DEFINED))
   1151          return V_BITS64_DEFINED;
   1152       if (LIKELY(vabits16 == VA_BITS16_UNDEFINED))
   1153          return V_BITS64_UNDEFINED;
   1154       /* else fall into the slow case */
   1155    }
   1156    if (LIKELY(sizeof(void*) == 8
   1157                       && nBits == 32 && VG_IS_4_ALIGNED(a))) {
   1158       SecMap* sm = get_secmap_for_reading(a);
   1159       UWord sm_off = SM_OFF(a);
   1160       UWord vabits8 = sm->vabits8[sm_off];
   1161       if (LIKELY(vabits8 == VA_BITS8_DEFINED))
   1162          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
   1163       if (LIKELY(vabits8 == VA_BITS8_UNDEFINED))
   1164          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
   1165       /* else fall into slow case */
   1166    }
   1167    /* ------------ END semi-fast cases ------------ */
   1168 
   1169    ULong  vbits64     = V_BITS64_UNDEFINED; /* result */
   1170    ULong  pessim64    = V_BITS64_DEFINED;   /* only used when p-l-ok=yes */
   1171    SSizeT szB         = nBits / 8;
   1172    SSizeT i;          /* Must be signed. */
   1173    SizeT  n_addrs_bad = 0;
   1174    Addr   ai;
   1175    UChar  vbits8;
   1176    Bool   ok;
   1177 
   1178    tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
   1179 
   1180    /* Make up a 64-bit result V word, which contains the loaded data
   1181       for valid addresses and Defined for invalid addresses.  Iterate
   1182       over the bytes in the word, from the most significant down to
   1183       the least.  The vbits to return are calculated into vbits64.
   1184       Also compute the pessimising value to be used when
   1185       --partial-loads-ok=yes.  n_addrs_bad is redundant (the relevant
   1186       info can be gleaned from pessim64) but is used as a
   1187       cross-check. */
   1188    for (i = szB-1; i >= 0; i--) {
   1189       PROF_EVENT(31, "mc_LOADVn_slow(loop)");
   1190       ai = a + byte_offset_w(szB, bigendian, i);
   1191       ok = get_vbits8(ai, &vbits8);
   1192       vbits64 <<= 8;
   1193       vbits64 |= vbits8;
   1194       if (!ok) n_addrs_bad++;
   1195       pessim64 <<= 8;
   1196       pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
   1197    }
   1198 
   1199    /* In the common case, all the addresses involved are valid, so we
   1200       just return the computed V bits and have done. */
   1201    if (LIKELY(n_addrs_bad == 0))
   1202       return vbits64;
   1203 
   1204    /* If there's no possibility of getting a partial-loads-ok
   1205       exemption, report the error and quit. */
   1206    if (!MC_(clo_partial_loads_ok)) {
   1207       MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
   1208       return vbits64;
   1209    }
   1210 
   1211    /* The partial-loads-ok exemption might apply.  Find out if it
   1212       does.  If so, don't report an addressing error, but do return
   1213       Undefined for the bytes that are out of range, so as to avoid
   1214       false negatives.  If it doesn't apply, just report an addressing
   1215       error in the usual way. */
   1216 
   1217    /* Some code steps along byte strings in aligned word-sized chunks
   1218       even when there is only a partially defined word at the end (eg,
   1219       optimised strlen).  This is allowed by the memory model of
   1220       modern machines, since an aligned load cannot span two pages and
   1221       thus cannot "partially fault", despite such behaviour being
   1222       declared undefined by ANSI C/C++.
   1223 
   1224       Therefore, a load from a partially-addressable place is allowed
   1225       if all of the following hold:
   1226       - the command-line flag is set [by default, it isn't]
   1227       - it's a word-sized, word-aligned load
   1228       - at least one of the addresses in the word *is* valid
   1229 
   1230       Since this suppresses the addressing error, we avoid false
   1231       negatives by marking bytes undefined when they come from an
   1232       invalid address.
   1233    */
   1234 
   1235    /* "at least one of the addresses is invalid" */
   1236    tl_assert(pessim64 != V_BITS64_DEFINED);
   1237 
   1238    if (szB == VG_WORDSIZE && VG_IS_WORD_ALIGNED(a)
   1239        && n_addrs_bad < VG_WORDSIZE) {
   1240       /* Exemption applies.  Use the previously computed pessimising
   1241          value for vbits64 and return the combined result, but don't
   1242          flag an addressing error.  The pessimising value is Defined
   1243          for valid addresses and Undefined for invalid addresses. */
   1244       /* for assumption that doing bitwise or implements UifU */
   1245       tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
   1246       /* (really need "UifU" here...)
   1247          vbits64 UifU= pessim64  (is pessimised by it, iow) */
   1248       vbits64 |= pessim64;
   1249       return vbits64;
   1250    }
   1251 
   1252    /* Exemption doesn't apply.  Flag an addressing error in the normal
   1253       way. */
   1254    MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
   1255 
   1256    return vbits64;
   1257 }
   1258 
   1259 
   1260 static
   1261 __attribute__((noinline))
   1262 void mc_STOREVn_slow ( Addr a, SizeT nBits, ULong vbytes, Bool bigendian )
   1263 {
   1264    SizeT szB = nBits / 8;
   1265    SizeT i, n_addrs_bad = 0;
   1266    UChar vbits8;
   1267    Addr  ai;
   1268    Bool  ok;
   1269 
   1270    PROF_EVENT(35, "mc_STOREVn_slow");
   1271 
   1272    /* ------------ BEGIN semi-fast cases ------------ */
   1273    /* These deal quickly-ish with the common auxiliary primary map
   1274       cases on 64-bit platforms.  They are merely a speedup hack and can be
   1275       omitted without loss of correctness/functionality.  Note that in
   1276       both cases the "sizeof(void*) == 8" causes these cases to be
   1277       folded out by compilers on 32-bit platforms.  These are derived
   1278       from STOREV64 and STOREV32.
   1279    */
   1280    if (LIKELY(sizeof(void*) == 8
   1281                       && nBits == 64 && VG_IS_8_ALIGNED(a))) {
   1282       SecMap* sm       = get_secmap_for_reading(a);
   1283       UWord   sm_off16 = SM_OFF_16(a);
   1284       UWord   vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
   1285       if (LIKELY( !is_distinguished_sm(sm) &&
   1286                           (VA_BITS16_DEFINED   == vabits16 ||
   1287                            VA_BITS16_UNDEFINED == vabits16) )) {
   1288          /* Handle common case quickly: a is suitably aligned, */
   1289          /* is mapped, and is addressible. */
   1290          // Convert full V-bits in register to compact 2-bit form.
   1291          if (LIKELY(V_BITS64_DEFINED == vbytes)) {
   1292             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
   1293             return;
   1294          } else if (V_BITS64_UNDEFINED == vbytes) {
   1295             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
   1296             return;
   1297          }
   1298          /* else fall into the slow case */
   1299       }
   1300       /* else fall into the slow case */
   1301    }
   1302    if (LIKELY(sizeof(void*) == 8
   1303                       && nBits == 32 && VG_IS_4_ALIGNED(a))) {
   1304       SecMap* sm      = get_secmap_for_reading(a);
   1305       UWord   sm_off  = SM_OFF(a);
   1306       UWord   vabits8 = sm->vabits8[sm_off];
   1307       if (LIKELY( !is_distinguished_sm(sm) &&
   1308                           (VA_BITS8_DEFINED   == vabits8 ||
   1309                            VA_BITS8_UNDEFINED == vabits8) )) {
   1310          /* Handle common case quickly: a is suitably aligned, */
   1311          /* is mapped, and is addressible. */
   1312          // Convert full V-bits in register to compact 2-bit form.
   1313          if (LIKELY(V_BITS32_DEFINED == (vbytes & 0xFFFFFFFF))) {
   1314             sm->vabits8[sm_off] = VA_BITS8_DEFINED;
   1315             return;
   1316          } else if (V_BITS32_UNDEFINED == (vbytes & 0xFFFFFFFF)) {
   1317             sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
   1318             return;
   1319          }
   1320          /* else fall into the slow case */
   1321       }
   1322       /* else fall into the slow case */
   1323    }
   1324    /* ------------ END semi-fast cases ------------ */
   1325 
   1326    tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
   1327 
   1328    /* Dump vbytes in memory, iterating from least to most significant
   1329       byte.  At the same time establish addressibility of the location. */
   1330    for (i = 0; i < szB; i++) {
   1331       PROF_EVENT(36, "mc_STOREVn_slow(loop)");
   1332       ai     = a + byte_offset_w(szB, bigendian, i);
   1333       vbits8 = vbytes & 0xff;
   1334       ok     = set_vbits8(ai, vbits8);
   1335       if (!ok) n_addrs_bad++;
   1336       vbytes >>= 8;
   1337    }
   1338 
   1339    /* If an address error has happened, report it. */
   1340    if (n_addrs_bad > 0)
   1341       MC_(record_address_error)( VG_(get_running_tid)(), a, szB, True );
   1342 }
   1343 
   1344 
   1345 /*------------------------------------------------------------*/
   1346 /*--- Setting permissions over address ranges.             ---*/
   1347 /*------------------------------------------------------------*/
   1348 
   1349 static void set_address_range_perms ( Addr a, SizeT lenT, UWord vabits16,
   1350                                       UWord dsm_num )
   1351 {
   1352    UWord    sm_off, sm_off16;
   1353    UWord    vabits2 = vabits16 & 0x3;
   1354    SizeT    lenA, lenB, len_to_next_secmap;
   1355    Addr     aNext;
   1356    SecMap*  sm;
   1357    SecMap** sm_ptr;
   1358    SecMap*  example_dsm;
   1359 
   1360    PROF_EVENT(150, "set_address_range_perms");
   1361 
   1362    /* Check the V+A bits make sense. */
   1363    tl_assert(VA_BITS16_NOACCESS  == vabits16 ||
   1364              VA_BITS16_UNDEFINED == vabits16 ||
   1365              VA_BITS16_DEFINED   == vabits16);
   1366 
   1367    // This code should never write PDBs;  ensure this.  (See comment above
   1368    // set_vabits2().)
   1369    tl_assert(VA_BITS2_PARTDEFINED != vabits2);
   1370 
   1371    if (lenT == 0)
   1372       return;
   1373 
   1374    if (lenT > 256 * 1024 * 1024) {
   1375       if (VG_(clo_verbosity) > 0 && !VG_(clo_xml)) {
   1376          Char* s = "unknown???";
   1377          if (vabits16 == VA_BITS16_NOACCESS ) s = "noaccess";
   1378          if (vabits16 == VA_BITS16_UNDEFINED) s = "undefined";
   1379          if (vabits16 == VA_BITS16_DEFINED  ) s = "defined";
   1380          VG_(message)(Vg_UserMsg, "Warning: set address range perms: "
   1381                                   "large range [0x%lx, 0x%lx) (%s)\n",
   1382                                   a, a + lenT, s);
   1383       }
   1384    }
   1385 
   1386 #ifndef PERF_FAST_SARP
   1387    /*------------------ debug-only case ------------------ */
   1388    {
   1389       // Endianness doesn't matter here because all bytes are being set to
   1390       // the same value.
   1391       // Nb: We don't have to worry about updating the sec-V-bits table
   1392       // after these set_vabits2() calls because this code never writes
   1393       // VA_BITS2_PARTDEFINED values.
   1394       SizeT i;
   1395       for (i = 0; i < lenT; i++) {
   1396          set_vabits2(a + i, vabits2);
   1397       }
   1398       return;
   1399    }
   1400 #endif
   1401 
   1402    /*------------------ standard handling ------------------ */
   1403 
   1404    /* Get the distinguished secondary that we might want
   1405       to use (part of the space-compression scheme). */
   1406    example_dsm = &sm_distinguished[dsm_num];
   1407 
   1408    // We have to handle ranges covering various combinations of partial and
   1409    // whole sec-maps.  Here is how parts 1, 2 and 3 are used in each case.
   1410    // Cases marked with a '*' are common.
   1411    //
   1412    //   TYPE                                             PARTS USED
   1413    //   ----                                             ----------
   1414    // * one partial sec-map                  (p)         1
   1415    // - one whole sec-map                    (P)         2
   1416    //
   1417    // * two partial sec-maps                 (pp)        1,3
   1418    // - one partial, one whole sec-map       (pP)        1,2
   1419    // - one whole, one partial sec-map       (Pp)        2,3
   1420    // - two whole sec-maps                   (PP)        2,2
   1421    //
   1422    // * one partial, one whole, one partial  (pPp)       1,2,3
   1423    // - one partial, two whole               (pPP)       1,2,2
   1424    // - two whole, one partial               (PPp)       2,2,3
   1425    // - three whole                          (PPP)       2,2,2
   1426    //
   1427    // * one partial, N-2 whole, one partial  (pP...Pp)   1,2...2,3
   1428    // - one partial, N-1 whole               (pP...PP)   1,2...2,2
   1429    // - N-1 whole, one partial               (PP...Pp)   2,2...2,3
   1430    // - N whole                              (PP...PP)   2,2...2,2
   1431 
   1432    // Break up total length (lenT) into two parts:  length in the first
   1433    // sec-map (lenA), and the rest (lenB);   lenT == lenA + lenB.
   1434    aNext = start_of_this_sm(a) + SM_SIZE;
   1435    len_to_next_secmap = aNext - a;
   1436    if ( lenT <= len_to_next_secmap ) {
   1437       // Range entirely within one sec-map.  Covers almost all cases.
   1438       PROF_EVENT(151, "set_address_range_perms-single-secmap");
   1439       lenA = lenT;
   1440       lenB = 0;
   1441    } else if (is_start_of_sm(a)) {
   1442       // Range spans at least one whole sec-map, and starts at the beginning
   1443       // of a sec-map; skip to Part 2.
   1444       PROF_EVENT(152, "set_address_range_perms-startof-secmap");
   1445       lenA = 0;
   1446       lenB = lenT;
   1447       goto part2;
   1448    } else {
   1449       // Range spans two or more sec-maps, first one is partial.
   1450       PROF_EVENT(153, "set_address_range_perms-multiple-secmaps");
   1451       lenA = len_to_next_secmap;
   1452       lenB = lenT - lenA;
   1453    }
   1454 
   1455    //------------------------------------------------------------------------
   1456    // Part 1: Deal with the first sec_map.  Most of the time the range will be
   1457    // entirely within a sec_map and this part alone will suffice.  Also,
   1458    // doing it this way lets us avoid repeatedly testing for the crossing of
   1459    // a sec-map boundary within these loops.
   1460    //------------------------------------------------------------------------
   1461 
   1462    // If it's distinguished, make it undistinguished if necessary.
   1463    sm_ptr = get_secmap_ptr(a);
   1464    if (is_distinguished_sm(*sm_ptr)) {
   1465       if (*sm_ptr == example_dsm) {
   1466          // Sec-map already has the V+A bits that we want, so skip.
   1467          PROF_EVENT(154, "set_address_range_perms-dist-sm1-quick");
   1468          a    = aNext;
   1469          lenA = 0;
   1470       } else {
   1471          PROF_EVENT(155, "set_address_range_perms-dist-sm1");
   1472          *sm_ptr = copy_for_writing(*sm_ptr);
   1473       }
   1474    }
   1475    sm = *sm_ptr;
   1476 
   1477    // 1 byte steps
   1478    while (True) {
   1479       if (VG_IS_8_ALIGNED(a)) break;
   1480       if (lenA < 1)           break;
   1481       PROF_EVENT(156, "set_address_range_perms-loop1a");
   1482       sm_off = SM_OFF(a);
   1483       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
   1484       a    += 1;
   1485       lenA -= 1;
   1486    }
   1487    // 8-aligned, 8 byte steps
   1488    while (True) {
   1489       if (lenA < 8) break;
   1490       PROF_EVENT(157, "set_address_range_perms-loop8a");
   1491       sm_off16 = SM_OFF_16(a);
   1492       ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
   1493       a    += 8;
   1494       lenA -= 8;
   1495    }
   1496    // 1 byte steps
   1497    while (True) {
   1498       if (lenA < 1) break;
   1499       PROF_EVENT(158, "set_address_range_perms-loop1b");
   1500       sm_off = SM_OFF(a);
   1501       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
   1502       a    += 1;
   1503       lenA -= 1;
   1504    }
   1505 
   1506    // We've finished the first sec-map.  Is that it?
   1507    if (lenB == 0)
   1508       return;
   1509 
   1510    //------------------------------------------------------------------------
   1511    // Part 2: Fast-set entire sec-maps at a time.
   1512    //------------------------------------------------------------------------
   1513   part2:
   1514    // 64KB-aligned, 64KB steps.
   1515    // Nb: we can reach here with lenB < SM_SIZE
   1516    tl_assert(0 == lenA);
   1517    while (True) {
   1518       if (lenB < SM_SIZE) break;
   1519       tl_assert(is_start_of_sm(a));
   1520       PROF_EVENT(159, "set_address_range_perms-loop64K");
   1521       sm_ptr = get_secmap_ptr(a);
   1522       if (!is_distinguished_sm(*sm_ptr)) {
   1523          PROF_EVENT(160, "set_address_range_perms-loop64K-free-dist-sm");
   1524          // Free the non-distinguished sec-map that we're replacing.  This
   1525          // case happens moderately often, enough to be worthwhile.
   1526          SysRes sres = VG_(am_munmap_valgrind)((Addr)*sm_ptr, sizeof(SecMap));
   1527          tl_assert2(! sr_isError(sres), "SecMap valgrind munmap failure\n");
   1528       }
   1529       update_SM_counts(*sm_ptr, example_dsm);
   1530       // Make the sec-map entry point to the example DSM
   1531       *sm_ptr = example_dsm;
   1532       lenB -= SM_SIZE;
   1533       a    += SM_SIZE;
   1534    }
   1535 
   1536    // We've finished the whole sec-maps.  Is that it?
   1537    if (lenB == 0)
   1538       return;
   1539 
   1540    //------------------------------------------------------------------------
   1541    // Part 3: Finish off the final partial sec-map, if necessary.
   1542    //------------------------------------------------------------------------
   1543 
   1544    tl_assert(is_start_of_sm(a) && lenB < SM_SIZE);
   1545 
   1546    // If it's distinguished, make it undistinguished if necessary.
   1547    sm_ptr = get_secmap_ptr(a);
   1548    if (is_distinguished_sm(*sm_ptr)) {
   1549       if (*sm_ptr == example_dsm) {
   1550          // Sec-map already has the V+A bits that we want, so stop.
   1551          PROF_EVENT(161, "set_address_range_perms-dist-sm2-quick");
   1552          return;
   1553       } else {
   1554          PROF_EVENT(162, "set_address_range_perms-dist-sm2");
   1555          *sm_ptr = copy_for_writing(*sm_ptr);
   1556       }
   1557    }
   1558    sm = *sm_ptr;
   1559 
   1560    // 8-aligned, 8 byte steps
   1561    while (True) {
   1562       if (lenB < 8) break;
   1563       PROF_EVENT(163, "set_address_range_perms-loop8b");
   1564       sm_off16 = SM_OFF_16(a);
   1565       ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
   1566       a    += 8;
   1567       lenB -= 8;
   1568    }
   1569    // 1 byte steps
   1570    while (True) {
   1571       if (lenB < 1) return;
   1572       PROF_EVENT(164, "set_address_range_perms-loop1c");
   1573       sm_off = SM_OFF(a);
   1574       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
   1575       a    += 1;
   1576       lenB -= 1;
   1577    }
   1578 }
   1579 
   1580 
   1581 /* --- Set permissions for arbitrary address ranges --- */
   1582 
   1583 void MC_(make_mem_noaccess) ( Addr a, SizeT len )
   1584 {
   1585    PROF_EVENT(40, "MC_(make_mem_noaccess)");
   1586    DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a, len);
   1587    set_address_range_perms ( a, len, VA_BITS16_NOACCESS, SM_DIST_NOACCESS );
   1588    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
   1589       ocache_sarp_Clear_Origins ( a, len );
   1590 }
   1591 
   1592 static void make_mem_undefined ( Addr a, SizeT len )
   1593 {
   1594    PROF_EVENT(41, "make_mem_undefined");
   1595    DEBUG("make_mem_undefined(%p, %lu)\n", a, len);
   1596    set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
   1597 }
   1598 
   1599 void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag )
   1600 {
   1601    PROF_EVENT(41, "MC_(make_mem_undefined)");
   1602    DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a, len);
   1603    set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
   1604    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
   1605       ocache_sarp_Set_Origins ( a, len, otag );
   1606 }
   1607 
   1608 static
   1609 void make_mem_undefined_w_tid_and_okind ( Addr a, SizeT len,
   1610                                           ThreadId tid, UInt okind )
   1611 {
   1612    UInt        ecu;
   1613    ExeContext* here;
   1614    /* VG_(record_ExeContext) checks for validity of tid, and asserts
   1615       if it is invalid.  So no need to do it here. */
   1616    tl_assert(okind <= 3);
   1617    here = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
   1618    tl_assert(here);
   1619    ecu = VG_(get_ECU_from_ExeContext)(here);
   1620    tl_assert(VG_(is_plausible_ECU)(ecu));
   1621    MC_(make_mem_undefined_w_otag) ( a, len, ecu | okind );
   1622 }
   1623 
   1624 static
   1625 void make_mem_undefined_w_tid ( Addr a, SizeT len, ThreadId tid ) {
   1626    make_mem_undefined_w_tid_and_okind ( a, len, tid, MC_OKIND_UNKNOWN );
   1627 }
   1628 
   1629 
   1630 void MC_(make_mem_defined) ( Addr a, SizeT len )
   1631 {
   1632    PROF_EVENT(42, "MC_(make_mem_defined)");
   1633    DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a, len);
   1634    set_address_range_perms ( a, len, VA_BITS16_DEFINED, SM_DIST_DEFINED );
   1635    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
   1636       ocache_sarp_Clear_Origins ( a, len );
   1637 }
   1638 
   1639 /* For each byte in [a,a+len), if the byte is addressable, make it be
   1640    defined, but if it isn't addressible, leave it alone.  In other
   1641    defined, but if it isn't addressable, leave it alone.  In other
   1642    words, a version of MC_(make_mem_defined) that doesn't mess with
   1643    addressability.  Low-performance implementation. */
   1644 {
   1645    SizeT i;
   1646    UChar vabits2;
   1647    DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a, (ULong)len);
   1648    for (i = 0; i < len; i++) {
   1649       vabits2 = get_vabits2( a+i );
   1650       if (LIKELY(VA_BITS2_NOACCESS != vabits2)) {
   1651          set_vabits2(a+i, VA_BITS2_DEFINED);
   1652          if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
   1653             MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
   1654          }
   1655       }
   1656    }
   1657 }
   1658 
   1659 /* Similarly (needed for mprotect handling ..) */
   1660 static void make_mem_defined_if_noaccess ( Addr a, SizeT len )
   1661 {
   1662    SizeT i;
   1663    UChar vabits2;
   1664    DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a, (ULong)len);
   1665    for (i = 0; i < len; i++) {
   1666       vabits2 = get_vabits2( a+i );
   1667       if (LIKELY(VA_BITS2_NOACCESS == vabits2)) {
   1668          set_vabits2(a+i, VA_BITS2_DEFINED);
   1669          if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
   1670             MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
   1671          }
   1672       }
   1673    }
   1674 }
   1675 
   1676 /* --- Block-copy permissions (needed for implementing realloc() and
   1677        sys_mremap). --- */
   1678 
   1679 void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT len )
   1680 {
   1681    SizeT i, j;
   1682    UChar vabits2, vabits8;
   1683    Bool  aligned, nooverlap;
   1684 
   1685    DEBUG("MC_(copy_address_range_state)\n");
   1686    PROF_EVENT(50, "MC_(copy_address_range_state)");
   1687 
   1688    if (len == 0 || src == dst)
   1689       return;
   1690 
   1691    aligned   = VG_IS_4_ALIGNED(src) && VG_IS_4_ALIGNED(dst);
   1692    nooverlap = src+len <= dst || dst+len <= src;
   1693 
   1694    if (nooverlap && aligned) {
   1695 
   1696       /* Vectorised fast case, when no overlap and suitably aligned */
   1697       /* vector loop */
   1698       i = 0;
   1699       while (len >= 4) {
   1700          vabits8 = get_vabits8_for_aligned_word32( src+i );
   1701          set_vabits8_for_aligned_word32( dst+i, vabits8 );
   1702          if (LIKELY(VA_BITS8_DEFINED == vabits8
   1703                             || VA_BITS8_UNDEFINED == vabits8
   1704                             || VA_BITS8_NOACCESS == vabits8)) {
   1705             /* do nothing */
   1706          } else {
   1707             /* have to copy secondary map info */
   1708             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+0 ))
   1709                set_sec_vbits8( dst+i+0, get_sec_vbits8( src+i+0 ) );
   1710             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+1 ))
   1711                set_sec_vbits8( dst+i+1, get_sec_vbits8( src+i+1 ) );
   1712             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+2 ))
   1713                set_sec_vbits8( dst+i+2, get_sec_vbits8( src+i+2 ) );
   1714             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+3 ))
   1715                set_sec_vbits8( dst+i+3, get_sec_vbits8( src+i+3 ) );
   1716          }
   1717          i += 4;
   1718          len -= 4;
   1719       }
   1720       /* fixup loop */
   1721       while (len >= 1) {
   1722          vabits2 = get_vabits2( src+i );
   1723          set_vabits2( dst+i, vabits2 );
   1724          if (VA_BITS2_PARTDEFINED == vabits2) {
   1725             set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
   1726          }
   1727          i++;
   1728          len--;
   1729       }
   1730 
   1731    } else {
   1732 
   1733       /* We have to do things the slow way */
   1734       if (src < dst) {
   1735          for (i = 0, j = len-1; i < len; i++, j--) {
   1736             PROF_EVENT(51, "MC_(copy_address_range_state)(loop)");
   1737             vabits2 = get_vabits2( src+j );
   1738             set_vabits2( dst+j, vabits2 );
   1739             if (VA_BITS2_PARTDEFINED == vabits2) {
   1740                set_sec_vbits8( dst+j, get_sec_vbits8( src+j ) );
   1741             }
   1742          }
   1743       }
   1744 
   1745       if (src > dst) {
   1746          for (i = 0; i < len; i++) {
   1747             PROF_EVENT(52, "MC_(copy_address_range_state)(loop)");
   1748             vabits2 = get_vabits2( src+i );
   1749             set_vabits2( dst+i, vabits2 );
   1750             if (VA_BITS2_PARTDEFINED == vabits2) {
   1751                set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
   1752             }
   1753          }
   1754       }
   1755    }
   1756 
   1757 }
   1758 
   1759 
   1760 /*------------------------------------------------------------*/
   1761 /*--- Origin tracking stuff - cache basics                 ---*/
   1762 /*------------------------------------------------------------*/
   1763 
   1764 /* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
   1765    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   1766 
   1767    Note that this implementation draws inspiration from the "origin
   1768    tracking by value piggybacking" scheme described in "Tracking Bad
   1769    Apples: Reporting the Origin of Null and Undefined Value Errors"
   1770    (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer,
   1771    Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is
   1772    implemented completely differently.
   1773 
   1774    Origin tags and ECUs -- about the shadow values
   1775    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   1776 
   1777    This implementation tracks the defining point of all uninitialised
   1778    values using so called "origin tags", which are 32-bit integers,
   1779    values using so-called "origin tags", which are 32-bit integers,
   1780    latter, so-called value piggybacking", is what the OOPSLA07 paper
   1781    latter, so-called "value piggybacking", is what the OOPSLA07 paper
   1782 
   1783    Origin tags, as tracked by the machinery below, are 32-bit unsigned
   1784    ints (UInts), regardless of the machine's word size.  Each tag
   1785    comprises an upper 30-bit ECU field and a lower 2-bit
   1786    'kind' field.  The ECU field is a number given out by m_execontext
   1787    and has a 1-1 mapping with ExeContext*s.  An ECU can be used
   1788    directly as an origin tag (otag), but in fact we use the 'kind'
   1789    field to carry additional information indicating roughly where the
   1790    tag came from.  This helps print more understandable error messages
   1791    for the user -- it has no other purpose.  In summary:
   1792 
   1793    * Both ECUs and origin tags are represented as 32-bit words
   1794 
   1795    * m_execontext and the core-tool interface deal purely in ECUs.
   1796      They have no knowledge of origin tags - that is a purely
   1797      Memcheck-internal matter.
   1798 
   1799    * all valid ECUs have the lowest 2 bits zero and at least
   1800      one of the upper 30 bits nonzero (see VG_(is_plausible_ECU))
   1801 
   1802    * to convert from an ECU to an otag, OR in one of the MC_OKIND_
   1803      constants defined in mc_include.h.
   1804 
   1805    * to convert an otag back to an ECU, AND it with ~3
   1806 
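           As a minimal illustrative sketch of these two conversions (the
           helper names are invented for this comment and do not exist in
           the sources):

              UInt ecu_to_otag ( UInt ecu, UInt okind ) {
                 return ecu | okind;   // okind is one of the MC_OKIND_ values
              }
              UInt otag_to_ecu ( UInt otag ) {
                 return otag & ~3u;    // mask off the 2-bit 'kind' field
              }
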
   1807    One important fact is that no valid otag is zero.  A zero otag is
   1808    used by the implementation to indicate "no origin", which could
   1809    mean that either the value is defined, or it is undefined but the
   1810    implementation somehow managed to lose the origin.
   1811 
   1812    The ECU used for memory created by malloc etc is derived from the
   1813    stack trace at the time the malloc etc happens.  This means the
   1814    mechanism can show the exact allocation point for heap-created
   1815    uninitialised values.
   1816 
   1817    In contrast, it is simply too expensive to create a complete
   1818    backtrace for each stack allocation.  Therefore we merely use a
   1819    depth-1 backtrace for stack allocations, which can be done once at
   1820    translation time, rather than N times at run time.  The result of
   1821    this is that, for stack created uninitialised values, Memcheck can
   1822    only show the allocating function, and not what called it.
   1823    Furthermore, compilers tend to move the stack pointer just once at
   1824    the start of the function, to allocate all locals, and so in fact
   1825    the stack origin almost always simply points to the opening brace
   1826    of the function.  Net result is, for stack origins, the mechanism
   1827    can tell you in which function the undefined value was created, but
   1828    that's all.  Users will need to carefully check all locals in the
   1829    specified function.
   1830 
   1831    Shadowing registers and memory
   1832    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   1833 
   1834    Memory is shadowed using a two level cache structure (ocacheL1 and
   1835    ocacheL2).  Memory references are first directed to ocacheL1.  This
   1836    is a traditional 2-way set associative cache with 32-byte lines and
   1837    approximate LRU replacement within each set.
   1838 
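           Concretely, mirroring the constants and the lookup code further
           down in this file (OC_BITS_PER_LINE is 5, giving 32-byte lines),
           an address a is decomposed for cache indexing as:

              setno = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
              tag   = a & ~((1 << OC_BITS_PER_LINE) - 1);
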
   1839    A naive implementation would require storing one 32 bit otag for
   1840    each byte of memory covered, a 4:1 space overhead.  Instead, there
   1841    is one otag for every 4 bytes of memory covered, plus a 4-bit mask
   1842    that shows which of the 4 bytes have that shadow value and which
   1843    have a shadow value of zero (indicating no origin).  Hence a lot of
   1844    space is saved, but the cost is that only one different origin per
   1845    4 bytes of address space can be represented.  This is a source of
   1846    imprecision, but how much of a problem it really is remains to be
   1847    seen.
   1848 
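           For example, fetching the origin for a single byte at address a
           reduces to something like the following sketch (hedged: the real
           helpers, MC_(helperc_b_store1) and friends, live later in this
           file and differ in detail, including which mask bit corresponds
           to which byte):

              lineoff = oc_line_offset(a);   // which 32-bit word in the line
              byteoff = a & 3;               // which byte within that word
              if (line->descr[lineoff] & (1 << byteoff))
                 otag = line->w32[lineoff];  // this byte carries the otag
              else
                 otag = 0;                   // no origin recorded for this byte
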
   1849    A cache line that contains all zeroes ("no origins") contains no
   1850    useful information, and can be ejected from the L1 cache "for
   1851    free", in the sense that a read miss on the L1 causes a line of
   1852    zeroes to be installed.  However, ejecting a line containing
   1853    nonzeroes risks losing origin information permanently.  In order to
   1854    prevent such lossage, ejected nonzero lines are placed in a
   1855    secondary cache (ocacheL2), which is an OSet (AVL tree) of cache
   1856    lines.  This can grow arbitrarily large, and so should ensure that
   1857    Memcheck runs out of memory in preference to losing useful origin
   1858    info due to cache size limitations.
   1859 
   1860    Shadowing registers is a bit tricky, because the shadow values are
   1861    32 bits, regardless of the size of the register.  That gives a
   1862    problem for registers smaller than 32 bits.  The solution is to
   1863    find spaces in the guest state that are unused, and use those to
   1864    shadow guest state fragments smaller than 32 bits.  For example, on
   1865    ppc32/64, each vector register is 16 bytes long.  If 4 bytes of the
   1866    shadow are allocated for the register's otag, then there are still
   1867    12 bytes left over which could be used to shadow 3 other values.
   1868 
   1869    This implies there is some non-obvious mapping from guest state
   1870    (start,length) pairs to the relevant shadow offset (for the origin
   1871    tags).  And it is unfortunately guest-architecture specific.  The
   1872    mapping is contained in mc_machine.c, which is quite lengthy but
   1873    straightforward.
   1874 
   1875    Instrumenting the IR
   1876    ~~~~~~~~~~~~~~~~~~~~
   1877 
   1878    Instrumentation is largely straightforward, and done by the
   1879    functions schemeE and schemeS in mc_translate.c.  These generate
   1880    code for handling the origin tags of expressions (E) and statements
   1881    (S) respectively.  The rather strange names are a reference to the
   1882    "compilation schemes" shown in Simon Peyton Jones' book "The
   1883    Implementation of Functional Programming Languages" (Prentice Hall,
   1884    1987, see
   1885    http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm).
   1886 
   1887    schemeS merely arranges to move shadow values around the guest
   1888    state to track the incoming IR.  schemeE is largely trivial too.
   1889    The only significant point is how to compute the otag corresponding
   1890    to binary (or ternary, quaternary, etc) operator applications.  The
   1891    rule is simple: just take whichever value is larger (32-bit
   1892    unsigned max).  Constants get the special value zero.  Hence this
   1893    rule always propagates a nonzero (known) otag in preference to a
   1894    zero (unknown, or more likely, value-is-defined) tag, as we want.
   1895    If two different undefined values are inputs to a binary operator
   1896    application, then which is propagated is arbitrary, but that
   1897    doesn't matter, since the program is erroneous in using either of
   1898    the values, and so there's no point in attempting to propagate
   1899    both.
   1900 
   1901    Since constants are abstracted to (otag) zero, much of the
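           In other words the combining rule is just (a sketch of the value
           the generated code computes, not literal code from this file):

              otag_out = (otag_x > otag_y) ? otag_x : otag_y;   // Max32U
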
   1902    instrumentation code can be folded out without difficulty by the
   1903    generic post-instrumentation IR cleanup pass, using these rules:
   1904    Max32U(0,x) -> x, Max32U(x,0) -> x, and Max32U(x,y) where x and y
   1905    are constants is evaluated at JIT time; the resulting dead code is
   1906    then removed.  In practice this causes surprisingly few Max32Us to
   1907    survive through to backend code generation.
   1908 
   1909    Integration with the V-bits machinery
   1910    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   1911 
   1912    This is again largely straightforward.  Mostly the otag and V bits
   1913    stuff are independent.  The only point of interaction is when the V
   1914    bits instrumenter creates a call to a helper function to report an
   1915    uninitialised value error -- in that case it must first use schemeE
   1916    to get hold of the origin tag expression for the value, and pass
   1917    that to the helper too.
   1918 
   1919    There is the usual stuff to do with setting address range
   1920    permissions.  When memory is painted undefined, we must also know
   1921    the origin tag to paint with, which involves some tedious plumbing,
   1922    particularly to do with the fast case stack handlers.  When memory
   1923    is painted defined or noaccess then the origin tags must be forced
   1924    to zero.
   1925 
   1926    One of the goals of the implementation was to ensure that the
   1927    non-origin tracking mode isn't slowed down at all.  To do this,
   1928    various functions to do with memory permissions setting (again,
   1929    mostly pertaining to the stack) are duplicated for the with- and
   1930    without-otag case.
   1931 
   1932    Dealing with stack redzones, and the NIA cache
   1933    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   1934 
   1935    This is one of the few non-obvious parts of the implementation.
   1936 
   1937    Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small
   1938    reserved area below the stack pointer, that can be used as scratch
   1939    space by compiler generated code for functions.  In the Memcheck
   1940    sources this is referred to as the "stack redzone".  The important
   1941    thing here is that such redzones are considered volatile across
   1942    function calls and returns.  So Memcheck takes care to mark them as
   1943    undefined for each call and return, on the afflicted platforms.
   1944    Past experience shows this is essential in order to get reliable
   1945    messages about uninitialised values that come from the stack.
   1946 
   1947    So the question is, when we paint a redzone undefined, what origin
   1948    tag should we use for it?  Consider a function f() calling g().  If
   1949    we paint the redzone using an otag derived from the ExeContext of
   1950    the CALL/BL instruction in f, then any errors in g causing it to
   1951    use uninitialised values that happen to lie in the redzone, will be
   1952    reported as having their origin in f.  Which is highly confusing.
   1953 
   1954    The same applies for returns: if, on a return, we paint the redzone
   1955    using an origin tag derived from the ExeContext of the RET/BLR
   1956    instruction in g, then any later errors in f causing it to use
   1957    uninitialised values in the redzone, will be reported as having
   1958    their origin in g.  Which is just as confusing.
   1959 
   1960    To do it right, in both cases we need to use an origin tag which
   1961    pertains to the instruction which dynamically follows the CALL/BL
   1962    or RET/BLR.  In short, one derived from the NIA - the "next
   1963    instruction address".
   1964 
   1965    To make this work, Memcheck's redzone-painting helper,
   1966    MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the
   1967    NIA.  It converts the NIA to a 1-element ExeContext, and uses that
   1968    ExeContext's ECU as the basis for the otag used to paint the
   1969    redzone.  The expensive part of this is converting an NIA into an
   1970    ECU, since this happens once for every call and every return.  So
   1971    we use a simple 511-line, 2-way set associative cache
   1972    (nia_to_ecu_cache) to cache the mappings, and that knocks most of
   1973    the cost out.
   1974 
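           Schematically, then, painting a redzone boils down to (a sketch
           only; the helper structure that actually does this lives later
           in this file):

              ecu  = ECU of the 1-element ExeContext made from nia
                     (looked up via nia_to_ecu_cache where possible);
              otag = ecu | MC_OKIND_STACK;
              paint [base, base+len) undefined, with origin otag
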
   1975    Further background comments
   1976    ~~~~~~~~~~~~~~~~~~~~~~~~~~~
   1977 
   1978    > Question: why is otag a UInt?  Wouldn't a UWord be better?  Isn't
   1979    > it really just the address of the relevant ExeContext?
   1980 
   1981    Well, it's not the address, but a value which has a 1-1 mapping
   1982    with ExeContexts, and is guaranteed not to be zero, since zero
   1983    denotes (to memcheck) "unknown origin or defined value".  So these
   1984    UInts are just numbers starting at 4 and incrementing by 4; each
   1985    ExeContext is given a number when it is created.  (*** NOTE this
   1986    confuses otags and ECUs; see comments above ***).
   1987 
   1988    Making these otags 32-bit regardless of the machine's word size
   1989    makes the 64-bit implementation easier (next para).  And it doesn't
   1990    really limit us in any way, since for the tags to overflow would
   1991    require that the program somehow caused 2^30-1 different
   1992    ExeContexts to be created, in which case it is probably in deep
   1993    trouble.  Not to mention V will have soaked up many tens of
   1994    gigabytes of memory merely to store them all.
   1995 
   1996    So having 64-bit origins doesn't really buy you anything, and has
   1997    the following downsides:
   1998 
   1999    Suppose that instead, an otag is a UWord.  This would mean that, on
   2000    a 64-bit target,
   2001 
   2002    1. It becomes hard to shadow any element of guest state which is
   2003       smaller than 8 bytes.  To do so means you'd need to find some
   2004       8-byte-sized hole in the guest state which you don't want to
   2005       shadow, and use that instead to hold the otag.  On ppc64, the
   2006       condition code register(s) are split into 20 UChar sized pieces,
   2007       all of which need to be tracked (guest_XER_SO .. guest_CR7_0)
   2008       and so that would entail finding 160 bytes somewhere else in the
   2009       guest state.
   2010 
   2011       Even on x86, I want to track origins for %AH .. %DH (bits 15:8
   2012       of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of
   2013       same) and so I had to look for 4 untracked otag-sized areas in
   2014       the guest state to make that possible.
   2015 
   2016       The same problem exists of course when origin tags are only 32
   2017       bits, but it's less extreme.
   2018 
   2019    2. (More compelling) it doubles the size of the origin shadow
   2020       memory.  Given that the shadow memory is organised as a fixed
   2021       size cache, and that accuracy of tracking is limited by origins
   2022       falling out the cache due to space conflicts, this isn't good.
   2023 
   2024    > Another question: is the origin tracking perfect, or are there
   2025    > cases where it fails to determine an origin?
   2026 
   2027    It is imperfect for at least the following reasons, and
   2028    probably more:
   2029 
   2030    * Insufficient capacity in the origin cache.  When a line is
   2031      evicted from the cache it is gone forever, and so subsequent
   2032      queries for the line produce zero, indicating no origin
   2033      information.  Interestingly, a line containing all zeroes can be
   2034      evicted "free" from the cache, since it contains no useful
   2035      information, so there is scope perhaps for some cleverer cache
   2036      management schemes.  (*** NOTE, with the introduction of the
   2037      second level origin tag cache, ocacheL2, this is no longer a
   2038      problem. ***)
   2039 
   2040    * The origin cache only stores one otag per 32-bits of address
   2041      space, plus 4 bits indicating which of the 4 bytes has that tag
   2042      and which are considered defined.  The result is that if two
   2043      undefined bytes in the same word are stored in memory, the first
   2044      stored byte's origin will be lost and replaced by the origin for
   2045      the second byte.
   2046 
   2047    * Nonzero origin tags for defined values.  Consider a binary
   2048      operator application op(x,y).  Suppose y is undefined (and so has
   2049      a valid nonzero origin tag), and x is defined, but erroneously
   2050      has a nonzero origin tag (defined values should have tag zero).
   2051      If the erroneous tag has a numeric value greater than y's tag,
   2052      then the rule for propagating origin tags through binary
   2053      operations, which is simply to take the unsigned max of the two
   2054      tags, will erroneously propagate x's tag rather than y's.
   2055 
   2056    * Some obscure uses of x86/amd64 byte registers can cause lossage
   2057      or confusion of origins.  %AH .. %DH are treated as different
   2058      from, and unrelated to, their parent registers, %EAX .. %EDX.
   2059      So some weird sequences like
   2060 
   2061         movb undefined-value, %AH
   2062         movb defined-value, %AL
   2063         .. use %AX or %EAX ..
   2064 
   2065      will cause the origin attributed to %AH to be ignored, since %AL,
   2066      %AX, %EAX are treated as the same register, and %AH as a
   2067      completely separate one.
   2068 
   2069    But having said all that, it actually seems to work fairly well in
   2070    practice.
   2071 */
   2072 
   2073 static UWord stats_ocacheL1_find           = 0;
   2074 static UWord stats_ocacheL1_found_at_1     = 0;
   2075 static UWord stats_ocacheL1_found_at_N     = 0;
   2076 static UWord stats_ocacheL1_misses         = 0;
   2077 static UWord stats_ocacheL1_lossage        = 0;
   2078 static UWord stats_ocacheL1_movefwds       = 0;
   2079 
   2080 static UWord stats__ocacheL2_refs          = 0;
   2081 static UWord stats__ocacheL2_misses        = 0;
   2082 static UWord stats__ocacheL2_n_nodes_max   = 0;
   2083 
   2084 /* Cache of 32-bit values, one every 32 bits of address space */
   2085 
   2086 #define OC_BITS_PER_LINE 5
   2087 #define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2))
   2088 
   2089 static INLINE UWord oc_line_offset ( Addr a ) {
   2090    return (a >> 2) & (OC_W32S_PER_LINE - 1);
   2091 }
   2092 static INLINE Bool is_valid_oc_tag ( Addr tag ) {
   2093    return 0 == (tag & ((1 << OC_BITS_PER_LINE) - 1));
   2094 }
   2095 
   2096 #define OC_LINES_PER_SET 2
   2097 
   2098 #define OC_N_SET_BITS    20
   2099 #define OC_N_SETS        (1 << OC_N_SET_BITS)
   2100 
   2101 /* These settings give:
   2102    64 bit host: ocache:  100,663,296 sizeB    67,108,864 useful
   2103    32 bit host: ocache:   92,274,688 sizeB    67,108,864 useful
   2104 */
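
        /* A rough cross-check of those numbers (an informal sketch, assuming
           the OCacheLine layout declared below packs without padding): there
           are OC_N_SETS * OC_LINES_PER_SET = (1 << 20) * 2 = 2,097,152 lines.
           Each line holds 8 UInt w32 entries (32 useful bytes) plus 8 descr
           bytes and an Addr tag, i.e. 48 bytes per line on a 64-bit host and
           44 on a 32-bit host.  Hence 2,097,152 * 48 = 100,663,296 and
           2,097,152 * 44 = 92,274,688 bytes in total, of which
           2,097,152 * 32 = 67,108,864 bytes are the useful w32 payload. */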
   2105 
   2106 #define OC_MOVE_FORWARDS_EVERY_BITS 7
   2107 
   2108 
   2109 typedef
   2110    struct {
   2111       Addr  tag;
   2112       UInt  w32[OC_W32S_PER_LINE];
   2113       UChar descr[OC_W32S_PER_LINE];
   2114    }
   2115    OCacheLine;
   2116 
   2117 /* Classify and also sanity-check 'line'.  Return 'e' (empty) if not
   2118    in use, 'n' (nonzero) if it contains at least one valid origin tag,
   2119    and 'z' if all the represented tags are zero. */
   2120 static UChar classify_OCacheLine ( OCacheLine* line )
   2121 {
   2122    UWord i;
   2123    if (line->tag == 1/*invalid*/)
   2124       return 'e'; /* EMPTY */
   2125    tl_assert(is_valid_oc_tag(line->tag));
   2126    for (i = 0; i < OC_W32S_PER_LINE; i++) {
   2127       tl_assert(0 == ((~0xF) & line->descr[i]));
   2128       if (line->w32[i] > 0 && line->descr[i] > 0)
   2129          return 'n'; /* NONZERO - contains useful info */
   2130    }
   2131    return 'z'; /* ZERO - no useful info */
   2132 }
   2133 
   2134 typedef
   2135    struct {
   2136       OCacheLine line[OC_LINES_PER_SET];
   2137    }
   2138    OCacheSet;
   2139 
   2140 typedef
   2141    struct {
   2142       OCacheSet set[OC_N_SETS];
   2143    }
   2144    OCache;
   2145 
   2146 static OCache* ocacheL1 = NULL;
   2147 static UWord   ocacheL1_event_ctr = 0;
   2148 
   2149 static void init_ocacheL2 ( void ); /* fwds */
   2150 static void init_OCache ( void )
   2151 {
   2152    UWord line, set;
   2153    tl_assert(MC_(clo_mc_level) >= 3);
   2154    tl_assert(ocacheL1 == NULL);
   2155    ocacheL1 = VG_(am_shadow_alloc)(sizeof(OCache));
   2156    if (ocacheL1 == NULL) {
   2157       VG_(out_of_memory_NORETURN)( "memcheck:allocating ocacheL1",
   2158                                    sizeof(OCache) );
   2159    }
   2160    tl_assert(ocacheL1 != NULL);
   2161    for (set = 0; set < OC_N_SETS; set++) {
   2162       for (line = 0; line < OC_LINES_PER_SET; line++) {
   2163          ocacheL1->set[set].line[line].tag = 1/*invalid*/;
   2164       }
   2165    }
   2166    init_ocacheL2();
   2167 }
   2168 
   2169 static void moveLineForwards ( OCacheSet* set, UWord lineno )
   2170 {
   2171    OCacheLine tmp;
   2172    stats_ocacheL1_movefwds++;
   2173    tl_assert(lineno > 0 && lineno < OC_LINES_PER_SET);
   2174    tmp = set->line[lineno-1];
   2175    set->line[lineno-1] = set->line[lineno];
   2176    set->line[lineno] = tmp;
   2177 }
   2178 
   2179 static void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) {
   2180    UWord i;
   2181    for (i = 0; i < OC_W32S_PER_LINE; i++) {
   2182       line->w32[i] = 0; /* NO ORIGIN */
   2183       line->descr[i] = 0; /* REALLY REALLY NO ORIGIN! */
   2184    }
   2185    line->tag = tag;
   2186 }
   2187 
   2188 //////////////////////////////////////////////////////////////
   2189 //// OCache backing store
   2190 
   2191 static OSet* ocacheL2 = NULL;
   2192 
   2193 static void* ocacheL2_malloc ( HChar* cc, SizeT szB ) {
   2194    return VG_(malloc)(cc, szB);
   2195 }
   2196 static void ocacheL2_free ( void* v ) {
   2197    VG_(free)( v );
   2198 }
   2199 
   2200 /* Stats: # nodes currently in tree */
   2201 static UWord stats__ocacheL2_n_nodes = 0;
   2202 
   2203 static void init_ocacheL2 ( void )
   2204 {
   2205    tl_assert(!ocacheL2);
   2206    tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */
   2207    tl_assert(0 == offsetof(OCacheLine,tag));
   2208    ocacheL2
   2209       = VG_(OSetGen_Create)( offsetof(OCacheLine,tag),
   2210                              NULL, /* fast cmp */
   2211                              ocacheL2_malloc, "mc.ioL2", ocacheL2_free);
   2212    tl_assert(ocacheL2);
   2213    stats__ocacheL2_n_nodes = 0;
   2214 }
   2215 
   2216 /* Find line with the given tag in the tree, or NULL if not found. */
   2217 static OCacheLine* ocacheL2_find_tag ( Addr tag )
   2218 {
   2219    OCacheLine* line;
   2220    tl_assert(is_valid_oc_tag(tag));
   2221    stats__ocacheL2_refs++;
   2222    line = VG_(OSetGen_Lookup)( ocacheL2, &tag );
   2223    return line;
   2224 }
   2225 
   2226 /* Delete the line with the given tag from the tree, if it is present, and
   2227    free up the associated memory. */
   2228 static void ocacheL2_del_tag ( Addr tag )
   2229 {
   2230    OCacheLine* line;
   2231    tl_assert(is_valid_oc_tag(tag));
   2232    stats__ocacheL2_refs++;
   2233    line = VG_(OSetGen_Remove)( ocacheL2, &tag );
   2234    if (line) {
   2235       VG_(OSetGen_FreeNode)(ocacheL2, line);
   2236       tl_assert(stats__ocacheL2_n_nodes > 0);
   2237       stats__ocacheL2_n_nodes--;
   2238    }
   2239 }
   2240 
   2241 /* Add a copy of the given line to the tree.  It must not already be
   2242    present. */
   2243 static void ocacheL2_add_line ( OCacheLine* line )
   2244 {
   2245    OCacheLine* copy;
   2246    tl_assert(is_valid_oc_tag(line->tag));
   2247    copy = VG_(OSetGen_AllocNode)( ocacheL2, sizeof(OCacheLine) );
   2248    tl_assert(copy);
   2249    *copy = *line;
   2250    stats__ocacheL2_refs++;
   2251    VG_(OSetGen_Insert)( ocacheL2, copy );
   2252    stats__ocacheL2_n_nodes++;
   2253    if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max)
   2254       stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes;
   2255 }
   2256 
   2257 ////
   2258 //////////////////////////////////////////////////////////////
   2259 
   2260 __attribute__((noinline))
   2261 static OCacheLine* find_OCacheLine_SLOW ( Addr a )
   2262 {
   2263    OCacheLine *victim, *inL2;
   2264    UChar c;
   2265    UWord line;
   2266    UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
   2267    UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
   2268    UWord tag     = a & tagmask;
   2269    tl_assert(setno >= 0 && setno < OC_N_SETS);
   2270 
   2271    /* we already tried line == 0; skip therefore. */
   2272    for (line = 1; line < OC_LINES_PER_SET; line++) {
   2273       if (ocacheL1->set[setno].line[line].tag == tag) {
   2274          if (line == 1) {
   2275             stats_ocacheL1_found_at_1++;
   2276          } else {
   2277             stats_ocacheL1_found_at_N++;
   2278          }
   2279          if (UNLIKELY(0 == (ocacheL1_event_ctr++
   2280                             & ((1<<OC_MOVE_FORWARDS_EVERY_BITS)-1)))) {
   2281             moveLineForwards( &ocacheL1->set[setno], line );
   2282             line--;
   2283          }
   2284          return &ocacheL1->set[setno].line[line];
   2285       }
   2286    }
   2287 
   2288    /* A miss.  Use the last slot.  Implicitly this means we're
   2289       ejecting the line in the last slot. */
   2290    stats_ocacheL1_misses++;
   2291    tl_assert(line == OC_LINES_PER_SET);
   2292    line--;
   2293    tl_assert(line > 0);
   2294 
   2295    /* First, move the to-be-ejected line to the L2 cache. */
   2296    victim = &ocacheL1->set[setno].line[line];
   2297    c = classify_OCacheLine(victim);
   2298    switch (c) {
   2299       case 'e':
   2300          /* the line is empty (has invalid tag); ignore it. */
   2301          break;
   2302       case 'z':
   2303          /* line contains zeroes.  We must ensure the backing store is
   2304             updated accordingly, either by copying the line there
   2305             verbatim, or by ensuring it isn't present there.  We
   2306             choose the latter on the basis that it reduces the size of
   2307             the backing store. */
   2308          ocacheL2_del_tag( victim->tag );
   2309          break;
   2310       case 'n':
   2311          /* line contains at least one real, useful origin.  Copy it
   2312             to the backing store. */
   2313          stats_ocacheL1_lossage++;
   2314          inL2 = ocacheL2_find_tag( victim->tag );
   2315          if (inL2) {
   2316             *inL2 = *victim;
   2317          } else {
   2318             ocacheL2_add_line( victim );
   2319          }
   2320          break;
   2321       default:
   2322          tl_assert(0);
   2323    }
   2324 
   2325    /* Now we must reload the L1 cache from the backing tree, if
   2326       possible. */
   2327    tl_assert(tag != victim->tag); /* stay sane */
   2328    inL2 = ocacheL2_find_tag( tag );
   2329    if (inL2) {
   2330       /* We're in luck.  It's in the L2. */
   2331       ocacheL1->set[setno].line[line] = *inL2;
   2332    } else {
   2333       /* Missed at both levels of the cache hierarchy.  We have to
   2334          declare it as full of zeroes (unknown origins). */
   2335       stats__ocacheL2_misses++;
   2336       zeroise_OCacheLine( &ocacheL1->set[setno].line[line], tag );
   2337    }
   2338 
   2339    /* Move it one forwards */
   2340    moveLineForwards( &ocacheL1->set[setno], line );
   2341    line--;
   2342 
   2343    return &ocacheL1->set[setno].line[line];
   2344 }
   2345 
   2346 static INLINE OCacheLine* find_OCacheLine ( Addr a )
   2347 {
   2348    UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
   2349    UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
   2350    UWord tag     = a & tagmask;
   2351 
   2352    stats_ocacheL1_find++;
   2353 
   2354    if (OC_ENABLE_ASSERTIONS) {
   2355       tl_assert(setno >= 0 && setno < OC_N_SETS);
   2356       tl_assert(0 == (tag & (4 * OC_W32S_PER_LINE - 1)));
   2357    }
   2358 
   2359    if (LIKELY(ocacheL1->set[setno].line[0].tag == tag)) {
   2360       return &ocacheL1->set[setno].line[0];
   2361    }
   2362 
   2363    return find_OCacheLine_SLOW( a );
   2364 }
   2365 
   2366 static INLINE void set_aligned_word64_Origin_to_undef ( Addr a, UInt otag )
   2367 {
   2368    //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
   2369    //// Set the origins for a+0 .. a+7
   2370    { OCacheLine* line;
   2371      UWord lineoff = oc_line_offset(a);
   2372      if (OC_ENABLE_ASSERTIONS) {
   2373         tl_assert(lineoff >= 0
   2374                   && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
   2375      }
   2376      line = find_OCacheLine( a );
   2377      line->descr[lineoff+0] = 0xF;
   2378      line->descr[lineoff+1] = 0xF;
   2379      line->w32[lineoff+0]   = otag;
   2380      line->w32[lineoff+1]   = otag;
   2381    }
   2382    //// END inlined, specialised version of MC_(helperc_b_store8)
   2383 }
   2384 
   2385 
   2386 /*------------------------------------------------------------*/
   2387 /*--- Aligned fast case permission setters,                ---*/
   2388 /*--- for dealing with stacks                              ---*/
   2389 /*------------------------------------------------------------*/
   2390 
   2391 /*--------------------- 32-bit ---------------------*/
   2392 
   2393 /* Nb: by "aligned" here we mean 4-byte aligned */
   2394 
   2395 static INLINE void make_aligned_word32_undefined ( Addr a )
   2396 {
   2397    PROF_EVENT(300, "make_aligned_word32_undefined");
   2398 
   2399 #ifndef PERF_FAST_STACK2
   2400    make_mem_undefined(a, 4);
   2401 #else
   2402    {
   2403       UWord   sm_off;
   2404       SecMap* sm;
   2405 
   2406       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
   2407          PROF_EVENT(301, "make_aligned_word32_undefined-slow1");
   2408          make_mem_undefined(a, 4);
   2409          return;
   2410       }
   2411 
   2412       sm                  = get_secmap_for_writing_low(a);
   2413       sm_off              = SM_OFF(a);
   2414       sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
   2415    }
   2416 #endif
   2417 }
   2418 
   2419 static INLINE
   2420 void make_aligned_word32_undefined_w_otag ( Addr a, UInt otag )
   2421 {
   2422    make_aligned_word32_undefined(a);
   2423    //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
   2424    //// Set the origins for a+0 .. a+3
   2425    { OCacheLine* line;
   2426      UWord lineoff = oc_line_offset(a);
   2427      if (OC_ENABLE_ASSERTIONS) {
   2428         tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   2429      }
   2430      line = find_OCacheLine( a );
   2431      line->descr[lineoff] = 0xF;
   2432      line->w32[lineoff]   = otag;
   2433    }
   2434    //// END inlined, specialised version of MC_(helperc_b_store4)
   2435 }
   2436 
   2437 static INLINE
   2438 void make_aligned_word32_noaccess ( Addr a )
   2439 {
   2440    PROF_EVENT(310, "make_aligned_word32_noaccess");
   2441 
   2442 #ifndef PERF_FAST_STACK2
   2443    MC_(make_mem_noaccess)(a, 4);
   2444 #else
   2445    {
   2446       UWord   sm_off;
   2447       SecMap* sm;
   2448 
   2449       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
   2450          PROF_EVENT(311, "make_aligned_word32_noaccess-slow1");
   2451          MC_(make_mem_noaccess)(a, 4);
   2452          return;
   2453       }
   2454 
   2455       sm                  = get_secmap_for_writing_low(a);
   2456       sm_off              = SM_OFF(a);
   2457       sm->vabits8[sm_off] = VA_BITS8_NOACCESS;
   2458 
   2459       //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
   2460       //// Set the origins for a+0 .. a+3.
   2461       if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
   2462          OCacheLine* line;
   2463          UWord lineoff = oc_line_offset(a);
   2464          if (OC_ENABLE_ASSERTIONS) {
   2465             tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   2466          }
   2467          line = find_OCacheLine( a );
   2468          line->descr[lineoff] = 0;
   2469       }
   2470       //// END inlined, specialised version of MC_(helperc_b_store4)
   2471    }
   2472 #endif
   2473 }
   2474 
   2475 /*--------------------- 64-bit ---------------------*/
   2476 
   2477 /* Nb: by "aligned" here we mean 8-byte aligned */
   2478 
   2479 static INLINE void make_aligned_word64_undefined ( Addr a )
   2480 {
   2481    PROF_EVENT(320, "make_aligned_word64_undefined");
   2482 
   2483 #ifndef PERF_FAST_STACK2
   2484    make_mem_undefined(a, 8);
   2485 #else
   2486    {
   2487       UWord   sm_off16;
   2488       SecMap* sm;
   2489 
   2490       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
   2491          PROF_EVENT(321, "make_aligned_word64_undefined-slow1");
   2492          make_mem_undefined(a, 8);
   2493          return;
   2494       }
   2495 
   2496       sm       = get_secmap_for_writing_low(a);
   2497       sm_off16 = SM_OFF_16(a);
   2498       ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_UNDEFINED;
   2499    }
   2500 #endif
   2501 }
   2502 
   2503 static INLINE
   2504 void make_aligned_word64_undefined_w_otag ( Addr a, UInt otag )
   2505 {
   2506    make_aligned_word64_undefined(a);
   2507    //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
   2508    //// Set the origins for a+0 .. a+7
   2509    { OCacheLine* line;
   2510      UWord lineoff = oc_line_offset(a);
   2511      tl_assert(lineoff >= 0
   2512                && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
   2513      line = find_OCacheLine( a );
   2514      line->descr[lineoff+0] = 0xF;
   2515      line->descr[lineoff+1] = 0xF;
   2516      line->w32[lineoff+0]   = otag;
   2517      line->w32[lineoff+1]   = otag;
   2518    }
   2519    //// END inlined, specialised version of MC_(helperc_b_store8)
   2520 }
   2521 
   2522 static INLINE
   2523 void make_aligned_word64_noaccess ( Addr a )
   2524 {
   2525    PROF_EVENT(330, "make_aligned_word64_noaccess");
   2526 
   2527 #ifndef PERF_FAST_STACK2
   2528    MC_(make_mem_noaccess)(a, 8);
   2529 #else
   2530    {
   2531       UWord   sm_off16;
   2532       SecMap* sm;
   2533 
   2534       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
   2535          PROF_EVENT(331, "make_aligned_word64_noaccess-slow1");
   2536          MC_(make_mem_noaccess)(a, 8);
   2537          return;
   2538       }
   2539 
   2540       sm       = get_secmap_for_writing_low(a);
   2541       sm_off16 = SM_OFF_16(a);
   2542       ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_NOACCESS;
   2543 
   2544       //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
   2545       //// Clear the origins for a+0 .. a+7.
   2546       if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
   2547          OCacheLine* line;
   2548          UWord lineoff = oc_line_offset(a);
   2549          tl_assert(lineoff >= 0
   2550                    && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
   2551          line = find_OCacheLine( a );
   2552          line->descr[lineoff+0] = 0;
   2553          line->descr[lineoff+1] = 0;
   2554       }
   2555       //// END inlined, specialised version of MC_(helperc_b_store8)
   2556    }
   2557 #endif
   2558 }
   2559 
   2560 
   2561 /*------------------------------------------------------------*/
   2562 /*--- Stack pointer adjustment                             ---*/
   2563 /*------------------------------------------------------------*/
   2564 
   2565 #ifdef PERF_FAST_STACK
   2566 #  define MAYBE_USED
   2567 #else
   2568 #  define MAYBE_USED __attribute__((unused))
   2569 #endif
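/* MAYBE_USED marks the specialised stack handlers below as possibly
   unused: when PERF_FAST_STACK is not defined they are never
   registered, and the attribute suppresses the resulting
   unused-function warnings. */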
   2570 
   2571 /*--------------- adjustment by 4 bytes ---------------*/
   2572 
   2573 MAYBE_USED
   2574 static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP, UInt ecu)
   2575 {
   2576    UInt otag = ecu | MC_OKIND_STACK;
   2577    PROF_EVENT(110, "new_mem_stack_4");
   2578    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2579       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
   2580    } else {
   2581       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 4, otag );
   2582    }
   2583 }
   2584 
   2585 MAYBE_USED
   2586 static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP)
   2587 {
   2588    PROF_EVENT(110, "new_mem_stack_4");
   2589    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2590       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2591    } else {
   2592       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 4 );
   2593    }
   2594 }
   2595 
   2596 MAYBE_USED
   2597 static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP)
   2598 {
   2599    PROF_EVENT(120, "die_mem_stack_4");
   2600    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2601       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
   2602    } else {
   2603       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 );
   2604    }
   2605 }
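/* Pattern for all the specialised handlers below: a new_mem_stack_N
   handler runs after SP has moved down by N bytes and makes
   [new_SP, new_SP+N) undefined; a die_mem_stack_N handler runs after
   SP has moved up by N bytes and makes [new_SP-N, new_SP) noaccess.
   Every address is offset by -VG_STACK_REDZONE_SZB so that the
   ABI-mandated redzone below the stack pointer is tracked along with
   the frame itself.  Each handler uses the widest aligned stores the
   known alignment permits, and falls back to the generic range-based
   routines otherwise. */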
   2606 
   2607 /*--------------- adjustment by 8 bytes ---------------*/
   2608 
   2609 MAYBE_USED
   2610 static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP, UInt ecu)
   2611 {
   2612    UInt otag = ecu | MC_OKIND_STACK;
   2613    PROF_EVENT(111, "new_mem_stack_8");
   2614    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2615       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
   2616    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2617       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
   2618       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
   2619    } else {
   2620       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 8, otag );
   2621    }
   2622 }
   2623 
   2624 MAYBE_USED
   2625 static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP)
   2626 {
   2627    PROF_EVENT(111, "new_mem_stack_8");
   2628    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2629       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2630    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2631       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2632       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
   2633    } else {
   2634       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 8 );
   2635    }
   2636 }
   2637 
   2638 MAYBE_USED
   2639 static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP)
   2640 {
   2641    PROF_EVENT(121, "die_mem_stack_8");
   2642    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2643       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
   2644    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2645       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
   2646       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
   2647    } else {
   2648       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 );
   2649    }
   2650 }
   2651 
   2652 /*--------------- adjustment by 12 bytes ---------------*/
   2653 
   2654 MAYBE_USED
   2655 static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP, UInt ecu)
   2656 {
   2657    UInt otag = ecu | MC_OKIND_STACK;
   2658    PROF_EVENT(112, "new_mem_stack_12");
   2659    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2660       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
   2661       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
   2662    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
    2663       /* from the previous test we don't have 8-alignment at offset +0,
    2664          hence we must have 8-alignment at offsets +4/-4.  Hence it is
    2665          safe to do 4 at +0 and then 8 at +4. */
   2666       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
   2667       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
   2668    } else {
   2669       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 12, otag );
   2670    }
   2671 }
   2672 
   2673 MAYBE_USED
   2674 static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP)
   2675 {
   2676    PROF_EVENT(112, "new_mem_stack_12");
   2677    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2678       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2679       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   2680    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
    2681       /* from the previous test we don't have 8-alignment at offset +0,
    2682          hence we must have 8-alignment at offsets +4/-4.  Hence it is
    2683          safe to do 4 at +0 and then 8 at +4. */
   2684       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2685       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
   2686    } else {
   2687       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 12 );
   2688    }
   2689 }
   2690 
   2691 MAYBE_USED
   2692 static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP)
   2693 {
   2694    PROF_EVENT(122, "die_mem_stack_12");
   2695    /* Note the -12 in the test */
   2696    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP-12 )) {
   2697       /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at
   2698          -4. */
   2699       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
   2700       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
   2701    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2702       /* We have 4-alignment at +0, but we don't have 8-alignment at
   2703          -12.  So we must have 8-alignment at -8.  Hence do 4 at -12
   2704          and then 8 at -8. */
   2705       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
   2706       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
   2707    } else {
   2708       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 );
   2709    }
   2710 }
   2711 
   2712 /*--------------- adjustment by 16 bytes ---------------*/
   2713 
   2714 MAYBE_USED
   2715 static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP, UInt ecu)
   2716 {
   2717    UInt otag = ecu | MC_OKIND_STACK;
   2718    PROF_EVENT(113, "new_mem_stack_16");
   2719    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2720       /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
   2721       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
   2722       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
   2723    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2724       /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
   2725          Hence do 4 at +0, 8 at +4, 4 at +12. */
   2726       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   2727       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
   2728       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
   2729    } else {
   2730       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 16, otag );
   2731    }
   2732 }
   2733 
   2734 MAYBE_USED
   2735 static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP)
   2736 {
   2737    PROF_EVENT(113, "new_mem_stack_16");
   2738    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2739       /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
   2740       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2741       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   2742    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2743       /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
   2744          Hence do 4 at +0, 8 at +4, 4 at +12. */
   2745       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2746       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4  );
   2747       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
   2748    } else {
   2749       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 16 );
   2750    }
   2751 }
   2752 
   2753 MAYBE_USED
   2754 static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP)
   2755 {
   2756    PROF_EVENT(123, "die_mem_stack_16");
   2757    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2758       /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */
   2759       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   2760       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
   2761    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2762       /* 8 alignment must be at -12.  Do 4 at -16, 8 at -12, 4 at -4. */
   2763       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   2764       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
   2765       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
   2766    } else {
   2767       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 );
   2768    }
   2769 }
   2770 
   2771 /*--------------- adjustment by 32 bytes ---------------*/
   2772 
   2773 MAYBE_USED
   2774 static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP, UInt ecu)
   2775 {
   2776    UInt otag = ecu | MC_OKIND_STACK;
   2777    PROF_EVENT(114, "new_mem_stack_32");
   2778    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2779       /* Straightforward */
   2780       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   2781       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
   2782       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
   2783       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
   2784    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2785       /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
   2786          +0,+28. */
   2787       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   2788       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
   2789       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
   2790       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+20, otag );
   2791       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+28, otag );
   2792    } else {
   2793       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 32, otag );
   2794    }
   2795 }
   2796 
   2797 MAYBE_USED
   2798 static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP)
   2799 {
   2800    PROF_EVENT(114, "new_mem_stack_32");
   2801    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2802       /* Straightforward */
   2803       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2804       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   2805       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   2806       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   2807    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2808       /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
   2809          +0,+28. */
   2810       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2811       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
   2812       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
   2813       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+20 );
   2814       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+28 );
   2815    } else {
   2816       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 32 );
   2817    }
   2818 }
   2819 
   2820 MAYBE_USED
   2821 static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP)
   2822 {
   2823    PROF_EVENT(124, "die_mem_stack_32");
   2824    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2825       /* Straightforward */
   2826       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   2827       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   2828       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   2829       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   2830    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2831       /* 8 alignment must be at -4 etc.  Hence do 8 at -12,-20,-28 and
   2832          4 at -32,-4. */
   2833       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   2834       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-28 );
   2835       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-20 );
   2836       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
   2837       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
   2838    } else {
   2839       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 );
   2840    }
   2841 }
   2842 
   2843 /*--------------- adjustment by 112 bytes ---------------*/
   2844 
   2845 MAYBE_USED
   2846 static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP, UInt ecu)
   2847 {
   2848    UInt otag = ecu | MC_OKIND_STACK;
   2849    PROF_EVENT(115, "new_mem_stack_112");
   2850    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2851       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   2852       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
   2853       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
   2854       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
   2855       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
   2856       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
   2857       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
   2858       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
   2859       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
   2860       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
   2861       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
   2862       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
   2863       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
   2864       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
   2865    } else {
   2866       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 112, otag );
   2867    }
   2868 }
   2869 
   2870 MAYBE_USED
   2871 static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP)
   2872 {
   2873    PROF_EVENT(115, "new_mem_stack_112");
   2874    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2875       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2876       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   2877       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   2878       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   2879       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
   2880       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
   2881       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
   2882       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
   2883       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
   2884       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
   2885       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
   2886       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
   2887       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
   2888       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
   2889    } else {
   2890       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 112 );
   2891    }
   2892 }
   2893 
   2894 MAYBE_USED
   2895 static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP)
   2896 {
   2897    PROF_EVENT(125, "die_mem_stack_112");
   2898    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2899       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
   2900       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
   2901       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
   2902       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
   2903       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
   2904       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
   2905       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
   2906       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
   2907       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
   2908       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
   2909       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   2910       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   2911       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   2912       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   2913    } else {
   2914       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-112, 112 );
   2915    }
   2916 }
   2917 
   2918 /*--------------- adjustment by 128 bytes ---------------*/
   2919 
   2920 MAYBE_USED
   2921 static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP, UInt ecu)
   2922 {
   2923    UInt otag = ecu | MC_OKIND_STACK;
   2924    PROF_EVENT(116, "new_mem_stack_128");
   2925    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2926       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   2927       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
   2928       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
   2929       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
   2930       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
   2931       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
   2932       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
   2933       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
   2934       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
   2935       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
   2936       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
   2937       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
   2938       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
   2939       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
   2940       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
   2941       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
   2942    } else {
   2943       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 128, otag );
   2944    }
   2945 }
   2946 
   2947 MAYBE_USED
   2948 static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP)
   2949 {
   2950    PROF_EVENT(116, "new_mem_stack_128");
   2951    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2952       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2953       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   2954       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   2955       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   2956       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
   2957       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
   2958       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
   2959       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
   2960       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
   2961       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
   2962       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
   2963       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
   2964       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
   2965       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
   2966       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
   2967       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
   2968    } else {
   2969       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 128 );
   2970    }
   2971 }
   2972 
   2973 MAYBE_USED
   2974 static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP)
   2975 {
   2976    PROF_EVENT(126, "die_mem_stack_128");
   2977    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2978       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
   2979       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
   2980       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
   2981       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
   2982       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
   2983       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
   2984       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
   2985       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
   2986       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
   2987       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
   2988       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
   2989       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
   2990       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   2991       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   2992       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   2993       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   2994    } else {
   2995       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-128, 128 );
   2996    }
   2997 }
   2998 
   2999 /*--------------- adjustment by 144 bytes ---------------*/
   3000 
   3001 MAYBE_USED
   3002 static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP, UInt ecu)
   3003 {
   3004    UInt otag = ecu | MC_OKIND_STACK;
   3005    PROF_EVENT(117, "new_mem_stack_144");
   3006    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3007       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
   3008       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
   3009       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
   3010       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
   3011       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
   3012       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
   3013       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
   3014       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
   3015       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
   3016       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
   3017       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
   3018       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
   3019       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
   3020       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
   3021       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
   3022       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
   3023       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
   3024       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
   3025    } else {
   3026       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 144, otag );
   3027    }
   3028 }
   3029 
   3030 MAYBE_USED
   3031 static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP)
   3032 {
   3033    PROF_EVENT(117, "new_mem_stack_144");
   3034    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3035       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   3036       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   3037       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   3038       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   3039       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
   3040       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
   3041       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
   3042       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
   3043       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
   3044       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
   3045       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
   3046       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
   3047       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
   3048       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
   3049       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
   3050       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
   3051       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
   3052       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
   3053    } else {
   3054       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 144 );
   3055    }
   3056 }
   3057 
   3058 MAYBE_USED
   3059 static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP)
   3060 {
   3061    PROF_EVENT(127, "die_mem_stack_144");
   3062    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3063       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
   3064       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
   3065       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
   3066       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
   3067       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
   3068       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
   3069       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
   3070       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
   3071       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
   3072       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
   3073       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
   3074       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
   3075       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
   3076       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
   3077       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   3078       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   3079       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   3080       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   3081    } else {
   3082       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-144, 144 );
   3083    }
   3084 }
   3085 
   3086 /*--------------- adjustment by 160 bytes ---------------*/
   3087 
   3088 MAYBE_USED
   3089 static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP, UInt ecu)
   3090 {
   3091    UInt otag = ecu | MC_OKIND_STACK;
   3092    PROF_EVENT(118, "new_mem_stack_160");
   3093    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3094       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
   3095       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
   3096       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
   3097       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
   3098       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
   3099       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
   3100       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
   3101       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
   3102       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
   3103       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
   3104       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
   3105       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
   3106       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
   3107       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
   3108       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
   3109       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
   3110       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
   3111       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
   3112       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+144, otag );
   3113       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+152, otag );
   3114    } else {
   3115       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 160, otag );
   3116    }
   3117 }
   3118 
   3119 MAYBE_USED
   3120 static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP)
   3121 {
   3122    PROF_EVENT(118, "new_mem_stack_160");
   3123    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3124       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   3125       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   3126       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   3127       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   3128       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
   3129       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
   3130       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
   3131       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
   3132       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
   3133       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
   3134       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
   3135       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
   3136       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
   3137       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
   3138       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
   3139       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
   3140       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
   3141       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
   3142       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+144 );
   3143       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+152 );
   3144    } else {
   3145       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 160 );
   3146    }
   3147 }
   3148 
   3149 MAYBE_USED
   3150 static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP)
   3151 {
   3152    PROF_EVENT(128, "die_mem_stack_160");
   3153    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3154       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-160);
   3155       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-152);
   3156       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
   3157       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
   3158       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
   3159       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
   3160       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
   3161       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
   3162       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
   3163       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
   3164       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
   3165       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
   3166       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
   3167       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
   3168       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
   3169       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
   3170       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   3171       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   3172       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   3173       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   3174    } else {
   3175       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-160, 160 );
   3176    }
   3177 }
   3178 
   3179 /*--------------- adjustment by N bytes ---------------*/
   3180 
   3181 static void mc_new_mem_stack_w_ECU ( Addr a, SizeT len, UInt ecu )
   3182 {
   3183    UInt otag = ecu | MC_OKIND_STACK;
   3184    PROF_EVENT(115, "new_mem_stack_w_otag");
   3185    MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + a, len, otag );
   3186 }
   3187 
   3188 static void mc_new_mem_stack ( Addr a, SizeT len )
   3189 {
   3190    PROF_EVENT(115, "new_mem_stack");
   3191    make_mem_undefined ( -VG_STACK_REDZONE_SZB + a, len );
   3192 }
   3193 
   3194 static void mc_die_mem_stack ( Addr a, SizeT len )
   3195 {
   3196    PROF_EVENT(125, "die_mem_stack");
   3197    MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + a, len );
   3198 }
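/* The three routines above are the generic, size-independent stack
   handlers.  The core falls back to them for adjustment sizes that
   have no specialised handler, and they are used for all sizes when
   PERF_FAST_STACK is disabled. */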
   3199 
   3200 
   3201 /* The AMD64 ABI says:
   3202 
   3203    "The 128-byte area beyond the location pointed to by %rsp is considered
   3204     to be reserved and shall not be modified by signal or interrupt
   3205     handlers.  Therefore, functions may use this area for temporary data
   3206     that is not needed across function calls.  In particular, leaf functions
   3207     may use this area for their entire stack frame, rather than adjusting
   3208     the stack pointer in the prologue and epilogue.  This area is known as
   3209     red zone [sic]."
   3210 
   3211    So after any call or return we need to mark this redzone as containing
   3212    undefined values.
   3213 
   3214    Consider this:  we're in function f.  f calls g.  g moves rsp down
   3215    modestly (say 16 bytes) and writes stuff all over the red zone, making it
   3216    defined.  g returns.  f is buggy and reads from parts of the red zone
   3217    that it didn't write on.  But because g filled that area in, f is going
   3218    to be picking up defined V bits and so any errors from reading bits of
   3219    the red zone it didn't write, will be missed.  The only solution I could
   3220    think of was to make the red zone undefined when g returns to f.
   3221 
   3222    This is in accordance with the ABI, which makes it clear the redzone
   3223    is volatile across function calls.
   3224 
   3225    The problem occurs the other way round too: f could fill the RZ up
   3226    with defined values and g could mistakenly read them.  So the RZ
   3227    also needs to be nuked on function calls.
   3228 */
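/* MC_(helperc_MAKE_STACK_UNINIT), defined further below, implements
   this re-marking: invoked around calls and returns, it makes the
   area below the stack pointer undefined again, attaching a
   stack-allocation origin when origin tracking (--track-origins=yes,
   i.e. mc_level 3) is active.  Its fast paths cover the two lengths
   that matter in practice: 128 bytes (the AMD64 redzone) and 288
   bytes (the corresponding magic size for ELF ppc64; see below). */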
   3229 
   3230 
   3231 /* Here's a simple cache to hold nia -> ECU mappings.  It could be
   3232    improved so as to have a lower miss rate. */
   3233 
   3234 static UWord stats__nia_cache_queries = 0;
   3235 static UWord stats__nia_cache_misses  = 0;
   3236 
   3237 typedef
   3238    struct { UWord nia0; UWord ecu0;   /* nia0 maps to ecu0 */
   3239             UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */
   3240    WCacheEnt;
   3241 
   3242 #define N_NIA_TO_ECU_CACHE 511
   3243 
   3244 static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE];
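/* Organisation: a direct-mapped table of N_NIA_TO_ECU_CACHE sets,
   each holding two (nia, ecu) pairs.  Slot 0 holds the more recently
   used pair; a hit on slot 1 swaps it into slot 0, and a miss pushes
   slot 0 down to slot 1 (discarding the old slot-1 entry) before
   installing the new pair in slot 0.  The table size is odd,
   presumably so that the simple modulus used as the hash does not
   merely truncate the address to its low bits. */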
   3245 
   3246 static void init_nia_to_ecu_cache ( void )
   3247 {
   3248    UWord       i;
   3249    Addr        zero_addr = 0;
   3250    ExeContext* zero_ec;
   3251    UInt        zero_ecu;
    3252    /* Fill all the slots with an entry for address zero and the ECU
    3253       corresponding to it.  Hence the cache is initially filled with
    3254       valid data. */
   3255    zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr);
   3256    tl_assert(zero_ec);
   3257    zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec);
   3258    tl_assert(VG_(is_plausible_ECU)(zero_ecu));
   3259    for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) {
   3260       nia_to_ecu_cache[i].nia0 = zero_addr;
   3261       nia_to_ecu_cache[i].ecu0 = zero_ecu;
   3262       nia_to_ecu_cache[i].nia1 = zero_addr;
   3263       nia_to_ecu_cache[i].ecu1 = zero_ecu;
   3264    }
   3265 }
   3266 
   3267 static inline UInt convert_nia_to_ecu ( Addr nia )
   3268 {
   3269    UWord i;
   3270    UInt        ecu;
   3271    ExeContext* ec;
   3272 
   3273    tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) );
   3274 
   3275    stats__nia_cache_queries++;
   3276    i = nia % N_NIA_TO_ECU_CACHE;
   3277    tl_assert(i >= 0 && i < N_NIA_TO_ECU_CACHE);
   3278 
   3279    if (LIKELY( nia_to_ecu_cache[i].nia0 == nia ))
   3280       return nia_to_ecu_cache[i].ecu0;
   3281 
   3282    if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) {
   3283 #     define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; }
   3284       SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 );
   3285       SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 );
   3286 #     undef SWAP
   3287       return nia_to_ecu_cache[i].ecu0;
   3288    }
   3289 
   3290    stats__nia_cache_misses++;
   3291    ec = VG_(make_depth_1_ExeContext_from_Addr)(nia);
   3292    tl_assert(ec);
   3293    ecu = VG_(get_ECU_from_ExeContext)(ec);
   3294    tl_assert(VG_(is_plausible_ECU)(ecu));
   3295 
   3296    nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0;
   3297    nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0;
   3298 
   3299    nia_to_ecu_cache[i].nia0 = nia;
   3300    nia_to_ecu_cache[i].ecu0 = (UWord)ecu;
   3301    return ecu;
   3302 }
   3303 
   3304 
   3305 /* Note that this serves both the origin-tracking and
   3306    no-origin-tracking modes.  We assume that calls to it are
   3307    sufficiently infrequent that it isn't worth specialising for the
   3308    with/without origin-tracking cases. */
   3309 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len, Addr nia )
   3310 {
   3311    UInt otag;
   3312    tl_assert(sizeof(UWord) == sizeof(SizeT));
   3313    if (0)
   3314       VG_(printf)("helperc_MAKE_STACK_UNINIT (%#lx,%lu,nia=%#lx)\n",
   3315                   base, len, nia );
   3316 
   3317    if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
   3318       UInt ecu = convert_nia_to_ecu ( nia );
   3319       tl_assert(VG_(is_plausible_ECU)(ecu));
   3320       otag = ecu | MC_OKIND_STACK;
   3321    } else {
   3322       tl_assert(nia == 0);
   3323       otag = 0;
   3324    }
   3325 
   3326 #  if 0
   3327    /* Really slow version */
    3328    MC_(make_mem_undefined_w_otag)(base, len, otag);
   3329 #  endif
   3330 
   3331 #  if 0
   3332    /* Slow(ish) version, which is fairly easily seen to be correct.
   3333    */
   3334    if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
    3335       make_aligned_word64_undefined_w_otag(base +   0, otag);
    3336       make_aligned_word64_undefined_w_otag(base +   8, otag);
    3337       make_aligned_word64_undefined_w_otag(base +  16, otag);
    3338       make_aligned_word64_undefined_w_otag(base +  24, otag);
    3339 
    3340       make_aligned_word64_undefined_w_otag(base +  32, otag);
    3341       make_aligned_word64_undefined_w_otag(base +  40, otag);
    3342       make_aligned_word64_undefined_w_otag(base +  48, otag);
    3343       make_aligned_word64_undefined_w_otag(base +  56, otag);
    3344 
    3345       make_aligned_word64_undefined_w_otag(base +  64, otag);
    3346       make_aligned_word64_undefined_w_otag(base +  72, otag);
    3347       make_aligned_word64_undefined_w_otag(base +  80, otag);
    3348       make_aligned_word64_undefined_w_otag(base +  88, otag);
    3349 
    3350       make_aligned_word64_undefined_w_otag(base +  96, otag);
    3351       make_aligned_word64_undefined_w_otag(base + 104, otag);
    3352       make_aligned_word64_undefined_w_otag(base + 112, otag);
    3353       make_aligned_word64_undefined_w_otag(base + 120, otag);
    3354    } else {
    3355       MC_(make_mem_undefined_w_otag)(base, len, otag);
   3356    }
   3357 #  endif
   3358 
   3359    /* Idea is: go fast when
   3360          * 8-aligned and length is 128
   3361          * the sm is available in the main primary map
    3362          * the address range falls entirely within a single secondary map
   3363       If all those conditions hold, just update the V+A bits by writing
   3364       directly into the vabits array.  (If the sm was distinguished, this
   3365       will make a copy and then write to it.)
   3366    */
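   /* Size check: each UShort written into vabits8 holds the V+A bits
      for 8 bytes (2 bits per byte), so the 128-byte case needs 16
      such stores (p[0..15]) and the 288-byte ppc64 case further down
      needs 36 (p[0..35]). */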
   3367 
   3368    if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
   3369       /* Now we know the address range is suitably sized and aligned. */
   3370       UWord a_lo = (UWord)(base);
   3371       UWord a_hi = (UWord)(base + 128 - 1);
   3372       tl_assert(a_lo < a_hi);             // paranoia: detect overflow
   3373       if (a_hi <= MAX_PRIMARY_ADDRESS) {
   3374          // Now we know the entire range is within the main primary map.
   3375          SecMap* sm    = get_secmap_for_writing_low(a_lo);
   3376          SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
   3377          /* Now we know that the entire address range falls within a
   3378             single secondary map, and that that secondary 'lives' in
   3379             the main primary map. */
   3380          if (LIKELY(sm == sm_hi)) {
   3381             // Finally, we know that the range is entirely within one secmap.
   3382             UWord   v_off = SM_OFF(a_lo);
   3383             UShort* p     = (UShort*)(&sm->vabits8[v_off]);
   3384             p[ 0] = VA_BITS16_UNDEFINED;
   3385             p[ 1] = VA_BITS16_UNDEFINED;
   3386             p[ 2] = VA_BITS16_UNDEFINED;
   3387             p[ 3] = VA_BITS16_UNDEFINED;
   3388             p[ 4] = VA_BITS16_UNDEFINED;
   3389             p[ 5] = VA_BITS16_UNDEFINED;
   3390             p[ 6] = VA_BITS16_UNDEFINED;
   3391             p[ 7] = VA_BITS16_UNDEFINED;
   3392             p[ 8] = VA_BITS16_UNDEFINED;
   3393             p[ 9] = VA_BITS16_UNDEFINED;
   3394             p[10] = VA_BITS16_UNDEFINED;
   3395             p[11] = VA_BITS16_UNDEFINED;
   3396             p[12] = VA_BITS16_UNDEFINED;
   3397             p[13] = VA_BITS16_UNDEFINED;
   3398             p[14] = VA_BITS16_UNDEFINED;
   3399             p[15] = VA_BITS16_UNDEFINED;
   3400             if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
   3401                set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
   3402                set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
   3403                set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
   3404                set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
   3405                set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
   3406                set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
   3407                set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
   3408                set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
   3409                set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
   3410                set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
   3411                set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
   3412                set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
   3413                set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
   3414                set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
   3415                set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
   3416                set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
   3417             }
   3418             return;
   3419          }
   3420       }
   3421    }
   3422 
   3423    /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
   3424    if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
   3425       /* Now we know the address range is suitably sized and aligned. */
   3426       UWord a_lo = (UWord)(base);
   3427       UWord a_hi = (UWord)(base + 288 - 1);
   3428       tl_assert(a_lo < a_hi);             // paranoia: detect overflow
   3429       if (a_hi <= MAX_PRIMARY_ADDRESS) {
   3430          // Now we know the entire range is within the main primary map.
   3431          SecMap* sm    = get_secmap_for_writing_low(a_lo);
   3432          SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
   3433          /* Now we know that the entire address range falls within a
   3434             single secondary map, and that that secondary 'lives' in
   3435             the main primary map. */
   3436          if (LIKELY(sm == sm_hi)) {
   3437             // Finally, we know that the range is entirely within one secmap.
   3438             UWord   v_off = SM_OFF(a_lo);
   3439             UShort* p     = (UShort*)(&sm->vabits8[v_off]);
   3440             p[ 0] = VA_BITS16_UNDEFINED;
   3441             p[ 1] = VA_BITS16_UNDEFINED;
   3442             p[ 2] = VA_BITS16_UNDEFINED;
   3443             p[ 3] = VA_BITS16_UNDEFINED;
   3444             p[ 4] = VA_BITS16_UNDEFINED;
   3445             p[ 5] = VA_BITS16_UNDEFINED;
   3446             p[ 6] = VA_BITS16_UNDEFINED;
   3447             p[ 7] = VA_BITS16_UNDEFINED;
   3448             p[ 8] = VA_BITS16_UNDEFINED;
   3449             p[ 9] = VA_BITS16_UNDEFINED;
   3450             p[10] = VA_BITS16_UNDEFINED;
   3451             p[11] = VA_BITS16_UNDEFINED;
   3452             p[12] = VA_BITS16_UNDEFINED;
   3453             p[13] = VA_BITS16_UNDEFINED;
   3454             p[14] = VA_BITS16_UNDEFINED;
   3455             p[15] = VA_BITS16_UNDEFINED;
   3456             p[16] = VA_BITS16_UNDEFINED;
   3457             p[17] = VA_BITS16_UNDEFINED;
   3458             p[18] = VA_BITS16_UNDEFINED;
   3459             p[19] = VA_BITS16_UNDEFINED;
   3460             p[20] = VA_BITS16_UNDEFINED;
   3461             p[21] = VA_BITS16_UNDEFINED;
   3462             p[22] = VA_BITS16_UNDEFINED;
   3463             p[23] = VA_BITS16_UNDEFINED;
   3464             p[24] = VA_BITS16_UNDEFINED;
   3465             p[25] = VA_BITS16_UNDEFINED;
   3466             p[26] = VA_BITS16_UNDEFINED;
   3467             p[27] = VA_BITS16_UNDEFINED;
   3468             p[28] = VA_BITS16_UNDEFINED;
   3469             p[29] = VA_BITS16_UNDEFINED;
   3470             p[30] = VA_BITS16_UNDEFINED;
   3471             p[31] = VA_BITS16_UNDEFINED;
   3472             p[32] = VA_BITS16_UNDEFINED;
   3473             p[33] = VA_BITS16_UNDEFINED;
   3474             p[34] = VA_BITS16_UNDEFINED;
   3475             p[35] = VA_BITS16_UNDEFINED;
   3476             if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
   3477                set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
   3478                set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
   3479                set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
   3480                set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
   3481                set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
   3482                set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
   3483                set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
   3484                set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
   3485                set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
   3486                set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
   3487                set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
   3488                set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
   3489                set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
   3490                set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
   3491                set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
   3492                set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
   3493                set_aligned_word64_Origin_to_undef( base + 8 * 16, otag );
   3494                set_aligned_word64_Origin_to_undef( base + 8 * 17, otag );
   3495                set_aligned_word64_Origin_to_undef( base + 8 * 18, otag );
   3496                set_aligned_word64_Origin_to_undef( base + 8 * 19, otag );
   3497                set_aligned_word64_Origin_to_undef( base + 8 * 20, otag );
   3498                set_aligned_word64_Origin_to_undef( base + 8 * 21, otag );
   3499                set_aligned_word64_Origin_to_undef( base + 8 * 22, otag );
   3500                set_aligned_word64_Origin_to_undef( base + 8 * 23, otag );
   3501                set_aligned_word64_Origin_to_undef( base + 8 * 24, otag );
   3502                set_aligned_word64_Origin_to_undef( base + 8 * 25, otag );
   3503                set_aligned_word64_Origin_to_undef( base + 8 * 26, otag );
   3504                set_aligned_word64_Origin_to_undef( base + 8 * 27, otag );
   3505                set_aligned_word64_Origin_to_undef( base + 8 * 28, otag );
   3506                set_aligned_word64_Origin_to_undef( base + 8 * 29, otag );
   3507                set_aligned_word64_Origin_to_undef( base + 8 * 30, otag );
   3508                set_aligned_word64_Origin_to_undef( base + 8 * 31, otag );
   3509                set_aligned_word64_Origin_to_undef( base + 8 * 32, otag );
   3510                set_aligned_word64_Origin_to_undef( base + 8 * 33, otag );
   3511                set_aligned_word64_Origin_to_undef( base + 8 * 34, otag );
   3512                set_aligned_word64_Origin_to_undef( base + 8 * 35, otag );
   3513             }
   3514             return;
   3515          }
   3516       }
   3517    }