      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- MemCheck: Maintain bitmaps of memory, tracking the           ---*/
      4 /*--- accessibility (A) and validity (V) status of each byte.      ---*/
      5 /*---                                                    mc_main.c ---*/
      6 /*--------------------------------------------------------------------*/
      7 
      8 /*
      9    This file is part of MemCheck, a heavyweight Valgrind tool for
     10    detecting memory errors.
     11 
     12    Copyright (C) 2000-2013 Julian Seward
     13       jseward (at) acm.org
     14 
     15    This program is free software; you can redistribute it and/or
     16    modify it under the terms of the GNU General Public License as
     17    published by the Free Software Foundation; either version 2 of the
     18    License, or (at your option) any later version.
     19 
     20    This program is distributed in the hope that it will be useful, but
     21    WITHOUT ANY WARRANTY; without even the implied warranty of
     22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     23    General Public License for more details.
     24 
     25    You should have received a copy of the GNU General Public License
     26    along with this program; if not, write to the Free Software
     27    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     28    02111-1307, USA.
     29 
     30    The GNU General Public License is contained in the file COPYING.
     31 */
     32 
     33 #include "pub_tool_basics.h"
     34 #include "pub_tool_aspacemgr.h"
     35 #include "pub_tool_gdbserver.h"
     36 #include "pub_tool_poolalloc.h"
     37 #include "pub_tool_hashtable.h"     // For mc_include.h
     38 #include "pub_tool_libcbase.h"
     39 #include "pub_tool_libcassert.h"
     40 #include "pub_tool_libcprint.h"
     41 #include "pub_tool_machine.h"
     42 #include "pub_tool_mallocfree.h"
     43 #include "pub_tool_options.h"
     44 #include "pub_tool_oset.h"
     45 #include "pub_tool_rangemap.h"
     46 #include "pub_tool_replacemalloc.h"
     47 #include "pub_tool_tooliface.h"
     48 #include "pub_tool_threadstate.h"
     49 
     50 #include "mc_include.h"
     51 #include "memcheck.h"   /* for client requests */
     52 
     53 
     54 /* Set to 1 to do a little more sanity checking */
     55 #define VG_DEBUG_MEMORY 0
     56 
     57 #define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)
     58 
     59 static void ocache_sarp_Set_Origins ( Addr, UWord, UInt ); /* fwds */
     60 static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */
     61 
     62 
     63 /*------------------------------------------------------------*/
     64 /*--- Fast-case knobs                                      ---*/
     65 /*------------------------------------------------------------*/
     66 
     67 // Comment these out to disable the fast cases (don't just set them to zero).
     68 
     69 #define PERF_FAST_LOADV    1
     70 #define PERF_FAST_STOREV   1
     71 
     72 #define PERF_FAST_SARP     1
     73 
     74 #define PERF_FAST_STACK    1
     75 #define PERF_FAST_STACK2   1
     76 
     77 /* Change this to 1 to enable assertions on origin tracking cache fast
     78    paths */
     79 #define OC_ENABLE_ASSERTIONS 0
     80 
     81 
     82 /*------------------------------------------------------------*/
     83 /*--- Comments on the origin tracking implementation       ---*/
     84 /*------------------------------------------------------------*/
     85 
     86 /* See detailed comment entitled
     87    AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
     88    which is contained further on in this file. */
     89 
     90 
     91 /*------------------------------------------------------------*/
     92 /*--- V bits and A bits                                    ---*/
     93 /*------------------------------------------------------------*/
     94 
     95 /* Conceptually, every byte value has 8 V bits, which track whether Memcheck
     96    thinks the corresponding value bit is defined.  And every memory byte
     97    has an A bit, which tracks whether Memcheck thinks the program can access
     98    it safely (ie. it's mapped, and has at least one of the RWX permission bits
     99    set).  So every N-bit register is shadowed with N V bits, and every memory
    100    byte is shadowed with 8 V bits and one A bit.
    101 
    102    In the implementation, we use two forms of compression (compressed V bits
    103    and distinguished secondary maps) to avoid the 9-bit-per-byte overhead
    104    for memory.
    105 
    106    Memcheck also tracks extra information about each heap block that is
    107    allocated, for detecting memory leaks and other purposes.
    108 */
    109 
    110 /*------------------------------------------------------------*/
    111 /*--- Basic A/V bitmap representation.                     ---*/
    112 /*------------------------------------------------------------*/
    113 
    114 /* All reads and writes are checked against a memory map (a.k.a. shadow
    115    memory), which records the state of all memory in the process.
    116 
    117    On 32-bit machines the memory map is organised as follows.
    118    The top 16 bits of an address are used to index into a top-level
    119    map table, containing 65536 entries.  Each entry is a pointer to a
     120    second-level map, which records the accessibility and validity
    121    permissions for the 65536 bytes indexed by the lower 16 bits of the
    122    address.  Each byte is represented by two bits (details are below).  So
    123    each second-level map contains 16384 bytes.  This two-level arrangement
     124    conveniently divides the 4G address space into 64k lumps, each 64k
     125    bytes in size.
    126 
    127    All entries in the primary (top-level) map must point to a valid
    128    secondary (second-level) map.  Since many of the 64kB chunks will
    129    have the same status for every bit -- ie. noaccess (for unused
    130    address space) or entirely addressable and defined (for code segments) --
    131    there are three distinguished secondary maps, which indicate 'noaccess',
    132    'undefined' and 'defined'.  For these uniform 64kB chunks, the primary
    133    map entry points to the relevant distinguished map.  In practice,
    134    typically more than half of the addressable memory is represented with
    135    the 'undefined' or 'defined' distinguished secondary map, so it gives a
    136    good saving.  It also lets us set the V+A bits of large address regions
    137    quickly in set_address_range_perms().
    138 
    139    On 64-bit machines it's more complicated.  If we followed the same basic
    140    scheme we'd have a four-level table which would require too many memory
    141    accesses.  So instead the top-level map table has 2^20 entries (indexed
    142    using bits 16..35 of the address);  this covers the bottom 64GB.  Any
    143    accesses above 64GB are handled with a slow, sparse auxiliary table.
    144    Valgrind's address space manager tries very hard to keep things below
    145    this 64GB barrier so that performance doesn't suffer too much.
    146 
    147    Note that this file has a lot of different functions for reading and
    148    writing shadow memory.  Only a couple are strictly necessary (eg.
     149    get_vabits2 and set_vabits2); the rest are just specialised for
     150    common cases, to improve performance.
    151 
    152    Aside: the V+A bits are less precise than they could be -- we have no way
    153    of marking memory as read-only.  It would be great if we could add an
    154    extra state VA_BITSn_READONLY.  But then we'd have 5 different states,
    155    which requires 2.3 bits to hold, and there's no way to do that elegantly
    156    -- we'd have to double up to 4 bits of metadata per byte, which doesn't
    157    seem worth it.
    158 */
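
         /* Illustrative sketch (not part of the tool): how the two-level scheme
            described above decomposes an address on a 32-bit target.  The names
            below are local to this example; the real lookups use primary_map[],
            SM_OFF() and the vabits8 helpers defined later in this file. */
         #if 0
         static void example_decompose_address_32bit ( Addr a )
         {
            UWord pm_index     = a >> 16;           /* which of the 65536 primary entries */
            UWord off_in_chunk = a & 0xFFFF;        /* byte offset within the 64kB chunk  */
            UWord vabits8_idx  = off_in_chunk >> 2; /* which vabits8 chunk in the SecMap  */
            UWord lane_shift   = (a & 3) << 1;      /* 2-bit lane for this byte within it */
            /* (primary_map[pm_index]->vabits8[vabits8_idx] >> lane_shift) & 3 yields
               the VA_BITS2_* state of the byte at 'a'. */
         }
         #endif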
    159 
    160 /* --------------- Basic configuration --------------- */
    161 
    162 /* Only change this.  N_PRIMARY_MAP *must* be a power of 2. */
    163 
    164 #if VG_WORDSIZE == 4
    165 
    166 /* cover the entire address space */
    167 #  define N_PRIMARY_BITS  16
    168 
    169 #else
    170 
    171 /* Just handle the first 64G fast and the rest via auxiliary
    172    primaries.  If you change this, Memcheck will assert at startup.
    173    See the definition of UNALIGNED_OR_HIGH for extensive comments. */
    174 #  define N_PRIMARY_BITS  20
    175 
    176 #endif
    177 
    178 
    179 /* Do not change this. */
    180 #define N_PRIMARY_MAP  ( ((UWord)1) << N_PRIMARY_BITS)
    181 
    182 /* Do not change this. */
    183 #define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1)
    184 
    185 
    186 /* --------------- Secondary maps --------------- */
    187 
    188 // Each byte of memory conceptually has an A bit, which indicates its
     189 // addressability, and 8 V bits, which indicate its definedness.
    190 //
    191 // But because very few bytes are partially defined, we can use a nice
    192 // compression scheme to reduce the size of shadow memory.  Each byte of
     193 // memory has 2 bits which indicate its state (ie. V+A bits):
    194 //
    195 //   00:  noaccess    (unaddressable but treated as fully defined)
    196 //   01:  undefined   (addressable and fully undefined)
    197 //   10:  defined     (addressable and fully defined)
    198 //   11:  partdefined (addressable and partially defined)
    199 //
    200 // In the "partdefined" case, we use a secondary table to store the V bits.
    201 // Each entry in the secondary-V-bits table maps a byte address to its 8 V
    202 // bits.
    203 //
    204 // We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for
    205 // four bytes (32 bits) of memory are in each chunk.  Hence the name
    206 // "vabits8".  This lets us get the V+A bits for four bytes at a time
    207 // easily (without having to do any shifting and/or masking), and that is a
    208 // very common operation.  (Note that although each vabits8 chunk
    209 // is 8 bits in size, it represents 32 bits of memory.)
    210 //
    211 // The representation is "inverse" little-endian... each 4 bytes of
    212 // memory is represented by a 1 byte value, where:
    213 //
    214 // - the status of byte (a+0) is held in bits [1..0]
    215 // - the status of byte (a+1) is held in bits [3..2]
    216 // - the status of byte (a+2) is held in bits [5..4]
    217 // - the status of byte (a+3) is held in bits [7..6]
    218 //
    219 // It's "inverse" because endianness normally describes a mapping from
    220 // value bits to memory addresses;  in this case the mapping is inverted.
    221 // Ie. instead of particular value bits being held in certain addresses, in
    222 // this case certain addresses are represented by particular value bits.
    223 // See insert_vabits2_into_vabits8() for an example.
    224 //
    225 // But note that we don't compress the V bits stored in registers;  they
     226 // need to be explicit to make the shadow operations possible.  Therefore
    227 // when moving values between registers and memory we need to convert
    228 // between the expanded in-register format and the compressed in-memory
    229 // format.  This isn't so difficult, it just requires careful attention in a
    230 // few places.
    231 
    232 // These represent eight bits of memory.
    233 #define VA_BITS2_NOACCESS     0x0      // 00b
    234 #define VA_BITS2_UNDEFINED    0x1      // 01b
    235 #define VA_BITS2_DEFINED      0x2      // 10b
    236 #define VA_BITS2_PARTDEFINED  0x3      // 11b
    237 
    238 // These represent 16 bits of memory.
    239 #define VA_BITS4_NOACCESS     0x0      // 00_00b
    240 #define VA_BITS4_UNDEFINED    0x5      // 01_01b
    241 #define VA_BITS4_DEFINED      0xa      // 10_10b
    242 
    243 // These represent 32 bits of memory.
    244 #define VA_BITS8_NOACCESS     0x00     // 00_00_00_00b
    245 #define VA_BITS8_UNDEFINED    0x55     // 01_01_01_01b
    246 #define VA_BITS8_DEFINED      0xaa     // 10_10_10_10b
    247 
    248 // These represent 64 bits of memory.
    249 #define VA_BITS16_NOACCESS    0x0000   // 00_00_00_00b x 2
    250 #define VA_BITS16_UNDEFINED   0x5555   // 01_01_01_01b x 2
    251 #define VA_BITS16_DEFINED     0xaaaa   // 10_10_10_10b x 2
    252 
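         // Worked example (illustrative): suppose byte a+0 is defined, a+1 is
         // undefined, a+2 is noaccess and a+3 is defined.  Using the "inverse"
         // little-endian packing described above, the vabits8 chunk for that
         // aligned 4-byte group is
         //
         //      (VA_BITS2_DEFINED   << 6)   // a+3, bits [7..6]
         //    | (VA_BITS2_NOACCESS  << 4)   // a+2, bits [5..4]
         //    | (VA_BITS2_UNDEFINED << 2)   // a+1, bits [3..2]
         //    | (VA_BITS2_DEFINED   << 0)   // a+0, bits [1..0]
         //    == 0x86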
    253 
    254 #define SM_CHUNKS             16384
    255 #define SM_OFF(aaa)           (((aaa) & 0xffff) >> 2)
    256 #define SM_OFF_16(aaa)        (((aaa) & 0xffff) >> 3)
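
         // Example (illustrative): SM_OFF(a) indexes the vabits8 chunk covering
         // the aligned 4-byte group containing 'a' within its SecMap, and
         // SM_OFF_16(a) indexes the vabits16 (UShort) unit covering the aligned
         // 8-byte group.  Eg. for a == 0x4000A010:  a & 0xffff == 0xA010, so
         // SM_OFF(a) == 0x2804 and SM_OFF_16(a) == 0x1402.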
    257 
    258 // Paranoia:  it's critical for performance that the requested inlining
    259 // occurs.  So try extra hard.
    260 #define INLINE    inline __attribute__((always_inline))
    261 
    262 static INLINE Addr start_of_this_sm ( Addr a ) {
    263    return (a & (~SM_MASK));
    264 }
    265 static INLINE Bool is_start_of_sm ( Addr a ) {
    266    return (start_of_this_sm(a) == a);
    267 }
    268 
    269 typedef
    270    struct {
    271       UChar vabits8[SM_CHUNKS];
    272    }
    273    SecMap;
    274 
    275 // 3 distinguished secondary maps, one for no-access, one for
    276 // accessible but undefined, and one for accessible and defined.
    277 // Distinguished secondaries may never be modified.
    278 #define SM_DIST_NOACCESS   0
    279 #define SM_DIST_UNDEFINED  1
    280 #define SM_DIST_DEFINED    2
    281 
    282 static SecMap sm_distinguished[3];
    283 
    284 static INLINE Bool is_distinguished_sm ( SecMap* sm ) {
    285    return sm >= &sm_distinguished[0] && sm <= &sm_distinguished[2];
    286 }
    287 
    288 // Forward declaration
    289 static void update_SM_counts(SecMap* oldSM, SecMap* newSM);
    290 
    291 /* dist_sm points to one of our three distinguished secondaries.  Make
    292    a copy of it so that we can write to it.
    293 */
    294 static SecMap* copy_for_writing ( SecMap* dist_sm )
    295 {
    296    SecMap* new_sm;
    297    tl_assert(dist_sm == &sm_distinguished[0]
    298           || dist_sm == &sm_distinguished[1]
    299           || dist_sm == &sm_distinguished[2]);
    300 
    301    new_sm = VG_(am_shadow_alloc)(sizeof(SecMap));
    302    if (new_sm == NULL)
    303       VG_(out_of_memory_NORETURN)( "memcheck:allocate new SecMap",
    304                                    sizeof(SecMap) );
    305    VG_(memcpy)(new_sm, dist_sm, sizeof(SecMap));
    306    update_SM_counts(dist_sm, new_sm);
    307    return new_sm;
    308 }
    309 
    310 /* --------------- Stats --------------- */
    311 
    312 static Int   n_issued_SMs      = 0;
    313 static Int   n_deissued_SMs    = 0;
    314 static Int   n_noaccess_SMs    = N_PRIMARY_MAP; // start with many noaccess DSMs
    315 static Int   n_undefined_SMs   = 0;
    316 static Int   n_defined_SMs     = 0;
    317 static Int   n_non_DSM_SMs     = 0;
    318 static Int   max_noaccess_SMs  = 0;
    319 static Int   max_undefined_SMs = 0;
    320 static Int   max_defined_SMs   = 0;
    321 static Int   max_non_DSM_SMs   = 0;
    322 
    323 /* # searches initiated in auxmap_L1, and # base cmps required */
    324 static ULong n_auxmap_L1_searches  = 0;
    325 static ULong n_auxmap_L1_cmps      = 0;
    326 /* # of searches that missed in auxmap_L1 and therefore had to
    327    be handed to auxmap_L2. And the number of nodes inserted. */
    328 static ULong n_auxmap_L2_searches  = 0;
    329 static ULong n_auxmap_L2_nodes     = 0;
    330 
    331 static Int   n_sanity_cheap     = 0;
    332 static Int   n_sanity_expensive = 0;
    333 
    334 static Int   n_secVBit_nodes   = 0;
    335 static Int   max_secVBit_nodes = 0;
    336 
    337 static void update_SM_counts(SecMap* oldSM, SecMap* newSM)
    338 {
    339    if      (oldSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs --;
    340    else if (oldSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs--;
    341    else if (oldSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  --;
    342    else                                                  { n_non_DSM_SMs  --;
    343                                                            n_deissued_SMs ++; }
    344 
    345    if      (newSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs ++;
    346    else if (newSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs++;
    347    else if (newSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  ++;
    348    else                                                  { n_non_DSM_SMs  ++;
    349                                                            n_issued_SMs   ++; }
    350 
    351    if (n_noaccess_SMs  > max_noaccess_SMs ) max_noaccess_SMs  = n_noaccess_SMs;
    352    if (n_undefined_SMs > max_undefined_SMs) max_undefined_SMs = n_undefined_SMs;
    353    if (n_defined_SMs   > max_defined_SMs  ) max_defined_SMs   = n_defined_SMs;
    354    if (n_non_DSM_SMs   > max_non_DSM_SMs  ) max_non_DSM_SMs   = n_non_DSM_SMs;
    355 }
    356 
    357 /* --------------- Primary maps --------------- */
    358 
    359 /* The main primary map.  This covers some initial part of the address
    360    space, addresses 0 .. (N_PRIMARY_MAP << 16)-1.  The rest of it is
    361    handled using the auxiliary primary map.
    362 */
    363 static SecMap* primary_map[N_PRIMARY_MAP];
    364 
    365 
    366 /* An entry in the auxiliary primary map.  base must be a 64k-aligned
    367    value, and sm points at the relevant secondary map.  As with the
    368    main primary map, the secondary may be either a real secondary, or
    369    one of the three distinguished secondaries.  DO NOT CHANGE THIS
    370    LAYOUT: the first word has to be the key for OSet fast lookups.
    371 */
    372 typedef
    373    struct {
    374       Addr    base;
    375       SecMap* sm;
    376    }
    377    AuxMapEnt;
    378 
    379 /* Tunable parameter: How big is the L1 queue? */
    380 #define N_AUXMAP_L1 24
    381 
    382 /* Tunable parameter: How far along the L1 queue to insert
    383    entries resulting from L2 lookups? */
    384 #define AUXMAP_L1_INSERT_IX 12
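
         /* The L1 array acts as a small self-organising cache in front of the
            L2 OSet: a hit at index i is swapped one slot towards the front, so
            frequently-used entries migrate to the lowest indices, and entries
            fetched from L2 are (re)inserted in the middle, at index
            AUXMAP_L1_INSERT_IX, so they must prove themselves before displacing
            the hottest entries.  See maybe_find_in_auxmap() below. */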
    385 
    386 static struct {
    387           Addr       base;
    388           AuxMapEnt* ent; // pointer to the matching auxmap_L2 node
    389        }
    390        auxmap_L1[N_AUXMAP_L1];
    391 
    392 static OSet* auxmap_L2 = NULL;
    393 
    394 static void init_auxmap_L1_L2 ( void )
    395 {
    396    Int i;
    397    for (i = 0; i < N_AUXMAP_L1; i++) {
    398       auxmap_L1[i].base = 0;
    399       auxmap_L1[i].ent  = NULL;
    400    }
    401 
    402    tl_assert(0 == offsetof(AuxMapEnt,base));
    403    tl_assert(sizeof(Addr) == sizeof(void*));
    404    auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/  offsetof(AuxMapEnt,base),
    405                                     /*fastCmp*/ NULL,
    406                                     VG_(malloc), "mc.iaLL.1", VG_(free) );
    407 }
    408 
    409 /* Check representation invariants; if OK return NULL; else a
    410    descriptive bit of text.  Also return the number of
    411    non-distinguished secondary maps referred to from the auxiliary
    412    primary maps. */
    413 
    414 static const HChar* check_auxmap_L1_L2_sanity ( Word* n_secmaps_found )
    415 {
    416    Word i, j;
    417    /* On a 32-bit platform, the L2 and L1 tables should
    418       both remain empty forever.
    419 
    420       On a 64-bit platform:
    421       In the L2 table:
    422        all .base & 0xFFFF == 0
    423        all .base > MAX_PRIMARY_ADDRESS
    424       In the L1 table:
    425        all .base & 0xFFFF == 0
    426        all (.base > MAX_PRIMARY_ADDRESS
    427             .base & 0xFFFF == 0
    428             and .ent points to an AuxMapEnt with the same .base)
    429            or
    430            (.base == 0 and .ent == NULL)
    431    */
    432    *n_secmaps_found = 0;
    433    if (sizeof(void*) == 4) {
    434       /* 32-bit platform */
    435       if (VG_(OSetGen_Size)(auxmap_L2) != 0)
    436          return "32-bit: auxmap_L2 is non-empty";
    437       for (i = 0; i < N_AUXMAP_L1; i++)
     438          if (auxmap_L1[i].base != 0 || auxmap_L1[i].ent != NULL)
     439             return "32-bit: auxmap_L1 is non-empty";
    440    } else {
    441       /* 64-bit platform */
    442       UWord elems_seen = 0;
    443       AuxMapEnt *elem, *res;
    444       AuxMapEnt key;
    445       /* L2 table */
    446       VG_(OSetGen_ResetIter)(auxmap_L2);
    447       while ( (elem = VG_(OSetGen_Next)(auxmap_L2)) ) {
    448          elems_seen++;
    449          if (0 != (elem->base & (Addr)0xFFFF))
    450             return "64-bit: nonzero .base & 0xFFFF in auxmap_L2";
    451          if (elem->base <= MAX_PRIMARY_ADDRESS)
    452             return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2";
    453          if (elem->sm == NULL)
    454             return "64-bit: .sm in _L2 is NULL";
    455          if (!is_distinguished_sm(elem->sm))
    456             (*n_secmaps_found)++;
    457       }
    458       if (elems_seen != n_auxmap_L2_nodes)
    459          return "64-bit: disagreement on number of elems in _L2";
    460       /* Check L1-L2 correspondence */
    461       for (i = 0; i < N_AUXMAP_L1; i++) {
    462          if (auxmap_L1[i].base == 0 && auxmap_L1[i].ent == NULL)
    463             continue;
    464          if (0 != (auxmap_L1[i].base & (Addr)0xFFFF))
    465             return "64-bit: nonzero .base & 0xFFFF in auxmap_L1";
    466          if (auxmap_L1[i].base <= MAX_PRIMARY_ADDRESS)
    467             return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1";
    468          if (auxmap_L1[i].ent == NULL)
    469             return "64-bit: .ent is NULL in auxmap_L1";
    470          if (auxmap_L1[i].ent->base != auxmap_L1[i].base)
    471             return "64-bit: _L1 and _L2 bases are inconsistent";
    472          /* Look it up in auxmap_L2. */
    473          key.base = auxmap_L1[i].base;
    474          key.sm   = 0;
    475          res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
    476          if (res == NULL)
    477             return "64-bit: _L1 .base not found in _L2";
    478          if (res != auxmap_L1[i].ent)
    479             return "64-bit: _L1 .ent disagrees with _L2 entry";
    480       }
    481       /* Check L1 contains no duplicates */
    482       for (i = 0; i < N_AUXMAP_L1; i++) {
    483          if (auxmap_L1[i].base == 0)
    484             continue;
     485          for (j = i+1; j < N_AUXMAP_L1; j++) {
    486             if (auxmap_L1[j].base == 0)
    487                continue;
    488             if (auxmap_L1[j].base == auxmap_L1[i].base)
    489                return "64-bit: duplicate _L1 .base entries";
    490          }
    491       }
    492    }
    493    return NULL; /* ok */
    494 }
    495 
    496 static void insert_into_auxmap_L1_at ( Word rank, AuxMapEnt* ent )
    497 {
    498    Word i;
    499    tl_assert(ent);
    500    tl_assert(rank >= 0 && rank < N_AUXMAP_L1);
    501    for (i = N_AUXMAP_L1-1; i > rank; i--)
    502       auxmap_L1[i] = auxmap_L1[i-1];
    503    auxmap_L1[rank].base = ent->base;
    504    auxmap_L1[rank].ent  = ent;
    505 }
    506 
    507 static INLINE AuxMapEnt* maybe_find_in_auxmap ( Addr a )
    508 {
    509    AuxMapEnt  key;
    510    AuxMapEnt* res;
    511    Word       i;
    512 
    513    tl_assert(a > MAX_PRIMARY_ADDRESS);
    514    a &= ~(Addr)0xFFFF;
    515 
    516    /* First search the front-cache, which is a self-organising
    517       list containing the most popular entries. */
    518 
    519    if (LIKELY(auxmap_L1[0].base == a))
    520       return auxmap_L1[0].ent;
    521    if (LIKELY(auxmap_L1[1].base == a)) {
    522       Addr       t_base = auxmap_L1[0].base;
    523       AuxMapEnt* t_ent  = auxmap_L1[0].ent;
    524       auxmap_L1[0].base = auxmap_L1[1].base;
    525       auxmap_L1[0].ent  = auxmap_L1[1].ent;
    526       auxmap_L1[1].base = t_base;
    527       auxmap_L1[1].ent  = t_ent;
    528       return auxmap_L1[0].ent;
    529    }
    530 
    531    n_auxmap_L1_searches++;
    532 
    533    for (i = 0; i < N_AUXMAP_L1; i++) {
    534       if (auxmap_L1[i].base == a) {
    535          break;
    536       }
    537    }
    538    tl_assert(i >= 0 && i <= N_AUXMAP_L1);
    539 
    540    n_auxmap_L1_cmps += (ULong)(i+1);
    541 
    542    if (i < N_AUXMAP_L1) {
    543       if (i > 0) {
    544          Addr       t_base = auxmap_L1[i-1].base;
    545          AuxMapEnt* t_ent  = auxmap_L1[i-1].ent;
    546          auxmap_L1[i-1].base = auxmap_L1[i-0].base;
    547          auxmap_L1[i-1].ent  = auxmap_L1[i-0].ent;
    548          auxmap_L1[i-0].base = t_base;
    549          auxmap_L1[i-0].ent  = t_ent;
    550          i--;
    551       }
    552       return auxmap_L1[i].ent;
    553    }
    554 
    555    n_auxmap_L2_searches++;
    556 
    557    /* First see if we already have it. */
    558    key.base = a;
    559    key.sm   = 0;
    560 
    561    res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
    562    if (res)
    563       insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, res );
    564    return res;
    565 }
    566 
    567 static AuxMapEnt* find_or_alloc_in_auxmap ( Addr a )
    568 {
    569    AuxMapEnt *nyu, *res;
    570 
    571    /* First see if we already have it. */
    572    res = maybe_find_in_auxmap( a );
    573    if (LIKELY(res))
    574       return res;
    575 
    576    /* Ok, there's no entry in the secondary map, so we'll have
    577       to allocate one. */
    578    a &= ~(Addr)0xFFFF;
    579 
    580    nyu = (AuxMapEnt*) VG_(OSetGen_AllocNode)( auxmap_L2, sizeof(AuxMapEnt) );
    581    nyu->base = a;
    582    nyu->sm   = &sm_distinguished[SM_DIST_NOACCESS];
    583    VG_(OSetGen_Insert)( auxmap_L2, nyu );
    584    insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, nyu );
    585    n_auxmap_L2_nodes++;
    586    return nyu;
    587 }
    588 
    589 /* --------------- SecMap fundamentals --------------- */
    590 
    591 // In all these, 'low' means it's definitely in the main primary map,
    592 // 'high' means it's definitely in the auxiliary table.
    593 
    594 static INLINE SecMap** get_secmap_low_ptr ( Addr a )
    595 {
    596    UWord pm_off = a >> 16;
    597 #  if VG_DEBUG_MEMORY >= 1
    598    tl_assert(pm_off < N_PRIMARY_MAP);
    599 #  endif
    600    return &primary_map[ pm_off ];
    601 }
    602 
    603 static INLINE SecMap** get_secmap_high_ptr ( Addr a )
    604 {
    605    AuxMapEnt* am = find_or_alloc_in_auxmap(a);
    606    return &am->sm;
    607 }
    608 
    609 static INLINE SecMap** get_secmap_ptr ( Addr a )
    610 {
    611    return ( a <= MAX_PRIMARY_ADDRESS
    612           ? get_secmap_low_ptr(a)
    613           : get_secmap_high_ptr(a));
    614 }
    615 
    616 static INLINE SecMap* get_secmap_for_reading_low ( Addr a )
    617 {
    618    return *get_secmap_low_ptr(a);
    619 }
    620 
    621 static INLINE SecMap* get_secmap_for_reading_high ( Addr a )
    622 {
    623    return *get_secmap_high_ptr(a);
    624 }
    625 
    626 static INLINE SecMap* get_secmap_for_writing_low(Addr a)
    627 {
    628    SecMap** p = get_secmap_low_ptr(a);
    629    if (UNLIKELY(is_distinguished_sm(*p)))
    630       *p = copy_for_writing(*p);
    631    return *p;
    632 }
    633 
    634 static INLINE SecMap* get_secmap_for_writing_high ( Addr a )
    635 {
    636    SecMap** p = get_secmap_high_ptr(a);
    637    if (UNLIKELY(is_distinguished_sm(*p)))
    638       *p = copy_for_writing(*p);
    639    return *p;
    640 }
    641 
    642 /* Produce the secmap for 'a', either from the primary map or by
    643    ensuring there is an entry for it in the aux primary map.  The
    644    secmap may be a distinguished one as the caller will only want to
    645    be able to read it.
    646 */
    647 static INLINE SecMap* get_secmap_for_reading ( Addr a )
    648 {
    649    return ( a <= MAX_PRIMARY_ADDRESS
    650           ? get_secmap_for_reading_low (a)
    651           : get_secmap_for_reading_high(a) );
    652 }
    653 
    654 /* Produce the secmap for 'a', either from the primary map or by
    655    ensuring there is an entry for it in the aux primary map.  The
    656    secmap may not be a distinguished one, since the caller will want
    657    to be able to write it.  If it is a distinguished secondary, make a
    658    writable copy of it, install it, and return the copy instead.  (COW
    659    semantics).
    660 */
    661 static INLINE SecMap* get_secmap_for_writing ( Addr a )
    662 {
    663    return ( a <= MAX_PRIMARY_ADDRESS
    664           ? get_secmap_for_writing_low (a)
    665           : get_secmap_for_writing_high(a) );
    666 }
    667 
    668 /* If 'a' has a SecMap, produce it.  Else produce NULL.  But don't
    669    allocate one if one doesn't already exist.  This is used by the
    670    leak checker.
    671 */
    672 static SecMap* maybe_get_secmap_for ( Addr a )
    673 {
    674    if (a <= MAX_PRIMARY_ADDRESS) {
    675       return get_secmap_for_reading_low(a);
    676    } else {
    677       AuxMapEnt* am = maybe_find_in_auxmap(a);
    678       return am ? am->sm : NULL;
    679    }
    680 }
    681 
    682 /* --------------- Fundamental functions --------------- */
    683 
    684 static INLINE
    685 void insert_vabits2_into_vabits8 ( Addr a, UChar vabits2, UChar* vabits8 )
    686 {
    687    UInt shift =  (a & 3)  << 1;        // shift by 0, 2, 4, or 6
    688    *vabits8  &= ~(0x3     << shift);   // mask out the two old bits
    689    *vabits8  |=  (vabits2 << shift);   // mask  in the two new bits
    690 }
    691 
    692 static INLINE
    693 void insert_vabits4_into_vabits8 ( Addr a, UChar vabits4, UChar* vabits8 )
    694 {
    695    UInt shift;
    696    tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
    697    shift     =  (a & 2)   << 1;        // shift by 0 or 4
    698    *vabits8 &= ~(0xf      << shift);   // mask out the four old bits
    699    *vabits8 |=  (vabits4 << shift);    // mask  in the four new bits
    700 }
    701 
    702 static INLINE
    703 UChar extract_vabits2_from_vabits8 ( Addr a, UChar vabits8 )
    704 {
    705    UInt shift = (a & 3) << 1;          // shift by 0, 2, 4, or 6
    706    vabits8 >>= shift;                  // shift the two bits to the bottom
    707    return 0x3 & vabits8;               // mask out the rest
    708 }
    709 
    710 static INLINE
    711 UChar extract_vabits4_from_vabits8 ( Addr a, UChar vabits8 )
    712 {
    713    UInt shift;
    714    tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
    715    shift = (a & 2) << 1;               // shift by 0 or 4
    716    vabits8 >>= shift;                  // shift the four bits to the bottom
    717    return 0xf & vabits8;               // mask out the rest
    718 }
    719 
    720 // Note that these four are only used in slow cases.  The fast cases do
    721 // clever things like combine the auxmap check (in
    722 // get_secmap_{read,writ}able) with alignment checks.
    723 
    724 // *** WARNING! ***
    725 // Any time this function is called, if it is possible that vabits2
    726 // is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the
    727 // sec-V-bits table must also be set!
    728 static INLINE
    729 void set_vabits2 ( Addr a, UChar vabits2 )
    730 {
    731    SecMap* sm       = get_secmap_for_writing(a);
    732    UWord   sm_off   = SM_OFF(a);
    733    insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
    734 }
    735 
    736 static INLINE
    737 UChar get_vabits2 ( Addr a )
    738 {
    739    SecMap* sm       = get_secmap_for_reading(a);
    740    UWord   sm_off   = SM_OFF(a);
    741    UChar   vabits8  = sm->vabits8[sm_off];
    742    return extract_vabits2_from_vabits8(a, vabits8);
    743 }
    744 
    745 // *** WARNING! ***
    746 // Any time this function is called, if it is possible that any of the
    747 // 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the
    748 // corresponding entry(s) in the sec-V-bits table must also be set!
    749 static INLINE
    750 UChar get_vabits8_for_aligned_word32 ( Addr a )
    751 {
    752    SecMap* sm       = get_secmap_for_reading(a);
    753    UWord   sm_off   = SM_OFF(a);
    754    UChar   vabits8  = sm->vabits8[sm_off];
    755    return vabits8;
    756 }
    757 
    758 static INLINE
    759 void set_vabits8_for_aligned_word32 ( Addr a, UChar vabits8 )
    760 {
    761    SecMap* sm       = get_secmap_for_writing(a);
    762    UWord   sm_off   = SM_OFF(a);
    763    sm->vabits8[sm_off] = vabits8;
    764 }
    765 
    766 
    767 // Forward declarations
    768 static UWord get_sec_vbits8(Addr a);
    769 static void  set_sec_vbits8(Addr a, UWord vbits8);
    770 
    771 // Returns False if there was an addressability error.
    772 static INLINE
    773 Bool set_vbits8 ( Addr a, UChar vbits8 )
    774 {
    775    Bool  ok      = True;
    776    UChar vabits2 = get_vabits2(a);
    777    if ( VA_BITS2_NOACCESS != vabits2 ) {
    778       // Addressable.  Convert in-register format to in-memory format.
    779       // Also remove any existing sec V bit entry for the byte if no
    780       // longer necessary.
    781       if      ( V_BITS8_DEFINED   == vbits8 ) { vabits2 = VA_BITS2_DEFINED;   }
    782       else if ( V_BITS8_UNDEFINED == vbits8 ) { vabits2 = VA_BITS2_UNDEFINED; }
    783       else                                    { vabits2 = VA_BITS2_PARTDEFINED;
    784                                                 set_sec_vbits8(a, vbits8);  }
    785       set_vabits2(a, vabits2);
    786 
    787    } else {
    788       // Unaddressable!  Do nothing -- when writing to unaddressable
    789       // memory it acts as a black hole, and the V bits can never be seen
    790       // again.  So we don't have to write them at all.
    791       ok = False;
    792    }
    793    return ok;
    794 }
    795 
    796 // Returns False if there was an addressability error.  In that case, we put
    797 // all defined bits into vbits8.
    798 static INLINE
    799 Bool get_vbits8 ( Addr a, UChar* vbits8 )
    800 {
    801    Bool  ok      = True;
    802    UChar vabits2 = get_vabits2(a);
    803 
    804    // Convert the in-memory format to in-register format.
    805    if      ( VA_BITS2_DEFINED   == vabits2 ) { *vbits8 = V_BITS8_DEFINED;   }
    806    else if ( VA_BITS2_UNDEFINED == vabits2 ) { *vbits8 = V_BITS8_UNDEFINED; }
    807    else if ( VA_BITS2_NOACCESS  == vabits2 ) {
    808       *vbits8 = V_BITS8_DEFINED;    // Make V bits defined!
    809       ok = False;
    810    } else {
    811       tl_assert( VA_BITS2_PARTDEFINED == vabits2 );
    812       *vbits8 = get_sec_vbits8(a);
    813    }
    814    return ok;
    815 }
    816 
    817 
    818 /* --------------- Secondary V bit table ------------ */
    819 
    820 // This table holds the full V bit pattern for partially-defined bytes
    821 // (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow
    822 // memory.
    823 //
    824 // Note: the nodes in this table can become stale.  Eg. if you write a PDB,
    825 // then overwrite the same address with a fully defined byte, the sec-V-bit
    826 // node will not necessarily be removed.  This is because checking for
    827 // whether removal is necessary would slow down the fast paths.
    828 //
    829 // To avoid the stale nodes building up too much, we periodically (once the
    830 // table reaches a certain size) garbage collect (GC) the table by
     831 // traversing it and evicting any nodes that no longer hold a PDB.
     832 // If more than a certain proportion of nodes survive a GC, we increase the
     833 // table size so that GCs occur less often.
    834 //
    835 // This policy is designed to avoid bad table bloat in the worst case where
    836 // a program creates huge numbers of stale PDBs -- we would get this bloat
    837 // if we had no GC -- while handling well the case where a node becomes
    838 // stale but shortly afterwards is rewritten with a PDB and so becomes
    839 // non-stale again (which happens quite often, eg. in perf/bz2).  If we just
    840 // remove all stale nodes as soon as possible, we just end up re-adding a
     841 // lot of them later.  The "sufficiently stale" approach avoids
    842 // this.  (If a program has many live PDBs, performance will just suck,
    843 // there's no way around that.)
    844 //
    845 // Further comments, JRS 14 Feb 2012.  It turns out that the policy of
    846 // holding on to stale entries for 2 GCs before discarding them can lead
    847 // to massive space leaks.  So we're changing to an arrangement where
    848 // lines are evicted as soon as they are observed to be stale during a
    849 // GC.  This also has a side benefit of allowing the sufficiently_stale
    850 // field to be removed from the SecVBitNode struct, reducing its size by
    851 // 8 bytes, which is a substantial space saving considering that the
    852 // struct was previously 32 or so bytes, on a 64 bit target.
    853 //
    854 // In order to try and mitigate the problem that the "sufficiently stale"
    855 // heuristic was designed to avoid, the table size is allowed to drift
    856 // up ("DRIFTUP") slowly to 80000, even if the residency is low.  This
    857 // means that nodes will exist in the table longer on average, and hopefully
    858 // will be deleted and re-added less frequently.
    859 //
    860 // The previous scaling up mechanism (now called STEPUP) is retained:
    861 // if residency exceeds 50%, the table is scaled up, although by a
    862 // factor sqrt(2) rather than 2 as before.  This effectively doubles the
     863 // frequency of GCs when there are many PDBs and reduces the tendency of
    864 // stale PDBs to reside for long periods in the table.
    865 
    866 static OSet* secVBitTable;
    867 
    868 // Stats
    869 static ULong sec_vbits_new_nodes = 0;
    870 static ULong sec_vbits_updates   = 0;
    871 
    872 // This must be a power of two;  this is checked in mc_pre_clo_init().
    873 // The size chosen here is a trade-off:  if the nodes are bigger (ie. cover
    874 // a larger address range) they take more space but we can get multiple
    875 // partially-defined bytes in one if they are close to each other, reducing
     876 // the total number of nodes.  In practice sometimes they are clustered (eg.
    877 // perf/bz2 repeatedly writes then reads more than 20,000 in a contiguous
    878 // row), but often not.  So we choose something intermediate.
    879 #define BYTES_PER_SEC_VBIT_NODE     16
    880 
    881 // We make the table bigger by a factor of STEPUP_GROWTH_FACTOR if
    882 // more than this many nodes survive a GC.
    883 #define STEPUP_SURVIVOR_PROPORTION  0.5
    884 #define STEPUP_GROWTH_FACTOR        1.414213562
    885 
    886 // If the above heuristic doesn't apply, then we may make the table
    887 // slightly bigger, by a factor of DRIFTUP_GROWTH_FACTOR, if more than
    888 // this many nodes survive a GC, _and_ the total table size does
    889 // not exceed a fixed limit.  The numbers are somewhat arbitrary, but
    890 // work tolerably well on long Firefox runs.  The scaleup ratio of 1.5%
     891 // effectively, though only gradually, reduces residency and increases time
    892 // between GCs for programs with small numbers of PDBs.  The 80000 limit
    893 // effectively limits the table size to around 2MB for programs with
    894 // small numbers of PDBs, whilst giving a reasonably long lifetime to
    895 // entries, to try and reduce the costs resulting from deleting and
    896 // re-adding of entries.
    897 #define DRIFTUP_SURVIVOR_PROPORTION 0.15
    898 #define DRIFTUP_GROWTH_FACTOR       1.015
    899 #define DRIFTUP_MAX_SIZE            80000
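
         /* Illustrative sketch (not part of the tool): how the table limit
            evolves under the two growth rules above, assuming the initial limit
            of 1000 used below.  This mirrors the logic in gcSecVBitTable(). */
         #if 0
         static Int example_next_secVBit_limit ( Int limit, Int n_survivors )
         {
            if ((Double)n_survivors > (Double)limit * STEPUP_SURVIVOR_PROPORTION)
               return (Int)((Double)limit * STEPUP_GROWTH_FACTOR);  /* eg. 1000 -> 1414 */
            if (limit < DRIFTUP_MAX_SIZE
                && (Double)n_survivors > (Double)limit * DRIFTUP_SURVIVOR_PROPORTION)
               return (Int)((Double)limit * DRIFTUP_GROWTH_FACTOR); /* eg. 1000 -> 1015 */
            return limit;                                           /* unchanged */
         }
         #endif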
    900 
    901 // We GC the table when it gets this many nodes in it, ie. it's effectively
    902 // the table size.  It can change.
    903 static Int  secVBitLimit = 1000;
    904 
    905 // The number of GCs done, used to age sec-V-bit nodes for eviction.
    906 // Because it's unsigned, wrapping doesn't matter -- the right answer will
    907 // come out anyway.
    908 static UInt GCs_done = 0;
    909 
    910 typedef
    911    struct {
    912       Addr  a;
    913       UChar vbits8[BYTES_PER_SEC_VBIT_NODE];
    914    }
    915    SecVBitNode;
    916 
    917 static OSet* createSecVBitTable(void)
    918 {
    919    OSet* newSecVBitTable;
    920    newSecVBitTable = VG_(OSetGen_Create_With_Pool)
    921       ( offsetof(SecVBitNode, a),
    922         NULL, // use fast comparisons
    923         VG_(malloc), "mc.cSVT.1 (sec VBit table)",
    924         VG_(free),
    925         1000,
    926         sizeof(SecVBitNode));
    927    return newSecVBitTable;
    928 }
    929 
    930 static void gcSecVBitTable(void)
    931 {
    932    OSet*        secVBitTable2;
    933    SecVBitNode* n;
    934    Int          i, n_nodes = 0, n_survivors = 0;
    935 
    936    GCs_done++;
    937 
    938    // Create the new table.
    939    secVBitTable2 = createSecVBitTable();
    940 
    941    // Traverse the table, moving fresh nodes into the new table.
    942    VG_(OSetGen_ResetIter)(secVBitTable);
    943    while ( (n = VG_(OSetGen_Next)(secVBitTable)) ) {
    944       // Keep node if any of its bytes are non-stale.  Using
    945       // get_vabits2() for the lookup is not very efficient, but I don't
    946       // think it matters.
    947       for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
    948          if (VA_BITS2_PARTDEFINED == get_vabits2(n->a + i)) {
    949             // Found a non-stale byte, so keep =>
    950             // Insert a copy of the node into the new table.
    951             SecVBitNode* n2 =
    952                VG_(OSetGen_AllocNode)(secVBitTable2, sizeof(SecVBitNode));
    953             *n2 = *n;
    954             VG_(OSetGen_Insert)(secVBitTable2, n2);
    955             break;
    956          }
    957       }
    958    }
    959 
    960    // Get the before and after sizes.
    961    n_nodes     = VG_(OSetGen_Size)(secVBitTable);
    962    n_survivors = VG_(OSetGen_Size)(secVBitTable2);
    963 
    964    // Destroy the old table, and put the new one in its place.
    965    VG_(OSetGen_Destroy)(secVBitTable);
    966    secVBitTable = secVBitTable2;
    967 
    968    if (VG_(clo_verbosity) > 1 && n_nodes != 0) {
    969       VG_(message)(Vg_DebugMsg, "memcheck GC: %d nodes, %d survivors (%.1f%%)\n",
    970                    n_nodes, n_survivors, n_survivors * 100.0 / n_nodes);
    971    }
    972 
    973    // Increase table size if necessary.
    974    if ((Double)n_survivors
    975        > ((Double)secVBitLimit * STEPUP_SURVIVOR_PROPORTION)) {
    976       secVBitLimit = (Int)((Double)secVBitLimit * (Double)STEPUP_GROWTH_FACTOR);
    977       if (VG_(clo_verbosity) > 1)
    978          VG_(message)(Vg_DebugMsg,
    979                       "memcheck GC: %d new table size (stepup)\n",
    980                       secVBitLimit);
    981    }
    982    else
    983    if (secVBitLimit < DRIFTUP_MAX_SIZE
    984        && (Double)n_survivors
    985           > ((Double)secVBitLimit * DRIFTUP_SURVIVOR_PROPORTION)) {
    986       secVBitLimit = (Int)((Double)secVBitLimit * (Double)DRIFTUP_GROWTH_FACTOR);
    987       if (VG_(clo_verbosity) > 1)
    988          VG_(message)(Vg_DebugMsg,
    989                       "memcheck GC: %d new table size (driftup)\n",
    990                       secVBitLimit);
    991    }
    992 }
    993 
    994 static UWord get_sec_vbits8(Addr a)
    995 {
    996    Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
    997    Int          amod     = a % BYTES_PER_SEC_VBIT_NODE;
    998    SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
    999    UChar        vbits8;
   1000    tl_assert2(n, "get_sec_vbits8: no node for address %p (%p)\n", aAligned, a);
   1001    // Shouldn't be fully defined or fully undefined -- those cases shouldn't
   1002    // make it to the secondary V bits table.
   1003    vbits8 = n->vbits8[amod];
   1004    tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
   1005    return vbits8;
   1006 }
   1007 
   1008 static void set_sec_vbits8(Addr a, UWord vbits8)
   1009 {
   1010    Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
   1011    Int          i, amod  = a % BYTES_PER_SEC_VBIT_NODE;
   1012    SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
   1013    // Shouldn't be fully defined or fully undefined -- those cases shouldn't
   1014    // make it to the secondary V bits table.
   1015    tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
   1016    if (n) {
   1017       n->vbits8[amod] = vbits8;     // update
   1018       sec_vbits_updates++;
   1019    } else {
   1020       // Do a table GC if necessary.  Nb: do this before creating and
   1021       // inserting the new node, to avoid erroneously GC'ing the new node.
   1022       if (secVBitLimit == VG_(OSetGen_Size)(secVBitTable)) {
   1023          gcSecVBitTable();
   1024       }
   1025 
   1026       // New node:  assign the specific byte, make the rest invalid (they
   1027       // should never be read as-is, but be cautious).
   1028       n = VG_(OSetGen_AllocNode)(secVBitTable, sizeof(SecVBitNode));
   1029       n->a            = aAligned;
   1030       for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
   1031          n->vbits8[i] = V_BITS8_UNDEFINED;
   1032       }
   1033       n->vbits8[amod] = vbits8;
   1034 
   1035       // Insert the new node.
   1036       VG_(OSetGen_Insert)(secVBitTable, n);
   1037       sec_vbits_new_nodes++;
   1038 
   1039       n_secVBit_nodes = VG_(OSetGen_Size)(secVBitTable);
   1040       if (n_secVBit_nodes > max_secVBit_nodes)
   1041          max_secVBit_nodes = n_secVBit_nodes;
   1042    }
   1043 }
   1044 
   1045 /* --------------- Endianness helpers --------------- */
   1046 
    1047 /* Returns the offset in memory of the byteno-th least significant byte
   1048    in a wordszB-sized word, given the specified endianness. */
   1049 static INLINE UWord byte_offset_w ( UWord wordszB, Bool bigendian,
   1050                                     UWord byteno ) {
   1051    return bigendian ? (wordszB-1-byteno) : byteno;
   1052 }
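
         /* Example (illustrative): for an 8-byte word, byte_offset_w(8, False, 0)
            == 0, since on a little-endian target the least significant byte lives
            at the lowest address, whereas byte_offset_w(8, True, 0) == 7, since
            on a big-endian target it lives at the highest address. */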
   1053 
   1054 
   1055 /* --------------- Ignored address ranges --------------- */
   1056 
   1057 /* Denotes the address-error-reportability status for address ranges:
   1058    IAR_NotIgnored:  the usual case -- report errors in this range
   1059    IAR_CommandLine: don't report errors -- from command line setting
   1060    IAR_ClientReq:   don't report errors -- from client request
   1061 */
   1062 typedef
   1063    enum { IAR_INVALID=99,
   1064           IAR_NotIgnored,
   1065           IAR_CommandLine,
   1066           IAR_ClientReq }
   1067    IARKind;
   1068 
   1069 static const HChar* showIARKind ( IARKind iark )
   1070 {
   1071    switch (iark) {
   1072       case IAR_INVALID:     return "INVALID";
   1073       case IAR_NotIgnored:  return "NotIgnored";
   1074       case IAR_CommandLine: return "CommandLine";
   1075       case IAR_ClientReq:   return "ClientReq";
   1076       default:              return "???";
   1077    }
   1078 }
   1079 
   1080 // RangeMap<IARKind>
   1081 static RangeMap* gIgnoredAddressRanges = NULL;
   1082 
   1083 static void init_gIgnoredAddressRanges ( void )
   1084 {
   1085    if (LIKELY(gIgnoredAddressRanges != NULL))
   1086       return;
   1087    gIgnoredAddressRanges = VG_(newRangeMap)( VG_(malloc), "mc.igIAR.1",
   1088                                              VG_(free), IAR_NotIgnored );
   1089 }
   1090 
   1091 Bool MC_(in_ignored_range) ( Addr a )
   1092 {
   1093    if (LIKELY(gIgnoredAddressRanges == NULL))
   1094       return False;
   1095    UWord how     = IAR_INVALID;
   1096    UWord key_min = ~(UWord)0;
   1097    UWord key_max =  (UWord)0;
   1098    VG_(lookupRangeMap)(&key_min, &key_max, &how, gIgnoredAddressRanges, a);
   1099    tl_assert(key_min <= a && a <= key_max);
   1100    switch (how) {
   1101       case IAR_NotIgnored:  return False;
   1102       case IAR_CommandLine: return True;
   1103       case IAR_ClientReq:   return True;
   1104       default: break; /* invalid */
   1105    }
    1106    VG_(tool_panic)("MC_(in_ignored_range)");
   1107    /*NOTREACHED*/
   1108 }
   1109 
   1110 /* Parse two Addr separated by a dash, or fail. */
   1111 
   1112 static Bool parse_range ( const HChar** ppc, Addr* result1, Addr* result2 )
   1113 {
   1114    Bool ok = VG_(parse_Addr) (ppc, result1);
   1115    if (!ok)
   1116       return False;
   1117    if (**ppc != '-')
   1118       return False;
   1119    (*ppc)++;
   1120    ok = VG_(parse_Addr) (ppc, result2);
   1121    if (!ok)
   1122       return False;
   1123    return True;
   1124 }
   1125 
    1126 /* Parse a comma-separated set of address ranges, or fail.  If they
    1127    are valid, add them to the global set of ignored
   1128    ranges. */
   1129 static Bool parse_ignore_ranges ( const HChar* str0 )
   1130 {
   1131    init_gIgnoredAddressRanges();
   1132    const HChar*  str = str0;
   1133    const HChar** ppc = &str;
   1134    while (1) {
   1135       Addr start = ~(Addr)0;
   1136       Addr end   = (Addr)0;
   1137       Bool ok    = parse_range(ppc, &start, &end);
   1138       if (!ok)
   1139          return False;
   1140       if (start > end)
   1141          return False;
   1142       VG_(bindRangeMap)( gIgnoredAddressRanges, start, end, IAR_CommandLine );
   1143       if (**ppc == 0)
   1144          return True;
   1145       if (**ppc != ',')
   1146          return False;
   1147       (*ppc)++;
   1148    }
   1149    /*NOTREACHED*/
   1150    return False;
   1151 }
   1152 
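
         /* Example (illustrative): the accepted syntax is a comma-separated list
            of dash-separated address pairs, as supplied with the --ignore-ranges=
            command-line option, eg.

               parse_ignore_ranges("0x11000000-0x11ffffff,0x22000000-0x22ffffff")

            binds both ranges to IAR_CommandLine and returns True; a malformed or
            back-to-front range makes it return False. */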
   1153 /* Add or remove [start, +len) from the set of ignored ranges. */
   1154 static Bool modify_ignore_ranges ( Bool addRange, Addr start, Addr len )
   1155 {
   1156    init_gIgnoredAddressRanges();
   1157    const Bool verbose = (VG_(clo_verbosity) > 1);
   1158    if (len == 0) {
   1159       return False;
   1160    }
   1161    if (addRange) {
   1162       VG_(bindRangeMap)(gIgnoredAddressRanges,
   1163                         start, start+len-1, IAR_ClientReq);
   1164       if (verbose)
   1165          VG_(dmsg)("memcheck: modify_ignore_ranges: add %p %p\n",
   1166                    (void*)start, (void*)(start+len-1));
   1167    } else {
   1168       VG_(bindRangeMap)(gIgnoredAddressRanges,
   1169                         start, start+len-1, IAR_NotIgnored);
   1170       if (verbose)
   1171          VG_(dmsg)("memcheck: modify_ignore_ranges: del %p %p\n",
   1172                    (void*)start, (void*)(start+len-1));
   1173    }
   1174    if (verbose) {
   1175       VG_(dmsg)("memcheck:   now have %ld ranges:\n",
   1176                 VG_(sizeRangeMap)(gIgnoredAddressRanges));
   1177       Word i;
   1178       for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
   1179          UWord val     = IAR_INVALID;
   1180          UWord key_min = ~(UWord)0;
   1181          UWord key_max = (UWord)0;
   1182          VG_(indexRangeMap)( &key_min, &key_max, &val,
   1183                              gIgnoredAddressRanges, i );
   1184          VG_(dmsg)("memcheck:      [%ld]  %016llx-%016llx  %s\n",
   1185                    i, (ULong)key_min, (ULong)key_max, showIARKind(val));
   1186       }
   1187    }
   1188    return True;
   1189 }
   1190 
   1191 
   1192 /* --------------- Load/store slow cases. --------------- */
   1193 
   1194 static
   1195 __attribute__((noinline))
   1196 void mc_LOADV_128_or_256_slow ( /*OUT*/ULong* res,
   1197                                 Addr a, SizeT nBits, Bool bigendian )
   1198 {
   1199    ULong  pessim[4];     /* only used when p-l-ok=yes */
   1200    SSizeT szB            = nBits / 8;
   1201    SSizeT szL            = szB / 8;  /* Size in Longs (64-bit units) */
   1202    SSizeT i, j;          /* Must be signed. */
   1203    SizeT  n_addrs_bad = 0;
   1204    Addr   ai;
   1205    UChar  vbits8;
   1206    Bool   ok;
   1207 
   1208    /* Code below assumes load size is a power of two and at least 64
   1209       bits. */
   1210    tl_assert((szB & (szB-1)) == 0 && szL > 0);
   1211 
   1212    /* If this triggers, you probably just need to increase the size of
   1213       the pessim array. */
   1214    tl_assert(szL <= sizeof(pessim) / sizeof(pessim[0]));
   1215 
   1216    for (j = 0; j < szL; j++) {
   1217       pessim[j] = V_BITS64_DEFINED;
   1218       res[j] = V_BITS64_UNDEFINED;
   1219    }
   1220 
   1221    /* Make up a result V word, which contains the loaded data for
   1222       valid addresses and Defined for invalid addresses.  Iterate over
   1223       the bytes in the word, from the most significant down to the
   1224       least.  The vbits to return are calculated into vbits128.  Also
   1225       compute the pessimising value to be used when
   1226       --partial-loads-ok=yes.  n_addrs_bad is redundant (the relevant
   1227       info can be gleaned from the pessim array) but is used as a
   1228       cross-check. */
   1229    for (j = szL-1; j >= 0; j--) {
   1230       ULong vbits64    = V_BITS64_UNDEFINED;
   1231       ULong pessim64   = V_BITS64_DEFINED;
   1232       UWord long_index = byte_offset_w(szL, bigendian, j);
   1233       for (i = 8-1; i >= 0; i--) {
   1234          PROF_EVENT(29, "mc_LOADV_128_or_256_slow(loop)");
   1235          ai = a + 8*long_index + byte_offset_w(8, bigendian, i);
   1236          ok = get_vbits8(ai, &vbits8);
   1237          vbits64 <<= 8;
   1238          vbits64 |= vbits8;
   1239          if (!ok) n_addrs_bad++;
   1240          pessim64 <<= 8;
   1241          pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
   1242       }
   1243       res[long_index] = vbits64;
   1244       pessim[long_index] = pessim64;
   1245    }
   1246 
   1247    /* In the common case, all the addresses involved are valid, so we
   1248       just return the computed V bits and have done. */
   1249    if (LIKELY(n_addrs_bad == 0))
   1250       return;
   1251 
   1252    /* If there's no possibility of getting a partial-loads-ok
   1253       exemption, report the error and quit. */
   1254    if (!MC_(clo_partial_loads_ok)) {
   1255       MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
   1256       return;
   1257    }
   1258 
    1259    /* The partial-loads-ok exemption might apply.  Find out if it
   1260       does.  If so, don't report an addressing error, but do return
   1261       Undefined for the bytes that are out of range, so as to avoid
   1262       false negatives.  If it doesn't apply, just report an addressing
   1263       error in the usual way. */
   1264 
   1265    /* Some code steps along byte strings in aligned chunks
   1266       even when there is only a partially defined word at the end (eg,
   1267       optimised strlen).  This is allowed by the memory model of
   1268       modern machines, since an aligned load cannot span two pages and
   1269       thus cannot "partially fault".
   1270 
    1271       Therefore, a load from a partially-addressable place is allowed
   1272       if all of the following hold:
   1273       - the command-line flag is set [by default, it isn't]
   1274       - it's an aligned load
   1275       - at least one of the addresses in the word *is* valid
   1276 
   1277       Since this suppresses the addressing error, we avoid false
   1278       negatives by marking bytes undefined when they come from an
   1279       invalid address.
   1280    */
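
            /* Worked example (illustrative): with --partial-loads-ok=yes, a
               16-byte aligned load whose first 11 bytes lie in mapped memory and
               whose last 5 bytes are unaddressable is not reported as an address
               error; the in-range bytes keep their real V bits and the
               out-of-range bytes are forced to Undefined by OR-ing in the
               pessim[] values below. */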
   1281 
   1282    /* "at least one of the addresses is invalid" */
   1283    ok = False;
   1284    for (j = 0; j < szL; j++)
   1285       ok |= pessim[j] != V_BITS64_DEFINED;
   1286    tl_assert(ok);
   1287 
   1288    if (0 == (a & (szB - 1)) && n_addrs_bad < szB) {
   1289       /* Exemption applies.  Use the previously computed pessimising
   1290          value and return the combined result, but don't flag an
   1291          addressing error.  The pessimising value is Defined for valid
   1292          addresses and Undefined for invalid addresses. */
   1293       /* for assumption that doing bitwise or implements UifU */
   1294       tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
   1295       /* (really need "UifU" here...)
   1296          vbits[j] UifU= pessim[j]  (is pessimised by it, iow) */
   1297       for (j = szL-1; j >= 0; j--)
   1298          res[j] |= pessim[j];
   1299       return;
   1300    }
   1301 
   1302    /* Exemption doesn't apply.  Flag an addressing error in the normal
   1303       way. */
   1304    MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
   1305 }
   1306 
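        /* Illustrative aside (not part of the original sources): the reason a
           plain bitwise OR implements UifU in the exemption path above is that
           V_BIT_DEFINED == 0 and V_BIT_UNDEFINED == 1, so OR-ing the loaded
           V bits with the pessimising value forces every byte that came from
           an invalid address to Undefined and leaves the other bytes alone.
           A single-byte worked example:

              loaded vbits            0x00   (happens to look Defined)
              pessim, address bad     0xFF   (Undefined)
              vbits | pessim          0xFF   -> byte is returned Undefined

              loaded vbits            0x0F   (partially defined)
              pessim, address ok      0x00   (Defined)
              vbits | pessim          0x0F   -> byte is returned unchanged
        */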
   1307 
   1308 static
   1309 __attribute__((noinline))
   1310 ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
   1311 {
   1312    PROF_EVENT(30, "mc_LOADVn_slow");
   1313 
   1314    /* ------------ BEGIN semi-fast cases ------------ */
   1315    /* These deal quickly-ish with the common auxiliary primary map
   1316       cases on 64-bit platforms.  They are merely a speedup hack and can be
   1317       omitted without loss of correctness/functionality.  Note that in
   1318       both cases the "sizeof(void*) == 8" causes these cases to be
   1319       folded out by compilers on 32-bit platforms.  These are derived
   1320       from LOADV64 and LOADV32.
   1321    */
   1322    if (LIKELY(sizeof(void*) == 8
   1323                       && nBits == 64 && VG_IS_8_ALIGNED(a))) {
   1324       SecMap* sm       = get_secmap_for_reading(a);
   1325       UWord   sm_off16 = SM_OFF_16(a);
   1326       UWord   vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
   1327       if (LIKELY(vabits16 == VA_BITS16_DEFINED))
   1328          return V_BITS64_DEFINED;
   1329       if (LIKELY(vabits16 == VA_BITS16_UNDEFINED))
   1330          return V_BITS64_UNDEFINED;
   1331       /* else fall into the slow case */
   1332    }
   1333    if (LIKELY(sizeof(void*) == 8
   1334                       && nBits == 32 && VG_IS_4_ALIGNED(a))) {
   1335       SecMap* sm = get_secmap_for_reading(a);
   1336       UWord sm_off = SM_OFF(a);
   1337       UWord vabits8 = sm->vabits8[sm_off];
   1338       if (LIKELY(vabits8 == VA_BITS8_DEFINED))
   1339          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
   1340       if (LIKELY(vabits8 == VA_BITS8_UNDEFINED))
   1341          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
   1342       /* else fall into slow case */
   1343    }
   1344    /* ------------ END semi-fast cases ------------ */
   1345 
   1346    ULong  vbits64     = V_BITS64_UNDEFINED; /* result */
   1347    ULong  pessim64    = V_BITS64_DEFINED;   /* only used when p-l-ok=yes */
   1348    SSizeT szB         = nBits / 8;
   1349    SSizeT i;          /* Must be signed. */
   1350    SizeT  n_addrs_bad = 0;
   1351    Addr   ai;
   1352    UChar  vbits8;
   1353    Bool   ok;
   1354 
   1355    tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
   1356 
   1357    /* Make up a 64-bit result V word, which contains the loaded data
   1358       for valid addresses and Defined for invalid addresses.  Iterate
   1359       over the bytes in the word, from the most significant down to
   1360       the least.  The vbits to return are calculated into vbits64.
   1361       Also compute the pessimising value to be used when
   1362       --partial-loads-ok=yes.  n_addrs_bad is redundant (the relevant
   1363       info can be gleaned from pessim64) but is used as a
   1364       cross-check. */
   1365    for (i = szB-1; i >= 0; i--) {
   1366       PROF_EVENT(31, "mc_LOADVn_slow(loop)");
   1367       ai = a + byte_offset_w(szB, bigendian, i);
   1368       ok = get_vbits8(ai, &vbits8);
   1369       vbits64 <<= 8;
   1370       vbits64 |= vbits8;
   1371       if (!ok) n_addrs_bad++;
   1372       pessim64 <<= 8;
   1373       pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
   1374    }
   1375 
   1376    /* In the common case, all the addresses involved are valid, so we
   1377       just return the computed V bits and have done. */
   1378    if (LIKELY(n_addrs_bad == 0))
   1379       return vbits64;
   1380 
   1381    /* If there's no possibility of getting a partial-loads-ok
   1382       exemption, report the error and quit. */
   1383    if (!MC_(clo_partial_loads_ok)) {
   1384       MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
   1385       return vbits64;
   1386    }
   1387 
   1388    /* The partial-loads-ok exemption might apply.  Find out if it
   1389       does.  If so, don't report an addressing error, but do return
   1390       Undefined for the bytes that are out of range, so as to avoid
   1391       false negatives.  If it doesn't apply, just report an addressing
   1392       error in the usual way. */
   1393 
   1394    /* Some code steps along byte strings in aligned word-sized chunks
   1395       even when there is only a partially defined word at the end (eg,
   1396       optimised strlen).  This is allowed by the memory model of
   1397       modern machines, since an aligned load cannot span two pages and
   1398       thus cannot "partially fault", even though such behaviour is
   1399       declared undefined by ANSI C/C++.
   1400 
   1401       Therefore, a load from a partially-addressible place is allowed
   1402       if all of the following hold:
   1403       - the command-line flag is set [by default, it isn't]
   1404       - it's a word-sized, word-aligned load
   1405       - at least one of the addresses in the word *is* valid
   1406 
   1407       Since this suppresses the addressing error, we avoid false
   1408       negatives by marking bytes undefined when they come from an
   1409       invalid address.
   1410    */
   1411 
   1412    /* "at least one of the addresses is invalid" */
   1413    tl_assert(pessim64 != V_BITS64_DEFINED);
   1414 
   1415    if (szB == VG_WORDSIZE && VG_IS_WORD_ALIGNED(a)
   1416        && n_addrs_bad < VG_WORDSIZE) {
   1417       /* Exemption applies.  Use the previously computed pessimising
   1418          value for vbits64 and return the combined result, but don't
   1419          flag an addressing error.  The pessimising value is Defined
   1420          for valid addresses and Undefined for invalid addresses. */
   1421       /* for assumption that doing bitwise or implements UifU */
   1422       tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
   1423       /* (really need "UifU" here...)
   1424          vbits64 UifU= pessim64  (is pessimised by it, iow) */
   1425       vbits64 |= pessim64;
   1426       return vbits64;
   1427    }
   1428 
   1429    /* Also, it appears that gcc generates string-stepping code in
   1430       32-bit chunks on 64-bit platforms.  So, also grant an exemption
   1431       for this case.  Note that the first clause of the conditional
   1432       (VG_WORDSIZE == 8) is known at compile time, so the whole clause
   1433       will get folded out in 32 bit builds. */
   1434    if (VG_WORDSIZE == 8
   1435        && VG_IS_4_ALIGNED(a) && nBits == 32 && n_addrs_bad < 4) {
   1436       tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
   1437       /* (really need "UifU" here...)
   1438          vbits64 UifU= pessim64  (is pessimised by it, iow) */
   1439       vbits64 |= pessim64;
   1440       /* Mark the upper 32 bits as undefined, just to be on the safe
   1441          side. */
   1442       vbits64 |= (((ULong)V_BITS32_UNDEFINED) << 32);
   1443       return vbits64;
   1444    }
   1445 
   1446    /* Exemption doesn't apply.  Flag an addressing error in the normal
   1447       way. */
   1448    MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
   1449 
   1450    return vbits64;
   1451 }
   1452 
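        /* Illustrative sketch (an assumption: byte_offset_w is defined
           elsewhere in this file and its definition is not restated here).
           It presumably maps the significance index of a byte within an
           nBytes-sized value to that byte's offset in memory, roughly:

              // hypothetical stand-in, for illustration only
              static UWord byte_offset_w_sketch ( UWord nBytes,
                                                  Bool  bigendian,
                                                  UWord i )
              {
                 // i == 0 denotes the least significant byte of the value
                 return bigendian ? nBytes - 1 - i : i;
              }

           which is why the loops above, walking i from most to least
           significant, visit the bytes in the correct memory order on both
           endiannesses. */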
   1453 
   1454 static
   1455 __attribute__((noinline))
   1456 void mc_STOREVn_slow ( Addr a, SizeT nBits, ULong vbytes, Bool bigendian )
   1457 {
   1458    SizeT szB = nBits / 8;
   1459    SizeT i, n_addrs_bad = 0;
   1460    UChar vbits8;
   1461    Addr  ai;
   1462    Bool  ok;
   1463 
   1464    PROF_EVENT(35, "mc_STOREVn_slow");
   1465 
   1466    /* ------------ BEGIN semi-fast cases ------------ */
   1467    /* These deal quickly-ish with the common auxiliary primary map
   1468       cases on 64-bit platforms.  They are merely a speedup hack and can be
   1469       omitted without loss of correctness/functionality.  Note that in
   1470       both cases the "sizeof(void*) == 8" causes these cases to be
   1471       folded out by compilers on 32-bit platforms.  The logic below
   1472       is somewhat similar to some cases extensively commented in
   1473       MC_(helperc_STOREV8).
   1474    */
   1475    if (LIKELY(sizeof(void*) == 8
   1476                       && nBits == 64 && VG_IS_8_ALIGNED(a))) {
   1477       SecMap* sm       = get_secmap_for_reading(a);
   1478       UWord   sm_off16 = SM_OFF_16(a);
   1479       UWord   vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
   1480       if (LIKELY( !is_distinguished_sm(sm) &&
   1481                           (VA_BITS16_DEFINED   == vabits16 ||
   1482                            VA_BITS16_UNDEFINED == vabits16) )) {
   1483          /* Handle common case quickly: a is suitably aligned, */
   1484          /* is mapped, and is addressible. */
   1485          // Convert full V-bits in register to compact 2-bit form.
   1486          if (LIKELY(V_BITS64_DEFINED == vbytes)) {
   1487             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
   1488             return;
   1489          } else if (V_BITS64_UNDEFINED == vbytes) {
   1490             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
   1491             return;
   1492          }
   1493          /* else fall into the slow case */
   1494       }
   1495       /* else fall into the slow case */
   1496    }
   1497    if (LIKELY(sizeof(void*) == 8
   1498                       && nBits == 32 && VG_IS_4_ALIGNED(a))) {
   1499       SecMap* sm      = get_secmap_for_reading(a);
   1500       UWord   sm_off  = SM_OFF(a);
   1501       UWord   vabits8 = sm->vabits8[sm_off];
   1502       if (LIKELY( !is_distinguished_sm(sm) &&
   1503                           (VA_BITS8_DEFINED   == vabits8 ||
   1504                            VA_BITS8_UNDEFINED == vabits8) )) {
   1505          /* Handle common case quickly: a is suitably aligned, */
   1506          /* is mapped, and is addressible. */
   1507          // Convert full V-bits in register to compact 2-bit form.
   1508          if (LIKELY(V_BITS32_DEFINED == (vbytes & 0xFFFFFFFF))) {
   1509             sm->vabits8[sm_off] = VA_BITS8_DEFINED;
   1510             return;
   1511          } else if (V_BITS32_UNDEFINED == (vbytes & 0xFFFFFFFF)) {
   1512             sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
   1513             return;
   1514          }
   1515          /* else fall into the slow case */
   1516       }
   1517       /* else fall into the slow case */
   1518    }
   1519    /* ------------ END semi-fast cases ------------ */
   1520 
   1521    tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
   1522 
   1523    /* Dump vbytes in memory, iterating from least to most significant
   1524       byte.  At the same time establish addressibility of the location. */
   1525    for (i = 0; i < szB; i++) {
   1526       PROF_EVENT(36, "mc_STOREVn_slow(loop)");
   1527       ai     = a + byte_offset_w(szB, bigendian, i);
   1528       vbits8 = vbytes & 0xff;
   1529       ok     = set_vbits8(ai, vbits8);
   1530       if (!ok) n_addrs_bad++;
   1531       vbytes >>= 8;
   1532    }
   1533 
   1534    /* If an address error has happened, report it. */
   1535    if (n_addrs_bad > 0)
   1536       MC_(record_address_error)( VG_(get_running_tid)(), a, szB, True );
   1537 }
   1538 
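        /* Illustrative summary (the concrete constants are inferred from
           V_BIT_DEFINED == 0 / V_BIT_UNDEFINED == 1 and should be read as an
           assumption): the fast paths above can compress the incoming V word
           only when every byte is wholly Defined or wholly Undefined.  For
           the 32-bit case:

              vbytes & 0xFFFFFFFF == 0x00000000  ->  VA_BITS8_DEFINED
              vbytes & 0xFFFFFFFF == 0xFFFFFFFF  ->  VA_BITS8_UNDEFINED
              anything else (e.g.    0x000000FF) ->  fall through to the byte
                                                     loop, which may file
                                                     PARTDEFINED bytes in the
                                                     sec-V-bits table

           since a partially defined byte cannot be represented in the
           2-bits-per-byte VA encoding. */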
   1539 
   1540 /*------------------------------------------------------------*/
   1541 /*--- Setting permissions over address ranges.             ---*/
   1542 /*------------------------------------------------------------*/
   1543 
   1544 static void set_address_range_perms ( Addr a, SizeT lenT, UWord vabits16,
   1545                                       UWord dsm_num )
   1546 {
   1547    UWord    sm_off, sm_off16;
   1548    UWord    vabits2 = vabits16 & 0x3;
   1549    SizeT    lenA, lenB, len_to_next_secmap;
   1550    Addr     aNext;
   1551    SecMap*  sm;
   1552    SecMap** sm_ptr;
   1553    SecMap*  example_dsm;
   1554 
   1555    PROF_EVENT(150, "set_address_range_perms");
   1556 
   1557    /* Check the V+A bits make sense. */
   1558    tl_assert(VA_BITS16_NOACCESS  == vabits16 ||
   1559              VA_BITS16_UNDEFINED == vabits16 ||
   1560              VA_BITS16_DEFINED   == vabits16);
   1561 
   1562    // This code should never write PDBs;  ensure this.  (See comment above
   1563    // set_vabits2().)
   1564    tl_assert(VA_BITS2_PARTDEFINED != vabits2);
   1565 
   1566    if (lenT == 0)
   1567       return;
   1568 
   1569    if (lenT > 256 * 1024 * 1024) {
   1570       if (VG_(clo_verbosity) > 0 && !VG_(clo_xml)) {
   1571          const HChar* s = "unknown???";
   1572          if (vabits16 == VA_BITS16_NOACCESS ) s = "noaccess";
   1573          if (vabits16 == VA_BITS16_UNDEFINED) s = "undefined";
   1574          if (vabits16 == VA_BITS16_DEFINED  ) s = "defined";
   1575          VG_(message)(Vg_UserMsg, "Warning: set address range perms: "
   1576                                   "large range [0x%lx, 0x%lx) (%s)\n",
   1577                                   a, a + lenT, s);
   1578       }
   1579    }
   1580 
   1581 #ifndef PERF_FAST_SARP
   1582    /*------------------ debug-only case ------------------ */
   1583    {
   1584       // Endianness doesn't matter here because all bytes are being set to
   1585       // the same value.
   1586       // Nb: We don't have to worry about updating the sec-V-bits table
   1587       // after these set_vabits2() calls because this code never writes
   1588       // VA_BITS2_PARTDEFINED values.
   1589       SizeT i;
   1590       for (i = 0; i < lenT; i++) {
   1591          set_vabits2(a + i, vabits2);
   1592       }
   1593       return;
   1594    }
   1595 #endif
   1596 
   1597    /*------------------ standard handling ------------------ */
   1598 
   1599    /* Get the distinguished secondary that we might want
   1600       to use (part of the space-compression scheme). */
   1601    example_dsm = &sm_distinguished[dsm_num];
   1602 
   1603    // We have to handle ranges covering various combinations of partial and
   1604    // whole sec-maps.  Here is how parts 1, 2 and 3 are used in each case.
   1605    // Cases marked with a '*' are common.
   1606    //
   1607    //   TYPE                                             PARTS USED
   1608    //   ----                                             ----------
   1609    // * one partial sec-map                  (p)         1
   1610    // - one whole sec-map                    (P)         2
   1611    //
   1612    // * two partial sec-maps                 (pp)        1,3
   1613    // - one partial, one whole sec-map       (pP)        1,2
   1614    // - one whole, one partial sec-map       (Pp)        2,3
   1615    // - two whole sec-maps                   (PP)        2,2
   1616    //
   1617    // * one partial, one whole, one partial  (pPp)       1,2,3
   1618    // - one partial, two whole               (pPP)       1,2,2
   1619    // - two whole, one partial               (PPp)       2,2,3
   1620    // - three whole                          (PPP)       2,2,2
   1621    //
   1622    // * one partial, N-2 whole, one partial  (pP...Pp)   1,2...2,3
   1623    // - one partial, N-1 whole               (pP...PP)   1,2...2,2
   1624    // - N-1 whole, one partial               (PP...Pp)   2,2...2,3
   1625    // - N whole                              (PP...PP)   2,2...2,2
   1626 
   1627    // Break up total length (lenT) into two parts:  length in the first
   1628    // sec-map (lenA), and the rest (lenB);   lenT == lenA + lenB.
   1629    aNext = start_of_this_sm(a) + SM_SIZE;
   1630    len_to_next_secmap = aNext - a;
   1631    if ( lenT <= len_to_next_secmap ) {
   1632       // Range entirely within one sec-map.  Covers almost all cases.
   1633       PROF_EVENT(151, "set_address_range_perms-single-secmap");
   1634       lenA = lenT;
   1635       lenB = 0;
   1636    } else if (is_start_of_sm(a)) {
   1637       // Range spans at least one whole sec-map, and starts at the beginning
   1638       // of a sec-map; skip to Part 2.
   1639       PROF_EVENT(152, "set_address_range_perms-startof-secmap");
   1640       lenA = 0;
   1641       lenB = lenT;
   1642       goto part2;
   1643    } else {
   1644       // Range spans two or more sec-maps, first one is partial.
   1645       PROF_EVENT(153, "set_address_range_perms-multiple-secmaps");
   1646       lenA = len_to_next_secmap;
   1647       lenB = lenT - lenA;
   1648    }
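
           // Illustrative example (values invented for this comment, assuming
           // SM_SIZE is the 64KB implied by the "64KB steps" comment below):
           // for a = 0x1234FF00 and lenT = 0x300, aNext = 0x12350000 and
           // len_to_next_secmap = 0x100, so lenA = 0x100 (finished by Part 1
           // in this sec-map) and lenB = 0x200 (less than SM_SIZE, so Part 2's
           // loop does nothing and Part 3 finishes the remainder in the next
           // sec-map).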
   1649 
   1650    //------------------------------------------------------------------------
   1651    // Part 1: Deal with the first sec_map.  Most of the time the range will be
   1652    // entirely within a sec_map and this part alone will suffice.  Also,
   1653    // doing it this way lets us avoid repeatedly testing for the crossing of
   1654    // a sec-map boundary within these loops.
   1655    //------------------------------------------------------------------------
   1656 
   1657    // If it's distinguished, make it undistinguished if necessary.
   1658    sm_ptr = get_secmap_ptr(a);
   1659    if (is_distinguished_sm(*sm_ptr)) {
   1660       if (*sm_ptr == example_dsm) {
   1661          // Sec-map already has the V+A bits that we want, so skip.
   1662          PROF_EVENT(154, "set_address_range_perms-dist-sm1-quick");
   1663          a    = aNext;
   1664          lenA = 0;
   1665       } else {
   1666          PROF_EVENT(155, "set_address_range_perms-dist-sm1");
   1667          *sm_ptr = copy_for_writing(*sm_ptr);
   1668       }
   1669    }
   1670    sm = *sm_ptr;
   1671 
   1672    // 1 byte steps
   1673    while (True) {
   1674       if (VG_IS_8_ALIGNED(a)) break;
   1675       if (lenA < 1)           break;
   1676       PROF_EVENT(156, "set_address_range_perms-loop1a");
   1677       sm_off = SM_OFF(a);
   1678       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
   1679       a    += 1;
   1680       lenA -= 1;
   1681    }
   1682    // 8-aligned, 8 byte steps
   1683    while (True) {
   1684       if (lenA < 8) break;
   1685       PROF_EVENT(157, "set_address_range_perms-loop8a");
   1686       sm_off16 = SM_OFF_16(a);
   1687       ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
   1688       a    += 8;
   1689       lenA -= 8;
   1690    }
   1691    // 1 byte steps
   1692    while (True) {
   1693       if (lenA < 1) break;
   1694       PROF_EVENT(158, "set_address_range_perms-loop1b");
   1695       sm_off = SM_OFF(a);
   1696       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
   1697       a    += 1;
   1698       lenA -= 1;
   1699    }
   1700 
   1701    // We've finished the first sec-map.  Is that it?
   1702    if (lenB == 0)
   1703       return;
   1704 
   1705    //------------------------------------------------------------------------
   1706    // Part 2: Fast-set entire sec-maps at a time.
   1707    //------------------------------------------------------------------------
   1708   part2:
   1709    // 64KB-aligned, 64KB steps.
   1710    // Nb: we can reach here with lenB < SM_SIZE
   1711    tl_assert(0 == lenA);
   1712    while (True) {
   1713       if (lenB < SM_SIZE) break;
   1714       tl_assert(is_start_of_sm(a));
   1715       PROF_EVENT(159, "set_address_range_perms-loop64K");
   1716       sm_ptr = get_secmap_ptr(a);
   1717       if (!is_distinguished_sm(*sm_ptr)) {
   1718          PROF_EVENT(160, "set_address_range_perms-loop64K-free-dist-sm");
   1719          // Free the non-distinguished sec-map that we're replacing.  This
   1720          // case happens moderately often, enough to be worthwhile.
   1721          SysRes sres = VG_(am_munmap_valgrind)((Addr)*sm_ptr, sizeof(SecMap));
   1722          tl_assert2(! sr_isError(sres), "SecMap valgrind munmap failure\n");
   1723       }
   1724       update_SM_counts(*sm_ptr, example_dsm);
   1725       // Make the sec-map entry point to the example DSM
   1726       *sm_ptr = example_dsm;
   1727       lenB -= SM_SIZE;
   1728       a    += SM_SIZE;
   1729    }
   1730 
   1731    // We've finished the whole sec-maps.  Is that it?
   1732    if (lenB == 0)
   1733       return;
   1734 
   1735    //------------------------------------------------------------------------
   1736    // Part 3: Finish off the final partial sec-map, if necessary.
   1737    //------------------------------------------------------------------------
   1738 
   1739    tl_assert(is_start_of_sm(a) && lenB < SM_SIZE);
   1740 
   1741    // If it's distinguished, make it undistinguished if necessary.
   1742    sm_ptr = get_secmap_ptr(a);
   1743    if (is_distinguished_sm(*sm_ptr)) {
   1744       if (*sm_ptr == example_dsm) {
   1745          // Sec-map already has the V+A bits that we want, so stop.
   1746          PROF_EVENT(161, "set_address_range_perms-dist-sm2-quick");
   1747          return;
   1748       } else {
   1749          PROF_EVENT(162, "set_address_range_perms-dist-sm2");
   1750          *sm_ptr = copy_for_writing(*sm_ptr);
   1751       }
   1752    }
   1753    sm = *sm_ptr;
   1754 
   1755    // 8-aligned, 8 byte steps
   1756    while (True) {
   1757       if (lenB < 8) break;
   1758       PROF_EVENT(163, "set_address_range_perms-loop8b");
   1759       sm_off16 = SM_OFF_16(a);
   1760       ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
   1761       a    += 8;
   1762       lenB -= 8;
   1763    }
   1764    // 1 byte steps
   1765    while (True) {
   1766       if (lenB < 1) return;
   1767       PROF_EVENT(164, "set_address_range_perms-loop1c");
   1768       sm_off = SM_OFF(a);
   1769       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
   1770       a    += 1;
   1771       lenB -= 1;
   1772    }
   1773 }
   1774 
   1775 
   1776 /* --- Set permissions for arbitrary address ranges --- */
   1777 
   1778 void MC_(make_mem_noaccess) ( Addr a, SizeT len )
   1779 {
   1780    PROF_EVENT(40, "MC_(make_mem_noaccess)");
   1781    DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a, len);
   1782    set_address_range_perms ( a, len, VA_BITS16_NOACCESS, SM_DIST_NOACCESS );
   1783    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
   1784       ocache_sarp_Clear_Origins ( a, len );
   1785 }
   1786 
   1787 static void make_mem_undefined ( Addr a, SizeT len )
   1788 {
   1789    PROF_EVENT(41, "make_mem_undefined");
   1790    DEBUG("make_mem_undefined(%p, %lu)\n", a, len);
   1791    set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
   1792 }
   1793 
   1794 void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag )
   1795 {
   1796    PROF_EVENT(43, "MC_(make_mem_undefined)");
   1797    DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a, len);
   1798    set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
   1799    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
   1800       ocache_sarp_Set_Origins ( a, len, otag );
   1801 }
   1802 
   1803 static
   1804 void make_mem_undefined_w_tid_and_okind ( Addr a, SizeT len,
   1805                                           ThreadId tid, UInt okind )
   1806 {
   1807    UInt        ecu;
   1808    ExeContext* here;
   1809    /* VG_(record_ExeContext) checks for validity of tid, and asserts
   1810       if it is invalid.  So no need to do it here. */
   1811    tl_assert(okind <= 3);
   1812    here = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
   1813    tl_assert(here);
   1814    ecu = VG_(get_ECU_from_ExeContext)(here);
   1815    tl_assert(VG_(is_plausible_ECU)(ecu));
   1816    MC_(make_mem_undefined_w_otag) ( a, len, ecu | okind );
   1817 }
   1818 
   1819 static
   1820 void mc_new_mem_w_tid_make_ECU  ( Addr a, SizeT len, ThreadId tid )
   1821 {
   1822    make_mem_undefined_w_tid_and_okind ( a, len, tid, MC_OKIND_UNKNOWN );
   1823 }
   1824 
   1825 static
   1826 void mc_new_mem_w_tid_no_ECU  ( Addr a, SizeT len, ThreadId tid )
   1827 {
   1828    MC_(make_mem_undefined_w_otag) ( a, len, MC_OKIND_UNKNOWN );
   1829 }
   1830 
   1831 void MC_(make_mem_defined) ( Addr a, SizeT len )
   1832 {
   1833    PROF_EVENT(42, "MC_(make_mem_defined)");
   1834    DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a, len);
   1835    set_address_range_perms ( a, len, VA_BITS16_DEFINED, SM_DIST_DEFINED );
   1836    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
   1837       ocache_sarp_Clear_Origins ( a, len );
   1838 }
   1839 
   1840 /* For each byte in [a,a+len), if the byte is addressable, make it be
   1841    defined, but if it isn't addressible, leave it alone.  In other
   1842    words, a version of MC_(make_mem_defined) that doesn't mess with
   1843    addressibility.  Low-performance implementation. */
   1844 static void make_mem_defined_if_addressable ( Addr a, SizeT len )
   1845 {
   1846    SizeT i;
   1847    UChar vabits2;
   1848    DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a, (ULong)len);
   1849    for (i = 0; i < len; i++) {
   1850       vabits2 = get_vabits2( a+i );
   1851       if (LIKELY(VA_BITS2_NOACCESS != vabits2)) {
   1852          set_vabits2(a+i, VA_BITS2_DEFINED);
   1853          if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
   1854             MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
   1855          }
   1856       }
   1857    }
   1858 }
   1859 
   1860 /* Similarly (needed for mprotect handling ..) */
   1861 static void make_mem_defined_if_noaccess ( Addr a, SizeT len )
   1862 {
   1863    SizeT i;
   1864    UChar vabits2;
   1865    DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a, (ULong)len);
   1866    for (i = 0; i < len; i++) {
   1867       vabits2 = get_vabits2( a+i );
   1868       if (LIKELY(VA_BITS2_NOACCESS == vabits2)) {
   1869          set_vabits2(a+i, VA_BITS2_DEFINED);
   1870          if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
   1871             MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
   1872          }
   1873       }
   1874    }
   1875 }
   1876 
   1877 /* --- Block-copy permissions (needed for implementing realloc() and
   1878        sys_mremap). --- */
   1879 
   1880 void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT len )
   1881 {
   1882    SizeT i, j;
   1883    UChar vabits2, vabits8;
   1884    Bool  aligned, nooverlap;
   1885 
   1886    DEBUG("MC_(copy_address_range_state)\n");
   1887    PROF_EVENT(50, "MC_(copy_address_range_state)");
   1888 
   1889    if (len == 0 || src == dst)
   1890       return;
   1891 
   1892    aligned   = VG_IS_4_ALIGNED(src) && VG_IS_4_ALIGNED(dst);
   1893    nooverlap = src+len <= dst || dst+len <= src;
   1894 
   1895    if (nooverlap && aligned) {
   1896 
   1897       /* Vectorised fast case, when no overlap and suitably aligned */
   1898       /* vector loop */
   1899       i = 0;
   1900       while (len >= 4) {
   1901          vabits8 = get_vabits8_for_aligned_word32( src+i );
   1902          set_vabits8_for_aligned_word32( dst+i, vabits8 );
   1903          if (LIKELY(VA_BITS8_DEFINED == vabits8
   1904                             || VA_BITS8_UNDEFINED == vabits8
   1905                             || VA_BITS8_NOACCESS == vabits8)) {
   1906             /* do nothing */
   1907          } else {
   1908             /* have to copy secondary map info */
   1909             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+0 ))
   1910                set_sec_vbits8( dst+i+0, get_sec_vbits8( src+i+0 ) );
   1911             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+1 ))
   1912                set_sec_vbits8( dst+i+1, get_sec_vbits8( src+i+1 ) );
   1913             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+2 ))
   1914                set_sec_vbits8( dst+i+2, get_sec_vbits8( src+i+2 ) );
   1915             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+3 ))
   1916                set_sec_vbits8( dst+i+3, get_sec_vbits8( src+i+3 ) );
   1917          }
   1918          i += 4;
   1919          len -= 4;
   1920       }
   1921       /* fixup loop */
   1922       while (len >= 1) {
   1923          vabits2 = get_vabits2( src+i );
   1924          set_vabits2( dst+i, vabits2 );
   1925          if (VA_BITS2_PARTDEFINED == vabits2) {
   1926             set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
   1927          }
   1928          i++;
   1929          len--;
   1930       }
   1931 
   1932    } else {
   1933 
   1934       /* We have to do things the slow way */
   1935       if (src < dst) {
   1936          for (i = 0, j = len-1; i < len; i++, j--) {
   1937             PROF_EVENT(51, "MC_(copy_address_range_state)(loop)");
   1938             vabits2 = get_vabits2( src+j );
   1939             set_vabits2( dst+j, vabits2 );
   1940             if (VA_BITS2_PARTDEFINED == vabits2) {
   1941                set_sec_vbits8( dst+j, get_sec_vbits8( src+j ) );
   1942             }
   1943          }
   1944       }
   1945 
   1946       if (src > dst) {
   1947          for (i = 0; i < len; i++) {
   1948             PROF_EVENT(52, "MC_(copy_address_range_state)(loop)");
   1949             vabits2 = get_vabits2( src+i );
   1950             set_vabits2( dst+i, vabits2 );
   1951             if (VA_BITS2_PARTDEFINED == vabits2) {
   1952                set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
   1953             }
   1954          }
   1955       }
   1956    }
   1957 
   1958 }
   1959 
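        /* Illustrative aside (generic memmove-style reasoning, not specific to
           Memcheck): the slow path above must pick a copy direction that never
           overwrites source bytes before they have been read.  A hypothetical
           byte-copy helper showing the same rule:

              static void copy_overlap_safe ( UChar* dst, const UChar* src,
                                              SizeT len )
              {
                 SizeT i;
                 if (src < dst) {
                    // copy backwards: the overlapping tail of src is read
                    // before dst is written over it
                    for (i = len; i > 0; i--)
                       dst[i-1] = src[i-1];
                 } else if (src > dst) {
                    // copy forwards, by the mirrored argument
                    for (i = 0; i < len; i++)
                       dst[i] = src[i];
                 }
              }

           which mirrors how the V+A bits (and any sec-V-bits) are copied
           above. */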
   1960 
   1961 /*------------------------------------------------------------*/
   1962 /*--- Origin tracking stuff - cache basics                 ---*/
   1963 /*------------------------------------------------------------*/
   1964 
   1965 /* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
   1966    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   1967 
   1968    Note that this implementation draws inspiration from the "origin
   1969    tracking by value piggybacking" scheme described in "Tracking Bad
   1970    Apples: Reporting the Origin of Null and Undefined Value Errors"
   1971    (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer,
   1972    Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is
   1973    implemented completely differently.
   1974 
   1975    Origin tags and ECUs -- about the shadow values
   1976    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   1977 
   1978    This implementation tracks the defining point of all uninitialised
   1979    values using so called "origin tags", which are 32-bit integers,
   1980    rather than using the values themselves to encode the origins.  The
   1981    latter, so-called "value piggybacking", is what the OOPSLA07 paper
   1982    describes.
   1983 
   1984    Origin tags, as tracked by the machinery below, are 32-bit unsigned
   1985    ints (UInts), regardless of the machine's word size.  Each tag
   1986    comprises an upper 30-bit ECU field and a lower 2-bit
   1987    'kind' field.  The ECU field is a number given out by m_execontext
   1988    and has a 1-1 mapping with ExeContext*s.  An ECU can be used
   1989    directly as an origin tag (otag), but in fact we want to put
   1990    additional information 'kind' field to indicate roughly where the
   1991    additional information in the 'kind' field to indicate roughly where the
   1992    for the user -- it has no other purpose.  In summary:
   1993 
   1994    * Both ECUs and origin tags are represented as 32-bit words
   1995 
   1996    * m_execontext and the core-tool interface deal purely in ECUs.
   1997      They have no knowledge of origin tags - that is a purely
   1998      Memcheck-internal matter.
   1999 
   2000    * all valid ECUs have the lowest 2 bits zero and at least
   2001      one of the upper 30 bits nonzero (see VG_(is_plausible_ECU))
   2002 
   2003    * to convert from an ECU to an otag, OR in one of the MC_OKIND_
   2004      constants defined in mc_include.h.
   2005 
   2006    * to convert an otag back to an ECU, AND it with ~3
   2007 
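           A small worked example (illustrative only; MC_OKIND_UNKNOWN is
           chosen simply because it is one of the MC_OKIND_ constants used
           elsewhere in this file):

              UInt ecu  = 0x00001230;              // low 2 bits zero, so plausible
              UInt otag = ecu | MC_OKIND_UNKNOWN;  // ECU -> otag
              UInt back = otag & ~3;               // otag -> ECU; back == ecu again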
   2008    One important fact is that no valid otag is zero.  A zero otag is
   2009    used by the implementation to indicate "no origin", which could
   2010    mean that either the value is defined, or it is undefined but the
   2011    implementation somehow managed to lose the origin.
   2012 
   2013    The ECU used for memory created by malloc etc is derived from the
   2014    stack trace at the time the malloc etc happens.  This means the
   2015    mechanism can show the exact allocation point for heap-created
   2016    uninitialised values.
   2017 
   2018    In contrast, it is simply too expensive to create a complete
   2019    backtrace for each stack allocation.  Therefore we merely use a
   2020    depth-1 backtrace for stack allocations, which can be done once at
   2021    translation time, rather than N times at run time.  The result of
   2022    this is that, for stack created uninitialised values, Memcheck can
   2023    only show the allocating function, and not what called it.
   2024    Furthermore, compilers tend to move the stack pointer just once at
   2025    the start of the function, to allocate all locals, and so in fact
   2026    the stack origin almost always simply points to the opening brace
   2027    of the function.  Net result is, for stack origins, the mechanism
   2028    can tell you in which function the undefined value was created, but
   2029    that's all.  Users will need to carefully check all locals in the
   2030    specified function.
   2031 
   2032    Shadowing registers and memory
   2033    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   2034 
   2035    Memory is shadowed using a two level cache structure (ocacheL1 and
   2036    ocacheL2).  Memory references are first directed to ocacheL1.  This
   2037    is a traditional 2-way set associative cache with 32-byte lines and
   2038    approximate LRU replacement within each set.
   2039 
   2040    A naive implementation would require storing one 32 bit otag for
   2041    each byte of memory covered, a 4:1 space overhead.  Instead, there
   2042    is one otag for every 4 bytes of memory covered, plus a 4-bit mask
   2043    that shows which of the 4 bytes have that shadow value and which
   2044    have a shadow value of zero (indicating no origin).  Hence a lot of
   2045    space is saved, but the cost is that only one different origin per
   2046    4 bytes of address space can be represented.  This is a source of
   2047    imprecision, but how much of a problem it really is remains to be
   2048    seen.
   2049 
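           As an illustration of the above (invented values): suppose bytes
           a+0 and a+2 of an aligned 4-byte group are undefined with origin T,
           while a+1 and a+3 are defined.  The cache then stores w32 = T for
           the group plus a 4-bit mask with exactly two bits set, so a query
           for a+0 or a+2 yields T while a query for a+1 or a+3 yields zero
           ("no origin").  Which mask bit corresponds to which byte is left to
           the code below rather than restated here.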
   2050    A cache line that contains all zeroes ("no origins") contains no
   2051    useful information, and can be ejected from the L1 cache "for
   2052    free", in the sense that a read miss on the L1 causes a line of
   2053    zeroes to be installed.  However, ejecting a line containing
   2054    nonzeroes risks losing origin information permanently.  In order to
   2055    prevent such lossage, ejected nonzero lines are placed in a
   2056    secondary cache (ocacheL2), which is an OSet (AVL tree) of cache
   2057    lines.  This can grow arbitrarily large, and so should ensure that
   2058    Memcheck runs out of memory in preference to losing useful origin
   2059    info due to cache size limitations.
   2060 
   2061    Shadowing registers is a bit tricky, because the shadow values are
   2062    32 bits, regardless of the size of the register.  That gives a
   2063    problem for registers smaller than 32 bits.  The solution is to
   2064    find spaces in the guest state that are unused, and use those to
   2065    shadow guest state fragments smaller than 32 bits.  For example, on
   2066    ppc32/64, each vector register is 16 bytes long.  If 4 bytes of the
   2067    shadow are allocated for the register's otag, then there are still
   2068    12 bytes left over which could be used to shadow 3 other values.
   2069 
   2070    This implies there is some non-obvious mapping from guest state
   2071    (start,length) pairs to the relevant shadow offset (for the origin
   2072    tags).  And it is unfortunately guest-architecture specific.  The
   2073    mapping is contained in mc_machine.c, which is quite lengthy but
   2074    straightforward.
   2075 
   2076    Instrumenting the IR
   2077    ~~~~~~~~~~~~~~~~~~~~
   2078 
   2079    Instrumentation is largely straightforward, and done by the
   2080    functions schemeE and schemeS in mc_translate.c.  These generate
   2081    code for handling the origin tags of expressions (E) and statements
   2082    (S) respectively.  The rather strange names are a reference to the
   2083    "compilation schemes" shown in Simon Peyton Jones' book "The
   2084    Implementation of Functional Programming Languages" (Prentice Hall,
   2085    1987, see
   2086    http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm).
   2087 
   2088    schemeS merely arranges to move shadow values around the guest
   2089    state to track the incoming IR.  schemeE is largely trivial too.
   2090    The only significant point is how to compute the otag corresponding
   2091    to binary (or ternary, quaternary, etc) operator applications.  The
   2092    rule is simple: just take whichever value is larger (32-bit
   2093    unsigned max).  Constants get the special value zero.  Hence this
   2094    rule always propagates a nonzero (known) otag in preference to a
   2095    zero (unknown, or more likely, value-is-defined) tag, as we want.
   2096    If two different undefined values are inputs to a binary operator
   2097    application, then which is propagated is arbitrary, but that
   2098    doesn't matter, since the program is erroneous in using either of
   2099    the values, and so there's no point in attempting to propagate
   2100    both.
   2101 
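           For instance (invented tag values): if e1 carries otag 0x1ABC and
           e2 is a constant, carrying otag 0, the instrumentation for
           op(e1,e2) computes Max32U(0x1ABC, 0) = 0x1ABC, so the known origin
           survives.  If both operands carry nonzero otags, the numerically
           larger one wins, which is arbitrary but, as argued above, harmless.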
   2102    Since constants are abstracted to (otag) zero, much of the
   2103    instrumentation code can be folded out without difficulty by the
   2104    generic post-instrumentation IR cleanup pass, using these rules:
   2105    Max32U(0,x) -> x, Max32U(x,0) -> x, and Max32U(x,y) where x and y
   2106    are constants is evaluated at JIT time, followed by the resulting
   2107    dead code removal.  In practice this causes surprisingly few
   2108    Max32Us to survive through to backend code generation.
   2109 
   2110    Integration with the V-bits machinery
   2111    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   2112 
   2113    This is again largely straightforward.  Mostly the otag and V bits
   2114    stuff are independent.  The only point of interaction is when the V
   2115    bits instrumenter creates a call to a helper function to report an
   2116    uninitialised value error -- in that case it must first use schemeE
   2117    to get hold of the origin tag expression for the value, and pass
   2118    that to the helper too.
   2119 
   2120    There is the usual stuff to do with setting address range
   2121    permissions.  When memory is painted undefined, we must also know
   2122    the origin tag to paint with, which involves some tedious plumbing,
   2123    particularly to do with the fast case stack handlers.  When memory
   2124    is painted defined or noaccess then the origin tags must be forced
   2125    to zero.
   2126 
   2127    One of the goals of the implementation was to ensure that the
   2128    non-origin tracking mode isn't slowed down at all.  To do this,
   2129    various functions to do with memory permissions setting (again,
   2130    mostly pertaining to the stack) are duplicated for the with- and
   2131    without-otag case.
   2132 
   2133    Dealing with stack redzones, and the NIA cache
   2134    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   2135 
   2136    This is one of the few non-obvious parts of the implementation.
   2137 
   2138    Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small
   2139    reserved area below the stack pointer, that can be used as scratch
   2140    space by compiler generated code for functions.  In the Memcheck
   2141    sources this is referred to as the "stack redzone".  The important
   2142    thing here is that such redzones are considered volatile across
   2143    function calls and returns.  So Memcheck takes care to mark them as
   2144    undefined for each call and return, on the afflicted platforms.
   2145    Past experience shows this is essential in order to get reliable
   2146    messages about uninitialised values that come from the stack.
   2147 
   2148    So the question is, when we paint a redzone undefined, what origin
   2149    tag should we use for it?  Consider a function f() calling g().  If
   2150    we paint the redzone using an otag derived from the ExeContext of
   2151    the CALL/BL instruction in f, then any errors in g causing it to
   2152    use uninitialised values that happen to lie in the redzone, will be
   2153    reported as having their origin in f.  Which is highly confusing.
   2154 
   2155    The same applies for returns: if, on a return, we paint the redzone
   2156    using an origin tag derived from the ExeContext of the RET/BLR
   2157    instruction in g, then any later errors in f causing it to use
   2158    uninitialised values in the redzone, will be reported as having
   2159    their origin in g.  Which is just as confusing.
   2160 
   2161    To do it right, in both cases we need to use an origin tag which
   2162    pertains to the instruction which dynamically follows the CALL/BL
   2163    or RET/BLR.  In short, one derived from the NIA - the "next
   2164    instruction address".
   2165 
   2166    To make this work, Memcheck's redzone-painting helper,
   2167    MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the
   2168    NIA.  It converts the NIA to a 1-element ExeContext, and uses that
   2169    ExeContext's ECU as the basis for the otag used to paint the
   2170    redzone.  The expensive part of this is converting an NIA into an
   2171    ECU, since this happens once for every call and every return.  So
   2172    we use a simple 511-line, 2-way set associative cache
   2173    (nia_to_ecu_cache) to cache the mappings, and that knocks most of
   2174    the cost out.
   2175 
   2176    Further background comments
   2177    ~~~~~~~~~~~~~~~~~~~~~~~~~~~
   2178 
   2179    > Question: why is otag a UInt?  Wouldn't a UWord be better?  Isn't
   2180    > it really just the address of the relevant ExeContext?
   2181 
   2182    Well, it's not the address, but a value which has a 1-1 mapping
   2183    with ExeContexts, and is guaranteed not to be zero, since zero
   2184    denotes (to memcheck) "unknown origin or defined value".  So these
   2185    UInts are just numbers starting at 4 and incrementing by 4; each
   2186    ExeContext is given a number when it is created.  (*** NOTE this
   2187    confuses otags and ECUs; see comments above ***).
   2188 
   2189    Making these otags 32-bit regardless of the machine's word size
   2190    makes the 64-bit implementation easier (next para).  And it doesn't
   2191    really limit us in any way, since for the tags to overflow would
   2192    require that the program somehow caused 2^30-1 different
   2193    ExeContexts to be created, in which case it is probably in deep
   2194    trouble.  Not to mention V will have soaked up many tens of
   2195    gigabytes of memory merely to store them all.
   2196 
   2197    So having 64-bit origins doesn't really buy you anything, and has
   2198    the following downsides:
   2199 
   2200    Suppose that instead, an otag is a UWord.  This would mean that, on
   2201    a 64-bit target,
   2202 
   2203    1. It becomes hard to shadow any element of guest state which is
   2204       smaller than 8 bytes.  To do so means you'd need to find some
   2205       8-byte-sized hole in the guest state which you don't want to
   2206       shadow, and use that instead to hold the otag.  On ppc64, the
   2207       condition code register(s) are split into 20 UChar sized pieces,
   2208       all of which need to be tracked (guest_XER_SO .. guest_CR7_0)
   2209       and so that would entail finding 160 bytes somewhere else in the
   2210       guest state.
   2211 
   2212       Even on x86, I want to track origins for %AH .. %DH (bits 15:8
   2213       of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of
   2214       same) and so I had to look for 4 untracked otag-sized areas in
   2215       the guest state to make that possible.
   2216 
   2217       The same problem exists of course when origin tags are only 32
   2218       bits, but it's less extreme.
   2219 
   2220    2. (More compelling) it doubles the size of the origin shadow
   2221       memory.  Given that the shadow memory is organised as a fixed
   2222       size cache, and that accuracy of tracking is limited by origins
   2223       falling out the cache due to space conflicts, this isn't good.
   2224 
   2225    > Another question: is the origin tracking perfect, or are there
   2226    > cases where it fails to determine an origin?
   2227 
   2228    It is imperfect, for at least the following reasons, and
   2229    probably more:
   2230 
   2231    * Insufficient capacity in the origin cache.  When a line is
   2232      evicted from the cache it is gone forever, and so subsequent
   2233      queries for the line produce zero, indicating no origin
   2234      information.  Interestingly, a line containing all zeroes can be
   2235      evicted "free" from the cache, since it contains no useful
   2236      information, so there is scope perhaps for some cleverer cache
   2237      management schemes.  (*** NOTE, with the introduction of the
   2238      second level origin tag cache, ocacheL2, this is no longer a
   2239      problem. ***)
   2240 
   2241    * The origin cache only stores one otag per 32-bits of address
   2242      space, plus 4 bits indicating which of the 4 bytes has that tag
   2243      and which are considered defined.  The result is that if two
   2244      undefined bytes in the same word are stored in memory, the first
   2245      stored byte's origin will be lost and replaced by the origin for
   2246      the second byte.
   2247 
   2248    * Nonzero origin tags for defined values.  Consider a binary
   2249      operator application op(x,y).  Suppose y is undefined (and so has
   2250      a valid nonzero origin tag), and x is defined, but erroneously
   2251      has a nonzero origin tag (defined values should have tag zero).
   2252      If the erroneous tag has a numeric value greater than y's tag,
   2253      then the rule for propagating origin tags though binary
   2254      operations, which is simply to take the unsigned max of the two
   2255      tags, will erroneously propagate x's tag rather than y's.
   2256 
   2257    * Some obscure uses of x86/amd64 byte registers can cause lossage
   2258      or confusion of origins.  %AH .. %DH are treated as different
   2259      from, and unrelated to, their parent registers, %EAX .. %EDX.
   2260      So some weird sequences like
   2261 
   2262         movb undefined-value, %AH
   2263         movb defined-value, %AL
   2264         .. use %AX or %EAX ..
   2265 
   2266      will cause the origin attributed to %AH to be ignored, since %AL,
   2267      %AX, %EAX are treated as the same register, and %AH as a
   2268      completely separate one.
   2269 
   2270    But having said all that, it actually seems to work fairly well in
   2271    practice.
   2272 */
   2273 
   2274 static UWord stats_ocacheL1_find           = 0;
   2275 static UWord stats_ocacheL1_found_at_1     = 0;
   2276 static UWord stats_ocacheL1_found_at_N     = 0;
   2277 static UWord stats_ocacheL1_misses         = 0;
   2278 static UWord stats_ocacheL1_lossage        = 0;
   2279 static UWord stats_ocacheL1_movefwds       = 0;
   2280 
   2281 static UWord stats__ocacheL2_refs          = 0;
   2282 static UWord stats__ocacheL2_misses        = 0;
   2283 static UWord stats__ocacheL2_n_nodes_max   = 0;
   2284 
   2285 /* Cache of 32-bit values, one every 32 bits of address space */
   2286 
   2287 #define OC_BITS_PER_LINE 5
   2288 #define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2))
   2289 
   2290 static INLINE UWord oc_line_offset ( Addr a ) {
   2291    return (a >> 2) & (OC_W32S_PER_LINE - 1);
   2292 }
   2293 static INLINE Bool is_valid_oc_tag ( Addr tag ) {
   2294    return 0 == (tag & ((1 << OC_BITS_PER_LINE) - 1));
   2295 }
   2296 
   2297 #define OC_LINES_PER_SET 2
   2298 
   2299 #define OC_N_SET_BITS    20
   2300 #define OC_N_SETS        (1 << OC_N_SET_BITS)
   2301 
   2302 /* These settings give:
   2303    64 bit host: ocache:  100,663,296 sizeB    67,108,864 useful
   2304    32 bit host: ocache:   92,274,688 sizeB    67,108,864 useful
   2305 */
   2306 
   2307 #define OC_MOVE_FORWARDS_EVERY_BITS 7
   2308 
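        /* Worked example of the decomposition implied by the constants above
           (the address is invented; the arithmetic matches find_OCacheLine
           below): with OC_BITS_PER_LINE == 5 a line covers 32 bytes, i.e. 8
           32-bit words, and OC_N_SET_BITS == 20 gives 2^20 sets.  For
           a == 0x40001234:

              tag    = a & ~31                  = 0x40001220
              setno  = (a >> 5) & (OC_N_SETS-1) = 0x00091
              offset = (a >> 2) & 7             = 5    (oc_line_offset above)

           so the otag for the 32-bit word containing a lives in slot
           w32[5]/descr[5] of whichever line in set 0x91 carries the tag
           0x40001220. */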
   2309 
   2310 typedef
   2311    struct {
   2312       Addr  tag;
   2313       UInt  w32[OC_W32S_PER_LINE];
   2314       UChar descr[OC_W32S_PER_LINE];
   2315    }
   2316    OCacheLine;
   2317 
   2318 /* Classify and also sanity-check 'line'.  Return 'e' (empty) if not
   2319    in use, 'n' (nonzero) if it contains at least one valid origin tag,
   2320    and 'z' if all the represented tags are zero. */
   2321 static UChar classify_OCacheLine ( OCacheLine* line )
   2322 {
   2323    UWord i;
   2324    if (line->tag == 1/*invalid*/)
   2325       return 'e'; /* EMPTY */
   2326    tl_assert(is_valid_oc_tag(line->tag));
   2327    for (i = 0; i < OC_W32S_PER_LINE; i++) {
   2328       tl_assert(0 == ((~0xF) & line->descr[i]));
   2329       if (line->w32[i] > 0 && line->descr[i] > 0)
   2330          return 'n'; /* NONZERO - contains useful info */
   2331    }
   2332    return 'z'; /* ZERO - no useful info */
   2333 }
   2334 
   2335 typedef
   2336    struct {
   2337       OCacheLine line[OC_LINES_PER_SET];
   2338    }
   2339    OCacheSet;
   2340 
   2341 typedef
   2342    struct {
   2343       OCacheSet set[OC_N_SETS];
   2344    }
   2345    OCache;
   2346 
   2347 static OCache* ocacheL1 = NULL;
   2348 static UWord   ocacheL1_event_ctr = 0;
   2349 
   2350 static void init_ocacheL2 ( void ); /* fwds */
   2351 static void init_OCache ( void )
   2352 {
   2353    UWord line, set;
   2354    tl_assert(MC_(clo_mc_level) >= 3);
   2355    tl_assert(ocacheL1 == NULL);
   2356    ocacheL1 = VG_(am_shadow_alloc)(sizeof(OCache));
   2357    if (ocacheL1 == NULL) {
   2358       VG_(out_of_memory_NORETURN)( "memcheck:allocating ocacheL1",
   2359                                    sizeof(OCache) );
   2360    }
   2361    tl_assert(ocacheL1 != NULL);
   2362    for (set = 0; set < OC_N_SETS; set++) {
   2363       for (line = 0; line < OC_LINES_PER_SET; line++) {
   2364          ocacheL1->set[set].line[line].tag = 1/*invalid*/;
   2365       }
   2366    }
   2367    init_ocacheL2();
   2368 }
   2369 
   2370 static void moveLineForwards ( OCacheSet* set, UWord lineno )
   2371 {
   2372    OCacheLine tmp;
   2373    stats_ocacheL1_movefwds++;
   2374    tl_assert(lineno > 0 && lineno < OC_LINES_PER_SET);
   2375    tmp = set->line[lineno-1];
   2376    set->line[lineno-1] = set->line[lineno];
   2377    set->line[lineno] = tmp;
   2378 }
   2379 
   2380 static void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) {
   2381    UWord i;
   2382    for (i = 0; i < OC_W32S_PER_LINE; i++) {
   2383       line->w32[i] = 0; /* NO ORIGIN */
   2384       line->descr[i] = 0; /* REALLY REALLY NO ORIGIN! */
   2385    }
   2386    line->tag = tag;
   2387 }
   2388 
   2389 //////////////////////////////////////////////////////////////
   2390 //// OCache backing store
   2391 
   2392 static OSet* ocacheL2 = NULL;
   2393 
   2394 static void* ocacheL2_malloc ( const HChar* cc, SizeT szB ) {
   2395    return VG_(malloc)(cc, szB);
   2396 }
   2397 static void ocacheL2_free ( void* v ) {
   2398    VG_(free)( v );
   2399 }
   2400 
   2401 /* Stats: # nodes currently in tree */
   2402 static UWord stats__ocacheL2_n_nodes = 0;
   2403 
   2404 static void init_ocacheL2 ( void )
   2405 {
   2406    tl_assert(!ocacheL2);
   2407    tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */
   2408    tl_assert(0 == offsetof(OCacheLine,tag));
   2409    ocacheL2
   2410       = VG_(OSetGen_Create)( offsetof(OCacheLine,tag),
   2411                              NULL, /* fast cmp */
   2412                              ocacheL2_malloc, "mc.ioL2", ocacheL2_free);
   2413    stats__ocacheL2_n_nodes = 0;
   2414 }
   2415 
   2416 /* Find line with the given tag in the tree, or NULL if not found. */
   2417 static OCacheLine* ocacheL2_find_tag ( Addr tag )
   2418 {
   2419    OCacheLine* line;
   2420    tl_assert(is_valid_oc_tag(tag));
   2421    stats__ocacheL2_refs++;
   2422    line = VG_(OSetGen_Lookup)( ocacheL2, &tag );
   2423    return line;
   2424 }
   2425 
   2426 /* Delete the line with the given tag from the tree, if it is present, and
   2427    free up the associated memory. */
   2428 static void ocacheL2_del_tag ( Addr tag )
   2429 {
   2430    OCacheLine* line;
   2431    tl_assert(is_valid_oc_tag(tag));
   2432    stats__ocacheL2_refs++;
   2433    line = VG_(OSetGen_Remove)( ocacheL2, &tag );
   2434    if (line) {
   2435       VG_(OSetGen_FreeNode)(ocacheL2, line);
   2436       tl_assert(stats__ocacheL2_n_nodes > 0);
   2437       stats__ocacheL2_n_nodes--;
   2438    }
   2439 }
   2440 
   2441 /* Add a copy of the given line to the tree.  It must not already be
   2442    present. */
   2443 static void ocacheL2_add_line ( OCacheLine* line )
   2444 {
   2445    OCacheLine* copy;
   2446    tl_assert(is_valid_oc_tag(line->tag));
   2447    copy = VG_(OSetGen_AllocNode)( ocacheL2, sizeof(OCacheLine) );
   2448    *copy = *line;
   2449    stats__ocacheL2_refs++;
   2450    VG_(OSetGen_Insert)( ocacheL2, copy );
   2451    stats__ocacheL2_n_nodes++;
   2452    if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max)
   2453       stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes;
   2454 }
   2455 
   2456 ////
   2457 //////////////////////////////////////////////////////////////
   2458 
   2459 __attribute__((noinline))
   2460 static OCacheLine* find_OCacheLine_SLOW ( Addr a )
   2461 {
   2462    OCacheLine *victim, *inL2;
   2463    UChar c;
   2464    UWord line;
   2465    UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
   2466    UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
   2467    UWord tag     = a & tagmask;
   2468    tl_assert(setno >= 0 && setno < OC_N_SETS);
   2469 
   2470    /* we already tried line == 0; skip therefore. */
   2471    for (line = 1; line < OC_LINES_PER_SET; line++) {
   2472       if (ocacheL1->set[setno].line[line].tag == tag) {
   2473          if (line == 1) {
   2474             stats_ocacheL1_found_at_1++;
   2475          } else {
   2476             stats_ocacheL1_found_at_N++;
   2477          }
   2478          if (UNLIKELY(0 == (ocacheL1_event_ctr++
   2479                             & ((1<<OC_MOVE_FORWARDS_EVERY_BITS)-1)))) {
   2480             moveLineForwards( &ocacheL1->set[setno], line );
   2481             line--;
   2482          }
   2483          return &ocacheL1->set[setno].line[line];
   2484       }
   2485    }
   2486 
   2487    /* A miss.  Use the last slot.  Implicitly this means we're
   2488       ejecting the line in the last slot. */
   2489    stats_ocacheL1_misses++;
   2490    tl_assert(line == OC_LINES_PER_SET);
   2491    line--;
   2492    tl_assert(line > 0);
   2493 
   2494    /* First, move the to-be-ejected line to the L2 cache. */
   2495    victim = &ocacheL1->set[setno].line[line];
   2496    c = classify_OCacheLine(victim);
   2497    switch (c) {
   2498       case 'e':
   2499          /* the line is empty (has invalid tag); ignore it. */
   2500          break;
   2501       case 'z':
   2502          /* line contains zeroes.  We must ensure the backing store is
   2503             updated accordingly, either by copying the line there
   2504             verbatim, or by ensuring it isn't present there.  We
    2505             choose the latter on the basis that it reduces the size of
   2506             the backing store. */
   2507          ocacheL2_del_tag( victim->tag );
   2508          break;
   2509       case 'n':
   2510          /* line contains at least one real, useful origin.  Copy it
   2511             to the backing store. */
   2512          stats_ocacheL1_lossage++;
   2513          inL2 = ocacheL2_find_tag( victim->tag );
   2514          if (inL2) {
   2515             *inL2 = *victim;
   2516          } else {
   2517             ocacheL2_add_line( victim );
   2518          }
   2519          break;
   2520       default:
   2521          tl_assert(0);
   2522    }
   2523 
   2524    /* Now we must reload the L1 cache from the backing tree, if
   2525       possible. */
   2526    tl_assert(tag != victim->tag); /* stay sane */
   2527    inL2 = ocacheL2_find_tag( tag );
   2528    if (inL2) {
   2529       /* We're in luck.  It's in the L2. */
   2530       ocacheL1->set[setno].line[line] = *inL2;
   2531    } else {
   2532       /* Missed at both levels of the cache hierarchy.  We have to
   2533          declare it as full of zeroes (unknown origins). */
   2534       stats__ocacheL2_misses++;
   2535       zeroise_OCacheLine( &ocacheL1->set[setno].line[line], tag );
   2536    }
   2537 
    2538    /* Move the reloaded line one slot forwards. */
   2539    moveLineForwards( &ocacheL1->set[setno], line );
   2540    line--;
   2541 
   2542    return &ocacheL1->set[setno].line[line];
   2543 }
   2544 
   2545 static INLINE OCacheLine* find_OCacheLine ( Addr a )
   2546 {
   2547    UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
   2548    UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
   2549    UWord tag     = a & tagmask;
   2550 
   2551    stats_ocacheL1_find++;
   2552 
   2553    if (OC_ENABLE_ASSERTIONS) {
   2554       tl_assert(setno >= 0 && setno < OC_N_SETS);
   2555       tl_assert(0 == (tag & (4 * OC_W32S_PER_LINE - 1)));
   2556    }
   2557 
   2558    if (LIKELY(ocacheL1->set[setno].line[0].tag == tag)) {
   2559       return &ocacheL1->set[setno].line[0];
   2560    }
   2561 
   2562    return find_OCacheLine_SLOW( a );
   2563 }
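
         /* Illustrative only, and deliberately not compiled: a minimal sketch
            showing how an address decomposes into the (set number, tag,
            32-bit-word offset) triple used by the two lookup routines above.
            The function name is hypothetical; the arithmetic simply mirrors
            find_OCacheLine and find_OCacheLine_SLOW. */
         #if 0
         static void oc_show_decomposition ( Addr a )
         {
            UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
            UWord tag     = a & ~((1 << OC_BITS_PER_LINE) - 1);
            UWord lineoff = oc_line_offset(a); /* which 32-bit word in the line */
            VG_(printf)("%#lx -> set %lu, tag %#lx, w32 offset %lu\n",
                        a, setno, tag, lineoff);
         }
         #endif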
   2564 
   2565 static INLINE void set_aligned_word64_Origin_to_undef ( Addr a, UInt otag )
   2566 {
   2567    //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
   2568    //// Set the origins for a+0 .. a+7
   2569    { OCacheLine* line;
   2570      UWord lineoff = oc_line_offset(a);
   2571      if (OC_ENABLE_ASSERTIONS) {
   2572         tl_assert(lineoff >= 0
   2573                   && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
   2574      }
   2575      line = find_OCacheLine( a );
   2576      line->descr[lineoff+0] = 0xF;
   2577      line->descr[lineoff+1] = 0xF;
   2578      line->w32[lineoff+0]   = otag;
   2579      line->w32[lineoff+1]   = otag;
   2580    }
   2581    //// END inlined, specialised version of MC_(helperc_b_store8)
   2582 }
   2583 
   2584 
   2585 /*------------------------------------------------------------*/
   2586 /*--- Aligned fast case permission setters,                ---*/
   2587 /*--- for dealing with stacks                              ---*/
   2588 /*------------------------------------------------------------*/
   2589 
   2590 /*--------------------- 32-bit ---------------------*/
   2591 
   2592 /* Nb: by "aligned" here we mean 4-byte aligned */
   2593 
   2594 static INLINE void make_aligned_word32_undefined ( Addr a )
   2595 {
   2596    PROF_EVENT(300, "make_aligned_word32_undefined");
   2597 
   2598 #ifndef PERF_FAST_STACK2
   2599    make_mem_undefined(a, 4);
   2600 #else
   2601    {
   2602       UWord   sm_off;
   2603       SecMap* sm;
   2604 
   2605       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
   2606          PROF_EVENT(301, "make_aligned_word32_undefined-slow1");
   2607          make_mem_undefined(a, 4);
   2608          return;
   2609       }
   2610 
   2611       sm                  = get_secmap_for_writing_low(a);
   2612       sm_off              = SM_OFF(a);
   2613       sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
   2614    }
   2615 #endif
   2616 }
   2617 
   2618 static INLINE
   2619 void make_aligned_word32_undefined_w_otag ( Addr a, UInt otag )
   2620 {
   2621    make_aligned_word32_undefined(a);
   2622    //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
   2623    //// Set the origins for a+0 .. a+3
   2624    { OCacheLine* line;
   2625      UWord lineoff = oc_line_offset(a);
   2626      if (OC_ENABLE_ASSERTIONS) {
   2627         tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   2628      }
   2629      line = find_OCacheLine( a );
   2630      line->descr[lineoff] = 0xF;
   2631      line->w32[lineoff]   = otag;
   2632    }
   2633    //// END inlined, specialised version of MC_(helperc_b_store4)
   2634 }
   2635 
   2636 static INLINE
   2637 void make_aligned_word32_noaccess ( Addr a )
   2638 {
   2639    PROF_EVENT(310, "make_aligned_word32_noaccess");
   2640 
   2641 #ifndef PERF_FAST_STACK2
   2642    MC_(make_mem_noaccess)(a, 4);
   2643 #else
   2644    {
   2645       UWord   sm_off;
   2646       SecMap* sm;
   2647 
   2648       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
   2649          PROF_EVENT(311, "make_aligned_word32_noaccess-slow1");
   2650          MC_(make_mem_noaccess)(a, 4);
   2651          return;
   2652       }
   2653 
   2654       sm                  = get_secmap_for_writing_low(a);
   2655       sm_off              = SM_OFF(a);
   2656       sm->vabits8[sm_off] = VA_BITS8_NOACCESS;
   2657 
   2658       //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
    2659       //// Clear the origins for a+0 .. a+3.
   2660       if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
   2661          OCacheLine* line;
   2662          UWord lineoff = oc_line_offset(a);
   2663          if (OC_ENABLE_ASSERTIONS) {
   2664             tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   2665          }
   2666          line = find_OCacheLine( a );
   2667          line->descr[lineoff] = 0;
   2668       }
   2669       //// END inlined, specialised version of MC_(helperc_b_store4)
   2670    }
   2671 #endif
   2672 }
   2673 
   2674 /*--------------------- 64-bit ---------------------*/
   2675 
   2676 /* Nb: by "aligned" here we mean 8-byte aligned */
   2677 
   2678 static INLINE void make_aligned_word64_undefined ( Addr a )
   2679 {
   2680    PROF_EVENT(320, "make_aligned_word64_undefined");
   2681 
   2682 #ifndef PERF_FAST_STACK2
   2683    make_mem_undefined(a, 8);
   2684 #else
   2685    {
   2686       UWord   sm_off16;
   2687       SecMap* sm;
   2688 
   2689       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
   2690          PROF_EVENT(321, "make_aligned_word64_undefined-slow1");
   2691          make_mem_undefined(a, 8);
   2692          return;
   2693       }
   2694 
   2695       sm       = get_secmap_for_writing_low(a);
   2696       sm_off16 = SM_OFF_16(a);
   2697       ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_UNDEFINED;
   2698    }
   2699 #endif
   2700 }
   2701 
   2702 static INLINE
   2703 void make_aligned_word64_undefined_w_otag ( Addr a, UInt otag )
   2704 {
   2705    make_aligned_word64_undefined(a);
   2706    //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
   2707    //// Set the origins for a+0 .. a+7
   2708    { OCacheLine* line;
   2709      UWord lineoff = oc_line_offset(a);
   2710      tl_assert(lineoff >= 0
   2711                && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
   2712      line = find_OCacheLine( a );
   2713      line->descr[lineoff+0] = 0xF;
   2714      line->descr[lineoff+1] = 0xF;
   2715      line->w32[lineoff+0]   = otag;
   2716      line->w32[lineoff+1]   = otag;
   2717    }
   2718    //// END inlined, specialised version of MC_(helperc_b_store8)
   2719 }
   2720 
   2721 static INLINE
   2722 void make_aligned_word64_noaccess ( Addr a )
   2723 {
   2724    PROF_EVENT(330, "make_aligned_word64_noaccess");
   2725 
   2726 #ifndef PERF_FAST_STACK2
   2727    MC_(make_mem_noaccess)(a, 8);
   2728 #else
   2729    {
   2730       UWord   sm_off16;
   2731       SecMap* sm;
   2732 
   2733       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
   2734          PROF_EVENT(331, "make_aligned_word64_noaccess-slow1");
   2735          MC_(make_mem_noaccess)(a, 8);
   2736          return;
   2737       }
   2738 
   2739       sm       = get_secmap_for_writing_low(a);
   2740       sm_off16 = SM_OFF_16(a);
   2741       ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_NOACCESS;
   2742 
   2743       //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
   2744       //// Clear the origins for a+0 .. a+7.
   2745       if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
   2746          OCacheLine* line;
   2747          UWord lineoff = oc_line_offset(a);
   2748          tl_assert(lineoff >= 0
   2749                    && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
   2750          line = find_OCacheLine( a );
   2751          line->descr[lineoff+0] = 0;
   2752          line->descr[lineoff+1] = 0;
   2753       }
   2754       //// END inlined, specialised version of MC_(helperc_b_store8)
   2755    }
   2756 #endif
   2757 }
   2758 
   2759 
   2760 /*------------------------------------------------------------*/
   2761 /*--- Stack pointer adjustment                             ---*/
   2762 /*------------------------------------------------------------*/
   2763 
   2764 #ifdef PERF_FAST_STACK
   2765 #  define MAYBE_USED
   2766 #else
   2767 #  define MAYBE_USED __attribute__((unused))
   2768 #endif
   2769 
   2770 /*--------------- adjustment by 4 bytes ---------------*/
   2771 
   2772 MAYBE_USED
   2773 static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP, UInt ecu)
   2774 {
   2775    UInt otag = ecu | MC_OKIND_STACK;
   2776    PROF_EVENT(110, "new_mem_stack_4");
   2777    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2778       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
   2779    } else {
   2780       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 4, otag );
   2781    }
   2782 }
   2783 
   2784 MAYBE_USED
   2785 static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP)
   2786 {
   2787    PROF_EVENT(110, "new_mem_stack_4");
   2788    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2789       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2790    } else {
   2791       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 4 );
   2792    }
   2793 }
   2794 
   2795 MAYBE_USED
   2796 static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP)
   2797 {
   2798    PROF_EVENT(120, "die_mem_stack_4");
   2799    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2800       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
   2801    } else {
   2802       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 );
   2803    }
   2804 }
   2805 
   2806 /*--------------- adjustment by 8 bytes ---------------*/
   2807 
   2808 MAYBE_USED
   2809 static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP, UInt ecu)
   2810 {
   2811    UInt otag = ecu | MC_OKIND_STACK;
   2812    PROF_EVENT(111, "new_mem_stack_8");
   2813    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2814       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
   2815    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2816       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
   2817       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
   2818    } else {
   2819       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 8, otag );
   2820    }
   2821 }
   2822 
   2823 MAYBE_USED
   2824 static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP)
   2825 {
   2826    PROF_EVENT(111, "new_mem_stack_8");
   2827    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2828       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2829    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2830       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2831       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
   2832    } else {
   2833       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 8 );
   2834    }
   2835 }
   2836 
   2837 MAYBE_USED
   2838 static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP)
   2839 {
   2840    PROF_EVENT(121, "die_mem_stack_8");
   2841    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2842       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
   2843    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2844       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
   2845       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
   2846    } else {
   2847       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 );
   2848    }
   2849 }
   2850 
   2851 /*--------------- adjustment by 12 bytes ---------------*/
   2852 
   2853 MAYBE_USED
   2854 static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP, UInt ecu)
   2855 {
   2856    UInt otag = ecu | MC_OKIND_STACK;
   2857    PROF_EVENT(112, "new_mem_stack_12");
   2858    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2859       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
   2860       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
   2861    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2862       /* from previous test we don't have 8-alignment at offset +0,
   2863          hence must have 8 alignment at offsets +4/-4.  Hence safe to
    2864          do 4 at +0 and then 8 at +4. */
   2865       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
   2866       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
   2867    } else {
   2868       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 12, otag );
   2869    }
   2870 }
   2871 
   2872 MAYBE_USED
   2873 static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP)
   2874 {
   2875    PROF_EVENT(112, "new_mem_stack_12");
   2876    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2877       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2878       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   2879    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2880       /* from previous test we don't have 8-alignment at offset +0,
   2881          hence must have 8 alignment at offsets +4/-4.  Hence safe to
    2882          do 4 at +0 and then 8 at +4. */
   2883       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2884       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
   2885    } else {
   2886       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 12 );
   2887    }
   2888 }
   2889 
   2890 MAYBE_USED
   2891 static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP)
   2892 {
   2893    PROF_EVENT(122, "die_mem_stack_12");
   2894    /* Note the -12 in the test */
   2895    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP-12 )) {
   2896       /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at
   2897          -4. */
   2898       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
   2899       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
   2900    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2901       /* We have 4-alignment at +0, but we don't have 8-alignment at
   2902          -12.  So we must have 8-alignment at -8.  Hence do 4 at -12
   2903          and then 8 at -8. */
   2904       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
   2905       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
   2906    } else {
   2907       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 );
   2908    }
   2909 }
   2910 
   2911 /*--------------- adjustment by 16 bytes ---------------*/
   2912 
   2913 MAYBE_USED
   2914 static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP, UInt ecu)
   2915 {
   2916    UInt otag = ecu | MC_OKIND_STACK;
   2917    PROF_EVENT(113, "new_mem_stack_16");
   2918    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2919       /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
   2920       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
   2921       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
   2922    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2923       /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
   2924          Hence do 4 at +0, 8 at +4, 4 at +12. */
   2925       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   2926       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
   2927       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
   2928    } else {
   2929       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 16, otag );
   2930    }
   2931 }
   2932 
   2933 MAYBE_USED
   2934 static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP)
   2935 {
   2936    PROF_EVENT(113, "new_mem_stack_16");
   2937    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2938       /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
   2939       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2940       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   2941    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2942       /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
   2943          Hence do 4 at +0, 8 at +4, 4 at +12. */
   2944       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   2945       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4  );
   2946       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
   2947    } else {
   2948       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 16 );
   2949    }
   2950 }
   2951 
   2952 MAYBE_USED
   2953 static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP)
   2954 {
   2955    PROF_EVENT(123, "die_mem_stack_16");
   2956    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2957       /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */
   2958       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   2959       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
   2960    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2961       /* 8 alignment must be at -12.  Do 4 at -16, 8 at -12, 4 at -4. */
   2962       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   2963       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
   2964       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
   2965    } else {
   2966       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 );
   2967    }
   2968 }
   2969 
   2970 /*--------------- adjustment by 32 bytes ---------------*/
   2971 
   2972 MAYBE_USED
   2973 static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP, UInt ecu)
   2974 {
   2975    UInt otag = ecu | MC_OKIND_STACK;
   2976    PROF_EVENT(114, "new_mem_stack_32");
   2977    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2978       /* Straightforward */
   2979       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   2980       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
   2981       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
   2982       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
   2983    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   2984       /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
   2985          +0,+28. */
   2986       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   2987       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
   2988       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
   2989       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+20, otag );
   2990       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+28, otag );
   2991    } else {
   2992       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 32, otag );
   2993    }
   2994 }
   2995 
   2996 MAYBE_USED
   2997 static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP)
   2998 {
   2999    PROF_EVENT(114, "new_mem_stack_32");
   3000    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3001       /* Straightforward */
   3002       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   3003       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   3004       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   3005       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   3006    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3007       /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
   3008          +0,+28. */
   3009       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   3010       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
   3011       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
   3012       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+20 );
   3013       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+28 );
   3014    } else {
   3015       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 32 );
   3016    }
   3017 }
   3018 
   3019 MAYBE_USED
   3020 static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP)
   3021 {
   3022    PROF_EVENT(124, "die_mem_stack_32");
   3023    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3024       /* Straightforward */
   3025       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   3026       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   3027       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   3028       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   3029    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3030       /* 8 alignment must be at -4 etc.  Hence do 8 at -12,-20,-28 and
   3031          4 at -32,-4. */
   3032       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   3033       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-28 );
   3034       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-20 );
   3035       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
   3036       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
   3037    } else {
   3038       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 );
   3039    }
   3040 }
   3041 
   3042 /*--------------- adjustment by 112 bytes ---------------*/
   3043 
   3044 MAYBE_USED
   3045 static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP, UInt ecu)
   3046 {
   3047    UInt otag = ecu | MC_OKIND_STACK;
   3048    PROF_EVENT(115, "new_mem_stack_112");
   3049    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3050       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   3051       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
   3052       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
   3053       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
   3054       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
   3055       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
   3056       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
   3057       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
   3058       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
   3059       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
   3060       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
   3061       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
   3062       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
   3063       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
   3064    } else {
   3065       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 112, otag );
   3066    }
   3067 }
   3068 
   3069 MAYBE_USED
   3070 static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP)
   3071 {
   3072    PROF_EVENT(115, "new_mem_stack_112");
   3073    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3074       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   3075       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   3076       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   3077       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   3078       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
   3079       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
   3080       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
   3081       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
   3082       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
   3083       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
   3084       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
   3085       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
   3086       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
   3087       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
   3088    } else {
   3089       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 112 );
   3090    }
   3091 }
   3092 
   3093 MAYBE_USED
   3094 static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP)
   3095 {
   3096    PROF_EVENT(125, "die_mem_stack_112");
   3097    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3098       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
   3099       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
   3100       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
   3101       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
   3102       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
   3103       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
   3104       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
   3105       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
   3106       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
   3107       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
   3108       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   3109       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   3110       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   3111       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   3112    } else {
   3113       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-112, 112 );
   3114    }
   3115 }
   3116 
   3117 /*--------------- adjustment by 128 bytes ---------------*/
   3118 
   3119 MAYBE_USED
   3120 static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP, UInt ecu)
   3121 {
   3122    UInt otag = ecu | MC_OKIND_STACK;
   3123    PROF_EVENT(116, "new_mem_stack_128");
   3124    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3125       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
   3126       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
   3127       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
   3128       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
   3129       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
   3130       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
   3131       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
   3132       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
   3133       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
   3134       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
   3135       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
   3136       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
   3137       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
   3138       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
   3139       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
   3140       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
   3141    } else {
   3142       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 128, otag );
   3143    }
   3144 }
   3145 
   3146 MAYBE_USED
   3147 static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP)
   3148 {
   3149    PROF_EVENT(116, "new_mem_stack_128");
   3150    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3151       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   3152       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   3153       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   3154       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   3155       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
   3156       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
   3157       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
   3158       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
   3159       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
   3160       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
   3161       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
   3162       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
   3163       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
   3164       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
   3165       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
   3166       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
   3167    } else {
   3168       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 128 );
   3169    }
   3170 }
   3171 
   3172 MAYBE_USED
   3173 static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP)
   3174 {
   3175    PROF_EVENT(126, "die_mem_stack_128");
   3176    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3177       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
   3178       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
   3179       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
   3180       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
   3181       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
   3182       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
   3183       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
   3184       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
   3185       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
   3186       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
   3187       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
   3188       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
   3189       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   3190       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   3191       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   3192       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   3193    } else {
   3194       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-128, 128 );
   3195    }
   3196 }
   3197 
   3198 /*--------------- adjustment by 144 bytes ---------------*/
   3199 
   3200 MAYBE_USED
   3201 static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP, UInt ecu)
   3202 {
   3203    UInt otag = ecu | MC_OKIND_STACK;
   3204    PROF_EVENT(117, "new_mem_stack_144");
   3205    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3206       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
   3207       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
   3208       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
   3209       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
   3210       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
   3211       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
   3212       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
   3213       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
   3214       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
   3215       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
   3216       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
   3217       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
   3218       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
   3219       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
   3220       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
   3221       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
   3222       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
   3223       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
   3224    } else {
   3225       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 144, otag );
   3226    }
   3227 }
   3228 
   3229 MAYBE_USED
   3230 static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP)
   3231 {
   3232    PROF_EVENT(117, "new_mem_stack_144");
   3233    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3234       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   3235       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   3236       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   3237       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   3238       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
   3239       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
   3240       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
   3241       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
   3242       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
   3243       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
   3244       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
   3245       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
   3246       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
   3247       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
   3248       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
   3249       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
   3250       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
   3251       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
   3252    } else {
   3253       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 144 );
   3254    }
   3255 }
   3256 
   3257 MAYBE_USED
   3258 static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP)
   3259 {
   3260    PROF_EVENT(127, "die_mem_stack_144");
   3261    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3262       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
   3263       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
   3264       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
   3265       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
   3266       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
   3267       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
   3268       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
   3269       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
   3270       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
   3271       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
   3272       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
   3273       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
   3274       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
   3275       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
   3276       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   3277       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   3278       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   3279       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   3280    } else {
   3281       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-144, 144 );
   3282    }
   3283 }
   3284 
   3285 /*--------------- adjustment by 160 bytes ---------------*/
   3286 
   3287 MAYBE_USED
   3288 static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP, UInt ecu)
   3289 {
   3290    UInt otag = ecu | MC_OKIND_STACK;
   3291    PROF_EVENT(118, "new_mem_stack_160");
   3292    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3293       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
   3294       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
   3295       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
   3296       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
   3297       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
   3298       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
   3299       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
   3300       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
   3301       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
   3302       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
   3303       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
   3304       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
   3305       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
   3306       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
   3307       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
   3308       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
   3309       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
   3310       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
   3311       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+144, otag );
   3312       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+152, otag );
   3313    } else {
   3314       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 160, otag );
   3315    }
   3316 }
   3317 
   3318 MAYBE_USED
   3319 static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP)
   3320 {
   3321    PROF_EVENT(118, "new_mem_stack_160");
   3322    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3323       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
   3324       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
   3325       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
   3326       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
   3327       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
   3328       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
   3329       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
   3330       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
   3331       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
   3332       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
   3333       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
   3334       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
   3335       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
   3336       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
   3337       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
   3338       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
   3339       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
   3340       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
   3341       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+144 );
   3342       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+152 );
   3343    } else {
   3344       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 160 );
   3345    }
   3346 }
   3347 
   3348 MAYBE_USED
   3349 static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP)
   3350 {
   3351    PROF_EVENT(128, "die_mem_stack_160");
   3352    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
   3353       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-160);
   3354       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-152);
   3355       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
   3356       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
   3357       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
   3358       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
   3359       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
   3360       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
   3361       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
   3362       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
   3363       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
   3364       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
   3365       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
   3366       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
   3367       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
   3368       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
   3369       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
   3370       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
   3371       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
   3372       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
   3373    } else {
   3374       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-160, 160 );
   3375    }
   3376 }
   3377 
   3378 /*--------------- adjustment by N bytes ---------------*/
   3379 
   3380 static void mc_new_mem_stack_w_ECU ( Addr a, SizeT len, UInt ecu )
   3381 {
   3382    UInt otag = ecu | MC_OKIND_STACK;
   3383    PROF_EVENT(115, "new_mem_stack_w_otag");
   3384    MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + a, len, otag );
   3385 }
   3386 
   3387 static void mc_new_mem_stack ( Addr a, SizeT len )
   3388 {
   3389    PROF_EVENT(115, "new_mem_stack");
   3390    make_mem_undefined ( -VG_STACK_REDZONE_SZB + a, len );
   3391 }
   3392 
   3393 static void mc_die_mem_stack ( Addr a, SizeT len )
   3394 {
   3395    PROF_EVENT(125, "die_mem_stack");
   3396    MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + a, len );
   3397 }
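
         /* Illustrative only, and deliberately not compiled: a minimal sketch
            of how the specialised handlers above are meant to be wired up,
            assuming the core's VG_(track_new_mem_stack_4)-style registration
            hooks.  The real registration is done at tool initialisation
            elsewhere in this file, covers the remaining sizes as well, and
            installs the _w_ECU variants instead when origin tracking is
            enabled. */
         #if 0
         static void example_register_stack_handlers ( void )
         {
            VG_(track_new_mem_stack_4) ( mc_new_mem_stack_4 );
            VG_(track_die_mem_stack_4) ( mc_die_mem_stack_4 );
            /* ... and likewise for 8, 12, 16, 32, 112, 128, 144 and 160 ... */
            VG_(track_new_mem_stack)   ( mc_new_mem_stack );
            VG_(track_die_mem_stack)   ( mc_die_mem_stack );
         }
         #endif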
   3398 
   3399 
   3400 /* The AMD64 ABI says:
   3401 
   3402    "The 128-byte area beyond the location pointed to by %rsp is considered
   3403     to be reserved and shall not be modified by signal or interrupt
   3404     handlers.  Therefore, functions may use this area for temporary data
   3405     that is not needed across function calls.  In particular, leaf functions
   3406     may use this area for their entire stack frame, rather than adjusting
   3407     the stack pointer in the prologue and epilogue.  This area is known as
   3408     red zone [sic]."
   3409 
   3410    So after any call or return we need to mark this redzone as containing
   3411    undefined values.
   3412 
   3413    Consider this:  we're in function f.  f calls g.  g moves rsp down
   3414    modestly (say 16 bytes) and writes stuff all over the red zone, making it
   3415    defined.  g returns.  f is buggy and reads from parts of the red zone
   3416    that it didn't write on.  But because g filled that area in, f is going
   3417    to be picking up defined V bits and so any errors from reading bits of
    3418    the red zone it didn't write will be missed.  The only solution I could
   3419    think of was to make the red zone undefined when g returns to f.
   3420 
   3421    This is in accordance with the ABI, which makes it clear the redzone
   3422    is volatile across function calls.
   3423 
   3424    The problem occurs the other way round too: f could fill the RZ up
   3425    with defined values and g could mistakenly read them.  So the RZ
   3426    also needs to be nuked on function calls.
   3427 */
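
         /* Illustrative only, and deliberately not compiled: a minimal sketch
            of what the discussion above amounts to at a call or return on
            AMD64, assuming the instrumenter wants the 128-byte red zone below
            the new SP marked as undefined.  The wrapper name is hypothetical;
            the real call sites are generated during instrumentation and use
            MC_(helperc_MAKE_STACK_UNINIT), defined further down, which expects
            nia == 0 unless origin tracking is enabled. */
         #if 0
         static void example_nuke_amd64_redzone ( Addr new_SP, Addr nia )
         {
            /* Mark [new_SP - 128, new_SP) as containing undefined values. */
            MC_(helperc_MAKE_STACK_UNINIT)( new_SP - 128, 128, nia );
         }
         #endif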
   3428 
   3429 
   3430 /* Here's a simple cache to hold nia -> ECU mappings.  It could be
   3431    improved so as to have a lower miss rate. */
   3432 
   3433 static UWord stats__nia_cache_queries = 0;
   3434 static UWord stats__nia_cache_misses  = 0;
   3435 
   3436 typedef
   3437    struct { UWord nia0; UWord ecu0;   /* nia0 maps to ecu0 */
   3438             UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */
   3439    WCacheEnt;
   3440 
   3441 #define N_NIA_TO_ECU_CACHE 511
   3442 
   3443 static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE];
   3444 
   3445 static void init_nia_to_ecu_cache ( void )
   3446 {
   3447    UWord       i;
   3448    Addr        zero_addr = 0;
   3449    ExeContext* zero_ec;
   3450    UInt        zero_ecu;
   3451    /* Fill all the slots with an entry for address zero, and the
    3452       corresponding ECU.  Hence the cache is initially filled
   3453       with valid data. */
   3454    zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr);
   3455    tl_assert(zero_ec);
   3456    zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec);
   3457    tl_assert(VG_(is_plausible_ECU)(zero_ecu));
   3458    for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) {
   3459       nia_to_ecu_cache[i].nia0 = zero_addr;
   3460       nia_to_ecu_cache[i].ecu0 = zero_ecu;
   3461       nia_to_ecu_cache[i].nia1 = zero_addr;
   3462       nia_to_ecu_cache[i].ecu1 = zero_ecu;
   3463    }
   3464 }
   3465 
   3466 static inline UInt convert_nia_to_ecu ( Addr nia )
   3467 {
   3468    UWord i;
   3469    UInt        ecu;
   3470    ExeContext* ec;
   3471 
   3472    tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) );
   3473 
   3474    stats__nia_cache_queries++;
   3475    i = nia % N_NIA_TO_ECU_CACHE;
   3476    tl_assert(i >= 0 && i < N_NIA_TO_ECU_CACHE);
   3477 
   3478    if (LIKELY( nia_to_ecu_cache[i].nia0 == nia ))
   3479       return nia_to_ecu_cache[i].ecu0;
   3480 
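            /* Hit in the second, less recently used way: swap it into way 0,
               so the next lookup for this nia takes the fast check above. */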
   3481    if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) {
   3482 #     define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; }
   3483       SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 );
   3484       SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 );
   3485 #     undef SWAP
   3486       return nia_to_ecu_cache[i].ecu0;
   3487    }
   3488 
   3489    stats__nia_cache_misses++;
   3490    ec = VG_(make_depth_1_ExeContext_from_Addr)(nia);
   3491    tl_assert(ec);
   3492    ecu = VG_(get_ECU_from_ExeContext)(ec);
   3493    tl_assert(VG_(is_plausible_ECU)(ecu));
   3494 
   3495    nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0;
   3496    nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0;
   3497 
   3498    nia_to_ecu_cache[i].nia0 = nia;
   3499    nia_to_ecu_cache[i].ecu0 = (UWord)ecu;
   3500    return ecu;
   3501 }
   3502 
   3503 
   3504 /* Note that this serves both the origin-tracking and
   3505    no-origin-tracking modes.  We assume that calls to it are
   3506    sufficiently infrequent that it isn't worth specialising for the
   3507    with/without origin-tracking cases. */
   3508 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len, Addr nia )
   3509 {
   3510    UInt otag;
   3511    tl_assert(sizeof(UWord) == sizeof(SizeT));
   3512    if (0)
   3513       VG_(printf)("helperc_MAKE_STACK_UNINIT (%#lx,%lu,nia=%#lx)\n",
   3514                   base, len, nia );
   3515 
   3516    if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
   3517       UInt ecu = convert_nia_to_ecu ( nia );
   3518       tl_assert(VG_(is_plausible_ECU)(ecu));
   3519       otag = ecu | MC_OKIND_STACK;
   3520    } else {
   3521       tl_assert(nia == 0);
   3522       otag = 0;
   3523    }
   3524 
   3525 #  if 0
   3526    /* Really slow version */
    3527    MC_(make_mem_undefined_w_otag)(base, len, otag);
   3528 #  endif
   3529 
   3530 #  if 0
   3531    /* Slow(ish) version, which is fairly easily seen to be correct.
   3532    */
   3533    if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
    3534       make_aligned_word64_undefined_w_otag(base +   0, otag);
    3535       make_aligned_word64_undefined_w_otag(base +   8, otag);
    3536       make_aligned_word64_undefined_w_otag(base +  16, otag);
    3537       make_aligned_word64_undefined_w_otag(base +  24, otag);
    3538 
    3539       make_aligned_word64_undefined_w_otag(base +  32, otag);
    3540       make_aligned_word64_undefined_w_otag(base +  40, otag);
    3541       make_aligned_word64_undefined_w_otag(base +  48, otag);
    3542       make_aligned_word64_undefined_w_otag(base +  56, otag);
    3543 
    3544       make_aligned_word64_undefined_w_otag(base +  64, otag);
    3545       make_aligned_word64_undefined_w_otag(base +  72, otag);
    3546       make_aligned_word64_undefined_w_otag(base +  80, otag);
    3547       make_aligned_word64_undefined_w_otag(base +  88, otag);
    3548 
    3549       make_aligned_word64_undefined_w_otag(base +  96, otag);
    3550       make_aligned_word64_undefined_w_otag(base + 104, otag);
    3551       make_aligned_word64_undefined_w_otag(base + 112, otag);
    3552       make_aligned_word64_undefined_w_otag(base + 120, otag);
   3553    } else {
    3554       MC_(make_mem_undefined_w_otag)(base, len, otag);
   3555    }
   3556 #  endif
   3557 
   3558    /* Idea is: go fast when
   3559          * 8-aligned and length is 128
   3560          * the sm is available in the main primary map
    3561          * the address range falls entirely within a single secondary map
   3562       If all those conditions hold, just update the V+A bits by writing
   3563       directly into the vabits array.  (If the sm was distinguished, this
   3564       will make a copy and then write to it.)
   3565    */
   3566 
   3567    if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
   3568       /* Now we know the address range is suitably sized and aligned. */
   3569       UWord a_lo = (UWord)(base);
   3570       UWord a_hi = (UWord)(base + 128 - 1);
   3571       tl_assert(a_lo < a_hi);             // paranoia: detect overflow
   3572       if (a_hi <= MAX_PRIMARY_ADDRESS) {
   3573          // Now we know the entire range is within the main primary map.
   3574          SecMap* sm    = get_secmap_for_writing_low(a_lo);
   3575          SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
   3576          /* Now we know that the entire address range falls within a
   3577             single secondary map, and that that secondary 'lives' in
   3578             the main primary map. */
   3579          if (LIKELY(sm == sm_hi)) {
   3580             // Finally, we know that the range is entirely within one secmap.
   3581             UWord   v_off = SM_OFF(a_lo);
   3582             UShort* p     = (UShort*)(&sm->vabits8[v_off]);
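                     /* Each vabits8 byte holds the V+A bits for 4 bytes of client
                        memory, so every 16-bit store below covers 8 bytes; the 16
                        stores together cover the whole 128-byte range. */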
   3583             p[ 0] = VA_BITS16_UNDEFINED;
   3584             p[ 1] = VA_BITS16_UNDEFINED;
   3585             p[ 2] = VA_BITS16_UNDEFINED;
   3586             p[ 3] = VA_BITS16_UNDEFINED;
   3587             p[ 4] = VA_BITS16_UNDEFINED;
   3588             p[ 5] = VA_BITS16_UNDEFINED;
   3589             p[ 6] = VA_BITS16_UNDEFINED;
   3590             p[ 7] = VA_BITS16_UNDEFINED;
   3591             p[ 8] = VA_BITS16_UNDEFINED;
   3592             p[ 9] = VA_BITS16_UNDEFINED;
   3593             p[10] = VA_BITS16_UNDEFINED;
   3594             p[11] = VA_BITS16_UNDEFINED;
   3595             p[12] = VA_BITS16_UNDEFINED;
   3596             p[13] = VA_BITS16_UNDEFINED;
   3597             p[14] = VA_BITS16_UNDEFINED;
   3598             p[15] = VA_BITS16_UNDEFINED;
   3599             if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
   3600                set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
   3601                set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
   3602                set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
   3603                set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
   3604                set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
   3605                set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
   3606                set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
   3607                set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
   3608                set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
   3609                set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
   3610                set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
   3611                set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
   3612                set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
   3613                set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
   3614                set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
   3615                set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
   3616             }
   3617             return;
   3618          }
   3619       }
   3620    }
   3621 
   3622    /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
   3623    if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
   3624       /* Now we know the address range is suitably sized and aligned. */
   3625       UWord a_lo = (UWord)(base);
   3626       UWord a_hi = (UWord)(base + 288 - 1);
   3627       tl_assert(a_lo < a_hi);             // paranoia: detect overflow
   3628       if (a_hi <= MAX_PRIMARY_ADDRESS) {
   3629          // Now we know the entire range is within the main primary map.
   3630          SecMap* sm    = get_secmap_for_writing_low(a_lo);
   3631          SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
   3632          /* Now we know that the entire address range falls within a
   3633             single secondary map, and that that secondary 'lives' in
   3634             the main primary map. */
   3635          if (LIKELY(sm == sm_hi)) {
   3636             // Finally, we know that the range is entirely within one secmap.
   3637             UWord   v_off = SM_OFF(a_lo);
   3638             UShort* p     = (UShort*)(&sm->vabits8[v_off]);
   3639             p[ 0] = VA_BITS16_UNDEFINED;
   3640             p[ 1] = VA_BITS16_UNDEFINED;
   3641             p[ 2] = VA_BITS16_UNDEFINED;
   3642             p[ 3] = VA_BITS16_UNDEFINED;
   3643             p[ 4] = VA_BITS16_UNDEFINED;
   3644             p[ 5] = VA_BITS16_UNDEFINED;
   3645             p[ 6] = VA_BITS16_UNDEFINED;
   3646             p[ 7] = VA_BITS16_UNDEFINED;
   3647             p[ 8] = VA_BITS16_UNDEFINED;
   3648             p[ 9] = VA_BITS16_UNDEFINED;
   3649             p[10] = VA_BITS16_UNDEFINED;
   3650             p[11] = VA_BITS16_UNDEFINED;
   3651             p[12] = VA_BITS16_UNDEFINED;
   3652             p[13] = VA_BITS16_UNDEFINED;
   3653             p[14] = VA_BITS16_UNDEFINED;
   3654             p[15] = VA_BITS16_UNDEFINED;
   3655             p[16] = VA_BITS16_UNDEFINED;
   3656             p[17] = VA_BITS16_UNDEFINED;
   3657             p[18] = VA_BITS16_UNDEFINED;
   3658             p[19] = VA_BITS16_UNDEFINED;
   3659             p[20] = VA_BITS16_UNDEFINED;
   3660             p[21] = VA_BITS16_UNDEFINED;
   3661             p[22] = VA_BITS16_UNDEFINED;
   3662             p[23] = VA_BITS16_UNDEFINED;
   3663             p[24] = VA_BITS16_UNDEFINED;
   3664             p[25] = VA_BITS16_UNDEFINED;
   3665             p[26] = VA_BITS16_UNDEFINED;
   3666             p[27] = VA_BITS16_UNDEFINED;
   3667             p[28] = VA_BITS16_UNDEFINED;
   3668             p[29] = VA_BITS16_UNDEFINED;
   3669             p[30] = VA_BITS16_UNDEFINED;
   3670             p[31] = VA_BITS16_UNDEFINED;
   3671             p[32] = VA_BITS16_UNDEFINED;
   3672             p[33] = VA_BITS16_UNDEFINED;
   3673             p[34] = VA_BITS16_UNDEFINED;
   3674             p[35] = VA_BITS16_UNDEFINED;
   3675             if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
   3676                set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
   3677                set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
   3678                set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
   3679                set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
   3680                set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
   3681                set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
   3682                set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
   3683                set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
   3684                set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
   3685                set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
   3686                set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
   3687                set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
   3688                set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
   3689                set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
   3690                set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
   3691                set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
   3692                set_aligned_word64_Origin_to_undef( base + 8 * 16, otag );
   3693                set_aligned_word64_Origin_to_undef( base + 8 * 17, otag );
   3694                set_aligned_word64_Origin_to_undef( base + 8 * 18, otag );
   3695                set_aligned_word64_Origin_to_undef( base + 8 * 19, otag );
   3696                set_aligned_word64_Origin_to_undef( base + 8 * 20, otag );
   3697                set_aligned_word64_Origin_to_undef( base + 8 * 21, otag );
   3698                set_aligned_word64_Origin_to_undef( base + 8 * 22, otag );
   3699                set_aligned_word64_Origin_to_undef( base + 8 * 23, otag );
   3700                set_aligned_word64_Origin_to_undef( base + 8 * 24, otag );
   3701                set_aligned_word64_Origin_to_undef( base + 8 * 25, otag );
   3702                set_aligned_word64_Origin_to_undef( base + 8 * 26, otag );
   3703                set_aligned_word64_Origin_to_undef( base + 8 * 27, otag );
   3704                set_aligned_word64_Origin_to_undef( base + 8 * 28, otag );
   3705                set_aligned_word64_Origin_to_undef( base + 8 * 29, otag );
   3706                set_aligned_word64_Origin_to_undef( base + 8 * 30, otag );
   3707                set_aligned_word64_Origin_to_undef( base + 8 * 31, otag );
   3708                set_aligned_word64_Origin_to_undef( base + 8 * 32, otag );
   3709                set_aligned_word64_Origin_to_undef( base + 8 * 33, otag );
   3710                set_aligned_word64_Origin_to_undef( base + 8 * 34, otag );
   3711                set_aligned_word64_Origin_to_undef( base + 8 * 35, otag );
   3712             }
   3713             return;
   3714          }
   3715       }
   3716    }
   3717 
   3718    /* else fall into slow case */
   3719    MC_(make_mem_undefined_w_otag)(base, len, otag);
   3720 }
   3721 
   3722 
   3723 /*------------------------------------------------------------*/
   3724 /*--- Checking memory                                      ---*/
   3725 /*------------------------------------------------------------*/
   3726 
   3727 typedef
   3728    enum {
   3729       MC_Ok = 5,
   3730       MC_AddrErr = 6,
   3731       MC_ValueErr = 7
   3732    }
   3733    MC_ReadResult;
   3734 
   3735 
   3736 /* Check permissions for address range.  If inadequate permissions
   3737    exist, *bad_addr is set to the offending address, so the caller can
   3738    know what it is. */
   3739 
   3740 /* Returns True if [a .. a+len) is not addressable.  Otherwise,
   3741    returns False, and if bad_addr is non-NULL, sets *bad_addr to
   3742    indicate the lowest failing address.  Functions below are
   3743    similar. */
   3744 Bool MC_(check_mem_is_noaccess) ( Addr a, SizeT len, Addr* bad_addr )
   3745 {
   3746    SizeT i;
   3747    UWord vabits2;
   3748 
   3749    PROF_EVENT(60, "check_mem_is_noaccess");
   3750    for (i = 0; i < len; i++) {
   3751       PROF_EVENT(61, "check_mem_is_noaccess(loop)");
   3752       vabits2 = get_vabits2(a);
   3753       if (VA_BITS2_NOACCESS != vabits2) {
   3754          if (bad_addr != NULL) *bad_addr = a;
   3755          return False;
   3756       }
   3757       a++;
   3758    }
   3759    return True;
   3760 }
   3761 
   3762 static Bool is_mem_addressable ( Addr a, SizeT len,
   3763                                  /*OUT*/Addr* bad_addr )
   3764 {
   3765    SizeT i;
   3766    UWord vabits2;
   3767 
   3768    PROF_EVENT(62, "is_mem_addressable");
   3769    for (i = 0; i < len; i++) {
   3770       PROF_EVENT(63, "is_mem_addressable(loop)");
   3771       vabits2 = get_vabits2(a);
   3772       if (VA_BITS2_NOACCESS == vabits2) {
   3773          if (bad_addr != NULL) *bad_addr = a;
   3774          return False;
   3775       }
   3776       a++;
   3777    }
   3778    return True;
   3779 }
   3780 
   3781 static MC_ReadResult is_mem_defined ( Addr a, SizeT len,
   3782                                       /*OUT*/Addr* bad_addr,
   3783                                       /*OUT*/UInt* otag )
   3784 {
   3785    SizeT i;
   3786    UWord vabits2;
   3787 
   3788    PROF_EVENT(64, "is_mem_defined");
   3789    DEBUG("is_mem_defined\n");
   3790 
   3791    if (otag)     *otag = 0;
   3792    if (bad_addr) *bad_addr = 0;
   3793    for (i = 0; i < len; i++) {
   3794       PROF_EVENT(65, "is_mem_defined(loop)");
   3795       vabits2 = get_vabits2(a);
   3796       if (VA_BITS2_DEFINED != vabits2) {
   3797          // Error!  Nb: Report addressability errors in preference to
   3798          // definedness errors.  And don't report definedness errors unless
   3799          // --undef-value-errors=yes.
   3800          if (bad_addr) {
   3801             *bad_addr = a;
   3802          }
   3803          if (VA_BITS2_NOACCESS == vabits2) {
   3804             return MC_AddrErr;
   3805          }
   3806          if (MC_(clo_mc_level) >= 2) {
   3807             if (otag && MC_(clo_mc_level) == 3) {
   3808                *otag = MC_(helperc_b_load1)( a );
   3809             }
   3810             return MC_ValueErr;
   3811          }
   3812       }
   3813       a++;
   3814    }
   3815    return MC_Ok;
   3816 }
   3817 
   3818 
   3819 /* Like is_mem_defined but doesn't give up at the first uninitialised
   3820    byte -- the entire range is always checked.  This is important for
   3821    detecting errors in the case where a checked range strays into
   3822    invalid memory, but that fact is not detected by the ordinary
   3823    is_mem_defined(), because of an undefined section that precedes the
   3824    out of range section, possibly as a result of an alignment hole in
   3825    the checked data.  This version always checks the entire range and
   3826    can report both a definedness and an accessbility error, if
   3827    necessary. */
   3828 static void is_mem_defined_comprehensive (
   3829                Addr a, SizeT len,
   3830                /*OUT*/Bool* errorV,    /* is there a definedness err? */
   3831                /*OUT*/Addr* bad_addrV, /* if so where? */
   3832                /*OUT*/UInt* otagV,     /* and what's its otag? */
   3833                /*OUT*/Bool* errorA,    /* is there an addressability err? */
   3834                /*OUT*/Addr* bad_addrA  /* if so where? */
   3835             )
   3836 {
   3837    SizeT i;
   3838    UWord vabits2;
   3839    Bool  already_saw_errV = False;
   3840 
   3841    PROF_EVENT(64, "is_mem_defined"); // fixme
   3842    DEBUG("is_mem_defined_comprehensive\n");
   3843 
   3844    tl_assert(!(*errorV || *errorA));
   3845 
   3846    for (i = 0; i < len; i++) {
   3847       PROF_EVENT(65, "is_mem_defined(loop)"); // fixme
   3848       vabits2 = get_vabits2(a);
   3849       switch (vabits2) {
   3850          case VA_BITS2_DEFINED:
   3851             a++;
   3852             break;
   3853          case VA_BITS2_UNDEFINED:
   3854          case VA_BITS2_PARTDEFINED:
   3855             if (!already_saw_errV) {
   3856                *errorV    = True;
   3857                *bad_addrV = a;
   3858                if (MC_(clo_mc_level) == 3) {
   3859                   *otagV = MC_(helperc_b_load1)( a );
   3860                } else {
   3861                   *otagV = 0;
   3862                }
   3863                already_saw_errV = True;
   3864             }
   3865             a++; /* keep going */
   3866             break;
   3867          case VA_BITS2_NOACCESS:
   3868             *errorA    = True;
   3869             *bad_addrA = a;
   3870             return; /* give up now. */
   3871          default:
   3872             tl_assert(0);
   3873       }
   3874    }
   3875 }
   3876 
   3877 
   3878 /* Check a zero-terminated ascii string.  Tricky -- don't want to
   3879    examine the actual bytes, to find the end, until we're sure it is
   3880    safe to do so. */
   3881 
   3882 static MC_ReadResult mc_is_defined_asciiz ( Addr a, Addr* bad_addr, UInt* otag )
   3883 {
   3884    UWord vabits2;
   3885 
   3886    PROF_EVENT(66, "mc_is_defined_asciiz");
   3887    DEBUG("mc_is_defined_asciiz\n");
   3888 
   3889    if (otag)     *otag = 0;
   3890    if (bad_addr) *bad_addr = 0;
   3891    while (True) {
   3892       PROF_EVENT(67, "mc_is_defined_asciiz(loop)");
   3893       vabits2 = get_vabits2(a);
   3894       if (VA_BITS2_DEFINED != vabits2) {
   3895          // Error!  Nb: Report addressability errors in preference to
   3896          // definedness errors.  And don't report definedness errors unless
   3897          // --undef-value-errors=yes.
   3898          if (bad_addr) {
   3899             *bad_addr = a;
   3900          }
   3901          if (VA_BITS2_NOACCESS == vabits2) {
   3902             return MC_AddrErr;
   3903          }
   3904          if (MC_(clo_mc_level) >= 2) {
   3905             if (otag && MC_(clo_mc_level) == 3) {
   3906                *otag = MC_(helperc_b_load1)( a );
   3907             }
   3908             return MC_ValueErr;
   3909          }
   3910       }
   3911       /* Ok, a is safe to read. */
   3912       if (* ((UChar*)a) == 0) {
   3913          return MC_Ok;
   3914       }
   3915       a++;
   3916    }
   3917 }
   3918 
   3919 
   3920 /*------------------------------------------------------------*/
   3921 /*--- Memory event handlers                                ---*/
   3922 /*------------------------------------------------------------*/
   3923 
   3924 static
   3925 void check_mem_is_addressable ( CorePart part, ThreadId tid, const HChar* s,
   3926                                 Addr base, SizeT size )
   3927 {
   3928    Addr bad_addr;
   3929    Bool ok = is_mem_addressable ( base, size, &bad_addr );
   3930 
   3931    if (!ok) {
   3932       switch (part) {
   3933       case Vg_CoreSysCall:
   3934          MC_(record_memparam_error) ( tid, bad_addr,
   3935                                       /*isAddrErr*/True, s, 0/*otag*/ );
   3936          break;
   3937 
   3938       case Vg_CoreSignal:
   3939          MC_(record_core_mem_error)( tid, s );
   3940          break;
   3941 
   3942       default:
   3943          VG_(tool_panic)("check_mem_is_addressable: unexpected CorePart");
   3944       }
   3945    }
   3946 }
   3947 
   3948 static
   3949 void check_mem_is_defined ( CorePart part, ThreadId tid, const HChar* s,
   3950                             Addr base, SizeT size )
   3951 {
   3952    UInt otag = 0;
   3953    Addr bad_addr;
   3954    MC_ReadResult res = is_mem_defined ( base, size, &bad_addr, &otag );
   3955 
   3956    if (MC_Ok != res) {
   3957       Bool isAddrErr = ( MC_AddrErr == res ? True : False );
   3958 
   3959       switch (part) {
   3960       case Vg_CoreSysCall:
   3961          MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
   3962                                       isAddrErr ? 0 : otag );
   3963          break;
   3964 
   3965       case Vg_CoreSysCallArgInMem:
   3966          MC_(record_regparam_error) ( tid, s, otag );
   3967          break;
   3968 
   3969       /* If we're being asked to jump to a silly address, record an error
   3970          message before potentially crashing the entire system. */
   3971       case Vg_CoreTranslate:
   3972          MC_(record_jump_error)( tid, bad_addr );
   3973          break;
   3974 
   3975       default:
   3976          VG_(tool_panic)("check_mem_is_defined: unexpected CorePart");
   3977       }
   3978    }
   3979 }
   3980 
   3981 static
   3982 void check_mem_is_defined_asciiz ( CorePart part, ThreadId tid,
   3983                                    const HChar* s, Addr str )
   3984 {
   3985    MC_ReadResult res;
   3986    Addr bad_addr = 0;   // shut GCC up
   3987    UInt otag = 0;
   3988 
   3989    tl_assert(part == Vg_CoreSysCall);
   3990    res = mc_is_defined_asciiz ( (Addr)str, &bad_addr, &otag );
   3991    if (MC_Ok != res) {
   3992       Bool isAddrErr = ( MC_AddrErr == res ? True : False );
   3993       MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
   3994                                    isAddrErr ? 0 : otag );
   3995    }
   3996 }
   3997 
   3998 /* Handling of mmap and mprotect is not as simple as it seems.
   3999 
   4000    The underlying semantics are that memory obtained from mmap is
   4001    always initialised, but may be inaccessible.  And changes to the
   4002    protection of memory do not change its contents, and hence do not
   4003    change its definedness state.  The problem is we can't model
   4004    inaccessible-but-with-some-definedness state; once we mark memory
   4005    as inaccessible we lose all info about definedness, and so can't
   4006    restore that if it is later made accessible again.
   4007 
   4008    One obvious thing to do is this:
   4009 
   4010       mmap/mprotect NONE  -> noaccess
   4011       mmap/mprotect other -> defined
   4012 
   4013    The problem case here is: taking accessible memory, writing
   4014    uninitialised data to it, mprotecting it NONE and later mprotecting
   4015    it back to some accessible state causes the undefinedness to be
   4016    lost.
   4017 
   4018    A better proposal is:
   4019 
   4020      (1) mmap NONE       ->  make noaccess
   4021      (2) mmap other      ->  make defined
   4022 
   4023      (3) mprotect NONE   ->  # no change
   4024      (4) mprotect other  ->  change any "noaccess" to "defined"
   4025 
   4026    (2) is OK because memory newly obtained from mmap really is defined
   4027        (zeroed out by the kernel -- doing anything else would
   4028        constitute a massive security hole.)
   4029 
   4030    (1) is OK because the only way to make the memory usable is via
   4031        (4), in which case we also wind up correctly marking it all as
   4032        defined.
   4033 
   4034    (3) is the weak case.  We choose not to change memory state.
   4035        (presumably the range is in some mixture of "defined" and
   4036        "undefined", viz, accessible but with arbitrary V bits).  Doing
   4037        nothing means we retain the V bits, so that if the memory is
   4038        later mprotected "other", the V bits remain unchanged, so there
   4039        can be no false negatives.  The bad effect is that if there's
   4040        an access in the area, then MC cannot warn; but at least we'll
   4041        get a SEGV to show, so it's better than nothing.
   4042 
   4043    Consider the sequence (3) followed by (4).  Any memory that was
   4044    "defined" or "undefined" previously retains its state (as
   4045    required).  Any memory that was "noaccess" before can only have
   4046    been made that way by (1), and so it's OK to change it to
   4047    "defined".
   4048 
   4049    See https://bugs.kde.org/show_bug.cgi?id=205541
   4050    and https://bugs.kde.org/show_bug.cgi?id=210268
   4051 */
   4052 static
   4053 void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx,
   4054                        ULong di_handle )
   4055 {
   4056    if (rr || ww || xx) {
   4057       /* (2) mmap/mprotect other -> defined */
   4058       MC_(make_mem_defined)(a, len);
   4059    } else {
   4060       /* (1) mmap/mprotect NONE  -> noaccess */
   4061       MC_(make_mem_noaccess)(a, len);
   4062    }
   4063 }
   4064 
   4065 static
   4066 void mc_new_mem_mprotect ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx )
   4067 {
   4068    if (rr || ww || xx) {
   4069       /* (4) mprotect other  ->  change any "noaccess" to "defined" */
   4070       make_mem_defined_if_noaccess(a, len);
   4071    } else {
   4072       /* (3) mprotect NONE   ->  # no change */
   4073       /* do nothing */
   4074    }
   4075 }
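
        /* An illustrative sketch (not part of the tool): a hypothetical client
           program exercising the mmap/mprotect rules (1)-(4) described above.
           Under the naive "mprotect other -> defined" rule, the uninitialised
           byte written below would wrongly become defined after the round trip
           through PROT_NONE; with rules (3)+(4) it stays undefined. */
        #if 0
        #include <sys/mman.h>
        static void mprotect_roundtrip_example ( void )
        {
           char* p = mmap(NULL, 4096, PROT_READ|PROT_WRITE,
                          MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); /* (2): defined   */
           char junk;                      /* uninitialised on the stack       */
           p[0] = junk;                    /* p[0] becomes undefined           */
           mprotect(p, 4096, PROT_NONE);   /* (3): V+A bits left unchanged     */
           mprotect(p, 4096, PROT_READ);   /* (4): only noaccess -> defined    */
           /* A conditional branch on p[0] should still be reported as
              depending on an uninitialised value. */
        }
        #endif
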
   4076 
   4077 
   4078 static
   4079 void mc_new_mem_startup( Addr a, SizeT len,
   4080                          Bool rr, Bool ww, Bool xx, ULong di_handle )
   4081 {
   4082    // Startup mappings are treated as defined: code is defined, initialised
   4083    // variables get put in the data segment and are defined, and uninitialised
   4084    // variables get put in the bss segment and are auto-zeroed (and so defined).
   4085    //
   4086    // It's possible that there will be padding between global variables.
   4087    // This will also be auto-zeroed, and marked as defined by Memcheck.  If
   4088    // a program uses it, Memcheck will not complain.  This is arguably a
   4089    // false negative, but it's a grey area -- the behaviour is defined (the
   4090    // padding is zeroed) but it's probably not what the user intended.  And
   4091    // we can't avoid it.
   4092    //
   4093    // Note: we generally ignore RWX permissions, because we can't track them
   4094    // without requiring more than one A bit which would slow things down a
   4095    // lot.  But on Darwin the 0th page is mapped but !R and !W and !X.
   4096    // So we mark any such pages as "unaddressable".
   4097    DEBUG("mc_new_mem_startup(%#lx, %llu, rr=%u, ww=%u, xx=%u)\n",
   4098          a, (ULong)len, rr, ww, xx);
   4099    mc_new_mem_mmap(a, len, rr, ww, xx, di_handle);
   4100 }
   4101 
   4102 static
   4103 void mc_post_mem_write(CorePart part, ThreadId tid, Addr a, SizeT len)
   4104 {
   4105    MC_(make_mem_defined)(a, len);
   4106 }
   4107 
   4108 
   4109 /*------------------------------------------------------------*/
   4110 /*--- Register event handlers                              ---*/
   4111 /*------------------------------------------------------------*/
   4112 
   4113 /* Try and get a nonzero origin for the guest state section of thread
   4114    tid characterised by (offset,size).  Return 0 if nothing to show
   4115    for it. */
   4116 static UInt mb_get_origin_for_guest_offset ( ThreadId tid,
   4117                                              Int offset, SizeT size )
   4118 {
   4119    Int   sh2off;
   4120    UInt  area[3];
   4121    UInt  otag;
   4122    sh2off = MC_(get_otrack_shadow_offset)( offset, size );
   4123    if (sh2off == -1)
   4124       return 0;  /* This piece of guest state is not tracked */
   4125    tl_assert(sh2off >= 0);
   4126    tl_assert(0 == (sh2off % 4));
   4127    area[0] = 0x31313131;
   4128    area[2] = 0x27272727;
   4129    VG_(get_shadow_regs_area)( tid, (UChar *)&area[1], 2/*shadowno*/,sh2off,4 );
   4130    tl_assert(area[0] == 0x31313131);
   4131    tl_assert(area[2] == 0x27272727);
   4132    otag = area[1];
   4133    return otag;
   4134 }
   4135 
   4136 
   4137 /* When some chunk of guest state is written, mark the corresponding
   4138    shadow area as valid.  This is used to initialise arbitrarily large
   4139    chunks of guest state, hence the _SIZE value, which has to be as
   4140    big as the biggest guest state.
   4141 */
   4142 static void mc_post_reg_write ( CorePart part, ThreadId tid,
   4143                                 PtrdiffT offset, SizeT size)
   4144 {
   4145 #  define MAX_REG_WRITE_SIZE 1712
   4146    UChar area[MAX_REG_WRITE_SIZE];
   4147    tl_assert(size <= MAX_REG_WRITE_SIZE);
   4148    VG_(memset)(area, V_BITS8_DEFINED, size);
   4149    VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/,offset,size, area );
   4150 #  undef MAX_REG_WRITE_SIZE
   4151 }
   4152 
   4153 static
   4154 void mc_post_reg_write_clientcall ( ThreadId tid,
   4155                                     PtrdiffT offset, SizeT size, Addr f)
   4156 {
   4157    mc_post_reg_write(/*dummy*/0, tid, offset, size);
   4158 }
   4159 
   4160 /* Look at the definedness of the guest's shadow state for
   4161    [offset, offset+len).  If any part of that is undefined, record
   4162    a parameter error.
   4163 */
   4164 static void mc_pre_reg_read ( CorePart part, ThreadId tid, const HChar* s,
   4165                               PtrdiffT offset, SizeT size)
   4166 {
   4167    Int   i;
   4168    Bool  bad;
   4169    UInt  otag;
   4170 
   4171    UChar area[16];
   4172    tl_assert(size <= 16);
   4173 
   4174    VG_(get_shadow_regs_area)( tid, area, 1/*shadowNo*/,offset,size );
   4175 
   4176    bad = False;
   4177    for (i = 0; i < size; i++) {
   4178       if (area[i] != V_BITS8_DEFINED) {
   4179          bad = True;
   4180          break;
   4181       }
   4182    }
   4183 
   4184    if (!bad)
   4185       return;
   4186 
   4187    /* We've found some undefinedness.  See if we can also find an
   4188       origin for it. */
   4189    otag = mb_get_origin_for_guest_offset( tid, offset, size );
   4190    MC_(record_regparam_error) ( tid, s, otag );
   4191 }
   4192 
   4193 
   4194 /*------------------------------------------------------------*/
   4195 /*--- Functions called directly from generated code:       ---*/
   4196 /*--- Load/store handlers.                                 ---*/
   4197 /*------------------------------------------------------------*/
   4198 
   4199 /* Types:  LOADV32, LOADV16, LOADV8 are:
   4200                UWord fn ( Addr a )
   4201    so they return 32-bits on 32-bit machines and 64-bits on
   4202    64-bit machines.  Addr has the same size as a host word.
   4203 
   4204    LOADV64 is always  ULong fn ( Addr a )
   4205 
   4206    Similarly for STOREV8, STOREV16, STOREV32, the supplied vbits
   4207    are a UWord, and for STOREV64 they are a ULong.
   4208 */
   4209 
   4210 /* If any part of '_a' indicated by the mask is 1, either '_a' is not
   4211    naturally '_sz/8'-aligned, or it exceeds the range covered by the
   4212    primary map.  This is all very tricky (and important!), so let's
   4213    work through the maths by hand (below), *and* assert for these
   4214    values at startup. */
   4215 #define MASK(_szInBytes) \
   4216    ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) )
   4217 
   4218 /* MASK only exists so as to define this macro. */
   4219 #define UNALIGNED_OR_HIGH(_a,_szInBits) \
   4220    ((_a) & MASK((_szInBits>>3)))
   4221 
   4222 /* On a 32-bit machine:
   4223 
   4224    N_PRIMARY_BITS          == 16, so
   4225    N_PRIMARY_MAP           == 0x10000, so
   4226    N_PRIMARY_MAP-1         == 0xFFFF, so
   4227    (N_PRIMARY_MAP-1) << 16 == 0xFFFF0000, and so
   4228 
   4229    MASK(1) = ~ ( (0x10000 - 1) | 0xFFFF0000 )
   4230            = ~ ( 0xFFFF | 0xFFFF0000 )
   4231            = ~ 0xFFFF'FFFF
   4232            = 0
   4233 
   4234    MASK(2) = ~ ( (0x10000 - 2) | 0xFFFF0000 )
   4235            = ~ ( 0xFFFE | 0xFFFF0000 )
   4236            = ~ 0xFFFF'FFFE
   4237            = 1
   4238 
   4239    MASK(4) = ~ ( (0x10000 - 4) | 0xFFFF0000 )
   4240            = ~ ( 0xFFFC | 0xFFFF0000 )
   4241            = ~ 0xFFFF'FFFC
   4242            = 3
   4243 
   4244    MASK(8) = ~ ( (0x10000 - 8) | 0xFFFF0000 )
   4245            = ~ ( 0xFFF8 | 0xFFFF0000 )
   4246            = ~ 0xFFFF'FFF8
   4247            = 7
   4248 
   4249    Hence in the 32-bit case, "a & MASK(1/2/4/8)" is a nonzero value
   4250    precisely when a is not 1/2/4/8-bytes aligned.  And obviously, for
   4251    the 1-byte alignment case, it is always a zero value, since MASK(1)
   4252    is zero.  All as expected.
   4253 
   4254    On a 64-bit machine, it's more complex, since we're testing
   4255    simultaneously for misalignment and for the address being at or
   4256    above 64G:
   4257 
   4258    N_PRIMARY_BITS          == 20, so
   4259    N_PRIMARY_MAP           == 0x100000, so
   4260    N_PRIMARY_MAP-1         == 0xFFFFF, so
   4261    (N_PRIMARY_MAP-1) << 16 == 0xF'FFFF'0000, and so
   4262 
   4263    MASK(1) = ~ ( (0x10000 - 1) | 0xF'FFFF'0000 )
   4264            = ~ ( 0xFFFF | 0xF'FFFF'0000 )
   4265            = ~ 0xF'FFFF'FFFF
   4266            = 0xFFFF'FFF0'0000'0000
   4267 
   4268    MASK(2) = ~ ( (0x10000 - 2) | 0xF'FFFF'0000 )
   4269            = ~ ( 0xFFFE | 0xF'FFFF'0000 )
   4270            = ~ 0xF'FFFF'FFFE
   4271            = 0xFFFF'FFF0'0000'0001
   4272 
   4273    MASK(4) = ~ ( (0x10000 - 4) | 0xF'FFFF'0000 )
   4274            = ~ ( 0xFFFC | 0xF'FFFF'0000 )
   4275            = ~ 0xF'FFFF'FFFC
   4276            = 0xFFFF'FFF0'0000'0003
   4277 
   4278    MASK(8) = ~ ( (0x10000 - 8) | 0xF'FFFF'0000 )
   4279            = ~ ( 0xFFF8 | 0xF'FFFF'0000 )
   4280            = ~ 0xF'FFFF'FFF8
   4281            = 0xFFFF'FFF0'0000'0007
   4282 */
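
        /* A minimal sketch (not part of the tool) of the startup assertions
           alluded to above, spelling out the worked 64-bit values.  It assumes
           N_PRIMARY_BITS == 20, exactly as in the derivation. */
        #if 0
        static void check_MASK_values_64bit_example ( void )
        {
           tl_assert( MASK(1) == 0xFFFFFFF000000000ULL );
           tl_assert( MASK(2) == 0xFFFFFFF000000001ULL );
           tl_assert( MASK(4) == 0xFFFFFFF000000003ULL );
           tl_assert( MASK(8) == 0xFFFFFFF000000007ULL );
        }
        #endif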
   4283 
   4284 
   4285 /* ------------------------ Size = 16 ------------------------ */
   4286 
   4287 static INLINE
   4288 void mc_LOADV_128_or_256 ( /*OUT*/ULong* res,
   4289                            Addr a, SizeT nBits, Bool isBigEndian )
   4290 {
   4291    PROF_EVENT(200, "mc_LOADV_128_or_256");
   4292 
   4293 #ifndef PERF_FAST_LOADV
   4294    mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
   4295    return;
   4296 #else
   4297    {
   4298       UWord   sm_off16, vabits16, j;
   4299       UWord   nBytes  = nBits / 8;
   4300       UWord   nULongs = nBytes / 8;
   4301       SecMap* sm;
   4302 
   4303       if (UNLIKELY( UNALIGNED_OR_HIGH(a,nBits) )) {
   4304          PROF_EVENT(201, "mc_LOADV_128_or_256-slow1");
   4305          mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
   4306          return;
   4307       }
   4308 
   4309       /* Handle common cases quickly: a (and a+8 and a+16 etc.) is
   4310          suitably aligned, is mapped, and addressable. */
   4311       for (j = 0; j < nULongs; j++) {
   4312          sm       = get_secmap_for_reading_low(a + 8*j);
   4313          sm_off16 = SM_OFF_16(a + 8*j);
   4314          vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
   4315 
   4316          // Convert V bits from compact memory form to expanded
   4317          // register form.
   4318          if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
   4319             res[j] = V_BITS64_DEFINED;
   4320          } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
   4321             res[j] = V_BITS64_UNDEFINED;
   4322          } else {
   4323             /* Slow case: some block of 8 bytes are not all-defined or
   4324                all-undefined. */
   4325             PROF_EVENT(202, "mc_LOADV_128_or_256-slow2");
   4326             mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
   4327             return;
   4328          }
   4329       }
   4330       return;
   4331    }
   4332 #endif
   4333 }
   4334 
   4335 VG_REGPARM(2) void MC_(helperc_LOADV256be) ( /*OUT*/V256* res, Addr a )
   4336 {
   4337    mc_LOADV_128_or_256(&res->w64[0], a, 256, True);
   4338 }
   4339 VG_REGPARM(2) void MC_(helperc_LOADV256le) ( /*OUT*/V256* res, Addr a )
   4340 {
   4341    mc_LOADV_128_or_256(&res->w64[0], a, 256, False);
   4342 }
   4343 
   4344 VG_REGPARM(2) void MC_(helperc_LOADV128be) ( /*OUT*/V128* res, Addr a )
   4345 {
   4346    mc_LOADV_128_or_256(&res->w64[0], a, 128, True);
   4347 }
   4348 VG_REGPARM(2) void MC_(helperc_LOADV128le) ( /*OUT*/V128* res, Addr a )
   4349 {
   4350    mc_LOADV_128_or_256(&res->w64[0], a, 128, False);
   4351 }
   4352 
   4353 /* ------------------------ Size = 8 ------------------------ */
   4354 
   4355 static INLINE
   4356 ULong mc_LOADV64 ( Addr a, Bool isBigEndian )
   4357 {
   4358    PROF_EVENT(200, "mc_LOADV64");
   4359 
   4360 #ifndef PERF_FAST_LOADV
   4361    return mc_LOADVn_slow( a, 64, isBigEndian );
   4362 #else
   4363    {
   4364       UWord   sm_off16, vabits16;
   4365       SecMap* sm;
   4366 
   4367       if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
   4368          PROF_EVENT(201, "mc_LOADV64-slow1");
   4369          return (ULong)mc_LOADVn_slow( a, 64, isBigEndian );
   4370       }
   4371 
   4372       sm       = get_secmap_for_reading_low(a);
   4373       sm_off16 = SM_OFF_16(a);
   4374       vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
   4375 
   4376       // Handle common case quickly: a is suitably aligned, is mapped, and
   4377       // addressable.
   4378       // Convert V bits from compact memory form to expanded register form.
   4379       if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
   4380          return V_BITS64_DEFINED;
   4381       } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
   4382          return V_BITS64_UNDEFINED;
   4383       } else {
   4384          /* Slow case: the 8 bytes are not all-defined or all-undefined. */
   4385          PROF_EVENT(202, "mc_LOADV64-slow2");
   4386          return mc_LOADVn_slow( a, 64, isBigEndian );
   4387       }
   4388    }
   4389 #endif
   4390 }
   4391 
   4392 VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr a )
   4393 {
   4394    return mc_LOADV64(a, True);
   4395 }
   4396 VG_REGPARM(1) ULong MC_(helperc_LOADV64le) ( Addr a )
   4397 {
   4398    return mc_LOADV64(a, False);
   4399 }
   4400 
   4401 
   4402 static INLINE
   4403 void mc_STOREV64 ( Addr a, ULong vbits64, Bool isBigEndian )
   4404 {
   4405    PROF_EVENT(210, "mc_STOREV64");
   4406 
   4407 #ifndef PERF_FAST_STOREV
   4408    // XXX: this slow case seems to be marginally faster than the fast case!
   4409    // Investigate further.
   4410    mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
   4411 #else
   4412    {
   4413       UWord   sm_off16, vabits16;
   4414       SecMap* sm;
   4415 
   4416       if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
   4417          PROF_EVENT(211, "mc_STOREV64-slow1");
   4418          mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
   4419          return;
   4420       }
   4421 
   4422       sm       = get_secmap_for_reading_low(a);
   4423       sm_off16 = SM_OFF_16(a);
   4424       vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
   4425 
   4426       // To understand the below cleverness, see the extensive comments
   4427       // in MC_(helperc_STOREV8).
   4428       if (LIKELY(V_BITS64_DEFINED == vbits64)) {
   4429          if (LIKELY(vabits16 == (UShort)VA_BITS16_DEFINED)) {
   4430             return;
   4431          }
   4432          if (!is_distinguished_sm(sm) && VA_BITS16_UNDEFINED == vabits16) {
   4433             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
   4434             return;
   4435          }
   4436          PROF_EVENT(232, "mc_STOREV64-slow2");
   4437          mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
   4438          return;
   4439       }
   4440       if (V_BITS64_UNDEFINED == vbits64) {
   4441          if (vabits16 == (UShort)VA_BITS16_UNDEFINED) {
   4442             return;
   4443          }
   4444          if (!is_distinguished_sm(sm) && VA_BITS16_DEFINED == vabits16) {
   4445             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
   4446             return;
   4447          }
   4448          PROF_EVENT(232, "mc_STOREV64-slow3");
   4449          mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
   4450          return;
   4451       }
   4452 
   4453       PROF_EVENT(212, "mc_STOREV64-slow4");
   4454       mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
   4455    }
   4456 #endif
   4457 }
   4458 
   4459 VG_REGPARM(1) void MC_(helperc_STOREV64be) ( Addr a, ULong vbits64 )
   4460 {
   4461    mc_STOREV64(a, vbits64, True);
   4462 }
   4463 VG_REGPARM(1) void MC_(helperc_STOREV64le) ( Addr a, ULong vbits64 )
   4464 {
   4465    mc_STOREV64(a, vbits64, False);
   4466 }
   4467 
   4468 
   4469 /* ------------------------ Size = 4 ------------------------ */
   4470 
   4471 static INLINE
   4472 UWord mc_LOADV32 ( Addr a, Bool isBigEndian )
   4473 {
   4474    PROF_EVENT(220, "mc_LOADV32");
   4475 
   4476 #ifndef PERF_FAST_LOADV
   4477    return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
   4478 #else
   4479    {
   4480       UWord   sm_off, vabits8;
   4481       SecMap* sm;
   4482 
   4483       if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
   4484          PROF_EVENT(221, "mc_LOADV32-slow1");
   4485          return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
   4486       }
   4487 
   4488       sm      = get_secmap_for_reading_low(a);
   4489       sm_off  = SM_OFF(a);
   4490       vabits8 = sm->vabits8[sm_off];
   4491 
   4492       // Handle common case quickly: a is suitably aligned, is mapped, and the
   4493       // entire word32 it lives in is addressable.
   4494       // Convert V bits from compact memory form to expanded register form.
   4495       // For 64-bit platforms, set the high 32 bits of retval to 1 (undefined).
   4496       // Almost certainly not necessary, but be paranoid.
   4497       if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
   4498          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
   4499       } else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) {
   4500          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
   4501       } else {
   4502          /* Slow case: the 4 bytes are not all-defined or all-undefined. */
   4503          PROF_EVENT(222, "mc_LOADV32-slow2");
   4504          return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
   4505       }
   4506    }
   4507 #endif
   4508 }
   4509 
   4510 VG_REGPARM(1) UWord MC_(helperc_LOADV32be) ( Addr a )
   4511 {
   4512    return mc_LOADV32(a, True);
   4513 }
   4514 VG_REGPARM(1) UWord MC_(helperc_LOADV32le) ( Addr a )
   4515 {
   4516    return mc_LOADV32(a, False);
   4517 }
   4518 
   4519 
   4520 static INLINE
   4521 void mc_STOREV32 ( Addr a, UWord vbits32, Bool isBigEndian )
   4522 {
   4523    PROF_EVENT(230, "mc_STOREV32");
   4524 
   4525 #ifndef PERF_FAST_STOREV
   4526    mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   4527 #else
   4528    {
   4529       UWord   sm_off, vabits8;
   4530       SecMap* sm;
   4531 
   4532       if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
   4533          PROF_EVENT(231, "mc_STOREV32-slow1");
   4534          mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   4535          return;
   4536       }
   4537 
   4538       sm      = get_secmap_for_reading_low(a);
   4539       sm_off  = SM_OFF(a);
   4540       vabits8 = sm->vabits8[sm_off];
   4541 
   4542       // To understand the below cleverness, see the extensive comments
   4543       // in MC_(helperc_STOREV8).
   4544       if (LIKELY(V_BITS32_DEFINED == vbits32)) {
   4545          if (LIKELY(vabits8 == (UInt)VA_BITS8_DEFINED)) {
   4546             return;
   4547          }
   4548          if (!is_distinguished_sm(sm)  && VA_BITS8_UNDEFINED == vabits8) {
   4549             sm->vabits8[sm_off] = (UInt)VA_BITS8_DEFINED;
   4550             return;
   4551          }
   4552          PROF_EVENT(232, "mc_STOREV32-slow2");
   4553          mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   4554          return;
   4555       }
   4556       if (V_BITS32_UNDEFINED == vbits32) {
   4557          if (vabits8 == (UInt)VA_BITS8_UNDEFINED) {
   4558             return;
   4559          }
   4560          if (!is_distinguished_sm(sm) && VA_BITS8_DEFINED == vabits8) {
   4561             sm->vabits8[sm_off] = (UInt)VA_BITS8_UNDEFINED;
   4562             return;
   4563          }
   4564          PROF_EVENT(233, "mc_STOREV32-slow3");
   4565          mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   4566          return;
   4567       }
   4568 
   4569       PROF_EVENT(234, "mc_STOREV32-slow4");
   4570       mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
   4571    }
   4572 #endif
   4573 }
   4574 
   4575 VG_REGPARM(2) void MC_(helperc_STOREV32be) ( Addr a, UWord vbits32 )
   4576 {
   4577    mc_STOREV32(a, vbits32, True);
   4578 }
   4579 VG_REGPARM(2) void MC_(helperc_STOREV32le) ( Addr a, UWord vbits32 )
   4580 {
   4581    mc_STOREV32(a, vbits32, False);
   4582 }
   4583 
   4584 
   4585 /* ------------------------ Size = 2 ------------------------ */
   4586 
   4587 static INLINE
   4588 UWord mc_LOADV16 ( Addr a, Bool isBigEndian )
   4589 {
   4590    PROF_EVENT(240, "mc_LOADV16");
   4591 
   4592 #ifndef PERF_FAST_LOADV
   4593    return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
   4594 #else
   4595    {
   4596       UWord   sm_off, vabits8;
   4597       SecMap* sm;
   4598 
   4599       if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
   4600          PROF_EVENT(241, "mc_LOADV16-slow1");
   4601          return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
   4602       }
   4603 
   4604       sm      = get_secmap_for_reading_low(a);
   4605       sm_off  = SM_OFF(a);
   4606       vabits8 = sm->vabits8[sm_off];
   4607       // Handle common case quickly: a is suitably aligned, is mapped, and is
   4608       // addressable.
   4609       // Convert V bits from compact memory form to expanded register form
   4610       if      (LIKELY(vabits8 == VA_BITS8_DEFINED  )) { return V_BITS16_DEFINED;   }
   4611       else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS16_UNDEFINED; }
   4612       else {
   4613          // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
   4614          // the two sub-bytes.
   4615          UChar vabits4 = extract_vabits4_from_vabits8(a, vabits8);
   4616          if      (vabits4 == VA_BITS4_DEFINED  ) { return V_BITS16_DEFINED;   }
   4617          else if (vabits4 == VA_BITS4_UNDEFINED) { return V_BITS16_UNDEFINED; }
   4618          else {
   4619             /* Slow case: the two bytes are not all-defined or all-undefined. */
   4620             PROF_EVENT(242, "mc_LOADV16-slow2");
   4621             return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
   4622          }
   4623       }
   4624    }
   4625 #endif
   4626 }
   4627 
   4628 VG_REGPARM(1) UWord MC_(helperc_LOADV16be) ( Addr a )
   4629 {
   4630    return mc_LOADV16(a, True);
   4631 }
   4632 VG_REGPARM(1) UWord MC_(helperc_LOADV16le) ( Addr a )
   4633 {
   4634    return mc_LOADV16(a, False);
   4635 }
   4636 
   4637 /* True if the vabits4 in vabits8 indicate a and a+1 are accessible. */
   4638 static INLINE
   4639 Bool accessible_vabits4_in_vabits8 ( Addr a, UChar vabits8 )
   4640 {
   4641    UInt shift;
   4642    tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
   4643    shift = (a & 2) << 1;               // shift by 0 or 4
   4644    vabits8 >>= shift;                  // shift the four bits to the bottom
   4645     // check 2 x vabits2 != VA_BITS2_NOACCESS
   4646    return ((0x3 & vabits8) != VA_BITS2_NOACCESS)
   4647       &&  ((0xc & vabits8) != VA_BITS2_NOACCESS << 2);
   4648 }
   4649 
   4650 static INLINE
   4651 void mc_STOREV16 ( Addr a, UWord vbits16, Bool isBigEndian )
   4652 {
   4653    PROF_EVENT(250, "mc_STOREV16");
   4654 
   4655 #ifndef PERF_FAST_STOREV
   4656    mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
   4657 #else
   4658    {
   4659       UWord   sm_off, vabits8;
   4660       SecMap* sm;
   4661 
   4662       if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
   4663          PROF_EVENT(251, "mc_STOREV16-slow1");
   4664          mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
   4665          return;
   4666       }
   4667 
   4668       sm      = get_secmap_for_reading_low(a);
   4669       sm_off  = SM_OFF(a);
   4670       vabits8 = sm->vabits8[sm_off];
   4671 
   4672       // To understand the below cleverness, see the extensive comments
   4673       // in MC_(helperc_STOREV8).
   4674       if (LIKELY(V_BITS16_DEFINED == vbits16)) {
   4675          if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
   4676             return;
   4677          }
   4678          if (!is_distinguished_sm(sm)
   4679              && accessible_vabits4_in_vabits8(a, vabits8)) {
   4680             insert_vabits4_into_vabits8( a, VA_BITS4_DEFINED,
   4681                                          &(sm->vabits8[sm_off]) );
   4682             return;
   4683          }
   4684          PROF_EVENT(232, "mc_STOREV16-slow2");
   4685          mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
                 return; /* don't fall through to the slow4 call below */
   4686       }
   4687       if (V_BITS16_UNDEFINED == vbits16) {
   4688          if (vabits8 == VA_BITS8_UNDEFINED) {
   4689             return;
   4690          }
   4691          if (!is_distinguished_sm(sm)
   4692              && accessible_vabits4_in_vabits8(a, vabits8)) {
   4693             insert_vabits4_into_vabits8( a, VA_BITS4_UNDEFINED,
   4694                                          &(sm->vabits8[sm_off]) );
   4695             return;
   4696          }
   4697          PROF_EVENT(233, "mc_STOREV16-slow3");
   4698          mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
   4699          return;
   4700       }
   4701 
   4702       PROF_EVENT(234, "mc_STOREV16-slow4");
   4703       mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
   4704    }
   4705 #endif
   4706 }
   4707 
   4708 VG_REGPARM(2) void MC_(helperc_STOREV16be) ( Addr a, UWord vbits16 )
   4709 {
   4710    mc_STOREV16(a, vbits16, True);
   4711 }
   4712 VG_REGPARM(2) void MC_(helperc_STOREV16le) ( Addr a, UWord vbits16 )
   4713 {
   4714    mc_STOREV16(a, vbits16, False);
   4715 }
   4716 
   4717 
   4718 /* ------------------------ Size = 1 ------------------------ */
   4719 /* Note: endianness is irrelevant for size == 1 */
   4720 
   4721 VG_REGPARM(1)
   4722 UWord MC_(helperc_LOADV8) ( Addr a )
   4723 {
   4724    PROF_EVENT(260, "mc_LOADV8");
   4725 
   4726 #ifndef PERF_FAST_LOADV
   4727    return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
   4728 #else
   4729    {
   4730       UWord   sm_off, vabits8;
   4731       SecMap* sm;
   4732 
   4733       if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
   4734          PROF_EVENT(261, "mc_LOADV8-slow1");
   4735          return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
   4736       }
   4737 
   4738       sm      = get_secmap_for_reading_low(a);
   4739       sm_off  = SM_OFF(a);
   4740       vabits8 = sm->vabits8[sm_off];
   4741       // Convert V bits from compact memory form to expanded register form
   4742       // Handle common case quickly: a is mapped, and the entire
   4743       // word32 it lives in is addressable.
   4744       if      (LIKELY(vabits8 == VA_BITS8_DEFINED  )) { return V_BITS8_DEFINED;   }
   4745       else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS8_UNDEFINED; }
   4746       else {
   4747          // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
   4748          // the single byte.
   4749          UChar vabits2 = extract_vabits2_from_vabits8(a, vabits8);
   4750          if      (vabits2 == VA_BITS2_DEFINED  ) { return V_BITS8_DEFINED;   }
   4751          else if (vabits2 == VA_BITS2_UNDEFINED) { return V_BITS8_UNDEFINED; }
   4752          else {
   4753             /* Slow case: the byte is not all-defined or all-undefined. */
   4754             PROF_EVENT(262, "mc_LOADV8-slow2");
   4755             return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
   4756          }
   4757       }
   4758    }
   4759 #endif
   4760 }
   4761 
   4762 
   4763 VG_REGPARM(2)
   4764 void MC_(helperc_STOREV8) ( Addr a, UWord vbits8 )
   4765 {
   4766    PROF_EVENT(270, "mc_STOREV8");
   4767 
   4768 #ifndef PERF_FAST_STOREV
   4769    mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
   4770 #else
   4771    {
   4772       UWord   sm_off, vabits8;
   4773       SecMap* sm;
   4774 
   4775       if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
   4776          PROF_EVENT(271, "mc_STOREV8-slow1");
   4777          mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
   4778          return;
   4779       }
   4780 
   4781       sm      = get_secmap_for_reading_low(a);
   4782       sm_off  = SM_OFF(a);
   4783       vabits8 = sm->vabits8[sm_off];
   4784 
   4785       // Clevernesses to speed up storing V bits.
   4786       // The 64/32/16 bit cases also have similar clevernesses, but it
   4787       // works a little differently to the code below.
   4788       //
   4789       // Cleverness 1:  sometimes we don't have to write the shadow memory at
   4790       // all, if we can tell that what we want to write is the same as what is
   4791       // already there. These cases are marked below as "defined on defined" and
   4792       // "undefined on undefined".
   4793       //
   4794       // Cleverness 2:
   4795       // We also avoid calling mc_STOREVn_slow if the V bits can directly
   4796       // be written in the secondary map. V bits can be directly written
   4797       // if 4 conditions are respected:
   4798       //   * The address for which V bits are written is naturally aligned
   4799       //        on 1 byte  for STOREV8 (this is always true)
   4800       //        on 2 bytes for STOREV16
   4801       //        on 4 bytes for STOREV32
   4802       //        on 8 bytes for STOREV64.
   4803       //   * V bits being written are either fully defined or fully undefined.
   4804       //     (for partially defined V bits, V bits cannot be directly written,
   4805       //      as the secondary vbits table must be maintained).
   4806       //   * the secmap is not distinguished (distinguished maps cannot be
   4807       //     modified).
   4808       //   * the memory corresponding to the V bits being written is
   4809       //     accessible (if one or more bytes are not accessible,
   4810       //     we must call mc_STOREVn_slow in order to report accessibility
   4811       //     errors).
   4812       //     Note that for STOREV32 and STOREV64, it is too expensive
   4813       //     to verify the accessibility of each byte for the benefit it
   4814       //     brings. Instead, a quicker check is done by comparing to
   4815       //     VA_BITS(8|16)_(UN)DEFINED. This guarantees accessibility,
   4816       //     but misses some opportunities for direct modification.
   4817       //     Checking each byte's accessibility was measured for
   4818       //     STOREV32 with the perf tests and was slowing them all down.
   4819       // The cases corresponding to cleverness 2 are marked below as
   4820       // "direct mod".
   4821       if (LIKELY(V_BITS8_DEFINED == vbits8)) {
   4822          if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
   4823             return; // defined on defined
   4824          }
   4825          if (!is_distinguished_sm(sm)
   4826              && VA_BITS2_NOACCESS != extract_vabits2_from_vabits8(a, vabits8)) {
   4827             // direct mod
   4828             insert_vabits2_into_vabits8( a, VA_BITS2_DEFINED,
   4829                                          &(sm->vabits8[sm_off]) );
   4830             return;
   4831          }
   4832          PROF_EVENT(232, "mc_STOREV8-slow2");
   4833          mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
   4834          return;
   4835       }
   4836       if (V_BITS8_UNDEFINED == vbits8) {
   4837          if (vabits8 == VA_BITS8_UNDEFINED) {
   4838             return; // undefined on undefined
   4839          }
   4840          if (!is_distinguished_sm(sm)
   4841              && (VA_BITS2_NOACCESS
   4842                  != extract_vabits2_from_vabits8(a, vabits8))) {
   4843             // direct mod
   4844             insert_vabits2_into_vabits8( a, VA_BITS2_UNDEFINED,
   4845                                          &(sm->vabits8[sm_off]) );
   4846             return;
   4847          }
   4848          PROF_EVENT(233, "mc_STOREV8-slow3");
   4849          mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
   4850          return;
   4851       }
   4852 
   4853       // Partially defined word
   4854       PROF_EVENT(234, "mc_STOREV8-slow4");
   4855       mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
   4856    }
   4857 #endif
   4858 }
   4859 
   4860 
   4861 /*------------------------------------------------------------*/
   4862 /*--- Functions called directly from generated code:       ---*/
   4863 /*--- Value-check failure handlers.                        ---*/
   4864 /*------------------------------------------------------------*/
   4865 
   4866 /* Call these ones when an origin is available ... */
   4867 VG_REGPARM(1)
   4868 void MC_(helperc_value_check0_fail_w_o) ( UWord origin ) {
   4869    MC_(record_cond_error) ( VG_(get_running_tid)(), (UInt)origin );
   4870 }
   4871 
   4872 VG_REGPARM(1)
   4873 void MC_(helperc_value_check1_fail_w_o) ( UWord origin ) {
   4874    MC_(record_value_error) ( VG_(get_running_tid)(), 1, (UInt)origin );
   4875 }
   4876 
   4877 VG_REGPARM(1)
   4878 void MC_(helperc_value_check4_fail_w_o) ( UWord origin ) {
   4879    MC_(record_value_error) ( VG_(get_running_tid)(), 4, (UInt)origin );
   4880 }
   4881 
   4882 VG_REGPARM(1)
   4883 void MC_(helperc_value_check8_fail_w_o) ( UWord origin ) {
   4884    MC_(record_value_error) ( VG_(get_running_tid)(), 8, (UInt)origin );
   4885 }
   4886 
   4887 VG_REGPARM(2)
   4888 void MC_(helperc_value_checkN_fail_w_o) ( HWord sz, UWord origin ) {
   4889    MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, (UInt)origin );
   4890 }
   4891 
   4892 /* ... and these when an origin isn't available. */
   4893 
   4894 VG_REGPARM(0)
   4895 void MC_(helperc_value_check0_fail_no_o) ( void ) {
   4896    MC_(record_cond_error) ( VG_(get_running_tid)(), 0/*origin*/ );
   4897 }
   4898 
   4899 VG_REGPARM(0)
   4900 void MC_(helperc_value_check1_fail_no_o) ( void ) {
   4901    MC_(record_value_error) ( VG_(get_running_tid)(), 1, 0/*origin*/ );
   4902 }
   4903 
   4904 VG_REGPARM(0)
   4905 void MC_(helperc_value_check4_fail_no_o) ( void ) {
   4906    MC_(record_value_error) ( VG_(get_running_tid)(), 4, 0/*origin*/ );
   4907 }
   4908 
   4909 VG_REGPARM(0)
   4910 void MC_(helperc_value_check8_fail_no_o) ( void ) {
   4911    MC_(record_value_error) ( VG_(get_running_tid)(), 8, 0/*origin*/ );
   4912 }
   4913 
   4914 VG_REGPARM(1)
   4915 void MC_(helperc_value_checkN_fail_no_o) ( HWord sz ) {
   4916    MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, 0/*origin*/ );
   4917 }
   4918 
   4919 
   4920 /*------------------------------------------------------------*/
   4921 /*--- Metadata get/set functions, for client requests.     ---*/
   4922 /*------------------------------------------------------------*/
   4923 
   4924 // Nb: this expands the V+A bits out into register-form V bits, even though
   4925 // they're in memory.  This is for backward compatibility, and because it's
   4926 // probably what the user wants.
   4927 
   4928 /* Copy Vbits from/to address 'a'. Returns: 1 == OK, 2 == alignment
   4929    error [no longer used], 3 == addressing error. */
   4930 /* Nb: We used to issue various definedness/addressability errors from here,
   4931    but we took them out because they ranged from not-very-helpful to
   4932    downright annoying, and they complicated the error data structures. */
   4933 static Int mc_get_or_set_vbits_for_client (
   4934    Addr a,
   4935    Addr vbits,
   4936    SizeT szB,
   4937    Bool setting, /* True <=> set vbits,  False <=> get vbits */
   4938    Bool is_client_request /* True <=> real user request
   4939                              False <=> internal call from gdbserver */
   4940 )
   4941 {
   4942    SizeT i;
   4943    Bool  ok;
   4944    UChar vbits8;
   4945 
   4946    /* Check that arrays are addressable before doing any getting/setting.
   4947       The vbits array is checked only for a real user request. */
   4948    for (i = 0; i < szB; i++) {
   4949       if (VA_BITS2_NOACCESS == get_vabits2(a + i) ||
   4950           (is_client_request && VA_BITS2_NOACCESS == get_vabits2(vbits + i))) {
   4951          return 3;
   4952       }
   4953    }
   4954 
   4955    /* Do the copy */
   4956    if (setting) {
   4957       /* setting */
   4958       for (i = 0; i < szB; i++) {
   4959          ok = set_vbits8(a + i, ((UChar*)vbits)[i]);
   4960          tl_assert(ok);
   4961       }
   4962    } else {
   4963       /* getting */
   4964       for (i = 0; i < szB; i++) {
   4965          ok = get_vbits8(a + i, &vbits8);
   4966          tl_assert(ok);
   4967          ((UChar*)vbits)[i] = vbits8;
   4968       }
   4969       if (is_client_request)
   4970         // The bytes in vbits[] have now been set, so mark them as such.
   4971         MC_(make_mem_defined)(vbits, szB);
   4972    }
   4973 
   4974    return 1;
   4975 }
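        /* Usage sketch (informal, not compiled here): clients normally reach
           this function through the VALGRIND_GET_VBITS and VALGRIND_SET_VBITS
           requests declared in memcheck.h.  Assuming a 4-byte buffer 'buf'
           and a scratch array 'vbits':

              unsigned char vbits[4];
              int r = VALGRIND_GET_VBITS(buf, vbits, 4);  // 1 == OK, 3 == some
                                                          // bytes unaddressable
              vbits[0] = 0xFF;                            // byte 0 -> undefined
              r = VALGRIND_SET_VBITS(buf, vbits, 4);

           Each vbits[] byte holds register-form V bits for the corresponding
           byte of 'buf' (0x00 == fully defined, 0xFF == fully undefined). */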
   4976 
   4977 
   4978 /*------------------------------------------------------------*/
   4979 /*--- Detecting leaked (unreachable) malloc'd blocks.      ---*/
   4980 /*------------------------------------------------------------*/
   4981 
   4982 /* For the memory leak detector, say whether an entire 64k chunk of
   4983    address space is possibly in use, or not.  If in doubt return
   4984    True.
   4985 */
   4986 Bool MC_(is_within_valid_secondary) ( Addr a )
   4987 {
   4988    SecMap* sm = maybe_get_secmap_for ( a );
   4989    if (sm == NULL || sm == &sm_distinguished[SM_DIST_NOACCESS]) {
   4990       /* Definitely not in use. */
   4991       return False;
   4992    } else {
   4993       return True;
   4994    }
   4995 }
   4996 
   4997 
   4998 /* For the memory leak detector, say whether or not a given word
   4999    address is to be regarded as valid. */
   5000 Bool MC_(is_valid_aligned_word) ( Addr a )
   5001 {
   5002    tl_assert(sizeof(UWord) == 4 || sizeof(UWord) == 8);
   5003    tl_assert(VG_IS_WORD_ALIGNED(a));
   5004    if (get_vabits8_for_aligned_word32 (a) != VA_BITS8_DEFINED)
   5005       return False;
   5006    if (sizeof(UWord) == 8) {
   5007       if (get_vabits8_for_aligned_word32 (a + 4) != VA_BITS8_DEFINED)
   5008          return False;
   5009    }
   5010    if (UNLIKELY(MC_(in_ignored_range)(a)))
   5011       return False;
   5012    else
   5013       return True;
   5014 }
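        /* In other words: on a 64-bit host a word at 'a' counts as valid only
           if both aligned 32-bit halves (at a and a+4) are fully defined, and
           'a' is not inside an ignored address range (--ignore-ranges or the
           corresponding client request), so the leak scanner can skip it
           otherwise. */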
   5015 
   5016 
   5017 /*------------------------------------------------------------*/
   5018 /*--- Initialisation                                       ---*/
   5019 /*------------------------------------------------------------*/
   5020 
   5021 static void init_shadow_memory ( void )
   5022 {
   5023    Int     i;
   5024    SecMap* sm;
   5025 
   5026    tl_assert(V_BIT_UNDEFINED   == 1);
   5027    tl_assert(V_BIT_DEFINED     == 0);
   5028    tl_assert(V_BITS8_UNDEFINED == 0xFF);
   5029    tl_assert(V_BITS8_DEFINED   == 0);
   5030 
   5031    /* Build the 3 distinguished secondaries */
   5032    sm = &sm_distinguished[SM_DIST_NOACCESS];
   5033    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_NOACCESS;
   5034 
   5035    sm = &sm_distinguished[SM_DIST_UNDEFINED];
   5036    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_UNDEFINED;
   5037 
   5038    sm = &sm_distinguished[SM_DIST_DEFINED];
   5039    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_DEFINED;
   5040 
   5041    /* Set up the primary map. */
   5042    /* These entries gradually get overwritten as the used address
   5043       space expands. */
   5044    for (i = 0; i < N_PRIMARY_MAP; i++)
   5045       primary_map[i] = &sm_distinguished[SM_DIST_NOACCESS];
   5046 
   5047    /* Auxiliary primary maps */
   5048    init_auxmap_L1_L2();
   5049 
   5050    /* No need to set auxmap_size = auxmap_used = 0 here;
   5051       they are statically initialised. */
   5052 
   5053    /* Secondary V bit table */
   5054    secVBitTable = createSecVBitTable();
   5055 }
   5056 
   5057 
   5058 /*------------------------------------------------------------*/
   5059 /*--- Sanity check machinery (permanently engaged)         ---*/
   5060 /*------------------------------------------------------------*/
   5061 
   5062 static Bool mc_cheap_sanity_check ( void )
   5063 {
   5064    n_sanity_cheap++;
   5065    PROF_EVENT(490, "cheap_sanity_check");
   5066    /* Check for sane operating level */
   5067    if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
   5068       return False;
   5069    /* nothing else useful we can rapidly check */
   5070    return True;
   5071 }
   5072 
   5073 static Bool mc_expensive_sanity_check ( void )
   5074 {
   5075    Int     i;
   5076    Word    n_secmaps_found;
   5077    SecMap* sm;
   5078    const HChar*  errmsg;
   5079    Bool    bad = False;
   5080 
   5081    if (0) VG_(printf)("expensive sanity check\n");
   5082    if (0) return True;
   5083 
   5084    n_sanity_expensive++;
   5085    PROF_EVENT(491, "expensive_sanity_check");
   5086 
   5087    /* Check for sane operating level */
   5088    if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
   5089       return False;
   5090 
   5091    /* Check that the 3 distinguished SMs are still as they should be. */
   5092 
   5093    /* Check noaccess DSM. */
   5094    sm = &sm_distinguished[SM_DIST_NOACCESS];
   5095    for (i = 0; i < SM_CHUNKS; i++)
   5096       if (sm->vabits8[i] != VA_BITS8_NOACCESS)
   5097          bad = True;
   5098 
   5099    /* Check undefined DSM. */
   5100    sm = &sm_distinguished[SM_DIST_UNDEFINED];
   5101    for (i = 0; i < SM_CHUNKS; i++)
   5102       if (sm->vabits8[i] != VA_BITS8_UNDEFINED)
   5103          bad = True;
   5104 
   5105    /* Check defined DSM. */
   5106    sm = &sm_distinguished[SM_DIST_DEFINED];
   5107    for (i = 0; i < SM_CHUNKS; i++)
   5108       if (sm->vabits8[i] != VA_BITS8_DEFINED)
   5109          bad = True;
   5110 
   5111    if (bad) {
   5112       VG_(printf)("memcheck expensive sanity: "
   5113                   "distinguished_secondaries have changed\n");
   5114       return False;
   5115    }
   5116 
   5117    /* If we're not checking for undefined value errors, the secondary V bit
   5118     * table should be empty. */
   5119    if (MC_(clo_mc_level) == 1) {
   5120       if (0 != VG_(OSetGen_Size)(secVBitTable))
   5121          return False;
   5122    }
   5123 
   5124    /* check the auxiliary maps, very thoroughly */
   5125    n_secmaps_found = 0;
   5126    errmsg = check_auxmap_L1_L2_sanity( &n_secmaps_found );
   5127    if (errmsg) {
   5128       VG_(printf)("memcheck expensive sanity, auxmaps:\n\t%s", errmsg);
   5129       return False;
   5130    }
   5131 
   5132    /* n_secmaps_found is now the number referred to by the auxiliary
   5133       primary map.  Now add on the ones referred to by the main
   5134       primary map. */
   5135    for (i = 0; i < N_PRIMARY_MAP; i++) {
   5136       if (primary_map[i] == NULL) {
   5137          bad = True;
   5138       } else {
   5139          if (!is_distinguished_sm(primary_map[i]))
   5140             n_secmaps_found++;
   5141       }
   5142    }
   5143 
   5144    /* check that the number of secmaps issued matches the number that
   5145       are reachable (iow, no secmap leaks) */
   5146    if (n_secmaps_found != (n_issued_SMs - n_deissued_SMs))
   5147       bad = True;
   5148 
   5149    if (bad) {
   5150       VG_(printf)("memcheck expensive sanity: "
   5151                   "apparent secmap leakage\n");
   5152       return False;
   5153    }
   5154 
   5155    if (bad) {
   5156       VG_(printf)("memcheck expensive sanity: "
   5157                   "auxmap covers wrong address space\n");
   5158       return False;
   5159    }
   5160 
   5161    /* (Not checked: that there is only one pointer to each secmap; doing so would be expensive.) */
   5162 
   5163    return True;
   5164 }
   5165 
   5166 /*------------------------------------------------------------*/
   5167 /*--- Command line args                                    ---*/
   5168 /*------------------------------------------------------------*/
   5169 
   5170 /* --partial-loads-ok: enable by default on MacOS.  The MacOS system
   5171    graphics libraries are heavily vectorised, and not enabling this by
   5172    default causes lots of false errors. */
   5173 #if defined(VGO_darwin)
   5174 Bool          MC_(clo_partial_loads_ok)       = True;
   5175 #else
   5176 Bool          MC_(clo_partial_loads_ok)       = False;
   5177 #endif
   5178 
   5179 Long          MC_(clo_freelist_vol)           = 20*1000*1000LL;
   5180 Long          MC_(clo_freelist_big_blocks)    =  1*1000*1000LL;
   5181 LeakCheckMode MC_(clo_leak_check)             = LC_Summary;
   5182 VgRes         MC_(clo_leak_resolution)        = Vg_HighRes;
   5183 UInt          MC_(clo_show_leak_kinds)        = R2S(Possible) | R2S(Unreached);
   5184 UInt          MC_(clo_error_for_leak_kinds)   = R2S(Possible) | R2S(Unreached);
   5185 UInt          MC_(clo_leak_check_heuristics)  = 0;
   5186 Bool          MC_(clo_workaround_gcc296_bugs) = False;
   5187 Int           MC_(clo_malloc_fill)            = -1;
   5188 Int           MC_(clo_free_fill)              = -1;
   5189 KeepStacktraces MC_(clo_keep_stacktraces)     = KS_alloc_then_free;
   5190 Int           MC_(clo_mc_level)               = 2;
   5191 Bool          MC_(clo_show_mismatched_frees)  = True;
   5192 
   5193 static const HChar * MC_(parse_leak_heuristics_tokens) =
   5194    "-,stdstring,length64,newarray,multipleinheritance";
   5195 /* The first heuristic value (LchNone) has no keyword, as this is
   5196    a fake heuristic used to collect the blocks found without any
   5197    heuristic. */
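        /* Example (informal): "--leak-check-heuristics=stdstring,length64" is
           matched against the token list above by VG_(parse_enum_set), giving
           a set with the 'stdstring' and 'length64' positions selected; the
           special values "all" and "none" are also accepted, as the help text
           indicates. */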
   5198 
   5199 static Bool mc_process_cmd_line_options(const HChar* arg)
   5200 {
   5201    const HChar* tmp_str;
   5202    Int   tmp_show;
   5203 
   5204    tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
   5205 
   5206    /* Set MC_(clo_mc_level):
   5207          1 = A bit tracking only
   5208          2 = A and V bit tracking, but no V bit origins
   5209          3 = A and V bit tracking, and V bit origins
   5210 
   5211       Do this by inspecting --undef-value-errors= and
   5212       --track-origins=.  Reject the case --undef-value-errors=no
   5213       --track-origins=yes as meaningless.
   5214    */
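           /* Summary of the combinations handled below (for reference only):

                --undef-value-errors   --track-origins   resulting MC_(clo_mc_level)
                        no                   no                    1
                        yes                  no                    2
                        yes                  yes                   3
                        no                   yes              rejected (bad_level)
           */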
   5215    if (0 == VG_(strcmp)(arg, "--undef-value-errors=no")) {
   5216       if (MC_(clo_mc_level) == 3) {
   5217          goto bad_level;
   5218       } else {
   5219          MC_(clo_mc_level) = 1;
   5220          return True;
   5221       }
   5222    }
   5223    if (0 == VG_(strcmp)(arg, "--undef-value-errors=yes")) {
   5224       if (MC_(clo_mc_level) == 1)
   5225          MC_(clo_mc_level) = 2;
   5226       return True;
   5227    }
   5228    if (0 == VG_(strcmp)(arg, "--track-origins=no")) {
   5229       if (MC_(clo_mc_level) == 3)
   5230          MC_(clo_mc_level) = 2;
   5231       return True;
   5232    }
   5233    if (0 == VG_(strcmp)(arg, "--track-origins=yes")) {
   5234       if (MC_(clo_mc_level) == 1) {
   5235          goto bad_level;
   5236       } else {
   5237          MC_(clo_mc_level) = 3;
   5238          return True;
   5239       }
   5240    }
   5241 
   5242         if VG_BOOL_CLO(arg, "--partial-loads-ok", MC_(clo_partial_loads_ok)) {}
   5243    else if VG_USET_CLO(arg, "--errors-for-leak-kinds",
   5244                        MC_(parse_leak_kinds_tokens),
   5245                        MC_(clo_error_for_leak_kinds)) {}
   5246    else if VG_USET_CLO(arg, "--show-leak-kinds",
   5247                        MC_(parse_leak_kinds_tokens),
   5248                        MC_(clo_show_leak_kinds)) {}
   5249    else if VG_USET_CLO(arg, "--leak-check-heuristics",
   5250                        MC_(parse_leak_heuristics_tokens),
   5251                        MC_(clo_leak_check_heuristics)) {}
   5252    else if (VG_BOOL_CLO(arg, "--show-reachable", tmp_show)) {
   5253       if (tmp_show) {
   5254          MC_(clo_show_leak_kinds) = MC_(all_Reachedness)();
   5255       } else {
   5256          MC_(clo_show_leak_kinds) &= ~R2S(Reachable);
   5257       }
   5258    }
   5259    else if VG_BOOL_CLO(arg, "--show-possibly-lost", tmp_show) {
   5260       if (tmp_show) {
   5261          MC_(clo_show_leak_kinds) |= R2S(Possible);
   5262       } else {
   5263          MC_(clo_show_leak_kinds) &= ~R2S(Possible);
   5264       }
   5265    }
   5266    else if VG_BOOL_CLO(arg, "--workaround-gcc296-bugs",
   5267                                             MC_(clo_workaround_gcc296_bugs)) {}
   5268 
   5269    else if VG_BINT_CLO(arg, "--freelist-vol",  MC_(clo_freelist_vol),
   5270                                                0, 10*1000*1000*1000LL) {}
   5271 
   5272    else if VG_BINT_CLO(arg, "--freelist-big-blocks",
   5273                        MC_(clo_freelist_big_blocks),
   5274                        0, 10*1000*1000*1000LL) {}
   5275 
   5276    else if VG_XACT_CLO(arg, "--leak-check=no",
   5277                             MC_(clo_leak_check), LC_Off) {}
   5278    else if VG_XACT_CLO(arg, "--leak-check=summary",
   5279                             MC_(clo_leak_check), LC_Summary) {}
   5280    else if VG_XACT_CLO(arg, "--leak-check=yes",
   5281                             MC_(clo_leak_check), LC_Full) {}
   5282    else if VG_XACT_CLO(arg, "--leak-check=full",
   5283                             MC_(clo_leak_check), LC_Full) {}
   5284 
   5285    else if VG_XACT_CLO(arg, "--leak-resolution=low",
   5286                             MC_(clo_leak_resolution), Vg_LowRes) {}
   5287    else if VG_XACT_CLO(arg, "--leak-resolution=med",
   5288                             MC_(clo_leak_resolution), Vg_MedRes) {}
   5289    else if VG_XACT_CLO(arg, "--leak-resolution=high",
   5290                             MC_(clo_leak_resolution), Vg_HighRes) {}
   5291 
   5292    else if VG_STR_CLO(arg, "--ignore-ranges", tmp_str) {
   5293       Bool ok = parse_ignore_ranges(tmp_str);
   5294       if (!ok) {
   5295          VG_(message)(Vg_DebugMsg,
   5296             "ERROR: --ignore-ranges: "
   5297             "invalid syntax, or end <= start in range\n");
   5298          return False;
   5299       }
   5300       if (gIgnoredAddressRanges) {
   5301          Word i;
   5302          for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
   5303             UWord val     = IAR_INVALID;
   5304             UWord key_min = ~(UWord)0;
   5305             UWord key_max = (UWord)0;
   5306             VG_(indexRangeMap)( &key_min, &key_max, &val,
   5307                                 gIgnoredAddressRanges, i );
   5308             tl_assert(key_min <= key_max);
   5309             UWord limit = 0x4000000; /* 64M - entirely arbitrary limit */
   5310             if (key_max - key_min > limit) {
   5311                VG_(message)(Vg_DebugMsg,
   5312                   "ERROR: --ignore-ranges: suspiciously large range:\n");
   5313                VG_(message)(Vg_DebugMsg,
   5314                    "       0x%lx-0x%lx (size %ld)\n", key_min, key_max,
   5315                    key_max - key_min + 1);
   5316                return False;
   5317             }
   5318          }
   5319       }
   5320    }
   5321 
   5322    else if VG_BHEX_CLO(arg, "--malloc-fill", MC_(clo_malloc_fill), 0x00,0xFF) {}
   5323    else if VG_BHEX_CLO(arg, "--free-fill",   MC_(clo_free_fill),   0x00,0xFF) {}
   5324 
   5325    else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc",
   5326                        MC_(clo_keep_stacktraces), KS_alloc) {}
   5327    else if VG_XACT_CLO(arg, "--keep-stacktraces=free",
   5328                        MC_(clo_keep_stacktraces), KS_free) {}
   5329    else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-and-free",
   5330                        MC_(clo_keep_stacktraces), KS_alloc_and_free) {}
   5331    else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-then-free",
   5332                        MC_(clo_keep_stacktraces), KS_alloc_then_free) {}
   5333    else if VG_XACT_CLO(arg, "--keep-stacktraces=none",
   5334                        MC_(clo_keep_stacktraces), KS_none) {}
   5335 
   5336    else if VG_BOOL_CLO(arg, "--show-mismatched-frees",
   5337                        MC_(clo_show_mismatched_frees)) {}
   5338 
   5339    else
   5340       return VG_(replacement_malloc_process_cmd_line_option)(arg);
   5341 
   5342    return True;
   5343 
   5344 
   5345   bad_level:
   5346    VG_(fmsg_bad_option)(arg,
   5347       "--track-origins=yes has no effect when --undef-value-errors=no.\n");
   5348 }
   5349 
   5350 static void mc_print_usage(void)
   5351 {
   5352    const HChar* plo_default = "no";
   5353 #  if defined(VGO_darwin)
   5354    plo_default = "yes";
   5355 #  endif
   5356 
   5357    VG_(printf)(
   5358 "    --leak-check=no|summary|full     search for memory leaks at exit?  [summary]\n"
   5359 "    --leak-resolution=low|med|high   differentiation of leak stack traces [high]\n"
   5360 "    --show-leak-kinds=kind1,kind2,.. which leak kinds to show?\n"
   5361 "                                            [definite,possible]\n"
   5362 "    --errors-for-leak-kinds=kind1,kind2,..  which leak kinds are errors?\n"
   5363 "                                            [definite,possible]\n"
   5364 "        where kind is one of:\n"
   5365 "          definite indirect possible reachable all none\n"
   5366 "    --leak-check-heuristics=heur1,heur2,... which heuristics to use for\n"
   5367 "        reducing leak search false positives [none]\n"
   5368 "        where heur is one of:\n"
   5369 "          stdstring length64 newarray multipleinheritance all none\n"
   5370 "    --show-reachable=yes             same as --show-leak-kinds=all\n"
   5371 "    --show-reachable=no --show-possibly-lost=yes\n"
   5372 "                                     same as --show-leak-kinds=definite,possible\n"
   5373 "    --show-reachable=no --show-possibly-lost=no\n"
   5374 "                                     same as --show-leak-kinds=definite\n"
   5375 "    --undef-value-errors=no|yes      check for undefined value errors [yes]\n"
   5376 "    --track-origins=no|yes           show origins of undefined values? [no]\n"
   5377 "    --partial-loads-ok=no|yes        too hard to explain here; see manual [%s]\n"
   5378 "    --freelist-vol=<number>          volume of freed blocks queue     [20000000]\n"
   5379 "    --freelist-big-blocks=<number>   releases first blocks with size >= this [1000000]\n"
   5380 "    --workaround-gcc296-bugs=no|yes  self-explanatory [no]\n"
   5381 "    --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS]   assume given addresses are OK\n"
   5382 "    --malloc-fill=<hexnumber>        fill malloc'd areas with given value\n"
   5383 "    --free-fill=<hexnumber>          fill free'd areas with given value\n"
   5384 "    --keep-stacktraces=alloc|free|alloc-and-free|alloc-then-free|none\n"
   5385 "        stack trace(s) to keep for malloc'd/free'd areas       [alloc-then-free]\n"
   5386 "    --show-mismatched-frees=no|yes   show frees that don't match the allocator? [yes]\n"
   5387 , plo_default
   5388    );
   5389 }
   5390 
   5391 static void mc_print_debug_usage(void)
   5392 {
   5393    VG_(printf)(
   5394 "    (none)\n"
   5395    );
   5396 }
   5397 
   5398 
   5399 /*------------------------------------------------------------*/
   5400 /*--- Client blocks                                        ---*/
   5401 /*------------------------------------------------------------*/
   5402 
   5403 /* Client block management:
   5404 
   5405    This is managed as an expanding array of client block descriptors.
   5406    Indices of live descriptors are issued to the client, so it can ask
   5407    to free them later.  Therefore we cannot slide live entries down
   5408    over dead ones.  Instead we must use free/inuse flags and scan for
   5409    an empty slot at allocation time.  This in turn means allocation is
   5410    relatively expensive, so we hope this does not happen too often.
   5411 
   5412    An unused block has start == size == 0
   5413 */
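        /* Usage sketch (informal; VALGRIND_CREATE_BLOCK and VALGRIND_DISCARD
           are the valgrind.h requests handled further below):

              char buf[256];
              int id = VALGRIND_CREATE_BLOCK(buf, sizeof buf, "my buffer");
              ...
              VALGRIND_DISCARD(id);    // clears the slot so it can be reused

           The returned id is simply the index of the descriptor in cgbs[]. */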
   5414 
   5415 /* type CGenBlock is defined in mc_include.h */
   5416 
   5417 /* This subsystem is self-initialising. */
   5418 static UWord      cgb_size = 0;
   5419 static UWord      cgb_used = 0;
   5420 static CGenBlock* cgbs     = NULL;
   5421 
   5422 /* Stats for this subsystem. */
   5423 static ULong cgb_used_MAX = 0;   /* Max in use. */
   5424 static ULong cgb_allocs   = 0;   /* Number of allocs. */
   5425 static ULong cgb_discards = 0;   /* Number of discards. */
   5426 static ULong cgb_search   = 0;   /* Number of searches. */
   5427 
   5428 
   5429 /* Get access to the client block array. */
   5430 void MC_(get_ClientBlock_array)( /*OUT*/CGenBlock** blocks,
   5431                                  /*OUT*/UWord* nBlocks )
   5432 {
   5433    *blocks  = cgbs;
   5434    *nBlocks = cgb_used;
   5435 }
   5436 
   5437 
   5438 static
   5439 Int alloc_client_block ( void )
   5440 {
   5441    UWord      i, sz_new;
   5442    CGenBlock* cgbs_new;
   5443 
   5444    cgb_allocs++;
   5445 
   5446    for (i = 0; i < cgb_used; i++) {
   5447       cgb_search++;
   5448       if (cgbs[i].start == 0 && cgbs[i].size == 0)
   5449          return i;
   5450    }
   5451 
   5452    /* Not found.  Try to allocate one at the end. */
   5453    if (cgb_used < cgb_size) {
   5454       cgb_used++;
   5455       return cgb_used-1;
   5456    }
   5457 
   5458    /* Ok, we have to allocate a new one. */
   5459    tl_assert(cgb_used == cgb_size);
   5460    sz_new = (cgbs == NULL) ? 10 : (2 * cgb_size);
   5461 
   5462    cgbs_new = VG_(malloc)( "mc.acb.1", sz_new * sizeof(CGenBlock) );
   5463    for (i = 0; i < cgb_used; i++)
   5464       cgbs_new[i] = cgbs[i];
   5465 
   5466    if (cgbs != NULL)
   5467       VG_(free)( cgbs );
   5468    cgbs = cgbs_new;
   5469 
   5470    cgb_size = sz_new;
   5471    cgb_used++;
   5472    if (cgb_used > cgb_used_MAX)
   5473       cgb_used_MAX = cgb_used;
   5474    return cgb_used-1;
   5475 }
   5476 
   5477 
   5478 static void show_client_block_stats ( void )
   5479 {
   5480    VG_(message)(Vg_DebugMsg,
   5481       "general CBs: %llu allocs, %llu discards, %llu maxinuse, %llu search\n",
   5482       cgb_allocs, cgb_discards, cgb_used_MAX, cgb_search
   5483    );
   5484 }
   5485 static void print_monitor_help ( void )
   5486 {
   5487    VG_(gdb_printf)
   5488       (
   5489 "\n"
   5490 "memcheck monitor commands:\n"
   5491 "  get_vbits <addr> [<len>]\n"
   5492 "        returns validity bits for <len> (or 1) bytes at <addr>\n"
   5493 "            bit values 0 = valid, 1 = invalid, __ = unaddressable byte\n"
   5494 "        Example: get_vbits 0x8049c78 10\n"
   5495 "  make_memory [noaccess|undefined\n"
   5496 "                     |defined|Definedifaddressable] <addr> [<len>]\n"
   5497 "        mark <len> (or 1) bytes at <addr> with the given accessibility\n"
   5498 "  check_memory [addressable|defined] <addr> [<len>]\n"
   5499 "        check that <len> (or 1) bytes at <addr> have the given accessibility\n"
   5500 "            and outputs a description of <addr>\n"
   5501 "  leak_check [full*|summary]\n"
   5502 "                [kinds kind1,kind2,...|reachable|possibleleak*|definiteleak]\n"
   5503 "                [heuristics heur1,heur2,...]\n"
   5504 "                [increased*|changed|any]\n"
   5505 "                [unlimited*|limited <max_loss_records_output>]\n"
   5506 "            * = defaults\n"
   5507 "       where kind is one of:\n"
   5508 "         definite indirect possible reachable all none\n"
   5509 "       where heur is one of:\n"
   5510 "         stdstring length64 newarray multipleinheritance all none*\n"
   5511 "       Examples: leak_check\n"
   5512 "                 leak_check summary any\n"
   5513 "                 leak_check full kinds indirect,possible\n"
   5514 "                 leak_check full reachable any limited 100\n"
   5515 "  block_list <loss_record_nr>\n"
   5516 "        after a leak search, shows the list of blocks of <loss_record_nr>\n"
   5517 "  who_points_at <addr> [<len>]\n"
   5518 "        shows places pointing inside <len> (default 1) bytes at <addr>\n"
   5519 "        (with len 1, only shows \"start pointers\" pointing exactly to <addr>,\n"
   5520 "         with len > 1, will also show \"interior pointers\")\n"
   5521 "\n");
   5522 }
   5523 
   5524 /* return True if request recognised, False otherwise */
   5525 static Bool handle_gdb_monitor_command (ThreadId tid, HChar *req)
   5526 {
   5527    HChar* wcmd;
   5528    HChar s[VG_(strlen(req)) + 1]; /* copy for strtok_r */
   5529    HChar *ssaveptr;
   5530 
   5531    VG_(strcpy) (s, req);
   5532 
   5533    wcmd = VG_(strtok_r) (s, " ", &ssaveptr);
   5534    /* NB: if possible, avoid introducing a new command below which
   5535       starts with the same first letter(s) as an already existing
   5536       command. This ensures a shorter abbreviation for the user. */
   5537    switch (VG_(keyword_id)
   5538            ("help get_vbits leak_check make_memory check_memory "
   5539             "block_list who_points_at",
   5540             wcmd, kwd_report_duplicated_matches)) {
   5541    case -2: /* multiple matches */
   5542       return True;
   5543    case -1: /* not found */
   5544       return False;
   5545    case  0: /* help */
   5546       print_monitor_help();
   5547       return True;
   5548    case  1: { /* get_vbits */
   5549       Addr address;
   5550       SizeT szB = 1;
   5551       if (VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) {
   5552          UChar vbits;
   5553          Int i;
   5554          Int unaddressable = 0;
   5555          for (i = 0; i < szB; i++) {
   5556             Int res = mc_get_or_set_vbits_for_client
   5557                (address+i, (Addr) &vbits, 1,
   5558                 False, /* get them */
   5559                 False  /* is client request */ );
   5560             /* about to print the first byte of a new line of 32, so print a \n first. */
   5561             if ((i % 32) == 0 && i != 0)
   5562                VG_(printf) ("\n");
   5563             /* about to print the first byte of a new group of 4, so print a space first. */
   5564             else if ((i % 4) == 0 && i != 0)
   5565                VG_(printf) (" ");
   5566             if (res == 1) {
   5567                VG_(printf) ("%02x", vbits);
   5568             } else {
   5569                tl_assert(3 == res);
   5570                unaddressable++;
   5571                VG_(printf) ("__");
   5572             }
   5573          }
   5574          VG_(printf) ("\n");
   5575          if (unaddressable) {
   5576             VG_(printf)
   5577                ("Address %p len %ld has %d bytes unaddressable\n",
   5578                 (void *)address, szB, unaddressable);
   5579          }
   5580       }
   5581       return True;
   5582    }
   5583    case  2: { /* leak_check */
   5584       Int err = 0;
   5585       LeakCheckParams lcp;
   5586       HChar* kw;
   5587 
   5588       lcp.mode               = LC_Full;
   5589       lcp.show_leak_kinds    = R2S(Possible) | R2S(Unreached);
   5590       lcp.errors_for_leak_kinds = 0; // no errors for interactive leak search.
   5591       lcp.heuristics         = 0;
   5592       lcp.deltamode          = LCD_Increased;
   5593       lcp.max_loss_records_output = 999999999;
   5594       lcp.requested_by_monitor_command = True;
   5595 
   5596       for (kw = VG_(strtok_r) (NULL, " ", &ssaveptr);
   5597            kw != NULL;
   5598            kw = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
   5599          switch (VG_(keyword_id)
   5600                  ("full summary "
   5601                   "kinds reachable possibleleak definiteleak "
   5602                   "heuristics "
   5603                   "increased changed any "
   5604                   "unlimited limited ",
   5605                   kw, kwd_report_all)) {
   5606          case -2: err++; break;
   5607          case -1: err++; break;
   5608          case  0: /* full */
   5609             lcp.mode = LC_Full; break;
   5610          case  1: /* summary */
   5611             lcp.mode = LC_Summary; break;
   5612          case  2: { /* kinds */
   5613             wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
   5614             if (wcmd == NULL
   5615                 || !VG_(parse_enum_set)(MC_(parse_leak_kinds_tokens),
   5616                                         True/*allow_all*/,
   5617                                         wcmd,
   5618                                         &lcp.show_leak_kinds)) {
   5619                VG_(gdb_printf) ("missing or malformed leak kinds set\n");
   5620                err++;
   5621             }
   5622             break;
   5623          }
   5624          case  3: /* reachable */
   5625             lcp.show_leak_kinds = MC_(all_Reachedness)();
   5626             break;
   5627          case  4: /* possibleleak */
   5628             lcp.show_leak_kinds
   5629                = R2S(Possible) | R2S(IndirectLeak) | R2S(Unreached);
   5630             break;
   5631          case  5: /* definiteleak */
   5632             lcp.show_leak_kinds = R2S(Unreached);
   5633             break;
   5634          case  6: { /* heuristics */
   5635             wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
   5636             if (wcmd == NULL
   5637                 || !VG_(parse_enum_set)(MC_(parse_leak_heuristics_tokens),
   5638                                         True/*allow_all*/,
   5639                                         wcmd,
   5640                                         &lcp.heuristics)) {
   5641                VG_(gdb_printf) ("missing or malformed heuristics set\n");
   5642                err++;
   5643             }
   5644             break;
   5645          }
   5646          case  7: /* increased */
   5647             lcp.deltamode = LCD_Increased; break;
   5648          case  8: /* changed */
   5649             lcp.deltamode = LCD_Changed; break;
   5650          case  9: /* any */
   5651             lcp.deltamode = LCD_Any; break;
   5652          case 10: /* unlimited */
   5653             lcp.max_loss_records_output = 999999999; break;
   5654          case 11: { /* limited */
   5655             Int int_value;
   5656             const HChar* endptr;
   5657 
   5658             wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
   5659             if (wcmd == NULL) {
   5660                int_value = 0;
   5661                endptr = "empty"; /* to report an error below */
   5662             } else {
   5663                HChar *the_end;
   5664                int_value = VG_(strtoll10) (wcmd, &the_end);
   5665                endptr = the_end;
   5666             }
   5667             if (*endptr != '\0')
   5668                VG_(gdb_printf) ("missing or malformed integer value\n");
   5669             else if (int_value > 0)
   5670                lcp.max_loss_records_output = (UInt) int_value;
   5671             else
   5672                VG_(gdb_printf) ("max_loss_records_output must be >= 1, got %d\n",
   5673                                 int_value);
   5674             break;
   5675          }
   5676          default:
   5677             tl_assert (0);
   5678          }
   5679       }
   5680       if (!err)
   5681          MC_(detect_memory_leaks)(tid, &lcp);
   5682       return True;
   5683    }
   5684 
   5685    case  3: { /* make_memory */
   5686       Addr address;
   5687       SizeT szB = 1;
   5688       Int kwdid = VG_(keyword_id)
   5689          ("noaccess undefined defined Definedifaddressable",
   5690           VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
   5691       if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
   5692          return True;
   5693       switch (kwdid) {
   5694       case -2: break;
   5695       case -1: break;
   5696       case  0: MC_(make_mem_noaccess) (address, szB); break;
   5697       case  1: make_mem_undefined_w_tid_and_okind ( address, szB, tid,
   5698                                                     MC_OKIND_USER ); break;
   5699       case  2: MC_(make_mem_defined) ( address, szB ); break;
   5700       case  3: make_mem_defined_if_addressable ( address, szB ); break;
   5701       default: tl_assert(0);
   5702       }
   5703       return True;
   5704    }
   5705 
   5706    case  4: { /* check_memory */
   5707       Addr address;
   5708       SizeT szB = 1;
   5709       Addr bad_addr;
   5710       UInt okind;
   5711       const HChar* src;
   5712       UInt otag;
   5713       UInt ecu;
   5714       ExeContext* origin_ec;
   5715       MC_ReadResult res;
   5716 
   5717       Int kwdid = VG_(keyword_id)
   5718          ("addressable defined",
   5719           VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
   5720       if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
   5721          return True;
   5722       switch (kwdid) {
   5723       case -2: break;
   5724       case -1: break;
   5725       case  0: /* addressable */
   5726          if (is_mem_addressable ( address, szB, &bad_addr ))
   5727             VG_(printf) ("Address %p len %ld addressable\n",
   5728                              (void *)address, szB);
   5729          else
   5730             VG_(printf)
   5731                ("Address %p len %ld not addressable:\nbad address %p\n",
   5732                 (void *)address, szB, (void *) bad_addr);
   5733          MC_(pp_describe_addr) (address);
   5734          break;
   5735       case  1: /* defined */
   5736          res = is_mem_defined ( address, szB, &bad_addr, &otag );
   5737          if (MC_AddrErr == res)
   5738             VG_(printf)
   5739                ("Address %p len %ld not addressable:\nbad address %p\n",
   5740                 (void *)address, szB, (void *) bad_addr);
   5741          else if (MC_ValueErr == res) {
   5742             okind = otag & 3;
   5743             switch (okind) {
   5744             case MC_OKIND_STACK:
   5745                src = " was created by a stack allocation"; break;
   5746             case MC_OKIND_HEAP:
   5747                src = " was created by a heap allocation"; break;
   5748             case MC_OKIND_USER:
   5749                src = " was created by a client request"; break;
   5750             case MC_OKIND_UNKNOWN:
   5751                src = ""; break;
   5752             default: tl_assert(0);
   5753             }
   5754             VG_(printf)
   5755                ("Address %p len %ld not defined:\n"
   5756                 "Uninitialised value at %p%s\n",
   5757                 (void *)address, szB, (void *) bad_addr, src);
   5758             ecu = otag & ~3;
   5759             if (VG_(is_plausible_ECU)(ecu)) {
   5760                origin_ec = VG_(get_ExeContext_from_ECU)( ecu );
   5761                VG_(pp_ExeContext)( origin_ec );
   5762             }
   5763          }
   5764          else
   5765             VG_(printf) ("Address %p len %ld defined\n",
   5766                          (void *)address, szB);
   5767          MC_(pp_describe_addr) (address);
   5768          break;
   5769       default: tl_assert(0);
   5770       }
   5771       return True;
   5772    }
   5773 
   5774    case  5: { /* block_list */
   5775       HChar* wl;
   5776       HChar *endptr;
   5777       UInt lr_nr = 0;
   5778       wl = VG_(strtok_r) (NULL, " ", &ssaveptr);
   5779       if (wl != NULL)
   5780          lr_nr = VG_(strtoull10) (wl, &endptr);
   5781       if (wl == NULL || *endptr != '\0') {
   5782          VG_(gdb_printf) ("malformed or missing integer\n");
   5783       } else {
   5784          // Pass lr_nr-1: the number shown to the user is 1 more than the index in lr_array.
   5785          if (lr_nr == 0 || ! MC_(print_block_list) (lr_nr-1))
   5786             VG_(gdb_printf) ("invalid loss record nr\n");
   5787       }
   5788       return True;
   5789    }
   5790 
   5791    case  6: { /* who_points_at */
   5792       Addr address;
   5793       SizeT szB = 1;
   5794 
   5795       if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
   5796          return True;
   5797       if (address == (Addr) 0) {
   5798          VG_(gdb_printf) ("Cannot search who points at 0x0\n");
   5799          return True;
   5800       }
   5801       MC_(who_points_at) (address, szB);
   5802       return True;
   5803    }
   5804 
   5805    default:
   5806       tl_assert(0);
   5807       return False;
   5808    }
   5809 }
   5810 
   5811 /*------------------------------------------------------------*/
   5812 /*--- Client requests                                      ---*/
   5813 /*------------------------------------------------------------*/
   5814 
   5815 static Bool mc_handle_client_request ( ThreadId tid, UWord* arg, UWord* ret )
   5816 {
   5817    Int   i;
   5818    Addr  bad_addr;
   5819 
   5820    if (!VG_IS_TOOL_USERREQ('M','C',arg[0])
   5821        && VG_USERREQ__MALLOCLIKE_BLOCK != arg[0]
   5822        && VG_USERREQ__RESIZEINPLACE_BLOCK != arg[0]
   5823        && VG_USERREQ__FREELIKE_BLOCK   != arg[0]
   5824        && VG_USERREQ__CREATE_MEMPOOL   != arg[0]
   5825        && VG_USERREQ__DESTROY_MEMPOOL  != arg[0]
   5826        && VG_USERREQ__MEMPOOL_ALLOC    != arg[0]
   5827        && VG_USERREQ__MEMPOOL_FREE     != arg[0]
   5828        && VG_USERREQ__MEMPOOL_TRIM     != arg[0]
   5829        && VG_USERREQ__MOVE_MEMPOOL     != arg[0]
   5830        && VG_USERREQ__MEMPOOL_CHANGE   != arg[0]
   5831        && VG_USERREQ__MEMPOOL_EXISTS   != arg[0]
   5832        && VG_USERREQ__GDB_MONITOR_COMMAND   != arg[0]
   5833        && VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0]
   5834        && VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0])
   5835       return False;
   5836 
   5837    switch (arg[0]) {
   5838       case VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE: {
   5839          Bool ok = is_mem_addressable ( arg[1], arg[2], &bad_addr );
   5840          if (!ok)
   5841             MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 );
   5842          *ret = ok ? (UWord)NULL : bad_addr;
   5843          break;
   5844       }
   5845 
   5846       case VG_USERREQ__CHECK_MEM_IS_DEFINED: {
   5847          Bool errorV    = False;
   5848          Addr bad_addrV = 0;
   5849          UInt otagV     = 0;
   5850          Bool errorA    = False;
   5851          Addr bad_addrA = 0;
   5852          is_mem_defined_comprehensive(
   5853             arg[1], arg[2],
   5854             &errorV, &bad_addrV, &otagV, &errorA, &bad_addrA
   5855          );
   5856          if (errorV) {
   5857             MC_(record_user_error) ( tid, bad_addrV,
   5858                                      /*isAddrErr*/False, otagV );
   5859          }
   5860          if (errorA) {
   5861             MC_(record_user_error) ( tid, bad_addrA,
   5862                                      /*isAddrErr*/True, 0 );
   5863          }
   5864          /* Return the lower of the two erring addresses, if any. */
   5865          *ret = 0;
   5866          if (errorV && !errorA) {
   5867             *ret = bad_addrV;
   5868          }
   5869          if (!errorV && errorA) {
   5870             *ret = bad_addrA;
   5871          }
   5872          if (errorV && errorA) {
   5873             *ret = bad_addrV < bad_addrA ? bad_addrV : bad_addrA;
   5874          }
   5875          break;
   5876       }
   5877 
   5878       case VG_USERREQ__DO_LEAK_CHECK: {
   5879          LeakCheckParams lcp;
   5880 
   5881          if (arg[1] == 0)
   5882             lcp.mode = LC_Full;
   5883          else if (arg[1] == 1)
   5884             lcp.mode = LC_Summary;
   5885          else {
   5886             VG_(message)(Vg_UserMsg,
   5887                          "Warning: unknown memcheck leak search mode\n");
   5888             lcp.mode = LC_Full;
   5889          }
   5890 
   5891          lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
   5892          lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
   5893          lcp.heuristics = MC_(clo_leak_check_heuristics);
   5894 
   5895          if (arg[2] == 0)
   5896             lcp.deltamode = LCD_Any;
   5897          else if (arg[2] == 1)
   5898             lcp.deltamode = LCD_Increased;
   5899          else if (arg[2] == 2)
   5900             lcp.deltamode = LCD_Changed;
   5901          else {
   5902             VG_(message)
   5903                (Vg_UserMsg,
   5904                 "Warning: unknown memcheck leak search deltamode\n");
   5905             lcp.deltamode = LCD_Any;
   5906          }
   5907          lcp.max_loss_records_output = 999999999;
   5908          lcp.requested_by_monitor_command = False;
   5909 
   5910          MC_(detect_memory_leaks)(tid, &lcp);
   5911          *ret = 0; /* return value is meaningless */
   5912          break;
   5913       }
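              /* Usage sketch (informal): assuming the memcheck.h definitions,
                 the convenience macros map onto arg[1]/arg[2] as follows:
                    VALGRIND_DO_LEAK_CHECK          -> arg[1]=0, arg[2]=0 (full, any)
                    VALGRIND_DO_ADDED_LEAK_CHECK    -> arg[1]=0, arg[2]=1 (full, increased)
                    VALGRIND_DO_CHANGED_LEAK_CHECK  -> arg[1]=0, arg[2]=2 (full, changed)
                    VALGRIND_DO_QUICK_LEAK_CHECK    -> arg[1]=1, arg[2]=0 (summary, any)
              */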
   5914 
   5915       case VG_USERREQ__MAKE_MEM_NOACCESS:
   5916          MC_(make_mem_noaccess) ( arg[1], arg[2] );
   5917          *ret = -1;
   5918          break;
   5919 
   5920       case VG_USERREQ__MAKE_MEM_UNDEFINED:
   5921          make_mem_undefined_w_tid_and_okind ( arg[1], arg[2], tid,
   5922                                               MC_OKIND_USER );
   5923          *ret = -1;
   5924          break;
   5925 
   5926       case VG_USERREQ__MAKE_MEM_DEFINED:
   5927          MC_(make_mem_defined) ( arg[1], arg[2] );
   5928          *ret = -1;
   5929          break;
   5930 
   5931       case VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE:
   5932          make_mem_defined_if_addressable ( arg[1], arg[2] );
   5933          *ret = -1;
   5934          break;
   5935 
   5936       case VG_USERREQ__CREATE_BLOCK: /* describe a block */
   5937          if (arg[1] != 0 && arg[2] != 0) {
   5938             i = alloc_client_block();
   5939             /* VG_(printf)("allocated %d %p\n", i, cgbs); */
   5940             cgbs[i].start = arg[1];
   5941             cgbs[i].size  = arg[2];
   5942             cgbs[i].desc  = VG_(strdup)("mc.mhcr.1", (HChar *)arg[3]);
   5943             cgbs[i].where = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ );
   5944             *ret = i;
   5945          } else
   5946             *ret = -1;
   5947          break;
   5948 
   5949       case VG_USERREQ__DISCARD: /* discard */
   5950          if (cgbs == NULL
   5951              || arg[2] >= cgb_used ||
   5952              (cgbs[arg[2]].start == 0 && cgbs[arg[2]].size == 0)) {
   5953             *ret = 1;
   5954          } else {
   5955             tl_assert(arg[2] >= 0 && arg[2] < cgb_used);
   5956             cgbs[arg[2]].start = cgbs[arg[2]].size = 0;
   5957             VG_(free)(cgbs[arg[2]].desc);
   5958             cgb_discards++;
   5959             *ret = 0;
   5960          }
   5961          break;
   5962 
   5963       case VG_USERREQ__GET_VBITS:
   5964          *ret = mc_get_or_set_vbits_for_client
   5965                    ( arg[1], arg[2], arg[3],
   5966                      False /* get them */,
   5967                      True /* is client request */ );
   5968          break;
   5969 
   5970       case VG_USERREQ__SET_VBITS:
   5971          *ret = mc_get_or_set_vbits_for_client
   5972                    ( arg[1], arg[2], arg[3],
   5973                      True /* set them */,
   5974                      True /* is client request */ );
   5975          break;
   5976 
   5977       case VG_USERREQ__COUNT_LEAKS: { /* count leaked bytes */
   5978          UWord** argp = (UWord**)arg;
   5979          // MC_(bytes_leaked) et al were set by the last leak check (or zero
   5980          // if no prior leak checks performed).
   5981          *argp[1] = MC_(bytes_leaked) + MC_(bytes_indirect);
   5982          *argp[2] = MC_(bytes_dubious);
   5983          *argp[3] = MC_(bytes_reachable);
   5984          *argp[4] = MC_(bytes_suppressed);
   5985          // there is no argp[5]
   5986          //*argp[5] = MC_(bytes_indirect);
   5987          // XXX need to make *argp[1-4] defined;  currently done in the
   5988          // VALGRIND_COUNT_LEAKS_MACRO by initialising them to zero.
   5989          *ret = 0;
   5990          return True;
   5991       }
   5992       case VG_USERREQ__COUNT_LEAK_BLOCKS: { /* count leaked blocks */
   5993          UWord** argp = (UWord**)arg;
   5994          // MC_(blocks_leaked) et al were set by the last leak check (or zero
   5995          // if no prior leak checks performed).
   5996          *argp[1] = MC_(blocks_leaked) + MC_(blocks_indirect);
   5997          *argp[2] = MC_(blocks_dubious);
   5998          *argp[3] = MC_(blocks_reachable);
   5999          *argp[4] = MC_(blocks_suppressed);
   6000          // there is no argp[5]
   6001          //*argp[5] = MC_(blocks_indirect);
   6002          // XXX need to make *argp[1-4] defined;  currently done in the
   6003          // VALGRIND_COUNT_LEAK_BLOCKS_MACRO by initialising them to zero.
   6004          *ret = 0;
   6005          return True;
   6006       }
   6007       case VG_USERREQ__MALLOCLIKE_BLOCK: {
   6008          Addr p         = (Addr)arg[1];
   6009          SizeT sizeB    =       arg[2];
   6010          UInt rzB       =       arg[3];
   6011          Bool is_zeroed = (Bool)arg[4];
   6012 
   6013          MC_(new_block) ( tid, p, sizeB, /*ignored*/0, is_zeroed,
   6014                           MC_AllocCustom, MC_(malloc_list) );
   6015          if (rzB > 0) {
   6016             MC_(make_mem_noaccess) ( p - rzB, rzB);
   6017             MC_(make_mem_noaccess) ( p + sizeB, rzB);
   6018          }
   6019          return True;
   6020       }
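              /* Usage sketch (informal): a custom allocator typically pairs
                 this with FREELIKE_BLOCK.  With a hypothetical pool allocator
                 and a redzone of RZ bytes on each side:

                    char* p = (char*)pool_alloc(pool, size + 2*RZ) + RZ;
                    VALGRIND_MALLOCLIKE_BLOCK(p, size, RZ, /*is_zeroed*/0);
                    ...
                    VALGRIND_FREELIKE_BLOCK(p, RZ);
                    pool_free(pool, p - RZ);

                 pool_alloc/pool_free are placeholders; the redzones either
                 side of p are the bytes marked noaccess above. */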
   6021       case VG_USERREQ__RESIZEINPLACE_BLOCK: {
   6022          Addr p         = (Addr)arg[1];
   6023          SizeT oldSizeB =       arg[2];
   6024          SizeT newSizeB =       arg[3];
   6025          UInt rzB       =       arg[4];
   6026 
   6027          MC_(handle_resizeInPlace) ( tid, p, oldSizeB, newSizeB, rzB );
   6028          return True;
   6029       }
   6030       case VG_USERREQ__FREELIKE_BLOCK: {
   6031          Addr p         = (Addr)arg[1];
   6032          UInt rzB       =       arg[2];
   6033 
   6034          MC_(handle_free) ( tid, p, rzB, MC_AllocCustom );
   6035          return True;
   6036       }
   6037 
   6038       case _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR: {
   6039          HChar* s  = (HChar*)arg[1];
   6040          Addr  dst = (Addr) arg[2];
   6041          Addr  src = (Addr) arg[3];
   6042          SizeT len = (SizeT)arg[4];
   6043          MC_(record_overlap_error)(tid, s, src, dst, len);
   6044          return True;
   6045       }
   6046 
   6047       case VG_USERREQ__CREATE_MEMPOOL: {
   6048          Addr pool      = (Addr)arg[1];
   6049          UInt rzB       =       arg[2];
   6050          Bool is_zeroed = (Bool)arg[3];
   6051 
   6052          MC_(create_mempool) ( pool, rzB, is_zeroed );
   6053          return True;
   6054       }
   6055 
   6056       case VG_USERREQ__DESTROY_MEMPOOL: {
   6057          Addr pool      = (Addr)arg[1];
   6058 
   6059          MC_(destroy_mempool) ( pool );
   6060          return True;
   6061       }
   6062 
   6063       case VG_USERREQ__MEMPOOL_ALLOC: {
   6064          Addr pool      = (Addr)arg[1];
   6065          Addr addr      = (Addr)arg[2];
   6066          UInt size      =       arg[3];
   6067 
   6068          MC_(mempool_alloc) ( tid, pool, addr, size );
   6069          return True;
   6070       }
   6071 
   6072       case VG_USERREQ__MEMPOOL_FREE: {
   6073          Addr pool      = (Addr)arg[1];
   6074          Addr addr      = (Addr)arg[2];
   6075 
   6076          MC_(mempool_free) ( pool, addr );
   6077          return True;
   6078       }
   6079 
   6080       case VG_USERREQ__MEMPOOL_TRIM: {
   6081          Addr pool      = (Addr)arg[1];
   6082          Addr addr      = (Addr)arg[2];
   6083          UInt size      =       arg[3];
   6084 
   6085          MC_(mempool_trim) ( pool, addr, size );
   6086          return True;
   6087       }
   6088 
   6089       case VG_USERREQ__MOVE_MEMPOOL: {
   6090          Addr poolA     = (Addr)arg[1];
   6091          Addr poolB     = (Addr)arg[2];
   6092 
   6093          MC_(move_mempool) ( poolA, poolB );
   6094          return True;
   6095       }
   6096 
   6097       case VG_USERREQ__MEMPOOL_CHANGE: {
   6098          Addr pool      = (Addr)arg[1];
   6099          Addr addrA     = (Addr)arg[2];
   6100          Addr addrB     = (Addr)arg[3];
   6101          UInt size      =       arg[4];
   6102 
   6103          MC_(mempool_change) ( pool, addrA, addrB, size );
   6104          return True;
   6105       }
   6106 
   6107       case VG_USERREQ__MEMPOOL_EXISTS: {
   6108          Addr pool      = (Addr)arg[1];
   6109 
   6110          *ret = (UWord) MC_(mempool_exists) ( pool );
   6111          return True;
   6112       }
   6113 
   6114       case VG_USERREQ__GDB_MONITOR_COMMAND: {
   6115          Bool handled = handle_gdb_monitor_command (tid, (HChar*)arg[1]);
   6116          if (handled)
   6117             *ret = 1;
   6118          else
   6119             *ret = 0;
   6120          return handled;
   6121       }
   6122 
   6123       case VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE:
   6124       case VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE: {
   6125          Bool addRange
   6126             = arg[0] == VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE;
   6127          Bool ok
   6128             = modify_ignore_ranges(addRange, arg[1], arg[2]);
   6129          *ret = ok ? 1 : 0;
   6130          return True;
   6131       }
   6132 
   6133       default:
   6134          VG_(message)(
   6135             Vg_UserMsg,
   6136             "Warning: unknown memcheck client request code %llx\n",
   6137             (ULong)arg[0]
   6138          );
   6139          return False;
   6140    }
   6141    return True;
   6142 }
   6143 
   6144 
   6145 /*------------------------------------------------------------*/
   6146 /*--- Crude profiling machinery.                           ---*/
   6147 /*------------------------------------------------------------*/
   6148 
   6149 // We track a number of interesting events (using PROF_EVENT)
   6150 // if MC_PROFILE_MEMORY is defined.
   6151 
   6152 #ifdef MC_PROFILE_MEMORY
   6153 
   6154 UInt   MC_(event_ctr)[N_PROF_EVENTS];
   6155 HChar* MC_(event_ctr_name)[N_PROF_EVENTS];
   6156 
   6157 static void init_prof_mem ( void )
   6158 {
   6159    Int i;
   6160    for (i = 0; i < N_PROF_EVENTS; i++) {
   6161       MC_(event_ctr)[i] = 0;
   6162       MC_(event_ctr_name)[i] = NULL;
   6163    }
   6164 }
   6165 
   6166 static void done_prof_mem ( void )
   6167 {
   6168    Int  i;
   6169    Bool spaced = False;
   6170    for (i = 0; i < N_PROF_EVENTS; i++) {
   6171       if (!spaced && (i % 10) == 0) {
   6172          VG_(printf)("\n");
   6173          spaced = True;
   6174       }
   6175       if (MC_(event_ctr)[i] > 0) {
   6176          spaced = False;
   6177          VG_(printf)( "prof mem event %3d: %9d   %s\n",
   6178                       i, MC_(event_ctr)[i],
   6179                       MC_(event_ctr_name)[i]
   6180                          ? MC_(event_ctr_name)[i] : "unnamed");
   6181       }
   6182    }
   6183 }
   6184 
   6185 #else
   6186 
   6187 static void init_prof_mem ( void ) { }
   6188 static void done_prof_mem ( void ) { }
   6189 
   6190 #endif
   6191 
   6192 
   6193 /*------------------------------------------------------------*/
   6194 /*--- Origin tracking stuff                                ---*/
   6195 /*------------------------------------------------------------*/
   6196 
   6197 /*--------------------------------------------*/
   6198 /*--- Origin tracking: load handlers       ---*/
   6199 /*--------------------------------------------*/
   6200 
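        /* Take the numerically larger of the two origin tags.  A tag of 0
           means "no origin information", so this always yields a non-zero
           result when either input carries an origin; beyond that, preferring
           the larger tag is just a cheap deterministic way to pick one. */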
   6201 static INLINE UInt merge_origins ( UInt or1, UInt or2 ) {
   6202    return or1 > or2 ? or1 : or2;
   6203 }
   6204 
   6205 UWord VG_REGPARM(1) MC_(helperc_b_load1)( Addr a ) {
   6206    OCacheLine* line;
   6207    UChar descr;
   6208    UWord lineoff = oc_line_offset(a);
   6209    UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
   6210 
   6211    if (OC_ENABLE_ASSERTIONS) {
   6212       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   6213    }
   6214 
   6215    line = find_OCacheLine( a );
   6216 
   6217    descr = line->descr[lineoff];
   6218    if (OC_ENABLE_ASSERTIONS) {
   6219       tl_assert(descr < 0x10);
   6220    }
   6221 
   6222    if (LIKELY(0 == (descr & (1 << byteoff))))  {
   6223       return 0;
   6224    } else {
   6225       return line->w32[lineoff];
   6226    }
   6227 }
   6228 
   6229 UWord VG_REGPARM(1) MC_(helperc_b_load2)( Addr a ) {
   6230    OCacheLine* line;
   6231    UChar descr;
   6232    UWord lineoff, byteoff;
   6233 
   6234    if (UNLIKELY(a & 1)) {
   6235       /* Handle misaligned case, slowly. */
   6236       UInt oLo   = (UInt)MC_(helperc_b_load1)( a + 0 );
   6237       UInt oHi   = (UInt)MC_(helperc_b_load1)( a + 1 );
   6238       return merge_origins(oLo, oHi);
   6239    }
   6240 
   6241    lineoff = oc_line_offset(a);
   6242    byteoff = a & 3; /* 0 or 2 */
   6243 
   6244    if (OC_ENABLE_ASSERTIONS) {
   6245       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   6246    }
   6247    line = find_OCacheLine( a );
   6248 
   6249    descr = line->descr[lineoff];
   6250    if (OC_ENABLE_ASSERTIONS) {
   6251       tl_assert(descr < 0x10);
   6252    }
   6253 
   6254    if (LIKELY(0 == (descr & (3 << byteoff)))) {
   6255       return 0;
   6256    } else {
   6257       return line->w32[lineoff];
   6258    }
   6259 }
   6260 
   6261 UWord VG_REGPARM(1) MC_(helperc_b_load4)( Addr a ) {
   6262    OCacheLine* line;
   6263    UChar descr;
   6264    UWord lineoff;
   6265 
   6266    if (UNLIKELY(a & 3)) {
   6267       /* Handle misaligned case, slowly. */
   6268       UInt oLo   = (UInt)MC_(helperc_b_load2)( a + 0 );
   6269       UInt oHi   = (UInt)MC_(helperc_b_load2)( a + 2 );
   6270       return merge_origins(oLo, oHi);
   6271    }
   6272 
   6273    lineoff = oc_line_offset(a);
   6274    if (OC_ENABLE_ASSERTIONS) {
   6275       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   6276    }
   6277 
   6278    line = find_OCacheLine( a );
   6279 
   6280    descr = line->descr[lineoff];
   6281    if (OC_ENABLE_ASSERTIONS) {
   6282       tl_assert(descr < 0x10);
   6283    }
   6284 
   6285    if (LIKELY(0 == descr)) {
   6286       return 0;
   6287    } else {
   6288       return line->w32[lineoff];
   6289    }
   6290 }
   6291 
   6292 UWord VG_REGPARM(1) MC_(helperc_b_load8)( Addr a ) {
   6293    OCacheLine* line;
   6294    UChar descrLo, descrHi, descr;
   6295    UWord lineoff;
   6296 
   6297    if (UNLIKELY(a & 7)) {
   6298       /* Handle misaligned case, slowly. */
   6299       UInt oLo   = (UInt)MC_(helperc_b_load4)( a + 0 );
   6300       UInt oHi   = (UInt)MC_(helperc_b_load4)( a + 4 );
   6301       return merge_origins(oLo, oHi);
   6302    }
   6303 
   6304    lineoff = oc_line_offset(a);
   6305    if (OC_ENABLE_ASSERTIONS) {
   6306       tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
   6307    }
   6308 
   6309    line = find_OCacheLine( a );
   6310 
   6311    descrLo = line->descr[lineoff + 0];
   6312    descrHi = line->descr[lineoff + 1];
   6313    descr   = descrLo | descrHi;
   6314    if (OC_ENABLE_ASSERTIONS) {
   6315       tl_assert(descr < 0x10);
   6316    }
   6317 
   6318    if (LIKELY(0 == descr)) {
   6319       return 0; /* both 32-bit chunks are defined */
   6320    } else {
   6321       UInt oLo = descrLo == 0 ? 0 : line->w32[lineoff + 0];
   6322       UInt oHi = descrHi == 0 ? 0 : line->w32[lineoff + 1];
   6323       return merge_origins(oLo, oHi);
   6324    }
   6325 }
   6326 
   6327 UWord VG_REGPARM(1) MC_(helperc_b_load16)( Addr a ) {
   6328    UInt oLo   = (UInt)MC_(helperc_b_load8)( a + 0 );
   6329    UInt oHi   = (UInt)MC_(helperc_b_load8)( a + 8 );
   6330    UInt oBoth = merge_origins(oLo, oHi);
   6331    return (UWord)oBoth;
   6332 }
   6333 
   6334 UWord VG_REGPARM(1) MC_(helperc_b_load32)( Addr a ) {
   6335    UInt oQ0   = (UInt)MC_(helperc_b_load8)( a + 0 );
   6336    UInt oQ1   = (UInt)MC_(helperc_b_load8)( a + 8 );
   6337    UInt oQ2   = (UInt)MC_(helperc_b_load8)( a + 16 );
   6338    UInt oQ3   = (UInt)MC_(helperc_b_load8)( a + 24 );
   6339    UInt oAll  = merge_origins(merge_origins(oQ0, oQ1),
   6340                               merge_origins(oQ2, oQ3));
   6341    return (UWord)oAll;
   6342 }
   6343 
   6344 
   6345 /*--------------------------------------------*/
   6346 /*--- Origin tracking: store handlers      ---*/
   6347 /*--------------------------------------------*/
   6348 
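        /* In all of the store handlers below, d32 is the origin tag being
           stored.  A tag of zero means "no origin": the relevant descr bits
           for the addressed bytes are cleared and the cached 32-bit origin
           word is left alone.  A nonzero tag sets those descr bits and
           overwrites the whole 32-bit origin word, even for the 1- and
           2-byte stores. */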
   6349 void VG_REGPARM(2) MC_(helperc_b_store1)( Addr a, UWord d32 ) {
   6350    OCacheLine* line;
   6351    UWord lineoff = oc_line_offset(a);
   6352    UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
   6353 
   6354    if (OC_ENABLE_ASSERTIONS) {
   6355       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   6356    }
   6357 
   6358    line = find_OCacheLine( a );
   6359 
   6360    if (d32 == 0) {
   6361       line->descr[lineoff] &= ~(1 << byteoff);
   6362    } else {
   6363       line->descr[lineoff] |= (1 << byteoff);
   6364       line->w32[lineoff] = d32;
   6365    }
   6366 }
   6367 
   6368 void VG_REGPARM(2) MC_(helperc_b_store2)( Addr a, UWord d32 ) {
   6369    OCacheLine* line;
   6370    UWord lineoff, byteoff;
   6371 
   6372    if (UNLIKELY(a & 1)) {
   6373       /* Handle misaligned case, slowly. */
   6374       MC_(helperc_b_store1)( a + 0, d32 );
   6375       MC_(helperc_b_store1)( a + 1, d32 );
   6376       return;
   6377    }
   6378 
   6379    lineoff = oc_line_offset(a);
   6380    byteoff = a & 3; /* 0 or 2 */
   6381 
   6382    if (OC_ENABLE_ASSERTIONS) {
   6383       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   6384    }
   6385 
   6386    line = find_OCacheLine( a );
   6387 
   6388    if (d32 == 0) {
   6389       line->descr[lineoff] &= ~(3 << byteoff);
   6390    } else {
   6391       line->descr[lineoff] |= (3 << byteoff);
   6392       line->w32[lineoff] = d32;
   6393    }
   6394 }
   6395 
   6396 void VG_REGPARM(2) MC_(helperc_b_store4)( Addr a, UWord d32 ) {
   6397    OCacheLine* line;
   6398    UWord lineoff;
   6399 
   6400    if (UNLIKELY(a & 3)) {
   6401       /* Handle misaligned case, slowly. */
   6402       MC_(helperc_b_store2)( a + 0, d32 );
   6403       MC_(helperc_b_store2)( a + 2, d32 );
   6404       return;
   6405    }
   6406 
   6407    lineoff = oc_line_offset(a);
   6408    if (OC_ENABLE_ASSERTIONS) {
   6409       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
   6410    }
   6411 
   6412    line = find_OCacheLine( a );
   6413 
   6414    if (d32 == 0) {
   6415       line->descr[lineoff] = 0;
   6416    } else {
   6417       line->descr[lineoff] = 0xF;
   6418       line->w32[lineoff] = d32;
   6419    }
   6420 }
   6421 
   6422 void VG_REGPARM(2) MC_(helperc_b_store8)( Addr a, UWord d32 ) {
   6423    OCacheLine* line;
   6424    UWord lineoff;
   6425 
   6426    if (UNLIKELY(a & 7)) {
   6427       /* Handle misaligned case, slowly. */
   6428       MC_(helperc_b_store4)( a + 0, d32 );
   6429       MC_(helperc_b_store4)( a + 4, d32 );
   6430       return;
   6431    }
   6432 
   6433    lineoff = oc_line_offset(a);
   6434    if (OC_ENABLE_ASSERTIONS) {
   6435       tl_assert(lineoff == (lineoff & 6)); /* 0,2,4,6 since 8-aligned */
   6436    }
   6437 
   6438    line = find_OCacheLine( a );
   6439 
   6440    if (d32 == 0) {
   6441       line->descr[lineoff + 0] = 0;
   6442       line->descr[lineoff + 1] = 0;
   6443    } else {
   6444       line->descr[lineoff + 0] = 0xF;
   6445       line->descr[lineoff + 1] = 0xF;
   6446       line->w32[lineoff + 0] = d32;
   6447       line->w32[lineoff + 1] = d32;
   6448    }
   6449 }
   6450 
   6451 void VG_REGPARM(2) MC_(helperc_b_store16)( Addr a, UWord d32 ) {
   6452    MC_(helperc_b_store8)( a + 0, d32 );
   6453    MC_(helperc_b_store8)( a + 8, d32 );
   6454 }
   6455 
   6456 void VG_REGPARM(2) MC_(helperc_b_store32)( Addr a, UWord d32 ) {
   6457    MC_(helperc_b_store8)( a +  0, d32 );
   6458    MC_(helperc_b_store8)( a +  8, d32 );
   6459    MC_(helperc_b_store8)( a + 16, d32 );
   6460    MC_(helperc_b_store8)( a + 24, d32 );
   6461 }
   6462 
   6463 
   6464 /*--------------------------------------------*/
   6465 /*--- Origin tracking: sarp handlers       ---*/
   6466 /*--------------------------------------------*/
   6467 
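        /* Set or clear origin tags over an arbitrary [a, a+len) range.  At
           most one 1-byte and one 2-byte store bring the address up to
           4-byte alignment, aligned 4-byte stores cover the bulk, and a
           trailing 2-byte and 1-byte store mop up any remainder, so the
           aligned store handlers above do nearly all of the work. */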
   6468 __attribute__((noinline))
   6469 static void ocache_sarp_Set_Origins ( Addr a, UWord len, UInt otag ) {
   6470    if ((a & 1) && len >= 1) {
   6471       MC_(helperc_b_store1)( a, otag );
   6472       a++;
   6473       len--;
   6474    }
   6475    if ((a & 2) && len >= 2) {
   6476       MC_(helperc_b_store2)( a, otag );
   6477       a += 2;
   6478       len -= 2;
   6479    }
   6480    if (len >= 4)
   6481       tl_assert(0 == (a & 3));
   6482    while (len >= 4) {
   6483       MC_(helperc_b_store4)( a, otag );
   6484       a += 4;
   6485       len -= 4;
   6486    }
   6487    if (len >= 2) {
   6488       MC_(helperc_b_store2)( a, otag );
   6489       a += 2;
   6490       len -= 2;
   6491    }
   6492    if (len >= 1) {
   6493       MC_(helperc_b_store1)( a, otag );
   6494       //a++;
   6495       len--;
   6496    }
   6497    tl_assert(len == 0);
   6498 }
   6499 
   6500 __attribute__((noinline))
   6501 static void ocache_sarp_Clear_Origins ( Addr a, UWord len ) {
   6502    if ((a & 1) && len >= 1) {
   6503       MC_(helperc_b_store1)( a, 0 );
   6504       a++;
   6505       len--;
   6506    }
   6507    if ((a & 2) && len >= 2) {
   6508       MC_(helperc_b_store2)( a, 0 );
   6509       a += 2;
   6510       len -= 2;
   6511    }
   6512    if (len >= 4)
   6513       tl_assert(0 == (a & 3));
   6514    while (len >= 4) {
   6515       MC_(helperc_b_store4)( a, 0 );
   6516       a += 4;
   6517       len -= 4;
   6518    }
   6519    if (len >= 2) {
   6520       MC_(helperc_b_store2)( a, 0 );
   6521       a += 2;
   6522       len -= 2;
   6523    }
   6524    if (len >= 1) {
   6525       MC_(helperc_b_store1)( a, 0 );
   6526       //a++;
   6527       len--;
   6528    }
   6529    tl_assert(len == 0);
   6530 }
   6531 
   6532 
   6533 /*------------------------------------------------------------*/
   6534 /*--- Setup and finalisation                               ---*/
   6535 /*------------------------------------------------------------*/
   6536 
   6537 static void mc_post_clo_init ( void )
   6538 {
   6539    /* If we've been asked to emit XML, mash around various other
   6540       options so as to constrain the output somewhat. */
   6541    if (VG_(clo_xml)) {
   6542       /* Extract as much info as possible from the leak checker. */
   6543       MC_(clo_leak_check) = LC_Full;
   6544    }
   6545 
   6546    if (MC_(clo_freelist_big_blocks) >= MC_(clo_freelist_vol))
   6547       VG_(message)(Vg_UserMsg,
   6548                    "Warning: --freelist-big-blocks value %lld has no effect\n"
   6549                    "as it is >= to --freelist-vol value %lld\n",
   6550                    MC_(clo_freelist_big_blocks),
   6551                    MC_(clo_freelist_vol));
   6552 
   6553    tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
   6554 
   6555    if (MC_(clo_mc_level) == 3) {
   6556       /* We're doing origin tracking. */
   6557 #     ifdef PERF_FAST_STACK
   6558       VG_(track_new_mem_stack_4_w_ECU)   ( mc_new_mem_stack_4_w_ECU   );
   6559       VG_(track_new_mem_stack_8_w_ECU)   ( mc_new_mem_stack_8_w_ECU   );
   6560       VG_(track_new_mem_stack_12_w_ECU)  ( mc_new_mem_stack_12_w_ECU  );
   6561       VG_(track_new_mem_stack_16_w_ECU)  ( mc_new_mem_stack_16_w_ECU  );
   6562       VG_(track_new_mem_stack_32_w_ECU)  ( mc_new_mem_stack_32_w_ECU  );
   6563       VG_(track_new_mem_stack_112_w_ECU) ( mc_new_mem_stack_112_w_ECU );
   6564       VG_(track_new_mem_stack_128_w_ECU) ( mc_new_mem_stack_128_w_ECU );
   6565       VG_(track_new_mem_stack_144_w_ECU) ( mc_new_mem_stack_144_w_ECU );
   6566       VG_(track_new_mem_stack_160_w_ECU) ( mc_new_mem_stack_160_w_ECU );
   6567 #     endif
   6568       VG_(track_new_mem_stack_w_ECU)     ( mc_new_mem_stack_w_ECU     );
   6569       VG_(track_new_mem_stack_signal)    ( mc_new_mem_w_tid_make_ECU );
   6570    } else {
   6571       /* Not doing origin tracking */
   6572 #     ifdef PERF_FAST_STACK
   6573       VG_(track_new_mem_stack_4)   ( mc_new_mem_stack_4   );
   6574       VG_(track_new_mem_stack_8)   ( mc_new_mem_stack_8   );
   6575       VG_(track_new_mem_stack_12)  ( mc_new_mem_stack_12  );
   6576       VG_(track_new_mem_stack_16)  ( mc_new_mem_stack_16  );
   6577       VG_(track_new_mem_stack_32)  ( mc_new_mem_stack_32  );
   6578       VG_(track_new_mem_stack_112) ( mc_new_mem_stack_112 );
   6579       VG_(track_new_mem_stack_128) ( mc_new_mem_stack_128 );
   6580       VG_(track_new_mem_stack_144) ( mc_new_mem_stack_144 );
   6581       VG_(track_new_mem_stack_160) ( mc_new_mem_stack_160 );
   6582 #     endif
   6583       VG_(track_new_mem_stack)     ( mc_new_mem_stack     );
   6584       VG_(track_new_mem_stack_signal) ( mc_new_mem_w_tid_no_ECU );
   6585    }
   6586 
   6587    // We assume that brk()/sbrk() does not initialise new memory.  Is this
   6588    // accurate?  John Reiser says:
   6589    //
   6590    //   "0) sbrk() can *decrease* process address space.  No zero fill is done
   6591    //   for a decrease, not even the fragment on the high end of the last page
   6592    //   that is beyond the new highest address.  For maximum safety and
   6593    //   portability, then the bytes in the last page that reside above [the
   6594    //   new] sbrk(0) should be considered to be uninitialized, but in practice
   6595    //   it is exceedingly likely that they will retain their previous
   6596    //   contents.
   6597    //
   6598    //   1) If an increase is large enough to require new whole pages, then
   6599    //   those new whole pages (like all new pages) are zero-filled by the
   6600    //   operating system.  So if sbrk(0) already is page aligned, then
   6601    //   sbrk(PAGE_SIZE) *does* zero-fill the new memory.
   6602    //
   6603    //   2) Any increase that lies within an existing allocated page is not
   6604    //   changed.  So if (x = sbrk(0)) is not page aligned, then
   6605    //   sbrk(PAGE_SIZE) yields ((PAGE_SIZE -1) & -x) bytes which keep their
   6606    //   existing contents, and an additional PAGE_SIZE bytes which are zeroed.
   6607    //   ((PAGE_SIZE -1) & x) of them are "covered" by the sbrk(), and the rest
   6608    //   of them come along for the ride because the operating system deals
   6609    //   only in whole pages.  Again, for maximum safety and portability, then
   6610    //   anything that lives above [the new] sbrk(0) should be considered
   6611    //   uninitialized, but in practice will retain previous contents [zero in
   6612    //   this case.]"
   6613    //
   6614    // In short:
   6615    //
   6616    //   A key property of sbrk/brk is that new whole pages that are supplied
   6617    //   by the operating system *do* get initialized to zero.
   6618    //
   6619    // As for the portability of all this:
   6620    //
   6621    //   sbrk and brk are not POSIX.  However, any system that is a derivative
   6622    //   of *nix has sbrk and brk because too much software (such as the
   6623    //   Bourne shell) relies on the traditional memory map (.text,
   6624    //   .data+.bss, stack) and the existence of sbrk/brk.
   6625    //
   6626    // So we should arguably observe all this.  However:
   6627    // - The current inaccuracy has caused maybe one complaint in seven years(?)
   6628    // - Relying on the zeroed-ness of whole brk'd pages is pretty grotty... I
   6629    //   doubt most programmers know the above information.
   6630    // So I'm not terribly unhappy with marking it as undefined. --njn.
   6631    //
   6632    // [More:  I think most of what John said only applies to sbrk().  It seems
   6633    // that brk() always deals in whole pages.  And since this event deals
   6634    // directly with brk(), not with sbrk(), perhaps it would be reasonable to
   6635    // just mark all memory it allocates as defined.]
   6636    //
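   6636    // Purely as an illustrative sketch of the above (the addresses and
   6636    // the PAGE_SIZE of 4096 are made up, not taken from any particular
   6636    // target):
   6636    //
   6636    //    void* x = sbrk(0);   /* suppose x == (void*)0x10100 */
   6636    //    sbrk(PAGE_SIZE);     /* break moves from 0x10100 to 0x11100 */
   6636    //
   6636    // Bytes [0x10100, 0x11000) lie in the already-mapped page and keep
   6636    // whatever contents they had, while the freshly supplied page
   6636    // [0x11000, 0x12000) is zero-filled by the kernel.  Only the second
   6636    // kind is guaranteed to be initialised, which is why the code below
   6636    // conservatively treats brk-supplied memory as undefined (with an
   6636    // origin tag attached when origin tracking is enabled).
   6636    //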
   6637    if (MC_(clo_mc_level) == 3)
   6638       VG_(track_new_mem_brk)         ( mc_new_mem_w_tid_make_ECU );
   6639    else
   6640       VG_(track_new_mem_brk)         ( mc_new_mem_w_tid_no_ECU );
   6641 
   6642    /* This origin tracking cache is huge (~100M), so only initialise
   6643       if we need it. */
   6644    if (MC_(clo_mc_level) >= 3) {
   6645       init_OCache();
   6646       tl_assert(ocacheL1 != NULL);
   6647       tl_assert(ocacheL2 != NULL);
   6648    } else {
   6649       tl_assert(ocacheL1 == NULL);
   6650       tl_assert(ocacheL2 == NULL);
   6651    }
   6652 
   6653    MC_(chunk_poolalloc) = VG_(newPA)
   6654       (sizeof(MC_Chunk) + MC_(n_where_pointers)() * sizeof(ExeContext*),
   6655        1000,
   6656        VG_(malloc),
   6657        "mc.cMC.1 (MC_Chunk pools)",
   6658        VG_(free));
   6659 
   6660    /* Do not check definedness of guest state if --undef-value-errors=no */
   6661    if (MC_(clo_mc_level) >= 2)
   6662       VG_(track_pre_reg_read) ( mc_pre_reg_read );
   6663 }
   6664 
   6665 static void print_SM_info(const HChar* type, Int n_SMs)
   6666 {
   6667    VG_(message)(Vg_DebugMsg,
   6668       " memcheck: SMs: %s = %d (%ldk, %ldM)\n",
   6669       type,
   6670       n_SMs,
   6671       n_SMs * sizeof(SecMap) / 1024UL,
   6672       n_SMs * sizeof(SecMap) / (1024 * 1024UL) );
   6673 }
   6674 
   6675 static void mc_print_stats (void)
   6676 {
   6677    SizeT max_secVBit_szB, max_SMs_szB, max_shmem_szB;
   6678 
   6679    VG_(message)(Vg_DebugMsg, " memcheck: freelist: vol %lld length %lld\n",
   6680                 VG_(free_queue_volume), VG_(free_queue_length));
   6681    VG_(message)(Vg_DebugMsg,
   6682       " memcheck: sanity checks: %d cheap, %d expensive\n",
   6683       n_sanity_cheap, n_sanity_expensive );
   6684    VG_(message)(Vg_DebugMsg,
   6685       " memcheck: auxmaps: %lld auxmap entries (%lldk, %lldM) in use\n",
   6686       n_auxmap_L2_nodes,
   6687       n_auxmap_L2_nodes * 64,
   6688       n_auxmap_L2_nodes / 16 );
   6689    VG_(message)(Vg_DebugMsg,
   6690       " memcheck: auxmaps_L1: %lld searches, %lld cmps, ratio %lld:10\n",
   6691       n_auxmap_L1_searches, n_auxmap_L1_cmps,
   6692       (10ULL * n_auxmap_L1_cmps)
   6693          / (n_auxmap_L1_searches ? n_auxmap_L1_searches : 1)
   6694    );
   6695    VG_(message)(Vg_DebugMsg,
   6696       " memcheck: auxmaps_L2: %lld searches, %lld nodes\n",
   6697       n_auxmap_L2_searches, n_auxmap_L2_nodes
   6698    );
   6699 
   6700    print_SM_info("n_issued     ", n_issued_SMs);
   6701    print_SM_info("n_deissued   ", n_deissued_SMs);
   6702    print_SM_info("max_noaccess ", max_noaccess_SMs);
   6703    print_SM_info("max_undefined", max_undefined_SMs);
   6704    print_SM_info("max_defined  ", max_defined_SMs);
   6705    print_SM_info("max_non_DSM  ", max_non_DSM_SMs);
   6706 
   6707    // Three DSMs, plus the non-DSM ones
   6708    max_SMs_szB = (3 + max_non_DSM_SMs) * sizeof(SecMap);
   6709    // The 3*sizeof(Word) bytes is the AVL node metadata size.
   6710    // The VG_ROUNDUP is because the OSet pool allocator will/must align
   6711    // the elements on pointer size.
   6712    // Note that the pool allocator has some additional small overhead
   6713    // which is not counted in the below.
   6714    // Hardwiring this logic sucks, but I don't see how else to do it.
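   6714    // As a rough worked example, on a 64-bit host (sizeof(Word) == 8)
   6714    // and supposing, purely hypothetically, that SecVBitNode rounds up
   6714    // to 48 bytes, each node is costed at 3*8 + 48 = 72 bytes, so a
   6714    // million live nodes would contribute about 72MB to the total
   6714    // computed here.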
   6715    max_secVBit_szB = max_secVBit_nodes *
   6716          (3*sizeof(Word) + VG_ROUNDUP(sizeof(SecVBitNode), sizeof(void*)));
   6717    max_shmem_szB   = sizeof(primary_map) + max_SMs_szB + max_secVBit_szB;
   6718 
   6719    VG_(message)(Vg_DebugMsg,
   6720       " memcheck: max sec V bit nodes:    %d (%ldk, %ldM)\n",
   6721       max_secVBit_nodes, max_secVBit_szB / 1024,
   6722                          max_secVBit_szB / (1024 * 1024));
   6723    VG_(message)(Vg_DebugMsg,
   6724       " memcheck: set_sec_vbits8 calls: %llu (new: %llu, updates: %llu)\n",
   6725       sec_vbits_new_nodes + sec_vbits_updates,
   6726       sec_vbits_new_nodes, sec_vbits_updates );
   6727    VG_(message)(Vg_DebugMsg,
   6728       " memcheck: max shadow mem size:   %ldk, %ldM\n",
   6729       max_shmem_szB / 1024, max_shmem_szB / (1024 * 1024));
   6730 
   6731    if (MC_(clo_mc_level) >= 3) {
   6732       VG_(message)(Vg_DebugMsg,
   6733                    " ocacheL1: %'12lu refs   %'12lu misses (%'lu lossage)\n",
   6734                    stats_ocacheL1_find,
   6735                    stats_ocacheL1_misses,
   6736                    stats_ocacheL1_lossage );
   6737       VG_(message)(Vg_DebugMsg,
   6738                    " ocacheL1: %'12lu at 0   %'12lu at 1\n",
   6739                    stats_ocacheL1_find - stats_ocacheL1_misses
   6740                       - stats_ocacheL1_found_at_1
   6741                       - stats_ocacheL1_found_at_N,
   6742                    stats_ocacheL1_found_at_1 );
   6743       VG_(message)(Vg_DebugMsg,
   6744                    " ocacheL1: %'12lu at 2+  %'12lu move-fwds\n",
   6745                    stats_ocacheL1_found_at_N,
   6746                    stats_ocacheL1_movefwds );
   6747       VG_(message)(Vg_DebugMsg,
   6748                    " ocacheL1: %'12lu sizeB  %'12u useful\n",
   6749                    (UWord)sizeof(OCache),
   6750                    4 * OC_W32S_PER_LINE * OC_LINES_PER_SET * OC_N_SETS );
   6751       VG_(message)(Vg_DebugMsg,
   6752                    " ocacheL2: %'12lu refs   %'12lu misses\n",
   6753                    stats__ocacheL2_refs,
   6754                    stats__ocacheL2_misses );
   6755       VG_(message)(Vg_DebugMsg,
   6756                    " ocacheL2:    %'9lu max nodes %'9lu curr nodes\n",
   6757                    stats__ocacheL2_n_nodes_max,
   6758                    stats__ocacheL2_n_nodes );
   6759       VG_(message)(Vg_DebugMsg,
   6760                    " niacache: %'12lu refs   %'12lu misses\n",
   6761                    stats__nia_cache_queries, stats__nia_cache_misses);
   6762    } else {
   6763       tl_assert(ocacheL1 == NULL);
   6764       tl_assert(ocacheL2 == NULL);
   6765    }
   6766 }
   6767 
   6768 
   6769 static void mc_fini ( Int exitcode )
   6770 {
   6771    MC_(print_malloc_stats)();
   6772 
   6773    if (MC_(clo_leak_check) != LC_Off) {
   6774       LeakCheckParams lcp;
   6775       lcp.mode = MC_(clo_leak_check);
   6776       lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
   6777       lcp.heuristics = MC_(clo_leak_check_heuristics);
   6778       lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
   6779       lcp.deltamode = LCD_Any;
   6780       lcp.max_loss_records_output = 999999999;
   6781       lcp.requested_by_monitor_command = False;
   6782       MC_(detect_memory_leaks)(1/*bogus ThreadId*/, &lcp);
   6783    } else {
   6784       if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
   6785          VG_(umsg)(
   6786             "For a detailed leak analysis, rerun with: --leak-check=full\n"
   6787             "\n"
   6788          );
   6789       }
   6790    }
   6791 
   6792    if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
   6793       VG_(message)(Vg_UserMsg,
   6794                    "For counts of detected and suppressed errors, rerun with: -v\n");
   6795    }
   6796 
   6797    if (MC_(any_value_errors) && !VG_(clo_xml) && VG_(clo_verbosity) >= 1
   6798        && MC_(clo_mc_level) == 2) {
   6799       VG_(message)(Vg_UserMsg,
   6800                    "Use --track-origins=yes to see where "
   6801                    "uninitialised values come from\n");
   6802    }
   6803 
   6804    /* Print a warning if any client-request generated ignore-ranges
   6805       still exist.  It would be reasonable to expect that a properly
   6806       written program would remove any such ranges before exiting, and
   6807       since they are a bit on the dangerous side, let's comment.  By
   6808       contrast ranges which are specified on the command line normally
   6809       pertain to hardware mapped into the address space, and so we
   6810       can't expect the client to have got rid of them. */
   6811    if (gIgnoredAddressRanges) {
   6812       Word i, nBad = 0;
   6813       for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
   6814          UWord val     = IAR_INVALID;
   6815          UWord key_min = ~(UWord)0;
   6816          UWord key_max = (UWord)0;
   6817          VG_(indexRangeMap)( &key_min, &key_max, &val,
   6818                              gIgnoredAddressRanges, i );
   6819          if (val != IAR_ClientReq)
   6820            continue;
   6821          /* Print the offending range.  Also, if it is the first,
   6822             print a banner before it. */
   6823          nBad++;
   6824          if (nBad == 1) {
   6825             VG_(umsg)(
   6826               "WARNING: exiting program has the following client-requested\n"
   6827               "WARNING: address error disablement range(s) still in force,\n"
   6828               "WARNING: "
   6829                  "possibly as a result of some mistake in the use of the\n"
   6830               "WARNING: "
   6831                  "VALGRIND_{DISABLE,ENABLE}_ERROR_REPORTING_IN_RANGE macros.\n"
   6832             );
   6833          }
   6834          VG_(umsg)("   [%ld]  0x%016llx-0x%016llx  %s\n",
   6835                    i, (ULong)key_min, (ULong)key_max, showIARKind(val));
   6836       }
   6837    }
   6838 
   6839    done_prof_mem();
   6840 
   6841    if (VG_(clo_stats))
   6842       mc_print_stats();
   6843 
   6844    if (0) {
   6845       VG_(message)(Vg_DebugMsg,
   6846         "------ Valgrind's client block stats follow ---------------\n" );
   6847       show_client_block_stats();
   6848    }
   6849 }
   6850 
   6851 /* Mark the given addr/len unaddressable for the watchpoint
   6852    implementation.  The PointKind will be handled at access time. */
   6853 static Bool mc_mark_unaddressable_for_watchpoint (PointKind kind, Bool insert,
   6854                                                   Addr addr, SizeT len)
   6855 {
   6856    /* GDBTD this is somewhat fishy.  We should arguably save the previous
   6857       accessibility and definedness in gdbserver so that they can be restored
   6858       properly. Currently, we assume that the user only watches things
   6859       which are properly addressable and defined */
   6860    if (insert)
   6861       MC_(make_mem_noaccess) (addr, len);
   6862    else
   6863       MC_(make_mem_defined)  (addr, len);
   6864    return True;
   6865 }
   6866 
   6867 static void mc_pre_clo_init(void)
   6868 {
   6869    VG_(details_name)            ("Memcheck");
   6870    VG_(details_version)         (NULL);
   6871    VG_(details_description)     ("a memory error detector");
   6872    VG_(details_copyright_author)(
   6873       "Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al.");
   6874    VG_(details_bug_reports_to)  (VG_BUGS_TO);
   6875    VG_(details_avg_translation_sizeB) ( 640 );
   6876 
   6877    VG_(basic_tool_funcs)          (mc_post_clo_init,
   6878                                    MC_(instrument),
   6879                                    mc_fini);
   6880 
   6881    VG_(needs_final_IR_tidy_pass)  ( MC_(final_tidy) );
   6882 
   6883 
   6884    VG_(needs_core_errors)         ();
   6885    VG_(needs_tool_errors)         (MC_(eq_Error),
   6886                                    MC_(before_pp_Error),
   6887                                    MC_(pp_Error),
   6888                                    True,/*show TIDs for errors*/
   6889                                    MC_(update_Error_extra),
   6890                                    MC_(is_recognised_suppression),
   6891                                    MC_(read_extra_suppression_info),
   6892                                    MC_(error_matches_suppression),
   6893                                    MC_(get_error_name),
   6894                                    MC_(get_extra_suppression_info),
   6895                                    MC_(print_extra_suppression_use),
   6896                                    MC_(update_extra_suppression_use));
   6897    VG_(needs_libc_freeres)        ();
   6898    VG_(needs_command_line_options)(mc_process_cmd_line_options,
   6899                                    mc_print_usage,
   6900                                    mc_print_debug_usage);
   6901    VG_(needs_client_requests)     (mc_handle_client_request);
   6902    VG_(needs_sanity_checks)       (mc_cheap_sanity_check,
   6903                                    mc_expensive_sanity_check);
   6904    VG_(needs_print_stats)         (mc_print_stats);
   6905    VG_(needs_info_location)       (MC_(pp_describe_addr));
   6906    VG_(needs_malloc_replacement)  (MC_(malloc),
   6907                                    MC_(__builtin_new),
   6908                                    MC_(__builtin_vec_new),
   6909                                    MC_(memalign),
   6910                                    MC_(calloc),
   6911                                    MC_(free),
   6912                                    MC_(__builtin_delete),
   6913                                    MC_(__builtin_vec_delete),
   6914                                    MC_(realloc),
   6915                                    MC_(malloc_usable_size),
   6916                                    MC_MALLOC_DEFAULT_REDZONE_SZB );
   6917    MC_(Malloc_Redzone_SzB) = VG_(malloc_effective_client_redzone_size)();
   6918 
   6919    VG_(needs_xml_output)          ();
   6920 
   6921    VG_(track_new_mem_startup)     ( mc_new_mem_startup );
   6922 
   6923    // Handling of mmap and mprotect isn't simple (well, it is simple,
   6924    // but the justification isn't.)  See comments above, just prior to
   6925    // mc_new_mem_mmap.
   6926    VG_(track_new_mem_mmap)        ( mc_new_mem_mmap );
   6927    VG_(track_change_mem_mprotect) ( mc_new_mem_mprotect );
   6928 
   6929    VG_(track_copy_mem_remap)      ( MC_(copy_address_range_state) );
   6930 
   6931    VG_(track_die_mem_stack_signal)( MC_(make_mem_noaccess) );
   6932    VG_(track_die_mem_brk)         ( MC_(make_mem_noaccess) );
   6933    VG_(track_die_mem_munmap)      ( MC_(make_mem_noaccess) );
   6934 
   6935    /* Defer the specification of the new_mem_stack functions to the
   6936       post_clo_init function, since we need to first parse the command
   6937       line before deciding which set to use. */
   6938 
   6939 #  ifdef PERF_FAST_STACK
   6940    VG_(track_die_mem_stack_4)     ( mc_die_mem_stack_4   );
   6941    VG_(track_die_mem_stack_8)     ( mc_die_mem_stack_8   );
   6942    VG_(track_die_mem_stack_12)    ( mc_die_mem_stack_12  );
   6943    VG_(track_die_mem_stack_16)    ( mc_die_mem_stack_16  );
   6944    VG_(track_die_mem_stack_32)    ( mc_die_mem_stack_32  );
   6945    VG_(track_die_mem_stack_112)   ( mc_die_mem_stack_112 );
   6946    VG_(track_die_mem_stack_128)   ( mc_die_mem_stack_128 );
   6947    VG_(track_die_mem_stack_144)   ( mc_die_mem_stack_144 );
   6948    VG_(track_die_mem_stack_160)   ( mc_die_mem_stack_160 );
   6949 #  endif
   6950    VG_(track_die_mem_stack)       ( mc_die_mem_stack     );
   6951 
   6952    VG_(track_ban_mem_stack)       ( MC_(make_mem_noaccess) );
   6953 
   6954    VG_(track_pre_mem_read)        ( check_mem_is_defined );
   6955    VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
   6956    VG_(track_pre_mem_write)       ( check_mem_is_addressable );
   6957    VG_(track_post_mem_write)      ( mc_post_mem_write );
   6958 
   6959    VG_(track_post_reg_write)                  ( mc_post_reg_write );
   6960    VG_(track_post_reg_write_clientcall_return)( mc_post_reg_write_clientcall );
   6961 
   6962    VG_(needs_watchpoint)          ( mc_mark_unaddressable_for_watchpoint );
   6963 
   6964    init_shadow_memory();
   6965    // MC_(chunk_poolalloc) must be allocated in post_clo_init
   6966    tl_assert(MC_(chunk_poolalloc) == NULL);
   6967    MC_(malloc_list)  = VG_(HT_construct)( "MC_(malloc_list)" );
   6968    MC_(mempool_list) = VG_(HT_construct)( "MC_(mempool_list)" );
   6969    init_prof_mem();
   6970 
   6971    tl_assert( mc_expensive_sanity_check() );
   6972 
   6973    // {LOADV,STOREV}[8421] will all fail horribly if this isn't true.
   6974    tl_assert(sizeof(UWord) == sizeof(Addr));
   6975    // Call me paranoid.  I don't care.
   6976    tl_assert(sizeof(void*) == sizeof(Addr));
   6977 
   6978    // BYTES_PER_SEC_VBIT_NODE must be a power of two.
   6979    tl_assert(-1 != VG_(log2)(BYTES_PER_SEC_VBIT_NODE));
   6980 
   6981    /* This is small.  Always initialise it. */
   6982    init_nia_to_ecu_cache();
   6983 
   6984    /* We can't initialise ocacheL1/ocacheL2 yet, since we don't know
   6985       if we need to, since the command line args haven't been
   6986       processed yet.  Hence defer it to mc_post_clo_init. */
   6987    tl_assert(ocacheL1 == NULL);
   6988    tl_assert(ocacheL2 == NULL);
   6989 
   6990    /* Check some important stuff.  See extensive comments above
   6991       re UNALIGNED_OR_HIGH for background. */
   6992 #  if VG_WORDSIZE == 4
   6993    tl_assert(sizeof(void*) == 4);
   6994    tl_assert(sizeof(Addr)  == 4);
   6995    tl_assert(sizeof(UWord) == 4);
   6996    tl_assert(sizeof(Word)  == 4);
   6997    tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFUL);
   6998    tl_assert(MASK(1) == 0UL);
   6999    tl_assert(MASK(2) == 1UL);
   7000    tl_assert(MASK(4) == 3UL);
   7001    tl_assert(MASK(8) == 7UL);
   7002 #  else
   7003    tl_assert(VG_WORDSIZE == 8);
   7004    tl_assert(sizeof(void*) == 8);
   7005    tl_assert(sizeof(Addr)  == 8);
   7006    tl_assert(sizeof(UWord) == 8);
   7007    tl_assert(sizeof(Word)  == 8);
   7008    tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFFULL);
   7009    tl_assert(MASK(1) == 0xFFFFFFF000000000ULL);
   7010    tl_assert(MASK(2) == 0xFFFFFFF000000001ULL);
   7011    tl_assert(MASK(4) == 0xFFFFFFF000000003ULL);
   7012    tl_assert(MASK(8) == 0xFFFFFFF000000007ULL);
   7013 #  endif
   7014 }
   7015 
   7016 VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init)
   7017 
   7018 /*--------------------------------------------------------------------*/
   7019 /*--- end                                                mc_main.c ---*/
   7020 /*--------------------------------------------------------------------*/
   7021