Home | History | Annotate | Download | only in exp-ptrcheck
      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Ptrcheck: a pointer-use checker.                             ---*/
      4 /*--- This file checks heap accesses.                              ---*/
      5 /*---                                                     h_main.c ---*/
      6 /*--------------------------------------------------------------------*/
      7 
      8 /*
      9    This file is part of Ptrcheck, a Valgrind tool for checking pointer
     10    use in programs.
     11 
     12    Initial version (Annelid):
     13 
     14    Copyright (C) 2003-2010 Nicholas Nethercote
     15       njn (at) valgrind.org
     16 
     17    Valgrind-3.X port:
     18 
     19    Copyright (C) 2008-2010 OpenWorks Ltd
     20       info (at) open-works.co.uk
     21 
     22    This program is free software; you can redistribute it and/or
     23    modify it under the terms of the GNU General Public License as
     24    published by the Free Software Foundation; either version 2 of the
     25    License, or (at your option) any later version.
     26 
     27    This program is distributed in the hope that it will be useful, but
     28    WITHOUT ANY WARRANTY; without even the implied warranty of
     29    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     30    General Public License for more details.
     31 
     32    You should have received a copy of the GNU General Public License
     33    along with this program; if not, write to the Free Software
     34    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     35    02111-1307, USA.
     36 
     37    The GNU General Public License is contained in the file COPYING.
     38 */
     39 
     40 // FIXME: 64-bit cleanness, check the following
     41 // struct _ISNode.ownerCount is 32-bit
     42 // struct _ISNode.topLevel is 32-bit
     43 // or is that not really right now?  add assertion checks about
     44 // the max size of a node
     45 
     46 // FIXME: should we shadow %RIP?  Maybe not.
     47 
     48 // FIXME: shadows of temporaries created in preamble, a la memcheck?
     49 
     50 // FIXME: result of add_new_segment is always ignored
     51 
     52 // FIXME: the mechanism involving last_seg_added is really ugly.
     53 // Do something cleaner.
     54 
     55 // FIXME: post_reg_write_clientcall: check function pointer comparisons
     56 // are safe on toc-afflicted platforms
     57 
     58 // FIXME: tidy up findShadowTmp
     59 
     60 // FIXME: post_reg_write_demux(Vg_CoreSysCall) is redundant w.r.t.
     61 // the default 'NONPTR' behaviour of post_syscall.  post_reg_write_demux
     62 // is called first, then post_syscall.
     63 
     64 // FIXME: check nothing is mapped in the lowest 1M of memory at
     65 // startup, or quit (to do with nonptr_or_unknown, also sync 1M
     66 // magic value with PIE default load address in m_ume.c.
     67 
     68 // FIXME: consider whether we could paint memory acquired from
     69 // sys_read etc as NONPTR rather than UNKNOWN.
     70 
     71 // XXX: recycle freed segments
     72 
     73 //--------------------------------------------------------------
     74 // Metadata:
     75 //   HeapBlock.id :: Seg (stored as heap shadowchunk; always non-zero)
     76 //   MemLoc.aseg  :: Seg (implicitly stored)
     77 //   MemLoc.vseg  :: Seg (explicitly stored as the shadow memory)
     78 //   RegLoc.vseg  :: Seg (explicitly stored as shadow registers)
     79 //
     80 // A Seg is made when new memory is created, eg. with malloc() or mmap().
// There are three other special Segs:
     82 //  - NONPTR:  for something that's definitely not a pointer
     83 //  - UNKNOWN: for something that could be a pointer
     84 //  - BOTTOM:  used with pointer differences (see below)
     85 //
     86 // MemLoc.vseg is done at word granularity.  If a pointer is written
     87 // to memory misaligned, the information about it will be lost -- it's
     88 // treated as two sub-word writes to two adjacent words.  This avoids
     89 // certain nasty cases that could arise if we tried to track unaligned
     90 // pointers.  Fortunately, misalignment is rare so we don't lose much
     91 // information this way.
     92 //
     93 // MemLoc.aseg is done at byte granularity, and *implicitly* -- ie. not
     94 // directly accessible like MemLoc.vseg, but only by searching through all
     95 // the segments.  Fortunately, it's mostly checked at LOADs/STOREs;  at that
     96 // point we have a pointer p to the MemLoc m as the other arg of the
     97 // LOAD/STORE, so we can check to see if the p.vseg's range includes m.  If
     98 // not, it's an error and we have to search through all segments to find out
     99 // what m.aseg really is.  That's still pretty fast though, thanks to the
    100 // interval skip-list used.  With syscalls we must also do the skip-list
    101 // search, but only on the first and last bytes touched.
    102 //--------------------------------------------------------------
    103 
    104 //--------------------------------------------------------------
    105 // Assumptions, etc:
    106 // - see comment at top of SK_(instrument)() for how sub-word ops are
    107 //   handled.
    108 //
    109 // - ioctl(), socketcall() (and ipc() will be) assumed to return non-pointers
    110 //
    111 // - FPU_W is assumed to never write pointers.
    112 //
    113 // - Assuming none of the post_mem_writes create segments worth tracking.
    114 //
    115 // - Treating mmap'd segments (all! including code) like heap segments.  But
    116 //   their ranges can change, new ones can be created by unmapping parts of
    117 //   old segments, etc.  But this nasty behaviour seems to never happen --
    118 //   there are assertions checking it.
    119 //--------------------------------------------------------------
    120 
    121 //--------------------------------------------------------------
    122 // What I am checking:
    123 // - Type errors:
    124 //    * ADD, OR, LEA2: error if two pointer inputs.
    125 //    * ADC, SBB: error if one or two pointer inputs.
    126 //    * AND, OR: error if two unequal pointer inputs.
    127 //    * NEG: error if pointer input.
    128 //    * {,i}mul_32_64 if either input is a pointer.
    129 //    * shldl/shrdl, bsf/bsr if any inputs are pointers.
    130 //
    131 // - LOAD, STORE:
    132 //    * ptr.vseg must match ptee.aseg.
    133 //    * ptee.aseg must not be a freed segment.
    134 //
    135 // - syscalls: for those accessing memory, look at first and last bytes:
    136 //    * check first.aseg == last.aseg
    137 //    * check first.aseg and last.aseg are not freed segments.
    138 //
    139 // What I am not checking, that I expected to when I started:
    140 // - AND, XOR: allowing two pointers to be used if both from the same segment,
    141 //   because "xor %r,%r" is commonly used to zero %r, and "test %r,%r"
    142 //   (which is translated with an AND) is common too.
    143 //
    144 // - div_64_32/idiv_64_32 can take pointer inputs for the dividend;
    145 //   division doesn't make sense, but modulo does, and they're done with the
    146 //   same instruction.  (Could try to be super-clever and watch the outputs
    147 //   to see if the quotient is used, but not worth it.)
    148 //
    149 // - mul_64_32/imul_64_32 can take pointers inputs for one arg or the
    150 //   other, but not both.  This is because some programs (eg. Mozilla
    151 //   Firebird) multiply pointers in hash routines.
    152 //
    153 // - NEG: can take a pointer.  It happens in glibc in a few places.  I've
    154 //   seen the code, didn't understand it, but it's done deliberately.
    155 //
    156 // What I am not checking/doing, but could, but it would require more
    157 // instrumentation and/or slow things down a bit:
    158 // - SUB: when differencing two pointers, result is BOTTOM, ie. "don't
    159 //   check".  Could link segments instead, slower but a bit more accurate.
    160 //   Also use BOTTOM when doing (ptr - unknown), which could be a pointer
    161 //   difference with a stack/static pointer.
    162 //
    163 // - PUTF: input should be non-pointer
    164 //
    165 // - arithmetic error messages: eg. for adding two pointers, just giving the
    166 //   segments, not the actual pointers.
    167 //
    168 // What I am not checking, and would be difficult:
    169 // - mmap(...MAP_FIXED...) is not handled specially.  It might be used in
    170 //   ways that fool Ptrcheck into giving false positives.
    171 //
    172 // - syscalls: for those accessing memory, not checking that the asegs of the
    173 //   accessed words match the vseg of the accessing pointer, because the
    174 //   vseg is not easily accessible at the required time (would required
    175 //   knowing for every syscall which register each arg came in, and looking
    176 //   there).
    177 //
    178 // What I am not checking, and would be difficult, but doesn't matter:
    179 // - free(p): similar to syscalls, not checking that the p.vseg matches the
    180 //   aseg of the first byte in the block.  However, Memcheck does an
    181 //   equivalent "bad free" check using shadow_chunks;  indeed, Ptrcheck could
    182 //   do the same check, but there's no point duplicating functionality.  So
    183 //   no loss, really.
    184 //
    185 // Other:
    186 // - not doing anything with mprotect();  probably not worth the effort.
    187 //--------------------------------------------------------------
    188 
    189 //--------------------------------------------------------------
    190 // Todo:
    191 // - Segments for stack frames.  Would detect (some, large) stack
    192 //   over/under-runs, dangling pointers.
    193 //
    194 // - Segments for static data.  Would detect over/under-runs.  Requires
    195 //   reading debug info.
    196 //--------------------------------------------------------------
    197 
    198 //--------------------------------------------------------------
    199 // Some profiling results:
    200 //                                                 twolf   konq    date sz
    201 // 1. started                                              35.0s   14.7
    202 // 2. introduced GETV/PUTV                                 30.2s   10.1
    203 // 3. inlined check_load_or_store                  5.6s    27.5s   10.1
    204 // 4. (made check_load, check_store4 regparm(0))          (27.9s) (11.0)
    205 // 5. um, not sure                                 5.3s    27.3s   10.6
    206 //    ...
    207 // 6. after big changes, corrections              11.2s    32.8s   14.0
    208 // 7. removed link-segment chasing in check/L/S    8.9s    30.8s   14.0
    209 // 8. avoiding do_lea1 if k is a nonptr            8.0s    28.0s   12.9
    210 //--------------------------------------------------------------
    211 
    212 //#include "vg_skin.h"
    213 
    214 #include "pub_tool_basics.h"
    215 #include "pub_tool_libcbase.h"
    216 #include "pub_tool_libcprint.h"
    217 #include "pub_tool_libcassert.h"
    218 #include "pub_tool_mallocfree.h"
    219 #include "pub_tool_execontext.h"
    220 #include "pub_tool_hashtable.h"
    221 #include "pub_tool_tooliface.h"
    222 #include "pub_tool_replacemalloc.h"
    223 #include "pub_tool_options.h"
    224 #include "pub_tool_execontext.h"
    225 #include "pub_tool_aspacemgr.h"    // VG_(am_shadow_malloc)
    226 #include "pub_tool_vki.h"          // VKI_MAX_PAGE_SIZE
    227 #include "pub_tool_machine.h"      // VG_({get,set}_shadow_regs_area) et al
    228 #include "pub_tool_debuginfo.h"    // VG_(get_fnname)
    229 #include "pub_tool_threadstate.h"  // VG_(get_running_tid)
    230 #include "pub_tool_oset.h"
    231 #include "pub_tool_vkiscnums.h"
    232 #include "pub_tool_machine.h"
    233 #include "pub_tool_wordfm.h"
    234 #include "pub_tool_xarray.h"
    235 
    236 #include "pc_common.h"
    237 
    238 //#include "h_list.h"
    239 #include "h_main.h"
    240 
    241 #include "sg_main.h"   // sg_instrument_*, and struct _SGEnv
    242 
    243 
    244 
    245 /*------------------------------------------------------------*/
    246 /*--- Debug/trace options                                  ---*/
    247 /*------------------------------------------------------------*/
    248 
    249 /* Set to 1 to do sanity checks on Seg values in many places, which
    250    checks if bogus Segs are in circulation.  Quite expensive from a
    251    performance point of view. */
    252 #define SC_SEGS 0
    253 
    254 static ULong stats__client_mallocs = 0;
    255 static ULong stats__client_frees   = 0;
    256 static ULong stats__segs_allocd    = 0;
    257 static ULong stats__segs_recycled  = 0;
    258 
    259 
    260 //////////////////////////////////////////////////////////////
    261 //                                                          //
    262 // Segments low level storage                               //
    263 //                                                          //
    264 //////////////////////////////////////////////////////////////
    265 
    266 // NONPTR, UNKNOWN, BOTTOM defined in h_main.h since
    267 // pc_common.c needs to see them, for error processing
    268 
    269 // we only start recycling segs when this many exist
    270 #define N_FREED_SEGS (1 * 1000 * 1000)
    271 
/* Metadata for one heap block (a "segment"): its address range, the
   ExeContext where it was malloc'd (or, once freed, where it was
   freed), and linkage for the freed-Segs recycling list. */
struct _Seg {
   Addr  addr;
   SizeT szB; /* may be zero */
   ExeContext* ec;  /* where malloc'd or freed */
   /* When 1, indicates block is in use.  Otherwise, used to form a
      linked list of freed blocks, running from oldest freed block to
      the most recently freed block. */
   struct _Seg* nextfree;
};
    281 
    282 // Determines if 'a' is before, within, or after seg's range.  Sets 'cmp' to
    283 // -1/0/1 accordingly.  Sets 'n' to the number of bytes before/within/after.
    284 void Seg__cmp(Seg* seg, Addr a, Int* cmp, UWord* n)
    285 {
    286    if (a < seg->addr) {
    287       *cmp = -1;
    288       *n   = seg->addr - a;
    289    } else if (a < seg->addr + seg->szB && seg->szB > 0) {
    290       *cmp = 0;
    291       *n = a - seg->addr;
    292    } else {
    293       *cmp = 1;
    294       *n = a - (seg->addr + seg->szB);
    295    }
    296 }
    297 
    298 inline Bool Seg__is_freed(Seg* seg)
    299 {
    300    if (!is_known_segment(seg))
    301       return False;
    302    else
    303       return seg->nextfree != (Seg*)1;
    304 }
    305 
/* The ExeContext recorded for this segment: its allocation point, or,
   once freed, its free point.  seg must be a real segment. */
ExeContext* Seg__where(Seg* seg)
{
   tl_assert(is_known_segment(seg));
   return seg->ec;
}
    311 
/* Size in bytes of the block described by seg (may be zero).  seg must
   be a real segment. */
SizeT Seg__size(Seg* seg)
{
   tl_assert(is_known_segment(seg));
   return seg->szB;
}
    317 
/* Start address of the block described by seg.  seg must be a real
   segment. */
Addr Seg__addr(Seg* seg)
{
   tl_assert(is_known_segment(seg));
   return seg->addr;
}
    323 
    324 
    325 #define N_SEGS_PER_GROUP 10000
    326 
    327 typedef
    328    struct _SegGroup {
    329       struct _SegGroup* admin;
    330       UWord nextfree; /* 0 .. N_SEGS_PER_GROUP */
    331       Seg segs[N_SEGS_PER_GROUP];
    332    }
    333    SegGroup;
    334 
    335 static SegGroup* group_list = NULL;
    336 static UWord     nFreeSegs = 0;
    337 static Seg*      freesegs_youngest = NULL;
    338 static Seg*      freesegs_oldest = NULL;
    339 
    340 
    341 static SegGroup* new_SegGroup ( void ) {
    342    SegGroup* g = VG_(malloc)("pc.h_main.nTG.1", sizeof(SegGroup));
    343    VG_(memset)(g, 0, sizeof(*g));
    344    return g;
    345 }
    346 
    347 /* Get a completely new Seg */
    348 static Seg* new_Seg ( void )
    349 {
    350    Seg*      teg;
    351    SegGroup* g;
    352    if (group_list == NULL) {
    353       g = new_SegGroup();
    354       g->admin = NULL;
    355       group_list = g;
    356    }
    357    tl_assert(group_list->nextfree <= N_SEGS_PER_GROUP);
    358    if (group_list->nextfree == N_SEGS_PER_GROUP) {
    359       g = new_SegGroup();
    360       g->admin = group_list;
    361       group_list = g;
    362    }
    363    tl_assert(group_list->nextfree < N_SEGS_PER_GROUP);
    364    teg = &group_list->segs[ group_list->nextfree ];
    365    group_list->nextfree++;
    366    stats__segs_allocd++;
    367    return teg;
    368 }
    369 
/* Get a Seg for a new client block.  Brand-new Segs are handed out
   until N_FREED_SEGS freed ones have accumulated; thereafter the
   oldest freed Seg is recycled, bounding total Seg storage. */
static Seg* get_Seg_for_malloc ( void )
{
   Seg* seg;
   if (nFreeSegs < N_FREED_SEGS) {
      seg = new_Seg();
      seg->nextfree = (Seg*)1;  /* (Seg*)1 marks the Seg as in-use */
      return seg;
   }
   /* else recycle the oldest Seg in the free list */
   tl_assert(freesegs_youngest);
   tl_assert(freesegs_oldest);
   /* With N_FREED_SEGS (>= 2) entries on the list, head != tail. */
   tl_assert(freesegs_youngest != freesegs_oldest);
   seg = freesegs_oldest;
   freesegs_oldest = seg->nextfree;  /* unlink at the oldest end */
   nFreeSegs--;
   seg->nextfree = (Seg*)1;  /* mark as in-use again */
   stats__segs_recycled++;
   return seg;
}
    389 
/* Retire an in-use Seg onto the freed list, which runs from
   freesegs_oldest to freesegs_youngest via the nextfree links
   (youngest->nextfree is always NULL). */
static void set_Seg_freed ( Seg* seg )
{
   tl_assert(seg);
   tl_assert(!Seg__is_freed(seg));
   if (nFreeSegs == 0) {
      /* Empty list: seg becomes both head and tail. */
      tl_assert(freesegs_oldest == NULL);
      tl_assert(freesegs_youngest == NULL);
      seg->nextfree = NULL;
      freesegs_youngest = seg;
      freesegs_oldest = seg;
      nFreeSegs++;
   } else {
      /* Sanity-check the list invariants before appending. */
      tl_assert(freesegs_youngest);
      tl_assert(freesegs_oldest);
      if (nFreeSegs == 1) {
         tl_assert(freesegs_youngest == freesegs_oldest);
      } else {
         tl_assert(freesegs_youngest != freesegs_oldest);
      }
      tl_assert(freesegs_youngest->nextfree == NULL);
      /* seg must not already be on the list */
      tl_assert(seg != freesegs_youngest && seg != freesegs_oldest);
      /* Append at the youngest end. */
      seg->nextfree = NULL;
      freesegs_youngest->nextfree = seg;
      freesegs_youngest = seg;
      nFreeSegs++;
   }
}
    417 
    418 static WordFM* addr_to_seg_map = NULL; /* GuestAddr -> Seg* */
    419 
    420 static void addr_to_seg_map_ENSURE_INIT ( void )
    421 {
    422    if (UNLIKELY(addr_to_seg_map == NULL)) {
    423       addr_to_seg_map = VG_(newFM)( VG_(malloc), "pc.h_main.attmEI.1",
    424                                     VG_(free), NULL/*unboxedcmp*/ );
    425    }
    426 }
    427 
    428 static Seg* find_Seg_by_addr ( Addr ga )
    429 {
    430    UWord keyW, valW;
    431    addr_to_seg_map_ENSURE_INIT();
    432    if (VG_(lookupFM)( addr_to_seg_map, &keyW, &valW, (UWord)ga )) {
    433       tl_assert(keyW == ga);
    434       return (Seg*)valW;
    435    } else {
    436       return NULL;
    437    }
    438 }
    439 
    440 static void bind_addr_to_Seg ( Addr ga, Seg* seg )
    441 {
    442    Bool b;
    443    addr_to_seg_map_ENSURE_INIT();
    444    b = VG_(addToFM)( addr_to_seg_map, (UWord)ga, (UWord)seg );
    445    tl_assert(!b); /* else ga is already bound */
    446 }
    447 
    448 static void unbind_addr_from_Seg ( Addr ga )
    449 {
    450    Bool b;
    451    UWord keyW, valW;
    452    addr_to_seg_map_ENSURE_INIT();
    453    b = VG_(delFromFM)( addr_to_seg_map, &keyW, &valW, (UWord)ga );
    454    tl_assert(b); /* else ga was not already bound */
    455    tl_assert(keyW == ga);
    456    tl_assert(valW != 0);
    457 }
    458 
    459 
    460 //////////////////////////////////////////////////////////////
    461 //////////////////////////////////////////////////////////////
    462 //////////////////////////////////////////////////////////////
    463 
    464 // So that post_reg_write_clientcall knows the segment just allocated.
    465 static Seg* last_seg_added = NULL;
    466 
    467 // Returns the added heap segment
    468 static Seg* add_new_segment ( ThreadId tid, Addr p, SizeT size )
    469 {
    470    Seg* seg = get_Seg_for_malloc();
    471    tl_assert(seg != (Seg*)1); /* since we're using 1 as a special value */
    472    seg->addr = p;
    473    seg->szB  = size;
    474    seg->ec   = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
    475    tl_assert(!Seg__is_freed(seg));
    476 
    477    bind_addr_to_Seg(p, seg);
    478 
    479    last_seg_added = seg;
    480 
    481    return seg;
    482 }
    483 
    484 // Forward declarations
    485 static void copy_mem( Addr from, Addr to, SizeT len );
    486 static void set_mem_unknown ( Addr a, SizeT len );
    487 
    488 static inline VG_REGPARM(1) Seg* nonptr_or_unknown(UWord x); /*fwds*/
    489 
    490 static
    491 void* alloc_and_new_mem_heap ( ThreadId tid,
    492                                SizeT size, SizeT alignment, Bool is_zeroed )
    493 {
    494    Addr p;
    495 
    496    if ( ((SSizeT)size) < 0) return NULL;
    497 
    498    p = (Addr)VG_(cli_malloc)(alignment, size);
    499    if (is_zeroed) VG_(memset)((void*)p, 0, size);
    500 
    501    set_mem_unknown( p, size );
    502    add_new_segment( tid, p, size );
    503 
    504    stats__client_mallocs++;
    505    return (void*)p;
    506 }
    507 
/* Free the client block described by 'seg': mark its shadow UNKNOWN,
   release the memory, record the free point, retire the Seg onto the
   freed list, and drop the addr->Seg binding.  Note the ordering:
   seg->addr is still read by unbind_addr_from_Seg after
   set_Seg_freed, and neither of those touches it. */
static void die_and_free_mem_heap ( ThreadId tid, Seg* seg )
{
   // Empty and free the actual block
   tl_assert(!Seg__is_freed(seg));
   set_mem_unknown( seg->addr, seg->szB );

   VG_(cli_free)( (void*)seg->addr );

   // Remember where freed
   seg->ec = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );

   set_Seg_freed(seg);
   unbind_addr_from_Seg( seg->addr );

   stats__client_frees++;
}
    524 
    525 static void handle_free_heap( ThreadId tid, void* p )
    526 {
    527    Seg* seg = find_Seg_by_addr( (Addr)p );
    528    if (!seg) {
    529       /* freeing a block that wasn't malloc'd.  Ignore. */
    530       return;
    531    }
    532    die_and_free_mem_heap( tid, seg );
    533 }
    534 
    535 
    536 /*------------------------------------------------------------*/
    537 /*--- Shadow memory                                        ---*/
    538 /*------------------------------------------------------------*/
    539 
    540 /* Shadow memory holds one Seg for each naturally aligned (guest)
    541    word.  For a 32 bit target (assuming host word size == guest word
    542    size) that means one Seg per 4 bytes, and each Seg occupies 4
    543    bytes.  For a 64 bit target that means one Seg per 8 bytes, and
    544    each Seg occupies 8 bytes.  Hence in each case the overall space
    545    overhead for shadow memory is 1:1.
    546 
   This does however make it a bit tricky to size SecMap.vseg[], since
    548    it needs to hold 16384 entries for 32 bit targets but only 8192
    549    entries for 64 bit targets. */
    550 
    551 #if 0
    552 __attribute__((unused))
    553 static void pp_curr_ExeContext(void)
    554 {
    555    VG_(pp_ExeContext)(
    556       VG_(get_ExeContext)(
    557          VG_(get_current_or_recent_tid)() ) );
    558    VG_(message)(Vg_UserMsg, "");
    559 }
    560 #endif
    561 
    562 #if defined(VGA_x86) || defined(VGA_ppc32) || defined(VGA_arm)
    563 #  define SHMEM_SECMAP_MASK         0xFFFC
    564 #  define SHMEM_SECMAP_SHIFT        2
    565 #  define SHMEM_IS_WORD_ALIGNED(_a) VG_IS_4_ALIGNED(_a)
    566 #  define SEC_MAP_WORDS             (0x10000UL / 4UL) /* 16k */
    567 #elif defined(VGA_amd64) || defined(VGA_ppc64)
    568 #  define SHMEM_SECMAP_MASK         0xFFF8
    569 #  define SHMEM_SECMAP_SHIFT        3
    570 #  define SHMEM_IS_WORD_ALIGNED(_a) VG_IS_8_ALIGNED(_a)
    571 #  define SEC_MAP_WORDS             (0x10000UL / 8UL) /* 8k */
    572 #else
    573 #  error "Unknown arch"
    574 #endif
    575 
/* One shadow "secondary map": the vseg (Seg*) for every naturally
   aligned guest word in one 64KB region of address space. */
typedef
   struct {
      Seg* vseg[SEC_MAP_WORDS];
   }
   SecMap;
    581 
    582 static SecMap  distinguished_secondary_map;
    583 
    584 /* An entry in the primary map.  base must be a 64k-aligned value, and
    585    sm points at the relevant secondary map.  The secondary may be
    586    either a real secondary, or the distinguished secondary.  DO NOT
    587    CHANGE THIS LAYOUT: the first word has to be the key for OSet fast
    588    lookups.
    589 */
    590 typedef
    591    struct {
    592       Addr    base;
    593       SecMap* sm;
    594    }
    595    PriMapEnt;
    596 
    597 /* Primary map is an OSet of PriMapEnt (primap_L2), "fronted" by a
    598    cache (primap_L1). */
    599 
    600 /* Tunable parameter: How big is the L1 queue? */
    601 #define N_PRIMAP_L1 24
    602 
    603 /* Tunable parameter: How far along the L1 queue to insert
    604    entries resulting from L2 lookups? */
    605 #define PRIMAP_L1_INSERT_IX 12
    606 
    607 static struct {
    608           Addr       base; // must be 64k aligned
    609           PriMapEnt* ent; // pointer to the matching primap_L2 node
    610        }
    611        primap_L1[N_PRIMAP_L1];
    612 
    613 static OSet* primap_L2 = NULL;
    614 
    615 
    616 /* # searches initiated in auxmap_L1, and # base cmps required */
    617 static ULong n_primap_L1_searches  = 0;
    618 static ULong n_primap_L1_cmps      = 0;
    619 /* # of searches that missed in auxmap_L1 and therefore had to
    620    be handed to auxmap_L2. And the number of nodes inserted. */
    621 static ULong n_primap_L2_searches  = 0;
    622 static ULong n_primap_L2_nodes     = 0;
    623 
    624 
    625 static void init_shadow_memory ( void )
    626 {
    627    Int i;
    628 
    629    for (i = 0; i < SEC_MAP_WORDS; i++)
    630       distinguished_secondary_map.vseg[i] = NONPTR;
    631 
    632    for (i = 0; i < N_PRIMAP_L1; i++) {
    633       primap_L1[i].base = 1; /* not 64k aligned, so doesn't match any
    634                                 request ==> slot is empty */
    635       primap_L1[i].ent  = NULL;
    636    }
    637 
    638    tl_assert(0 == offsetof(PriMapEnt,base));
    639    tl_assert(sizeof(Addr) == sizeof(void*));
    640    primap_L2 = VG_(OSetGen_Create)( /*keyOff*/  offsetof(PriMapEnt,base),
    641                                     /*fastCmp*/ NULL,
    642                                     VG_(malloc), "pc.h_main.ism.1",
    643                                     VG_(free) );
    644    tl_assert(primap_L2);
    645 }
    646 
    647 static void insert_into_primap_L1_at ( Word rank, PriMapEnt* ent )
    648 {
    649    Word i;
    650    tl_assert(ent);
    651    tl_assert(rank >= 0 && rank < N_PRIMAP_L1);
    652    for (i = N_PRIMAP_L1-1; i > rank; i--)
    653       primap_L1[i] = primap_L1[i-1];
    654    primap_L1[rank].base = ent->base;
    655    primap_L1[rank].ent  = ent;
    656 }
    657 
/* Find the PriMapEnt for the 64KB region containing 'a', or NULL if
   that region has no secondary map yet.  The L1 front-cache is a
   self-organising list: a hit at slot i swaps the entry one place
   towards the front, and entries found via the L2 OSet are re-inserted
   into L1 at PRIMAP_L1_INSERT_IX. */
static inline PriMapEnt* maybe_find_in_primap ( Addr a )
{
   PriMapEnt  key;
   PriMapEnt* res;
   Word       i;

   a &= ~(Addr)0xFFFF;  /* round down to the 64KB region base */

   /* First search the front-cache, which is a self-organising
      list containing the most popular entries. */

   /* Slots 0 and 1 are special-cased for speed; note these hits are
      not counted in n_primap_L1_searches. */
   if (LIKELY(primap_L1[0].base == a))
      return primap_L1[0].ent;
   if (LIKELY(primap_L1[1].base == a)) {
      /* Hit at slot 1: swap slots 0 and 1, then return. */
      Addr       t_base = primap_L1[0].base;
      PriMapEnt* t_ent  = primap_L1[0].ent;
      primap_L1[0].base = primap_L1[1].base;
      primap_L1[0].ent  = primap_L1[1].ent;
      primap_L1[1].base = t_base;
      primap_L1[1].ent  = t_ent;
      return primap_L1[0].ent;
   }

   n_primap_L1_searches++;

   /* Linear scan of the whole L1 cache. */
   for (i = 0; i < N_PRIMAP_L1; i++) {
      if (primap_L1[i].base == a) {
         break;
      }
   }
   tl_assert(i >= 0 && i <= N_PRIMAP_L1);

   n_primap_L1_cmps += (ULong)(i+1);

   if (i < N_PRIMAP_L1) {
      /* L1 hit at slot i: nudge the entry one slot towards the front. */
      if (i > 0) {
         Addr       t_base = primap_L1[i-1].base;
         PriMapEnt* t_ent  = primap_L1[i-1].ent;
         primap_L1[i-1].base = primap_L1[i-0].base;
         primap_L1[i-1].ent  = primap_L1[i-0].ent;
         primap_L1[i-0].base = t_base;
         primap_L1[i-0].ent  = t_ent;
         i--;
      }
      return primap_L1[i].ent;
   }

   n_primap_L2_searches++;

   /* First see if we already have it. */
   key.base = a;
   key.sm   = 0;

   res = VG_(OSetGen_Lookup)(primap_L2, &key);
   if (res)
      insert_into_primap_L1_at( PRIMAP_L1_INSERT_IX, res );
   return res;  /* may be NULL: region not yet shadowed */
}
    716 
/* Allocate a fresh SecMap from Valgrind's shadow-memory pool and
   initialise every word's vseg to NONPTR. */
static SecMap* alloc_secondary_map ( void )
{
   SecMap* map;
   UInt  i;

   // JRS 2008-June-25: what's the following assertion for?
   // NOTE(review): presumably because VG_(am_shadow_alloc) deals in
   // whole pages, so SecMap must be page-size aligned -- TODO confirm.
   tl_assert(0 == (sizeof(SecMap) % VKI_MAX_PAGE_SIZE));

   map = VG_(am_shadow_alloc)( sizeof(SecMap) );
   if (map == NULL)
      // NOTE(review): "annelid" is this tool's old name; message is
      // stale but harmless.
      VG_(out_of_memory_NORETURN)( "annelid:allocate new SecMap",
                                   sizeof(SecMap) );

   for (i = 0; i < SEC_MAP_WORDS; i++)
      map->vseg[i] = NONPTR;
   if (0) VG_(printf)("XXX new secmap %p\n", map);
   return map;
}
    735 
    736 static PriMapEnt* find_or_alloc_in_primap ( Addr a )
    737 {
    738    PriMapEnt *nyu, *res;
    739 
    740    /* First see if we already have it. */
    741    res = maybe_find_in_primap( a );
    742    if (LIKELY(res))
    743       return res;
    744 
    745    /* Ok, there's no entry in the secondary map, so we'll have
    746       to allocate one. */
    747    a &= ~(Addr)0xFFFF;
    748 
    749    nyu = (PriMapEnt*) VG_(OSetGen_AllocNode)(
    750                          primap_L2, sizeof(PriMapEnt) );
    751    tl_assert(nyu);
    752    nyu->base = a;
    753    nyu->sm   = alloc_secondary_map();
    754    tl_assert(nyu->sm);
    755    VG_(OSetGen_Insert)( primap_L2, nyu );
    756    insert_into_primap_L1_at( PRIMAP_L1_INSERT_IX, nyu );
    757    n_primap_L2_nodes++;
    758    return nyu;
    759 }
    760 
    761 /////////////////////////////////////////////////
    762 
    763 // Nb: 'a' must be naturally word aligned for the host.
    764 static inline Seg* get_mem_vseg ( Addr a )
    765 {
    766    SecMap* sm     = find_or_alloc_in_primap(a)->sm;
    767    UWord   sm_off = (a & SHMEM_SECMAP_MASK) >> SHMEM_SECMAP_SHIFT;
    768    tl_assert(SHMEM_IS_WORD_ALIGNED(a));
    769    return sm->vseg[sm_off];
    770 }
    771 
    772 // Nb: 'a' must be naturally word aligned for the host.
    773 static inline void set_mem_vseg ( Addr a, Seg* vseg )
    774 {
    775    SecMap* sm     = find_or_alloc_in_primap(a)->sm;
    776    UWord   sm_off = (a & SHMEM_SECMAP_MASK) >> SHMEM_SECMAP_SHIFT;
    777    tl_assert(SHMEM_IS_WORD_ALIGNED(a));
    778    sm->vseg[sm_off] = vseg;
    779 }
    780 
    781 // Find the Seg which contains the given address.
    782 // Returns UNKNOWN if no matches.  Never returns BOTTOM or NONPTR.
    783 // Also, only returns in-use segments, not freed ones.
    784 /* Doing this fast is distinctly difficult when there are more than a
    785    few heap allocated blocks live.  Basically it is done by searching
    786    addr_to_seg_map for 'a'.
    787 
    788    First, if 'a' is the start address of a segment, then we can detect
    789    that by simply doing a VG_(lookupFM) of 'a', and we are done (nice
    790    and easy).
    791 
    792    If 'a' is within some segment, but does not point to the start, it
    793    is much more complex.  We use VG_(findBoundsFM) to find the segment
    794    with the largest .addr field which is <= a, and we then inspect the
    795    segment to see if 'a' really falls inside it or not.  This is all a
    796    bit complex and fragile, and so there's a lot of assertery in the
    797    code below.  It has been crosschecked however against the trivial
    798    _SLOW implementation shown after the end of this fn.
    799 */
    800 static Seg* get_Seg_containing_addr( Addr a )
    801 {
    802    UWord keyW, valW;
    803    Seg*  s2;
    804 
    805    /* Since we are going to poke around in it */
    806    addr_to_seg_map_ENSURE_INIT();
    807 
    808    /* first, see if 'a' is at the start of a block.  We do this both
    809       because it's easy and more imporantly because VG_(findBoundsFM)
    810       will fail in this case, so we need to exclude it first. */
    811    if (VG_(lookupFM)( addr_to_seg_map, &keyW, &valW, a )) {
    812       tl_assert(keyW == a);
    813       s2 = (Seg*)valW;
    814       tl_assert(s2->addr == a);
    815    } else {
    816       Bool  ok;
    817       UWord kMin, vMin, kMax, vMax;
    818       Seg   minSeg;
    819       Seg   maxSeg;
    820       UWord minAddr = 0;
    821       UWord maxAddr = ~minAddr;
    822       VG_(memset)(&minSeg, 0, sizeof(minSeg));
    823       VG_(memset)(&maxSeg, 0, sizeof(maxSeg));
    824       minSeg.addr = minAddr;
    825       maxSeg.addr = maxAddr;
    826       ok = VG_(findBoundsFM)( addr_to_seg_map,
    827                               &kMin, &vMin, &kMax, &vMax,
    828                               minAddr, (UWord)&minSeg,
    829                               maxAddr, (UWord)&maxSeg, a );
    830       tl_assert(ok); /* must be so, since False is only returned when
    831                         'a' is directly present in the map, and we
    832                         just established that it isn't. */
    833       /* At this point, either vMin points at minSeg, or it points at a
    834          real Seg.  In the former case, there is no live heap-allocated
    835          Seg which has a start address <= a, so a is not in any block.
    836          In the latter case, the Seg vMin points at may or may not
    837          actually contain 'a'; we can only tell that by inspecting the
    838          Seg itself. */
    839       s2 = (Seg*)vMin;
    840       tl_assert(kMin == s2->addr);
    841       if (s2 == &minSeg) {
    842          /* the former */
    843          s2 = UNKNOWN;
    844       } else {
    845          /* the latter */
    846          tl_assert(s2->addr <= a);
    847          /* if s2 doesn't actually contain 'a', we must forget about it. */
    848          if (s2->szB == 0 /* a zero sized block can't contain anything */
    849              || s2->addr + s2->szB < a /* the usual range check */)
    850             s2 = UNKNOWN;
    851       }
    852       /* while we're at it, do as much assertery as we can, since this
    853          is all rather complex.  Either vMax points at maxSeg, or it
    854          points to a real block, which must have a start address
    855          greater than a. */
    856       tl_assert(kMax == ((Seg*)vMax)->addr);
    857       if (vMax == (UWord)&maxSeg) {
    858          /* nothing we can check */
    859       } else {
    860          tl_assert(a < kMax); /* hence also a < ((Seg*)vMax)->addr */
    861       }
    862    }
    863 
    864    return s2;
    865 }
    866 
    867 /* XXXX very slow reference implementation.  Do not use.
    868 static Seg* get_Seg_containing_addr_SLOW( Addr a )
    869 {
    870    SegGroup* group;
    871    UWord i;
    872    stats__slow_searches++;
    873    for (group = group_list; group; group = group->admin) {
    874       for (i = 0; i < group->nextfree; i++) {
    875          stats__slow_totcmps++;
    876          if (Seg__is_freed(&group->segs[i]))
    877             continue;
    878          if (group->segs[i].addr <= a
    879              && a < group->segs[i].addr + group->segs[i].szB)
    880             return &group->segs[i];
    881       }
    882    }
    883    return UNKNOWN;
    884 }
    885 */
    886 
    887 
    888 
    889 /*------------------------------------------------------------*/
    890 /*--- malloc() et al replacements                          ---*/
    891 /*------------------------------------------------------------*/
    892 
    893 void* h_replace_malloc ( ThreadId tid, SizeT n )
    894 {
    895    return alloc_and_new_mem_heap ( tid, n, VG_(clo_alignment),
    896                                         /*is_zeroed*/False );
    897 }
    898 
    899 void* h_replace___builtin_new ( ThreadId tid, SizeT n )
    900 {
    901    return alloc_and_new_mem_heap ( tid, n, VG_(clo_alignment),
    902                                            /*is_zeroed*/False );
    903 }
    904 
    905 void* h_replace___builtin_vec_new ( ThreadId tid, SizeT n )
    906 {
    907    return alloc_and_new_mem_heap ( tid, n, VG_(clo_alignment),
    908                                            /*is_zeroed*/False );
    909 }
    910 
    911 void* h_replace_memalign ( ThreadId tid, SizeT align, SizeT n )
    912 {
    913    return alloc_and_new_mem_heap ( tid, n, align,
    914                                         /*is_zeroed*/False );
    915 }
    916 
    917 void* h_replace_calloc ( ThreadId tid, SizeT nmemb, SizeT size1 )
    918 {
    919    return alloc_and_new_mem_heap ( tid, nmemb*size1, VG_(clo_alignment),
    920                                         /*is_zeroed*/True );
    921 }
    922 
    923 void h_replace_free ( ThreadId tid, void* p )
    924 {
    925    // Should arguably check here if p.vseg matches the segID of the
    926    // pointed-to block... unfortunately, by this stage, we don't know what
    927    // p.vseg is, because we don't know the address of p (the p here is a
    928    // copy, and we've lost the address of its source).  To do so would
    929    // require passing &p in, which would require rewriting part of
    930    // vg_replace_malloc.c... argh.
    931    //
    932    // However, Memcheck does free checking, and will catch almost all
    933    // violations this checking would have caught.  (Would only miss if we
    934    // unluckily passed an unrelated pointer to the very start of a heap
    935    // block that was unrelated to that block.  This is very unlikely!)    So
    936    // we haven't lost much.
    937 
    938    handle_free_heap(tid, p);
    939 }
    940 
    941 void h_replace___builtin_delete ( ThreadId tid, void* p )
    942 {
    943    handle_free_heap(tid, p);
    944 }
    945 
    946 void h_replace___builtin_vec_delete ( ThreadId tid, void* p )
    947 {
    948    handle_free_heap(tid, p);
    949 }
    950 
    951 void* h_replace_realloc ( ThreadId tid, void* p_old, SizeT new_size )
    952 {
    953    Seg* seg;
    954 
    955    /* First try and find the block. */
    956    seg = find_Seg_by_addr( (Addr)p_old );
    957    if (!seg)
    958       return NULL;
    959 
    960    tl_assert(seg->addr == (Addr)p_old);
    961 
    962    if (new_size <= seg->szB) {
    963       /* new size is smaller: allocate, copy from old to new */
    964       Addr p_new = (Addr)VG_(cli_malloc)(VG_(clo_alignment), new_size);
    965       VG_(memcpy)((void*)p_new, p_old, new_size);
    966 
    967       /* Notification: copy retained part */
    968       copy_mem       ( (Addr)p_old, p_new, new_size );
    969 
    970       /* Free old memory */
    971       die_and_free_mem_heap( tid, seg );
    972 
    973       /* This has to be after die_and_free_mem_heap, otherwise the
    974          former succeeds in shorting out the new block, not the
    975          old, in the case when both are on the same list.  */
    976       add_new_segment ( tid, p_new, new_size );
    977 
    978       return (void*)p_new;
    979    } else {
    980       /* new size is bigger: allocate, copy from old to new */
    981       Addr p_new = (Addr)VG_(cli_malloc)(VG_(clo_alignment), new_size);
    982       VG_(memcpy)((void*)p_new, p_old, seg->szB);
    983 
    984       /* Notification: first half kept and copied, second half new */
    985       copy_mem       ( (Addr)p_old, p_new, seg->szB );
    986       set_mem_unknown( p_new + seg->szB, new_size - seg->szB );
    987 
    988       /* Free old memory */
    989       die_and_free_mem_heap( tid, seg );
    990 
    991       /* This has to be after die_and_free_mem_heap, otherwise the
    992          former succeeds in shorting out the new block, not the old,
    993          in the case when both are on the same list.  NB jrs
    994          2008-Sept-11: not sure if this comment is valid/correct any
    995          more -- I suspect not. */
    996       add_new_segment ( tid, p_new, new_size );
    997 
    998       return (void*)p_new;
    999    }
   1000 }
   1001 
   1002 SizeT h_replace_malloc_usable_size ( ThreadId tid, void* p )
   1003 {
   1004    Seg* seg = find_Seg_by_addr( (Addr)p );
   1005 
   1006    // There may be slop, but pretend there isn't because only the asked-for
   1007    // area will have been shadowed properly.
   1008    return ( seg ? seg->szB : 0 );
   1009 }
   1010 
   1011 
   1012 /*------------------------------------------------------------*/
   1013 /*--- Memory events                                        ---*/
   1014 /*------------------------------------------------------------*/
   1015 
   1016 static inline
   1017 void set_mem ( Addr a, SizeT len, Seg* seg )
   1018 {
   1019    Addr end;
   1020 
   1021    if (0 == len)
   1022       return;
   1023 
   1024    if (len > 100 * 1000 * 1000)
   1025       VG_(message)(Vg_UserMsg,
   1026                    "Warning: set address range state: large range %lu\n",
   1027                    len);
   1028 
   1029    a   = VG_ROUNDDN(a,       sizeof(UWord));
   1030    end = VG_ROUNDUP(a + len, sizeof(UWord));
   1031    for ( ; a < end; a += sizeof(UWord))
   1032       set_mem_vseg(a, seg);
   1033 }
   1034 
   1035 static void set_mem_unknown( Addr a, SizeT len )
   1036 {
   1037    set_mem( a, len, UNKNOWN );
   1038 }
   1039 
   1040 //zz static void set_mem_nonptr( Addr a, UInt len )
   1041 //zz {
   1042 //zz    set_mem( a, len, NONPTR );
   1043 //zz }
   1044 
   1045 void h_new_mem_startup( Addr a, SizeT len,
   1046                         Bool rr, Bool ww, Bool xx, ULong di_handle )
   1047 {
   1048    if (0) VG_(printf)("new_mem_startup(%#lx,%lu)\n", a, len);
   1049    set_mem_unknown( a, len );
   1050    //add_new_segment( VG_(get_running_tid)(), a, len, SegMmap );
   1051 }
   1052 
   1053 //zz // XXX: Currently not doing anything with brk() -- new segments, or not?
   1054 //zz // Proper way to do it would be to grow/shrink a single, special brk segment.
   1055 //zz //
   1056 //zz // brk is difficult: it defines a single segment, of changeable size.
   1057 //zz // It starts off with size zero, at the address given by brk(0).  There are
   1058 //zz // no pointers within the program to it.  Any subsequent calls by the
   1059 //zz // program to brk() (possibly growing or shrinking it) return pointers to
   1060 //zz // the *end* of the segment (nb: this is the kernel brk(), which is
   1061 //zz // different to the libc brk()).
   1062 //zz //
   1063 //zz // If fixing this, don't forget to update the brk case in SK_(post_syscall).
   1064 //zz //
   1065 //zz // Nb: not sure if the return value is the last byte addressible, or one
   1066 //zz // past the end of the segment.
   1067 //zz //
   1068 //zz static void new_mem_brk( Addr a, UInt len )
   1069 //zz {
   1070 //zz    set_mem_unknown(a, len);
   1071 //zz    //VG_(skin_panic)("can't handle new_mem_brk");
   1072 //zz }
   1073 
   1074 // Not quite right:  if you mmap a segment into a specified place, it could
   1075 // be legitimate to do certain arithmetic with the pointer that it wouldn't
   1076 // otherwise.  Hopefully this is rare, though.
   1077 void h_new_mem_mmap( Addr a, SizeT len,
   1078                      Bool rr, Bool ww, Bool xx, ULong di_handle )
   1079 {
   1080    if (0) VG_(printf)("new_mem_mmap(%#lx,%lu)\n", a, len);
   1081 //zz #if 0
   1082 //zz    Seg seg = NULL;
   1083 //zz
   1084 //zz    // Check for overlapping segments
   1085 //zz #if 0
   1086 //zz    is_overlapping_seg___a   = a;    // 'free' variable
   1087 //zz    is_overlapping_seg___len = len;  // 'free' variable
   1088 //zz    seg = (Seg)VG_(HT_first_match) ( mlist, is_overlapping_seg );
   1089 //zz    is_overlapping_seg___a   = 0;    // paranoia, reset
   1090 //zz    is_overlapping_seg___len = 0;    // paranoia, reset
   1091 //zz #endif
   1092 //zz
   1093 //zz    // XXX: do this check properly with ISLists
   1094 //zz
   1095 //zz    if ( ISList__findI( seglist, a, &seg )) {
   1096 //zz       sk_assert(SegMmap == seg->status || SegMmapFree == seg->status);
   1097 //zz       if (SegMmap == seg->status)
   1098 //zz
   1099 //zz    }
   1100 //zz
   1101 //zz    if (NULL != seg) {
   1102 //zz       // Right, we found an overlap
   1103 //zz       if (VG_(clo_verbosity) > 1)
   1104 //zz          VG_(message)(Vg_UserMsg, "mmap overlap:  old: %#lx, %d;  new: %#lx, %d",
   1105 //zz                                   seg->left, Seg__size(seg), a, len);
   1106 //zz       if (seg->left <= a && a <= seg->right) {
   1107 //zz          // New one truncates end of the old one.  Nb: we don't adjust its
   1108 //zz          // size, because the first segment's pointer can be (and for
   1109 //zz          // Konqueror, is) legitimately used to access parts of the second
   1110 //zz          // segment.  At least, I assume Konqueror is doing something legal.
   1111 //zz          // so that a size mismatch upon munmap isn't a problem.
   1112 //zz //         seg->size = a - seg->data;
   1113 //zz //         seg->is_truncated_map = True;
   1114 //zz //         if (VG_(clo_verbosity) > 1)
   1115 //zz //            VG_(message)(Vg_UserMsg, "old seg truncated to length %d",
   1116 //zz //                                     seg->size);
   1117 //zz       } else {
   1118 //zz          VG_(skin_panic)("Can't handle this mmap() overlap case");
   1119 //zz       }
   1120 //zz    }
   1121    set_mem_unknown( a, len );
   1122    //add_new_segment( VG_(get_running_tid)(), a, len, SegMmap );
   1123 //zz #endif
   1124 }
   1125 
   1126 static void copy_mem( Addr from, Addr to, SizeT len )
   1127 {
   1128    Addr fromend = from + len;
   1129 
   1130    // Must be aligned due to malloc always returning aligned objects.
   1131    tl_assert(VG_IS_8_ALIGNED(from) && VG_IS_8_ALIGNED(to));
   1132 
   1133    // Must only be called with positive len.
   1134    if (0 == len)
   1135       return;
   1136 
   1137    for ( ; from < fromend; from += sizeof(UWord), to += sizeof(UWord))
   1138       set_mem_vseg( to, get_mem_vseg(from) );
   1139 }
   1140 
   1141 //zz // Similar to SK_(realloc)()
   1142 //zz static void copy_mem_remap( Addr from, Addr to, UInt len )
   1143 //zz {
   1144 //zz    VG_(skin_panic)("argh: copy_mem_remap");
   1145 //zz }
   1146 //zz
   1147 //zz static void die_mem_brk( Addr a, UInt len )
   1148 //zz {
   1149 //zz    set_mem_unknown(a, len);
   1150 //zz //   VG_(skin_panic)("can't handle die_mem_brk()");
   1151 //zz }
   1152 
// munmap notification.  Currently a deliberate no-op: the segment
// bookkeeping call below has been disabled, so unmapped ranges keep
// whatever shadow state they had.
void h_die_mem_munmap( Addr a, SizeT len )
{
//   handle_free_munmap( (void*)a, len );
}
   1157 
   1158 // Don't need to check all addresses within the block; in the absence of
   1159 // discontiguous segments, the segments for the first and last bytes should
   1160 // be the same.  Can't easily check the pointer segment matches the block
   1161 // segment, unfortunately, but the first/last check should catch most
   1162 // errors.
   1163 static void pre_mem_access2 ( CorePart part, ThreadId tid, Char* str,
   1164                               Addr s/*tart*/, Addr e/*nd*/ )
   1165 {
   1166    Seg  *seglo, *seghi;
   1167 
   1168    // Don't check code being translated -- very slow, and not much point
   1169    if (Vg_CoreTranslate == part) return;
   1170 
   1171    // Don't check the signal case -- only happens in core, no need to check
   1172    if (Vg_CoreSignal == part) return;
   1173 
   1174    // Only expect syscalls after this point
   1175    if (part != Vg_CoreSysCall) {
   1176       VG_(printf)("part = %d\n", part);
   1177       VG_(tool_panic)("unknown corepart in pre_mem_access2");
   1178    }
   1179 
   1180    // Check first and last bytes match
   1181    seglo = get_Seg_containing_addr( s );
   1182    seghi = get_Seg_containing_addr( e );
   1183    tl_assert( BOTTOM != seglo && NONPTR != seglo );
   1184    tl_assert( BOTTOM != seghi && NONPTR != seghi );
   1185 
   1186    /* record an error if start and end are in different, but known segments */
   1187    if (is_known_segment(seglo) && is_known_segment(seghi)
   1188        && seglo != seghi) {
   1189       h_record_sysparam_error(tid, part, str, s, e, seglo, seghi);
   1190    }
   1191    else
   1192    /* record an error if start is in a known segment but end isn't */
   1193    if (is_known_segment(seglo) && !is_known_segment(seghi)) {
   1194       h_record_sysparam_error(tid, part, str, s, e, seglo, UNKNOWN);
   1195    }
   1196    else
   1197    /* record an error if end is in a known segment but start isn't */
   1198    if (!is_known_segment(seglo) && is_known_segment(seghi)) {
   1199       h_record_sysparam_error(tid, part, str, s, e, UNKNOWN, seghi);
   1200    }
   1201 }
   1202 
   1203 void h_pre_mem_access ( CorePart part, ThreadId tid, Char* s,
   1204                         Addr base, SizeT size )
   1205 {
   1206    pre_mem_access2( part, tid, s, base, base + size - 1 );
   1207 }
   1208 
   1209 void h_pre_mem_read_asciiz ( CorePart part, ThreadId tid,
   1210                              Char* s, Addr lo )
   1211 {
   1212    Addr hi = lo;
   1213 
   1214    // Nb: the '\0' must be included in the lo...hi range
   1215    while ('\0' != *(Char*)hi) hi++;
   1216    pre_mem_access2( part, tid, s, lo, hi );
   1217 }
   1218 
   1219 //zz static void post_mem_write(Addr a, UInt len)
   1220 //zz {
   1221 //zz    set_mem_unknown(a, len);
   1222 //zz }
   1223 
   1224 
   1225 /*------------------------------------------------------------*/
   1226 /*--- Register event handlers                              ---*/
   1227 /*------------------------------------------------------------*/
   1228 
   1229 //zz static void post_regs_write_init ( void )
   1230 //zz {
   1231 //zz    UInt i;
   1232 //zz    for (i = R_EAX; i <= R_EDI; i++)
   1233 //zz       VG_(set_shadow_archreg)( i, (UInt)UNKNOWN );
   1234 //zz
   1235 //zz    // Don't bother about eflags
   1236 //zz }
   1237 
   1238 // BEGIN move this uglyness to pc_machine.c
   1239 
   1240 static inline Bool host_is_big_endian ( void ) {
   1241    UInt x = 0x11223344;
   1242    return 0x1122 == *(UShort*)(&x);
   1243 }
   1244 static inline Bool host_is_little_endian ( void ) {
   1245    UInt x = 0x11223344;
   1246    return 0x3344 == *(UShort*)(&x);
   1247 }
   1248 
#define N_INTREGINFO_OFFSETS 4

/* Holds the result of a query to 'get_IntRegInfo'.  Valid values for
   n_offsets are:

   -1: means the queried guest state slice exactly matches
       one integer register

   0: means the queried guest state slice does not overlap any
      integer registers

   1 .. N_INTREGINFO_OFFSETS: means the queried guest state offset
      overlaps n_offsets different integer registers, and their base
      offsets are placed in the offsets array.
*/
typedef
   struct {
      // Guest-state base offsets of the overlapped integer registers;
      // only the first n_offsets entries are meaningful.
      Int offsets[N_INTREGINFO_OFFSETS];
      // -1, 0, or 1..N_INTREGINFO_OFFSETS -- see comment above.
      Int n_offsets;
   }
   IntRegInfo;
   1270 
   1271 
   1272 #if defined(VGA_x86)
   1273 # include "libvex_guest_x86.h"
   1274 # define PC_SIZEOF_GUEST_STATE sizeof(VexGuestX86State)
   1275 #endif
   1276 
   1277 #if defined(VGA_amd64)
   1278 # include "libvex_guest_amd64.h"
   1279 # define PC_SIZEOF_GUEST_STATE sizeof(VexGuestAMD64State)
   1280 # define PC_OFF_FS_ZERO offsetof(VexGuestAMD64State,guest_FS_ZERO)
   1281 # define PC_SZB_FS_ZERO sizeof( ((VexGuestAMD64State*)0)->guest_FS_ZERO)
   1282 #endif
   1283 
   1284 #if defined(VGA_ppc32)
   1285 # include "libvex_guest_ppc32.h"
   1286 # define PC_SIZEOF_GUEST_STATE sizeof(VexGuestPPC32State)
   1287 #endif
   1288 
   1289 #if defined(VGA_ppc64)
   1290 # include "libvex_guest_ppc64.h"
   1291 # define PC_SIZEOF_GUEST_STATE sizeof(VexGuestPPC64State)
   1292 #endif
   1293 
   1294 #if defined(VGA_arm)
   1295 # include "libvex_guest_arm.h"
   1296 # define PC_SIZEOF_GUEST_STATE sizeof(VexGuestARMState)
   1297 #endif
   1298 
   1299 
   1300 /* See description on definition of type IntRegInfo. */
   1301 static void get_IntRegInfo ( /*OUT*/IntRegInfo* iii, Int offset, Int szB )
   1302 {
   1303    /* --------------------- x86 --------------------- */
   1304 
   1305 #  if defined(VGA_x86)
   1306 
   1307 #  define GOF(_fieldname) \
   1308       (offsetof(VexGuestX86State,guest_##_fieldname))
   1309 
   1310    Int  o    = offset;
   1311    Int  sz   = szB;
   1312    Bool is4  = sz == 4;
   1313    Bool is21 = sz == 2 || sz == 1;
   1314 
   1315    tl_assert(sz > 0);
   1316    tl_assert(host_is_little_endian());
   1317 
   1318    /* Set default state to "does not intersect any int register". */
   1319    VG_(memset)( iii, 0, sizeof(*iii) );
   1320 
   1321    /* Exact accesses to integer registers */
   1322    if (o == GOF(EAX)     && is4) goto exactly1;
   1323    if (o == GOF(ECX)     && is4) goto exactly1;
   1324    if (o == GOF(EDX)     && is4) goto exactly1;
   1325    if (o == GOF(EBX)     && is4) goto exactly1;
   1326    if (o == GOF(ESP)     && is4) goto exactly1;
   1327    if (o == GOF(EBP)     && is4) goto exactly1;
   1328    if (o == GOF(ESI)     && is4) goto exactly1;
   1329    if (o == GOF(EDI)     && is4) goto exactly1;
   1330    if (o == GOF(EIP)     && is4) goto none;
   1331    if (o == GOF(IP_AT_SYSCALL) && is4) goto none;
   1332    if (o == GOF(CC_OP)   && is4) goto none;
   1333    if (o == GOF(CC_DEP1) && is4) goto none;
   1334    if (o == GOF(CC_DEP2) && is4) goto none;
   1335    if (o == GOF(CC_NDEP) && is4) goto none;
   1336    if (o == GOF(DFLAG)   && is4) goto none;
   1337    if (o == GOF(IDFLAG)  && is4) goto none;
   1338    if (o == GOF(ACFLAG)  && is4) goto none;
   1339 
   1340    /* Partial accesses to integer registers */
   1341    if (o == GOF(EAX)     && is21) {         o -= 0; goto contains_o; }
   1342    if (o == GOF(EAX)+1   && is21) { o -= 1; o -= 0; goto contains_o; }
   1343    if (o == GOF(ECX)     && is21) {         o -= 0; goto contains_o; }
   1344    if (o == GOF(ECX)+1   && is21) { o -= 1; o -= 0; goto contains_o; }
   1345    if (o == GOF(EBX)     && is21) {         o -= 0; goto contains_o; }
   1346    if (o == GOF(EBX)+1   && is21) { o -= 1; o -= 0; goto contains_o; }
   1347    if (o == GOF(EDX)     && is21) {         o -= 0; goto contains_o; }
   1348    if (o == GOF(EDX)+1   && is21) { o -= 1; o -= 0; goto contains_o; }
   1349    if (o == GOF(ESI)     && is21) {         o -= 0; goto contains_o; }
   1350    if (o == GOF(EDI)     && is21) {         o -= 0; goto contains_o; }
   1351 
   1352    /* Segment related guff */
   1353    if (o == GOF(GS)  && sz == 2) goto none;
   1354    if (o == GOF(LDT) && is4) goto none;
   1355    if (o == GOF(GDT) && is4) goto none;
   1356 
   1357    /* FP admin related */
   1358    if (o == GOF(SSEROUND) && is4) goto none;
   1359    if (o == GOF(FPROUND)  && is4) goto none;
   1360    if (o == GOF(EMWARN)   && is4) goto none;
   1361    if (o == GOF(FTOP)     && is4) goto none;
   1362    if (o == GOF(FPTAG)    && sz == 8) goto none;
   1363    if (o == GOF(FC3210)   && is4) goto none;
   1364 
   1365    /* xmm registers, including arbitrary sub-parts */
   1366    if (o >= GOF(XMM0) && o+sz <= GOF(XMM0)+16) goto none;
   1367    if (o >= GOF(XMM1) && o+sz <= GOF(XMM1)+16) goto none;
   1368    if (o >= GOF(XMM2) && o+sz <= GOF(XMM2)+16) goto none;
   1369    if (o >= GOF(XMM3) && o+sz <= GOF(XMM3)+16) goto none;
   1370    if (o >= GOF(XMM4) && o+sz <= GOF(XMM4)+16) goto none;
   1371    if (o >= GOF(XMM5) && o+sz <= GOF(XMM5)+16) goto none;
   1372    if (o >= GOF(XMM6) && o+sz <= GOF(XMM6)+16) goto none;
   1373    if (o >= GOF(XMM7) && o+sz <= GOF(XMM7)+16) goto none;
   1374 
   1375    /* mmx/x87 registers (a bit of a kludge, since 'o' is not checked
   1376       to be exactly equal to one of FPREG[0] .. FPREG[7]) */
   1377    if (o >= GOF(FPREG[0]) && o < GOF(FPREG[7])+8 && sz == 8) goto none;
   1378 
   1379    /* the entire mmx/x87 register bank in one big piece */
   1380    if (o == GOF(FPREG) && sz == 64) goto none;
   1381 
   1382    VG_(printf)("get_IntRegInfo(x86):failing on (%d,%d)\n", o, sz);
   1383    tl_assert(0);
   1384 #  undef GOF
   1385 
   1386    /* -------------------- amd64 -------------------- */
   1387 
   1388 #  elif defined(VGA_amd64)
   1389 
   1390 #  define GOF(_fieldname) \
   1391       (offsetof(VexGuestAMD64State,guest_##_fieldname))
   1392 
   1393    Int  o     = offset;
   1394    Int  sz    = szB;
   1395    Bool is421 = sz == 4 || sz == 2 || sz == 1;
   1396    Bool is8   = sz == 8;
   1397 
   1398    tl_assert(sz > 0);
   1399    tl_assert(host_is_little_endian());
   1400 
   1401    /* Set default state to "does not intersect any int register". */
   1402    VG_(memset)( iii, 0, sizeof(*iii) );
   1403 
   1404    /* Exact accesses to integer registers */
   1405    if (o == GOF(RAX)     && is8) goto exactly1;
   1406    if (o == GOF(RCX)     && is8) goto exactly1;
   1407    if (o == GOF(RDX)     && is8) goto exactly1;
   1408    if (o == GOF(RBX)     && is8) goto exactly1;
   1409    if (o == GOF(RSP)     && is8) goto exactly1;
   1410    if (o == GOF(RBP)     && is8) goto exactly1;
   1411    if (o == GOF(RSI)     && is8) goto exactly1;
   1412    if (o == GOF(RDI)     && is8) goto exactly1;
   1413    if (o == GOF(R8)      && is8) goto exactly1;
   1414    if (o == GOF(R9)      && is8) goto exactly1;
   1415    if (o == GOF(R10)     && is8) goto exactly1;
   1416    if (o == GOF(R11)     && is8) goto exactly1;
   1417    if (o == GOF(R12)     && is8) goto exactly1;
   1418    if (o == GOF(R13)     && is8) goto exactly1;
   1419    if (o == GOF(R14)     && is8) goto exactly1;
   1420    if (o == GOF(R15)     && is8) goto exactly1;
   1421    if (o == GOF(RIP)     && is8) goto exactly1;
   1422    if (o == GOF(IP_AT_SYSCALL) && is8) goto none;
   1423    if (o == GOF(CC_OP)   && is8) goto none;
   1424    if (o == GOF(CC_DEP1) && is8) goto none;
   1425    if (o == GOF(CC_DEP2) && is8) goto none;
   1426    if (o == GOF(CC_NDEP) && is8) goto none;
   1427    if (o == GOF(DFLAG)   && is8) goto none;
   1428    if (o == GOF(IDFLAG)  && is8) goto none;
   1429 
   1430    /* Partial accesses to integer registers */
   1431    if (o == GOF(RAX)     && is421) {         o -= 0; goto contains_o; }
   1432    if (o == GOF(RAX)+1   && is421) { o -= 1; o -= 0; goto contains_o; }
   1433    if (o == GOF(RCX)     && is421) {         o -= 0; goto contains_o; }
   1434    if (o == GOF(RCX)+1   && is421) { o -= 1; o -= 0; goto contains_o; }
   1435    if (o == GOF(RDX)     && is421) {         o -= 0; goto contains_o; }
   1436    if (o == GOF(RDX)+1   && is421) { o -= 1; o -= 0; goto contains_o; }
   1437    if (o == GOF(RBX)     && is421) {         o -= 0; goto contains_o; }
   1438    if (o == GOF(RBX)+1   && is421) { o -= 1; o -= 0; goto contains_o; }
   1439    if (o == GOF(RBP)     && is421) {         o -= 0; goto contains_o; }
   1440    if (o == GOF(RSI)     && is421) {         o -= 0; goto contains_o; }
   1441    if (o == GOF(RDI)     && is421) {         o -= 0; goto contains_o; }
   1442    if (o == GOF(R8)      && is421) {         o -= 0; goto contains_o; }
   1443    if (o == GOF(R9)      && is421) {         o -= 0; goto contains_o; }
   1444    if (o == GOF(R10)     && is421) {         o -= 0; goto contains_o; }
   1445    if (o == GOF(R11)     && is421) {         o -= 0; goto contains_o; }
   1446    if (o == GOF(R12)     && is421) {         o -= 0; goto contains_o; }
   1447    if (o == GOF(R13)     && is421) {         o -= 0; goto contains_o; }
   1448    if (o == GOF(R14)     && is421) {         o -= 0; goto contains_o; }
   1449    if (o == GOF(R15)     && is421) {         o -= 0; goto contains_o; }
   1450 
   1451    /* Segment related guff */
   1452    if (o == GOF(FS_ZERO) && is8) goto exactly1;
   1453 
   1454    /* FP admin related */
   1455    if (o == GOF(SSEROUND) && is8) goto none;
   1456    if (o == GOF(FPROUND)  && is8) goto none;
   1457    if (o == GOF(EMWARN)   && sz == 4) goto none;
   1458    if (o == GOF(FTOP)     && sz == 4) goto none;
   1459    if (o == GOF(FPTAG)    && is8) goto none;
   1460    if (o == GOF(FC3210)   && is8) goto none;
   1461 
   1462    /* xmm registers, including arbitrary sub-parts */
   1463    if (o >= GOF(XMM0)  && o+sz <= GOF(XMM0)+16)  goto none;
   1464    if (o >= GOF(XMM1)  && o+sz <= GOF(XMM1)+16)  goto none;
   1465    if (o >= GOF(XMM2)  && o+sz <= GOF(XMM2)+16)  goto none;
   1466    if (o >= GOF(XMM3)  && o+sz <= GOF(XMM3)+16)  goto none;
   1467    if (o >= GOF(XMM4)  && o+sz <= GOF(XMM4)+16)  goto none;
   1468    if (o >= GOF(XMM5)  && o+sz <= GOF(XMM5)+16)  goto none;
   1469    if (o >= GOF(XMM6)  && o+sz <= GOF(XMM6)+16)  goto none;
   1470    if (o >= GOF(XMM7)  && o+sz <= GOF(XMM7)+16)  goto none;
   1471    if (o >= GOF(XMM8)  && o+sz <= GOF(XMM8)+16)  goto none;
   1472    if (o >= GOF(XMM9)  && o+sz <= GOF(XMM9)+16)  goto none;
   1473    if (o >= GOF(XMM10) && o+sz <= GOF(XMM10)+16) goto none;
   1474    if (o >= GOF(XMM11) && o+sz <= GOF(XMM11)+16) goto none;
   1475    if (o >= GOF(XMM12) && o+sz <= GOF(XMM12)+16) goto none;
   1476    if (o >= GOF(XMM13) && o+sz <= GOF(XMM13)+16) goto none;
   1477    if (o >= GOF(XMM14) && o+sz <= GOF(XMM14)+16) goto none;
   1478    if (o >= GOF(XMM15) && o+sz <= GOF(XMM15)+16) goto none;
   1479 
   1480    /* mmx/x87 registers (a bit of a kludge, since 'o' is not checked
   1481       to be exactly equal to one of FPREG[0] .. FPREG[7]) */
   1482    if (o >= GOF(FPREG[0]) && o < GOF(FPREG[7])+8 && sz == 8) goto none;
   1483 
   1484    VG_(printf)("get_IntRegInfo(amd64):failing on (%d,%d)\n", o, sz);
   1485    tl_assert(0);
   1486 #  undef GOF
   1487 
   1488    /* -------------------- ppc32 -------------------- */
   1489 
   1490 #  elif defined(VGA_ppc32)
   1491 
   1492 #  define GOF(_fieldname) \
   1493       (offsetof(VexGuestPPC32State,guest_##_fieldname))
   1494 
   1495    Int  o    = offset;
   1496    Int  sz   = szB;
   1497    Bool is4  = sz == 4;
   1498    Bool is8  = sz == 8;
   1499 
   1500    tl_assert(sz > 0);
   1501    tl_assert(host_is_big_endian());
   1502 
   1503    /* Set default state to "does not intersect any int register". */
   1504    VG_(memset)( iii, 0, sizeof(*iii) );
   1505 
   1506    /* Exact accesses to integer registers */
   1507    if (o == GOF(GPR0)  && is4) goto exactly1;
   1508    if (o == GOF(GPR1)  && is4) goto exactly1;
   1509    if (o == GOF(GPR2)  && is4) goto exactly1;
   1510    if (o == GOF(GPR3)  && is4) goto exactly1;
   1511    if (o == GOF(GPR4)  && is4) goto exactly1;
   1512    if (o == GOF(GPR5)  && is4) goto exactly1;
   1513    if (o == GOF(GPR6)  && is4) goto exactly1;
   1514    if (o == GOF(GPR7)  && is4) goto exactly1;
   1515    if (o == GOF(GPR8)  && is4) goto exactly1;
   1516    if (o == GOF(GPR9)  && is4) goto exactly1;
   1517    if (o == GOF(GPR10) && is4) goto exactly1;
   1518    if (o == GOF(GPR11) && is4) goto exactly1;
   1519    if (o == GOF(GPR12) && is4) goto exactly1;
   1520    if (o == GOF(GPR13) && is4) goto exactly1;
   1521    if (o == GOF(GPR14) && is4) goto exactly1;
   1522    if (o == GOF(GPR15) && is4) goto exactly1;
   1523    if (o == GOF(GPR16) && is4) goto exactly1;
   1524    if (o == GOF(GPR17) && is4) goto exactly1;
   1525    if (o == GOF(GPR18) && is4) goto exactly1;
   1526    if (o == GOF(GPR19) && is4) goto exactly1;
   1527    if (o == GOF(GPR20) && is4) goto exactly1;
   1528    if (o == GOF(GPR21) && is4) goto exactly1;
   1529    if (o == GOF(GPR22) && is4) goto exactly1;
   1530    if (o == GOF(GPR23) && is4) goto exactly1;
   1531    if (o == GOF(GPR24) && is4) goto exactly1;
   1532    if (o == GOF(GPR25) && is4) goto exactly1;
   1533    if (o == GOF(GPR26) && is4) goto exactly1;
   1534    if (o == GOF(GPR27) && is4) goto exactly1;
   1535    if (o == GOF(GPR28) && is4) goto exactly1;
   1536    if (o == GOF(GPR29) && is4) goto exactly1;
   1537    if (o == GOF(GPR30) && is4) goto exactly1;
   1538    if (o == GOF(GPR31) && is4) goto exactly1;
   1539 
   1540    /* Misc integer reg and condition code accesses */
   1541    if (o == GOF(LR)        && is4) goto exactly1;
   1542    if (o == GOF(CTR)       && is4) goto exactly1;
   1543    if (o == GOF(CIA)       && is4) goto none;
   1544    if (o == GOF(IP_AT_SYSCALL) && is4) goto none;
   1545    if (o == GOF(TISTART)   && is4) goto none;
   1546    if (o == GOF(TILEN)     && is4) goto none;
   1547    if (o == GOF(REDIR_SP)  && is4) goto none;
   1548 
   1549    if (sz == 1) {
   1550       if (o == GOF(XER_SO))  goto none;
   1551       if (o == GOF(XER_OV))  goto none;
   1552       if (o == GOF(XER_CA))  goto none;
   1553       if (o == GOF(XER_BC))  goto none;
   1554       if (o == GOF(CR0_321)) goto none;
   1555       if (o == GOF(CR0_0))   goto none;
   1556       if (o == GOF(CR1_321)) goto none;
   1557       if (o == GOF(CR1_0))   goto none;
   1558       if (o == GOF(CR2_321)) goto none;
   1559       if (o == GOF(CR2_0))   goto none;
   1560       if (o == GOF(CR3_321)) goto none;
   1561       if (o == GOF(CR3_0))   goto none;
   1562       if (o == GOF(CR4_321)) goto none;
   1563       if (o == GOF(CR4_0))   goto none;
   1564       if (o == GOF(CR5_321)) goto none;
   1565       if (o == GOF(CR5_0))   goto none;
   1566       if (o == GOF(CR6_321)) goto none;
   1567       if (o == GOF(CR6_0))   goto none;
   1568       if (o == GOF(CR7_321)) goto none;
   1569       if (o == GOF(CR7_0))   goto none;
   1570    }
   1571 
   1572    /* Exact accesses to FP registers */
   1573    if (o == GOF(FPR0)  && is8) goto none;
   1574    if (o == GOF(FPR1)  && is8) goto none;
   1575    if (o == GOF(FPR2)  && is8) goto none;
   1576    if (o == GOF(FPR3)  && is8) goto none;
   1577    if (o == GOF(FPR4)  && is8) goto none;
   1578    if (o == GOF(FPR5)  && is8) goto none;
   1579    if (o == GOF(FPR6)  && is8) goto none;
   1580    if (o == GOF(FPR7)  && is8) goto none;
   1581    if (o == GOF(FPR8)  && is8) goto none;
   1582    if (o == GOF(FPR9)  && is8) goto none;
   1583    if (o == GOF(FPR10) && is8) goto none;
   1584    if (o == GOF(FPR11) && is8) goto none;
   1585    if (o == GOF(FPR12) && is8) goto none;
   1586    if (o == GOF(FPR13) && is8) goto none;
   1587    if (o == GOF(FPR14) && is8) goto none;
   1588    if (o == GOF(FPR15) && is8) goto none;
   1589    if (o == GOF(FPR16) && is8) goto none;
   1590    if (o == GOF(FPR17) && is8) goto none;
   1591    if (o == GOF(FPR18) && is8) goto none;
   1592    if (o == GOF(FPR19) && is8) goto none;
   1593    if (o == GOF(FPR20) && is8) goto none;
   1594    if (o == GOF(FPR21) && is8) goto none;
   1595    if (o == GOF(FPR22) && is8) goto none;
   1596    if (o == GOF(FPR23) && is8) goto none;
   1597    if (o == GOF(FPR24) && is8) goto none;
   1598    if (o == GOF(FPR25) && is8) goto none;
   1599    if (o == GOF(FPR26) && is8) goto none;
   1600    if (o == GOF(FPR27) && is8) goto none;
   1601    if (o == GOF(FPR28) && is8) goto none;
   1602    if (o == GOF(FPR29) && is8) goto none;
   1603    if (o == GOF(FPR30) && is8) goto none;
   1604    if (o == GOF(FPR31) && is8) goto none;
   1605 
   1606    /* FP admin related */
   1607    if (o == GOF(FPROUND) && is4) goto none;
   1608    if (o == GOF(EMWARN)  && is4) goto none;
   1609 
   1610    /* Altivec registers */
   1611    if (o == GOF(VR0)  && sz == 16) goto none;
   1612    if (o == GOF(VR1)  && sz == 16) goto none;
   1613    if (o == GOF(VR2)  && sz == 16) goto none;
   1614    if (o == GOF(VR3)  && sz == 16) goto none;
   1615    if (o == GOF(VR4)  && sz == 16) goto none;
   1616    if (o == GOF(VR5)  && sz == 16) goto none;
   1617    if (o == GOF(VR6)  && sz == 16) goto none;
   1618    if (o == GOF(VR7)  && sz == 16) goto none;
   1619    if (o == GOF(VR8)  && sz == 16) goto none;
   1620    if (o == GOF(VR9)  && sz == 16) goto none;
   1621    if (o == GOF(VR10) && sz == 16) goto none;
   1622    if (o == GOF(VR11) && sz == 16) goto none;
   1623    if (o == GOF(VR12) && sz == 16) goto none;
   1624    if (o == GOF(VR13) && sz == 16) goto none;
   1625    if (o == GOF(VR14) && sz == 16) goto none;
   1626    if (o == GOF(VR15) && sz == 16) goto none;
   1627    if (o == GOF(VR16) && sz == 16) goto none;
   1628    if (o == GOF(VR17) && sz == 16) goto none;
   1629    if (o == GOF(VR18) && sz == 16) goto none;
   1630    if (o == GOF(VR19) && sz == 16) goto none;
   1631    if (o == GOF(VR20) && sz == 16) goto none;
   1632    if (o == GOF(VR21) && sz == 16) goto none;
   1633    if (o == GOF(VR22) && sz == 16) goto none;
   1634    if (o == GOF(VR23) && sz == 16) goto none;
   1635    if (o == GOF(VR24) && sz == 16) goto none;
   1636    if (o == GOF(VR25) && sz == 16) goto none;
   1637    if (o == GOF(VR26) && sz == 16) goto none;
   1638    if (o == GOF(VR27) && sz == 16) goto none;
   1639    if (o == GOF(VR28) && sz == 16) goto none;
   1640    if (o == GOF(VR29) && sz == 16) goto none;
   1641    if (o == GOF(VR30) && sz == 16) goto none;
   1642    if (o == GOF(VR31) && sz == 16) goto none;
   1643 
   1644    /* Altivec admin related */
   1645    if (o == GOF(VRSAVE) && is4) goto none;
   1646 
   1647    VG_(printf)("get_IntRegInfo(ppc32):failing on (%d,%d)\n", o, sz);
   1648    tl_assert(0);
   1649 #  undef GOF
   1650 
   1651    /* -------------------- ppc64 -------------------- */
   1652 
   1653 #  elif defined(VGA_ppc64)
   1654 
   1655 #  define GOF(_fieldname) \
   1656       (offsetof(VexGuestPPC64State,guest_##_fieldname))
   1657 
   1658    Int  o    = offset;
   1659    Int  sz   = szB;
   1660    Bool is4  = sz == 4;
   1661    Bool is8  = sz == 8;
   1662 
   1663    tl_assert(sz > 0);
   1664    tl_assert(host_is_big_endian());
   1665 
   1666    /* Set default state to "does not intersect any int register". */
   1667    VG_(memset)( iii, 0, sizeof(*iii) );
   1668 
   1669    /* Exact accesses to integer registers */
   1670    if (o == GOF(GPR0)  && is8) goto exactly1;
   1671    if (o == GOF(GPR1)  && is8) goto exactly1;
   1672    if (o == GOF(GPR2)  && is8) goto exactly1;
   1673    if (o == GOF(GPR3)  && is8) goto exactly1;
   1674    if (o == GOF(GPR4)  && is8) goto exactly1;
   1675    if (o == GOF(GPR5)  && is8) goto exactly1;
   1676    if (o == GOF(GPR6)  && is8) goto exactly1;
   1677    if (o == GOF(GPR7)  && is8) goto exactly1;
   1678    if (o == GOF(GPR8)  && is8) goto exactly1;
   1679    if (o == GOF(GPR9)  && is8) goto exactly1;
   1680    if (o == GOF(GPR10) && is8) goto exactly1;
   1681    if (o == GOF(GPR11) && is8) goto exactly1;
   1682    if (o == GOF(GPR12) && is8) goto exactly1;
   1683    if (o == GOF(GPR13) && is8) goto exactly1;
   1684    if (o == GOF(GPR14) && is8) goto exactly1;
   1685    if (o == GOF(GPR15) && is8) goto exactly1;
   1686    if (o == GOF(GPR16) && is8) goto exactly1;
   1687    if (o == GOF(GPR17) && is8) goto exactly1;
   1688    if (o == GOF(GPR18) && is8) goto exactly1;
   1689    if (o == GOF(GPR19) && is8) goto exactly1;
   1690    if (o == GOF(GPR20) && is8) goto exactly1;
   1691    if (o == GOF(GPR21) && is8) goto exactly1;
   1692    if (o == GOF(GPR22) && is8) goto exactly1;
   1693    if (o == GOF(GPR23) && is8) goto exactly1;
   1694    if (o == GOF(GPR24) && is8) goto exactly1;
   1695    if (o == GOF(GPR25) && is8) goto exactly1;
   1696    if (o == GOF(GPR26) && is8) goto exactly1;
   1697    if (o == GOF(GPR27) && is8) goto exactly1;
   1698    if (o == GOF(GPR28) && is8) goto exactly1;
   1699    if (o == GOF(GPR29) && is8) goto exactly1;
   1700    if (o == GOF(GPR30) && is8) goto exactly1;
   1701    if (o == GOF(GPR31) && is8) goto exactly1;
   1702 
   1703    /* Misc integer reg and condition code accesses */
   1704    if (o == GOF(LR)        && is8) goto exactly1;
   1705    if (o == GOF(CTR)       && is8) goto exactly1;
   1706    if (o == GOF(CIA)       && is8) goto none;
   1707    if (o == GOF(IP_AT_SYSCALL) && is8) goto none;
   1708    if (o == GOF(TISTART)   && is8) goto none;
   1709    if (o == GOF(TILEN)     && is8) goto none;
   1710    if (o == GOF(REDIR_SP)  && is8) goto none;
   1711 
   1712    if (sz == 1) {
   1713       if (o == GOF(XER_SO))  goto none;
   1714       if (o == GOF(XER_OV))  goto none;
   1715       if (o == GOF(XER_CA))  goto none;
   1716       if (o == GOF(XER_BC))  goto none;
   1717       if (o == GOF(CR0_321)) goto none;
   1718       if (o == GOF(CR0_0))   goto none;
   1719       if (o == GOF(CR1_321)) goto none;
   1720       if (o == GOF(CR1_0))   goto none;
   1721       if (o == GOF(CR2_321)) goto none;
   1722       if (o == GOF(CR2_0))   goto none;
   1723       if (o == GOF(CR3_321)) goto none;
   1724       if (o == GOF(CR3_0))   goto none;
   1725       if (o == GOF(CR4_321)) goto none;
   1726       if (o == GOF(CR4_0))   goto none;
   1727       if (o == GOF(CR5_321)) goto none;
   1728       if (o == GOF(CR5_0))   goto none;
   1729       if (o == GOF(CR6_321)) goto none;
   1730       if (o == GOF(CR6_0))   goto none;
   1731       if (o == GOF(CR7_321)) goto none;
   1732       if (o == GOF(CR7_0))   goto none;
   1733    }
   1734 
   1735    /* Exact accesses to FP registers */
   1736    if (o == GOF(FPR0)  && is8) goto none;
   1737    if (o == GOF(FPR1)  && is8) goto none;
   1738    if (o == GOF(FPR2)  && is8) goto none;
   1739    if (o == GOF(FPR3)  && is8) goto none;
   1740    if (o == GOF(FPR4)  && is8) goto none;
   1741    if (o == GOF(FPR5)  && is8) goto none;
   1742    if (o == GOF(FPR6)  && is8) goto none;
   1743    if (o == GOF(FPR7)  && is8) goto none;
   1744    if (o == GOF(FPR8)  && is8) goto none;
   1745    if (o == GOF(FPR9)  && is8) goto none;
   1746    if (o == GOF(FPR10) && is8) goto none;
   1747    if (o == GOF(FPR11) && is8) goto none;
   1748    if (o == GOF(FPR12) && is8) goto none;
   1749    if (o == GOF(FPR13) && is8) goto none;
   1750    if (o == GOF(FPR14) && is8) goto none;
   1751    if (o == GOF(FPR15) && is8) goto none;
   1752    if (o == GOF(FPR16) && is8) goto none;
   1753    if (o == GOF(FPR17) && is8) goto none;
   1754    if (o == GOF(FPR18) && is8) goto none;
   1755    if (o == GOF(FPR19) && is8) goto none;
   1756    if (o == GOF(FPR20) && is8) goto none;
   1757    if (o == GOF(FPR21) && is8) goto none;
   1758    if (o == GOF(FPR22) && is8) goto none;
   1759    if (o == GOF(FPR23) && is8) goto none;
   1760    if (o == GOF(FPR24) && is8) goto none;
   1761    if (o == GOF(FPR25) && is8) goto none;
   1762    if (o == GOF(FPR26) && is8) goto none;
   1763    if (o == GOF(FPR27) && is8) goto none;
   1764    if (o == GOF(FPR28) && is8) goto none;
   1765    if (o == GOF(FPR29) && is8) goto none;
   1766    if (o == GOF(FPR30) && is8) goto none;
   1767    if (o == GOF(FPR31) && is8) goto none;
   1768 
   1769    /* FP admin related */
   1770    if (o == GOF(FPROUND) && is4) goto none;
   1771    if (o == GOF(EMWARN)  && is4) goto none;
   1772 
   1773    /* Altivec registers */
   1774    if (o == GOF(VR0)  && sz == 16) goto none;
   1775    if (o == GOF(VR1)  && sz == 16) goto none;
   1776    if (o == GOF(VR2)  && sz == 16) goto none;
   1777    if (o == GOF(VR3)  && sz == 16) goto none;
   1778    if (o == GOF(VR4)  && sz == 16) goto none;
   1779    if (o == GOF(VR5)  && sz == 16) goto none;
   1780    if (o == GOF(VR6)  && sz == 16) goto none;
   1781    if (o == GOF(VR7)  && sz == 16) goto none;
   1782    if (o == GOF(VR8)  && sz == 16) goto none;
   1783    if (o == GOF(VR9)  && sz == 16) goto none;
   1784    if (o == GOF(VR10) && sz == 16) goto none;
   1785    if (o == GOF(VR11) && sz == 16) goto none;
   1786    if (o == GOF(VR12) && sz == 16) goto none;
   1787    if (o == GOF(VR13) && sz == 16) goto none;
   1788    if (o == GOF(VR14) && sz == 16) goto none;
   1789    if (o == GOF(VR15) && sz == 16) goto none;
   1790    if (o == GOF(VR16) && sz == 16) goto none;
   1791    if (o == GOF(VR17) && sz == 16) goto none;
   1792    if (o == GOF(VR18) && sz == 16) goto none;
   1793    if (o == GOF(VR19) && sz == 16) goto none;
   1794    if (o == GOF(VR20) && sz == 16) goto none;
   1795    if (o == GOF(VR21) && sz == 16) goto none;
   1796    if (o == GOF(VR22) && sz == 16) goto none;
   1797    if (o == GOF(VR23) && sz == 16) goto none;
   1798    if (o == GOF(VR24) && sz == 16) goto none;
   1799    if (o == GOF(VR25) && sz == 16) goto none;
   1800    if (o == GOF(VR26) && sz == 16) goto none;
   1801    if (o == GOF(VR27) && sz == 16) goto none;
   1802    if (o == GOF(VR28) && sz == 16) goto none;
   1803    if (o == GOF(VR29) && sz == 16) goto none;
   1804    if (o == GOF(VR30) && sz == 16) goto none;
   1805    if (o == GOF(VR31) && sz == 16) goto none;
   1806 
   1807    /* Altivec admin related */
   1808    if (o == GOF(VRSAVE) && is4) goto none;
   1809 
   1810    VG_(printf)("get_IntRegInfo(ppc64):failing on (%d,%d)\n", o, sz);
   1811    tl_assert(0);
   1812 #  undef GOF
   1813 
   1814    /* -------------------- arm -------------------- */
   1815 
   1816 #  elif defined(VGA_arm)
   1817 
   1818 #  define GOF(_fieldname) \
   1819       (offsetof(VexGuestARMState,guest_##_fieldname))
   1820 
   1821    Int  o    = offset;
   1822    Int  sz   = szB;
   1823    Bool is4  = sz == 4;
   1824    Bool is8  = sz == 8;
   1825 
   1826    tl_assert(sz > 0);
   1827    tl_assert(host_is_big_endian());
   1828 
   1829    /* Set default state to "does not intersect any int register". */
   1830    VG_(memset)( iii, 0, sizeof(*iii) );
   1831 
   1832    VG_(printf)("get_IntRegInfo(arm):failing on (%d,%d)\n", o, sz);
   1833    tl_assert(0);
   1834 
   1835 
   1836 #  else
   1837 #    error "FIXME: not implemented for this architecture"
   1838 #  endif
   1839 
   1840   exactly1:
   1841    iii->n_offsets = -1;
   1842    return;
   1843   none:
   1844    iii->n_offsets = 0;
   1845    return;
   1846   contains_o:
   1847    tl_assert(o >= 0 && 0 == (o % sizeof(UWord)));
   1848    iii->n_offsets = 1;
   1849    iii->offsets[0] = o;
   1850    return;
   1851 }
   1852 
   1853 
   1854 /* Does 'arr' describe an indexed guest state section containing host
   1855    words, that we want to shadow? */
   1856 
   1857 static Bool is_integer_guest_reg_array ( IRRegArray* arr )
   1858 {
   1859    /* --------------------- x86 --------------------- */
   1860 #  if defined(VGA_x86)
   1861    /* The x87 tag array. */
   1862    if (arr->base == offsetof(VexGuestX86State,guest_FPTAG[0])
   1863        && arr->elemTy == Ity_I8 && arr->nElems == 8)
   1864       return False;
   1865    /* The x87 register array. */
   1866    if (arr->base == offsetof(VexGuestX86State,guest_FPREG[0])
   1867        && arr->elemTy == Ity_F64 && arr->nElems == 8)
   1868       return False;
   1869 
   1870    VG_(printf)("is_integer_guest_reg_array(x86): unhandled: ");
   1871    ppIRRegArray(arr);
   1872    VG_(printf)("\n");
   1873    tl_assert(0);
   1874 
   1875    /* -------------------- amd64 -------------------- */
   1876 #  elif defined(VGA_amd64)
   1877    /* The x87 tag array. */
   1878    if (arr->base == offsetof(VexGuestAMD64State,guest_FPTAG[0])
   1879        && arr->elemTy == Ity_I8 && arr->nElems == 8)
   1880       return False;
   1881    /* The x87 register array. */
   1882    if (arr->base == offsetof(VexGuestAMD64State,guest_FPREG[0])
   1883        && arr->elemTy == Ity_F64 && arr->nElems == 8)
   1884       return False;
   1885 
   1886    VG_(printf)("is_integer_guest_reg_array(amd64): unhandled: ");
   1887    ppIRRegArray(arr);
   1888    VG_(printf)("\n");
   1889    tl_assert(0);
   1890 
   1891    /* -------------------- ppc32 -------------------- */
   1892 #  elif defined(VGA_ppc32)
   1893    /* The redir stack. */
   1894    if (arr->base == offsetof(VexGuestPPC32State,guest_REDIR_STACK[0])
   1895        && arr->elemTy == Ity_I32
   1896        && arr->nElems == VEX_GUEST_PPC32_REDIR_STACK_SIZE)
   1897       return True;
   1898 
   1899    VG_(printf)("is_integer_guest_reg_array(ppc32): unhandled: ");
   1900    ppIRRegArray(arr);
   1901    VG_(printf)("\n");
   1902    tl_assert(0);
   1903 
   1904    /* -------------------- ppc64 -------------------- */
   1905 #  elif defined(VGA_ppc64)
   1906    /* The redir stack. */
   1907    if (arr->base == offsetof(VexGuestPPC64State,guest_REDIR_STACK[0])
   1908        && arr->elemTy == Ity_I64
   1909        && arr->nElems == VEX_GUEST_PPC64_REDIR_STACK_SIZE)
   1910       return True;
   1911 
   1912    VG_(printf)("is_integer_guest_reg_array(ppc64): unhandled: ");
   1913    ppIRRegArray(arr);
   1914    VG_(printf)("\n");
   1915    tl_assert(0);
   1916 
   1917    /* -------------------- arm -------------------- */
   1918 #  elif defined(VGA_arm)
   1919    /* There are no rotating register sections on ARM. */
   1920    VG_(printf)("is_integer_guest_reg_array(arm): unhandled: ");
   1921    ppIRRegArray(arr);
   1922    VG_(printf)("\n");
   1923    tl_assert(0);
   1924 
   1925 #  else
   1926 #    error "FIXME: not implemented for this architecture"
   1927 #  endif
   1928 }
   1929 
   1930 
   1931 // END move this uglyness to pc_machine.c
   1932 
   1933 /* returns True iff given slice exactly matches an int reg.  Merely
   1934    a convenience wrapper around get_IntRegInfo. */
   1935 static Bool is_integer_guest_reg ( Int offset, Int szB )
   1936 {
   1937    IntRegInfo iii;
   1938    get_IntRegInfo( &iii, offset, szB );
   1939    tl_assert(iii.n_offsets >= -1 && iii.n_offsets <= N_INTREGINFO_OFFSETS);
   1940    return iii.n_offsets == -1;
   1941 }
   1942 
   1943 /* these assume guest and host have the same endianness and
   1944    word size (probably). */
   1945 static UWord get_guest_intreg ( ThreadId tid, Int shadowNo,
   1946                                 PtrdiffT offset, SizeT size )
   1947 {
   1948    UChar tmp[ 2 + sizeof(UWord) ];
   1949    tl_assert(size == sizeof(UWord));
   1950    tl_assert(0 == (offset % sizeof(UWord)));
   1951    VG_(memset)(tmp, 0, sizeof(tmp));
   1952    tmp[0] = 0x31;
   1953    tmp[ sizeof(tmp)-1 ] = 0x27;
   1954    VG_(get_shadow_regs_area)(tid, &tmp[1], shadowNo, offset, size);
   1955    tl_assert(tmp[0] == 0x31);
   1956    tl_assert(tmp[ sizeof(tmp)-1 ] == 0x27);
   1957    return * ((UWord*) &tmp[1] ); /* MISALIGNED LOAD */
   1958 }
   1959 static void put_guest_intreg ( ThreadId tid, Int shadowNo,
   1960                                PtrdiffT offset, SizeT size, UWord w )
   1961 {
   1962    tl_assert(size == sizeof(UWord));
   1963    tl_assert(0 == (offset % sizeof(UWord)));
   1964    VG_(set_shadow_regs_area)(tid, shadowNo, offset, size,
   1965                              (const UChar*)&w);
   1966 }
   1967 
   1968 /* Initialise the integer shadow registers to UNKNOWN.  This is a bit
   1969    of a nasty kludge, but it does mean we don't need to know which
   1970    registers we really need to initialise -- simply assume that all
   1971    integer registers will be naturally aligned w.r.t. the start of the
   1972    guest state, and fill in all possible entries. */
   1973 static void init_shadow_registers ( ThreadId tid )
   1974 {
   1975    Int i, wordSzB = sizeof(UWord);
   1976    for (i = 0; i < PC_SIZEOF_GUEST_STATE-wordSzB; i += wordSzB) {
   1977       put_guest_intreg( tid, 1, i, wordSzB, (UWord)UNKNOWN );
   1978    }
   1979 }
   1980 
   1981 static void post_reg_write_nonptr ( ThreadId tid, PtrdiffT offset, SizeT size )
   1982 {
   1983    // syscall_return: Default is non-pointer.  If it really is a pointer
   1984    // (eg. for mmap()), SK_(post_syscall) sets it again afterwards.
   1985    //
   1986    // clientreq_return: All the global client requests return non-pointers
   1987    // (except possibly CLIENT_CALL[0123], but they're handled by
   1988    // post_reg_write_clientcall, not here).
   1989    //
   1990    if (is_integer_guest_reg( (Int)offset, (Int)size )) {
   1991       put_guest_intreg( tid, 1, offset, size, (UWord)NONPTR );
   1992    }
   1993    else
   1994    if (size == 1 || size == 2) {
   1995       /* can't possibly be an integer guest reg.  Ignore. */
   1996    }
   1997    else {
   1998       // DDD: on Darwin, this assertion fails because we currently do a
   1999       // 'post_reg_write' on the 'guest_CC_DEP1' pseudo-register.
   2000       // JRS 2009July13: we should change is_integer_guest_reg()
   2001       // to accept guest_CC_DEP* and guest_CC_NDEP
   2002       // as legitimate pointer-holding registers
   2003       tl_assert(0);
   2004    }
   2005    //   VG_(set_thread_shadow_archreg)( tid, reg, (UInt)NONPTR );
   2006 }
   2007 
   2008 static void post_reg_write_nonptr_or_unknown ( ThreadId tid,
   2009                                                PtrdiffT offset, SizeT size )
   2010 {
   2011    // deliver_signal: called from two places; one sets the reg to zero, the
   2012    // other sets the stack pointer.
   2013    //
   2014    if (is_integer_guest_reg( (Int)offset, (Int)size )) {
   2015       put_guest_intreg(
   2016          tid, 1/*shadowno*/, offset, size,
   2017          (UWord)nonptr_or_unknown(
   2018                    get_guest_intreg( tid, 0/*shadowno*/,
   2019                                      offset, size )));
   2020    } else {
   2021       tl_assert(0);
   2022    }
   2023 }
   2024 
   2025 void h_post_reg_write_demux ( CorePart part, ThreadId tid,
   2026                               PtrdiffT guest_state_offset, SizeT size)
   2027 {
   2028    if (0)
   2029    VG_(printf)("post_reg_write_demux: tid %d part %d off %ld size %ld\n",
   2030                (Int)tid, (Int)part,
   2031               guest_state_offset, size);
   2032    switch (part) {
   2033       case Vg_CoreStartup:
   2034          /* This is a bit of a kludge since for any Vg_CoreStartup
   2035             event we overwrite the entire shadow register set.  But
   2036             that's ok - we're only called once with
   2037             part==Vg_CoreStartup event, and in that case the supplied
   2038             offset & size cover the entire guest state anyway. */
   2039          init_shadow_registers(tid);
   2040          break;
   2041       case Vg_CoreSysCall:
   2042          if (0) VG_(printf)("ZZZZZZZ p_r_w    -> NONPTR\n");
   2043          post_reg_write_nonptr( tid, guest_state_offset, size );
   2044          break;
   2045       case Vg_CoreClientReq:
   2046          post_reg_write_nonptr( tid, guest_state_offset, size );
   2047          break;
   2048       case Vg_CoreSignal:
   2049          post_reg_write_nonptr_or_unknown( tid, guest_state_offset, size );
   2050          break;
   2051       default:
   2052          tl_assert(0);
   2053    }
   2054 }
   2055 
   2056 void h_post_reg_write_clientcall(ThreadId tid, PtrdiffT guest_state_offset,
   2057                                  SizeT size, Addr f )
   2058 {
   2059    UWord p;
   2060 
   2061    // Having to do this is a bit nasty...
   2062    if (f == (Addr)h_replace_malloc
   2063        || f == (Addr)h_replace___builtin_new
   2064        || f == (Addr)h_replace___builtin_vec_new
   2065        || f == (Addr)h_replace_calloc
   2066        || f == (Addr)h_replace_memalign
   2067        || f == (Addr)h_replace_realloc)
   2068    {
   2069       // We remembered the last added segment;  make sure it's the right one.
   2070       /* What's going on: at this point, the scheduler has just called
   2071          'f' -- one of our malloc replacement functions -- and it has
   2072          returned.  The return value has been written to the guest
   2073          state of thread 'tid', offset 'guest_state_offset' length
   2074          'size'.  We need to look at that return value and set the
   2075          shadow return value accordingly.  The shadow return value
   2076          required is handed to us "under the counter" through the
   2077          global variable 'last_seg_added'.  This is all very ugly, not
   2078          to mention, non-thread-safe should V ever become
   2079          multithreaded. */
   2080       /* assert the place where the return value is is a legit int reg */
   2081       tl_assert(is_integer_guest_reg(guest_state_offset, size));
   2082       /* Now we need to look at the returned value, to see whether the
   2083          malloc succeeded or not. */
   2084       p = get_guest_intreg(tid, 0/*non-shadow*/, guest_state_offset, size);
   2085       if ((UWord)NULL == p) {
   2086          // if alloc failed, eg. realloc on bogus pointer
   2087          put_guest_intreg(tid, 1/*first-shadow*/,
   2088                           guest_state_offset, size, (UWord)NONPTR );
   2089       } else {
   2090          // alloc didn't fail.  Check we have the correct segment.
   2091          tl_assert(p == last_seg_added->addr);
   2092          put_guest_intreg(tid, 1/*first-shadow*/,
   2093                           guest_state_offset, size, (UWord)last_seg_added );
   2094       }
   2095    }
   2096    else if (f == (Addr)h_replace_free
   2097             || f == (Addr)h_replace___builtin_delete
   2098             || f == (Addr)h_replace___builtin_vec_delete
   2099          // || f == (Addr)VG_(cli_block_size)
   2100             || f == (Addr)VG_(message))
   2101    {
   2102       // Probably best to set the (non-existent!) return value to
   2103       // non-pointer.
   2104       tl_assert(is_integer_guest_reg(guest_state_offset, size));
   2105       put_guest_intreg(tid, 1/*first-shadow*/,
   2106                        guest_state_offset, size, (UWord)NONPTR );
   2107    }
   2108    else {
   2109       // Anything else, probably best to set return value to non-pointer.
   2110       //VG_(set_thread_shadow_archreg)(tid, reg, (UInt)UNKNOWN);
   2111       Char fbuf[100];
   2112       VG_(printf)("f = %#lx\n", f);
   2113       VG_(get_fnname)(f, fbuf, 100);
   2114       VG_(printf)("name = %s\n", fbuf);
   2115       VG_(tool_panic)("argh: clientcall");
   2116    }
   2117 }
   2118 
   2119 
   2120 //zz /*--------------------------------------------------------------------*/
   2121 //zz /*--- Sanity checking                                              ---*/
   2122 //zz /*--------------------------------------------------------------------*/
   2123 //zz
   2124 //zz /* Check that nobody has spuriously claimed that the first or last 16
   2125 //zz    pages (64 KB) of address space have become accessible.  Failure of
   2126 //zz    the following do not per se indicate an internal consistency
   2127 //zz    problem, but they are so likely to that we really want to know
   2128 //zz    about it if so. */
//zz Bool pc_replace_cheap_sanity_check ( void )
   2130 //zz {
   2131 //zz    if (IS_DISTINGUISHED_SM(primary_map[0])
   2132 //zz        /* kludge: kernel drops a page up at top of address range for
   2133 //zz           magic "optimized syscalls", so we can no longer check the
   2134 //zz           highest page */
   2135 //zz        /* && IS_DISTINGUISHED_SM(primary_map[65535]) */
   2136 //zz       )
   2137 //zz       return True;
   2138 //zz    else
   2139 //zz       return False;
   2140 //zz }
   2141 //zz
   2142 //zz Bool SK_(expensive_sanity_check) ( void )
   2143 //zz {
   2144 //zz    Int i;
   2145 //zz
   2146 //zz    /* Make sure nobody changed the distinguished secondary. */
   2147 //zz    for (i = 0; i < SEC_MAP_WORDS; i++)
   2148 //zz       if (distinguished_secondary_map.vseg[i] != UNKNOWN)
   2149 //zz          return False;
   2150 //zz
   2151 //zz    return True;
   2152 //zz }
   2153 
   2154 
   2155 /*--------------------------------------------------------------------*/
   2156 /*--- System calls                                                 ---*/
   2157 /*--------------------------------------------------------------------*/
   2158 
/* Called by the core just before each syscall is executed.  Ptrcheck
   does all of its syscall handling after the call returns (via the
   post-syscall table below), so this is deliberately a no-op. */
void h_pre_syscall ( ThreadId tid, UInt sysno,
                     UWord* args, UInt nArgs )
{
   /* we don't do anything at the pre-syscall point */
}
   2164 
   2165 /* The post-syscall table is a table of pairs (number, flag).
   2166 
   2167    'flag' is only ever zero or one.  If it is zero, it indicates that
   2168    default handling for that syscall is required -- namely that the
   2169    syscall is deemed to return NONPTR.  This is the case for the vast
   2170    majority of syscalls.  If it is one then some special
   syscall-specific handling is required.  No further details of it
   2172    are stored in the table.
   2173 
   2174    On Linux and Darwin, 'number' is a __NR_xxx constant.
   2175 
   2176    On AIX5, 'number' is an Int*, which points to the Int variable
   2177    holding the currently assigned number for this syscall.
   2178 
   2179    When querying the table, we compare the supplied syscall number
   2180    with the 'number' field (directly on Linux and Darwin, after
   2181    dereferencing on AIX5), to find the relevant entry.  This requires a
   2182    linear search of the table.  To stop the costs getting too high, the
   2183    table is incrementally rearranged after each search, to move commonly
   2184    requested items a bit closer to the front.
   2185 
   2186    The table is built once, the first time it is used.  After that we
   2187    merely query it (and reorder the entries as a result). */
   2188 
   2189 static XArray* /* of UWordPair */ post_syscall_table = NULL;
   2190 
   2191 static void setup_post_syscall_table ( void )
   2192 {
   2193    tl_assert(!post_syscall_table);
   2194    post_syscall_table = VG_(newXA)( VG_(malloc), "pc.h_main.spst.1",
   2195                                     VG_(free), sizeof(UWordPair) );
   2196    tl_assert(post_syscall_table);
   2197 
   2198    /* --------------- LINUX --------------- */
   2199 
   2200 #  if defined(VGO_linux)
   2201 
   2202 #     define ADD(_flag, _syscallname) \
   2203          do { UWordPair p; p.uw1 = (_syscallname); p.uw2 = (_flag); \
   2204               VG_(addToXA)( post_syscall_table, &p ); \
   2205          } while (0)
   2206 
   2207       /* These ones definitely don't return pointers.  They're not
   2208          particularly grammatical, either. */
   2209 
   2210 #     if defined(__NR__llseek)
   2211       ADD(0, __NR__llseek);
   2212 #     endif
   2213       ADD(0, __NR__sysctl);
   2214 #     if defined(__NR__newselect)
   2215       ADD(0, __NR__newselect);
   2216 #     endif
   2217 #     if defined(__NR_accept)
   2218       ADD(0, __NR_accept);
   2219 #     endif
   2220       ADD(0, __NR_access);
   2221       ADD(0, __NR_alarm);
   2222 #     if defined(__NR_bind)
   2223       ADD(0, __NR_bind);
   2224 #     endif
   2225 #     if defined(__NR_chdir)
   2226       ADD(0, __NR_chdir);
   2227 #     endif
   2228       ADD(0, __NR_chmod);
   2229       ADD(0, __NR_chown);
   2230 #     if defined(__NR_chown32)
   2231       ADD(0, __NR_chown32);
   2232 #     endif
   2233       ADD(0, __NR_clock_getres);
   2234       ADD(0, __NR_clock_gettime);
   2235       ADD(0, __NR_clone);
   2236       ADD(0, __NR_close);
   2237 #     if defined(__NR_connect)
   2238       ADD(0, __NR_connect);
   2239 #     endif
   2240       ADD(0, __NR_creat);
   2241       ADD(0, __NR_dup);
   2242       ADD(0, __NR_dup2);
   2243       ADD(0, __NR_epoll_create);
   2244 #     if defined(__NR_epoll_create1)
   2245       ADD(0, __NR_epoll_create1);
   2246 #     endif
   2247       ADD(0, __NR_epoll_ctl);
   2248 #     if defined(__NR_epoll_pwait)
   2249       ADD(0, __NR_epoll_pwait);
   2250 #     endif
   2251       ADD(0, __NR_epoll_wait);
   2252       ADD(0, __NR_execve); /* presumably we see this because the call failed? */
   2253       ADD(0, __NR_exit); /* hmm, why are we still alive? */
   2254       ADD(0, __NR_exit_group);
   2255       ADD(0, __NR_fadvise64);
   2256       ADD(0, __NR_fallocate);
   2257       ADD(0, __NR_fchmod);
   2258       ADD(0, __NR_fchown);
   2259 #     if defined(__NR_fchown32)
   2260       ADD(0, __NR_fchown32);
   2261 #     endif
   2262       ADD(0, __NR_fcntl);
   2263 #     if defined(__NR_fcntl64)
   2264       ADD(0, __NR_fcntl64);
   2265 #     endif
   2266       ADD(0, __NR_fdatasync);
   2267       ADD(0, __NR_flock);
   2268       ADD(0, __NR_fstat);
   2269 #     if defined(__NR_fstat64)
   2270       ADD(0, __NR_fstat64);
   2271 #     endif
   2272       ADD(0, __NR_fstatfs);
   2273  #     if defined(__NR_fstatfs64)
   2274       ADD(0, __NR_fstatfs64);
   2275 #     endif
   2276      ADD(0, __NR_fsync);
   2277       ADD(0, __NR_ftruncate);
   2278 #     if defined(__NR_ftruncate64)
   2279       ADD(0, __NR_ftruncate64);
   2280 #     endif
   2281       ADD(0, __NR_futex);
   2282       ADD(0, __NR_getcwd);
   2283       ADD(0, __NR_getdents); // something to do with teeth
   2284       ADD(0, __NR_getdents64);
   2285       ADD(0, __NR_getegid);
   2286 #     if defined(__NR_getegid32)
   2287       ADD(0, __NR_getegid32);
   2288 #     endif
   2289       ADD(0, __NR_geteuid);
   2290 #     if defined(__NR_geteuid32)
   2291       ADD(0, __NR_geteuid32);
   2292 #     endif
   2293       ADD(0, __NR_getgid);
   2294 #     if defined(__NR_getgid32)
   2295       ADD(0, __NR_getgid32);
   2296 #     endif
   2297       ADD(0, __NR_getgroups);
   2298 #     if defined(__NR_getgroups32)
   2299       ADD(0, __NR_getgroups32);
   2300 #     endif
   2301       ADD(0, __NR_getitimer);
   2302 #     if defined(__NR_getpeername)
   2303       ADD(0, __NR_getpeername);
   2304 #     endif
   2305       ADD(0, __NR_getpid);
   2306       ADD(0, __NR_getpgrp);
   2307       ADD(0, __NR_getppid);
   2308       ADD(0, __NR_getpriority);
   2309       ADD(0, __NR_getresgid);
   2310 #     if defined(__NR_getresgid32)
   2311       ADD(0, __NR_getresgid32);
   2312 #     endif
   2313       ADD(0, __NR_getresuid);
   2314 #     if defined(__NR_getresuid32)
   2315       ADD(0, __NR_getresuid32);
   2316 #     endif
   2317       ADD(0, __NR_getrlimit);
   2318       ADD(0, __NR_getrusage);
   2319       ADD(0, __NR_getsid);
   2320 #     if defined(__NR_getsockname)
   2321       ADD(0, __NR_getsockname);
   2322 #     endif
   2323 #     if defined(__NR_getsockopt)
   2324       ADD(0, __NR_getsockopt);
   2325 #     endif
   2326       ADD(0, __NR_gettid);
   2327       ADD(0, __NR_gettimeofday);
   2328       ADD(0, __NR_getuid);
   2329 #     if defined(__NR_getuid32)
   2330       ADD(0, __NR_getuid32);
   2331 #     endif
   2332       ADD(0, __NR_getxattr);
   2333 #     if defined(__NR_ioperm)
   2334       ADD(0, __NR_ioperm);
   2335 #     endif
   2336       ADD(0, __NR_inotify_add_watch);
   2337       ADD(0, __NR_inotify_init);
   2338 #     if defined(__NR_inotify_init1)
   2339       ADD(0, __NR_inotify_init1);
   2340 #     endif
   2341       ADD(0, __NR_inotify_rm_watch);
   2342       ADD(0, __NR_ioctl); // ioctl -- assuming no pointers returned
   2343       ADD(0, __NR_ioprio_get);
   2344       ADD(0, __NR_kill);
   2345       ADD(0, __NR_lgetxattr);
   2346       ADD(0, __NR_link);
   2347 #     if defined(__NR_listen)
   2348       ADD(0, __NR_listen);
   2349 #     endif
   2350       ADD(0, __NR_lseek);
   2351       ADD(0, __NR_lstat);
   2352 #     if defined(__NR_lstat64)
   2353       ADD(0, __NR_lstat64);
   2354 #     endif
   2355       ADD(0, __NR_madvise);
   2356       ADD(0, __NR_mkdir);
   2357       ADD(0, __NR_mlock);
   2358       ADD(0, __NR_mlockall);
   2359       ADD(0, __NR_mprotect);
   2360 #     if defined(__NR_mq_open)
   2361       ADD(0, __NR_mq_open);
   2362       ADD(0, __NR_mq_unlink);
   2363       ADD(0, __NR_mq_timedsend);
   2364       ADD(0, __NR_mq_timedreceive);
   2365       ADD(0, __NR_mq_notify);
   2366       ADD(0, __NR_mq_getsetattr);
   2367 #     endif
   2368       ADD(0, __NR_munmap); // die_mem_munmap already called, segment remove);
   2369       ADD(0, __NR_nanosleep);
   2370       ADD(0, __NR_open);
   2371       ADD(0, __NR_personality);
   2372       ADD(0, __NR_pipe);
   2373 #     if defined(__NR_pipe2)
   2374       ADD(0, __NR_pipe2);
   2375 #     endif
   2376       ADD(0, __NR_poll);
   2377       ADD(0, __NR_prctl);
   2378       ADD(0, __NR_pread64);
   2379       ADD(0, __NR_pwrite64);
   2380       ADD(0, __NR_read);
   2381       ADD(0, __NR_readlink);
   2382       ADD(0, __NR_readv);
   2383 #     if defined(__NR_recvfrom)
   2384       ADD(0, __NR_recvfrom);
   2385 #     endif
   2386 #     if defined(__NR_recvmsg)
   2387       ADD(0, __NR_recvmsg);
   2388 #     endif
   2389       ADD(0, __NR_rename);
   2390       ADD(0, __NR_rmdir);
   2391       ADD(0, __NR_rt_sigaction);
   2392       ADD(0, __NR_rt_sigprocmask);
   2393       ADD(0, __NR_rt_sigreturn); /* not sure if we should see this or not */
   2394       ADD(0, __NR_rt_sigsuspend);
   2395       ADD(0, __NR_rt_sigtimedwait);
   2396       ADD(0, __NR_sched_get_priority_max);
   2397       ADD(0, __NR_sched_get_priority_min);
   2398       ADD(0, __NR_sched_getaffinity);
   2399       ADD(0, __NR_sched_getparam);
   2400       ADD(0, __NR_sched_getscheduler);
   2401       ADD(0, __NR_sched_setaffinity);
   2402       ADD(0, __NR_sched_setscheduler);
   2403       ADD(0, __NR_sched_yield);
   2404       ADD(0, __NR_select);
   2405 #     if defined(__NR_semctl)
   2406       ADD(0, __NR_semctl);
   2407 #     endif
   2408 #     if defined(__NR_semget)
   2409       ADD(0, __NR_semget);
   2410 #     endif
   2411 #     if defined(__NR_semop)
   2412       ADD(0, __NR_semop);
   2413 #     endif
   2414       ADD(0, __NR_sendfile);
   2415 #     if defined(__NR_sendto)
   2416       ADD(0, __NR_sendto);
   2417 #     endif
   2418 #     if defined(__NR_sendmsg)
   2419       ADD(0, __NR_sendmsg);
   2420 #     endif
   2421       ADD(0, __NR_set_robust_list);
   2422 #     if defined(__NR_set_thread_area)
   2423       ADD(0, __NR_set_thread_area);
   2424 #     endif
   2425       ADD(0, __NR_set_tid_address);
   2426       ADD(0, __NR_setgid);
   2427       ADD(0, __NR_setfsgid);
   2428       ADD(0, __NR_setfsuid);
   2429       ADD(0, __NR_setgid);
   2430       ADD(0, __NR_setgroups);
   2431       ADD(0, __NR_setitimer);
   2432       ADD(0, __NR_setpgid);
   2433       ADD(0, __NR_setpriority);
   2434       ADD(0, __NR_setregid);
   2435       ADD(0, __NR_setresgid);
   2436       ADD(0, __NR_setresuid);
   2437       ADD(0, __NR_setreuid);
   2438       ADD(0, __NR_setrlimit);
   2439       ADD(0, __NR_setsid);
   2440 #     if defined(__NR_setsockopt)
   2441       ADD(0, __NR_setsockopt);
   2442 #     endif
   2443       ADD(0, __NR_setuid);
   2444 #     if defined(__NR_shmctl)
   2445       ADD(0, __NR_shmctl);
   2446       ADD(0, __NR_shmdt);
   2447 #     endif
   2448 #     if defined(__NR_shutdown)
   2449       ADD(0, __NR_shutdown);
   2450 #     endif
   2451       ADD(0, __NR_sigaltstack);
   2452 #     if defined(__NR_socket)
   2453       ADD(0, __NR_socket);
   2454 #     endif
   2455 #     if defined(__NR_socketcall)
   2456       ADD(0, __NR_socketcall); /* the nasty x86-linux socket multiplexor */
   2457 #     endif
   2458 #     if defined(__NR_socketpair)
   2459       ADD(0, __NR_socketpair);
   2460 #     endif
   2461 #     if defined(__NR_statfs64)
   2462       ADD(0, __NR_statfs64);
   2463 #     endif
   2464 #     if defined(__NR_sigreturn)
   2465       ADD(0, __NR_sigreturn); /* not sure if we should see this or not */
   2466 #     endif
   2467 #     if defined(__NR_stat64)
   2468       ADD(0, __NR_stat64);
   2469 #     endif
   2470       ADD(0, __NR_stat);
   2471       ADD(0, __NR_statfs);
   2472       ADD(0, __NR_symlink);
   2473       ADD(0, __NR_sysinfo);
   2474       ADD(0, __NR_tgkill);
   2475       ADD(0, __NR_time);
   2476       ADD(0, __NR_times);
   2477       ADD(0, __NR_truncate);
   2478 #     if defined(__NR_truncate64)
   2479       ADD(0, __NR_truncate64);
   2480 #     endif
   2481 #     if defined(__NR_ugetrlimit)
   2482       ADD(0, __NR_ugetrlimit);
   2483 #     endif
   2484       ADD(0, __NR_umask);
   2485       ADD(0, __NR_uname);
   2486       ADD(0, __NR_unlink);
   2487       ADD(0, __NR_utime);
   2488       ADD(0, __NR_vfork);
   2489 #     if defined(__NR_waitpid)
   2490       ADD(0, __NR_waitpid);
   2491 #     endif
   2492       ADD(0, __NR_wait4);
   2493       ADD(0, __NR_write);
   2494       ADD(0, __NR_writev);
   2495 
   2496       /* Whereas the following need special treatment */
   2497 #     if defined(__NR_arch_prctl)
   2498       ADD(1, __NR_arch_prctl);
   2499 #     endif
   2500       ADD(1, __NR_brk);
   2501       ADD(1, __NR_mmap);
   2502 #     if defined(__NR_mmap2)
   2503       ADD(1, __NR_mmap2);
   2504 #     endif
   2505 #     if defined(__NR_shmat)
   2506       ADD(1, __NR_shmat);
   2507 #     endif
   2508 #     if defined(__NR_shmget)
   2509       ADD(1, __NR_shmget);
   2510 #     endif
   2511 #     if defined(__NR_ipc) && defined(VKI_SHMAT)
   2512       ADD(1, __NR_ipc); /* ppc{32,64}-linux horrors */
   2513 #     endif
   2514 
   2515    /* --------------- AIX5 --------------- */
   2516 
   2517 #  elif defined(VGO_aix5)
   2518 
   2519 #     define ADD(_flag, _syscallname) \
   2520          do { \
   2521             UWordPair p; \
   2522             if ((_syscallname) != __NR_AIX5_UNKNOWN) { \
   2523                p.uw1 = (UWord)&(_syscallname); p.uw2 = (_flag); \
   2524                VG_(addToXA)( post_syscall_table, &p ); \
   2525             } \
   2526          } while (0)
   2527 
   2528       /* Just a minimal set of handlers, enough to make
   2529          a 32- and 64-bit hello-world program run. */
   2530       ADD(1, __NR_AIX5___loadx); /* not sure what to do here */
   2531       ADD(0, __NR_AIX5__exit);
   2532       ADD(0, __NR_AIX5_access);
   2533       ADD(0, __NR_AIX5_getgidx);
   2534       ADD(0, __NR_AIX5_getuidx);
   2535       ADD(0, __NR_AIX5_kfcntl);
   2536       ADD(0, __NR_AIX5_kioctl);
   2537       ADD(1, __NR_AIX5_kload); /* not sure what to do here */
   2538       ADD(0, __NR_AIX5_kwrite);
   2539 
   2540    /* --------------- DARWIN ------------- */
   2541 
   2542 #  elif defined(VGO_darwin)
   2543 
   2544 #     define ADD(_flag, _syscallname) \
   2545          do { UWordPair p; p.uw1 = (_syscallname); p.uw2 = (_flag); \
   2546               VG_(addToXA)( post_syscall_table, &p ); \
   2547          } while (0)
   2548 
   2549       // DDD: a desultory attempt thus far...
   2550 
   2551       // Unix/BSD syscalls.
   2552 
   2553       // Mach traps.
   2554       ADD(0, __NR_host_self_trap);
   2555       ADD(0, __NR_mach_msg_trap);
   2556       ADD(0, __NR_mach_reply_port);
   2557       ADD(0, __NR_task_self_trap);
   2558 
   2559       // Machine-dependent syscalls.
   2560       ADD(0, __NR_thread_fast_set_cthread_self);
   2561 
   2562    /* ------------------------------------ */
   2563 
   2564 #  else
   2565 #     error "Unsupported OS"
   2566 #  endif
   2567 
   2568 #  undef ADD
   2569 }
   2570 
   2571 
   2572 void h_post_syscall ( ThreadId tid, UInt sysno,
   2573                       UWord* args, UInt nArgs, SysRes res )
   2574 {
   2575    Word i, n;
   2576    UWordPair* pair;
   2577 
   2578    if (!post_syscall_table)
   2579       setup_post_syscall_table();
   2580 
   2581    /* search for 'sysno' in the post_syscall_table */
   2582    n = VG_(sizeXA)( post_syscall_table );
   2583    for (i = 0; i < n; i++) {
   2584       pair = VG_(indexXA)( post_syscall_table, i );
   2585 #     if defined(VGO_linux) || defined(VGO_darwin)
   2586       if (pair->uw1 == (UWord)sysno)
   2587          break;
   2588 #     elif defined(VGO_aix5)
   2589       if (*(Int*)(pair->uw1) == (Int)sysno)
   2590          break;
   2591 #     else
   2592 #        error "Unsupported OS"
   2593 #     endif
   2594    }
   2595 
   2596    tl_assert(i >= 0 && i <= n);
   2597 
   2598    if (i == n) {
   2599       VG_(printf)("sysno == %s", VG_SYSNUM_STRING_EXTRA(sysno));
   2600       VG_(tool_panic)("unhandled syscall");
   2601    }
   2602 
   2603    /* So we found the relevant entry.  Move it one step
   2604       forward so as to speed future accesses to it. */
   2605    if (i > 0) {
   2606       UWordPair tmp, *p, *q;
   2607       p = VG_(indexXA)( post_syscall_table, i-1 );
   2608       q = VG_(indexXA)( post_syscall_table, i-0 );
   2609       tmp = *p;
   2610       *p = *q;
   2611       *q = tmp;
   2612       i--;
   2613    }
   2614 
   2615    /* Deal with the common case */
   2616    pair = VG_(indexXA)( post_syscall_table, i );
   2617    if (pair->uw2 == 0)
   2618       /* the common case */
   2619       goto res_NONPTR_err_NONPTR;
   2620 
   2621    /* Special handling for all remaining cases */
   2622    tl_assert(pair->uw2 == 1);
   2623 
   2624 #  if defined(__NR_arch_prctl)
   2625    if (sysno == __NR_arch_prctl) {
   2626       /* This is nasty.  On amd64-linux, arch_prctl may write a
   2627          value to guest_FS_ZERO, and we need to shadow that value.
   2628          Hence apply nonptr_or_unknown to it here, after the
   2629          syscall completes. */
   2630       post_reg_write_nonptr_or_unknown( tid, PC_OFF_FS_ZERO,
   2631                                              PC_SZB_FS_ZERO );
   2632       goto res_NONPTR_err_NONPTR;
   2633    }
   2634 #  endif
   2635 
   2636 #  if defined(__NR_brk)
   2637    // With brk(), result (of kernel syscall, not glibc wrapper) is a heap
   2638    // pointer.  Make the shadow UNKNOWN.
   2639    if (sysno == __NR_brk)
   2640       goto res_UNKNOWN_err_NONPTR;
   2641 #  endif
   2642 
   2643    // With mmap, new_mem_mmap() has already been called and added the
   2644    // segment (we did it there because we had the result address and size
   2645    // handy).  So just set the return value shadow.
   2646    if (sysno == __NR_mmap
   2647 #      if defined(__NR_mmap2)
   2648        || sysno == __NR_mmap2
   2649 #      endif
   2650 #      if defined(__NR_AIX5___loadx)
   2651        || (sysno == __NR_AIX5___loadx && __NR_AIX5___loadx != __NR_AIX5_UNKNOWN)
   2652 #      endif
   2653 #      if defined(__NR_AIX5_kload)
   2654        || (sysno == __NR_AIX5_kload && __NR_AIX5_kload != __NR_AIX5_UNKNOWN)
   2655 #      endif
   2656       ) {
   2657       if (sr_isError(res)) {
   2658          // mmap() had an error, return value is a small negative integer
   2659          goto res_NONPTR_err_NONPTR;
   2660       } else {
   2661          goto res_UNKNOWN_err_NONPTR;
   2662       }
   2663       return;
   2664    }
   2665 
   2666    // shmat uses the same scheme.  We will just have had a
   2667    // notification via new_mem_mmap.  Just set the return value shadow.
   2668 #  if defined(__NR_shmat)
   2669    if (sysno == __NR_shmat) {
   2670       if (sr_isError(res)) {
   2671          goto res_NONPTR_err_NONPTR;
   2672       } else {
   2673          goto res_UNKNOWN_err_NONPTR;
   2674       }
   2675    }
   2676 #  endif
   2677 
   2678 #  if defined(__NR_shmget)
   2679    if (sysno == __NR_shmget)
   2680       // FIXME: is this correct?
   2681       goto res_UNKNOWN_err_NONPTR;
   2682 #  endif
   2683 
   2684 #  if defined(__NR_ipc) && defined(VKI_SHMAT)
   2685    /* perhaps this should be further conditionalised with
   2686       && (defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)
   2687       Note, this just copies the behaviour of __NR_shmget above.
   2688 
   2689       JRS 2009 June 02: it seems that the return value from
   2690       sys_ipc(VKI_SHMAT, ...) doesn't have much relationship to the
   2691       result returned by the originating user-level shmat call.  It's
   2692       different (and much lower) by a large but integral number of
   2693       pages.  I don't have time to chase this right now.  Observed on
   2694       ppc{32,64}-linux.  Result appears to be false errors from apps
   2695       using shmat.  Confusion though -- shouldn't be related to the
   2696       actual numeric values returned by the syscall, though, should
   2697       it?  Confused.  Maybe some bad interaction with a
   2698       nonpointer-or-unknown heuristic? */
   2699    if (sysno == __NR_ipc) {
   2700       if (args[0] == VKI_SHMAT) {
   2701          goto res_UNKNOWN_err_NONPTR;
   2702       } else {
   2703          goto res_NONPTR_err_NONPTR;
   2704       }
   2705    }
   2706 #  endif
   2707 
   2708    /* If we get here, it implies the corresponding entry in
   2709       post_syscall_table has .w2 == 1, which in turn implies there
   2710       should be special-case code for it above. */
   2711    tl_assert(0);
   2712 
   2713   res_NONPTR_err_NONPTR:
   2714    VG_(set_syscall_return_shadows)( tid, /* retval */ (UWord)NONPTR, 0,
   2715                                          /* error */  (UWord)NONPTR, 0 );
   2716    return;
   2717 
   2718   res_UNKNOWN_err_NONPTR:
   2719    VG_(set_syscall_return_shadows)( tid, /* retval */ (UWord)UNKNOWN, 0,
   2720                                          /* error */  (UWord)NONPTR, 0 );
   2721    return;
   2722 }
   2723 
   2724 
   2725 /*--------------------------------------------------------------------*/
   2726 /*--- Functions called from generated code                         ---*/
   2727 /*--------------------------------------------------------------------*/
   2728 
#if SC_SEGS
/* Sanity-check a shadow value: it must be one of the three sentinel
   values (UNKNOWN, NONPTR, BOTTOM) or a plausible real segment. */
static void checkSeg ( Seg vseg ) {
   tl_assert(vseg == UNKNOWN || vseg == NONPTR || vseg == BOTTOM
             || Seg__plausible(vseg) );
}
#endif
   2735 
   2736 // XXX: could be more sophisticated -- actually track the lowest/highest
   2737 // valid address used by the program, and then return False for anything
   2738 // below that (using a suitable safety margin).  Also, nothing above
   2739 // 0xc0000000 is valid [unless you've changed that in your kernel]
   2740 static inline Bool looks_like_a_pointer(Addr a)
   2741 {
   2742 #  if defined(VGA_x86) || defined(VGA_ppc32)
   2743    tl_assert(sizeof(UWord) == 4);
   2744    return (a > 0x800000UL && a < 0xFF000000UL);
   2745 
   2746 #  elif defined(VGA_amd64) || defined(VGA_ppc64)
   2747    tl_assert(sizeof(UWord) == 8);
   2748    return (a >= 16 * 0x10000UL && a < 0xFF00000000000000UL);
   2749 
   2750 #  elif defined(VGA_arm)
   2751    /* Unfortunately arm-linux seems to load the exe at very low, at
   2752       0x8000, so we have to assume any value above that is a pointer,
   2753       which is pretty dismal. */
   2754    tl_assert(sizeof(UWord) == 4);
   2755    return (a >= 0x00008000UL && a < 0xFF000000UL);
   2756 
   2757 #  else
   2758 #    error "Unsupported architecture"
   2759 #  endif
   2760 }
   2761 
   2762 static inline VG_REGPARM(1)
   2763 Seg* nonptr_or_unknown(UWord x)
   2764 {
   2765    Seg* res = looks_like_a_pointer(x) ? UNKNOWN : NONPTR;
   2766    if (0) VG_(printf)("nonptr_or_unknown %s %#lx\n",
   2767                       res==UNKNOWN ? "UUU" : "nnn", x);
   2768    return res;
   2769 }
   2770 
   2771 //zz static __attribute__((regparm(1)))
   2772 //zz void print_BB_entry(UInt bb)
   2773 //zz {
   2774 //zz    VG_(printf)("%u =\n", bb);
   2775 //zz }
   2776 
   2777 //static ULong stats__tot_mem_refs  = 0;
   2778 //static ULong stats__refs_in_a_seg = 0;
   2779 //static ULong stats__refs_lost_seg = 0;
   2780 
/* One "lossage" record: an allocation/access context and how many
   times a typed-pointer loss was observed there. */
typedef
   struct { ExeContext* ec; UWord count; }
   Lossage;

/* Set of Lossage records, keyed on the ExeContext; created lazily by
   init_lossage. */
static OSet* lossage = NULL;
   2786 
   2787 //static void inc_lossage ( ExeContext* ec )
   2788 //{
   2789 //   Lossage key, *res, *nyu;
   2790 //   key.ec = ec;
   2791 //   key.count = 0; /* frivolous */
   2792 //   res = VG_(OSetGen_Lookup)(lossage, &key);
   2793 //   if (res) {
   2794 //      tl_assert(res->ec == ec);
   2795 //      res->count++;
   2796 //   } else {
   2797 //      nyu = (Lossage*)VG_(OSetGen_AllocNode)(lossage, sizeof(Lossage));
   2798 //      tl_assert(nyu);
   2799 //      nyu->ec = ec;
   2800 //      nyu->count = 1;
   2801 //      VG_(OSetGen_Insert)( lossage, nyu );
   2802 //   }
   2803 //}
   2804 
/* Create the (initially empty) lossage set, keyed on the ExeContext
   field of each Lossage record. */
static void init_lossage ( void )
{
   lossage = VG_(OSetGen_Create)( /*keyOff*/ offsetof(Lossage,ec),
                                  /*fastCmp*/NULL,
                                  VG_(malloc), "pc.h_main.il.1",
                                  VG_(free) );
   tl_assert(lossage);
}
   2813 
   2814 //static void show_lossage ( void )
   2815 //{
   2816 //   Lossage* elem;
   2817 //   VG_(OSetGen_ResetIter)( lossage );
   2818 //   while ( (elem = VG_(OSetGen_Next)(lossage)) ) {
   2819 //      if (elem->count < 10) continue;
   2820 //      //Char buf[100];
   2821 //      //(void)VG_(describe_IP)(elem->ec, buf, sizeof(buf)-1);
   2822 //      //buf[sizeof(buf)-1] = 0;
   2823 //      //VG_(printf)("  %,8lu  %s\n", elem->count, buf);
   2824 //      VG_(message)(Vg_UserMsg, "Lossage count %'lu at", elem->count);
   2825 //      VG_(pp_ExeContext)(elem->ec);
   2826 //   }
   2827 //}
   2828 
   2829 // This function is called *a lot*; inlining it sped up Konqueror by 20%.
/* Core access check: verify that an access of 'sz' bytes at address
   'm', whose pointer's shadow value is 'mptr_vseg', stays inside the
   block that shadow value describes.  Records a heap error otherwise.
   Inlined because it is on the hot path for every load/store. */
static inline
void check_load_or_store(Bool is_write, Addr m, UWord sz, Seg* mptr_vseg)
{
#if 0
   tl_assert(0);
   if (h_clo_lossage_check) {
      Seg* seg;
      stats__tot_mem_refs++;
      if (ISList__findI0( seglist, (Addr)m, &seg )) {
         /* m falls inside 'seg' (that is, we are making a memory
            reference inside 'seg').  Now, really mptr_vseg should be
            a tracked segment of some description.  Badness is when
            mptr_vseg is UNKNOWN, BOTTOM or NONPTR at this point,
            since that means we've lost the type of it somehow: it
            should say that m points into a real segment (preferably
            'seg'), but it doesn't. */
         if (Seg__status_is_SegHeap(seg)) {
            stats__refs_in_a_seg++;
            if (UNKNOWN == mptr_vseg
                || BOTTOM == mptr_vseg || NONPTR == mptr_vseg) {
               ExeContext* ec;
               Char buf[100];
               static UWord xx = 0;
               stats__refs_lost_seg++;
               ec = VG_(record_ExeContext)( VG_(get_running_tid)(), 0 );
               inc_lossage(ec);
               if (0) {
                  VG_(message)(Vg_DebugMsg, "");
                  VG_(message)(Vg_DebugMsg,
                               "Lossage %s %#lx sz %lu inside block alloc'd",
                               is_write ? "wr" : "rd", m, (UWord)sz);
                  VG_(pp_ExeContext)(Seg__where(seg));
               }
               if (xx++ < 0) {
                  Addr ip = VG_(get_IP)( VG_(get_running_tid)() );
                  (void)VG_(describe_IP)( ip, buf, sizeof(buf)-1);
                  buf[sizeof(buf)-1] = 0;
                  VG_(printf)("lossage at %p %s\n", ec, buf );
               }
            }
         }
      }
   } /* clo_lossage_check */
#endif

#  if SC_SEGS
   checkSeg(mptr_vseg);
#  endif

   if (UNKNOWN == mptr_vseg) {
      // do nothing: we can't tell what block (if any) the pointer
      // belongs to, so no range check is possible
   } else if (BOTTOM == mptr_vseg) {
      // do nothing
   } else if (NONPTR == mptr_vseg) {
      // accessing memory through a value known to be a non-pointer
      h_record_heap_error( m, sz, mptr_vseg, is_write );

   } else {
      // check all segment ranges in the circle
      // if none match, warn about 1st seg
      // else,          check matching one isn't freed
      Bool is_ok = False;
      Seg* curr  = mptr_vseg;
      Addr mhi;   // highest byte address the access must cover

      // Accesses partly outside range are an error, unless it's an aligned
      // word-sized read, and --partial-loads-ok=yes.  This is to cope with
      // gcc's/glibc's habits of doing word-sized accesses that read past
      // the ends of arrays/strings.
      // JRS 2008-sept-11: couldn't this be moved off the critical path?
      if (!is_write && sz == sizeof(UWord)
          && h_clo_partial_loads_ok && SHMEM_IS_WORD_ALIGNED(m)) {
         mhi = m;    // only require the first byte to be in range
      } else {
         mhi = m+sz-1;
      }

      if (0) VG_(printf)("calling seg_ci %p %#lx %#lx\n", curr,m,mhi);
      is_ok = curr->addr <= m && mhi < curr->addr + curr->szB;

      // If it's an overrun/underrun of a freed block, don't give both
      // warnings, since the first one mentions that the block has been
      // freed.
      if ( ! is_ok || Seg__is_freed(curr) )
         h_record_heap_error( m, sz, mptr_vseg, is_write );
   }
}
   2918 
   2919 // ------------------ Load handlers ------------------ //
   2920 
   2921 /* On 32 bit targets, we will use:
   2922       check_load1 check_load2 check_load4_P
   2923       check_load4  (for 32-bit FP reads)
   2924       check_load8  (for 64-bit FP reads)
   2925       check_load16 (for xmm/altivec reads)
   2926    On 64 bit targets, we will use:
   2927       check_load1 check_load2 check_load4 check_load8_P
   2928       check_load8  (for 64-bit FP reads)
   2929       check_load16 (for xmm/altivec reads)
   2930 
   2931    A "_P" handler reads a pointer from memory, and so returns a value
   2932    to the generated code -- the pointer's shadow value.  That implies
   2933    that check_load4_P is only to be called on a 32 bit host and
   2934    check_load8_P is only to be called on a 64 bit host.  For all other
   2935    cases no shadow value is returned; we merely check that the pointer
   2936    (m) matches the block described by its shadow value (mptr_vseg).
   2937 */
   2938 
   2939 // This handles 128 bit loads on both 32 bit and 64 bit targets.
// This handles 128 bit loads on both 32 bit and 64 bit targets.
// No shadow value is returned: xmm/altivec data is never a pointer.
static VG_REGPARM(2)
void check_load16(Addr m, Seg* mptr_vseg)
{
#  if SC_SEGS
   checkSeg(mptr_vseg);
#  endif
   check_load_or_store(/*is_write*/False, m, 16, mptr_vseg);
}
   2948 
   2949 // This handles 64 bit FP-or-otherwise-nonpointer loads on both
   2950 // 32 bit and 64 bit targets.
// This handles 64 bit FP-or-otherwise-nonpointer loads on both
// 32 bit and 64 bit targets.  No shadow value is returned.
static VG_REGPARM(2)
void check_load8(Addr m, Seg* mptr_vseg)
{
#  if SC_SEGS
   checkSeg(mptr_vseg);
#  endif
   check_load_or_store(/*is_write*/False, m, 8, mptr_vseg);
}
   2959 
   2960 // This handles 64 bit loads on 64 bit targets.  It must
   2961 // not be called on 32 bit targets.
   2962 // return m.vseg
   2963 static VG_REGPARM(2)
   2964 Seg* check_load8_P(Addr m, Seg* mptr_vseg)
   2965 {
   2966    Seg* vseg;
   2967    tl_assert(sizeof(UWord) == 8); /* DO NOT REMOVE */
   2968 #  if SC_SEGS
   2969    checkSeg(mptr_vseg);
   2970 #  endif
   2971    check_load_or_store(/*is_write*/False, m, 8, mptr_vseg);
   2972    if (VG_IS_8_ALIGNED(m)) {
   2973       vseg = get_mem_vseg(m);
   2974    } else {
   2975       vseg = nonptr_or_unknown( *(ULong*)m );
   2976    }
   2977    return vseg;
   2978 }
   2979 
   2980 // This handles 32 bit loads on 32 bit targets.  It must
   2981 // not be called on 64 bit targets.
   2982 // return m.vseg
   2983 static VG_REGPARM(2)
   2984 Seg* check_load4_P(Addr m, Seg* mptr_vseg)
   2985 {
   2986    Seg* vseg;
   2987    tl_assert(sizeof(UWord) == 4); /* DO NOT REMOVE */
   2988 #  if SC_SEGS
   2989    checkSeg(mptr_vseg);
   2990 #  endif
   2991    check_load_or_store(/*is_write*/False, m, 4, mptr_vseg);
   2992    if (VG_IS_4_ALIGNED(m)) {
   2993       vseg = get_mem_vseg(m);
   2994    } else {
   2995       vseg = nonptr_or_unknown( *(UInt*)m );
   2996    }
   2997    return vseg;
   2998 }
   2999 
   3000 // Used for both 32 bit and 64 bit targets.
// Used for both 32 bit and 64 bit targets.
// 32-bit nonpointer load (e.g. 32-bit FP read); no shadow returned.
static VG_REGPARM(2)
void check_load4(Addr m, Seg* mptr_vseg)
{
#  if SC_SEGS
   checkSeg(mptr_vseg);
#  endif
   check_load_or_store(/*is_write*/False, m, 4, mptr_vseg);
}
   3009 
   3010 // Used for both 32 bit and 64 bit targets.
// Used for both 32 bit and 64 bit targets.
// 16-bit load; no shadow returned.
static VG_REGPARM(2)
void check_load2(Addr m, Seg* mptr_vseg)
{
#  if SC_SEGS
   checkSeg(mptr_vseg);
#  endif
   check_load_or_store(/*is_write*/False, m, 2, mptr_vseg);
}
   3019 
   3020 // Used for both 32 bit and 64 bit targets.
// Used for both 32 bit and 64 bit targets.
// 8-bit load; no shadow returned.
static VG_REGPARM(2)
void check_load1(Addr m, Seg* mptr_vseg)
{
#  if SC_SEGS
   checkSeg(mptr_vseg);
#  endif
   check_load_or_store(/*is_write*/False, m, 1, mptr_vseg);
}
   3029 
   3030 // ------------------ Store handlers ------------------ //
   3031 
   3032 /* On 32 bit targets, we will use:
   3033       check_store1 check_store2 check_store4_P check_store4C_P
   3034       check_store4 (for 32-bit nonpointer stores)
   3035       check_store8_ms4B_ls4B (for 64-bit stores)
   3036       check_store16_ms4B_4B_4B_ls4B (for xmm/altivec stores)
   3037 
   3038    On 64 bit targets, we will use:
   3039       check_store1 check_store2 check_store4 check_store4C
   3040       check_store8_P check_store_8C_P
   3041       check_store8_all8B (for 64-bit nonpointer stores)
   3042       check_store16_ms8B_ls8B (for xmm/altivec stores)
   3043 
   3044    A "_P" handler writes a pointer to memory, and so has an extra
   3045    argument -- the pointer's shadow value.  That implies that
   3046    check_store4{,C}_P is only to be called on a 32 bit host and
   3047    check_store8{,C}_P is only to be called on a 64 bit host.  For all
   3048    other cases, and for the misaligned _P cases, the strategy is to
   3049    let the store go through, and then snoop around with
   3050    nonptr_or_unknown to fix up the shadow values of any affected
   3051    words. */
   3052 
   3053 /* Helpers for store-conditionals.  Ugly kludge :-(
   3054    They all return 1 if the SC was successful and 0 if it failed. */
/* 32-bit store-conditional: attempt to store 't' at 'm' with stwcx.
   Returns 1 on success, 0 on failure.  Only implemented for ppc;
   other platforms assert (tl_assert(0) does not return, so the
   missing return value there is unreachable). */
static inline UWord do_store_conditional_32( Addr m/*dst*/, UInt t/*val*/ )
{
#  if defined(VGA_ppc32) || defined(VGA_ppc64)
   UWord success;
   /* If this assertion fails, the underlying IR is (semantically) ill-formed
      as per the IR spec for IRStmt_Store. */
   tl_assert(VG_IS_4_ALIGNED(m));
   __asm__ __volatile__(
      "stwcx. %2,0,%1"    "\n\t" /* data,0,addr */
      "mfcr   %0"         "\n\t"
      "srwi   %0,%0,29"   "\n\t" /* move relevant CR bit to LSB */
      : /*out*/"=b"(success)
      : /*in*/ "b"(m), "b"( (UWord)t )
      : /*trash*/ "memory", "cc"
        /* Note: srwi is OK even on 64-bit host because the we're
           after bit 29 (normal numbering) and we mask off all the
           other junk just below. */
   );
   return success & (UWord)1;
#  else
   tl_assert(0); /* not implemented on other platforms */
#  endif
}
   3078 
   3079 static inline UWord do_store_conditional_64( Addr m/*dst*/, ULong t/*val*/ )
   3080 {
   3081 #  if defined(VGA_ppc64)
   3082    UWord success;
   3083    /* If this assertion fails, the underlying IR is (semantically) ill-formed
   3084       as per the IR spec for IRStmt_Store. */
   3085    tl_assert(VG_IS_8_ALIGNED(m));
   3086    __asm__ __volatile__(
   3087       "stdcx. %2,0,%1"    "\n\t" /* data,0,addr */
   3088       "mfcr   %0"         "\n\t"
   3089       "srdi   %0,%0,29"   "\n\t" /* move relevant CR bit to LSB */
   3090       : /*out*/"=b"(success)
   3091       : /*in*/ "b"(m), "b"( (UWord)t )
   3092       : /*trash*/ "memory", "cc"
   3093    );
   3094    return success & (UWord)1;
   3095 #  else
   3096    tl_assert(0); /* not implemented on other platforms */
   3097 #  endif
   3098 }
   3099 
   3100 /* Apply nonptr_or_unknown to all the words intersecting
   3101    [a, a+len). */
   3102 static inline VG_REGPARM(2)
   3103 void nonptr_or_unknown_range ( Addr a, SizeT len )
   3104 {
   3105    const SizeT wszB = sizeof(UWord);
   3106    Addr wfirst = VG_ROUNDDN(a,       wszB);
   3107    Addr wlast  = VG_ROUNDDN(a+len-1, wszB);
   3108    Addr a2;
   3109    tl_assert(wfirst <= wlast);
   3110    for (a2 = wfirst ; a2 <= wlast; a2 += wszB) {
   3111       set_mem_vseg( a2, nonptr_or_unknown( *(UWord*)a2 ));
   3112    }
   3113 }
   3114 
   3115 // Write to shadow memory, for a 32-bit store.  Must only
   3116 // be used on 32-bit targets.
   3117 static inline VG_REGPARM(2)
   3118 void do_shadow_store4_P( Addr m, Seg* vseg )
   3119 {
   3120    tl_assert(sizeof(UWord) == 4); /* DO NOT REMOVE */
   3121    if (VG_IS_4_ALIGNED(m)) {
   3122       set_mem_vseg( m, vseg );
   3123    } else {
   3124       // straddling two words
   3125       nonptr_or_unknown_range(m, 4);
   3126    }
   3127 }
   3128 
   3129 // Write to shadow memory, for a 64-bit store.  Must only
   3130 // be used on 64-bit targets.
   3131 static inline VG_REGPARM(2)
   3132 void do_shadow_store8_P( Addr m, Seg* vseg )
   3133 {
   3134    tl_assert(sizeof(UWord) == 8); /* DO NOT REMOVE */
   3135    if (VG_IS_8_ALIGNED(m)) {
   3136       set_mem_vseg( m, vseg );
   3137    } else {
   3138       // straddling two words
   3139       nonptr_or_unknown_range(m, 8);
   3140    }
   3141 }
   3142 
   3143 // This handles 128 bit stores on 64 bit targets.  The
   3144 // store data is passed in 2 pieces, the most significant
   3145 // bits first.
   3146 static VG_REGPARM(3)
   3147 void check_store16_ms8B_ls8B(Addr m, Seg* mptr_vseg,
   3148                              UWord ms8B, UWord ls8B)
   3149 {
   3150    tl_assert(sizeof(UWord) == 8); /* DO NOT REMOVE */
   3151 #  if SC_SEGS
   3152    checkSeg(mptr_vseg);
   3153 #  endif
   3154    check_load_or_store(/*is_write*/True, m, 16, mptr_vseg);
   3155    // Actually *do* the STORE here
   3156    if (host_is_little_endian()) {
   3157       // FIXME: aren't we really concerned whether the guest
   3158       // is little endian, not whether the host is?
   3159       *(ULong*)(m + 0) = ls8B;
   3160       *(ULong*)(m + 8) = ms8B;
   3161    } else {
   3162       *(ULong*)(m + 0) = ms8B;
   3163       *(ULong*)(m + 8) = ls8B;
   3164    }
   3165    nonptr_or_unknown_range(m, 16);
   3166 }
   3167 
// This handles 128 bit stores on 32 bit targets.  The
// store data is passed in 4 pieces, the most significant
// bits first.
// (Nb: the original comment said "64 bit targets ... 2 pieces",
// which contradicts both the tl_assert below and the argument list.)
static VG_REGPARM(3)
void check_store16_ms4B_4B_4B_ls4B(Addr m, Seg* mptr_vseg,
                                   UWord ms4B, UWord w2,
                                   UWord w1,   UWord ls4B)
{
   tl_assert(sizeof(UWord) == 4); /* DO NOT REMOVE */
#  if SC_SEGS
   checkSeg(mptr_vseg);
#  endif
   check_load_or_store(/*is_write*/True, m, 16, mptr_vseg);
   // Actually *do* the STORE here
   if (host_is_little_endian()) {
      // FIXME: aren't we really concerned whether the guest
      // is little endian, not whether the host is?
      *(UInt*)(m +  0) = ls4B;
      *(UInt*)(m +  4) = w1;
      *(UInt*)(m +  8) = w2;
      *(UInt*)(m + 12) = ms4B;
   } else {
      *(UInt*)(m +  0) = ms4B;
      *(UInt*)(m +  4) = w2;
      *(UInt*)(m +  8) = w1;
      *(UInt*)(m + 12) = ls4B;
   }
   nonptr_or_unknown_range(m, 16);
}
   3197 
   3198 // This handles 64 bit stores on 32 bit targets.  The
   3199 // store data is passed in 2 pieces, the most significant
   3200 // bits first.
   3201 static VG_REGPARM(3)
   3202 void check_store8_ms4B_ls4B(Addr m, Seg* mptr_vseg,
   3203                             UWord ms4B, UWord ls4B)
   3204 {
   3205    tl_assert(sizeof(UWord) == 4); /* DO NOT REMOVE */
   3206 #  if SC_SEGS
   3207    checkSeg(mptr_vseg);
   3208 #  endif
   3209    check_load_or_store(/*is_write*/True, m, 8, mptr_vseg);
   3210    // Actually *do* the STORE here
   3211    if (host_is_little_endian()) {
   3212       // FIXME: aren't we really concerned whether the guest
   3213       // is little endian, not whether the host is?
   3214       *(UInt*)(m + 0) = ls4B;
   3215       *(UInt*)(m + 4) = ms4B;
   3216    } else {
   3217       *(UInt*)(m + 0) = ms4B;
   3218       *(UInt*)(m + 4) = ls4B;
   3219    }
   3220    nonptr_or_unknown_range(m, 8);
   3221 }
   3222 
// This handles 64 bit non pointer stores on 64 bit targets.
// It must not be called on 32 bit targets.
// 'all8B' is the entire 64-bit store data; after the store the
// affected shadow word(s) are recomputed conservatively.
static VG_REGPARM(3)
void check_store8_all8B(Addr m, Seg* mptr_vseg, UWord all8B)
{
   tl_assert(sizeof(UWord) == 8); /* DO NOT REMOVE */
#  if SC_SEGS
   checkSeg(mptr_vseg);
#  endif
   check_load_or_store(/*is_write*/True, m, 8, mptr_vseg);
   // Actually *do* the STORE here
   *(ULong*)m = all8B;
   nonptr_or_unknown_range(m, 8);
}
   3237 
// This handles 64 bit stores on 64 bit targets.  It must
// not be called on 32 bit targets.
// 't' is the (possibly pointer-valued) data being stored and
// 't_vseg' is its shadow; the shadow is propagated into shadow
// memory by do_shadow_store8_P.
static VG_REGPARM(3)
void check_store8_P(Addr m, Seg* mptr_vseg, UWord t, Seg* t_vseg)
{
   tl_assert(sizeof(UWord) == 8); /* DO NOT REMOVE */
#  if SC_SEGS
   checkSeg(t_vseg);
   checkSeg(mptr_vseg);
#  endif
   check_load_or_store(/*is_write*/True, m, 8, mptr_vseg);
   // Actually *do* the STORE here
   *(ULong*)m = t;
   do_shadow_store8_P( m, t_vseg );
}
   3253 
// This handles 64 bit store-conditionals on 64 bit targets.  It must
// not be called on 32 bit targets.
// Returns 1 if the store-conditional succeeded, 0 otherwise; shadow
// memory is only updated when the store actually happened.
static VG_REGPARM(3)
UWord check_store8C_P(Addr m, Seg* mptr_vseg, UWord t, Seg* t_vseg)
{
   UWord success;
   tl_assert(sizeof(UWord) == 8); /* DO NOT REMOVE */
#  if SC_SEGS
   checkSeg(t_vseg);
   checkSeg(mptr_vseg);
#  endif
   check_load_or_store(/*is_write*/True, m, 8, mptr_vseg);
   // Actually *do* the STORE here
   success = do_store_conditional_64( m, t );
   if (success)
      do_shadow_store8_P( m, t_vseg );
   return success;
}
   3272 
// This handles 32 bit stores on 32 bit targets.  It must
// not be called on 64 bit targets.
// 't' is the (possibly pointer-valued) data being stored and
// 't_vseg' is its shadow; the shadow is propagated into shadow
// memory by do_shadow_store4_P.
static VG_REGPARM(3)
void check_store4_P(Addr m, Seg* mptr_vseg, UWord t, Seg* t_vseg)
{
   tl_assert(sizeof(UWord) == 4); /* DO NOT REMOVE */
#  if SC_SEGS
   checkSeg(t_vseg);
   checkSeg(mptr_vseg);
#  endif
   check_load_or_store(/*is_write*/True, m, 4, mptr_vseg);
   // Actually *do* the STORE here
   *(UInt*)m = t;
   do_shadow_store4_P( m, t_vseg );
}
   3288 
// This handles 32 bit store-conditionals on 32 bit targets.  It must
// not be called on 64 bit targets.
// Returns 1 if the store-conditional succeeded, 0 otherwise; shadow
// memory is only updated when the store actually happened.
static VG_REGPARM(3)
UWord check_store4C_P(Addr m, Seg* mptr_vseg, UWord t, Seg* t_vseg)
{
   UWord success;
   tl_assert(sizeof(UWord) == 4); /* DO NOT REMOVE */
#  if SC_SEGS
   checkSeg(t_vseg);
   checkSeg(mptr_vseg);
#  endif
   check_load_or_store(/*is_write*/True, m, 4, mptr_vseg);
   // Actually *do* the STORE here
   success = do_store_conditional_32( m, t );
   if (success)
      do_shadow_store4_P( m, t_vseg );
   return success;
}
   3307 
// Used for both 32 bit and 64 bit targets.
// Handles a 32-bit non-pointer store: performs the store and then
// conservatively recomputes the shadow of the affected word(s).
static VG_REGPARM(3)
void check_store4(Addr m, Seg* mptr_vseg, UWord t)
{
#  if SC_SEGS
   checkSeg(mptr_vseg);
#  endif
   check_load_or_store(/*is_write*/True, m, 4, mptr_vseg);
   // Actually *do* the STORE here  (Nb: cast must be to 4-byte type!)
   *(UInt*)m = t;
   nonptr_or_unknown_range(m, 4);
}
   3320 
// Used for 32-bit store-conditionals on 64 bit targets only.  It must
// not be called on 32 bit targets.
// Returns 1 if the store-conditional succeeded, 0 otherwise; shadow
// memory is only touched when the store actually happened.
static VG_REGPARM(3)
UWord check_store4C(Addr m, Seg* mptr_vseg, UWord t)
{
   UWord success;
   tl_assert(sizeof(UWord) == 8); /* DO NOT REMOVE */
#  if SC_SEGS
   checkSeg(mptr_vseg);
#  endif
   check_load_or_store(/*is_write*/True, m, 4, mptr_vseg);
   // Actually *do* the STORE here
   success = do_store_conditional_32( m, t );
   if (success)
      nonptr_or_unknown_range(m, 4);
   return success;
}
   3338 
// Used for both 32 bit and 64 bit targets.
// Handles a 2-byte store: performs the store and then conservatively
// recomputes the shadow of the word(s) it touches.
static VG_REGPARM(3)
void check_store2(Addr m, Seg* mptr_vseg, UWord t)
{
#  if SC_SEGS
   checkSeg(mptr_vseg);
#  endif
   check_load_or_store(/*is_write*/True, m, 2, mptr_vseg);
   // Actually *do* the STORE here  (Nb: cast must be to 2-byte type!)
   *(UShort*)m = t;
   nonptr_or_unknown_range(m, 2);
}
   3351 
// Used for both 32 bit and 64 bit targets.
// Handles a 1-byte store: performs the store and then conservatively
// recomputes the shadow of the word it touches.
static VG_REGPARM(3)
void check_store1(Addr m, Seg* mptr_vseg, UWord t)
{
#  if SC_SEGS
   checkSeg(mptr_vseg);
#  endif
   check_load_or_store(/*is_write*/True, m, 1, mptr_vseg);
   // Actually *do* the STORE here  (Nb: cast must be to 1-byte type!)
   *(UChar*)m = t;
   nonptr_or_unknown_range(m, 1);
}
   3364 
   3365 
// Nb: if the result is BOTTOM, return immediately -- don't let BOTTOM
//     be changed to NONPTR by a range check on the result.
// BINOP dispatches on the shadow-value pair (seg1, seg2): 'bt' runs if
// either argument is BOTTOM; otherwise exactly one of the nine
// statements nn/nu/np/un/uu/up/pn/pu/pp runs, selected by whether each
// of seg1 and seg2 is NONPTR, UNKNOWN, or a real segment pointer.
// Expects 'seg1' and 'seg2' to be in scope at the use site.
#define BINOP(bt, nn, nu, np, un, uu, up, pn, pu, pp) \
   if (BOTTOM == seg1 || BOTTOM == seg2) { bt;                   \
   } else if (NONPTR == seg1)  { if      (NONPTR == seg2)  { nn; }  \
                                 else if (UNKNOWN == seg2) { nu; }    \
                                 else                      { np; }    \
   } else if (UNKNOWN == seg1) { if      (NONPTR == seg2)  { un; }    \
                                 else if (UNKNOWN == seg2) { uu; }    \
                                 else                      { up; }    \
   } else                      { if      (NONPTR == seg2)  { pn; }    \
                                 else if (UNKNOWN == seg2) { pu; }    \
                                 else                      { pp; }    \
   }

// Report a pointer-arithmetic error for the (seg1, seg2) pair and
// force the result shadow to NONPTR.  Expects 'seg1', 'seg2' and
// 'out' in scope at the use site.
#define BINERROR(opname)                    \
   h_record_arith_error(seg1, seg2, opname);  \
   out = NONPTR
   3384 
// -------------
//  + | n  ?  p
// -------------
//  n | n  ?  p
//  ? | ?  ?  ?
//  p | p  ?  e   (all results become n if they look like a non-pointer)
// -------------
// Compute the shadow for an addition whose concrete result is
// 'result'.  p+p is an error (reported via BINERROR).  BOTTOM args
// return BOTTOM immediately, bypassing the final looks_like_a_pointer
// downgrade.
static Seg* do_addW_result(Seg* seg1, Seg* seg2, UWord result, HChar* opname)
{
   Seg* out;
#  if SC_SEGS
   checkSeg(seg1);
   checkSeg(seg2);
#  endif
   BINOP(
      return BOTTOM,
      out = NONPTR,  out = UNKNOWN, out = seg2,
      out = UNKNOWN, out = UNKNOWN, out = UNKNOWN,
      out = seg1,    out = UNKNOWN,       BINERROR(opname)
   );
   return ( looks_like_a_pointer(result) ? out : NONPTR );
}
   3408 
// Shadow-computation helper for Add32/Add64: thin wrapper around
// do_addW_result that supplies the op name for error reports.
static VG_REGPARM(3) Seg* do_addW(Seg* seg1, Seg* seg2, UWord result)
{
   Seg* out;
#  if SC_SEGS
   checkSeg(seg1);
   checkSeg(seg2);
#  endif
   out = do_addW_result(seg1, seg2, result, "Add32/Add64");
#  if SC_SEGS
   checkSeg(out);
#  endif
   return out;
}
   3422 
// -------------
//  - | n  ?  p      (Nb: operation is seg1 - seg2)
// -------------
//  n | n  ?  n+     (+) happens a lot due to "cmp", but result should never
//  ? | ?  ?  n/B        be used, so give 'n'
//  p | p  p? n*/B   (*) and possibly link the segments
// -------------
// Compute the shadow for a subtraction whose concrete result is
// 'result'.  The ?-p and p-p cases return BOTTOM directly so the
// difference is never misread as a pointer.
static VG_REGPARM(3) Seg* do_subW(Seg* seg1, Seg* seg2, UWord result)
{
   Seg* out;
#  if SC_SEGS
   checkSeg(seg1);
   checkSeg(seg2);
#  endif
   // Nb: when returning BOTTOM, don't let it go through the range-check;
   //     a segment linking offset can easily look like a nonptr.
   BINOP(
      return BOTTOM,
      out = NONPTR,  out = UNKNOWN,    out = NONPTR,
      out = UNKNOWN, out = UNKNOWN,    return BOTTOM,
      out = seg1,    out = seg1/*??*/, return BOTTOM
   );
   // Dead code: sketch of the never-enabled p-p segment-linking scheme
   // alluded to by (*) in the table above.
   #if 0
         // This is for the p-p segment-linking case
         Seg end2 = seg2;
         while (end2->links != seg2) end2 = end2->links;
         end2->links = seg1->links;
         seg1->links = seg2;
         return NONPTR;
   #endif
   return ( looks_like_a_pointer(result) ? out : NONPTR );
}
   3455 
// -------------
//  & | n  ?  p
// -------------
//  n | n  ?  p
//  ? | ?  ?  ?
//  p | p  ?  *  (*) if p1==p2 then p else e (see comment)
// -------------
/* Seems to be OK to And two pointers:
     testq %ptr1,%ptr2
     jnz ..
   which possibly derives from
     if (ptr1 & ptr2) { A } else { B }
   not sure what that means
*/
// Compute the shadow for a bitwise AND.  'args_diff' is the XOR-style
// difference of the two concrete args as supplied by the caller;
// zero means the two operands were identical, in which case the
// result keeps seg1's shadow.
static VG_REGPARM(3) Seg* do_andW(Seg* seg1, Seg* seg2,
                                  UWord result, UWord args_diff)
{
   Seg* out;
   if (0 == args_diff) {
      // p1==p2
      out = seg1;
   } else {
      BINOP(
         return BOTTOM,
         out = NONPTR,  out = UNKNOWN, out = seg2,
         out = UNKNOWN, out = UNKNOWN, out = UNKNOWN,
         out = seg1,    out = UNKNOWN, out = NONPTR
                                       /*BINERROR("And32/And64")*/
      );
   }
   out = ( looks_like_a_pointer(result) ? out : NONPTR );
   return out;
}
   3489 
// -------------
// `|`| n  ?  p
// -------------
//  n | n  ?  p
//  ? | ?  ?  ?
//  p | p  ?  n
// -------------
/* It's OK to Or two pointers together, but the result definitely
   isn't a pointer.  Why would you want to do that?  Because of this:
     char* p1 = malloc(..);
     char* p2 = malloc(..);
     ...
     if (p1 || p2) { .. }
   In this case gcc on x86/amd64 quite literally or-s the two pointers
   together and throws away the result, the purpose of which is merely
   to set %eflags.Z/%rflags.Z.  So we have to allow it.
*/
static VG_REGPARM(3) Seg* do_orW(Seg* seg1, Seg* seg2, UWord result)
{
   Seg* out;
   BINOP(
      return BOTTOM,
      out = NONPTR,  out = UNKNOWN, out = seg2,
      out = UNKNOWN, out = UNKNOWN, out = UNKNOWN,
      out = seg1,    out = UNKNOWN, out = NONPTR
   );
   out = ( looks_like_a_pointer(result) ? out : NONPTR );
   return out;
}
   3519 
   3520 // -------------
   3521 //  ~ | n  ?  p
   3522 // -------------
   3523 //    | n  n  n
   3524 // -------------
   3525 static VG_REGPARM(2) Seg* do_notW(Seg* seg1, UWord result)
   3526 {
   3527 #  if SC_SEGS
   3528    checkSeg(seg1);
   3529 #  endif
   3530    if (BOTTOM == seg1) return BOTTOM;
   3531    return NONPTR;
   3532 }
   3533 
   3534 // Pointers are rarely multiplied, but sometimes legitimately, eg. as hash
   3535 // function inputs.  But two pointers args --> error.
   3536 // Pretend it always returns a nonptr.  Maybe improve later.
   3537 static VG_REGPARM(2) Seg* do_mulW(Seg* seg1, Seg* seg2)
   3538 {
   3539 #  if SC_SEGS
   3540    checkSeg(seg1);
   3541    checkSeg(seg2);
   3542 #  endif
   3543    if (is_known_segment(seg1) && is_known_segment(seg2))
   3544       h_record_arith_error(seg1, seg2, "Mul32/Mul64");
   3545    return NONPTR;
   3546 }
   3547 
   3548 
   3549 /*--------------------------------------------------------------------*/
   3550 /*--- Instrumentation                                              ---*/
   3551 /*--------------------------------------------------------------------*/
   3552 
   3553 /* The h_ instrumenter that follows is complex, since it deals with
   3554    shadow value computation.
   3555 
   3556    It also needs to generate instrumentation for the sg_ side of
   3557    things.  That's relatively straightforward.  However, rather than
   3558    confuse the code herein any further, we simply delegate the problem
   3559    to sg_main.c, by using the four functions
   3560    sg_instrument_{init,fini,IRStmt,final_jump}.  These four completely
   3561    abstractify the sg_ instrumentation.  See comments in sg_main.c's
   3562    instrumentation section for further details. */
   3563 
   3564 
/* Carries info about a particular tmp.  The tmp's number is not
   recorded, as this is implied by (equal to) its index in the tmpMap
   in PCEnv.  The tmp's type is also not recorded, as this is present
   in PCEnv.sb->tyenv.

   When .kind is NonShad, .shadow may give the identity of the temp
   currently holding the associated shadow value, or it may be
   IRTemp_INVALID if code to compute the shadow has not yet been
   emitted.

   When .kind is Shad tmp holds a shadow value, and so .shadow must be
   IRTemp_INVALID, since it is illogical for a shadow tmp itself to be
   shadowed.
*/
typedef
   enum { NonShad=1, Shad=2 }
   TempKind;

typedef
   struct {
      TempKind kind;    /* original (NonShad) or shadow (Shad) temp */
      IRTemp   shadow;  /* for NonShad: its shadow temp, or IRTemp_INVALID */
   }
   TempMapEnt;
   3589 
   3590 
   3591 
/* Carries around state during Ptrcheck instrumentation. */
typedef
   struct {
      /* MODIFIED: the superblock being constructed.  IRStmts are
         added. */
      IRSB* sb;
      Bool  trace;   /* if True, instrumentation is traced to stdout */

      /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
         current kind and possibly shadow temps for each temp in the
         IRSB being constructed.  Note that it does not contain the
         type of each tmp.  If you want to know the type, look at the
         relevant entry in sb->tyenv.  It follows that at all times
         during the instrumentation process, the valid indices for
         tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
         total number of NonShad and Shad temps allocated so far.

         The reason for this strange split (types in one place, all
         other info in another) is that we need the types to be
         attached to sb so as to make it possible to do
         "typeOfIRExpr(mce->bb->tyenv, ...)" at various places in the
         instrumentation process.

         Note that only integer temps of the guest word size are
         shadowed, since it is impossible (or meaningless) to hold a
         pointer in any other type of temp. */
      /* NOTE(review): comments throughout this file call this table
         "tmpMap", but the field itself is named qmpMap -- presumably
         a historical rename; confirm before grepping for tmpMap. */
      XArray* /* of TempMapEnt */ qmpMap;

      /* READONLY: the host word type.  Needed for constructing
         arguments of type 'HWord' to be passed to helper functions.
         Ity_I32 or Ity_I64 only. */
      IRType hWordTy;

      /* READONLY: the guest word type, Ity_I32 or Ity_I64 only. */
      IRType gWordTy;

      /* READONLY: the guest state size, so we can generate shadow
         offsets correctly. */
      Int guest_state_sizeB;
   }
   PCEnv;
   3633 
   3634 /* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
   3635    demand), as they are encountered.  This is for two reasons.
   3636 
   (1) (less important reason): Many original tmps are unused due to
   initial IR optimisation, and we do not want to waste space in
   tables tracking them.
   3640 
   3641    Shadow IRTemps are therefore allocated on demand.  pce.tmpMap is a
   3642    table indexed [0 .. n_types-1], which gives the current shadow for
   3643    each original tmp, or INVALID_IRTEMP if none is so far assigned.
   3644    It is necessary to support making multiple assignments to a shadow
   3645    -- specifically, after testing a shadow for definedness, it needs
   3646    to be made defined.  But IR's SSA property disallows this.
   3647 
   3648    (2) (more important reason): Therefore, when a shadow needs to get
   3649    a new value, a new temporary is created, the value is assigned to
   3650    that, and the tmpMap is updated to reflect the new binding.
   3651 
   3652    A corollary is that if the tmpMap maps a given tmp to
   3653    IRTemp_INVALID and we are hoping to read that shadow tmp, it means
   3654    there's a read-before-write error in the original tmps.  The IR
   3655    sanity checker should catch all such anomalies, however.
   3656 */
   3657 
/* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
   both the table in pce->sb and to our auxiliary mapping.  Note that
   newTemp may cause pce->tmpMap to resize, hence previous results
   from VG_(indexXA)(pce->tmpMap) are invalidated.
   (Nb: "tmpMap" here refers to the field named qmpMap in PCEnv.) */
static IRTemp newTemp ( PCEnv* pce, IRType ty, TempKind kind )
{
   Word       newIx;
   TempMapEnt ent;
   IRTemp     tmp = newIRTemp(pce->sb->tyenv, ty);
   ent.kind   = kind;
   ent.shadow = IRTemp_INVALID;   /* no shadow assigned yet */
   newIx = VG_(addToXA)( pce->qmpMap, &ent );
   /* tyenv and qmpMap must stay index-aligned. */
   tl_assert(newIx == (Word)tmp);
   return tmp;
}
   3673 
/* Find the tmp currently shadowing the given original tmp.  If none
   so far exists, allocate one.  */
static IRTemp findShadowTmp ( PCEnv* pce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      here. */
   ent = (TempMapEnt*)VG_(indexXA)( pce->qmpMap, (Word)orig );
   tl_assert(ent->kind == NonShad);  /* can't shadow a shadow */
   if (ent->shadow == IRTemp_INVALID) {
      IRTemp shadow = newTemp( pce, pce->gWordTy, Shad );
      /* newTemp may cause pce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid. */
      ent = (TempMapEnt*)VG_(indexXA)( pce->qmpMap, (Word)orig );
      tl_assert(ent->kind == NonShad);
      tl_assert(ent->shadow == IRTemp_INVALID);
      ent->shadow = shadow;
   }
   return ent->shadow;
}
   3694 
   3695 /* Allocate a new shadow for the given original tmp.  This means any
   3696    previous shadow is abandoned.  This is needed because it is
   3697    necessary to give a new value to a shadow once it has been tested
   3698    for undefinedness, but unfortunately IR's SSA property disallows
   3699    this.  Instead we must abandon the old shadow, allocate a new one
   3700    and use that instead.
   3701 
   3702    This is the same as findShadowTmp, except we don't bother to see
   3703    if a shadow temp already existed -- we simply allocate a new one
   3704    regardless. */
   3705 static IRTemp newShadowTmp ( PCEnv* pce, IRTemp orig )
   3706 {
   3707    TempMapEnt* ent;
   3708    /* VG_(indexXA) range-checks 'orig', hence no need to check
   3709       here. */
   3710    ent = (TempMapEnt*)VG_(indexXA)( pce->qmpMap, (Word)orig );
   3711    tl_assert(ent->kind == NonShad);
   3712    if (1) {
   3713       IRTemp shadow = newTemp( pce, pce->gWordTy, Shad );
   3714       /* newTemp may cause pce->tmpMap to resize, hence previous results
   3715          from VG_(indexXA) are invalid. */
   3716       ent = (TempMapEnt*)VG_(indexXA)( pce->qmpMap, (Word)orig );
   3717       tl_assert(ent->kind == NonShad);
   3718       ent->shadow = shadow;
   3719       return shadow;
   3720    }
   3721    /* NOTREACHED */
   3722    tl_assert(0);
   3723 }
   3724 
/*------------------------------------------------------------*/
/*--- IRAtoms -- a subset of IRExprs                       ---*/
/*------------------------------------------------------------*/

/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
   isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
   input, most of this code deals in atoms.  Usefully, a value atom
   always has a V-value which is also an atom: constants are shadowed
   by constants, and temps are shadowed by the corresponding shadow
   temporary. */

typedef  IRExpr  IRAtom;   /* documentation alias: "must be an atom" */
   3738 
   3739 //zz /* (used for sanity checks only): is this an atom which looks
   3740 //zz    like it's from original code? */
   3741 //zz static Bool isOriginalAtom ( PCEnv* pce, IRAtom* a1 )
   3742 //zz {
   3743 //zz    if (a1->tag == Iex_Const)
   3744 //zz       return True;
   3745 //zz    if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp < pce->n_originalTmps)
   3746 //zz       return True;
   3747 //zz    return False;
   3748 //zz }
   3749 //zz
   3750 //zz /* (used for sanity checks only): is this an atom which looks
   3751 //zz    like it's from shadow code? */
   3752 //zz static Bool isShadowAtom ( PCEnv* pce, IRAtom* a1 )
   3753 //zz {
   3754 //zz    if (a1->tag == Iex_Const)
   3755 //zz       return True;
   3756 //zz    if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp >= pce->n_originalTmps)
   3757 //zz       return True;
   3758 //zz    return False;
   3759 //zz }
   3760 //zz
   3761 //zz /* (used for sanity checks only): check that both args are atoms and
   3762 //zz    are identically-kinded. */
   3763 //zz static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
   3764 //zz {
   3765 //zz    if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
   3766 //zz       return True;
   3767 //zz    if (a1->tag == Iex_Const && a2->tag == Iex_Const)
   3768 //zz       return True;
   3769 //zz    return False;
   3770 //zz }
   3771 
   3772 
   3773 /*------------------------------------------------------------*/
   3774 /*--- Constructing IR fragments                            ---*/
   3775 /*------------------------------------------------------------*/
   3776 
/* add stmt to a bb; 'cat' is a single-character tag printed when
   tracing, identifying which instrumentation phase emitted it */
static inline void stmt ( HChar cat, PCEnv* pce, IRStmt* st ) {
   if (pce->trace) {
      VG_(printf)("  %c: ", cat);
      ppIRStmt(st);
      VG_(printf)("\n");
   }
   addStmtToIRSB(pce->sb, st);
}
   3786 
/* assign value to tmp: emit "tmp = expr" into the IRSB under
   construction, tagged with 'cat' for tracing */
static inline
void assign ( HChar cat, PCEnv* pce, IRTemp tmp, IRExpr* expr ) {
   stmt(cat, pce, IRStmt_WrTmp(tmp,expr));
}
   3792 
/* build various kinds of expressions -- thin shorthands over the
   libvex IRExpr constructors, used throughout the instrumenter */
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
#define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
#define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))
   3802 
/* Bind the given expression to a new temporary, and return the
   temporary.  This effectively converts an arbitrary expression into
   an atom.

   'ty' is the type of 'e' and hence the type that the new temporary
   needs to be.  But passing it is redundant, since we can deduce the
   type merely by inspecting 'e'.  So we at least use that fact to
   assert that the two types agree. */
static IRAtom* assignNew ( HChar cat, PCEnv* pce, IRType ty, IRExpr* e ) {
   IRTemp t;
   IRType tyE = typeOfIRExpr(pce->sb->tyenv, e);
   tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
   t = newTemp(pce, ty, Shad);
   assign(cat, pce, t, e);
   return mkexpr(t);
}
   3819 
   3820 
   3821 
   3822 //-----------------------------------------------------------------------
   3823 // Approach taken for range-checking for NONPTR/UNKNOWN-ness as follows.
   3824 //
   3825 // Range check (NONPTR/seg):
   3826 // - after modifying a word-sized value in/into a TempReg:
   3827 //    - {ADD, SUB, ADC, SBB, AND, OR, XOR, LEA, LEA2, NEG, NOT}L
   3828 //    - BSWAP
   3829 //
   3830 // Range check (NONPTR/UNKNOWN):
   3831 // - when introducing a new word-sized value into a TempReg:
   3832 //    - MOVL l, t2
   3833 //
   3834 // - when copying a word-sized value which lacks a corresponding segment
   3835 //   into a TempReg:
   3836 //    - straddled LDL
   3837 //
   3838 // - when a sub-word of a word (or two) is updated:
   3839 //    - SHROTL
   3840 //    - {ADD, SUB, ADC, SBB, AND, OR, XOR, SHROT, NEG, NOT}[WB]
   3841 //    - PUT[WB]
   3842 //    - straddled   STL (2 range checks)
   3843 //    - straddled   STW (2 range checks)
   3844 //    - unstraddled STW
   3845 //    - STB
   3846 //
   3847 // Just copy:
   3848 // - when copying word-sized values:
   3849 //    - MOVL t1, t2 (--optimise=no only)
   3850 //    - CMOV
   3851 //    - GETL, PUTL
   3852 //    - unstraddled LDL, unstraddled STL
   3853 //
   3854 // - when barely changing
   3855 //    - INC[LWB]/DEC[LWB]
   3856 //
   3857 // Set to NONPTR:
   3858 // - after copying a sub-word value into a TempReg:
   3859 //    - MOV[WB] l, t2
   3860 //    - GET[WB]
   3861 //    - unstraddled LDW
   3862 //    - straddled   LDW
   3863 //    - LDB
   3864 //    - POP[WB]
   3865 //
   3866 // - after copying an obvious non-ptr into a TempReg:
   3867 //    - GETF
   3868 //    - CC2VAL
   3869 //    - POPL
   3870 //
   3871 // - after copying an obvious non-ptr into a memory word:
   3872 //    - FPU_W
   3873 //
   3874 // Do nothing:
   3875 // - LOCK, INCEIP
   3876 // - WIDEN[WB]
   3877 // - JMP, JIFZ
   3878 // - CALLM_[SE], PUSHL, CALLM, CLEAR
   3879 // - FPU, FPU_R (and similar MMX/SSE ones)
   3880 //
   3881 
   3882 
   3883 
   3884 
   3885 /* Call h_fn (name h_nm) with the given arg, and return a new IRTemp
   3886    holding the result.  The arg must be a word-typed atom.  Callee
   3887    must be a VG_REGPARM(1) function. */
   3888 __attribute__((noinline))
   3889 static IRTemp gen_dirty_W_W ( PCEnv* pce, void* h_fn, HChar* h_nm,
   3890                               IRExpr* a1 )
   3891 {
   3892    IRTemp   res;
   3893    IRDirty* di;
   3894    tl_assert(isIRAtom(a1));
   3895    tl_assert(typeOfIRExpr(pce->sb->tyenv, a1) == pce->gWordTy);
   3896    res = newTemp(pce, pce->gWordTy, Shad);
   3897    di = unsafeIRDirty_1_N( res, 1/*regparms*/,
   3898                            h_nm, VG_(fnptr_to_fnentry)( h_fn ),
   3899                            mkIRExprVec_1( a1 ) );
   3900    stmt( 'I', pce, IRStmt_Dirty(di) );
   3901    return res;
   3902 }
   3903 
   3904 /* Two-arg version of gen_dirty_W_W.  Callee must be a VG_REGPARM(2)
   3905    function.*/
   3906 static IRTemp gen_dirty_W_WW ( PCEnv* pce, void* h_fn, HChar* h_nm,
   3907                                IRExpr* a1, IRExpr* a2 )
   3908 {
   3909    IRTemp   res;
   3910    IRDirty* di;
   3911    tl_assert(isIRAtom(a1));
   3912    tl_assert(isIRAtom(a2));
   3913    tl_assert(typeOfIRExpr(pce->sb->tyenv, a1) == pce->gWordTy);
   3914    tl_assert(typeOfIRExpr(pce->sb->tyenv, a2) == pce->gWordTy);
   3915    res = newTemp(pce, pce->gWordTy, Shad);
   3916    di = unsafeIRDirty_1_N( res, 2/*regparms*/,
   3917                            h_nm, VG_(fnptr_to_fnentry)( h_fn ),
   3918                            mkIRExprVec_2( a1, a2 ) );
   3919    stmt( 'I', pce, IRStmt_Dirty(di) );
   3920    return res;
   3921 }
   3922 
   3923 /* Three-arg version of gen_dirty_W_W.  Callee must be a VG_REGPARM(3)
   3924    function.*/
   3925 static IRTemp gen_dirty_W_WWW ( PCEnv* pce, void* h_fn, HChar* h_nm,
   3926                                 IRExpr* a1, IRExpr* a2, IRExpr* a3 )
   3927 {
   3928    IRTemp   res;
   3929    IRDirty* di;
   3930    tl_assert(isIRAtom(a1));
   3931    tl_assert(isIRAtom(a2));
   3932    tl_assert(isIRAtom(a3));
   3933    tl_assert(typeOfIRExpr(pce->sb->tyenv, a1) == pce->gWordTy);
   3934    tl_assert(typeOfIRExpr(pce->sb->tyenv, a2) == pce->gWordTy);
   3935    tl_assert(typeOfIRExpr(pce->sb->tyenv, a3) == pce->gWordTy);
   3936    res = newTemp(pce, pce->gWordTy, Shad);
   3937    di = unsafeIRDirty_1_N( res, 3/*regparms*/,
   3938                            h_nm, VG_(fnptr_to_fnentry)( h_fn ),
   3939                            mkIRExprVec_3( a1, a2, a3 ) );
   3940    stmt( 'I', pce, IRStmt_Dirty(di) );
   3941    return res;
   3942 }
   3943 
   3944 /* Four-arg version of gen_dirty_W_W.  Callee must be a VG_REGPARM(3)
   3945    function.*/
   3946 static IRTemp gen_dirty_W_WWWW ( PCEnv* pce, void* h_fn, HChar* h_nm,
   3947                                  IRExpr* a1, IRExpr* a2,
   3948                                  IRExpr* a3, IRExpr* a4 )
   3949 {
   3950    IRTemp   res;
   3951    IRDirty* di;
   3952    tl_assert(isIRAtom(a1));
   3953    tl_assert(isIRAtom(a2));
   3954    tl_assert(isIRAtom(a3));
   3955    tl_assert(isIRAtom(a4));
   3956    tl_assert(typeOfIRExpr(pce->sb->tyenv, a1) == pce->gWordTy);
   3957    tl_assert(typeOfIRExpr(pce->sb->tyenv, a2) == pce->gWordTy);
   3958    tl_assert(typeOfIRExpr(pce->sb->tyenv, a3) == pce->gWordTy);
   3959    tl_assert(typeOfIRExpr(pce->sb->tyenv, a4) == pce->gWordTy);
   3960    res = newTemp(pce, pce->gWordTy, Shad);
   3961    di = unsafeIRDirty_1_N( res, 3/*regparms*/,
   3962                            h_nm, VG_(fnptr_to_fnentry)( h_fn ),
   3963                            mkIRExprVec_4( a1, a2, a3, a4 ) );
   3964    stmt( 'I', pce, IRStmt_Dirty(di) );
   3965    return res;
   3966 }
   3967 
   3968 /* Version of gen_dirty_W_WW with no return value.  Callee must be a
   3969    VG_REGPARM(2) function.  If guard is non-NULL then it is used to
   3970    conditionalise the call. */
   3971 static void gen_dirty_v_WW ( PCEnv* pce, IRExpr* guard,
   3972                              void* h_fn, HChar* h_nm,
   3973                              IRExpr* a1, IRExpr* a2 )
   3974 {
   3975    IRDirty* di;
   3976    tl_assert(isIRAtom(a1));
   3977    tl_assert(isIRAtom(a2));
   3978    tl_assert(typeOfIRExpr(pce->sb->tyenv, a1) == pce->gWordTy);
   3979    tl_assert(typeOfIRExpr(pce->sb->tyenv, a2) == pce->gWordTy);
   3980    di = unsafeIRDirty_0_N( 2/*regparms*/,
   3981                            h_nm, VG_(fnptr_to_fnentry)( h_fn ),
   3982                            mkIRExprVec_2( a1, a2 ) );
   3983    if (guard)
   3984       di->guard = guard;
   3985    stmt( 'I', pce, IRStmt_Dirty(di) );
   3986 }
   3987 
   3988 /* Version of gen_dirty_W_WWW with no return value.  Callee must be a
   3989    VG_REGPARM(3) function.*/
   3990 static void gen_dirty_v_WWW ( PCEnv* pce, void* h_fn, HChar* h_nm,
   3991                               IRExpr* a1, IRExpr* a2, IRExpr* a3 )
   3992 {
   3993    IRDirty* di;
   3994    tl_assert(isIRAtom(a1));
   3995    tl_assert(isIRAtom(a2));
   3996    tl_assert(isIRAtom(a3));
   3997    tl_assert(typeOfIRExpr(pce->sb->tyenv, a1) == pce->gWordTy);
   3998    tl_assert(typeOfIRExpr(pce->sb->tyenv, a2) == pce->gWordTy);
   3999    tl_assert(typeOfIRExpr(pce->sb->tyenv, a3) == pce->gWordTy);
   4000    di = unsafeIRDirty_0_N( 3/*regparms*/,
   4001                            h_nm, VG_(fnptr_to_fnentry)( h_fn ),
   4002                            mkIRExprVec_3( a1, a2, a3 ) );
   4003    stmt( 'I', pce, IRStmt_Dirty(di) );
   4004 }
   4005 
   4006 /* Version of gen_dirty_v_WWW for 4 arguments.  Callee must be a
   4007    VG_REGPARM(3) function.*/
   4008 static void gen_dirty_v_WWWW ( PCEnv* pce, void* h_fn, HChar* h_nm,
   4009                                IRExpr* a1, IRExpr* a2,
   4010                                IRExpr* a3, IRExpr* a4 )
   4011 {
   4012    IRDirty* di;
   4013    tl_assert(isIRAtom(a1));
   4014    tl_assert(isIRAtom(a2));
   4015    tl_assert(isIRAtom(a3));
   4016    tl_assert(isIRAtom(a4));
   4017    tl_assert(typeOfIRExpr(pce->sb->tyenv, a1) == pce->gWordTy);
   4018    tl_assert(typeOfIRExpr(pce->sb->tyenv, a2) == pce->gWordTy);
   4019    tl_assert(typeOfIRExpr(pce->sb->tyenv, a3) == pce->gWordTy);
   4020    tl_assert(typeOfIRExpr(pce->sb->tyenv, a4) == pce->gWordTy);
   4021    di = unsafeIRDirty_0_N( 3/*regparms*/,
   4022                            h_nm, VG_(fnptr_to_fnentry)( h_fn ),
   4023                            mkIRExprVec_4( a1, a2, a3, a4 ) );
   4024    stmt( 'I', pce, IRStmt_Dirty(di) );
   4025 }
   4026 
   4027 /* Version of gen_dirty_v_WWW for 6 arguments.  Callee must be a
   4028    VG_REGPARM(3) function.*/
   4029 static void gen_dirty_v_6W ( PCEnv* pce, void* h_fn, HChar* h_nm,
   4030                              IRExpr* a1, IRExpr* a2, IRExpr* a3,
   4031                              IRExpr* a4, IRExpr* a5, IRExpr* a6 )
   4032 {
   4033    IRDirty* di;
   4034    tl_assert(isIRAtom(a1));
   4035    tl_assert(isIRAtom(a2));
   4036    tl_assert(isIRAtom(a3));
   4037    tl_assert(isIRAtom(a4));
   4038    tl_assert(isIRAtom(a5));
   4039    tl_assert(isIRAtom(a6));
   4040    tl_assert(typeOfIRExpr(pce->sb->tyenv, a1) == pce->gWordTy);
   4041    tl_assert(typeOfIRExpr(pce->sb->tyenv, a2) == pce->gWordTy);
   4042    tl_assert(typeOfIRExpr(pce->sb->tyenv, a3) == pce->gWordTy);
   4043    tl_assert(typeOfIRExpr(pce->sb->tyenv, a4) == pce->gWordTy);
   4044    tl_assert(typeOfIRExpr(pce->sb->tyenv, a5) == pce->gWordTy);
   4045    tl_assert(typeOfIRExpr(pce->sb->tyenv, a6) == pce->gWordTy);
   4046    di = unsafeIRDirty_0_N( 3/*regparms*/,
   4047                            h_nm, VG_(fnptr_to_fnentry)( h_fn ),
   4048                            mkIRExprVec_6( a1, a2, a3, a4, a5, a6 ) );
   4049    stmt( 'I', pce, IRStmt_Dirty(di) );
   4050 }
   4051 
   4052 static IRAtom* uwiden_to_host_word ( PCEnv* pce, IRAtom* a )
   4053 {
   4054    IRType a_ty = typeOfIRExpr(pce->sb->tyenv, a);
   4055    tl_assert(isIRAtom(a));
   4056    if (pce->hWordTy == Ity_I32) {
   4057       switch (a_ty) {
   4058          case Ity_I8:
   4059             return assignNew( 'I', pce, Ity_I32, unop(Iop_8Uto32, a) );
   4060          case Ity_I16:
   4061             return assignNew( 'I', pce, Ity_I32, unop(Iop_16Uto32, a) );
   4062          default:
   4063             ppIRType(a_ty);
   4064             tl_assert(0);
   4065       }
   4066    } else {
   4067       tl_assert(pce->hWordTy == Ity_I64);
   4068       switch (a_ty) {
   4069          case Ity_I8:
   4070             return assignNew( 'I', pce, Ity_I64, unop(Iop_8Uto64, a) );
   4071          case Ity_I16:
   4072             return assignNew( 'I', pce, Ity_I64, unop(Iop_16Uto64, a) );
   4073          case Ity_I32:
   4074             return assignNew( 'I', pce, Ity_I64, unop(Iop_32Uto64, a) );
   4075          default:
   4076             ppIRType(a_ty);
   4077             tl_assert(0);
   4078       }
   4079    }
   4080 }
   4081 
   4082 /* 'e' is a word-sized atom.  Call nonptr_or_unknown with it, bind the
   4083    results to a new temporary, and return the temporary.  Note this
   4084    takes an original expression but returns a shadow value. */
   4085 static IRTemp gen_call_nonptr_or_unknown_w ( PCEnv* pce, IRExpr* e )
   4086 {
   4087    return gen_dirty_W_W( pce, &nonptr_or_unknown,
   4088                               "nonptr_or_unknown", e );
   4089 }
   4090 
   4091 
   4092 /* Generate the shadow value for an IRExpr which is an atom and
   4093    guaranteed to be word-sized. */
   4094 static IRAtom* schemeEw_Atom ( PCEnv* pce, IRExpr* e )
   4095 {
   4096    if (pce->gWordTy == Ity_I32) {
   4097       if (e->tag == Iex_Const && e->Iex.Const.con->tag == Ico_U32) {
   4098          IRTemp t;
   4099          tl_assert(sizeof(UWord) == 4);
   4100          t = gen_call_nonptr_or_unknown_w(pce, e);
   4101          return mkexpr(t);
   4102       }
   4103       if (e->tag == Iex_RdTmp
   4104           && typeOfIRExpr(pce->sb->tyenv, e) == Ity_I32) {
   4105          return mkexpr( findShadowTmp(pce, e->Iex.RdTmp.tmp) );
   4106       }
   4107       /* there are no other word-sized atom cases */
   4108    } else {
   4109       if (e->tag == Iex_Const && e->Iex.Const.con->tag == Ico_U64) {
   4110          IRTemp t;
   4111          tl_assert(sizeof(UWord) == 8);
   4112          //return mkU64( (ULong)(UWord)NONPTR );
   4113          t = gen_call_nonptr_or_unknown_w(pce, e);
   4114          return mkexpr(t);
   4115       }
   4116       if (e->tag == Iex_RdTmp
   4117           && typeOfIRExpr(pce->sb->tyenv, e) == Ity_I64) {
   4118          return mkexpr( findShadowTmp(pce, e->Iex.RdTmp.tmp) );
   4119       }
   4120       /* there are no other word-sized atom cases */
   4121    }
   4122    ppIRExpr(e);
   4123    tl_assert(0);
   4124 }
   4125 
   4126 
/* Generate instrumentation for an arithmetic op whose (original)
   result already lives in 'dst'.  Computes the corresponding shadow
   value and assigns it to 'dstv'.  a1..a4 are the op's original
   arguments, guaranteed to be atoms; unused slots may be NULL.
   Which helper (if any) is called, and with what combination of
   shadow/original values, depends on how the op can affect
   pointerness -- see the per-case comments below. */
static
void instrument_arithop ( PCEnv* pce,
                          IRTemp dst, /* already holds result */
                          IRTemp dstv, /* generate an assignment to this */
                          IROp op,
                          /* original args, guaranteed to be atoms */
                          IRExpr* a1, IRExpr* a2, IRExpr* a3, IRExpr* a4 )
{
   HChar*  nm  = NULL;
   void*   fn  = NULL;
   IRExpr* a1v = NULL;
   IRExpr* a2v = NULL;
   //IRExpr* a3v = NULL;
   //IRExpr* a4v = NULL;
   IRTemp  res = IRTemp_INVALID;

   if (pce->gWordTy == Ity_I32) {

      tl_assert(pce->hWordTy == Ity_I32);
      switch (op) {

         /* For these cases, pass Segs for both arguments, and the
            result value. */
         case Iop_Add32: nm = "do_addW"; fn = &do_addW; goto ssr32;
         case Iop_Sub32: nm = "do_subW"; fn = &do_subW; goto ssr32;
         case Iop_Or32:  nm = "do_orW";  fn = &do_orW;  goto ssr32;
         ssr32:
            a1v = schemeEw_Atom( pce, a1 );
            a2v = schemeEw_Atom( pce, a2 );
            res = gen_dirty_W_WWW( pce, fn, nm, a1v, a2v, mkexpr(dst) );
            assign( 'I', pce, dstv, mkexpr(res) );
            break;

         /* In this case, pass Segs for both arguments, the result
            value, and the difference between the (original) values of
            the arguments. */
         case Iop_And32:
            nm = "do_andW"; fn = &do_andW;
            a1v = schemeEw_Atom( pce, a1 );
            a2v = schemeEw_Atom( pce, a2 );
            res = gen_dirty_W_WWWW(
                     pce, fn, nm, a1v, a2v, mkexpr(dst),
                     assignNew( 'I', pce, Ity_I32,
                                binop(Iop_Sub32,a1,a2) ) );
            assign( 'I', pce, dstv, mkexpr(res) );
            break;

         /* Pass one shadow arg and the result to the helper. */
         case Iop_Not32: nm = "do_notW"; fn = &do_notW; goto vr32;
         vr32:
            a1v = schemeEw_Atom( pce, a1 );
            res = gen_dirty_W_WW( pce, fn, nm, a1v, mkexpr(dst) );
            assign( 'I', pce, dstv, mkexpr(res) );
            break;

         /* Pass two shadow args only to the helper. */
         case Iop_Mul32: nm = "do_mulW"; fn = &do_mulW; goto vv32;
         vv32:
            a1v = schemeEw_Atom( pce, a1 );
            a2v = schemeEw_Atom( pce, a2 );
            res = gen_dirty_W_WW( pce, fn, nm, a1v, a2v );
            assign( 'I', pce, dstv, mkexpr(res) );
            break;

         /* We don't really know what the result could be; test at run
            time. */
         case Iop_64HIto32: goto n_or_u_32;
         case Iop_64to32:   goto n_or_u_32;
         case Iop_Xor32:    goto n_or_u_32;
         n_or_u_32:
            assign( 'I', pce, dstv,
                    mkexpr(
                       gen_call_nonptr_or_unknown_w( pce,
                                                     mkexpr(dst) ) ) );
            break;

         /* Cases where it's very obvious that the result cannot be a
            pointer.  Hence declare directly that it's NONPTR; don't
            bother with the overhead of calling nonptr_or_unknown. */

         /* cases where it makes no sense for the result to be a ptr */
         /* FIXME: for Shl/Shr/Sar, really should do a test on the 2nd
            arg, so that shift by zero preserves the original
            value. */
         case Iop_Shl32:     goto n32;
         case Iop_Sar32:     goto n32;
         case Iop_Shr32:     goto n32;
         case Iop_16Uto32:   goto n32;
         case Iop_16Sto32:   goto n32;
         case Iop_F64toI32S: goto n32;
         case Iop_16HLto32:  goto n32;
         case Iop_MullS16:   goto n32;
         case Iop_MullU16:   goto n32;
         case Iop_PRemC3210F64: goto n32;
         case Iop_DivU32:    goto n32;
         case Iop_DivS32:    goto n32;
         case Iop_V128to32:  goto n32;

         /* cases where result range is very limited and clearly cannot
            be a pointer */
         case Iop_1Uto32: goto n32;
         case Iop_1Sto32: goto n32;
         case Iop_8Uto32: goto n32;
         case Iop_8Sto32: goto n32;
         case Iop_Clz32:  goto n32;
         case Iop_Ctz32:  goto n32;
         case Iop_CmpF64: goto n32;
         case Iop_CmpORD32S: goto n32;
         case Iop_CmpORD32U: goto n32;
         /* shadow value is statically known: NONPTR */
         n32:
            assign( 'I', pce, dstv, mkU32( (UWord)NONPTR ));
            break;

         default:
            VG_(printf)("instrument_arithop(32-bit): unhandled: ");
            ppIROp(op);
            tl_assert(0);
      }

   } else {

      tl_assert(pce->gWordTy == Ity_I64);
      switch (op) {

         /* For these cases, pass Segs for both arguments, and the
            result value. */
         case Iop_Add64: nm = "do_addW"; fn = &do_addW; goto ssr64;
         case Iop_Sub64: nm = "do_subW"; fn = &do_subW; goto ssr64;
         case Iop_Or64:  nm = "do_orW";  fn = &do_orW;  goto ssr64;
         ssr64:
            a1v = schemeEw_Atom( pce, a1 );
            a2v = schemeEw_Atom( pce, a2 );
            res = gen_dirty_W_WWW( pce, fn, nm, a1v, a2v, mkexpr(dst) );
            assign( 'I', pce, dstv, mkexpr(res) );
            break;

         /* In this case, pass Segs for both arguments, the result
            value, and the difference between the (original) values of
            the arguments. */
         case Iop_And64:
            nm = "do_andW"; fn = &do_andW;
            a1v = schemeEw_Atom( pce, a1 );
            a2v = schemeEw_Atom( pce, a2 );
            res = gen_dirty_W_WWWW(
                     pce, fn, nm, a1v, a2v, mkexpr(dst),
                     assignNew( 'I', pce, Ity_I64,
                                binop(Iop_Sub64,a1,a2) ) );
            assign( 'I', pce, dstv, mkexpr(res) );
            break;

         /* Pass one shadow arg and the result to the helper. */
         case Iop_Not64: nm = "do_notW"; fn = &do_notW; goto vr64;
         vr64:
            a1v = schemeEw_Atom( pce, a1 );
            res = gen_dirty_W_WW( pce, fn, nm, a1v, mkexpr(dst) );
            assign( 'I', pce, dstv, mkexpr(res) );
            break;

         /* Pass two shadow args only to the helper. */
         case Iop_Mul64: nm = "do_mulW"; fn = &do_mulW; goto vv64;
         vv64:
            a1v = schemeEw_Atom( pce, a1 );
            a2v = schemeEw_Atom( pce, a2 );
            res = gen_dirty_W_WW( pce, fn, nm, a1v, a2v );
            assign( 'I', pce, dstv, mkexpr(res) );
            break;

         /* We don't really know what the result could be; test at run
            time. */
         case Iop_Xor64:      goto n_or_u_64;
         case Iop_128HIto64:  goto n_or_u_64;
         case Iop_128to64:    goto n_or_u_64;
         case Iop_V128HIto64: goto n_or_u_64;
         case Iop_V128to64:   goto n_or_u_64;
         n_or_u_64:
            assign( 'I', pce, dstv,
                    mkexpr(
                       gen_call_nonptr_or_unknown_w( pce,
                                                     mkexpr(dst) ) ) );
            break;

         /* Cases where it's very obvious that the result cannot be a
            pointer.  Hence declare directly that it's NONPTR; don't
            bother with the overhead of calling nonptr_or_unknown. */

         /* cases where it makes no sense for the result to be a ptr */
         /* FIXME: for Shl/Shr/Sar, really should do a test on the 2nd
            arg, so that shift by zero preserves the original
            value. */
         case Iop_Shl64:      goto n64;
         case Iop_Sar64:      goto n64;
         case Iop_Shr64:      goto n64;
         case Iop_32Uto64:    goto n64;
         case Iop_32Sto64:    goto n64;
         case Iop_16Uto64:    goto n64;
         case Iop_16Sto64:    goto n64;
         case Iop_32HLto64:   goto n64;
         case Iop_DivModU64to32: goto n64;
         case Iop_DivModS64to32: goto n64;
         case Iop_F64toI64S:     goto n64;
         case Iop_MullS32:    goto n64;
         case Iop_MullU32:    goto n64;
         case Iop_DivU64:     goto n64;
         case Iop_DivS64:     goto n64;
         case Iop_ReinterpF64asI64: goto n64;

         /* cases where result range is very limited and clearly cannot
            be a pointer */
         case Iop_1Uto64:        goto n64;
         case Iop_8Uto64:        goto n64;
         case Iop_8Sto64:        goto n64;
         case Iop_Ctz64:         goto n64;
         case Iop_Clz64:         goto n64;
         case Iop_CmpORD64S:     goto n64;
         case Iop_CmpORD64U:     goto n64;
         /* 64-bit simd */
         case Iop_Avg8Ux8: case Iop_Avg16Ux4:
         case Iop_Max16Sx4: case Iop_Max8Ux8: case Iop_Min16Sx4:
         case Iop_Min8Ux8: case Iop_MulHi16Ux4:
         case Iop_QNarrow32Sx2: case Iop_QNarrow16Sx4:
         case Iop_QNarrow16Ux4: case Iop_Add8x8: case Iop_Add32x2:
         case Iop_QAdd8Sx8: case Iop_QAdd16Sx4: case Iop_QAdd8Ux8:
         case Iop_QAdd16Ux4: case Iop_Add16x4: case Iop_CmpEQ8x8:
         case Iop_CmpEQ32x2: case Iop_CmpEQ16x4: case Iop_CmpGT8Sx8:
         case Iop_CmpGT32Sx2: case Iop_CmpGT16Sx4: case Iop_MulHi16Sx4:
         case Iop_Mul16x4: case Iop_ShlN32x2: case Iop_ShlN16x4:
         case Iop_SarN32x2: case Iop_SarN16x4: case Iop_ShrN32x2: case Iop_ShrN8x8:
         case Iop_ShrN16x4: case Iop_Sub8x8: case Iop_Sub32x2:
         case Iop_QSub8Sx8: case Iop_QSub16Sx4: case Iop_QSub8Ux8:
         case Iop_QSub16Ux4: case Iop_Sub16x4: case Iop_InterleaveHI8x8:
         case Iop_InterleaveHI32x2: case Iop_InterleaveHI16x4:
         case Iop_InterleaveLO8x8: case Iop_InterleaveLO32x2:
         case Iop_InterleaveLO16x4: case Iop_SarN8x8:
         case Iop_Perm8x8: case Iop_ShlN8x8: case Iop_Mul32x2:
         case Iop_CatEvenLanes16x4: case Iop_CatOddLanes16x4:
         /* shadow value is statically known: NONPTR */
         n64:
            assign( 'I', pce, dstv, mkU64( (UWord)NONPTR ));
            break;

         default:
            VG_(printf)("instrument_arithop(64-bit): unhandled: ");
            ppIROp(op);
            tl_assert(0);
      }
   }
}
   4373 
   4374 static
   4375 void gen_call_nonptr_or_unknown_range ( PCEnv* pce,
   4376                                         IRExpr* guard,
   4377                                         IRAtom* addr, IRAtom* len )
   4378 {
   4379    gen_dirty_v_WW( pce, guard,
   4380                    &nonptr_or_unknown_range,
   4381                    "nonptr_or_unknown_range",
   4382                    addr, len );
   4383 }
   4384 
   4385 /* iii describes zero or more non-exact integer register updates.  For
   4386    each one, generate IR to get the containing register, apply
   4387    nonptr_or_unknown to it, and write it back again. */
   4388 static void gen_nonptr_or_unknown_for_III( PCEnv* pce, IntRegInfo* iii )
   4389 {
   4390    Int i;
   4391    tl_assert(iii && iii->n_offsets >= 0);
   4392    for (i = 0; i < iii->n_offsets; i++) {
   4393       IRAtom* a1 = assignNew( 'I', pce, pce->gWordTy,
   4394                               IRExpr_Get( iii->offsets[i], pce->gWordTy ));
   4395       IRTemp a2 = gen_call_nonptr_or_unknown_w( pce, a1 );
   4396       stmt( 'I', pce, IRStmt_Put( iii->offsets[i]
   4397                                      + pce->guest_state_sizeB,
   4398                                   mkexpr(a2) ));
   4399    }
   4400 }
   4401 
   4402 
   4403 /* schemeS helper for doing stores, pulled out into a function because
   4404    it needs to handle both normal stores and store-conditionals.
   4405    Returns False if we see a case we don't know how to handle.
   4406 */
   4407 static Bool schemeS_store ( PCEnv* pce,
   4408                             IRExpr* data, IRExpr* addr, IRTemp resSC )
   4409 {
   4410    /* We have: STle(addr) = data
   4411       if data is int-word sized, do
   4412       check_store4(addr, addr#, data, data#)
   4413       for all other stores
   4414       check_store{1,2}(addr, addr#, data)
   4415 
   4416       The helper actually *does* the store, so that it can do the
   4417       post-hoc ugly hack of inspecting and "improving" the shadow data
   4418       after the store, in the case where it isn't an aligned word
   4419       store.
   4420 
   4421       Only word-sized values are shadowed.  If this is a
   4422       store-conditional, .resSC will denote a non-word-typed temp, and
   4423       so we don't need to shadow it.  Assert about the type, tho.
   4424       However, since we're not re-emitting the original IRStmt_Store,
   4425       but rather doing it as part of the helper function, we need to
   4426       actually do a SC in the helper, and assign the result bit to
   4427       .resSC.  Ugly.
   4428    */
   4429    IRType  d_ty  = typeOfIRExpr(pce->sb->tyenv, data);
   4430    IRExpr* addrv = schemeEw_Atom( pce, addr );
   4431    if (resSC != IRTemp_INVALID) {
   4432       tl_assert(typeOfIRTemp(pce->sb->tyenv, resSC) == Ity_I1);
   4433       /* viz, not something we want to shadow */
   4434       /* also, throw out all store-conditional cases that
   4435          we can't handle */
   4436       if (pce->gWordTy == Ity_I32 && d_ty != Ity_I32)
   4437          return False;
   4438       if (pce->gWordTy == Ity_I64 && d_ty != Ity_I32 && d_ty != Ity_I64)
   4439          return False;
   4440    }
   4441    if (pce->gWordTy == Ity_I32) {
   4442       /* ------ 32 bit host/guest (cough, cough) ------ */
   4443       switch (d_ty) {
   4444          /* Integer word case */
   4445          case Ity_I32: {
   4446             IRExpr* datav = schemeEw_Atom( pce, data );
   4447             if (resSC == IRTemp_INVALID) {
   4448                /* "normal" store */
   4449                gen_dirty_v_WWWW( pce,
   4450                                  &check_store4_P, "check_store4_P",
   4451                                  addr, addrv, data, datav );
   4452             } else {
   4453                /* store-conditional; need to snarf the success bit */
   4454                IRTemp resSC32
   4455                    = gen_dirty_W_WWWW( pce,
   4456                                        &check_store4C_P,
   4457                                        "check_store4C_P",
   4458                                        addr, addrv, data, datav );
   4459                /* presumably resSC32 will really be Ity_I32.  In
   4460                   any case we'll get jumped by the IR sanity
   4461                   checker if it's not, when it sees the
   4462                   following statement. */
   4463                assign( 'I', pce, resSC, unop(Iop_32to1, mkexpr(resSC32)) );
   4464             }
   4465             break;
   4466          }
   4467          /* Integer subword cases */
   4468          case Ity_I16:
   4469             gen_dirty_v_WWW( pce,
   4470                              &check_store2, "check_store2",
   4471                              addr, addrv,
   4472                              uwiden_to_host_word( pce, data ));
   4473             break;
   4474          case Ity_I8:
   4475             gen_dirty_v_WWW( pce,
   4476                              &check_store1, "check_store1",
   4477                              addr, addrv,
   4478                              uwiden_to_host_word( pce, data ));
   4479             break;
   4480          /* 64-bit float.  Pass store data in 2 32-bit pieces. */
   4481          case Ity_F64: {
   4482             IRAtom* d64 = assignNew( 'I', pce, Ity_I64,
   4483                                      unop(Iop_ReinterpF64asI64, data) );
   4484             IRAtom* dLo32 = assignNew( 'I', pce, Ity_I32,
   4485                                        unop(Iop_64to32, d64) );
   4486             IRAtom* dHi32 = assignNew( 'I', pce, Ity_I32,
   4487                                        unop(Iop_64HIto32, d64) );
   4488             gen_dirty_v_WWWW( pce,
   4489                               &check_store8_ms4B_ls4B,
   4490                               "check_store8_ms4B_ls4B",
   4491                               addr, addrv, dHi32, dLo32 );
   4492             break;
   4493          }
   4494          /* 32-bit float.  We can just use _store4, but need
   4495             to futz with the argument type. */
   4496          case Ity_F32: {
   4497             IRAtom* i32 = assignNew( 'I', pce, Ity_I32,
   4498                                      unop(Iop_ReinterpF32asI32,
   4499                                           data ) );
   4500             gen_dirty_v_WWW( pce,
   4501                              &check_store4,
   4502                              "check_store4",
   4503                              addr, addrv, i32 );
   4504             break;
   4505          }
   4506          /* 64-bit int.  Pass store data in 2 32-bit pieces. */
   4507          case Ity_I64: {
   4508             IRAtom* dLo32 = assignNew( 'I', pce, Ity_I32,
   4509                                        unop(Iop_64to32, data) );
   4510             IRAtom* dHi32 = assignNew( 'I', pce, Ity_I32,
   4511                                        unop(Iop_64HIto32, data) );
   4512             gen_dirty_v_WWWW( pce,
   4513                               &check_store8_ms4B_ls4B,
   4514                               "check_store8_ms4B_ls4B",
   4515                               addr, addrv, dHi32, dLo32 );
   4516             break;
   4517          }
   4518          /* 128-bit vector.  Pass store data in 4 32-bit pieces.
   4519             This is all very ugly and inefficient, but it is
   4520             hard to better without considerably complicating the
   4521             store-handling schemes. */
   4522          case Ity_V128: {
   4523             IRAtom* dHi64 = assignNew( 'I', pce, Ity_I64,
   4524                                        unop(Iop_V128HIto64, data) );
   4525             IRAtom* dLo64 = assignNew( 'I', pce, Ity_I64,
   4526                                        unop(Iop_V128to64, data) );
   4527             IRAtom* w3    = assignNew( 'I', pce, Ity_I32,
   4528                                        unop(Iop_64HIto32, dHi64) );
   4529             IRAtom* w2    = assignNew( 'I', pce, Ity_I32,
   4530                                        unop(Iop_64to32, dHi64) );
   4531             IRAtom* w1    = assignNew( 'I', pce, Ity_I32,
   4532                                        unop(Iop_64HIto32, dLo64) );
   4533             IRAtom* w0    = assignNew( 'I', pce, Ity_I32,
   4534                                        unop(Iop_64to32, dLo64) );
   4535             gen_dirty_v_6W( pce,
   4536                             &check_store16_ms4B_4B_4B_ls4B,
   4537                             "check_store16_ms4B_4B_4B_ls4B",
   4538                             addr, addrv, w3, w2, w1, w0 );
   4539             break;
   4540          }
   4541          default:
   4542             ppIRType(d_ty); tl_assert(0);
   4543       }
   4544    } else {
   4545       /* ------ 64 bit host/guest (cough, cough) ------ */
   4546       switch (d_ty) {
   4547          /* Integer word case */
   4548          case Ity_I64: {
   4549             IRExpr* datav = schemeEw_Atom( pce, data );
   4550             if (resSC == IRTemp_INVALID) {
   4551                /* "normal" store */
   4552                gen_dirty_v_WWWW( pce,
   4553                                  &check_store8_P, "check_store8_P",
   4554                                  addr, addrv, data, datav );
   4555             } else {
   4556                IRTemp resSC64
   4557                    = gen_dirty_W_WWWW( pce,
   4558                                        &check_store8C_P,
   4559                                        "check_store8C_P",
   4560                                        addr, addrv, data, datav );
   4561                assign( 'I', pce, resSC, unop(Iop_64to1, mkexpr(resSC64)) );
   4562             }
   4563             break;
   4564          }
   4565          /* Integer subword cases */
   4566          case Ity_I32:
   4567             if (resSC == IRTemp_INVALID) {
   4568                /* "normal" store */
   4569                gen_dirty_v_WWW( pce,
   4570                                 &check_store4, "check_store4",
   4571                                 addr, addrv,
   4572                                 uwiden_to_host_word( pce, data ));
   4573             } else {
   4574                /* store-conditional; need to snarf the success bit */
   4575                IRTemp resSC64
   4576                    = gen_dirty_W_WWW( pce,
   4577                                       &check_store4C,
   4578                                       "check_store4C",
   4579                                       addr, addrv,
   4580                                       uwiden_to_host_word( pce, data ));
   4581                assign( 'I', pce, resSC, unop(Iop_64to1, mkexpr(resSC64)) );
   4582             }
   4583             break;
   4584          case Ity_I16:
   4585             gen_dirty_v_WWW( pce,
   4586                              &check_store2, "check_store2",
   4587                              addr, addrv,
   4588                              uwiden_to_host_word( pce, data ));
   4589             break;
   4590          case Ity_I8:
   4591             gen_dirty_v_WWW( pce,
   4592                              &check_store1, "check_store1",
   4593                              addr, addrv,
   4594                              uwiden_to_host_word( pce, data ));
   4595             break;
   4596          /* 128-bit vector.  Pass store data in 2 64-bit pieces. */
   4597          case Ity_V128: {
   4598             IRAtom* dHi64 = assignNew( 'I', pce, Ity_I64,
   4599                                        unop(Iop_V128HIto64, data) );
   4600             IRAtom* dLo64 = assignNew( 'I', pce, Ity_I64,
   4601                                        unop(Iop_V128to64, data) );
   4602             gen_dirty_v_WWWW( pce,
   4603                               &check_store16_ms8B_ls8B,
   4604                               "check_store16_ms8B_ls8B",
   4605                               addr, addrv, dHi64, dLo64 );
   4606             break;
   4607          }
   4608          /* 64-bit float. */
   4609          case Ity_F64: {
   4610             IRAtom* dI = assignNew( 'I', pce, Ity_I64,
   4611                                      unop(Iop_ReinterpF64asI64,
   4612                                           data ) );
   4613             gen_dirty_v_WWW( pce,
   4614                              &check_store8_all8B,
   4615                              "check_store8_all8B",
   4616                              addr, addrv, dI );
   4617             break;
   4618          }
   4619          /* 32-bit float.  We can just use _store4, but need
   4620             to futz with the argument type. */
   4621          case Ity_F32: {
   4622             IRAtom* i32 = assignNew( 'I', pce, Ity_I32,
   4623                                      unop(Iop_ReinterpF32asI32,
   4624                                           data ) );
   4625             IRAtom* i64 = assignNew( 'I', pce, Ity_I64,
   4626                                      unop(Iop_32Uto64,
   4627                                           i32 ) );
   4628             gen_dirty_v_WWW( pce,
   4629                              &check_store4,
   4630                              "check_store4",
   4631                              addr, addrv, i64 );
   4632             break;
   4633          }
   4634          default:
   4635             ppIRType(d_ty); tl_assert(0);
   4636       }
   4637    }
   4638    /* And don't copy the original, since the helper does the store.
   4639       Ick. */
   4640    return True; /* store was successfully instrumented */
   4641 }
   4642 
   4643 
/* schemeS helper for doing loads, pulled out into a function because
   it needs to handle both normal loads and load-linked operations.
*/
   4647 static void schemeS_load ( PCEnv* pce, IRExpr* addr, IRType e_ty, IRTemp dstv )
   4648 {
   4649    HChar*  h_nm  = NULL;
   4650    void*   h_fn  = NULL;
   4651    IRExpr* addrv = NULL;
   4652    if (e_ty == pce->gWordTy) {
   4653       tl_assert(dstv != IRTemp_INVALID);
   4654    } else {
   4655       tl_assert(dstv == IRTemp_INVALID);
   4656    }
   4657    if (pce->gWordTy == Ity_I32) {
   4658       /* 32 bit host/guest (cough, cough) */
   4659       switch (e_ty) {
   4660          /* Ity_I32: helper returns shadow value. */
   4661          case Ity_I32:  h_fn = &check_load4_P;
   4662                         h_nm = "check_load4_P"; break;
   4663          /* all others: helper does not return a shadow
   4664             value. */
   4665          case Ity_V128: h_fn = &check_load16;
   4666                         h_nm = "check_load16"; break;
   4667          case Ity_I64:
   4668          case Ity_F64:  h_fn = &check_load8;
   4669                         h_nm = "check_load8"; break;
   4670          case Ity_F32:  h_fn = &check_load4;
   4671                         h_nm = "check_load4"; break;
   4672          case Ity_I16:  h_fn = &check_load2;
   4673                         h_nm = "check_load2"; break;
   4674          case Ity_I8:   h_fn = &check_load1;
   4675                         h_nm = "check_load1"; break;
   4676          default: ppIRType(e_ty); tl_assert(0);
   4677       }
   4678       addrv = schemeEw_Atom( pce, addr );
   4679       if (e_ty == Ity_I32) {
   4680          assign( 'I', pce, dstv,
   4681                   mkexpr( gen_dirty_W_WW( pce, h_fn, h_nm,
   4682                                                addr, addrv )) );
   4683       } else {
   4684          gen_dirty_v_WW( pce, NULL, h_fn, h_nm, addr, addrv );
   4685       }
   4686    } else {
   4687       /* 64 bit host/guest (cough, cough) */
   4688       switch (e_ty) {
   4689          /* Ity_I64: helper returns shadow value. */
   4690          case Ity_I64:  h_fn = &check_load8_P;
   4691                         h_nm = "check_load8_P"; break;
   4692          /* all others: helper does not return a shadow
   4693             value. */
   4694          case Ity_V128: h_fn = &check_load16;
   4695                         h_nm = "check_load16"; break;
   4696          case Ity_F64:  h_fn = &check_load8;
   4697                         h_nm = "check_load8"; break;
   4698          case Ity_F32:
   4699          case Ity_I32:  h_fn = &check_load4;
   4700                         h_nm = "check_load4"; break;
   4701          case Ity_I16:  h_fn = &check_load2;
   4702                         h_nm = "check_load2"; break;
   4703          case Ity_I8:   h_fn = &check_load1;
   4704                         h_nm = "check_load1"; break;
   4705          default: ppIRType(e_ty); tl_assert(0);
   4706       }
   4707       addrv = schemeEw_Atom( pce, addr );
   4708       if (e_ty == Ity_I64) {
   4709          assign( 'I', pce, dstv,
   4710                   mkexpr( gen_dirty_W_WW( pce, h_fn, h_nm,
   4711                                                addr, addrv )) );
   4712       } else {
   4713          gen_dirty_v_WW( pce, NULL, h_fn, h_nm, addr, addrv );
   4714       }
   4715    }
   4716 }
   4717 
   4718 
   4719 /* Generate into 'pce', instrumentation for 'st'.  Also copy 'st'
   4720    itself into 'pce' (the caller does not do so).  This is somewhat
   4721    complex and relies heavily on the assumption that the incoming IR
   4722    is in flat form.
   4723 
   4724    Generally speaking, the instrumentation is placed after the
   4725    original statement, so that results computed by the original can be
   4726    used in the instrumentation.  However, that isn't safe for memory
   4727    references, since we need the instrumentation (hence bounds check
   4728    and potential error message) to happen before the reference itself,
   4729    as the latter could cause a fault. */
   4730 static void schemeS ( PCEnv* pce, IRStmt* st )
   4731 {
   4732    tl_assert(st);
   4733    tl_assert(isFlatIRStmt(st));
   4734 
   4735    switch (st->tag) {
   4736 
   4737       case Ist_CAS: {
   4738          /* In all these CAS cases, the did-we-succeed? comparison is
   4739             done using Iop_CasCmpEQ{8,16,32,64} rather than the plain
   4740             Iop_CmpEQ equivalents.  This isn't actually necessary,
   4741             since the generated IR is not going to be subsequently
   4742             instrumented by Memcheck.  But it's done for consistency.
   4743             See COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
   4744             background/rationale. */
   4745          IRCAS* cas = st->Ist.CAS.details;
   4746          IRType elTy = typeOfIRExpr(pce->sb->tyenv, cas->expdLo);
   4747          if (cas->oldHi == IRTemp_INVALID) {
   4748             /* ------------ SINGLE CAS ------------ */
   4749             /* -- single cas -- 32 bits, on 32-bit host -- */
   4750             /* -- single cas -- 64 bits, on 64-bit host -- */
   4751             /* -- viz, single cas, native-word case -- */
   4752             if ( (pce->gWordTy == Ity_I32 && elTy == Ity_I32)
   4753                  || (pce->gWordTy == Ity_I64 && elTy == Ity_I64) ) {
   4754                // 32 bit host translation scheme; 64-bit is analogous
   4755                // old#    = check_load4_P(addr, addr#)
   4756                // old     = CAS(addr:expd->new) [COPY]
   4757                // success = CasCmpEQ32(old,expd)
   4758                // if (success) do_shadow_store4_P(addr, new#)
   4759                IRTemp  success;
   4760                Bool    is64  = elTy == Ity_I64;
   4761                IROp    cmpEQ = is64 ? Iop_CasCmpEQ64 : Iop_CasCmpEQ32;
   4762                void*   r_fn  = is64 ? &check_load8_P  : &check_load4_P;
   4763                HChar*  r_nm  = is64 ? "check_load8_P" : "check_load4_P";
   4764                void*   w_fn  = is64 ? &do_shadow_store8_P  : &do_shadow_store4_P;
   4765                void*   w_nm  = is64 ? "do_shadow_store8_P" : "do_shadow_store4_P";
   4766                IRExpr* addr  = cas->addr;
   4767                IRExpr* addrV = schemeEw_Atom(pce, addr);
   4768                IRTemp  old   = cas->oldLo;
   4769                IRTemp  oldV  = newShadowTmp(pce, old);
   4770                IRExpr* nyu   = cas->dataLo;
   4771                IRExpr* nyuV  = schemeEw_Atom(pce, nyu);
   4772                IRExpr* expd  = cas->expdLo;
   4773                assign( 'I', pce, oldV,
   4774                        mkexpr( gen_dirty_W_WW( pce, r_fn, r_nm, addr, addrV )));
   4775                stmt( 'C', pce, st );
   4776                success = newTemp(pce, Ity_I1, NonShad);
   4777                assign('I', pce, success, binop(cmpEQ, mkexpr(old), expd));
   4778                gen_dirty_v_WW( pce, mkexpr(success), w_fn, w_nm, addr, nyuV );
   4779             }
   4780             else
   4781             /* -- single cas -- 8 or 16 bits, on 32-bit host -- */
   4782             /* -- viz, single cas, 32-bit subword cases -- */
   4783             if (pce->gWordTy == Ity_I32
   4784                 && (elTy == Ity_I8 || elTy == Ity_I16)) {
   4785                // 8-bit translation scheme; 16-bit is analogous
   4786                // check_load1(addr, addr#)
   4787                // old     = CAS(addr:expd->new) [COPY]
   4788                // success = CasCmpEQ8(old,expd)
   4789                // if (success) nonptr_or_unknown_range(addr, 1)
   4790                IRTemp  success;
   4791                Bool    is16  = elTy == Ity_I16;
   4792                IRExpr* addr  = cas->addr;
   4793                IRExpr* addrV = schemeEw_Atom(pce, addr);
   4794                IRTemp  old   = cas->oldLo;
   4795                IRExpr* expd  = cas->expdLo;
   4796                void*   h_fn  = is16 ? &check_load2  : &check_load1;
   4797                HChar*  h_nm  = is16 ? "check_load2" : "check_load1";
   4798                IROp    cmpEQ = is16 ? Iop_CasCmpEQ16 : Iop_CasCmpEQ8;
   4799                Int     szB   = is16 ? 2 : 1;
   4800                gen_dirty_v_WW( pce, NULL, h_fn, h_nm, addr, addrV );
   4801                stmt( 'C', pce, st );
   4802                success = newTemp(pce, Ity_I1, NonShad);
   4803                assign('I', pce, success,
   4804                            binop(cmpEQ, mkexpr(old), expd));
   4805                gen_call_nonptr_or_unknown_range( pce, mkexpr(success),
   4806                                                  addr, mkIRExpr_HWord(szB) );
   4807             }
   4808             else
   4809             /* -- single cas -- 8, 16 or 32 bits, on 64-bit host -- */
   4810             /* -- viz, single cas, 64-bit subword cases -- */
   4811             if (pce->gWordTy == Ity_I64
   4812                 && (elTy == Ity_I8 || elTy == Ity_I16 || elTy == Ity_I32)) {
   4813                // 8-bit translation scheme; 16/32-bit are analogous
   4814                // check_load1(addr, addr#)
   4815                // old     = CAS(addr:expd->new) [COPY]
   4816                // success = CasCmpEQ8(old,expd)
   4817                // if (success) nonptr_or_unknown_range(addr, 1)
   4818                IRTemp  success;
   4819                Bool    is16  = elTy == Ity_I16;
   4820                Bool    is32  = elTy == Ity_I32;
   4821                IRExpr* addr  = cas->addr;
   4822                IRExpr* addrV = schemeEw_Atom(pce, addr);
   4823                IRTemp  old   = cas->oldLo;
   4824                IRExpr* expd  = cas->expdLo;
   4825                void*   h_fn  = is32 ? &check_load4
   4826                                     : (is16 ? &check_load2 : &check_load1);
   4827                HChar*  h_nm  = is32 ? "check_load4"
   4828                                     : (is16 ? "check_load2" : "check_load1");
   4829                IROp    cmpEQ = is32 ? Iop_CasCmpEQ32
   4830                                     : (is16 ? Iop_CasCmpEQ16 : Iop_CasCmpEQ8);
   4831                Int     szB   = is32 ? 4 : (is16 ? 2 : 1);
   4832                gen_dirty_v_WW( pce, NULL, h_fn, h_nm, addr, addrV );
   4833                stmt( 'C', pce, st );
   4834                success = newTemp(pce, Ity_I1, NonShad);
   4835                assign('I', pce, success,
   4836                            binop(cmpEQ, mkexpr(old), expd));
   4837                gen_call_nonptr_or_unknown_range( pce, mkexpr(success),
   4838                                                  addr, mkIRExpr_HWord(szB) );
   4839             }
   4840             else
   4841                goto unhandled;
   4842          } else {
   4843             /* ------------ DOUBLE CAS ------------ */
   4844             /* Punt on bigendian DCAS.  In fact it's probably trivial
   4845                to do; just swap the individual shadow loads/stores
   4846                around in memory, but we'd have to verify it, and there
   4847                is no use case.  So punt. */
   4848             if (cas->end != Iend_LE)
   4849                goto unhandled;
   4850             /* -- double cas -- 2 x 32 bits, on 32-bit host -- */
   4851             /* -- double cas -- 2 x 64 bits, on 64-bit host -- */
   4852             /* -- viz, double cas, native-word case -- */
   4853             if ( (pce->gWordTy == Ity_I32 && elTy == Ity_I32)
   4854                  || (pce->gWordTy == Ity_I64 && elTy == Ity_I64) ) {
   4855                // 32 bit host translation scheme; 64-bit is analogous
   4856                // oldHi#    = check_load4_P(addr+4, addr#)
   4857                // oldLo#    = check_load4_P(addr+0, addr#)
   4858                // oldHi/Lo  = DCAS(addr:expdHi/Lo->newHi/Lo) [COPY]
   4859                // success   = CasCmpEQ32(oldHi,expdHi) && CasCmpEQ32(oldLo,expdLo)
   4860                //           = ((oldHi ^ expdHi) | (oldLo ^ expdLo)) == 0
   4861                // if (success) do_shadow_store4_P(addr+4, newHi#)
   4862                // if (success) do_shadow_store4_P(addr+0, newLo#)
   4863                IRTemp  diffHi, diffLo, diff, success, addrpp;
   4864                Bool    is64       = elTy == Ity_I64;
   4865                void*   r_fn       = is64 ? &check_load8_P  : &check_load4_P;
   4866                HChar*  r_nm       = is64 ? "check_load8_P" : "check_load4_P";
   4867                void*   w_fn       = is64 ? &do_shadow_store8_P
   4868                                          : &do_shadow_store4_P;
   4869                void*   w_nm       = is64 ? "do_shadow_store8_P"
   4870                                          : "do_shadow_store4_P";
   4871                IROp    opADD      = is64 ? Iop_Add64 : Iop_Add32;
   4872                IROp    opXOR      = is64 ? Iop_Xor64 : Iop_Xor32;
   4873                IROp    opOR       = is64 ? Iop_Or64 : Iop_Or32;
   4874                IROp    opCasCmpEQ = is64 ? Iop_CasCmpEQ64 : Iop_CasCmpEQ32;
   4875                IRExpr* step       = is64 ? mkU64(8) : mkU32(4);
   4876                IRExpr* zero       = is64 ? mkU64(0) : mkU32(0);
   4877                IRExpr* addr       = cas->addr;
   4878                IRExpr* addrV      = schemeEw_Atom(pce, addr);
   4879                IRTemp  oldLo      = cas->oldLo;
   4880                IRTemp  oldLoV     = newShadowTmp(pce, oldLo);
   4881                IRTemp  oldHi      = cas->oldHi;
   4882                IRTemp  oldHiV     = newShadowTmp(pce, oldHi);
   4883                IRExpr* nyuLo      = cas->dataLo;
   4884                IRExpr* nyuLoV     = schemeEw_Atom(pce, nyuLo);
   4885                IRExpr* nyuHi      = cas->dataHi;
   4886                IRExpr* nyuHiV     = schemeEw_Atom(pce, nyuHi);
   4887                IRExpr* expdLo     = cas->expdLo;
   4888                IRExpr* expdHi     = cas->expdHi;
   4889                tl_assert(elTy == Ity_I32 || elTy == Ity_I64);
   4890                tl_assert(pce->gWordTy == elTy);
   4891                addrpp = newTemp(pce, elTy, NonShad);
   4892                assign('I', pce, addrpp, binop(opADD, addr, step));
   4893                assign('I', pce, oldHiV,
   4894                       mkexpr( gen_dirty_W_WW( pce, r_fn, r_nm,
   4895                                                    mkexpr(addrpp), addrV ))
   4896                );
   4897                assign('I', pce, oldLoV,
   4898                       mkexpr( gen_dirty_W_WW( pce, r_fn, r_nm,
   4899                                                    addr, addrV ))
   4900                );
   4901                stmt( 'C', pce, st );
   4902                diffHi = newTemp(pce, elTy, NonShad);
   4903                assign('I', pce, diffHi,
   4904                            binop(opXOR, mkexpr(oldHi), expdHi));
   4905                diffLo = newTemp(pce, elTy, NonShad);
   4906                assign('I', pce, diffLo,
   4907                            binop(opXOR, mkexpr(oldLo), expdLo));
   4908                diff = newTemp(pce, elTy, NonShad);
   4909                assign('I', pce, diff,
   4910                       binop(opOR, mkexpr(diffHi), mkexpr(diffLo)));
   4911                success = newTemp(pce, Ity_I1, NonShad);
   4912                assign('I', pce, success,
   4913                       binop(opCasCmpEQ, mkexpr(diff), zero));
   4914                gen_dirty_v_WW( pce, mkexpr(success),
   4915                                      w_fn, w_nm, mkexpr(addrpp), nyuHiV );
   4916                gen_dirty_v_WW( pce, mkexpr(success),
   4917                                     w_fn, w_nm, addr, nyuLoV );
   4918             }
   4919             else
   4920             /* -- double cas -- 2 x 32 bits, on 64-bit host -- */
   4921             if (pce->gWordTy == Ity_I64 && elTy == Ity_I32) {
   4922                // check_load8(addr, addr#)
   4923                // oldHi/Lo  = DCAS(addr:expdHi/Lo->newHi/Lo) [COPY]
   4924                // success   = CasCmpEQ32(oldHi,expdHi) && CasCmpEQ32(oldLo,expdLo)
   4925                //           = ((oldHi ^ expdHi) | (oldLo ^ expdLo)) == 0
   4926                // if (success) nonptr_or_unknown_range(addr, 8)
   4927                IRTemp  diffHi, diffLo, diff, success;
   4928                IRExpr* addr   = cas->addr;
   4929                IRExpr* addrV  = schemeEw_Atom(pce, addr);
   4930                IRTemp  oldLo  = cas->oldLo;
   4931                IRTemp  oldHi  = cas->oldHi;
   4932                IRExpr* expdLo = cas->expdLo;
   4933                IRExpr* expdHi = cas->expdHi;
   4934                gen_dirty_v_WW( pce, NULL, &check_load8, "check_load8",
   4935                                addr, addrV );
   4936                stmt( 'C', pce, st );
   4937                diffHi = newTemp(pce, Ity_I32, NonShad);
   4938                assign('I', pce, diffHi,
   4939                            binop(Iop_Xor32, mkexpr(oldHi), expdHi));
   4940                diffLo = newTemp(pce, Ity_I32, NonShad);
   4941                assign('I', pce, diffLo,
   4942                            binop(Iop_Xor32, mkexpr(oldLo), expdLo));
   4943                diff = newTemp(pce, Ity_I32, NonShad);
   4944                assign('I', pce, diff,
   4945                       binop(Iop_Or32, mkexpr(diffHi), mkexpr(diffLo)));
   4946                success = newTemp(pce, Ity_I1, NonShad);
   4947                assign('I', pce, success,
   4948                       binop(Iop_CasCmpEQ32, mkexpr(diff), mkU32(0)));
   4949                gen_call_nonptr_or_unknown_range( pce, mkexpr(success),
   4950                                                  addr, mkU64(8) );
   4951             }
   4952             else
   4953                goto unhandled;
   4954          }
   4955          break;
   4956       }
   4957 
   4958       case Ist_LLSC: {
   4959          if (st->Ist.LLSC.storedata == NULL) {
   4960             /* LL */
   4961             IRTemp dst    = st->Ist.LLSC.result;
   4962             IRType dataTy = typeOfIRTemp(pce->sb->tyenv, dst);
   4963             Bool   isWord = dataTy == pce->gWordTy;
   4964             IRTemp dstv   = isWord ? newShadowTmp( pce, dst )
   4965                                    : IRTemp_INVALID;
   4966             schemeS_load( pce, st->Ist.LLSC.addr, dataTy, dstv );
   4967             /* copy the original -- must happen after the helper call */
   4968             stmt( 'C', pce, st );
   4969          } else {
   4970             /* SC */
   4971             schemeS_store( pce,
   4972                            st->Ist.LLSC.storedata,
   4973                            st->Ist.LLSC.addr,
   4974                            st->Ist.LLSC.result );
   4975             /* Don't copy the original, since the helper does the
   4976                store itself. */
   4977          }
   4978          break;
   4979       }
   4980 
   4981       case Ist_Dirty: {
   4982          Int i;
   4983          IRDirty* di;
   4984          stmt( 'C', pce, st );
   4985          /* nasty.  assumes that (1) all helpers are unconditional,
   4986             and (2) all outputs are non-ptr */
   4987          di = st->Ist.Dirty.details;
   4988          /* deal with the return tmp, if any */
   4989          if (di->tmp != IRTemp_INVALID
   4990              && typeOfIRTemp(pce->sb->tyenv, di->tmp) == pce->gWordTy) {
   4991             /* di->tmp is shadowed.  Set it to NONPTR. */
   4992             IRTemp dstv = newShadowTmp( pce, di->tmp );
   4993             if (pce->gWordTy == Ity_I32) {
   4994               assign( 'I', pce, dstv, mkU32( (UWord)NONPTR ));
   4995             } else {
   4996               assign( 'I', pce, dstv, mkU64( (UWord)NONPTR ));
   4997             }
   4998          }
   4999          /* apply the nonptr_or_unknown technique to any parts of
   5000             the guest state that happen to get written */
   5001          for (i = 0; i < di->nFxState; i++) {
   5002             IntRegInfo iii;
   5003             tl_assert(di->fxState[i].fx != Ifx_None);
   5004             if (di->fxState[i].fx == Ifx_Read)
   5005                continue; /* this bit is only read -- not interesting */
   5006             get_IntRegInfo( &iii, di->fxState[i].offset,
   5007                                   di->fxState[i].size );
   5008             tl_assert(iii.n_offsets >= -1
   5009                       && iii.n_offsets <= N_INTREGINFO_OFFSETS);
   5010             /* Deal with 3 possible cases, same as with Ist_Put
   5011                elsewhere in this function. */
   5012             if (iii.n_offsets == -1) {
   5013                /* case (1): exact write of an integer register. */
   5014                IRAtom* a1
   5015                   = assignNew( 'I', pce, pce->gWordTy,
   5016                                IRExpr_Get( iii.offsets[i], pce->gWordTy ));
   5017                IRTemp a2 = gen_call_nonptr_or_unknown_w( pce, a1 );
   5018                stmt( 'I', pce, IRStmt_Put( iii.offsets[i]
   5019                                               + pce->guest_state_sizeB,
   5020                                            mkexpr(a2) ));
   5021             } else {
   5022                /* when == 0: case (3): no instrumentation needed */
   5023                /* when > 0: case (2) .. complex case.  Fish out the
   5024                   stored value for the whole register, heave it
   5025                   through nonptr_or_unknown, and use that as the new
   5026                   shadow value. */
   5027                tl_assert(iii.n_offsets >= 0
   5028                          && iii.n_offsets <= N_INTREGINFO_OFFSETS);
   5029                gen_nonptr_or_unknown_for_III( pce, &iii );
   5030             }
   5031          } /* for (i = 0; i < di->nFxState; i++) */
   5032          /* finally, deal with memory outputs */
   5033          if (di->mFx != Ifx_None) {
   5034             tl_assert(di->mAddr && isIRAtom(di->mAddr));
   5035             tl_assert(di->mSize > 0);
   5036             gen_call_nonptr_or_unknown_range( pce, NULL, di->mAddr,
   5037                                               mkIRExpr_HWord(di->mSize));
   5038          }
   5039          break;
   5040       }
   5041 
   5042       case Ist_NoOp:
   5043          break;
   5044 
   5045       /* nothing interesting in these; just copy them through */
   5046       case Ist_AbiHint:
   5047       case Ist_MBE:
   5048       case Ist_Exit:
   5049       case Ist_IMark:
   5050          stmt( 'C', pce, st );
   5051          break;
   5052 
   5053       case Ist_PutI: {
   5054          IRRegArray* descr = st->Ist.PutI.descr;
   5055          stmt( 'C', pce, st );
   5056          tl_assert(descr && descr->elemTy);
   5057          if (is_integer_guest_reg_array(descr)) {
   5058             /* if this fails, is_integer_guest_reg_array is returning
   5059                bogus results */
   5060             tl_assert(descr->elemTy == pce->gWordTy);
   5061             stmt(
   5062                'I', pce,
   5063                IRStmt_PutI(
   5064                   mkIRRegArray(descr->base + pce->guest_state_sizeB,
   5065                                descr->elemTy, descr->nElems),
   5066                   st->Ist.PutI.ix,
   5067                   st->Ist.PutI.bias,
   5068                   schemeEw_Atom( pce, st->Ist.PutI.data)
   5069                )
   5070             );
   5071          }
   5072          break;
   5073       }
   5074 
   5075       case Ist_Put: {
   5076          /* PUT(offset) = atom */
   5077          /* 3 cases:
   5078             1. It's a complete write of an integer register.  Get hold of
   5079                'atom's shadow value and write it in the shadow state.
   5080             2. It's a partial write of an integer register.  Let the write
   5081                happen, then fish out the complete register value and see if,
   5082                via range checking, consultation of tea leaves, etc, its
   5083                shadow value can be upgraded to anything useful.
   5084             3. It is none of the above.  Generate no instrumentation. */
   5085          IntRegInfo iii;
   5086          IRType     ty;
   5087          stmt( 'C', pce, st );
   5088          ty = typeOfIRExpr(pce->sb->tyenv, st->Ist.Put.data);
   5089          get_IntRegInfo( &iii, st->Ist.Put.offset,
   5090                          sizeofIRType(ty) );
   5091          if (iii.n_offsets == -1) {
   5092             /* case (1): exact write of an integer register. */
   5093             tl_assert(ty == pce->gWordTy);
   5094             stmt( 'I', pce,
   5095                        IRStmt_Put( st->Ist.Put.offset
   5096                                       + pce->guest_state_sizeB,
   5097                                    schemeEw_Atom( pce, st->Ist.Put.data)) );
   5098          } else {
   5099             /* when == 0: case (3): no instrumentation needed */
   5100             /* when > 0: case (2) .. complex case.  Fish out the
   5101                stored value for the whole register, heave it through
   5102                nonptr_or_unknown, and use that as the new shadow
   5103                value. */
   5104             tl_assert(iii.n_offsets >= 0
   5105                       && iii.n_offsets <= N_INTREGINFO_OFFSETS);
   5106             gen_nonptr_or_unknown_for_III( pce, &iii );
   5107          }
   5108          break;
   5109       } /* case Ist_Put */
   5110 
   5111       case Ist_Store: {
   5112          Bool ok = schemeS_store( pce,
   5113                                   st->Ist.Store.data,
   5114                                   st->Ist.Store.addr,
   5115                                   IRTemp_INVALID/*not a SC*/ );
   5116          if (!ok) goto unhandled;
   5117          /* Don't copy the original, since the helper does the store
   5118             itself. */
   5119          break;
   5120       }
   5121 
   5122       case Ist_WrTmp: {
   5123          /* This is the only place we have to deal with the full
   5124             IRExpr range.  In all other places where an IRExpr could
   5125             appear, we in fact only get an atom (Iex_RdTmp or
   5126             Iex_Const). */
   5127          IRExpr* e      = st->Ist.WrTmp.data;
   5128          IRType  e_ty   = typeOfIRExpr( pce->sb->tyenv, e );
   5129          Bool    isWord = e_ty == pce->gWordTy;
   5130          IRTemp  dst    = st->Ist.WrTmp.tmp;
   5131          IRTemp  dstv   = isWord ? newShadowTmp( pce, dst )
   5132                                  : IRTemp_INVALID;
   5133 
   5134          switch (e->tag) {
   5135 
   5136             case Iex_Const: {
   5137                stmt( 'C', pce, st );
   5138                if (isWord)
   5139                   assign( 'I', pce, dstv, schemeEw_Atom( pce, e ) );
   5140                break;
   5141             }
   5142 
   5143             case Iex_CCall: {
   5144                stmt( 'C', pce, st );
   5145                if (isWord)
   5146                   assign( 'I', pce, dstv,
   5147                           mkexpr( gen_call_nonptr_or_unknown_w(
   5148                                      pce, mkexpr(dst))));
   5149                break;
   5150             }
   5151 
   5152             case Iex_Mux0X: {
   5153                /* Just steer the shadow values in the same way as the
   5154                   originals. */
   5155                stmt( 'C', pce, st );
   5156                if (isWord)
   5157                   assign( 'I', pce, dstv,
   5158                           IRExpr_Mux0X(
   5159                              e->Iex.Mux0X.cond,
   5160                              schemeEw_Atom( pce, e->Iex.Mux0X.expr0 ),
   5161                              schemeEw_Atom( pce, e->Iex.Mux0X.exprX ) ));
   5162                break;
   5163             }
   5164 
   5165             case Iex_RdTmp: {
   5166                stmt( 'C', pce, st );
   5167                if (isWord)
   5168                   assign( 'I', pce, dstv, schemeEw_Atom( pce, e ));
   5169                break;
   5170             }
   5171 
   5172             case Iex_Load: {
   5173                schemeS_load( pce, e->Iex.Load.addr, e_ty, dstv );
   5174                /* copy the original -- must happen after the helper call */
   5175                stmt( 'C', pce, st );
   5176                break;
   5177             }
   5178 
   5179             case Iex_GetI: {
   5180                IRRegArray* descr = e->Iex.GetI.descr;
   5181                stmt( 'C', pce, st );
   5182                tl_assert(descr && descr->elemTy);
   5183                if (is_integer_guest_reg_array(descr)) {
   5184                   /* if this fails, is_integer_guest_reg_array is
   5185                      returning bogus results */
   5186                   tl_assert(isWord);
   5187                   assign(
   5188                      'I', pce, dstv,
   5189                      IRExpr_GetI(
   5190                         mkIRRegArray(descr->base + pce->guest_state_sizeB,
   5191                                      descr->elemTy, descr->nElems),
   5192                         e->Iex.GetI.ix,
   5193                         e->Iex.GetI.bias
   5194                      )
   5195                   );
   5196                }
   5197                break;
   5198             }
   5199 
   5200             case Iex_Get: {
   5201                stmt( 'C', pce, st );
   5202                if (isWord) {
   5203                   /* guest-word-typed tmp assignment, so it will have a
   5204                      shadow tmp, and we must make an assignment to
   5205                      that */
   5206                   if (is_integer_guest_reg(e->Iex.Get.offset,
   5207                                            sizeofIRType(e->Iex.Get.ty))) {
   5208                      assign( 'I', pce, dstv,
   5209                              IRExpr_Get( e->Iex.Get.offset
   5210                                             + pce->guest_state_sizeB,
   5211                                          e->Iex.Get.ty) );
   5212                   } else {
   5213                      if (pce->hWordTy == Ity_I32) {
   5214                         assign( 'I', pce, dstv, mkU32( (UWord)NONPTR ));
   5215                      } else {
   5216                        assign( 'I', pce, dstv, mkU64( (UWord)NONPTR ));
   5217                      }
   5218                   }
   5219                } else {
   5220                   /* tmp isn't guest-word-typed, so isn't shadowed, so
   5221                      generate no instrumentation */
   5222                }
   5223                break;
   5224             }
   5225 
   5226             case Iex_Unop: {
   5227                stmt( 'C', pce, st );
   5228                tl_assert(isIRAtom(e->Iex.Unop.arg));
   5229                if (isWord)
   5230                   instrument_arithop( pce, dst, dstv, e->Iex.Unop.op,
   5231                                       e->Iex.Unop.arg,
   5232                                       NULL, NULL, NULL );
   5233                break;
   5234             }
   5235 
   5236             case Iex_Binop: {
   5237                stmt( 'C', pce, st );
   5238                tl_assert(isIRAtom(e->Iex.Binop.arg1));
   5239                tl_assert(isIRAtom(e->Iex.Binop.arg2));
   5240                if (isWord)
   5241                   instrument_arithop( pce, dst, dstv, e->Iex.Binop.op,
   5242                                       e->Iex.Binop.arg1, e->Iex.Binop.arg2,
   5243                                       NULL, NULL );
   5244                break;
   5245             }
   5246 
   5247             case Iex_Triop: {
   5248                stmt( 'C', pce, st );
   5249                tl_assert(isIRAtom(e->Iex.Triop.arg1));
   5250                tl_assert(isIRAtom(e->Iex.Triop.arg2));
   5251                tl_assert(isIRAtom(e->Iex.Triop.arg3));
   5252                if (isWord)
   5253                   instrument_arithop( pce, dst, dstv, e->Iex.Triop.op,
   5254                                       e->Iex.Triop.arg1, e->Iex.Triop.arg2,
   5255                                       e->Iex.Triop.arg3, NULL );
   5256                break;
   5257             }
   5258 
   5259             case Iex_Qop: {
   5260                stmt( 'C', pce, st );
   5261                tl_assert(isIRAtom(e->Iex.Qop.arg1));
   5262                tl_assert(isIRAtom(e->Iex.Qop.arg2));
   5263                tl_assert(isIRAtom(e->Iex.Qop.arg3));
   5264                tl_assert(isIRAtom(e->Iex.Qop.arg4));
   5265                if (isWord)
   5266                   instrument_arithop( pce, dst, dstv, e->Iex.Qop.op,
   5267                                       e->Iex.Qop.arg1, e->Iex.Qop.arg2,
   5268                                       e->Iex.Qop.arg3, e->Iex.Qop.arg4 );
   5269                break;
   5270             }
   5271 
   5272             default:
   5273                goto unhandled;
   5274          } /* switch (e->tag) */
   5275 
   5276          break;
   5277 
   5278       } /* case Ist_WrTmp */
   5279 
   5280       default:
   5281       unhandled:
   5282          ppIRStmt(st);
   5283          tl_assert(0);
   5284    }
   5285 }
   5286 
   5287 
   5288 static IRTemp for_sg__newIRTemp_cb ( IRType ty, void* opaque )
   5289 {
   5290    PCEnv* pce = (PCEnv*)opaque;
   5291    return newTemp( pce, ty, NonShad );
   5292 }
   5293 
   5294 
/* Main instrumentation entry point for the h_ (heap checking) side of
   Ptrcheck.  Takes the uninstrumented superblock 'sbIn' and returns a
   new superblock containing the original statements interleaved with
   both sg_ (stack/global) and h_ (heap) instrumentation.  Requires
   guest and host word sizes to be equal; panics otherwise. */
IRSB* h_instrument ( VgCallbackClosure* closure,
                     IRSB* sbIn,
                     VexGuestLayout* layout,
                     VexGuestExtents* vge,
                     IRType gWordTy, IRType hWordTy )
{
   Bool  verboze = 0||False;
   Int   i /*, j*/;
   PCEnv pce;
   struct _SGEnv* sgenv;

   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   /* Check we're not completely nuts */
   tl_assert(sizeof(UWord)  == sizeof(void*));
   tl_assert(sizeof(Word)   == sizeof(void*));
   tl_assert(sizeof(Addr)   == sizeof(void*));
   tl_assert(sizeof(ULong)  == 8);
   tl_assert(sizeof(Long)   == 8);
   tl_assert(sizeof(Addr64) == 8);
   tl_assert(sizeof(UInt)   == 4);
   tl_assert(sizeof(Int)    == 4);

   /* Set up the running environment.  Both .sb and .qmpMap are
      modified as we go along.  Note that tmps are added to both
      .sb->tyenv and .qmpMap together, so the valid index-set for
      those two arrays should always be identical. */
   VG_(memset)(&pce, 0, sizeof(pce));
   pce.sb                = deepCopyIRSBExceptStmts(sbIn);
   pce.trace             = verboze;
   pce.hWordTy           = hWordTy;
   pce.gWordTy           = gWordTy;
   pce.guest_state_sizeB = layout->total_sizeB;

   /* Seed the temp map with one NonShad/no-shadow entry per temp
      already present in the incoming superblock. */
   pce.qmpMap = VG_(newXA)( VG_(malloc), "pc.h_instrument.1", VG_(free),
                            sizeof(TempMapEnt));
   for (i = 0; i < sbIn->tyenv->types_used; i++) {
      TempMapEnt ent;
      ent.kind   = NonShad;
      ent.shadow = IRTemp_INVALID;
      VG_(addToXA)( pce.qmpMap, &ent );
   }
   tl_assert( VG_(sizeXA)( pce.qmpMap ) == sbIn->tyenv->types_used );

   /* Also set up for the sg_ instrumenter.  See comments at the top
      of this instrumentation section for details.  The two parameters
      constitute a closure, which sg_ can use to correctly generate
      new IRTemps as needed. */
   sgenv = sg_instrument_init( for_sg__newIRTemp_cb,
                               (void*)&pce );

   /* Stay sane.  These two should agree! */
   tl_assert(layout->total_sizeB == PC_SIZEOF_GUEST_STATE);

   /* Copy verbatim any IR preamble preceding the first IMark */

   i = 0;
   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
      IRStmt* st = sbIn->stmts[i];
      tl_assert(st);
      tl_assert(isFlatIRStmt(st));
      /* 'C' = copy of a client (original) statement, uninstrumented */
      stmt( 'C', &pce, sbIn->stmts[i] );
      i++;
   }

   /* Nasty problem.  IR optimisation of the pre-instrumented IR may
      cause the IR following the preamble to contain references to IR
      temporaries defined in the preamble.  Because the preamble isn't
      instrumented, these temporaries don't have any shadows.
      Nevertheless uses of them following the preamble will cause
      memcheck to generate references to their shadows.  End effect is
      to cause IR sanity check failures, due to references to
      non-existent shadows.  This is only evident for the complex
      preambles used for function wrapping on TOC-afflicted platforms
      (ppc64-linux, ppc32-aix5, ppc64-aix5).

      The following loop therefore scans the preamble looking for
      assignments to temporaries.  For each one found it creates an
      assignment to the corresponding shadow temp, marking it as
      'defined'.  This is the same resulting IR as if the main
      instrumentation loop before had been applied to the statement
      'tmp = CONSTANT'.
   */
#if 0
   // FIXME: this isn't exactly right; only needs to generate shadows
   // for guest-word-typed temps
   for (j = 0; j < i; j++) {
      if (sbIn->stmts[j]->tag == Ist_WrTmp) {
         /* findShadowTmpV checks its arg is an original tmp;
            no need to assert that here. */
         IRTemp tmp_o = sbIn->stmts[j]->Ist.WrTmp.tmp;
         IRTemp tmp_s = findShadowTmp(&pce, tmp_o);
         IRType ty_s  = typeOfIRTemp(sbIn->tyenv, tmp_s);
         assign( 'V', &pce, tmp_s, definedOfType( ty_s ) );
         if (0) {
            VG_(printf)("create shadow tmp for preamble tmp [%d] ty ", j);
            ppIRType( ty_s );
            VG_(printf)("\n");
         }
      }
   }
#endif

   /* Iterate over the remaining stmts to generate instrumentation. */

   tl_assert(sbIn->stmts_used > 0);
   tl_assert(i >= 0);
   tl_assert(i < sbIn->stmts_used);
   tl_assert(sbIn->stmts[i]->tag == Ist_IMark);

   for (/*use current i*/; i < sbIn->stmts_used; i++) {
      /* generate sg_ instrumentation for this stmt */
      sg_instrument_IRStmt( sgenv, pce.sb, sbIn->stmts[i],
                            layout, gWordTy, hWordTy );
      /* generate h_ instrumentation for this stmt */
      schemeS( &pce, sbIn->stmts[i] );
   }

   /* generate sg_ instrumentation for the final jump */
   sg_instrument_final_jump( sgenv, pce.sb, sbIn->next, sbIn->jumpkind,
                             layout, gWordTy, hWordTy );

   /* and finalise .. */
   sg_instrument_fini( sgenv );

   /* If this fails, there's been some serious snafu with tmp management,
      that should be investigated. */
   tl_assert( VG_(sizeXA)( pce.qmpMap ) == pce.sb->tyenv->types_used );
   VG_(deleteXA)( pce.qmpMap );

   return pce.sb;
}
   5430 
   5431 
   5432 /*--------------------------------------------------------------------*/
   5433 /*--- Initialisation                                               ---*/
   5434 /*--------------------------------------------------------------------*/
   5435 
/* One-off h_ tool initialisation, run before command-line option
   processing: sets up the shadow memory state and the lossage-tracking
   state.  (Lossage reporting itself is currently compiled out in
   h_fini.) */
void h_pre_clo_init ( void )
{
   // Other initialisation
   init_shadow_memory();
   init_lossage();
}
   5442 
/* Post-command-line-option initialisation hook for the h_ side.
   Intentionally empty: nothing depends on option values here. */
void h_post_clo_init ( void )
{
}
   5446 
   5447 /*--------------------------------------------------------------------*/
   5448 /*--- Finalisation                                                 ---*/
   5449 /*--------------------------------------------------------------------*/
   5450 
/* h_ tool finalisation, run at client exit.  Prints the standard
   "rerun with -v" hint at the default verbosity level (unless XML
   output is selected), and, with --stats=yes, a summary of heap
   allocation and Seg tracking activity.  'exitcode' is the client's
   exit code and is not used here. */
void h_fini ( Int exitcode )
{
   if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
      VG_(message)(Vg_UserMsg,
                   "For counts of detected and suppressed errors, "
                   "rerun with: -v\n");
   }

   if (VG_(clo_stats)) {
      VG_(message)(Vg_DebugMsg,
                   "  h_:  %'10llu client allocs, %'10llu client frees\n",
                   stats__client_mallocs, stats__client_frees);
      VG_(message)(Vg_DebugMsg,
                   "  h_:  %'10llu Segs allocd,   %'10llu Segs recycled\n",
                   stats__segs_allocd, stats__segs_recycled);
   }

   /* Lossage-check reporting is currently compiled out. */
#if 0
   if (h_clo_lossage_check) {
      VG_(message)(Vg_UserMsg, "\n");
      VG_(message)(Vg_UserMsg, "%12lld total memory references\n",
                               stats__tot_mem_refs);
      VG_(message)(Vg_UserMsg, "%12lld   of which are in a known segment\n",
                               stats__refs_in_a_seg);
      VG_(message)(Vg_UserMsg, "%12lld   of which are 'lost' w.r.t the seg\n",
                               stats__refs_lost_seg);
      VG_(message)(Vg_UserMsg, "\n");
      show_lossage();
      VG_(message)(Vg_UserMsg, "\n");
   } else {
      tl_assert( 0 == VG_(OSetGen_Size)(lossage) );
   }
#endif
}
   5485 
   5486 
   5487 /*--------------------------------------------------------------------*/
   5488 /*--- end                                                 h_main.c ---*/
   5489 /*--------------------------------------------------------------------*/
   5490