
/*---------------------------------------------------------------*/
/*--- begin                                       test_main.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2011 OpenWorks LLP
      info (at) open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>

#include "libvex_basictypes.h"
#include "libvex.h"

#include "test_main.h"


/*---------------------------------------------------------------*/
/*--- Test                                                    ---*/
/*---------------------------------------------------------------*/


__attribute__ ((noreturn))
static
void failure_exit ( void )
{
   fprintf(stdout, "VEX did failure_exit.  Bye.\n");
   exit(1);
}

static
void log_bytes ( HChar* bytes, Int nbytes )
{
   fwrite ( bytes, 1, nbytes, stdout );
}

#define N_LINEBUF 10000
static HChar linebuf[N_LINEBUF];

#define N_ORIGBUF 10000
#define N_TRANSBUF 5000

static UChar origbuf[N_ORIGBUF];
static UChar transbuf[N_TRANSBUF];

static Bool verbose = True;

/* Forwards */
#if 1 /* UNUSED */
//static IRSB* ac_instrument ( IRSB*, VexGuestLayout*, IRType );
static
IRSB* mc_instrument ( void* closureV,
                      IRSB* bb_in, VexGuestLayout* layout,
                      VexGuestExtents* vge,
                      IRType gWordTy, IRType hWordTy );
#endif

static Bool chase_into_not_ok ( void* opaque, Addr64 dst ) {
   return False;
}
static UInt needs_self_check ( void* opaque, VexGuestExtents* vge ) {
   return 0;
}

int main ( int argc, char** argv )
{
   FILE* f;
   Int i;
   UInt u, sum;
   Addr32 orig_addr;
   Int bb_number, n_bbs_done = 0;
   Int orig_nbytes, trans_used;
   VexTranslateResult tres;
   VexControl vcon;
   VexGuestExtents vge;
   VexArchInfo vai_x86, vai_amd64, vai_ppc32;
   VexAbiInfo vbi;
   VexTranslateArgs vta;

   if (argc != 2) {
      fprintf(stderr, "usage: vex file.org\n");
      exit(1);
   }
   f = fopen(argv[1], "r");
   if (!f) {
      fprintf(stderr, "can't open `%s'\n", argv[1]);
      exit(1);
   }

   /* Run with default params.  However, we can't allow bb chasing
      since that causes the front end to get segfaults when it tries
      to read code outside the initial BB we hand it.  So when calling
      LibVEX_Translate, send in a chase-into predicate that always
      returns False. */
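
   /* The input file is consumed in pairs of lines by the loop below:

         first line:   . bb-number bb-addr n-bytes
         second line:  . byte byte byte ...  (n-bytes two-digit hex values)

      A hypothetical fragment (made-up address and bytes, not taken
      from any real .org file) would be:

         . 1 8048000 5
         . 55 89 e5 5d c3
   */
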
   LibVEX_default_VexControl ( &vcon );
   vcon.iropt_level = 2;
   vcon.guest_max_insns = 50;

   LibVEX_Init ( &failure_exit, &log_bytes,
                 1,  /* debug_paranoia */
                 TEST_VSUPPORT, /* valgrind support */
                 &vcon );


   while (!feof(f)) {

      __attribute__((unused))
      char* unused1 = fgets(linebuf, N_LINEBUF,f);
      if (linebuf[0] == 0) continue;
      if (linebuf[0] != '.') continue;

      if (n_bbs_done == TEST_N_BBS) break;
      n_bbs_done++;

      /* first line is:   . bb-number bb-addr n-bytes */
      assert(3 == sscanf(&linebuf[1], " %d %x %d\n",
                                 & bb_number,
                                 & orig_addr, & orig_nbytes ));
      assert(orig_nbytes >= 1);
      assert(!feof(f));
      __attribute__((unused))
      char* unused2 = fgets(linebuf, N_LINEBUF,f);
      assert(linebuf[0] == '.');

      /* second line is:   . byte byte byte etc */
      if (verbose)
         printf("============ Basic Block %d, Done %d, "
                "Start %x, nbytes %2d ============",
                bb_number, n_bbs_done-1, orig_addr, orig_nbytes);

      assert(orig_nbytes >= 1 && orig_nbytes <= N_ORIGBUF);
      for (i = 0; i < orig_nbytes; i++) {
         assert(1 == sscanf(&linebuf[2 + 3*i], "%x", &u));
         origbuf[i] = (UChar)u;
      }

      /* FIXME: put sensible values into the .hwcaps fields */
      LibVEX_default_VexArchInfo(&vai_x86);
      vai_x86.hwcaps = VEX_HWCAPS_X86_SSE1
                       | VEX_HWCAPS_X86_SSE2 | VEX_HWCAPS_X86_SSE3;

      LibVEX_default_VexArchInfo(&vai_amd64);
      vai_amd64.hwcaps = 0;

      LibVEX_default_VexArchInfo(&vai_ppc32);
      vai_ppc32.hwcaps = 0;
      vai_ppc32.ppc_cache_line_szB = 128;

      LibVEX_default_VexAbiInfo(&vbi);

      /* ----- Set up args for LibVEX_Translate ----- */
#if 0 /* ppc32 -> ppc32 */
      vta.arch_guest     = VexArchPPC32;
      vta.archinfo_guest = vai_ppc32;
      vta.arch_host      = VexArchPPC32;
      vta.archinfo_host  = vai_ppc32;
#endif
#if 0 /* amd64 -> amd64 */
      vta.arch_guest     = VexArchAMD64;
      vta.archinfo_guest = vai_amd64;
      vta.arch_host      = VexArchAMD64;
      vta.archinfo_host  = vai_amd64;
#endif
#if 1 /* x86 -> x86 */
      vta.arch_guest     = VexArchX86;
      vta.archinfo_guest = vai_x86;
      vta.arch_host      = VexArchX86;
      vta.archinfo_host  = vai_x86;
#endif
      vta.abiinfo_both    = vbi;
      vta.guest_bytes     = origbuf;
      vta.guest_bytes_addr = (Addr64)orig_addr;
      vta.callback_opaque = NULL;
      vta.chase_into_ok   = chase_into_not_ok;
      vta.guest_extents   = &vge;
      vta.host_bytes      = transbuf;
      vta.host_bytes_size = N_TRANSBUF;
      vta.host_bytes_used = &trans_used;
#if 0 /* no instrumentation */
      vta.instrument1     = NULL;
      vta.instrument2     = NULL;
#endif
#if 0 /* addrcheck */
      vta.instrument1     = ac_instrument;
      vta.instrument2     = NULL;
#endif
#if 1 /* memcheck */
      vta.instrument1     = mc_instrument;
      vta.instrument2     = NULL;
#endif
      vta.needs_self_check  = needs_self_check;
      vta.preamble_function = NULL;
      vta.traceflags      = TEST_FLAGS;
#if 1 /* x86, amd64 hosts */
      vta.dispatch_unassisted = (void*)0x12345678;
      vta.dispatch_assisted   = (void*)0x12345678;
#else /* ppc32, ppc64 hosts */
      vta.dispatch        = NULL;
#endif

      vta.finaltidy = NULL;

      for (i = 0; i < TEST_N_ITERS; i++)
         tres = LibVEX_Translate ( &vta );

      if (tres.status != VexTransOK)
         printf("\ntres = %d\n", (Int)tres.status);
      assert(tres.status == VexTransOK);
      assert(tres.n_sc_extents == 0);
      assert(vge.n_used == 1);
      assert((UInt)(vge.len[0]) == orig_nbytes);

      sum = 0;
      for (i = 0; i < trans_used; i++)
         sum += (UInt)transbuf[i];
      printf ( " %6.2f ... %u\n",
               (double)trans_used / (double)vge.len[0], sum );
   }

   fclose(f);
   printf("\n");
   LibVEX_ShowAllocStats();

   return 0;
}

//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////

#if 0 /* UNUSED */

static
__attribute__((noreturn))
void panic ( HChar* s )
{
  printf("\npanic: %s\n", s);
  failure_exit();
}

static
IRSB* ac_instrument (IRSB* bb_in, VexGuestLayout* layout, IRType hWordTy )
{
/* Use this rather than eg. -1 because it's a UInt. */
#define INVALID_DATA_SIZE   999999

   Int         i;
   Int         sz;
   IRCallee*   helper;
   IRStmt*     st;
   IRExpr*     data;
   IRExpr*     addr;
   Bool        needSz;

   /* Set up BB */
   IRSB* bb     = emptyIRSB();
   bb->tyenv    = dopyIRTypeEnv(bb_in->tyenv);
   bb->next     = dopyIRExpr(bb_in->next);
   bb->jumpkind = bb_in->jumpkind;

   /* No loads to consider in ->next. */
   assert(isIRAtom(bb_in->next));

   for (i = 0; i < bb_in->stmts_used; i++) {
      st = bb_in->stmts[i];
      if (!st) continue;

      switch (st->tag) {

         case Ist_Tmp:
            data = st->Ist.Tmp.data;
            if (data->tag == Iex_LDle) {
               addr = data->Iex.LDle.addr;
               sz = sizeofIRType(data->Iex.LDle.ty);
               needSz = False;
               switch (sz) {
                  case 4: helper = mkIRCallee(1, "ac_helperc_LOAD4",
                                                 (void*)0x12345601); break;
                  case 2: helper = mkIRCallee(0, "ac_helperc_LOAD2",
                                                 (void*)0x12345602); break;
                  case 1: helper = mkIRCallee(1, "ac_helperc_LOAD1",
                                                 (void*)0x12345603); break;
                  default: helper = mkIRCallee(0, "ac_helperc_LOADN",
                                                  (void*)0x12345604);
                                                  needSz = True; break;
               }
               if (needSz) {
                  addStmtToIRSB(
                     bb,
                     IRStmt_Dirty(
                        unsafeIRDirty_0_N( helper->regparms,
                                           helper->name, helper->addr,
                                           mkIRExprVec_2(addr, mkIRExpr_HWord(sz)))
                  ));
               } else {
                  addStmtToIRSB(
                     bb,
                     IRStmt_Dirty(
                        unsafeIRDirty_0_N( helper->regparms,
                                           helper->name, helper->addr,
                                           mkIRExprVec_1(addr) )
                  ));
               }
            }
            break;

         case Ist_STle:
            data = st->Ist.STle.data;
            addr = st->Ist.STle.addr;
            assert(isIRAtom(data));
            assert(isIRAtom(addr));
            sz = sizeofIRType(typeOfIRExpr(bb_in->tyenv, data));
            needSz = False;
            switch (sz) {
               case 4: helper = mkIRCallee(1, "ac_helperc_STORE4",
                                              (void*)0x12345605); break;
               case 2: helper = mkIRCallee(0, "ac_helperc_STORE2",
                                              (void*)0x12345606); break;
               case 1: helper = mkIRCallee(1, "ac_helperc_STORE1",
                                              (void*)0x12345607); break;
               default: helper = mkIRCallee(0, "ac_helperc_STOREN",
                                               (void*)0x12345608);
                                               needSz = True; break;
            }
            if (needSz) {
               addStmtToIRSB(
                  bb,
                  IRStmt_Dirty(
                     unsafeIRDirty_0_N( helper->regparms,
                                        helper->name, helper->addr,
                                        mkIRExprVec_2(addr, mkIRExpr_HWord(sz)))
               ));
            } else {
               addStmtToIRSB(
                  bb,
                  IRStmt_Dirty(
                     unsafeIRDirty_0_N( helper->regparms,
                                        helper->name, helper->addr,
                                        mkIRExprVec_1(addr) )
               ));
            }
            break;

         case Ist_Put:
            assert(isIRAtom(st->Ist.Put.data));
            break;

         case Ist_PutI:
            assert(isIRAtom(st->Ist.PutI.ix));
            assert(isIRAtom(st->Ist.PutI.data));
            break;

         case Ist_Exit:
            assert(isIRAtom(st->Ist.Exit.guard));
            break;

         case Ist_Dirty:
            /* If the call doesn't interact with memory, we ain't
               interested. */
            if (st->Ist.Dirty.details->mFx == Ifx_None)
               break;
            goto unhandled;

         default:
         unhandled:
            printf("\n");
            ppIRStmt(st);
            printf("\n");
            panic("addrcheck: unhandled IRStmt");
      }

      addStmtToIRSB( bb, dopyIRStmt(st));
   }

   return bb;
}
#endif /* UNUSED */

//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////

#if 1 /* UNUSED */

static
__attribute__((noreturn))
void panic ( HChar* s )
{
  printf("\npanic: %s\n", s);
  failure_exit();
}

#define tl_assert(xxx) assert(xxx)
#define VG_(xxxx) xxxx
#define tool_panic(zzz) panic(zzz)
#define MC_(zzzz) MC_##zzzz
#define TL_(zzzz) SK_##zzzz


static void MC_helperc_complain_undef ( void );
static void MC_helperc_LOADV8 ( void );
static void MC_helperc_LOADV4 ( void );
static void MC_helperc_LOADV2 ( void );
static void MC_helperc_LOADV1 ( void );
static void MC_helperc_STOREV8( void );
static void MC_helperc_STOREV4( void );
static void MC_helperc_STOREV2( void );
static void MC_helperc_STOREV1( void );
static void MC_helperc_value_check0_fail( void );
static void MC_helperc_value_check1_fail( void );
static void MC_helperc_value_check4_fail( void );

static void MC_helperc_complain_undef ( void ) { }
static void MC_helperc_LOADV8 ( void ) { }
static void MC_helperc_LOADV4 ( void ) { }
static void MC_helperc_LOADV2 ( void ) { }
static void MC_helperc_LOADV1 ( void ) { }
static void MC_helperc_STOREV8( void ) { }
static void MC_helperc_STOREV4( void ) { }
static void MC_helperc_STOREV2( void ) { }
static void MC_helperc_STOREV1( void ) { }
static void MC_helperc_value_check0_fail( void ) { }
static void MC_helperc_value_check1_fail( void ) { }
static void MC_helperc_value_check4_fail( void ) { }


/*--------------------------------------------------------------------*/
/*--- Instrument IR to perform memory checking operations.         ---*/
/*---                                               mc_translate.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of MemCheck, a heavyweight Valgrind tool for
   detecting memory errors.

   Copyright (C) 2000-2011 Julian Seward
      jseward (at) acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

//#include "mc_include.h"


/*------------------------------------------------------------*/
/*--- Forward decls                                        ---*/
/*------------------------------------------------------------*/

struct _MCEnv;

static IRType  shadowType ( IRType ty );
static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );


/*------------------------------------------------------------*/
/*--- Memcheck running state, and tmp management.          ---*/
/*------------------------------------------------------------*/

/* Carries around state during memcheck instrumentation. */
typedef
   struct _MCEnv {
      /* MODIFIED: the bb being constructed.  IRStmts are added. */
      IRSB* bb;

      /* MODIFIED: a table [0 .. #temps_in_original_bb-1] which maps
         original temps to their current shadow temp.
         Initially all entries are IRTemp_INVALID.  Entries are added
         lazily since many original temps are not used due to
         optimisation prior to instrumentation.  Note that floating
         point original tmps are shadowed by integer tmps of the same
         size, and Bit-typed original tmps are shadowed by the type
         Ity_I8.  See comment below. */
      IRTemp* tmpMap;
      Int     n_originalTmps; /* for range checking */

      /* READONLY: the guest layout.  This indicates which parts of
         the guest state should be regarded as 'always defined'. */
      VexGuestLayout* layout;
      /* READONLY: the host word type.  Needed for constructing
         arguments of type 'HWord' to be passed to helper functions.
         Ity_I32 or Ity_I64 only. */
      IRType hWordTy;
   }
   MCEnv;

/* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
   demand), as they are encountered.  This is for two reasons.

   (1) (less important reason): Many original tmps are unused due to
   initial IR optimisation, and we do not want to waste space in tables
   tracking them.

   Shadow IRTemps are therefore allocated on demand.  mce.tmpMap is a
   table indexed [0 .. n_types-1], which gives the current shadow for
   each original tmp, or INVALID_IRTEMP if none is so far assigned.
   It is necessary to support making multiple assignments to a shadow
   -- specifically, after testing a shadow for definedness, it needs
   to be made defined.  But IR's SSA property disallows this.

   (2) (more important reason): Therefore, when a shadow needs to get
   a new value, a new temporary is created, the value is assigned to
   that, and the tmpMap is updated to reflect the new binding.

   A corollary is that if the tmpMap maps a given tmp to
   INVALID_IRTEMP and we are hoping to read that shadow tmp, it means
   there's a read-before-write error in the original tmps.  The IR
   sanity checker should catch all such anomalies, however.
*/

/* Find the tmp currently shadowing the given original tmp.  If none
   so far exists, allocate one.  */
static IRTemp findShadowTmp ( MCEnv* mce, IRTemp orig )
{
   tl_assert(orig < mce->n_originalTmps);
   if (mce->tmpMap[orig] == IRTemp_INVALID) {
      mce->tmpMap[orig]
         = newIRTemp(mce->bb->tyenv,
                     shadowType(mce->bb->tyenv->types[orig]));
   }
   return mce->tmpMap[orig];
}

/* Allocate a new shadow for the given original tmp.  This means any
   previous shadow is abandoned.  This is needed because it is
   necessary to give a new value to a shadow once it has been tested
   for undefinedness, but unfortunately IR's SSA property disallows
   this.  Instead we must abandon the old shadow, allocate a new one
   and use that instead. */
static void newShadowTmp ( MCEnv* mce, IRTemp orig )
{
   tl_assert(orig < mce->n_originalTmps);
   mce->tmpMap[orig]
      = newIRTemp(mce->bb->tyenv,
                  shadowType(mce->bb->tyenv->types[orig]));
}
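
/* A sketch of how the two functions above cooperate (the pattern is
   used for real in complainIfUndefined below): after a shadow tmp
   has been examined, rebinding gives it a fresh 'defined' value
   without violating SSA. */
#if 0
   /* ... having tested the current shadow of 'orig' ... */
   newShadowTmp(mce, orig);                   /* abandon old shadow */
   assign(mce->bb, findShadowTmp(mce, orig),  /* bind a fresh one   */
                   definedOfType(ty));        /* ... to 'defined'   */
#endif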


/*------------------------------------------------------------*/
/*--- IRAtoms -- a subset of IRExprs                       ---*/
/*------------------------------------------------------------*/

/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
   isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
   input, most of this code deals in atoms.  Usefully, a value atom
   always has a V-value which is also an atom: constants are shadowed
   by constants, and temps are shadowed by the corresponding shadow
   temporary. */

typedef  IRExpr  IRAtom;

/* (used for sanity checks only): is this an atom which looks
   like it's from original code? */
static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp < mce->n_originalTmps)
      return True;
   return False;
}

/* (used for sanity checks only): is this an atom which looks
   like it's from shadow code? */
static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp >= mce->n_originalTmps)
      return True;
   return False;
}

/* (used for sanity checks only): check that both args are atoms and
   are identically-kinded. */
static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
{
   if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
      return True;
   if (a1->tag == Iex_Const && a2->tag == Iex_Const)
      return True;
   return False;
}


/*------------------------------------------------------------*/
/*--- Type management                                      ---*/
/*------------------------------------------------------------*/

/* Shadow state is always accessed using integer types.  This returns
   an integer type with the same size (as per sizeofIRType) as the
   given type.  The only valid shadow types are Bit, I8, I16, I32,
   I64, V128. */

static IRType shadowType ( IRType ty )
{
   switch (ty) {
      case Ity_I1:
      case Ity_I8:
      case Ity_I16:
      case Ity_I32:
      case Ity_I64:  return ty;
      case Ity_F32:  return Ity_I32;
      case Ity_F64:  return Ity_I64;
      case Ity_V128: return Ity_V128;
      default: ppIRType(ty);
               VG_(tool_panic)("memcheck:shadowType");
   }
}

/* Produce a 'defined' value of the given shadow type.  Should only be
   supplied shadow types (Bit/I8/I16/I32/I64/V128). */
static IRExpr* definedOfType ( IRType ty ) {
   switch (ty) {
      case Ity_I1:   return IRExpr_Const(IRConst_U1(False));
      case Ity_I8:   return IRExpr_Const(IRConst_U8(0));
      case Ity_I16:  return IRExpr_Const(IRConst_U16(0));
      case Ity_I32:  return IRExpr_Const(IRConst_U32(0));
      case Ity_I64:  return IRExpr_Const(IRConst_U64(0));
      case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
      default:       VG_(tool_panic)("memcheck:definedOfType");
   }
}


/*------------------------------------------------------------*/
/*--- Constructing IR fragments                            ---*/
/*------------------------------------------------------------*/

/* assign value to tmp */
#define assign(_bb,_tmp,_expr)   \
   addStmtToIRSB((_bb), IRStmt_WrTmp((_tmp),(_expr)))

/* add stmt to a bb */
#define stmt(_bb,_stmt)    \
   addStmtToIRSB((_bb), (_stmt))

/* build various kinds of expressions */
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
#define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
#define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))

/* bind the given expression to a new temporary, and return the
   temporary.  This effectively converts an arbitrary expression into
   an atom. */
static IRAtom* assignNew ( MCEnv* mce, IRType ty, IRExpr* e ) {
   IRTemp t = newIRTemp(mce->bb->tyenv, ty);
   assign(mce->bb, t, e);
   return mkexpr(t);
}
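
/* For example, to flatten the expression (x | (0 - x)) into IR, one
   can write (a sketch; mkLeft32 below does exactly this):

      IRAtom* neg = assignNew(mce, Ity_I32, binop(Iop_Sub32, mkU32(0), x));
      IRAtom* res = assignNew(mce, Ity_I32, binop(Iop_Or32, x, neg));

   Each assignNew appends one IRStmt_WrTmp to mce->bb and returns an
   RdTmp atom naming the result, so arbitrarily deep expression trees
   become flat sequences of tmp assignments. */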


/*------------------------------------------------------------*/
/*--- Constructing definedness primitive ops               ---*/
/*------------------------------------------------------------*/

/* --------- Defined-if-either-defined --------- */

static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I8, binop(Iop_And8, a1, a2));
}

static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I16, binop(Iop_And16, a1, a2));
}

static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I32, binop(Iop_And32, a1, a2));
}

static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I64, binop(Iop_And64, a1, a2));
}

static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_V128, binop(Iop_AndV128, a1, a2));
}

/* --------- Undefined-if-either-undefined --------- */

static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I8, binop(Iop_Or8, a1, a2));
}

static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I16, binop(Iop_Or16, a1, a2));
}

static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I32, binop(Iop_Or32, a1, a2));
}

static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I64, binop(Iop_Or64, a1, a2));
}

static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_V128, binop(Iop_OrV128, a1, a2));
}

static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
   switch (vty) {
      case Ity_I8:   return mkUifU8(mce, a1, a2);
      case Ity_I16:  return mkUifU16(mce, a1, a2);
      case Ity_I32:  return mkUifU32(mce, a1, a2);
      case Ity_I64:  return mkUifU64(mce, a1, a2);
      case Ity_V128: return mkUifUV128(mce, a1, a2);
      default:
         VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
         VG_(tool_panic)("memcheck:mkUifU");
   }
}

/* --------- The Left-family of operations. --------- */

static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I8,
                    binop(Iop_Or8, a1,
                          assignNew(mce, Ity_I8,
                                    /* unop(Iop_Neg8, a1)))); */
                                    binop(Iop_Sub8, mkU8(0), a1) )));
}

static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I16,
                    binop(Iop_Or16, a1,
                          assignNew(mce, Ity_I16,
                                    /* unop(Iop_Neg16, a1)))); */
                                    binop(Iop_Sub16, mkU16(0), a1) )));
}

static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I32,
                    binop(Iop_Or32, a1,
                          assignNew(mce, Ity_I32,
                                    /* unop(Iop_Neg32, a1)))); */
                                    binop(Iop_Sub32, mkU32(0), a1) )));
}
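
/* Worked example for the Left-family: Left(x) = x | -x smears the
   lowest 1 bit of x leftwards.  For 8 bits:

      a1        = 0000 0100   (only bit 2 undefined)
      -a1       = 1111 1100
      a1 | -a1  = 1111 1100   (bits 2..7 now undefined)

   This models the fact that a carry into an undefined bit position
   can pollute every more-significant bit of an add/sub result. */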

/* --------- 'Improvement' functions for AND/OR. --------- */

/* ImproveAND(data, vbits) = data OR vbits.  Defined (0) data 0s give
   defined (0); all other -> undefined (1).
*/
static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I8, binop(Iop_Or8, data, vbits));
}

static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I16, binop(Iop_Or16, data, vbits));
}

static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I32, binop(Iop_Or32, data, vbits));
}

static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I64, binop(Iop_Or64, data, vbits));
}

static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_V128, binop(Iop_OrV128, data, vbits));
}

/* ImproveOR(data, vbits) = ~data OR vbits.  Defined (0) data 1s give
   defined (0); all other -> undefined (1).
*/
static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I8,
             binop(Iop_Or8,
                   assignNew(mce, Ity_I8, unop(Iop_Not8, data)),
                   vbits) );
}

static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I16,
             binop(Iop_Or16,
                   assignNew(mce, Ity_I16, unop(Iop_Not16, data)),
                   vbits) );
}

static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I32,
             binop(Iop_Or32,
                   assignNew(mce, Ity_I32, unop(Iop_Not32, data)),
                   vbits) );
}

static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I64,
             binop(Iop_Or64,
                   assignNew(mce, Ity_I64, unop(Iop_Not64, data)),
                   vbits) );
}

static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_V128,
             binop(Iop_OrV128,
                   assignNew(mce, Ity_V128, unop(Iop_NotV128, data)),
                   vbits) );
}
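
/* How the 'improvement' terms get used (a sketch; the combination
   shown is an assumption about how an expr2vbits case for Iop_And32
   would look, modelled on full Memcheck, and is not code found
   elsewhere in this file):

      vbits(x & y) = DifD( UifU(vx, vy),
                           DifD( ImproveAND(x, vx),
                                 ImproveAND(y, vy) ) )

   Start from the pessimistic union of undefinedness, then mark as
   defined any bit where either operand is a definitely-defined 0,
   since such a bit forces the And result to 0 regardless of the
   other operand.  The Or case is dual, with defined 1s forcing. */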

/* --------- Pessimising casts. --------- */

static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
{
   IRType  ty;
   IRAtom* tmp1;
   /* Note, dst_ty is a shadow type, not an original type. */
   /* First of all, collapse vbits down to a single bit. */
   tl_assert(isShadowAtom(mce,vbits));
   ty   = typeOfIRExpr(mce->bb->tyenv, vbits);
   tmp1 = NULL;
   switch (ty) {
      case Ity_I1:
         tmp1 = vbits;
         break;
      case Ity_I8:
         tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE8, vbits, mkU8(0)));
         break;
      case Ity_I16:
         tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE16, vbits, mkU16(0)));
         break;
      case Ity_I32:
         tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE32, vbits, mkU32(0)));
         break;
      case Ity_I64:
         tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE64, vbits, mkU64(0)));
         break;
      default:
         VG_(tool_panic)("mkPCastTo(1)");
   }
   tl_assert(tmp1);
   /* Now widen up to the dst type. */
   switch (dst_ty) {
      case Ity_I1:
         return tmp1;
      case Ity_I8:
         return assignNew(mce, Ity_I8, unop(Iop_1Sto8, tmp1));
      case Ity_I16:
         return assignNew(mce, Ity_I16, unop(Iop_1Sto16, tmp1));
      case Ity_I32:
         return assignNew(mce, Ity_I32, unop(Iop_1Sto32, tmp1));
      case Ity_I64:
         return assignNew(mce, Ity_I64, unop(Iop_1Sto64, tmp1));
      case Ity_V128:
         tmp1 = assignNew(mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew(mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
         return tmp1;
      default:
         ppIRType(dst_ty);
         VG_(tool_panic)("mkPCastTo(2)");
   }
}
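
/* Worked example: pessimistically casting the I8 vbits value
   0000 0100 (only bit 2 undefined) to Ity_I16:

      CmpNE8(0x04, 0x00)  -->  1        (some bit is undefined)
      1Sto16(1)           -->  0xFFFF   (all 16 bits undefined)

   whereas fully-defined vbits 0x00 yield 0x0000.  PCast therefore
   collapses "partially undefined" to "wholly undefined" at the
   destination size. */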


/*------------------------------------------------------------*/
/*--- Emit a test and complaint if something is undefined. ---*/
/*------------------------------------------------------------*/

/* Set the annotations on a dirty helper to indicate that the stack
   pointer and instruction pointers might be read.  This is the
   behaviour of all 'emit-a-complaint' style functions we might
   call. */

static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
   di->nFxState = 2;
   di->fxState[0].fx     = Ifx_Read;
   di->fxState[0].offset = mce->layout->offset_SP;
   di->fxState[0].size   = mce->layout->sizeof_SP;
   di->fxState[1].fx     = Ifx_Read;
   di->fxState[1].offset = mce->layout->offset_IP;
   di->fxState[1].size   = mce->layout->sizeof_IP;
}


/* Check the supplied **original** atom for undefinedness, and emit a
   complaint if so.  Once that happens, mark it as defined.  This is
   possible because the atom is either a tmp or literal.  If it's a
   tmp, it will be shadowed by a tmp, and so we can set the shadow to
   be defined.  In fact as mentioned above, we will have to allocate a
   new tmp to carry the new 'defined' shadow value, and update the
   original->tmp mapping accordingly; we cannot simply assign a new
   value to an existing shadow tmp as this breaks SSAness -- resulting
   in the post-instrumentation sanity checker spluttering in disapproval.
*/
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom )
{
   IRAtom*  vatom;
   IRType   ty;
   Int      sz;
   IRDirty* di;
   IRAtom*  cond;

   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretation for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->bb->tyenv, vatom);

   /* sz is only used for constructing the error message */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */

   switch (sz) {
      case 0:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check0_fail)",
                                 &MC_(helperc_value_check0_fail),
                                 mkIRExprVec_0()
                               );
         break;
      case 1:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check1_fail)",
                                 &MC_(helperc_value_check1_fail),
                                 mkIRExprVec_0()
                               );
         break;
      case 4:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check4_fail)",
                                 &MC_(helperc_value_check4_fail),
                                 mkIRExprVec_0()
                               );
         break;
      default:
         di = unsafeIRDirty_0_N( 1/*regparms*/,
                                 "MC_(helperc_complain_undef)",
                                 &MC_(helperc_complain_undef),
                                 mkIRExprVec_1( mkIRExpr_HWord( sz ))
                               );
         break;
   }
   di->guard = cond;
   setHelperAnns( mce, di );
   stmt( mce->bb, IRStmt_Dirty(di));

   /* Set the shadow tmp to be defined.  First, update the
      orig->shadow tmp mapping to reflect the fact that this shadow is
      getting a new value. */
   tl_assert(isIRAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_RdTmp) {
      tl_assert(atom->tag == Iex_RdTmp);
      newShadowTmp(mce, atom->Iex.RdTmp.tmp);
      assign(mce->bb, findShadowTmp(mce, atom->Iex.RdTmp.tmp),
                      definedOfType(ty));
   }
}


/*------------------------------------------------------------*/
/*--- Shadowing PUTs/GETs, and indexed variants thereof    ---*/
/*------------------------------------------------------------*/

/* Examine the always-defined sections declared in layout to see if
   the (offset,size) section is within one.  Note, it is an error to
   partially fall into such a region: (offset,size) should either be
   completely in such a region or completely not-in such a region.
*/
static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
{
   Int minoffD, maxoffD, i;
   Int minoff = offset;
   Int maxoff = minoff + size - 1;
   tl_assert((minoff & ~0xFFFF) == 0);
   tl_assert((maxoff & ~0xFFFF) == 0);

   for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
      minoffD = mce->layout->alwaysDefd[i].offset;
      maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
      tl_assert((minoffD & ~0xFFFF) == 0);
      tl_assert((maxoffD & ~0xFFFF) == 0);

      if (maxoff < minoffD || maxoffD < minoff)
         continue; /* no overlap */
      if (minoff >= minoffD && maxoff <= maxoffD)
         return True; /* completely contained in an always-defd section */

      VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
   }
   return False; /* could not find any containing section */
}
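
/* For instance, with a guest layout declaring (hypothetical numbers)

      alwaysDefd[0] = { .offset = 64, .size = 4 }

   isAlwaysDefd(mce, 64, 4) is True, isAlwaysDefd(mce, 0, 4) is
   False, and isAlwaysDefd(mce, 62, 4), which straddles the section
   boundary, panics: a partial overlap indicates a malformed layout
   or a malformed Put. */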


/* Generate into bb suitable actions to shadow this Put.  If the state
   slice is marked 'always defined', do nothing.  Otherwise, write the
   supplied V bits to the shadow state.  We can pass in either an
   original atom or a V-atom, but not both.  In the former case the
   relevant V-bits are then generated from the original.
*/
static
void do_shadow_PUT ( MCEnv* mce,  Int offset,
                     IRAtom* atom, IRAtom* vatom )
{
   IRType ty;
   if (atom) {
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom );
   } else {
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   }

   ty = typeOfIRExpr(mce->bb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put. */
      stmt( mce->bb, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ) );
   }
}

   1155 
   1156 /* Return an expression which contains the V bits corresponding to the
   1157    given GETI (passed in in pieces).
   1158 */
   1159 static
   1160 void do_shadow_PUTI ( MCEnv* mce,
   1161                       IRRegArray* descr, IRAtom* ix, Int bias, IRAtom* atom )
   1162 {
   1163    IRAtom* vatom;
   1164    IRType  ty, tyS;
   Int     arrSize;

   tl_assert(isOriginalAtom(mce,atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(sameKindedAtoms(atom, vatom));
   ty   = descr->elemTy;
   tyS  = shadowType(ty);
   arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce,ix);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a cloned version of the Put that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                         tyS, descr->nElems);
      stmt( mce->bb, IRStmt_PutI( new_descr, ix, bias, vatom ));
   }
}


/* Return an expression which contains the V bits corresponding to the
   given GET (passed in in pieces).
*/
static
IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
{
   IRType tyS = shadowType(ty);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
   }
}


/* Return an expression which contains the V bits corresponding to the
   given GETI (passed in in pieces).
*/
static
IRExpr* shadow_GETI ( MCEnv* mce, IRRegArray* descr, IRAtom* ix, Int bias )
{
   IRType ty   = descr->elemTy;
   IRType tyS  = shadowType(ty);
   Int arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce,ix);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                         tyS, descr->nElems);
      return IRExpr_GetI( new_descr, ix, bias );
   }
}


/*------------------------------------------------------------*/
/*--- Generating approximations for unknown operations,    ---*/
/*--- using lazy-propagate semantics                       ---*/
/*------------------------------------------------------------*/

/* Lazy propagation of undefinedness from two values, resulting in the
   specified shadow type.
*/
static
IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
{
   /* force everything via 32-bit intermediaries. */
   IRAtom* at;
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkPCastTo(mce, finalVty, at);
   return at;
}
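
/* Example use of mkLazy2 (an assumption about a caller, in the style
   of full Memcheck, not code appearing elsewhere in this file): for
   a widening multiply whose exact undefinedness would be costly to
   track,

      vbits(Iop_MullS32(a, b))  ~~>  mkLazy2(mce, Ity_I64, va, vb)

   i.e. if any bit of either operand is undefined, every bit of the
   64-bit result is treated as undefined. */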

/* Do the lazy propagation game from a null-terminated vector of
   atoms.  This is presumably the arguments to a helper call, so the
   IRCallee info is also supplied in order that we can know which
   arguments should be ignored (via the .mcx_mask field).
*/
static
IRAtom* mkLazyN ( MCEnv* mce,
                  IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
{
   Int i;
   IRAtom* here;
   IRAtom* curr = definedOfType(Ity_I32);
   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      /* Only take notice of this arg if the callee's mc-exclusion
         mask does not say it is to be excluded. */
      if (cee->mcx_mask & (1<<i)) {
         /* the arg is to be excluded from definedness checking.  Do
            nothing. */
         if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
      } else {
         /* calculate the arg's definedness, and pessimistically merge
            it in. */
         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, exprvec[i]) );
         curr = mkUifU32(mce, here, curr);
      }
   }
   return mkPCastTo(mce, finalVtype, curr );
}


/*------------------------------------------------------------*/
/*--- Generating expensive sequences for exact carry-chain ---*/
/*--- propagation in add/sub and related operations.       ---*/
/*------------------------------------------------------------*/

static
__attribute__((unused))
IRAtom* expensiveAdd32 ( MCEnv* mce, IRAtom* qaa, IRAtom* qbb,
                                     IRAtom* aa,  IRAtom* bb )
{
   IRAtom *a_min, *b_min, *a_max, *b_max;
   IRType ty;
   IROp   opAND, opOR, opXOR, opNOT, opADD;

   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));

   ty    = Ity_I32;
   opAND = Iop_And32;
   opOR  = Iop_Or32;
   opXOR = Iop_Xor32;
   opNOT = Iop_Not32;
   opADD = Iop_Add32;

   // a_min = aa & ~qaa
   a_min = assignNew(mce,ty,
                     binop(opAND, aa,
                                  assignNew(mce,ty, unop(opNOT, qaa))));

   // b_min = bb & ~qbb
   b_min = assignNew(mce,ty,
                     binop(opAND, bb,
                                  assignNew(mce,ty, unop(opNOT, qbb))));

   // a_max = aa | qaa
   a_max = assignNew(mce,ty, binop(opOR, aa, qaa));

   // b_max = bb | qbb
   b_max = assignNew(mce,ty, binop(opOR, bb, qbb));

   // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
   return
   assignNew(mce,ty,
      binop( opOR,
             assignNew(mce,ty, binop(opOR, qaa, qbb)),
             assignNew(mce,ty,
                binop(opXOR, assignNew(mce,ty, binop(opADD, a_min, b_min)),
                             assignNew(mce,ty, binop(opADD, a_max, b_max))
                )
             )
      )
   );
}
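
/* Numeric check of the scheme above, viewed as 8-bit for brevity
   (the function itself is 32-bit).  Take aa = 0x05, qaa = 0x02
   (bit 1 of aa undefined) and bb = 0x01, qbb = 0x00 (defined):

      a_min = 0x05 & ~0x02 = 0x05      a_max = 0x05 | 0x02 = 0x07
      b_min = b_max = 0x01

      a_min + b_min = 0x06
      a_max + b_max = 0x08
      (qaa | qbb) | (0x06 ^ 0x08) = 0x02 | 0x0E = 0x0E

   Bits 1..3 of the sum are flagged undefined -- the undefined bit 1
   can perturb carries up to bit 3 -- but bit 0 and bits 4..7 are
   provably defined, a tighter result than the all-or-nothing PCast
   approach. */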


/*------------------------------------------------------------*/
/*--- Helpers for dealing with vector primops.             ---*/
/*------------------------------------------------------------*/

/* Vector pessimisation -- pessimise within each lane individually. */

static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
}

static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
}

static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
}

static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
}


/* Here's a simple scheme that can handle ops derived from SSE1
   code, whilst only generating ops that can be efficiently
   implemented in SSE1. */

/* All-lanes versions are straightforward:

   binary32Fx4(x,y)   ==> PCast32x4(UifUV128(x#,y#))

   unary32Fx4(x)      ==> PCast32x4(x#)

   Lowest-lane-only versions are more complex:

   binary32F0x4(x,y)  ==> SetV128lo32(
                             x#,
                             PCast32(V128to32(UifUV128(x#,y#)))
                          )

   This is perhaps not so obvious.  In particular, it's faster to
   do a V128-bit UifU and then take the bottom 32 bits than the more
   obvious scheme of taking the bottom 32 bits of each operand
   and doing a 32-bit UifU, basically because UifU is fast whereas
   chopping lanes off vector values is slow.

   Finally:

   unary32F0x4(x)     ==> SetV128lo32(
                             x#,
                             PCast32(V128to32(x#))
                          )

   Where:

   PCast32(v#)   = 1Sto32(CmpNE32(v#,0))
   PCast32x4(v#) = CmpNEZ32x4(v#)
*/
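
/* Concretely: PCast32 maps a shadow word to all-0s if it is exactly
   zero (operand fully defined) and to all-1s otherwise, e.g.
   0x00000000 stays 0x00000000 but 0x00008000 becomes 0xFFFFFFFF.
   PCast32x4 does the same independently in each 32-bit lane. */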

static
IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_V128, mkPCast32x4(mce, at));
   return at;
}

static
IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_V128, mkPCast32x4(mce, vatomX));
   return at;
}

static
IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_I32, unop(Iop_V128to32, at));
   at = mkPCastTo(mce, Ity_I32, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
   return at;
}

static
IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_I32, unop(Iop_V128to32, vatomX));
   at = mkPCastTo(mce, Ity_I32, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
   return at;
}

/* --- ... and ... 64Fx2 versions of the same ... --- */

static
IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_V128, mkPCast64x2(mce, at));
   return at;
}

static
IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_V128, mkPCast64x2(mce, vatomX));
   return at;
}

static
IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_I64, unop(Iop_V128to64, at));
   at = mkPCastTo(mce, Ity_I64, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
   return at;
}

static
IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_I64, unop(Iop_V128to64, vatomX));
   at = mkPCastTo(mce, Ity_I64, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
   return at;
}

/* --- --- Vector saturated narrowing --- --- */

/* This is quite subtle.  What to do is simple:

   Let the original narrowing op be QNarrowW{S,U}xN.  Produce:

      the-narrowing-op( PCastWxN(vatom1), PCastWxN(vatom2))

   Why this is right is not so simple.  Consider a lane in one of
   the args (vatom1 or vatom2; it doesn't matter which).

   After the PCast, that lane is all 0s (defined) or all 1s
   (undefined).

   Both signed and unsigned saturating narrowing of all 0s produces
   all 0s, which is what we want.

   The all-1s case is more complex.  Unsigned narrowing interprets an
   all-1s input as the largest unsigned integer, and so produces all
   1s as a result since that is the largest unsigned value at the
   smaller width.

   Signed narrowing interprets all 1s as -1.  Fortunately, -1 narrows
   to -1, so we still wind up with all 1s at the smaller width.

   In short: pessimise the args, then apply the original narrowing
   op.
*/
static
IRAtom* vectorNarrowV128 ( MCEnv* mce, IROp narrow_op,
                           IRAtom* vatom1, IRAtom* vatom2)
{
   IRAtom *at1, *at2, *at3;
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   switch (narrow_op) {
      case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
      case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
      case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
      default: VG_(tool_panic)("vectorNarrowV128");
   }
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   at1 = assignNew(mce, Ity_V128, pcast(mce, vatom1));
   at2 = assignNew(mce, Ity_V128, pcast(mce, vatom2));
   at3 = assignNew(mce, Ity_V128, binop(narrow_op, at1, at2));
   return at3;
}
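
/* For instance: a 16-bit lane whose shadow is 0x0004 (one undefined
   bit) PCasts to 0xFFFF; signed saturating narrowing then treats
   that as -1 and produces 0xFF, a fully-undefined 8-bit lane.  A
   fully-defined lane (shadow 0x0000) narrows to 0x00 and stays
   fully defined. */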


/* --- --- Vector integer arithmetic --- --- */

/* Simple ... UifU the args and per-lane pessimise the results. */
static
IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast8x16(mce, at);
   return at;
}

static
IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast16x8(mce, at);
   return at;
}

static
IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast32x4(mce, at);
   return at;
}

static
IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast64x2(mce, at);
   return at;
}


/*------------------------------------------------------------*/
/*--- Generate shadow values from all kinds of IRExprs.    ---*/
/*------------------------------------------------------------*/

static
IRAtom* expr2vbits_Binop ( MCEnv* mce,
                           IROp op,
                           IRAtom* atom1, IRAtom* atom2 )
{
   IRType  and_or_ty;
   IRAtom* (*uifu)    (MCEnv*, IRAtom*, IRAtom*);
   IRAtom* (*difd)    (MCEnv*, IRAtom*, IRAtom*);
   IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);

   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   switch (op) {

      /* V128-bit SIMD (SSE2-esque) */

      case Iop_ShrN16x8:
      case Iop_ShrN32x4:
      case Iop_ShrN64x2:
      case Iop_SarN16x8:
      case Iop_SarN32x4:
      case Iop_ShlN16x8:
      case Iop_ShlN32x4:
      case Iop_ShlN64x2:
         /* Same scheme as with the scalar shifts below: complain if
            the shift amount is undefined, then shift the V bits by
            the real shift amount. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_V128, binop(op, vatom1, atom2));

      case Iop_QSub8Ux16:
      case Iop_QSub8Sx16:
      case Iop_Sub8x16:
      case Iop_Min8Ux16:
      case Iop_Max8Ux16:
      case Iop_CmpGT8Sx16:
      case Iop_CmpEQ8x16:
      case Iop_Avg8Ux16:
      case Iop_QAdd8Ux16:
      case Iop_QAdd8Sx16:
      case Iop_Add8x16:
         return binary8Ix16(mce, vatom1, vatom2);

      case Iop_QSub16Ux8:
      case Iop_QSub16Sx8:
      case Iop_Sub16x8:
      case Iop_Mul16x8:
      case Iop_MulHi16Sx8:
      case Iop_MulHi16Ux8:
      case Iop_Min16Sx8:
      case Iop_Max16Sx8:
      case Iop_CmpGT16Sx8:
      case Iop_CmpEQ16x8:
      case Iop_Avg16Ux8:
      case Iop_QAdd16Ux8:
      case Iop_QAdd16Sx8:
      case Iop_Add16x8:
         return binary16Ix8(mce, vatom1, vatom2);

      case Iop_Sub32x4:
      case Iop_QSub32Sx4:
      case Iop_QSub32Ux4:
      case Iop_CmpGT32Sx4:
      case Iop_CmpEQ32x4:
      case Iop_Add32x4:
      case Iop_QAdd32Ux4:
      case Iop_QAdd32Sx4:
         return binary32Ix4(mce, vatom1, vatom2);

      case Iop_Sub64x2:
      case Iop_QSub64Ux2:
      case Iop_QSub64Sx2:
      case Iop_Add64x2:
      case Iop_QAdd64Ux2:
      case Iop_QAdd64Sx2:
         return binary64Ix2(mce, vatom1, vatom2);

      case Iop_QNarrowBin32Sto16Sx8:
      case Iop_QNarrowBin16Sto8Sx16:
      case Iop_QNarrowBin16Sto8Ux16:
         return vectorNarrowV128(mce, op, vatom1, vatom2);

      case Iop_Sub64Fx2:
      case Iop_Mul64Fx2:
      case Iop_Min64Fx2:
      case Iop_Max64Fx2:
      case Iop_Div64Fx2:
      case Iop_CmpLT64Fx2:
      case Iop_CmpLE64Fx2:
      case Iop_CmpEQ64Fx2:
      case Iop_Add64Fx2:
         return binary64Fx2(mce, vatom1, vatom2);

      case Iop_Sub64F0x2:
      case Iop_Mul64F0x2:
      case Iop_Min64F0x2:
      case Iop_Max64F0x2:
      case Iop_Div64F0x2:
      case Iop_CmpLT64F0x2:
      case Iop_CmpLE64F0x2:
      case Iop_CmpEQ64F0x2:
      case Iop_Add64F0x2:
         return binary64F0x2(mce, vatom1, vatom2);

      /* V128-bit SIMD (SSE1-esque) */

      case Iop_Sub32Fx4:
      case Iop_Mul32Fx4:
      case Iop_Min32Fx4:
      case Iop_Max32Fx4:
      case Iop_Div32Fx4:
      case Iop_CmpLT32Fx4:
      case Iop_CmpLE32Fx4:
      case Iop_CmpEQ32Fx4:
      case Iop_Add32Fx4:
         return binary32Fx4(mce, vatom1, vatom2);

      case Iop_Sub32F0x4:
      case Iop_Mul32F0x4:
      case Iop_Min32F0x4:
      case Iop_Max32F0x4:
      case Iop_Div32F0x4:
      case Iop_CmpLT32F0x4:
      case Iop_CmpLE32F0x4:
      case Iop_CmpEQ32F0x4:
      case Iop_Add32F0x4:
         return binary32F0x4(mce, vatom1, vatom2);

      /* V128-bit data-steering */
      case Iop_SetV128lo32:
      case Iop_SetV128lo64:
      case Iop_64HLtoV128:
      case Iop_InterleaveLO64x2:
      case Iop_InterleaveLO32x4:
      case Iop_InterleaveLO16x8:
      case Iop_InterleaveLO8x16:
      case Iop_InterleaveHI64x2:
      case Iop_InterleaveHI32x4:
      case Iop_InterleaveHI16x8:
      case Iop_InterleaveHI8x16:
         return assignNew(mce, Ity_V128, binop(op, vatom1, vatom2));

      /* Scalar floating point */

         //      case Iop_RoundF64:
      case Iop_F64toI64S:
      case Iop_I64StoF64:
         /* First arg is I32 (rounding mode), second is F64 or I64
            (data). */
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_PRemC3210F64: case Iop_PRem1C3210F64:
         /* Takes two F64 args. */
      case Iop_F64toI32S:
      case Iop_F64toF32:
         /* First arg is I32 (rounding mode), second is F64 (data). */
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

      case Iop_F64toI16S:
         /* First arg is I32 (rounding mode), second is F64 (data). */
         return mkLazy2(mce, Ity_I16, vatom1, vatom2);

      case Iop_ScaleF64:
      case Iop_Yl2xF64:
      case Iop_Yl2xp1F64:
      case Iop_PRemF64:
      case Iop_AtanF64:
      case Iop_AddF64:
      case Iop_DivF64:
      case Iop_SubF64:
      case Iop_MulF64:
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_CmpF64:
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

      /* non-FP after here */

      case Iop_DivModU64to32:
      case Iop_DivModS64to32:
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_16HLto32:
         return assignNew(mce, Ity_I32, binop(op, vatom1, vatom2));
      case Iop_32HLto64:
         return assignNew(mce, Ity_I64, binop(op, vatom1, vatom2));

      case Iop_MullS32:
      case Iop_MullU32: {
         IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
         IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
         return assignNew(mce, Ity_I64, binop(Iop_32HLto64, vHi32, vLo32));
      }

      case Iop_MullS16:
      case Iop_MullU16: {
         IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
         IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
         return assignNew(mce, Ity_I32, binop(Iop_16HLto32, vHi16, vLo16));
      }

      case Iop_MullS8:
      case Iop_MullU8: {
         IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
         IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
         return assignNew(mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
      }
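
      /* Note on the Mull cases above: the low half of the result
         carries the ordinary left-propagating approximation (mkLeft
         of the UifU of the args), and the high half is a PCast of
         that -- i.e. the whole high half is deemed undefined unless
         both args are fully defined.  Blunt, but sound for widening
         multiplies. */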

      case Iop_Add32:
#        if 0
         return expensiveAdd32(mce, vatom1,vatom2, atom1,atom2);
#        endif
      case Iop_Sub32:
      case Iop_Mul32:
         return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));

      case Iop_Mul16:
      case Iop_Add16:
      case Iop_Sub16:
         return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));

      case Iop_Sub8:
      case Iop_Add8:
         return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));

      case Iop_CmpLE32S: case Iop_CmpLE32U:
      case Iop_CmpLT32U: case Iop_CmpLT32S:
      case Iop_CmpEQ32: case Iop_CmpNE32:
         return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));

      case Iop_CmpEQ16: case Iop_CmpNE16:
         return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));

      case Iop_CmpEQ8: case Iop_CmpNE8:
         return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));

      case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
         /* Complain if the shift amount is undefined.  Then simply
            shift the first arg's V bits by the real shift amount. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I32, binop(op, vatom1, atom2));

      case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
         /* Same scheme as with 32-bit shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I16, binop(op, vatom1, atom2));

      case Iop_Shl8: case Iop_Shr8:
         /* Same scheme as with 32-bit shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I8, binop(op, vatom1, atom2));

      case Iop_Shl64: case Iop_Shr64:
         /* Same scheme as with 32-bit shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I64, binop(op, vatom1, atom2));

      case Iop_AndV128:
         uifu = mkUifUV128; difd = mkDifDV128;
         and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
      case Iop_And64:
         uifu = mkUifU64; difd = mkDifD64;
         and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
      case Iop_And32:
         uifu = mkUifU32; difd = mkDifD32;
         and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
      case Iop_And16:
         uifu = mkUifU16; difd = mkDifD16;
         and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
      case Iop_And8:
         uifu = mkUifU8; difd = mkDifD8;
         and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;

      case Iop_OrV128:
         uifu = mkUifUV128; difd = mkDifDV128;
         and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
      case Iop_Or64:
         uifu = mkUifU64; difd = mkDifD64;
         and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
      case Iop_Or32:
         uifu = mkUifU32; difd = mkDifD32;
         and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
      case Iop_Or16:
         uifu = mkUifU16; difd = mkDifD16;
         and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
      case Iop_Or8:
         uifu = mkUifU8; difd = mkDifD8;
         and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;

      do_And_Or:
         return
         assignNew(
            mce,
            and_or_ty,
            difd(mce, uifu(mce, vatom1, vatom2),
                      difd(mce, improve(mce, atom1, vatom1),
                                improve(mce, atom2, vatom2) ) ) );
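
      /* Rationale for do_And_Or, taking AND as the example: a result
         bit is defined if both input bits are defined (the uifu
         term), or if either operand supplies a *defined 0*, since
         0 & x == 0 whatever x is.  The improve terms are defined
         (0 in the V bits) exactly at such forcing positions, and the
         outer difd folds them in.  OR is dual, with defined 1s
         forcing.  Xor, below, gets no such improvement: neither
         operand can force an Xor result bit on its own. */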

      case Iop_Xor8:
         return mkUifU8(mce, vatom1, vatom2);
      case Iop_Xor16:
         return mkUifU16(mce, vatom1, vatom2);
      case Iop_Xor32:
         return mkUifU32(mce, vatom1, vatom2);
      case Iop_Xor64:
         return mkUifU64(mce, vatom1, vatom2);
      case Iop_XorV128:
         return mkUifUV128(mce, vatom1, vatom2);

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Binop");
   }
}


static
IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
{
   IRAtom* vatom = expr2vbits( mce, atom );
   tl_assert(isOriginalAtom(mce,atom));
   switch (op) {

      case Iop_Sqrt64Fx2:
         return unary64Fx2(mce, vatom);

      case Iop_Sqrt64F0x2:
         return unary64F0x2(mce, vatom);

      case Iop_Sqrt32Fx4:
      case Iop_RSqrt32Fx4:
      case Iop_Recip32Fx4:
         return unary32Fx4(mce, vatom);

      case Iop_Sqrt32F0x4:
      case Iop_RSqrt32F0x4:
      case Iop_Recip32F0x4:
         return unary32F0x4(mce, vatom);

      case Iop_32UtoV128:
      case Iop_64UtoV128:
         return assignNew(mce, Ity_V128, unop(op, vatom));

      case Iop_F32toF64:
      case Iop_I32StoF64:
      case Iop_NegF64:
      case Iop_SinF64:
      case Iop_CosF64:
      case Iop_TanF64:
      case Iop_SqrtF64:
      case Iop_AbsF64:
      case Iop_2xm1F64:
         return mkPCastTo(mce, Ity_I64, vatom);

      case Iop_Clz32:
      case Iop_Ctz32:
         return mkPCastTo(mce, Ity_I32, vatom);

      case Iop_32Sto64:
      case Iop_32Uto64:
      case Iop_V128to64:
      case Iop_V128HIto64:
         return assignNew(mce, Ity_I64, unop(op, vatom));

      case Iop_64to32:
      case Iop_64HIto32:
      case Iop_1Uto32:
      case Iop_8Uto32:
      case Iop_16Uto32:
      case Iop_16Sto32:
      case Iop_8Sto32:
         return assignNew(mce, Ity_I32, unop(op, vatom));

      case Iop_8Sto16:
      case Iop_8Uto16:
      case Iop_32to16:
      case Iop_32HIto16:
         return assignNew(mce, Ity_I16, unop(op, vatom));

      case Iop_1Uto8:
      case Iop_16to8:
      case Iop_32to8:
         return assignNew(mce, Ity_I8, unop(op, vatom));

      case Iop_32to1:
         return assignNew(mce, Ity_I1, unop(Iop_32to1, vatom));

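      /* None of the following change which bits of the value are
         defined, so the shadow value passes through unchanged. */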
      case Iop_ReinterpF64asI64:
      case Iop_ReinterpI64asF64:
      case Iop_ReinterpI32asF32:
      case Iop_NotV128:
      case Iop_Not64:
      case Iop_Not32:
      case Iop_Not16:
      case Iop_Not8:
      case Iop_Not1:
         return vatom;

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Unop");
   }
}


/* Worker function; do not call directly. */
static
IRAtom* expr2vbits_LDle_WRK ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
{
   void*    helper;
   HChar*   hname;
   IRDirty* di;
   IRTemp   datavbits;
   IRAtom*  addrAct;

   tl_assert(isOriginalAtom(mce,addr));

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr );

   /* Now cook up a call to the relevant helper function, to read the
      data V bits from shadow memory. */
   ty = shadowType(ty);
   switch (ty) {
      case Ity_I64: helper = &MC_(helperc_LOADV8);
                    hname = "MC_(helperc_LOADV8)";
                    break;
      case Ity_I32: helper = &MC_(helperc_LOADV4);
                    hname = "MC_(helperc_LOADV4)";
                    break;
      case Ity_I16: helper = &MC_(helperc_LOADV2);
                    hname = "MC_(helperc_LOADV2)";
                    break;
      case Ity_I8:  helper = &MC_(helperc_LOADV1);
                    hname = "MC_(helperc_LOADV1)";
                    break;
      default:      ppIRType(ty);
                    VG_(tool_panic)("memcheck:do_shadow_LDle");
   }

   /* Generate the actual address into addrAct. */
   if (bias == 0) {
      addrAct = addr;
   } else {
      IROp    mkAdd;
      IRAtom* eBias;
      IRType  tyAddr  = mce->hWordTy;
      tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
      mkAdd   = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
      eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
      addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
   }

   /* We need to have a place to park the V bits we're just about to
      read. */
   datavbits = newIRTemp(mce->bb->tyenv, ty);
   di = unsafeIRDirty_1_N( datavbits,
                           1/*regparms*/, hname, helper,
                           mkIRExprVec_1( addrAct ));
   setHelperAnns( mce, di );
   stmt( mce->bb, IRStmt_Dirty(di) );

   return mkexpr(datavbits);
}


static
IRAtom* expr2vbits_LDle ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
{
   IRAtom *v64hi, *v64lo;
   switch (shadowType(ty)) {
      case Ity_I8:
      case Ity_I16:
      case Ity_I32:
      case Ity_I64:
         return expr2vbits_LDle_WRK(mce, ty, addr, bias);
      case Ity_V128:
         v64lo = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias);
         v64hi = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias+8);
         return assignNew( mce,
                           Ity_V128,
                           binop(Iop_64HLtoV128, v64hi, v64lo));
      default:
         VG_(tool_panic)("expr2vbits_LDle");
   }
}


static
IRAtom* expr2vbits_Mux0X ( MCEnv* mce,
                           IRAtom* cond, IRAtom* expr0, IRAtom* exprX )
{
   IRAtom *vbitsC, *vbits0, *vbitsX;
   IRType ty;
   /* Given Mux0X(cond,expr0,exprX), generate
         Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#)
      That is, steer the V bits like the originals, but trash the
      result if the steering value is undefined.  This gives
      lazy propagation. */
   tl_assert(isOriginalAtom(mce, cond));
   tl_assert(isOriginalAtom(mce, expr0));
   tl_assert(isOriginalAtom(mce, exprX));

   vbitsC = expr2vbits(mce, cond);
   vbits0 = expr2vbits(mce, expr0);
   vbitsX = expr2vbits(mce, exprX);
   ty = typeOfIRExpr(mce->bb->tyenv, vbits0);

   return
      mkUifU(mce, ty, assignNew(mce, ty, IRExpr_Mux0X(cond, vbits0, vbitsX)),
                      mkPCastTo(mce, ty, vbitsC) );
}
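
/* E.g. if cond# says cond is anything other than fully defined, the
   PCast above is all-1s at the result type, and the UifU then marks
   the entire Mux result undefined, however well-defined expr0 and
   exprX themselves are. */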

/* --------- This is the main expression-handling function. --------- */

static
IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
{
   switch (e->tag) {

      case Iex_Get:
         return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );

      case Iex_GetI:
         return shadow_GETI( mce, e->Iex.GetI.descr,
                                  e->Iex.GetI.ix, e->Iex.GetI.bias );

      case Iex_RdTmp:
         return IRExpr_RdTmp( findShadowTmp(mce, e->Iex.RdTmp.tmp) );

      case Iex_Const:
         return definedOfType(shadowType(typeOfIRExpr(mce->bb->tyenv, e)));

      case Iex_Binop:
         return expr2vbits_Binop(
                   mce,
                   e->Iex.Binop.op,
                   e->Iex.Binop.arg1, e->Iex.Binop.arg2
                );

      case Iex_Unop:
         return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );

      case Iex_Load:
         return expr2vbits_LDle( mce, e->Iex.Load.ty,
                                      e->Iex.Load.addr, 0/*addr bias*/ );

      case Iex_CCall:
         return mkLazyN( mce, e->Iex.CCall.args,
                              e->Iex.CCall.retty,
                              e->Iex.CCall.cee );

      case Iex_Mux0X:
         return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0,
                                       e->Iex.Mux0X.exprX);

      default:
         VG_(printf)("\n");
         ppIRExpr(e);
         VG_(printf)("\n");
         VG_(tool_panic)("memcheck: expr2vbits");
   }
}

/*------------------------------------------------------------*/
/*--- Generate shadow stmts from all kinds of IRStmts.     ---*/
/*------------------------------------------------------------*/

/* Widen a value to the host word size. */

static
IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
{
   IRType ty, tyH;

   /* vatom is a vbits value and as such can only have a shadow
      type. */
   tl_assert(isShadowAtom(mce,vatom));

   ty  = typeOfIRExpr(mce->bb->tyenv, vatom);
   tyH = mce->hWordTy;

   if (tyH == Ity_I32) {
      switch (ty) {
         case Ity_I32: return vatom;
         case Ity_I16: return assignNew(mce, tyH, unop(Iop_16Uto32, vatom));
         case Ity_I8:  return assignNew(mce, tyH, unop(Iop_8Uto32, vatom));
         default:      goto unhandled;
      }
   } else {
      goto unhandled;
   }
  unhandled:
   VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
   VG_(tool_panic)("zwidenToHostWord");
}
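
/* A sketch of the analogous 64-bit-host case, which this test
   harness does not exercise (op names assumed to match the full VEX
   op set):

      if (tyH == Ity_I64) {
         switch (ty) {
            case Ity_I64: return vatom;
            case Ity_I32: return assignNew(mce, tyH,
                                           unop(Iop_32Uto64, vatom));
            case Ity_I16: return assignNew(mce, tyH,
                                           unop(Iop_16Uto64, vatom));
            case Ity_I8:  return assignNew(mce, tyH,
                                           unop(Iop_8Uto64, vatom));
            default:      goto unhandled;
         }
      }
*/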


/* Generate a shadow store.  addr is always the original address
   atom.  For the data, pass in either the original data atom (data)
   or its V-bits (vdata), but not both. */

static
void do_shadow_STle ( MCEnv* mce,
                      IRAtom* addr, UInt bias,
                      IRAtom* data, IRAtom* vdata )
{
   IROp     mkAdd;
   IRType   ty, tyAddr;
   IRDirty  *di, *diLo64, *diHi64;
   IRAtom   *addrAct, *addrLo64, *addrHi64;
   IRAtom   *vdataLo64, *vdataHi64;
   IRAtom   *eBias, *eBias0, *eBias8;
   void*    helper = NULL;
   HChar*   hname = NULL;

   tyAddr = mce->hWordTy;
   mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
   tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );

   di = diLo64 = diHi64 = NULL;
   eBias = eBias0 = eBias8 = NULL;
   addrAct = addrLo64 = addrHi64 = NULL;
   vdataLo64 = vdataHi64 = NULL;

   if (data) {
      tl_assert(!vdata);
      tl_assert(isOriginalAtom(mce, data));
      tl_assert(bias == 0);
      vdata = expr2vbits( mce, data );
   } else {
      tl_assert(vdata);
   }

   tl_assert(isOriginalAtom(mce,addr));
   tl_assert(isShadowAtom(mce,vdata));

   ty = typeOfIRExpr(mce->bb->tyenv, vdata);

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr );

   /* Now decide which helper function to call to write the data V
      bits into shadow memory. */
   switch (ty) {
      case Ity_V128: /* we'll use the helper twice */
      case Ity_I64: helper = &MC_(helperc_STOREV8);
                    hname = "MC_(helperc_STOREV8)";
                    break;
      case Ity_I32: helper = &MC_(helperc_STOREV4);
                    hname = "MC_(helperc_STOREV4)";
                    break;
      case Ity_I16: helper = &MC_(helperc_STOREV2);
                    hname = "MC_(helperc_STOREV2)";
                    break;
      case Ity_I8:  helper = &MC_(helperc_STOREV1);
                    hname = "MC_(helperc_STOREV1)";
                    break;
      default:      VG_(tool_panic)("memcheck:do_shadow_STle");
   }

   if (ty == Ity_V128) {

      /* V128-bit case */
      /* See comment in next clause re 64-bit regparms */
      eBias0    = tyAddr==Ity_I32 ? mkU32(bias)   : mkU64(bias);
      addrLo64  = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias0) );
      vdataLo64 = assignNew(mce, Ity_I64, unop(Iop_V128to64, vdata));
      diLo64    = unsafeIRDirty_0_N(
                     1/*regparms*/, hname, helper,
                     mkIRExprVec_2( addrLo64, vdataLo64 ));

      eBias8    = tyAddr==Ity_I32 ? mkU32(bias+8) : mkU64(bias+8);
      addrHi64  = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias8) );
      vdataHi64 = assignNew(mce, Ity_I64, unop(Iop_V128HIto64, vdata));
      diHi64    = unsafeIRDirty_0_N(
                     1/*regparms*/, hname, helper,
                     mkIRExprVec_2( addrHi64, vdataHi64 ));

      setHelperAnns( mce, diLo64 );
      setHelperAnns( mce, diHi64 );
      stmt( mce->bb, IRStmt_Dirty(diLo64) );
      stmt( mce->bb, IRStmt_Dirty(diHi64) );

   } else {

      /* 8/16/32/64-bit cases */
      /* Generate the actual address into addrAct. */
      if (bias == 0) {
         addrAct = addr;
      } else {
         eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
         addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
      }

      if (ty == Ity_I64) {
         /* We can't do this with regparm 2 on 32-bit platforms, since
            the back ends aren't clever enough to handle 64-bit
            regparm args.  Therefore be different. */
         di = unsafeIRDirty_0_N(
                 1/*regparms*/, hname, helper,
                 mkIRExprVec_2( addrAct, vdata ));
      } else {
         di = unsafeIRDirty_0_N(
                 2/*regparms*/, hname, helper,
                 mkIRExprVec_2( addrAct,
                                zwidenToHostWord( mce, vdata )));
      }
      setHelperAnns( mce, di );
      stmt( mce->bb, IRStmt_Dirty(di) );
   }

}


/* Do lazy pessimistic propagation through a dirty helper call, by
   looking at the annotations on it.  This is the most complex part of
   Memcheck. */

static IRType szToITy ( Int n )
{
   switch (n) {
      case 1: return Ity_I8;
      case 2: return Ity_I16;
      case 4: return Ity_I32;
      case 8: return Ity_I64;
      default: VG_(tool_panic)("szToITy(memcheck)");
   }
}

static
void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
{
   Int     i, n, offset, toDo, gSz, gOff;
   IRAtom  *src, *here, *curr;
   IRType  tyAddr, tySrc, tyDst;
   IRTemp  dst;

   /* First check the guard. */
   complainIfUndefined(mce, d->guard);

   /* Now round up all inputs and PCast over them. */
   curr = definedOfType(Ity_I32);

   /* Inputs: unmasked args */
   for (i = 0; d->args[i]; i++) {
      if (d->cee->mcx_mask & (1<<i)) {
         /* ignore this arg */
      } else {
         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
         curr = mkUifU32(mce, here, curr);
      }
   }

   /* Inputs: guest state that we read. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Write)
         continue;

      /* Ignore any sections marked as 'always defined'. */
      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
         if (0)
         VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
                     d->fxState[i].offset, d->fxState[i].size );
         continue;
      }

      /* This state element is read or modified.  So we need to
         consider it.  If larger than 8 bytes, deal with it in 8-byte
         chunks. */
      gSz  = d->fxState[i].size;
      gOff = d->fxState[i].offset;
      tl_assert(gSz > 0);
      while (True) {
         if (gSz == 0) break;
         n = gSz <= 8 ? gSz : 8;
         /* update 'curr' with UifU of the state slice
            gOff .. gOff+n-1 */
         tySrc = szToITy( n );
         src   = assignNew( mce, tySrc,
                            shadow_GET(mce, gOff, tySrc ) );
         here = mkPCastTo( mce, Ity_I32, src );
         curr = mkUifU32(mce, here, curr);
         gSz -= n;
         gOff += n;
      }

   }

   /* Inputs: memory.  First set up some info needed regardless of
      whether we're doing reads or writes. */
   tyAddr = Ity_INVALID;

   if (d->mFx != Ifx_None) {
      /* Because we may do multiple shadow loads/stores from the same
         base address, it's best to do a single test of its
         definedness right now.  Post-instrumentation optimisation
         should remove all but this test. */
      tl_assert(d->mAddr);
      complainIfUndefined(mce, d->mAddr);

      tyAddr = typeOfIRExpr(mce->bb->tyenv, d->mAddr);
      tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
      tl_assert(tyAddr == mce->hWordTy); /* not really right */
   }

   /* Deal with memory inputs (reads or modifies) */
   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
      offset = 0;
      toDo   = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_LDle ( mce, Ity_I32,
                                     d->mAddr, d->mSize - toDo )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_LDle ( mce, Ity_I16,
                                     d->mAddr, d->mSize - toDo )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 2;
      }
      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
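
      /* A sketch of the missing 1-byte case noted above (assumption,
         not handled by the original) -- it would slot in before the
         assertion, mirroring the 16-bit loop:

            if (toDo == 1) {
               here = mkPCastTo(
                         mce, Ity_I32,
                         expr2vbits_LDle ( mce, Ity_I8,
                                           d->mAddr, d->mSize - toDo )
                      );
               curr = mkUifU32(mce, here, curr);
               toDo -= 1;
            }
      */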
   }

   /* Whew!  So curr is a 32-bit V-value summarising pessimistically
      all the inputs to the helper.  Now we need to re-distribute the
      results to all destinations. */

   /* Outputs: the destination temporary, if there is one. */
   if (d->tmp != IRTemp_INVALID) {
      dst   = findShadowTmp(mce, d->tmp);
      tyDst = typeOfIRTemp(mce->bb->tyenv, d->tmp);
      assign( mce->bb, dst, mkPCastTo( mce, tyDst, curr) );
   }

   /* Outputs: guest state that we write or modify. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Read)
         continue;
      /* Ignore any sections marked as 'always defined'. */
      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
         continue;
      /* This state element is written or modified.  So we need to
         consider it.  If larger than 8 bytes, deal with it in 8-byte
         chunks. */
      gSz  = d->fxState[i].size;
      gOff = d->fxState[i].offset;
      tl_assert(gSz > 0);
      while (True) {
         if (gSz == 0) break;
         n = gSz <= 8 ? gSz : 8;
         /* Write suitably-casted 'curr' to the state slice
            gOff .. gOff+n-1 */
         tyDst = szToITy( n );
         do_shadow_PUT( mce, gOff,
                             NULL, /* original atom */
                             mkPCastTo( mce, tyDst, curr ) );
         gSz -= n;
         gOff += n;
      }
   }

   /* Outputs: memory that we write or modify. */
   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
      offset = 0;
      toDo   = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
                         NULL, /* original data */
                         mkPCastTo( mce, Ity_I32, curr ) );
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
                         NULL, /* original data */
                         mkPCastTo( mce, Ity_I16, curr ) );
         toDo -= 2;
      }
      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
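
      /* The 1-byte excess on the store side would be handled the same
         way (assumption, not in the original): one do_shadow_STle of
         an Ity_I8 PCast of curr at offset d->mSize - toDo, placed
         before the assertion. */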
   }

}


/*------------------------------------------------------------*/
/*--- Memcheck main                                        ---*/
/*------------------------------------------------------------*/

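/* Heuristic: the constants recognised below look like artefacts of
   word-at-a-time string operations -- 0xFEFEFEFF is -0x01010101 and
   0x80808080 masks the per-byte top bits, the two magic numbers in
   the classic find-a-zero-byte-in-a-word trick -- and such code
   reads partially-undefined words by design.  (This reading of
   their provenance is an assumption.) */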
static Bool isBogusAtom ( IRAtom* at )
{
   ULong n = 0;
   IRConst* con;
   tl_assert(isIRAtom(at));
   if (at->tag == Iex_RdTmp)
      return False;
   tl_assert(at->tag == Iex_Const);
   con = at->Iex.Const.con;
   switch (con->tag) {
      case Ico_U8:  n = (ULong)con->Ico.U8; break;
      case Ico_U16: n = (ULong)con->Ico.U16; break;
      case Ico_U32: n = (ULong)con->Ico.U32; break;
      case Ico_U64: n = (ULong)con->Ico.U64; break;
      default: ppIRExpr(at); tl_assert(0);
   }
   /* VG_(printf)("%llx\n", n); */
   return (n == 0xFEFEFEFF
           || n == 0x80808080
           || n == 0x1010101
           || n == 1010100 /* decimal, unlike the others; possibly
                              intended as 0x01010100 */);
}

__attribute__((unused))
static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
{
   Int     i;
   IRExpr* e;
   switch (st->tag) {
      case Ist_WrTmp:
         e = st->Ist.WrTmp.data;
         switch (e->tag) {
            case Iex_Get:
            case Iex_RdTmp:
               return False;
            case Iex_Unop:
               return isBogusAtom(e->Iex.Unop.arg);
            case Iex_Binop:
               return isBogusAtom(e->Iex.Binop.arg1)
                      || isBogusAtom(e->Iex.Binop.arg2);
            case Iex_Mux0X:
               return isBogusAtom(e->Iex.Mux0X.cond)
                      || isBogusAtom(e->Iex.Mux0X.expr0)
                      || isBogusAtom(e->Iex.Mux0X.exprX);
            case Iex_Load:
               return isBogusAtom(e->Iex.Load.addr);
            case Iex_CCall:
               for (i = 0; e->Iex.CCall.args[i]; i++)
                  if (isBogusAtom(e->Iex.CCall.args[i]))
                     return True;
               return False;
            default:
               goto unhandled;
         }
      case Ist_Put:
         return isBogusAtom(st->Ist.Put.data);
      case Ist_Store:
         return isBogusAtom(st->Ist.Store.addr)
                || isBogusAtom(st->Ist.Store.data);
      case Ist_Exit:
         return isBogusAtom(st->Ist.Exit.guard);
      default:
      unhandled:
         ppIRStmt(st);
         VG_(tool_panic)("checkForBogusLiterals");
   }
}

IRSB* mc_instrument ( void* closureV,
                      IRSB* bb_in, VexGuestLayout* layout,
                      VexGuestExtents* vge,
                      IRType gWordTy, IRType hWordTy )
{
   Bool verboze = False; //True;

   /* Bool hasBogusLiterals = False; */

   Int i, j, first_stmt;
   IRStmt* st;
   MCEnv mce;

   /* Set up BB */
   IRSB* bb     = emptyIRSB();
   bb->tyenv    = deepCopyIRTypeEnv(bb_in->tyenv);
   bb->next     = deepCopyIRExpr(bb_in->next);
   bb->jumpkind = bb_in->jumpkind;

   /* Set up the running environment.  Only .bb is modified as we go
      along. */
   mce.bb             = bb;
   mce.layout         = layout;
   mce.n_originalTmps = bb->tyenv->types_used;
   mce.hWordTy        = hWordTy;
   mce.tmpMap         = LibVEX_Alloc(mce.n_originalTmps * sizeof(IRTemp));
   for (i = 0; i < mce.n_originalTmps; i++)
      mce.tmpMap[i] = IRTemp_INVALID;

   /* Iterate over the stmts. */

   for (i = 0; i < bb_in->stmts_used; i++) {
      st = bb_in->stmts[i];
      if (!st) continue;

      tl_assert(isFlatIRStmt(st));

      /*
      if (!hasBogusLiterals) {
         hasBogusLiterals = checkForBogusLiterals(st);
         if (hasBogusLiterals) {
            VG_(printf)("bogus: ");
            ppIRStmt(st);
            VG_(printf)("\n");
         }
      }
      */
      first_stmt = bb->stmts_used;

      if (verboze) {
         ppIRStmt(st);
         VG_(printf)("\n\n");
      }

      switch (st->tag) {

         case Ist_WrTmp:
            assign( bb, findShadowTmp(&mce, st->Ist.WrTmp.tmp),
                        expr2vbits( &mce, st->Ist.WrTmp.data) );
            break;

         case Ist_Put:
            do_shadow_PUT( &mce,
                           st->Ist.Put.offset,
                           st->Ist.Put.data,
                           NULL /* shadow atom */ );
            break;

         case Ist_PutI:
            do_shadow_PUTI( &mce,
                            st->Ist.PutI.descr,
                            st->Ist.PutI.ix,
                            st->Ist.PutI.bias,
                            st->Ist.PutI.data );
            break;

         case Ist_Store:
            do_shadow_STle( &mce, st->Ist.Store.addr, 0/* addr bias */,
                                  st->Ist.Store.data,
                                  NULL /* shadow data */ );
            break;

         case Ist_Exit:
            /* if (!hasBogusLiterals) */
               complainIfUndefined( &mce, st->Ist.Exit.guard );
            break;

         case Ist_Dirty:
            do_shadow_Dirty( &mce, st->Ist.Dirty.details );
            break;

         case Ist_IMark:
         case Ist_NoOp:
            break;

         default:
            VG_(printf)("\n");
            ppIRStmt(st);
            VG_(printf)("\n");
            VG_(tool_panic)("memcheck: unhandled IRStmt");

      } /* switch (st->tag) */

      if (verboze) {
         for (j = first_stmt; j < bb->stmts_used; j++) {
            VG_(printf)("   ");
            ppIRStmt(bb->stmts[j]);
            VG_(printf)("\n");
         }
         VG_(printf)("\n");
      }

      addStmtToIRSB(bb, st);

   }

   /* Now we need to complain if the jump target is undefined. */
   first_stmt = bb->stmts_used;

   if (verboze) {
      VG_(printf)("bb->next = ");
      ppIRExpr(bb->next);
      VG_(printf)("\n\n");
   }

   complainIfUndefined( &mce, bb->next );

   if (verboze) {
      for (j = first_stmt; j < bb->stmts_used; j++) {
         VG_(printf)("   ");
         ppIRStmt(bb->stmts[j]);
         VG_(printf)("\n");
      }
      VG_(printf)("\n");
   }

   return bb;
}
#endif /* UNUSED */

/*--------------------------------------------------------------------*/
/*--- end                                              test_main.c ---*/
/*--------------------------------------------------------------------*/