
/*---------------------------------------------------------------*/
/*--- begin                                       test_main.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2015 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>

#include "libvex_basictypes.h"
#include "libvex.h"

#include "test_main.h"


/*---------------------------------------------------------------*/
/*--- Test                                                    ---*/
/*---------------------------------------------------------------*/


__attribute__ ((noreturn))
static
void failure_exit ( void )
{
   fprintf(stdout, "VEX did failure_exit.  Bye.\n");
   exit(1);
}

static
void log_bytes ( const HChar* bytes, SizeT nbytes )
{
   fwrite ( bytes, 1, nbytes, stdout );
}

#define N_LINEBUF 10000
static HChar linebuf[N_LINEBUF];

#define N_ORIGBUF 10000
#define N_TRANSBUF 5000

static UChar origbuf[N_ORIGBUF];
static UChar transbuf[N_TRANSBUF];

static Bool verbose = True;

/* Forwards */
#if 1 /* UNUSED */
//static IRSB* ac_instrument ( IRSB*, VexGuestLayout*, IRType );
static
IRSB* mc_instrument ( void* closureV,
                      IRSB* bb_in, VexGuestLayout* layout,
                      VexGuestExtents* vge,
                      IRType gWordTy, IRType hWordTy );
#endif

static Bool chase_into_not_ok ( void* opaque, Addr dst ) {
   return False;
}
static UInt needs_self_check ( void *closureV, VexRegisterUpdates *pxControl,
                               const VexGuestExtents *vge ) {
   return 0;
}

int main ( int argc, char** argv )
{
   FILE* f;
   Int i;
   UInt u, sum;
   Addr32 orig_addr;
   Int bb_number, n_bbs_done = 0;
   Int orig_nbytes, trans_used;
   VexTranslateResult tres;
   VexControl vcon;
   VexGuestExtents vge;
   VexArchInfo vai_x86, vai_amd64, vai_ppc32, vai_arm, vai_mips32, vai_mips64;
   VexAbiInfo vbi;
   VexTranslateArgs vta;

   if (argc != 2) {
      fprintf(stderr, "usage: vex file.orig\n");
      exit(1);
   }
   f = fopen(argv[1], "r");
   if (!f) {
      fprintf(stderr, "can't open `%s'\n", argv[1]);
      exit(1);
   }

   /* Run with default params.  However, we can't allow bb chasing
      since that causes the front end to get segfaults when it tries
      to read code outside the initial BB we hand it.  So when calling
      LibVEX_Translate, send in a chase-into predicate that always
      returns False. */
   LibVEX_default_VexControl ( &vcon );
   vcon.iropt_level = 2;
   vcon.guest_max_insns = 60;

   LibVEX_Init ( &failure_exit, &log_bytes,
                 1,  /* debug_paranoia */
                 &vcon );


   while (!feof(f)) {

      __attribute__((unused))
      char* unused1 = fgets(linebuf, N_LINEBUF, f);
      if (linebuf[0] == 0) continue;
      if (linebuf[0] != '.') continue;

      if (n_bbs_done == TEST_N_BBS) break;
      n_bbs_done++;

      /* first line is:   . bb-number bb-addr n-bytes */
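      /* For instance (hypothetical input): a line of the form
         ". 1 8048000 12" would parse as bb_number 1, orig_addr
         0x8048000 and orig_nbytes 12. */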
      assert(3 == sscanf(&linebuf[1], " %d %x %d\n",
                                 & bb_number,
                                 & orig_addr, & orig_nbytes ));
      assert(orig_nbytes >= 1);
      assert(!feof(f));
      __attribute__((unused))
      char* unused2 = fgets(linebuf, N_LINEBUF, f);
      assert(linebuf[0] == '.');

      /* second line is:   . byte byte byte etc */
      if (verbose)
         printf("============ Basic Block %d, Done %d, "
                "Start %x, nbytes %2d ============",
                bb_number, n_bbs_done-1, orig_addr, orig_nbytes);

      /* thumb ITstate analysis needs to examine the 18 bytes
         preceding the first instruction.  So let's leave the first 18
         zeroed out. */
      memset(origbuf, 0, sizeof(origbuf));

      /* the insn bytes are stored 18 bytes in, so leave room for
         that prefix */
      assert(orig_nbytes >= 1 && orig_nbytes <= N_ORIGBUF - 18);
      for (i = 0; i < orig_nbytes; i++) {
         assert(1 == sscanf(&linebuf[2 + 3*i], "%x", &u));
         origbuf[18 + i] = (UChar)u;
      }

      /* FIXME: put sensible values into the .hwcaps fields */
      LibVEX_default_VexArchInfo(&vai_x86);
      vai_x86.hwcaps = VEX_HWCAPS_X86_MMXEXT | VEX_HWCAPS_X86_SSE1
                       | VEX_HWCAPS_X86_SSE2 | VEX_HWCAPS_X86_SSE3;
      vai_x86.endness = VexEndnessLE;

      LibVEX_default_VexArchInfo(&vai_amd64);
      vai_amd64.hwcaps = 0;
      vai_amd64.endness = VexEndnessLE;

      LibVEX_default_VexArchInfo(&vai_ppc32);
      vai_ppc32.hwcaps = 0;
      vai_ppc32.ppc_icache_line_szB = 128;

      LibVEX_default_VexArchInfo(&vai_arm);
      vai_arm.hwcaps = VEX_HWCAPS_ARM_VFP3 | VEX_HWCAPS_ARM_NEON | 7;

      LibVEX_default_VexArchInfo(&vai_mips32);
      vai_mips32.endness = VexEndnessLE;
      vai_mips32.hwcaps = VEX_PRID_COMP_MIPS;

      LibVEX_default_VexArchInfo(&vai_mips64);
      vai_mips64.endness = VexEndnessLE;

      LibVEX_default_VexAbiInfo(&vbi);
      vbi.guest_stack_redzone_size = 128;

      /* ----- Set up args for LibVEX_Translate ----- */

      vta.abiinfo_both    = vbi;
      vta.guest_bytes     = &origbuf[18];
      vta.guest_bytes_addr = orig_addr;
      vta.callback_opaque = NULL;
      vta.chase_into_ok   = chase_into_not_ok;
      vta.guest_extents   = &vge;
      vta.host_bytes      = transbuf;
      vta.host_bytes_size = N_TRANSBUF;
      vta.host_bytes_used = &trans_used;

#if 0 /* ppc32 -> ppc32 */
      vta.arch_guest     = VexArchPPC32;
      vta.archinfo_guest = vai_ppc32;
      vta.arch_host      = VexArchPPC32;
      vta.archinfo_host  = vai_ppc32;
#endif
#if 0 /* amd64 -> amd64 */
      vta.arch_guest     = VexArchAMD64;
      vta.archinfo_guest = vai_amd64;
      vta.arch_host      = VexArchAMD64;
      vta.archinfo_host  = vai_amd64;
#endif
#if 0 /* x86 -> x86 */
      vta.arch_guest     = VexArchX86;
      vta.archinfo_guest = vai_x86;
      vta.arch_host      = VexArchX86;
      vta.archinfo_host  = vai_x86;
#endif
#if 1 /* x86 -> mips32 */
      vta.arch_guest     = VexArchX86;
      vta.archinfo_guest = vai_x86;
      vta.arch_host      = VexArchMIPS32;
      vta.archinfo_host  = vai_mips32;
#endif
#if 0 /* amd64 -> mips64 */
      vta.arch_guest     = VexArchAMD64;
      vta.archinfo_guest = vai_amd64;
      vta.arch_host      = VexArchMIPS64;
      vta.archinfo_host  = vai_mips64;
#endif
#if 0 /* arm -> arm */
      vta.arch_guest     = VexArchARM;
      vta.archinfo_guest = vai_arm;
      vta.arch_host      = VexArchARM;
      vta.archinfo_host  = vai_arm;
      /* ARM/Thumb-only hacks, needed to keep the ITstate analyser
         in the front end happy. */
      vta.guest_bytes     = &origbuf[18 + 1];
      vta.guest_bytes_addr = (Addr) &origbuf[18 + 1];
#endif

#if 1 /* no instrumentation */
      vta.instrument1     = NULL;
      vta.instrument2     = NULL;
#endif
#if 0 /* addrcheck */
      vta.instrument1     = ac_instrument;
      vta.instrument2     = NULL;
#endif
#if 0 /* memcheck */
      vta.instrument1     = mc_instrument;
      vta.instrument2     = NULL;
#endif
      vta.needs_self_check  = needs_self_check;
      vta.preamble_function = NULL;
      vta.traceflags      = TEST_FLAGS;
      vta.addProfInc      = False;
      vta.sigill_diag     = True;

      vta.disp_cp_chain_me_to_slowEP = (void*)0x12345678;
      vta.disp_cp_chain_me_to_fastEP = (void*)0x12345679;
      vta.disp_cp_xindir             = (void*)0x1234567A;
      vta.disp_cp_xassisted          = (void*)0x1234567B;

      vta.finaltidy = NULL;

      for (i = 0; i < TEST_N_ITERS; i++)
         tres = LibVEX_Translate ( &vta );

      if (tres.status != VexTransOK)
         printf("\ntres = %d\n", (Int)tres.status);
      assert(tres.status == VexTransOK);
      assert(tres.n_sc_extents == 0);
      assert(vge.n_used == 1);
      assert((UInt)(vge.len[0]) == orig_nbytes);

      sum = 0;
      for (i = 0; i < trans_used; i++)
         sum += (UInt)transbuf[i];
      printf ( " %6.2f ... %u\n",
               (double)trans_used / (double)vge.len[0], sum );
   }

   fclose(f);
   printf("\n");
   LibVEX_ShowAllocStats();

   return 0;
}

//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////

#if 0 /* UNUSED */

static
__attribute__((noreturn))
void panic ( HChar* s )
{
  printf("\npanic: %s\n", s);
  failure_exit();
}

static
IRSB* ac_instrument (IRSB* bb_in, VexGuestLayout* layout, IRType hWordTy )
{
/* Use this rather than eg. -1 because it's a UInt. */
#define INVALID_DATA_SIZE   999999

   Int         i;
   Int         sz;
   IRCallee*   helper;
   IRStmt*     st;
   IRExpr*     data;
   IRExpr*     addr;
   Bool        needSz;

   /* Set up BB */
   IRSB* bb     = emptyIRSB();
   bb->tyenv    = dopyIRTypeEnv(bb_in->tyenv);
   bb->next     = dopyIRExpr(bb_in->next);
   bb->jumpkind = bb_in->jumpkind;

   /* No loads to consider in ->next. */
   assert(isIRAtom(bb_in->next));

   for (i = 0; i < bb_in->stmts_used; i++) {
      st = bb_in->stmts[i];
      if (!st) continue;

      switch (st->tag) {

         case Ist_Tmp:
            data = st->Ist.Tmp.data;
            if (data->tag == Iex_LDle) {
               addr = data->Iex.LDle.addr;
               sz = sizeofIRType(data->Iex.LDle.ty);
               needSz = False;
               switch (sz) {
                  case 4: helper = mkIRCallee(1, "ac_helperc_LOAD4",
                                                 (void*)0x12345601); break;
                  case 2: helper = mkIRCallee(0, "ac_helperc_LOAD2",
                                                 (void*)0x12345602); break;
                  case 1: helper = mkIRCallee(1, "ac_helperc_LOAD1",
                                                 (void*)0x12345603); break;
                  default: helper = mkIRCallee(0, "ac_helperc_LOADN",
                                                  (void*)0x12345604);
                                                  needSz = True; break;
               }
               if (needSz) {
                  addStmtToIRSB(
                     bb,
                     IRStmt_Dirty(
                        unsafeIRDirty_0_N( helper->regparms,
                                           helper->name, helper->addr,
                                           mkIRExprVec_2(addr, mkIRExpr_HWord(sz)))
                  ));
               } else {
                  addStmtToIRSB(
                     bb,
                     IRStmt_Dirty(
                        unsafeIRDirty_0_N( helper->regparms,
                                           helper->name, helper->addr,
                                           mkIRExprVec_1(addr) )
                  ));
               }
            }
            break;

         case Ist_STle:
            data = st->Ist.STle.data;
            addr = st->Ist.STle.addr;
            assert(isIRAtom(data));
            assert(isIRAtom(addr));
            sz = sizeofIRType(typeOfIRExpr(bb_in->tyenv, data));
            needSz = False;
            switch (sz) {
               case 4: helper = mkIRCallee(1, "ac_helperc_STORE4",
                                              (void*)0x12345605); break;
               case 2: helper = mkIRCallee(0, "ac_helperc_STORE2",
                                              (void*)0x12345606); break;
               case 1: helper = mkIRCallee(1, "ac_helperc_STORE1",
                                              (void*)0x12345607); break;
               default: helper = mkIRCallee(0, "ac_helperc_STOREN",
                                               (void*)0x12345608);
                                               needSz = True; break;
            }
            if (needSz) {
               addStmtToIRSB(
                  bb,
                  IRStmt_Dirty(
                     unsafeIRDirty_0_N( helper->regparms,
                                        helper->name, helper->addr,
                                        mkIRExprVec_2(addr, mkIRExpr_HWord(sz)))
               ));
            } else {
               addStmtToIRSB(
                  bb,
                  IRStmt_Dirty(
                     unsafeIRDirty_0_N( helper->regparms,
                                        helper->name, helper->addr,
                                        mkIRExprVec_1(addr) )
               ));
            }
            break;

         case Ist_Put:
            assert(isIRAtom(st->Ist.Put.data));
            break;

         case Ist_PutI:
            assert(isIRAtom(st->Ist.PutI.ix));
            assert(isIRAtom(st->Ist.PutI.data));
            break;

         case Ist_Exit:
            assert(isIRAtom(st->Ist.Exit.guard));
            break;

         case Ist_Dirty:
            /* If the call doesn't interact with memory, we ain't
               interested. */
            if (st->Ist.Dirty.details->mFx == Ifx_None)
               break;
            goto unhandled;

         default:
         unhandled:
            printf("\n");
            ppIRStmt(st);
            printf("\n");
            panic("addrcheck: unhandled IRStmt");
      }

      addStmtToIRSB( bb, dopyIRStmt(st));
   }

   return bb;
}
#endif /* UNUSED */

//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////

#if 1 /* UNUSED */

static
__attribute__((noreturn))
void panic ( HChar* s )
{
  printf("\npanic: %s\n", s);
  failure_exit();
}

#define tl_assert(xxx) assert(xxx)
#define VG_(xxxx) xxxx
#define tool_panic(zzz) panic(zzz)
#define MC_(zzzz) MC_##zzzz
#define TL_(zzzz) SK_##zzzz


static void MC_helperc_complain_undef ( void );
static void MC_helperc_LOADV8 ( void );
static void MC_helperc_LOADV4 ( void );
static void MC_helperc_LOADV2 ( void );
static void MC_helperc_LOADV1 ( void );
static void MC_helperc_STOREV8( void );
static void MC_helperc_STOREV4( void );
static void MC_helperc_STOREV2( void );
static void MC_helperc_STOREV1( void );
static void MC_helperc_value_check0_fail( void );
static void MC_helperc_value_check1_fail( void );
static void MC_helperc_value_check4_fail( void );

static void MC_helperc_complain_undef ( void ) { }
static void MC_helperc_LOADV8 ( void ) { }
static void MC_helperc_LOADV4 ( void ) { }
static void MC_helperc_LOADV2 ( void ) { }
static void MC_helperc_LOADV1 ( void ) { }
static void MC_helperc_STOREV8( void ) { }
static void MC_helperc_STOREV4( void ) { }
static void MC_helperc_STOREV2( void ) { }
static void MC_helperc_STOREV1( void ) { }
static void MC_helperc_value_check0_fail( void ) { }
static void MC_helperc_value_check1_fail( void ) { }
static void MC_helperc_value_check4_fail( void ) { }


/*--------------------------------------------------------------------*/
/*--- Instrument IR to perform memory checking operations.         ---*/
/*---                                               mc_translate.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of MemCheck, a heavyweight Valgrind tool for
   detecting memory errors.

   Copyright (C) 2000-2015 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

//#include "mc_include.h"


/*------------------------------------------------------------*/
/*--- Forward decls                                        ---*/
/*------------------------------------------------------------*/

struct _MCEnv;

static IRType  shadowType ( IRType ty );
static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );


/*------------------------------------------------------------*/
/*--- Memcheck running state, and tmp management.          ---*/
/*------------------------------------------------------------*/

/* Carries around state during memcheck instrumentation. */
typedef
   struct _MCEnv {
      /* MODIFIED: the bb being constructed.  IRStmts are added. */
      IRSB* bb;

      /* MODIFIED: a table [0 .. #temps_in_original_bb-1] which maps
         original temps to their current shadow temp.
         Initially all entries are IRTemp_INVALID.  Entries are added
         lazily since many original temps are not used due to
         optimisation prior to instrumentation.  Note that floating
         point original tmps are shadowed by integer tmps of the same
         size, and Bit-typed original tmps are shadowed by the type
         Ity_I8.  See comment below. */
      IRTemp* tmpMap;
      Int     n_originalTmps; /* for range checking */

      /* READONLY: the guest layout.  This indicates which parts of
         the guest state should be regarded as 'always defined'. */
      VexGuestLayout* layout;
      /* READONLY: the host word type.  Needed for constructing
         arguments of type 'HWord' to be passed to helper functions.
         Ity_I32 or Ity_I64 only. */
      IRType hWordTy;
   }
   MCEnv;

/* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
   demand), as they are encountered.  This is for two reasons.

   (1) (less important reason): Many original tmps are unused due to
   initial IR optimisation, and we do not want to waste space in
   tables tracking them.

   Shadow IRTemps are therefore allocated on demand.  mce.tmpMap is a
   table indexed [0 .. n_types-1], which gives the current shadow for
   each original tmp, or INVALID_IRTEMP if none is so far assigned.
   It is necessary to support making multiple assignments to a shadow
   -- specifically, after testing a shadow for definedness, it needs
   to be made defined.  But IR's SSA property disallows this.

   (2) (more important reason): Therefore, when a shadow needs to get
   a new value, a new temporary is created, the value is assigned to
   that, and the tmpMap is updated to reflect the new binding.

   A corollary is that if the tmpMap maps a given tmp to
   INVALID_IRTEMP and we are hoping to read that shadow tmp, it means
   there's a read-before-write error in the original tmps.  The IR
   sanity checker should catch all such anomalies, however.
*/
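
/* Worked example (hypothetical temp numbers): suppose original tmp t5
   is first shadowed by t17, allocated by findShadowTmp below.  After
   complainIfUndefined forces t5's shadow to 'defined', newShadowTmp
   allocates a fresh t23 and updates tmpMap[5] to t23; t17 is simply
   abandoned.  SSA form is preserved, yet the shadow has, in effect,
   taken a new value. */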

/* Find the tmp currently shadowing the given original tmp.  If none
   so far exists, allocate one.  */
static IRTemp findShadowTmp ( MCEnv* mce, IRTemp orig )
{
   tl_assert(orig < mce->n_originalTmps);
   if (mce->tmpMap[orig] == IRTemp_INVALID) {
      mce->tmpMap[orig]
         = newIRTemp(mce->bb->tyenv,
                     shadowType(mce->bb->tyenv->types[orig]));
   }
   return mce->tmpMap[orig];
}

/* Allocate a new shadow for the given original tmp.  This means any
   previous shadow is abandoned.  This is needed because it is
   necessary to give a new value to a shadow once it has been tested
   for undefinedness, but unfortunately IR's SSA property disallows
   this.  Instead we must abandon the old shadow, allocate a new one
   and use that instead. */
static void newShadowTmp ( MCEnv* mce, IRTemp orig )
{
   tl_assert(orig < mce->n_originalTmps);
   mce->tmpMap[orig]
      = newIRTemp(mce->bb->tyenv,
                  shadowType(mce->bb->tyenv->types[orig]));
}


/*------------------------------------------------------------*/
/*--- IRAtoms -- a subset of IRExprs                       ---*/
/*------------------------------------------------------------*/

/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
   isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
   input, most of this code deals in atoms.  Usefully, a value atom
   always has a V-value which is also an atom: constants are shadowed
   by constants, and temps are shadowed by the corresponding shadow
   temporary. */

typedef  IRExpr  IRAtom;

/* (used for sanity checks only): is this an atom which looks
   like it's from original code? */
static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp < mce->n_originalTmps)
      return True;
   return False;
}

/* (used for sanity checks only): is this an atom which looks
   like it's from shadow code? */
static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp >= mce->n_originalTmps)
      return True;
   return False;
}

/* (used for sanity checks only): check that both args are atoms and
   are identically-kinded. */
static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
{
   if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
      return True;
   if (a1->tag == Iex_Const && a2->tag == Iex_Const)
      return True;
   return False;
}


/*------------------------------------------------------------*/
/*--- Type management                                      ---*/
/*------------------------------------------------------------*/

/* Shadow state is always accessed using integer types.  This returns
   an integer type with the same size (as per sizeofIRType) as the
   given type.  The only valid shadow types are Bit, I8, I16, I32,
   I64, V128. */

static IRType shadowType ( IRType ty )
{
   switch (ty) {
      case Ity_I1:
      case Ity_I8:
      case Ity_I16:
      case Ity_I32:
      case Ity_I64:  return ty;
      case Ity_F32:  return Ity_I32;
      case Ity_F64:  return Ity_I64;
      case Ity_V128: return Ity_V128;
      default: ppIRType(ty);
               VG_(tool_panic)("memcheck:shadowType");
   }
}

/* Produce a 'defined' value of the given shadow type.  Should only be
   supplied shadow types (I1/I8/I16/I32/I64/V128). */
static IRExpr* definedOfType ( IRType ty ) {
   switch (ty) {
      case Ity_I1:   return IRExpr_Const(IRConst_U1(False));
      case Ity_I8:   return IRExpr_Const(IRConst_U8(0));
      case Ity_I16:  return IRExpr_Const(IRConst_U16(0));
      case Ity_I32:  return IRExpr_Const(IRConst_U32(0));
      case Ity_I64:  return IRExpr_Const(IRConst_U64(0));
      case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
      default:      VG_(tool_panic)("memcheck:definedOfType");
   }
}


/*------------------------------------------------------------*/
/*--- Constructing IR fragments                            ---*/
/*------------------------------------------------------------*/

/* assign value to tmp */
#define assign(_bb,_tmp,_expr)   \
   addStmtToIRSB((_bb), IRStmt_WrTmp((_tmp),(_expr)))

/* add stmt to a bb */
#define stmt(_bb,_stmt)    \
   addStmtToIRSB((_bb), (_stmt))

/* build various kinds of expressions */
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
#define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
#define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))

/* bind the given expression to a new temporary, and return the
   temporary.  This effectively converts an arbitrary expression into
   an atom. */
static IRAtom* assignNew ( MCEnv* mce, IRType ty, IRExpr* e ) {
   IRTemp t = newIRTemp(mce->bb->tyenv, ty);
   assign(mce->bb, t, e);
   return mkexpr(t);
}
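
/* For instance (sketch, hypothetical tmp number): given a non-atomic
   expression, assignNew(mce, Ity_I32, binop(Iop_Add32, x, y)) emits
   "t42 = Add32(x,y)" into the bb under construction and returns
   mkexpr(t42), which is an atom and can therefore be used as an
   operand of further IR expressions. */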


/*------------------------------------------------------------*/
/*--- Constructing definedness primitive ops               ---*/
/*------------------------------------------------------------*/

/* --------- Defined-if-either-defined --------- */
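/* Illustrative note: with memcheck's V-bit convention (0 == defined,
   1 == undefined), DifD is simply bitwise AND: e.g. for 8 bits,
   DifD8(0x0F, 0xF1) == 0x01 -- a result bit is defined if it is
   defined in either operand. */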

static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I8, binop(Iop_And8, a1, a2));
}

static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I16, binop(Iop_And16, a1, a2));
}

static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I32, binop(Iop_And32, a1, a2));
}

static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I64, binop(Iop_And64, a1, a2));
}

static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_V128, binop(Iop_AndV128, a1, a2));
}

/* --------- Undefined-if-either-undefined --------- */
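/* Dually, UifU is simply bitwise OR: e.g. UifU8(0x0F, 0xF1) == 0xFF
   -- a result bit is undefined if it is undefined in either
   operand. */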

static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I8, binop(Iop_Or8, a1, a2));
}

static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I16, binop(Iop_Or16, a1, a2));
}

static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I32, binop(Iop_Or32, a1, a2));
}

static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I64, binop(Iop_Or64, a1, a2));
}

static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_V128, binop(Iop_OrV128, a1, a2));
}

static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
   switch (vty) {
      case Ity_I8:   return mkUifU8(mce, a1, a2);
      case Ity_I16:  return mkUifU16(mce, a1, a2);
      case Ity_I32:  return mkUifU32(mce, a1, a2);
      case Ity_I64:  return mkUifU64(mce, a1, a2);
      case Ity_V128: return mkUifUV128(mce, a1, a2);
      default:
         VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
         VG_(tool_panic)("memcheck:mkUifU");
   }
}

/* --------- The Left-family of operations. --------- */
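/* Illustrative note: Left(x) computes x | -x, which smears the lowest
   undefined (1) bit of x leftwards.  E.g. for 8 bits, Left8(0x14) ==
   0x14 | 0xEC == 0xFC.  This models how, in add/sub style operations,
   undefinedness at one bit position can propagate towards the high
   end via the carry chain. */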

static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I8,
                    binop(Iop_Or8, a1,
                          assignNew(mce, Ity_I8,
                                    /* unop(Iop_Neg8, a1)))); */
                                    binop(Iop_Sub8, mkU8(0), a1) )));
}

static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I16,
                    binop(Iop_Or16, a1,
                          assignNew(mce, Ity_I16,
                                    /* unop(Iop_Neg16, a1)))); */
                                    binop(Iop_Sub16, mkU16(0), a1) )));
}

static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I32,
                    binop(Iop_Or32, a1,
                          assignNew(mce, Ity_I32,
                                    /* unop(Iop_Neg32, a1)))); */
                                    binop(Iop_Sub32, mkU32(0), a1) )));
}

/* --------- 'Improvement' functions for AND/OR. --------- */

/* ImproveAND(data, vbits) = data OR vbits.  Defined (0) data 0s give
   defined (0); all other -> undefined (1).
*/
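
/* Example: data == 0x0A with vbits == 0x05 gives data|vbits == 0x0F.
   Bits 4..7 of the improvement term are 0 because those bits of data
   are defined 0s, and a defined 0 into an AND forces a defined 0 out
   regardless of the other operand. */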
static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I8, binop(Iop_Or8, data, vbits));
}

static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I16, binop(Iop_Or16, data, vbits));
}

static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I32, binop(Iop_Or32, data, vbits));
}

static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I64, binop(Iop_Or64, data, vbits));
}

static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_V128, binop(Iop_OrV128, data, vbits));
}

/* ImproveOR(data, vbits) = ~data OR vbits.  Defined (0) data 1s give
   defined (0); all other -> undefined (1).
*/
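
/* Example: data == 0xF0 with vbits == 0x00 gives ~data|vbits == 0x0F.
   Bits 4..7 of the improvement term are 0 because those bits of data
   are defined 1s, and a defined 1 into an OR forces a defined 1 out
   regardless of the other operand. */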
static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I8,
             binop(Iop_Or8,
                   assignNew(mce, Ity_I8, unop(Iop_Not8, data)),
                   vbits) );
}

static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I16,
             binop(Iop_Or16,
                   assignNew(mce, Ity_I16, unop(Iop_Not16, data)),
                   vbits) );
}

static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I32,
             binop(Iop_Or32,
                   assignNew(mce, Ity_I32, unop(Iop_Not32, data)),
                   vbits) );
}

static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I64,
             binop(Iop_Or64,
                   assignNew(mce, Ity_I64, unop(Iop_Not64, data)),
                   vbits) );
}

static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_V128,
             binop(Iop_OrV128,
                   assignNew(mce, Ity_V128, unop(Iop_NotV128, data)),
                   vbits) );
}

/* --------- Pessimising casts. --------- */
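/* A pessimising cast first collapses vbits to a single definedness
   bit -- any undefined bit makes the whole value undefined -- and
   then widens that bit to the destination type.  E.g. casting to
   Ity_I32 yields 0x00000000 when the source vbits are all zeroes
   (fully defined) and 0xFFFFFFFF otherwise. */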

static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
{
   IRType  ty;
   IRAtom* tmp1;
   /* Note, dst_ty is a shadow type, not an original type. */
   /* First of all, collapse vbits down to a single bit. */
   tl_assert(isShadowAtom(mce,vbits));
   ty   = typeOfIRExpr(mce->bb->tyenv, vbits);
   tmp1 = NULL;
   switch (ty) {
      case Ity_I1:
         tmp1 = vbits;
         break;
      case Ity_I8:
         tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE8, vbits, mkU8(0)));
         break;
      case Ity_I16:
         tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE16, vbits, mkU16(0)));
         break;
      case Ity_I32:
         tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE32, vbits, mkU32(0)));
         break;
      case Ity_I64:
         tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE64, vbits, mkU64(0)));
         break;
      default:
         VG_(tool_panic)("mkPCastTo(1)");
   }
   tl_assert(tmp1);
   /* Now widen up to the dst type. */
   switch (dst_ty) {
      case Ity_I1:
         return tmp1;
      case Ity_I8:
         return assignNew(mce, Ity_I8, unop(Iop_1Sto8, tmp1));
      case Ity_I16:
         return assignNew(mce, Ity_I16, unop(Iop_1Sto16, tmp1));
      case Ity_I32:
         return assignNew(mce, Ity_I32, unop(Iop_1Sto32, tmp1));
      case Ity_I64:
         return assignNew(mce, Ity_I64, unop(Iop_1Sto64, tmp1));
      case Ity_V128:
         tmp1 = assignNew(mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew(mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
         return tmp1;
      default:
         ppIRType(dst_ty);
         VG_(tool_panic)("mkPCastTo(2)");
   }
}


/*------------------------------------------------------------*/
/*--- Emit a test and complaint if something is undefined. ---*/
/*------------------------------------------------------------*/

/* Set the annotations on a dirty helper to indicate that the stack
   pointer and instruction pointers might be read.  This is the
   behaviour of all 'emit-a-complaint' style functions we might
   call. */

static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
   di->nFxState = 2;
   di->fxState[0].fx     = Ifx_Read;
   di->fxState[0].offset = mce->layout->offset_SP;
   di->fxState[0].size   = mce->layout->sizeof_SP;
   di->fxState[1].fx     = Ifx_Read;
   di->fxState[1].offset = mce->layout->offset_IP;
   di->fxState[1].size   = mce->layout->sizeof_IP;
}


/* Check the supplied **original** atom for undefinedness, and emit a
   complaint if so.  Once that happens, mark it as defined.  This is
   possible because the atom is either a tmp or literal.  If it's a
   tmp, it will be shadowed by a tmp, and so we can set the shadow to
   be defined.  In fact as mentioned above, we will have to allocate a
   new tmp to carry the new 'defined' shadow value, and update the
   original->tmp mapping accordingly; we cannot simply assign a new
   value to an existing shadow tmp as this breaks SSAness -- resulting
   in the post-instrumentation sanity checker spluttering in disapproval.
*/
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom )
{
   IRAtom*  vatom;
   IRType   ty;
   Int      sz;
   IRDirty* di;
   IRAtom*  cond;

   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretation for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->bb->tyenv, vatom);

   /* sz is only used for constructing the error message */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */

   switch (sz) {
      case 0:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check0_fail)",
                                 &MC_(helperc_value_check0_fail),
                                 mkIRExprVec_0()
                               );
         break;
      case 1:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check1_fail)",
                                 &MC_(helperc_value_check1_fail),
                                 mkIRExprVec_0()
                               );
         break;
      case 4:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check4_fail)",
                                 &MC_(helperc_value_check4_fail),
                                 mkIRExprVec_0()
                               );
         break;
      default:
         di = unsafeIRDirty_0_N( 1/*regparms*/,
                                 "MC_(helperc_complain_undef)",
                                 &MC_(helperc_complain_undef),
                                 mkIRExprVec_1( mkIRExpr_HWord( sz ))
                               );
         break;
   }
   di->guard = cond;
   setHelperAnns( mce, di );
   stmt( mce->bb, IRStmt_Dirty(di));

   /* Set the shadow tmp to be defined.  First, update the
      orig->shadow tmp mapping to reflect the fact that this shadow is
      getting a new value. */
   tl_assert(isIRAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_RdTmp) {
      tl_assert(atom->tag == Iex_RdTmp);
      newShadowTmp(mce, atom->Iex.RdTmp.tmp);
      assign(mce->bb, findShadowTmp(mce, atom->Iex.RdTmp.tmp),
                      definedOfType(ty));
   }
}


/*------------------------------------------------------------*/
/*--- Shadowing PUTs/GETs, and indexed variants thereof    ---*/
/*------------------------------------------------------------*/

/* Examine the always-defined sections declared in layout to see if
   the (offset,size) section is within one.  Note, it is an error to
   partially fall into such a region: (offset,size) should either be
   completely in such a region or completely not-in such a region.
*/
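
/* For example (hypothetical layout): if alwaysDefd holds the single
   section (offset 64, size 8), then (64,4) is always-defined, (56,4)
   is not, and (62,4) panics, since it straddles the section
   boundary. */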
static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
{
   Int minoffD, maxoffD, i;
   Int minoff = offset;
   Int maxoff = minoff + size - 1;
   tl_assert((minoff & ~0xFFFF) == 0);
   tl_assert((maxoff & ~0xFFFF) == 0);

   for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
      minoffD = mce->layout->alwaysDefd[i].offset;
      maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
      tl_assert((minoffD & ~0xFFFF) == 0);
      tl_assert((maxoffD & ~0xFFFF) == 0);

      if (maxoff < minoffD || maxoffD < minoff)
         continue; /* no overlap */
      if (minoff >= minoffD && maxoff <= maxoffD)
         return True; /* completely contained in an always-defd section */

      VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
   }
   return False; /* could not find any containing section */
}


/* Generate into bb suitable actions to shadow this Put.  If the state
   slice is marked 'always defined', do nothing.  Otherwise, write the
   supplied V bits to the shadow state.  We can pass in either an
   original atom or a V-atom, but not both.  In the former case the
   relevant V-bits are then generated from the original.
*/
static
void do_shadow_PUT ( MCEnv* mce,  Int offset,
                     IRAtom* atom, IRAtom* vatom )
{
   IRType ty;
   if (atom) {
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom );
   } else {
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   }

   ty = typeOfIRExpr(mce->bb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put. */
      stmt( mce->bb, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ) );
   }
}


/* Generate into bb suitable actions to shadow this PutI (passed in
   in pieces).  The V bits to write are computed from the original
   atom.
*/
static
void do_shadow_PUTI ( MCEnv* mce,
                      IRRegArray* descr, IRAtom* ix, Int bias, IRAtom* atom )
{
   IRAtom* vatom;
   IRType  ty, tyS;
   Int     arrSize;

   tl_assert(isOriginalAtom(mce,atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(sameKindedAtoms(atom, vatom));
   ty   = descr->elemTy;
   tyS  = shadowType(ty);
   arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce,ix);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a cloned version of the Put that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                      tyS, descr->nElems);
      stmt( mce->bb, IRStmt_PutI( mkIRPutI( new_descr, ix, bias, vatom ) ));
   }
}


/* Return an expression which contains the V bits corresponding to the
   given GET (passed in in pieces).
*/
static
IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
{
   IRType tyS = shadowType(ty);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
   }
}


/* Return an expression which contains the V bits corresponding to the
   given GETI (passed in in pieces).
*/
static
IRExpr* shadow_GETI ( MCEnv* mce, IRRegArray* descr, IRAtom* ix, Int bias )
{
   IRType ty   = descr->elemTy;
   IRType tyS  = shadowType(ty);
   Int arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce,ix);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                      tyS, descr->nElems);
      return IRExpr_GetI( new_descr, ix, bias );
   }
}


/*------------------------------------------------------------*/
/*--- Generating approximations for unknown operations,    ---*/
/*--- using lazy-propagate semantics                       ---*/
/*------------------------------------------------------------*/

/* Lazy propagation of undefinedness from two values, resulting in the
   specified shadow type.
*/
static
IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
{
   /* force everything via 32-bit intermediaries. */
   IRAtom* at;
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkPCastTo(mce, finalVty, at);
   return at;
}


/* Do the lazy propagation game from a null-terminated vector of
   atoms.  This is presumably the arguments to a helper call, so the
   IRCallee info is also supplied in order that we can know which
   arguments should be ignored (via the .mcx_mask field).
*/
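
/* For instance (hypothetical mask): a callee with mcx_mask == 0x5 has
   bits 0 and 2 set, so arguments 0 and 2 are skipped and only the
   remaining arguments contribute to the computed definedness. */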
   1307 static
   1308 IRAtom* mkLazyN ( MCEnv* mce,
   1309                   IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
   1310 {
   1311    Int i;
   1312    IRAtom* here;
   1313    IRAtom* curr = definedOfType(Ity_I32);
   1314    for (i = 0; exprvec[i]; i++) {
   1315       tl_assert(i < 32);
   1316       tl_assert(isOriginalAtom(mce, exprvec[i]));
   1317       /* Only take notice of this arg if the callee's mc-exclusion
   1318          mask does not say it is to be excluded. */
   1319       if (cee->mcx_mask & (1<<i)) {
   1320          /* the arg is to be excluded from definedness checking.  Do
   1321             nothing. */
   1322          if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
   1323       } else {
   1324          /* calculate the arg's definedness, and pessimistically merge
   1325             it in. */
   1326          here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, exprvec[i]) );
   1327          curr = mkUifU32(mce, here, curr);
   1328       }
   1329    }
   1330    return mkPCastTo(mce, finalVtype, curr );
   1331 }
   1332 
   1333 
   1334 /*------------------------------------------------------------*/
   1335 /*--- Generating expensive sequences for exact carry-chain ---*/
   1336 /*--- propagation in add/sub and related operations.       ---*/
   1337 /*------------------------------------------------------------*/
   1338 
   1339 static
   1340 __attribute__((unused))
   1341 IRAtom* expensiveAdd32 ( MCEnv* mce, IRAtom* qaa, IRAtom* qbb,
   1342                                      IRAtom* aa,  IRAtom* bb )
   1343 {
   1344    IRAtom *a_min, *b_min, *a_max, *b_max;
   1345    IRType ty;
   1346    IROp   opAND, opOR, opXOR, opNOT, opADD;
   1347 
   1348    tl_assert(isShadowAtom(mce,qaa));
   1349    tl_assert(isShadowAtom(mce,qbb));
   1350    tl_assert(isOriginalAtom(mce,aa));
   1351    tl_assert(isOriginalAtom(mce,bb));
   1352    tl_assert(sameKindedAtoms(qaa,aa));
   1353    tl_assert(sameKindedAtoms(qbb,bb));
   1354 
   1355    ty    = Ity_I32;
   1356    opAND = Iop_And32;
   1357    opOR  = Iop_Or32;
   1358    opXOR = Iop_Xor32;
   1359    opNOT = Iop_Not32;
   1360    opADD = Iop_Add32;
   1361 
   1362    // a_min = aa & ~qaa
   1363    a_min = assignNew(mce,ty,
   1364                      binop(opAND, aa,
   1365                                   assignNew(mce,ty, unop(opNOT, qaa))));
   1366 
   1367    // b_min = bb & ~qbb
   1368    b_min = assignNew(mce,ty,
   1369                      binop(opAND, bb,
   1370                                   assignNew(mce,ty, unop(opNOT, qbb))));
   1371 
   1372    // a_max = aa | qaa
   1373    a_max = assignNew(mce,ty, binop(opOR, aa, qaa));
   1374 
   1375    // b_max = bb | qbb
   1376    b_max = assignNew(mce,ty, binop(opOR, bb, qbb));
   1377 
   1378    // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
   1379    return
   1380    assignNew(mce,ty,
   1381       binop( opOR,
   1382              assignNew(mce,ty, binop(opOR, qaa, qbb)),
   1383              assignNew(mce,ty,
   1384                 binop(opXOR, assignNew(mce,ty, binop(opADD, a_min, b_min)),
   1385                              assignNew(mce,ty, binop(opADD, a_max, b_max))
   1386                 )
   1387              )
   1388       )
   1389    );
   1390 }
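
         /* The same bound on concrete 32-bit words (illustrative sketch;
            qaa/qbb carry 1s exactly at undefined bit positions).  A result
            bit is undefined iff an input bit is undefined there, or the
            smallest and largest sums consistent with the inputs disagree
            there -- i.e. an undefined bit could have perturbed a carry. */
         __attribute__((unused))
         static UInt expensiveAdd32_model ( UInt qaa, UInt qbb,
                                            UInt aa,  UInt bb )
         {
            UInt a_min = aa & ~qaa;   /* smallest value consistent with aa */
            UInt b_min = bb & ~qbb;
            UInt a_max = aa | qaa;    /* largest value consistent with aa */
            UInt b_max = bb | qbb;
            return (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max));
         }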
   1391 
   1392 
   1393 /*------------------------------------------------------------*/
   1394 /*--- Helpers for dealing with vector primops.            ---*/
   1395 /*------------------------------------------------------------*/
   1396 
   1397 /* Vector pessimisation -- pessimise within each lane individually. */
   1398 
   1399 static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
   1400 {
   1401    return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
   1402 }
   1403 
   1404 static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
   1405 {
   1406    return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
   1407 }
   1408 
   1409 static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
   1410 {
   1411    return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
   1412 }
   1413 
   1414 static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
   1415 {
   1416    return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
   1417 }
   1418 
   1419 
    1420 /* Here's a simple scheme capable of handling ops derived from SSE1
    1421    code, while generating only ops that can themselves be efficiently
    1422    implemented in SSE1. */
   1423 
   1424 /* All-lanes versions are straightforward:
   1425 
   1426    binary32Fx4(x,y)   ==> PCast32x4(UifUV128(x#,y#))
   1427 
    1428    unary32Fx4(x)      ==> PCast32x4(x#)
   1429 
   1430    Lowest-lane-only versions are more complex:
   1431 
   1432    binary32F0x4(x,y)  ==> SetV128lo32(
   1433                              x#,
   1434                              PCast32(V128to32(UifUV128(x#,y#)))
   1435                           )
   1436 
   1437    This is perhaps not so obvious.  In particular, it's faster to
   1438    do a V128-bit UifU and then take the bottom 32 bits than the more
   1439    obvious scheme of taking the bottom 32 bits of each operand
    1440    and doing a 32-bit UifU, basically because UifU is fast and
    1441    chopping lanes off vector values is slow.
   1442 
   1443    Finally:
   1444 
   1445    unary32F0x4(x)     ==> SetV128lo32(
   1446                              x#,
   1447                              PCast32(V128to32(x#))
   1448                           )
   1449 
   1450    Where:
   1451 
   1452    PCast32(v#)   = 1Sto32(CmpNE32(v#,0))
   1453    PCast32x4(v#) = CmpNEZ32x4(v#)
   1454 */
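
         /* Lane-level models of the two casts just named, on plain words
            (illustrative only).  PCast32 smears "any bit undefined" across
            the whole value; PCast32x4 does the same to each 32-bit lane of
            a V128 value, represented here as four UInts. */
         __attribute__((unused))
         static UInt pcast32_model ( UInt v )
         {
            return v != 0 ? 0xFFFFFFFF : 0;   /* 1Sto32(CmpNE32(v,0)) */
         }

         __attribute__((unused))
         static void pcast32x4_model ( UInt lanes[4] )
         {
            Int i;
            for (i = 0; i < 4; i++)
               lanes[i] = lanes[i] != 0 ? 0xFFFFFFFF : 0;  /* CmpNEZ32x4 */
         }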
   1455 
   1456 static
   1457 IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
   1458 {
   1459    IRAtom* at;
   1460    tl_assert(isShadowAtom(mce, vatomX));
   1461    tl_assert(isShadowAtom(mce, vatomY));
   1462    at = mkUifUV128(mce, vatomX, vatomY);
   1463    at = assignNew(mce, Ity_V128, mkPCast32x4(mce, at));
   1464    return at;
   1465 }
   1466 
   1467 static
   1468 IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
   1469 {
   1470    IRAtom* at;
   1471    tl_assert(isShadowAtom(mce, vatomX));
   1472    at = assignNew(mce, Ity_V128, mkPCast32x4(mce, vatomX));
   1473    return at;
   1474 }
   1475 
   1476 static
   1477 IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
   1478 {
   1479    IRAtom* at;
   1480    tl_assert(isShadowAtom(mce, vatomX));
   1481    tl_assert(isShadowAtom(mce, vatomY));
   1482    at = mkUifUV128(mce, vatomX, vatomY);
   1483    at = assignNew(mce, Ity_I32, unop(Iop_V128to32, at));
   1484    at = mkPCastTo(mce, Ity_I32, at);
   1485    at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
   1486    return at;
   1487 }
   1488 
   1489 static
   1490 IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
   1491 {
   1492    IRAtom* at;
   1493    tl_assert(isShadowAtom(mce, vatomX));
   1494    at = assignNew(mce, Ity_I32, unop(Iop_V128to32, vatomX));
   1495    at = mkPCastTo(mce, Ity_I32, at);
   1496    at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
   1497    return at;
   1498 }
   1499 
   1500 /* --- ... and ... 64Fx2 versions of the same ... --- */
   1501 
   1502 static
   1503 IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
   1504 {
   1505    IRAtom* at;
   1506    tl_assert(isShadowAtom(mce, vatomX));
   1507    tl_assert(isShadowAtom(mce, vatomY));
   1508    at = mkUifUV128(mce, vatomX, vatomY);
   1509    at = assignNew(mce, Ity_V128, mkPCast64x2(mce, at));
   1510    return at;
   1511 }
   1512 
   1513 static
   1514 IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
   1515 {
   1516    IRAtom* at;
   1517    tl_assert(isShadowAtom(mce, vatomX));
   1518    at = assignNew(mce, Ity_V128, mkPCast64x2(mce, vatomX));
   1519    return at;
   1520 }
   1521 
   1522 static
   1523 IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
   1524 {
   1525    IRAtom* at;
   1526    tl_assert(isShadowAtom(mce, vatomX));
   1527    tl_assert(isShadowAtom(mce, vatomY));
   1528    at = mkUifUV128(mce, vatomX, vatomY);
   1529    at = assignNew(mce, Ity_I64, unop(Iop_V128to64, at));
   1530    at = mkPCastTo(mce, Ity_I64, at);
   1531    at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
   1532    return at;
   1533 }
   1534 
   1535 static
   1536 IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
   1537 {
   1538    IRAtom* at;
   1539    tl_assert(isShadowAtom(mce, vatomX));
   1540    at = assignNew(mce, Ity_I64, unop(Iop_V128to64, vatomX));
   1541    at = mkPCastTo(mce, Ity_I64, at);
   1542    at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
   1543    return at;
   1544 }
   1545 
   1546 /* --- --- Vector saturated narrowing --- --- */
   1547 
   1548 /* This is quite subtle.  What to do is simple:
   1549 
   1550    Let the original narrowing op be QNarrowW{S,U}xN.  Produce:
   1551 
   1552       the-narrowing-op( PCastWxN(vatom1), PCastWxN(vatom2))
   1553 
    1554    Why this is right is not so simple.  Consider a lane in either of
    1555    the args, vatom1 or vatom2; it doesn't matter which.
    1556 
    1557    After the PCast, that lane is all 0s (defined) or all
    1558    1s (undefined).
   1559 
   1560    Both signed and unsigned saturating narrowing of all 0s produces
   1561    all 0s, which is what we want.
   1562 
   1563    The all-1s case is more complex.  Unsigned narrowing interprets an
   1564    all-1s input as the largest unsigned integer, and so produces all
   1565    1s as a result since that is the largest unsigned value at the
   1566    smaller width.
   1567 
   1568    Signed narrowing interprets all 1s as -1.  Fortunately, -1 narrows
   1569    to -1, so we still wind up with all 1s at the smaller width.
   1570 
    1571    In short: pessimise the args, then apply the original narrowing
   1572    op.
   1573 */
   1574 static
   1575 IRAtom* vectorNarrowV128 ( MCEnv* mce, IROp narrow_op,
    1576                            IRAtom* vatom1, IRAtom* vatom2 )
   1577 {
   1578    IRAtom *at1, *at2, *at3;
   1579    IRAtom* (*pcast)( MCEnv*, IRAtom* );
   1580    switch (narrow_op) {
   1581       case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
   1582       case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
   1583       case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
   1584       default: VG_(tool_panic)("vectorNarrowV128");
   1585    }
   1586    tl_assert(isShadowAtom(mce,vatom1));
   1587    tl_assert(isShadowAtom(mce,vatom2));
   1588    at1 = assignNew(mce, Ity_V128, pcast(mce, vatom1));
   1589    at2 = assignNew(mce, Ity_V128, pcast(mce, vatom2));
   1590    at3 = assignNew(mce, Ity_V128, binop(narrow_op, at1, at2));
   1591    return at3;
   1592 }
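
         /* A concrete check of the two cases argued above, for a single
            16-to-8 lane, using plain C saturation (illustrative sketch).
            All 0s narrows to all 0s; all 1s narrows to all 1s under both
            signed and unsigned saturation. */
         __attribute__((unused))
         static void qnarrow_model_check ( void )
         {
            Short  s    = (Short)0xFFFF;            /* all 1s, read as -1 */
            Char   sres = s < -128 ? (Char)-128
                          : s > 127 ? (Char)127 : (Char)s;
            UShort u    = 0xFFFF;                /* all 1s, read as 65535 */
            UChar  ures = u > 255 ? 255 : (UChar)u;
            assert((UChar)sres == 0xFF && ures == 0xFF);
         }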
   1593 
   1594 
   1595 /* --- --- Vector integer arithmetic --- --- */
   1596 
   1597 /* Simple ... UifU the args and per-lane pessimise the results. */
   1598 static
   1599 IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
   1600 {
   1601    IRAtom* at;
   1602    at = mkUifUV128(mce, vatom1, vatom2);
   1603    at = mkPCast8x16(mce, at);
   1604    return at;
   1605 }
   1606 
   1607 static
   1608 IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
   1609 {
   1610    IRAtom* at;
   1611    at = mkUifUV128(mce, vatom1, vatom2);
   1612    at = mkPCast16x8(mce, at);
   1613    return at;
   1614 }
   1615 
   1616 static
   1617 IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
   1618 {
   1619    IRAtom* at;
   1620    at = mkUifUV128(mce, vatom1, vatom2);
   1621    at = mkPCast32x4(mce, at);
   1622    return at;
   1623 }
   1624 
   1625 static
   1626 IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
   1627 {
   1628    IRAtom* at;
   1629    at = mkUifUV128(mce, vatom1, vatom2);
   1630    at = mkPCast64x2(mce, at);
   1631    return at;
   1632 }
   1633 
   1634 
   1635 /*------------------------------------------------------------*/
   1636 /*--- Generate shadow values from all kinds of IRExprs.    ---*/
   1637 /*------------------------------------------------------------*/
   1638 
   1639 static
   1640 IRAtom* expr2vbits_Binop ( MCEnv* mce,
   1641                            IROp op,
   1642                            IRAtom* atom1, IRAtom* atom2 )
   1643 {
   1644    IRType  and_or_ty;
   1645    IRAtom* (*uifu)    (MCEnv*, IRAtom*, IRAtom*);
   1646    IRAtom* (*difd)    (MCEnv*, IRAtom*, IRAtom*);
   1647    IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
   1648 
   1649    IRAtom* vatom1 = expr2vbits( mce, atom1 );
   1650    IRAtom* vatom2 = expr2vbits( mce, atom2 );
   1651 
   1652    tl_assert(isOriginalAtom(mce,atom1));
   1653    tl_assert(isOriginalAtom(mce,atom2));
   1654    tl_assert(isShadowAtom(mce,vatom1));
   1655    tl_assert(isShadowAtom(mce,vatom2));
   1656    tl_assert(sameKindedAtoms(atom1,vatom1));
   1657    tl_assert(sameKindedAtoms(atom2,vatom2));
   1658    switch (op) {
   1659 
   1660       /* V128-bit SIMD (SSE2-esque) */
   1661 
   1662       case Iop_ShrN16x8:
   1663       case Iop_ShrN32x4:
   1664       case Iop_ShrN64x2:
   1665       case Iop_SarN16x8:
   1666       case Iop_SarN32x4:
   1667       case Iop_ShlN16x8:
   1668       case Iop_ShlN32x4:
   1669       case Iop_ShlN64x2:
   1670          /* Same scheme as with all other shifts. */
   1671          complainIfUndefined(mce, atom2);
   1672          return assignNew(mce, Ity_V128, binop(op, vatom1, atom2));
   1673 
   1674       case Iop_QSub8Ux16:
   1675       case Iop_QSub8Sx16:
   1676       case Iop_Sub8x16:
   1677       case Iop_Min8Ux16:
   1678       case Iop_Max8Ux16:
   1679       case Iop_CmpGT8Sx16:
   1680       case Iop_CmpEQ8x16:
   1681       case Iop_Avg8Ux16:
   1682       case Iop_QAdd8Ux16:
   1683       case Iop_QAdd8Sx16:
   1684       case Iop_Add8x16:
   1685          return binary8Ix16(mce, vatom1, vatom2);
   1686 
   1687       case Iop_QSub16Ux8:
   1688       case Iop_QSub16Sx8:
   1689       case Iop_Sub16x8:
   1690       case Iop_Mul16x8:
   1691       case Iop_MulHi16Sx8:
   1692       case Iop_MulHi16Ux8:
   1693       case Iop_Min16Sx8:
   1694       case Iop_Max16Sx8:
   1695       case Iop_CmpGT16Sx8:
   1696       case Iop_CmpEQ16x8:
   1697       case Iop_Avg16Ux8:
   1698       case Iop_QAdd16Ux8:
   1699       case Iop_QAdd16Sx8:
   1700       case Iop_Add16x8:
   1701          return binary16Ix8(mce, vatom1, vatom2);
   1702 
   1703       case Iop_Sub32x4:
   1704       case Iop_QSub32Sx4:
   1705       case Iop_QSub32Ux4:
   1706       case Iop_CmpGT32Sx4:
   1707       case Iop_CmpEQ32x4:
   1708       case Iop_Add32x4:
   1709       case Iop_QAdd32Ux4:
   1710       case Iop_QAdd32Sx4:
   1711          return binary32Ix4(mce, vatom1, vatom2);
   1712 
   1713       case Iop_Sub64x2:
   1714       case Iop_QSub64Ux2:
   1715       case Iop_QSub64Sx2:
   1716       case Iop_Add64x2:
   1717       case Iop_QAdd64Ux2:
   1718       case Iop_QAdd64Sx2:
   1719          return binary64Ix2(mce, vatom1, vatom2);
   1720 
   1721       case Iop_QNarrowBin32Sto16Sx8:
   1722       case Iop_QNarrowBin16Sto8Sx16:
   1723       case Iop_QNarrowBin16Sto8Ux16:
   1724          return vectorNarrowV128(mce, op, vatom1, vatom2);
   1725 
   1726       case Iop_Sub64Fx2:
   1727       case Iop_Mul64Fx2:
   1728       case Iop_Min64Fx2:
   1729       case Iop_Max64Fx2:
   1730       case Iop_Div64Fx2:
   1731       case Iop_CmpLT64Fx2:
   1732       case Iop_CmpLE64Fx2:
   1733       case Iop_CmpEQ64Fx2:
   1734       case Iop_Add64Fx2:
   1735          return binary64Fx2(mce, vatom1, vatom2);
   1736 
   1737       case Iop_Sub64F0x2:
   1738       case Iop_Mul64F0x2:
   1739       case Iop_Min64F0x2:
   1740       case Iop_Max64F0x2:
   1741       case Iop_Div64F0x2:
   1742       case Iop_CmpLT64F0x2:
   1743       case Iop_CmpLE64F0x2:
   1744       case Iop_CmpEQ64F0x2:
   1745       case Iop_Add64F0x2:
   1746          return binary64F0x2(mce, vatom1, vatom2);
   1747 
   1748       /* V128-bit SIMD (SSE1-esque) */
   1749 
   1750       case Iop_Sub32Fx4:
   1751       case Iop_Mul32Fx4:
   1752       case Iop_Min32Fx4:
   1753       case Iop_Max32Fx4:
   1754       case Iop_Div32Fx4:
   1755       case Iop_CmpLT32Fx4:
   1756       case Iop_CmpLE32Fx4:
   1757       case Iop_CmpEQ32Fx4:
   1758       case Iop_Add32Fx4:
   1759          return binary32Fx4(mce, vatom1, vatom2);
   1760 
   1761       case Iop_Sub32F0x4:
   1762       case Iop_Mul32F0x4:
   1763       case Iop_Min32F0x4:
   1764       case Iop_Max32F0x4:
   1765       case Iop_Div32F0x4:
   1766       case Iop_CmpLT32F0x4:
   1767       case Iop_CmpLE32F0x4:
   1768       case Iop_CmpEQ32F0x4:
   1769       case Iop_Add32F0x4:
   1770          return binary32F0x4(mce, vatom1, vatom2);
   1771 
   1772       /* V128-bit data-steering */
   1773       case Iop_SetV128lo32:
   1774       case Iop_SetV128lo64:
   1775       case Iop_64HLtoV128:
   1776       case Iop_InterleaveLO64x2:
   1777       case Iop_InterleaveLO32x4:
   1778       case Iop_InterleaveLO16x8:
   1779       case Iop_InterleaveLO8x16:
   1780       case Iop_InterleaveHI64x2:
   1781       case Iop_InterleaveHI32x4:
   1782       case Iop_InterleaveHI16x8:
   1783       case Iop_InterleaveHI8x16:
   1784          return assignNew(mce, Ity_V128, binop(op, vatom1, vatom2));
   1785 
   1786       /* Scalar floating point */
   1787 
    1788       // case Iop_RoundF64:
   1789       case Iop_F64toI64S:
   1790       case Iop_I64StoF64:
   1791          /* First arg is I32 (rounding mode), second is F64 or I64
   1792             (data). */
   1793          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
   1794 
   1795       case Iop_PRemC3210F64: case Iop_PRem1C3210F64:
   1796          /* Takes two F64 args. */
   1797       case Iop_F64toI32S:
   1798       case Iop_F64toF32:
   1799          /* First arg is I32 (rounding mode), second is F64 (data). */
   1800          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
   1801 
   1802       case Iop_F64toI16S:
   1803          /* First arg is I32 (rounding mode), second is F64 (data). */
   1804          return mkLazy2(mce, Ity_I16, vatom1, vatom2);
   1805 
   1806       case Iop_ScaleF64:
   1807       case Iop_Yl2xF64:
   1808       case Iop_Yl2xp1F64:
   1809       case Iop_PRemF64:
   1810       case Iop_AtanF64:
   1811       case Iop_AddF64:
   1812       case Iop_DivF64:
   1813       case Iop_SubF64:
   1814       case Iop_MulF64:
   1815          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
   1816 
   1817       case Iop_CmpF64:
   1818          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
   1819 
   1820       /* non-FP after here */
   1821 
   1822       case Iop_DivModU64to32:
   1823       case Iop_DivModS64to32:
   1824          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
   1825 
   1826       case Iop_16HLto32:
   1827          return assignNew(mce, Ity_I32, binop(op, vatom1, vatom2));
   1828       case Iop_32HLto64:
   1829          return assignNew(mce, Ity_I64, binop(op, vatom1, vatom2));
   1830 
   1831       case Iop_MullS32:
   1832       case Iop_MullU32: {
   1833          IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
   1834          IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
   1835          return assignNew(mce, Ity_I64, binop(Iop_32HLto64, vHi32, vLo32));
   1836       }
   1837 
   1838       case Iop_MullS16:
   1839       case Iop_MullU16: {
   1840          IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
   1841          IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
   1842          return assignNew(mce, Ity_I32, binop(Iop_16HLto32, vHi16, vLo16));
   1843       }
   1844 
   1845       case Iop_MullS8:
   1846       case Iop_MullU8: {
   1847          IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
   1848          IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
   1849          return assignNew(mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
   1850       }
   1851 
   1852       case Iop_Add32:
   1853 #        if 0
   1854          return expensiveAdd32(mce, vatom1,vatom2, atom1,atom2);
   1855 #        endif
   1856       case Iop_Sub32:
   1857       case Iop_Mul32:
   1858          return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
   1859 
   1860       case Iop_Mul16:
   1861       case Iop_Add16:
   1862       case Iop_Sub16:
   1863          return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
   1864 
   1865       case Iop_Sub8:
   1866       case Iop_Add8:
   1867          return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
   1868 
   1869       case Iop_CmpLE32S: case Iop_CmpLE32U:
   1870       case Iop_CmpLT32U: case Iop_CmpLT32S:
   1871       case Iop_CmpEQ32: case Iop_CmpNE32:
   1872          return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
   1873 
   1874       case Iop_CmpEQ16: case Iop_CmpNE16:
   1875          return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
   1876 
   1877       case Iop_CmpEQ8: case Iop_CmpNE8:
   1878          return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
   1879 
   1880       case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
   1881          /* Complain if the shift amount is undefined.  Then simply
   1882             shift the first arg's V bits by the real shift amount. */
   1883          complainIfUndefined(mce, atom2);
   1884          return assignNew(mce, Ity_I32, binop(op, vatom1, atom2));
   1885 
   1886       case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
   1887          /* Same scheme as with 32-bit shifts. */
   1888          complainIfUndefined(mce, atom2);
   1889          return assignNew(mce, Ity_I16, binop(op, vatom1, atom2));
   1890 
   1891       case Iop_Shl8: case Iop_Shr8:
   1892          /* Same scheme as with 32-bit shifts. */
   1893          complainIfUndefined(mce, atom2);
   1894          return assignNew(mce, Ity_I8, binop(op, vatom1, atom2));
   1895 
   1896       case Iop_Shl64: case Iop_Shr64:
   1897          /* Same scheme as with 32-bit shifts. */
   1898          complainIfUndefined(mce, atom2);
   1899          return assignNew(mce, Ity_I64, binop(op, vatom1, atom2));
   1900 
   1901       case Iop_AndV128:
   1902          uifu = mkUifUV128; difd = mkDifDV128;
   1903          and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
   1904       case Iop_And64:
   1905          uifu = mkUifU64; difd = mkDifD64;
   1906          and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
   1907       case Iop_And32:
   1908          uifu = mkUifU32; difd = mkDifD32;
   1909          and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
   1910       case Iop_And16:
   1911          uifu = mkUifU16; difd = mkDifD16;
   1912          and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
   1913       case Iop_And8:
   1914          uifu = mkUifU8; difd = mkDifD8;
   1915          and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
   1916 
   1917       case Iop_OrV128:
   1918          uifu = mkUifUV128; difd = mkDifDV128;
   1919          and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
   1920       case Iop_Or64:
   1921          uifu = mkUifU64; difd = mkDifD64;
   1922          and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
   1923       case Iop_Or32:
   1924          uifu = mkUifU32; difd = mkDifD32;
   1925          and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
   1926       case Iop_Or16:
   1927          uifu = mkUifU16; difd = mkDifD16;
   1928          and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
   1929       case Iop_Or8:
   1930          uifu = mkUifU8; difd = mkDifD8;
   1931          and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
   1932 
   1933       do_And_Or:
   1934          return
   1935          assignNew(
   1936             mce,
   1937             and_or_ty,
   1938             difd(mce, uifu(mce, vatom1, vatom2),
   1939                       difd(mce, improve(mce, atom1, vatom1),
   1940                                 improve(mce, atom2, vatom2) ) ) );
   1941 
   1942       case Iop_Xor8:
   1943          return mkUifU8(mce, vatom1, vatom2);
   1944       case Iop_Xor16:
   1945          return mkUifU16(mce, vatom1, vatom2);
   1946       case Iop_Xor32:
   1947          return mkUifU32(mce, vatom1, vatom2);
   1948       case Iop_Xor64:
   1949          return mkUifU64(mce, vatom1, vatom2);
   1950       case Iop_XorV128:
   1951          return mkUifUV128(mce, vatom1, vatom2);
   1952 
   1953       default:
   1954          ppIROp(op);
   1955          VG_(tool_panic)("memcheck:expr2vbits_Binop");
   1956    }
   1957 }
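
         /* Scalar model of the do_And_Or scheme above, for Iop_And32,
            assuming (as in the helpers defined earlier) that UifU is OR of
            V bits, DifD is AND of V bits, and improveAND(data, vbits) is
            data | vbits.  A result bit is defined either when both input
            bits are defined, or when some input bit is a defined 0, which
            forces the AND result regardless of the other operand. */
         __attribute__((unused))
         static UInt and32_vbits_model ( UInt a1, UInt v1, UInt a2, UInt v2 )
         {
            return (v1 | v2) & (a1 | v1) & (a2 | v2);
         }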
   1958 
   1959 
   1960 static
   1961 IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
   1962 {
   1963    IRAtom* vatom = expr2vbits( mce, atom );
   1964    tl_assert(isOriginalAtom(mce,atom));
   1965    switch (op) {
   1966 
   1967       case Iop_Sqrt64Fx2:
   1968          return unary64Fx2(mce, vatom);
   1969 
   1970       case Iop_Sqrt64F0x2:
   1971          return unary64F0x2(mce, vatom);
   1972 
   1973       case Iop_Sqrt32Fx4:
   1974       case Iop_RecipEst32Fx4:
   1975          return unary32Fx4(mce, vatom);
   1976 
   1977       case Iop_Sqrt32F0x4:
   1978       case Iop_RSqrtEst32F0x4:
   1979       case Iop_RecipEst32F0x4:
   1980          return unary32F0x4(mce, vatom);
   1981 
   1982       case Iop_32UtoV128:
   1983       case Iop_64UtoV128:
   1984          return assignNew(mce, Ity_V128, unop(op, vatom));
   1985 
   1986       case Iop_F32toF64:
   1987       case Iop_I32StoF64:
   1988       case Iop_NegF64:
   1989       case Iop_SinF64:
   1990       case Iop_CosF64:
   1991       case Iop_TanF64:
   1992       case Iop_SqrtF64:
   1993       case Iop_AbsF64:
   1994       case Iop_2xm1F64:
   1995          return mkPCastTo(mce, Ity_I64, vatom);
   1996 
   1997       case Iop_Clz32:
   1998       case Iop_Ctz32:
   1999          return mkPCastTo(mce, Ity_I32, vatom);
   2000 
   2001       case Iop_32Sto64:
   2002       case Iop_32Uto64:
   2003       case Iop_V128to64:
   2004       case Iop_V128HIto64:
   2005          return assignNew(mce, Ity_I64, unop(op, vatom));
   2006 
   2007       case Iop_64to32:
   2008       case Iop_64HIto32:
   2009       case Iop_1Uto32:
   2010       case Iop_8Uto32:
   2011       case Iop_16Uto32:
   2012       case Iop_16Sto32:
   2013       case Iop_8Sto32:
   2014          return assignNew(mce, Ity_I32, unop(op, vatom));
   2015 
   2016       case Iop_8Sto16:
   2017       case Iop_8Uto16:
   2018       case Iop_32to16:
   2019       case Iop_32HIto16:
   2020          return assignNew(mce, Ity_I16, unop(op, vatom));
   2021 
   2022       case Iop_1Uto8:
   2023       case Iop_16to8:
   2024       case Iop_32to8:
   2025          return assignNew(mce, Ity_I8, unop(op, vatom));
   2026 
   2027       case Iop_32to1:
   2028          return assignNew(mce, Ity_I1, unop(Iop_32to1, vatom));
   2029 
   2030       case Iop_ReinterpF64asI64:
   2031       case Iop_ReinterpI64asF64:
   2032       case Iop_ReinterpI32asF32:
   2033       case Iop_NotV128:
   2034       case Iop_Not64:
   2035       case Iop_Not32:
   2036       case Iop_Not16:
   2037       case Iop_Not8:
   2038       case Iop_Not1:
   2039          return vatom;
   2040 
   2041       default:
   2042          ppIROp(op);
   2043          VG_(tool_panic)("memcheck:expr2vbits_Unop");
   2044    }
   2045 }
   2046 
   2047 
   2048 /* Worker function; do not call directly. */
   2049 static
   2050 IRAtom* expr2vbits_LDle_WRK ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
   2051 {
   2052    void*    helper;
   2053    HChar*   hname;
   2054    IRDirty* di;
   2055    IRTemp   datavbits;
   2056    IRAtom*  addrAct;
   2057 
   2058    tl_assert(isOriginalAtom(mce,addr));
   2059 
   2060    /* First, emit a definedness test for the address.  This also sets
   2061       the address (shadow) to 'defined' following the test. */
   2062    complainIfUndefined( mce, addr );
   2063 
   2064    /* Now cook up a call to the relevant helper function, to read the
   2065       data V bits from shadow memory. */
   2066    ty = shadowType(ty);
   2067    switch (ty) {
   2068       case Ity_I64: helper = &MC_(helperc_LOADV8);
   2069                     hname = "MC_(helperc_LOADV8)";
   2070                     break;
   2071       case Ity_I32: helper = &MC_(helperc_LOADV4);
   2072                     hname = "MC_(helperc_LOADV4)";
   2073                     break;
   2074       case Ity_I16: helper = &MC_(helperc_LOADV2);
   2075                     hname = "MC_(helperc_LOADV2)";
   2076                     break;
   2077       case Ity_I8:  helper = &MC_(helperc_LOADV1);
   2078                     hname = "MC_(helperc_LOADV1)";
   2079                     break;
   2080       default:      ppIRType(ty);
   2081                     VG_(tool_panic)("memcheck:do_shadow_LDle");
   2082    }
   2083 
   2084    /* Generate the actual address into addrAct. */
   2085    if (bias == 0) {
   2086       addrAct = addr;
   2087    } else {
   2088       IROp    mkAdd;
   2089       IRAtom* eBias;
   2090       IRType  tyAddr  = mce->hWordTy;
   2091       tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
   2092       mkAdd   = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
   2093       eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
   2094       addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
   2095    }
   2096 
   2097    /* We need to have a place to park the V bits we're just about to
   2098       read. */
   2099    datavbits = newIRTemp(mce->bb->tyenv, ty);
   2100    di = unsafeIRDirty_1_N( datavbits,
   2101                            1/*regparms*/, hname, helper,
   2102                            mkIRExprVec_1( addrAct ));
   2103    setHelperAnns( mce, di );
   2104    stmt( mce->bb, IRStmt_Dirty(di) );
   2105 
   2106    return mkexpr(datavbits);
   2107 }
   2108 
   2109 
   2110 static
   2111 IRAtom* expr2vbits_LDle ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
   2112 {
   2113    IRAtom *v64hi, *v64lo;
   2114    switch (shadowType(ty)) {
   2115       case Ity_I8:
   2116       case Ity_I16:
   2117       case Ity_I32:
   2118       case Ity_I64:
   2119          return expr2vbits_LDle_WRK(mce, ty, addr, bias);
   2120       case Ity_V128:
   2121          v64lo = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias);
   2122          v64hi = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias+8);
   2123          return assignNew( mce,
   2124                            Ity_V128,
   2125                            binop(Iop_64HLtoV128, v64hi, v64lo));
   2126       default:
   2127          VG_(tool_panic)("expr2vbits_LDle");
   2128    }
   2129 }
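
         /* Byte-level sketch of the V128 split above: on a little-endian
            target the low 64 bits of the vector sit at the lower address,
            so the lo half loads at 'bias' and the hi half at 'bias+8';
            Iop_64HLtoV128 then takes (hi, lo).  Illustrative only. */
         __attribute__((unused))
         static void v128_load_model ( const UChar* mem, UInt bias,
                                       ULong* hi64, ULong* lo64 )
         {
            memcpy(lo64, mem + bias,     8);
            memcpy(hi64, mem + bias + 8, 8);
         }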
   2130 
   2131 
   2132 static
   2133 IRAtom* expr2vbits_ITE ( MCEnv* mce,
   2134                          IRAtom* cond, IRAtom* iftrue, IRAtom* iffalse )
   2135 {
   2136    IRAtom *vbitsC, *vbits0, *vbits1;
   2137    IRType ty;
   2138    /* Given ITE(cond,iftrue,iffalse), generate
   2139          ITE(cond,iftrue#,iffalse#) `UifU` PCast(cond#)
   2140       That is, steer the V bits like the originals, but trash the
   2141       result if the steering value is undefined.  This gives
   2142       lazy propagation. */
   2143    tl_assert(isOriginalAtom(mce, cond));
   2144    tl_assert(isOriginalAtom(mce, iftrue));
   2145    tl_assert(isOriginalAtom(mce, iffalse));
   2146 
   2147    vbitsC = expr2vbits(mce, cond);
   2148    vbits0 = expr2vbits(mce, iffalse);
   2149    vbits1 = expr2vbits(mce, iftrue);
   2150    ty = typeOfIRExpr(mce->bb->tyenv, vbits0);
   2151 
   2152    return
   2153       mkUifU(mce, ty, assignNew(mce, ty, IRExpr_ITE(cond, vbits1, vbits0)),
   2154                       mkPCastTo(mce, ty, vbitsC) );
   2155 }
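
         /* Scalar sketch of the ITE rule above: steer the V bits exactly as
            the original ITE does, then force everything undefined whenever
            the condition's own V bits (condUndef) are.  Assumes the usual
            PCast/UifU model; illustrative only. */
         __attribute__((unused))
         static UInt ite_vbits_model ( Bool cond, UInt condUndef,
                                       UInt vbitsIfTrue, UInt vbitsIfFalse )
         {
            UInt steered = cond ? vbitsIfTrue : vbitsIfFalse;
            return steered | (condUndef != 0 ? 0xFFFFFFFF : 0);
         }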
   2156 
   2157 /* --------- This is the main expression-handling function. --------- */
   2158 
   2159 static
   2160 IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
   2161 {
   2162    switch (e->tag) {
   2163 
   2164       case Iex_Get:
   2165          return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
   2166 
   2167       case Iex_GetI:
   2168          return shadow_GETI( mce, e->Iex.GetI.descr,
   2169                                   e->Iex.GetI.ix, e->Iex.GetI.bias );
   2170 
   2171       case Iex_RdTmp:
   2172          return IRExpr_RdTmp( findShadowTmp(mce, e->Iex.RdTmp.tmp) );
   2173 
   2174       case Iex_Const:
   2175          return definedOfType(shadowType(typeOfIRExpr(mce->bb->tyenv, e)));
   2176 
   2177       case Iex_Binop:
   2178          return expr2vbits_Binop(
   2179                    mce,
   2180                    e->Iex.Binop.op,
   2181                    e->Iex.Binop.arg1, e->Iex.Binop.arg2
   2182                 );
   2183 
   2184       case Iex_Unop:
   2185          return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
   2186 
   2187       case Iex_Load:
   2188          return expr2vbits_LDle( mce, e->Iex.Load.ty,
   2189                                       e->Iex.Load.addr, 0/*addr bias*/ );
   2190 
   2191       case Iex_CCall:
   2192          return mkLazyN( mce, e->Iex.CCall.args,
   2193                               e->Iex.CCall.retty,
   2194                               e->Iex.CCall.cee );
   2195 
   2196       case Iex_ITE:
   2197          return expr2vbits_ITE( mce, e->Iex.ITE.cond, e->Iex.ITE.iftrue,
   2198                                 e->Iex.ITE.iffalse);
   2199 
   2200       default:
   2201          VG_(printf)("\n");
   2202          ppIRExpr(e);
   2203          VG_(printf)("\n");
   2204          VG_(tool_panic)("memcheck: expr2vbits");
   2205    }
   2206 }
   2207 
   2208 /*------------------------------------------------------------*/
   2209 /*--- Generate shadow stmts from all kinds of IRStmts.     ---*/
   2210 /*------------------------------------------------------------*/
   2211 
   2212 /* Widen a value to the host word size. */
   2213 
   2214 static
   2215 IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
   2216 {
   2217    IRType ty, tyH;
   2218 
    2219    /* vatom is a vbits-value and as such can only have a shadow type. */
   2220    tl_assert(isShadowAtom(mce,vatom));
   2221 
   2222    ty  = typeOfIRExpr(mce->bb->tyenv, vatom);
   2223    tyH = mce->hWordTy;
   2224 
   2225    if (tyH == Ity_I32) {
   2226       switch (ty) {
   2227          case Ity_I32: return vatom;
   2228          case Ity_I16: return assignNew(mce, tyH, unop(Iop_16Uto32, vatom));
   2229          case Ity_I8:  return assignNew(mce, tyH, unop(Iop_8Uto32, vatom));
   2230          default:      goto unhandled;
   2231       }
   2232    } else {
   2233       goto unhandled;
   2234    }
   2235   unhandled:
   2236    VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
   2237    VG_(tool_panic)("zwidenToHostWord");
   2238 }
   2239 
   2240 
   2241 /* Generate a shadow store.  addr is always the original address atom.
    2242    Pass the data either as the original atom (data) or as its V-bits
    2243    (vdata), but not both.  */
   2244 
   2245 static
   2246 void do_shadow_STle ( MCEnv* mce,
   2247                       IRAtom* addr, UInt bias,
   2248                       IRAtom* data, IRAtom* vdata )
   2249 {
   2250    IROp     mkAdd;
   2251    IRType   ty, tyAddr;
   2252    IRDirty  *di, *diLo64, *diHi64;
   2253    IRAtom   *addrAct, *addrLo64, *addrHi64;
   2254    IRAtom   *vdataLo64, *vdataHi64;
   2255    IRAtom   *eBias, *eBias0, *eBias8;
   2256    void*    helper = NULL;
   2257    HChar*   hname = NULL;
   2258 
   2259    tyAddr = mce->hWordTy;
   2260    mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
   2261    tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
   2262 
   2263    di = diLo64 = diHi64 = NULL;
   2264    eBias = eBias0 = eBias8 = NULL;
   2265    addrAct = addrLo64 = addrHi64 = NULL;
   2266    vdataLo64 = vdataHi64 = NULL;
   2267 
   2268    if (data) {
   2269       tl_assert(!vdata);
   2270       tl_assert(isOriginalAtom(mce, data));
   2271       tl_assert(bias == 0);
   2272       vdata = expr2vbits( mce, data );
   2273    } else {
   2274       tl_assert(vdata);
   2275    }
   2276 
   2277    tl_assert(isOriginalAtom(mce,addr));
   2278    tl_assert(isShadowAtom(mce,vdata));
   2279 
   2280    ty = typeOfIRExpr(mce->bb->tyenv, vdata);
   2281 
   2282    /* First, emit a definedness test for the address.  This also sets
   2283       the address (shadow) to 'defined' following the test. */
   2284    complainIfUndefined( mce, addr );
   2285 
   2286    /* Now decide which helper function to call to write the data V
   2287       bits into shadow memory. */
   2288    switch (ty) {
   2289       case Ity_V128: /* we'll use the helper twice */
   2290       case Ity_I64: helper = &MC_(helperc_STOREV8);
   2291                     hname = "MC_(helperc_STOREV8)";
   2292                     break;
   2293       case Ity_I32: helper = &MC_(helperc_STOREV4);
   2294                     hname = "MC_(helperc_STOREV4)";
   2295                     break;
   2296       case Ity_I16: helper = &MC_(helperc_STOREV2);
   2297                     hname = "MC_(helperc_STOREV2)";
   2298                     break;
   2299       case Ity_I8:  helper = &MC_(helperc_STOREV1);
   2300                     hname = "MC_(helperc_STOREV1)";
   2301                     break;
   2302       default:      VG_(tool_panic)("memcheck:do_shadow_STle");
   2303    }
   2304 
   2305    if (ty == Ity_V128) {
   2306 
   2307       /* V128-bit case */
   2308       /* See comment in next clause re 64-bit regparms */
   2309       eBias0    = tyAddr==Ity_I32 ? mkU32(bias)   : mkU64(bias);
   2310       addrLo64  = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias0) );
   2311       vdataLo64 = assignNew(mce, Ity_I64, unop(Iop_V128to64, vdata));
   2312       diLo64    = unsafeIRDirty_0_N(
   2313                      1/*regparms*/, hname, helper,
   2314                      mkIRExprVec_2( addrLo64, vdataLo64 ));
   2315 
   2316       eBias8    = tyAddr==Ity_I32 ? mkU32(bias+8) : mkU64(bias+8);
   2317       addrHi64  = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias8) );
   2318       vdataHi64 = assignNew(mce, Ity_I64, unop(Iop_V128HIto64, vdata));
   2319       diHi64    = unsafeIRDirty_0_N(
   2320                      1/*regparms*/, hname, helper,
   2321                      mkIRExprVec_2( addrHi64, vdataHi64 ));
   2322 
   2323       setHelperAnns( mce, diLo64 );
   2324       setHelperAnns( mce, diHi64 );
   2325       stmt( mce->bb, IRStmt_Dirty(diLo64) );
   2326       stmt( mce->bb, IRStmt_Dirty(diHi64) );
   2327 
   2328    } else {
   2329 
   2330       /* 8/16/32/64-bit cases */
   2331       /* Generate the actual address into addrAct. */
   2332       if (bias == 0) {
   2333          addrAct = addr;
   2334       } else {
   2335          eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
   2336          addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
   2337       }
   2338 
   2339       if (ty == Ity_I64) {
   2340          /* We can't do this with regparm 2 on 32-bit platforms, since
   2341             the back ends aren't clever enough to handle 64-bit
    2342             regparm args.  Therefore use regparm 1 instead. */
   2343          di = unsafeIRDirty_0_N(
   2344                  1/*regparms*/, hname, helper,
   2345                  mkIRExprVec_2( addrAct, vdata ));
   2346       } else {
   2347          di = unsafeIRDirty_0_N(
   2348                  2/*regparms*/, hname, helper,
   2349                  mkIRExprVec_2( addrAct,
   2350                                 zwidenToHostWord( mce, vdata )));
   2351       }
   2352       setHelperAnns( mce, di );
   2353       stmt( mce->bb, IRStmt_Dirty(di) );
   2354    }
   2355 
   2356 }
   2357 
   2358 
   2359 /* Do lazy pessimistic propagation through a dirty helper call, by
   2360    looking at the annotations on it.  This is the most complex part of
   2361    Memcheck. */
   2362 
   2363 static IRType szToITy ( Int n )
   2364 {
   2365    switch (n) {
   2366       case 1: return Ity_I8;
   2367       case 2: return Ity_I16;
   2368       case 4: return Ity_I32;
   2369       case 8: return Ity_I64;
   2370       default: VG_(tool_panic)("szToITy(memcheck)");
   2371    }
   2372 }
   2373 
   2374 static
   2375 void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
   2376 {
   2377    Int     i, n, offset, toDo, gSz, gOff;
   2378    IRAtom  *src, *here, *curr;
   2379    IRType  tyAddr, tySrc, tyDst;
   2380    IRTemp  dst;
   2381 
   2382    /* First check the guard. */
   2383    complainIfUndefined(mce, d->guard);
   2384 
   2385    /* Now round up all inputs and PCast over them. */
   2386    curr = definedOfType(Ity_I32);
   2387 
   2388    /* Inputs: unmasked args */
   2389    for (i = 0; d->args[i]; i++) {
   2390       if (d->cee->mcx_mask & (1<<i)) {
   2391          /* ignore this arg */
   2392       } else {
   2393          here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
   2394          curr = mkUifU32(mce, here, curr);
   2395       }
   2396    }
   2397 
   2398    /* Inputs: guest state that we read. */
   2399    for (i = 0; i < d->nFxState; i++) {
   2400       tl_assert(d->fxState[i].fx != Ifx_None);
   2401       if (d->fxState[i].fx == Ifx_Write)
   2402          continue;
   2403 
   2404       /* Ignore any sections marked as 'always defined'. */
   2405       if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
   2406          if (0)
   2407          VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
   2408                      d->fxState[i].offset, d->fxState[i].size );
   2409          continue;
   2410       }
   2411 
   2412       /* This state element is read or modified.  So we need to
   2413          consider it.  If larger than 8 bytes, deal with it in 8-byte
   2414          chunks. */
   2415       gSz  = d->fxState[i].size;
   2416       gOff = d->fxState[i].offset;
   2417       tl_assert(gSz > 0);
   2418       while (True) {
   2419          if (gSz == 0) break;
   2420          n = gSz <= 8 ? gSz : 8;
   2421          /* update 'curr' with UifU of the state slice
   2422             gOff .. gOff+n-1 */
   2423          tySrc = szToITy( n );
   2424          src   = assignNew( mce, tySrc,
   2425                             shadow_GET(mce, gOff, tySrc ) );
   2426          here = mkPCastTo( mce, Ity_I32, src );
   2427          curr = mkUifU32(mce, here, curr);
   2428          gSz -= n;
   2429          gOff += n;
   2430       }
   2431 
   2432    }
   2433 
   2434    /* Inputs: memory.  First set up some info needed regardless of
   2435       whether we're doing reads or writes. */
   2436    tyAddr = Ity_INVALID;
   2437 
   2438    if (d->mFx != Ifx_None) {
   2439       /* Because we may do multiple shadow loads/stores from the same
   2440          base address, it's best to do a single test of its
   2441          definedness right now.  Post-instrumentation optimisation
   2442          should remove all but this test. */
   2443       tl_assert(d->mAddr);
   2444       complainIfUndefined(mce, d->mAddr);
   2445 
   2446       tyAddr = typeOfIRExpr(mce->bb->tyenv, d->mAddr);
   2447       tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
   2448       tl_assert(tyAddr == mce->hWordTy); /* not really right */
   2449    }
   2450 
   2451    /* Deal with memory inputs (reads or modifies) */
   2452    if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
   2453       offset = 0;
   2454       toDo   = d->mSize;
   2455       /* chew off 32-bit chunks */
   2456       while (toDo >= 4) {
   2457          here = mkPCastTo(
   2458                    mce, Ity_I32,
   2459                    expr2vbits_LDle ( mce, Ity_I32,
   2460                                      d->mAddr, d->mSize - toDo )
   2461                 );
   2462          curr = mkUifU32(mce, here, curr);
   2463          toDo -= 4;
   2464       }
   2465       /* chew off 16-bit chunks */
   2466       while (toDo >= 2) {
   2467          here = mkPCastTo(
   2468                    mce, Ity_I32,
   2469                    expr2vbits_LDle ( mce, Ity_I16,
   2470                                      d->mAddr, d->mSize - toDo )
   2471                 );
   2472          curr = mkUifU32(mce, here, curr);
   2473          toDo -= 2;
   2474       }
   2475       tl_assert(toDo == 0); /* also need to handle 1-byte excess */
   2476    }
   2477 
   2478    /* Whew!  So curr is a 32-bit V-value summarising pessimistically
   2479       all the inputs to the helper.  Now we need to re-distribute the
   2480       results to all destinations. */
   2481 
   2482    /* Outputs: the destination temporary, if there is one. */
   2483    if (d->tmp != IRTemp_INVALID) {
   2484       dst   = findShadowTmp(mce, d->tmp);
   2485       tyDst = typeOfIRTemp(mce->bb->tyenv, d->tmp);
   2486       assign( mce->bb, dst, mkPCastTo( mce, tyDst, curr) );
   2487    }
   2488 
   2489    /* Outputs: guest state that we write or modify. */
   2490    for (i = 0; i < d->nFxState; i++) {
   2491       tl_assert(d->fxState[i].fx != Ifx_None);
   2492       if (d->fxState[i].fx == Ifx_Read)
   2493          continue;
   2494       /* Ignore any sections marked as 'always defined'. */
   2495       if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
   2496          continue;
   2497       /* This state element is written or modified.  So we need to
   2498          consider it.  If larger than 8 bytes, deal with it in 8-byte
   2499          chunks. */
   2500       gSz  = d->fxState[i].size;
   2501       gOff = d->fxState[i].offset;
   2502       tl_assert(gSz > 0);
   2503       while (True) {
   2504          if (gSz == 0) break;
   2505          n = gSz <= 8 ? gSz : 8;
   2506          /* Write suitably-casted 'curr' to the state slice
   2507             gOff .. gOff+n-1 */
   2508          tyDst = szToITy( n );
   2509          do_shadow_PUT( mce, gOff,
   2510                              NULL, /* original atom */
   2511                              mkPCastTo( mce, tyDst, curr ) );
   2512          gSz -= n;
   2513          gOff += n;
   2514       }
   2515    }
   2516 
   2517    /* Outputs: memory that we write or modify. */
   2518    if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
   2519       offset = 0;
   2520       toDo   = d->mSize;
   2521       /* chew off 32-bit chunks */
   2522       while (toDo >= 4) {
   2523          do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
   2524                          NULL, /* original data */
   2525                          mkPCastTo( mce, Ity_I32, curr ) );
   2526          toDo -= 4;
   2527       }
   2528       /* chew off 16-bit chunks */
   2529       while (toDo >= 2) {
   2530          do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
   2531                          NULL, /* original data */
   2532                          mkPCastTo( mce, Ity_I16, curr ) );
   2533          toDo -= 2;
   2534       }
   2535       tl_assert(toDo == 0); /* also need to handle 1-byte excess */
   2536    }
   2537 
   2538 }
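
         /* Sketch of the memory chunking used twice above: d->mSize bytes
            are visited in 32-bit then 16-bit pieces, the current piece
            living at offset mSize - toDo.  As the asserts above note, a
            1-byte excess is not yet handled. */
         __attribute__((unused))
         static void dirty_mem_chunks_model ( Int mSize )
         {
            Int toDo = mSize;
            while (toDo >= 4) { /* 4 bytes at mSize - toDo */ toDo -= 4; }
            while (toDo >= 2) { /* 2 bytes at mSize - toDo */ toDo -= 2; }
            assert(toDo == 0);
         }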
   2539 
   2540 
   2541 /*------------------------------------------------------------*/
   2542 /*--- Memcheck main                                        ---*/
   2543 /*------------------------------------------------------------*/
   2544 
   2545 static Bool isBogusAtom ( IRAtom* at )
   2546 {
   2547    ULong n = 0;
   2548    IRConst* con;
   2549    tl_assert(isIRAtom(at));
   2550    if (at->tag == Iex_RdTmp)
   2551       return False;
   2552    tl_assert(at->tag == Iex_Const);
   2553    con = at->Iex.Const.con;
   2554    switch (con->tag) {
   2555       case Ico_U8:  n = (ULong)con->Ico.U8; break;
   2556       case Ico_U16: n = (ULong)con->Ico.U16; break;
   2557       case Ico_U32: n = (ULong)con->Ico.U32; break;
   2558       case Ico_U64: n = (ULong)con->Ico.U64; break;
   2559       default: ppIRExpr(at); tl_assert(0);
   2560    }
   2561    /* VG_(printf)("%llx\n", n); */
   2562    return (n == 0xFEFEFEFF
   2563            || n == 0x80808080
   2564            || n == 0x1010101
    2565            || n == 0x1010100);
   2566 }
   2567 
   2568 __attribute__((unused))
   2569 static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
   2570 {
   2571    Int     i;
   2572    IRExpr* e;
   2573    switch (st->tag) {
   2574       case Ist_WrTmp:
   2575          e = st->Ist.WrTmp.data;
   2576          switch (e->tag) {
   2577             case Iex_Get:
   2578             case Iex_RdTmp:
   2579                return False;
   2580             case Iex_Unop:
   2581                return isBogusAtom(e->Iex.Unop.arg);
   2582             case Iex_Binop:
   2583                return isBogusAtom(e->Iex.Binop.arg1)
   2584                       || isBogusAtom(e->Iex.Binop.arg2);
   2585             case Iex_ITE:
   2586                return isBogusAtom(e->Iex.ITE.cond)
   2587                       || isBogusAtom(e->Iex.ITE.iftrue)
   2588                       || isBogusAtom(e->Iex.ITE.iffalse);
   2589             case Iex_Load:
   2590                return isBogusAtom(e->Iex.Load.addr);
   2591             case Iex_CCall:
   2592                for (i = 0; e->Iex.CCall.args[i]; i++)
   2593                   if (isBogusAtom(e->Iex.CCall.args[i]))
   2594                      return True;
   2595                return False;
   2596             default:
   2597                goto unhandled;
   2598          }
   2599       case Ist_Put:
   2600          return isBogusAtom(st->Ist.Put.data);
   2601       case Ist_Store:
   2602          return isBogusAtom(st->Ist.Store.addr)
   2603                 || isBogusAtom(st->Ist.Store.data);
   2604       case Ist_Exit:
   2605          return isBogusAtom(st->Ist.Exit.guard);
   2606       default:
   2607       unhandled:
   2608          ppIRStmt(st);
    2609          VG_(tool_panic)("checkForBogusLiterals");
   2610    }
   2611 }
   2612 
   2613 IRSB* mc_instrument ( void* closureV,
   2614                       IRSB* bb_in, VexGuestLayout* layout,
   2615                       VexGuestExtents* vge,
   2616                       IRType gWordTy, IRType hWordTy )
   2617 {
   2618    Bool verboze = False; //True;
   2619 
   2620    /* Bool hasBogusLiterals = False; */
   2621 
   2622    Int i, j, first_stmt;
   2623    IRStmt* st;
   2624    MCEnv mce;
   2625 
   2626    /* Set up BB */
   2627    IRSB* bb     = emptyIRSB();
   2628    bb->tyenv    = deepCopyIRTypeEnv(bb_in->tyenv);
   2629    bb->next     = deepCopyIRExpr(bb_in->next);
   2630    bb->jumpkind = bb_in->jumpkind;
   2631 
   2632    /* Set up the running environment.  Only .bb is modified as we go
   2633       along. */
   2634    mce.bb             = bb;
   2635    mce.layout         = layout;
   2636    mce.n_originalTmps = bb->tyenv->types_used;
   2637    mce.hWordTy        = hWordTy;
   2638    mce.tmpMap         = LibVEX_Alloc(mce.n_originalTmps * sizeof(IRTemp));
   2639    for (i = 0; i < mce.n_originalTmps; i++)
   2640       mce.tmpMap[i] = IRTemp_INVALID;
   2641 
   2642    /* Iterate over the stmts. */
   2643 
    2644    for (i = 0; i < bb_in->stmts_used; i++) {
   2645       st = bb_in->stmts[i];
   2646       if (!st) continue;
   2647 
   2648       tl_assert(isFlatIRStmt(st));
   2649 
   2650       /*
   2651       if (!hasBogusLiterals) {
   2652          hasBogusLiterals = checkForBogusLiterals(st);
   2653          if (hasBogusLiterals) {
   2654             VG_(printf)("bogus: ");
   2655             ppIRStmt(st);
   2656             VG_(printf)("\n");
   2657          }
   2658       }
   2659       */
   2660       first_stmt = bb->stmts_used;
   2661 
   2662       if (verboze) {
   2663          ppIRStmt(st);
   2664          VG_(printf)("\n\n");
   2665       }
   2666 
   2667       switch (st->tag) {
   2668 
   2669          case Ist_WrTmp:
   2670             assign( bb, findShadowTmp(&mce, st->Ist.WrTmp.tmp),
   2671                         expr2vbits( &mce, st->Ist.WrTmp.data) );
   2672             break;
   2673 
   2674          case Ist_Put:
   2675             do_shadow_PUT( &mce,
   2676                            st->Ist.Put.offset,
   2677                            st->Ist.Put.data,
   2678                            NULL /* shadow atom */ );
   2679             break;
   2680 
   2681          case Ist_PutI:
   2682             do_shadow_PUTI( &mce,
   2683                             st->Ist.PutI.details->descr,
   2684                             st->Ist.PutI.details->ix,
   2685                             st->Ist.PutI.details->bias,
   2686                             st->Ist.PutI.details->data );
   2687             break;
   2688 
   2689          case Ist_Store:
   2690             do_shadow_STle( &mce, st->Ist.Store.addr, 0/* addr bias */,
   2691                                   st->Ist.Store.data,
   2692                                   NULL /* shadow data */ );
   2693             break;
   2694 
   2695          case Ist_Exit:
   2696             /* if (!hasBogusLiterals) */
   2697                complainIfUndefined( &mce, st->Ist.Exit.guard );
   2698             break;
   2699 
   2700          case Ist_Dirty:
   2701             do_shadow_Dirty( &mce, st->Ist.Dirty.details );
   2702             break;
   2703 
   2704          case Ist_IMark:
   2705          case Ist_NoOp:
   2706             break;
   2707 
   2708          default:
   2709             VG_(printf)("\n");
   2710             ppIRStmt(st);
   2711             VG_(printf)("\n");
   2712             VG_(tool_panic)("memcheck: unhandled IRStmt");
   2713 
   2714       } /* switch (st->tag) */
   2715 
   2716       if (verboze) {
   2717          for (j = first_stmt; j < bb->stmts_used; j++) {
   2718             VG_(printf)("   ");
   2719             ppIRStmt(bb->stmts[j]);
   2720             VG_(printf)("\n");
   2721          }
   2722          VG_(printf)("\n");
   2723       }
   2724 
   2725       addStmtToIRSB(bb, st);
   2726 
   2727    }
   2728 
   2729    /* Now we need to complain if the jump target is undefined. */
   2730    first_stmt = bb->stmts_used;
   2731 
   2732    if (verboze) {
   2733       VG_(printf)("bb->next = ");
   2734       ppIRExpr(bb->next);
   2735       VG_(printf)("\n\n");
   2736    }
   2737 
   2738    complainIfUndefined( &mce, bb->next );
   2739 
   2740    if (verboze) {
   2741       for (j = first_stmt; j < bb->stmts_used; j++) {
   2742          VG_(printf)("   ");
   2743          ppIRStmt(bb->stmts[j]);
   2744          VG_(printf)("\n");
   2745       }
   2746       VG_(printf)("\n");
   2747    }
   2748 
   2749    return bb;
   2750 }
   2751 #endif /* UNUSED */
   2752 
   2753 /*--------------------------------------------------------------------*/
   2754 /*--- end                                              test_main.c ---*/
   2755 /*--------------------------------------------------------------------*/
   2756