      1 
      2 /*--------------------------------------------------------------------*/
      3 /*--- Instrument IR to perform memory checking operations.         ---*/
      4 /*---                                               mc_translate.c ---*/
      5 /*--------------------------------------------------------------------*/
      6 
      7 /*
      8    This file is part of MemCheck, a heavyweight Valgrind tool for
      9    detecting memory errors.
     10 
     11    Copyright (C) 2000-2012 Julian Seward
      12       jseward@acm.org
     13 
     14    This program is free software; you can redistribute it and/or
     15    modify it under the terms of the GNU General Public License as
     16    published by the Free Software Foundation; either version 2 of the
     17    License, or (at your option) any later version.
     18 
     19    This program is distributed in the hope that it will be useful, but
     20    WITHOUT ANY WARRANTY; without even the implied warranty of
     21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     22    General Public License for more details.
     23 
     24    You should have received a copy of the GNU General Public License
     25    along with this program; if not, write to the Free Software
     26    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
     27    02111-1307, USA.
     28 
     29    The GNU General Public License is contained in the file COPYING.
     30 */
     31 
     32 #include "pub_tool_basics.h"
     33 #include "pub_tool_poolalloc.h"     // For mc_include.h
     34 #include "pub_tool_hashtable.h"     // For mc_include.h
     35 #include "pub_tool_libcassert.h"
     36 #include "pub_tool_libcprint.h"
     37 #include "pub_tool_tooliface.h"
     38 #include "pub_tool_machine.h"     // VG_(fnptr_to_fnentry)
     39 #include "pub_tool_xarray.h"
     40 #include "pub_tool_mallocfree.h"
     41 #include "pub_tool_libcbase.h"
     42 
     43 #include "mc_include.h"
     44 
     45 
     46 /* FIXMEs JRS 2011-June-16.
     47 
     48    Check the interpretation for vector narrowing and widening ops,
     49    particularly the saturating ones.  I suspect they are either overly
     50    pessimistic and/or wrong.
     51 */
     52 
     53 /* This file implements the Memcheck instrumentation, and in
     54    particular contains the core of its undefined value detection
     55    machinery.  For a comprehensive background of the terminology,
     56    algorithms and rationale used herein, read:
     57 
     58      Using Valgrind to detect undefined value errors with
     59      bit-precision
     60 
     61      Julian Seward and Nicholas Nethercote
     62 
     63      2005 USENIX Annual Technical Conference (General Track),
     64      Anaheim, CA, USA, April 10-15, 2005.
     65 
     66    ----
     67 
     68    Here is as good a place as any to record exactly when V bits are and
     69    should be checked, why, and what function is responsible.
     70 
     71 
     72    Memcheck complains when an undefined value is used:
     73 
     74    1. In the condition of a conditional branch.  Because it could cause
     75       incorrect control flow, and thus cause incorrect externally-visible
     76       behaviour.  [mc_translate.c:complainIfUndefined]
     77 
     78    2. As an argument to a system call, or as the value that specifies
     79       the system call number.  Because it could cause an incorrect
     80       externally-visible side effect.  [mc_translate.c:mc_pre_reg_read]
     81 
     82    3. As the address in a load or store.  Because it could cause an
     83       incorrect value to be used later, which could cause externally-visible
     84       behaviour (eg. via incorrect control flow or an incorrect system call
     85       argument)  [complainIfUndefined]
     86 
     87    4. As the target address of a branch.  Because it could cause incorrect
     88       control flow.  [complainIfUndefined]
     89 
     90    5. As an argument to setenv, unsetenv, or putenv.  Because it could put
     91       an incorrect value into the external environment.
     92       [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]
     93 
     94    6. As the index in a GETI or PUTI operation.  I'm not sure why... (njn).
     95       [complainIfUndefined]
     96 
     97    7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
     98       VALGRIND_CHECK_VALUE_IS_DEFINED client requests.  Because the user
     99       requested it.  [in memcheck.h]
    100 
    101 
    102    Memcheck also complains, but should not, when an undefined value is used:
    103 
    104    8. As the shift value in certain SIMD shift operations (but not in the
    105       standard integer shift operations).  This inconsistency is due to
     106       historical reasons.  [complainIfUndefined]
    107 
    108 
    109    Memcheck does not complain, but should, when an undefined value is used:
    110 
    111    9. As an input to a client request.  Because the client request may
    112       affect the visible behaviour -- see bug #144362 for an example
    113       involving the malloc replacements in vg_replace_malloc.c and
    114       VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
    115       isn't identified.  That bug report also has some info on how to solve
    116       the problem.  [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]
    117 
    118 
    119    In practice, 1 and 2 account for the vast majority of cases.
    120 */
    121 
    122 /*------------------------------------------------------------*/
    123 /*--- Forward decls                                        ---*/
    124 /*------------------------------------------------------------*/
    125 
    126 struct _MCEnv;
    127 
    128 static IRType  shadowTypeV ( IRType ty );
    129 static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
    130 static IRTemp  findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );
    131 
    132 static IRExpr *i128_const_zero(void);
    133 
    134 /*------------------------------------------------------------*/
    135 /*--- Memcheck running state, and tmp management.          ---*/
    136 /*------------------------------------------------------------*/
    137 
    138 /* Carries info about a particular tmp.  The tmp's number is not
    139    recorded, as this is implied by (equal to) its index in the tmpMap
    140    in MCEnv.  The tmp's type is also not recorded, as this is present
    141    in MCEnv.sb->tyenv.
    142 
    143    When .kind is Orig, .shadowV and .shadowB may give the identities
    144    of the temps currently holding the associated definedness (shadowV)
    145    and origin (shadowB) values, or these may be IRTemp_INVALID if code
    146    to compute such values has not yet been emitted.
    147 
     148    When .kind is VSh or BSh then the tmp holds a V- or B- value,
    149    and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
    150    illogical for a shadow tmp itself to be shadowed.
    151 */
    152 typedef
    153    enum { Orig=1, VSh=2, BSh=3 }
    154    TempKind;
    155 
    156 typedef
    157    struct {
    158       TempKind kind;
    159       IRTemp   shadowV;
    160       IRTemp   shadowB;
    161    }
    162    TempMapEnt;
    163 
    164 
    165 /* Carries around state during memcheck instrumentation. */
    166 typedef
    167    struct _MCEnv {
    168       /* MODIFIED: the superblock being constructed.  IRStmts are
    169          added. */
    170       IRSB* sb;
    171       Bool  trace;
    172 
    173       /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
    174          current kind and possibly shadow temps for each temp in the
    175          IRSB being constructed.  Note that it does not contain the
    176          type of each tmp.  If you want to know the type, look at the
    177          relevant entry in sb->tyenv.  It follows that at all times
    178          during the instrumentation process, the valid indices for
    179          tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
    180          total number of Orig, V- and B- temps allocated so far.
    181 
    182          The reason for this strange split (types in one place, all
    183          other info in another) is that we need the types to be
    184          attached to sb so as to make it possible to do
     185          "typeOfIRExpr(mce->sb->tyenv, ...)" at various places in the
    186          instrumentation process. */
    187       XArray* /* of TempMapEnt */ tmpMap;
    188 
    189       /* MODIFIED: indicates whether "bogus" literals have so far been
    190          found.  Starts off False, and may change to True. */
    191       Bool bogusLiterals;
    192 
    193       /* READONLY: indicates whether we should use expensive
    194          interpretations of integer adds, since unfortunately LLVM
    195          uses them to do ORs in some circumstances.  Defaulted to True
    196          on MacOS and False everywhere else. */
    197       Bool useLLVMworkarounds;
    198 
    199       /* READONLY: the guest layout.  This indicates which parts of
    200          the guest state should be regarded as 'always defined'. */
    201       VexGuestLayout* layout;
    202 
    203       /* READONLY: the host word type.  Needed for constructing
    204          arguments of type 'HWord' to be passed to helper functions.
    205          Ity_I32 or Ity_I64 only. */
    206       IRType hWordTy;
    207    }
    208    MCEnv;
    209 
    210 /* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
    211    demand), as they are encountered.  This is for two reasons.
    212 
    213    (1) (less important reason): Many original tmps are unused due to
     214    initial IR optimisation, and we do not want to waste space in tables
    215    tracking them.
    216 
    217    Shadow IRTemps are therefore allocated on demand.  mce.tmpMap is a
     218    table indexed [0 .. n_temps-1], which gives the current shadow for
     219    each original tmp, or IRTemp_INVALID if none is so far assigned.
    220    It is necessary to support making multiple assignments to a shadow
    221    -- specifically, after testing a shadow for definedness, it needs
    222    to be made defined.  But IR's SSA property disallows this.
    223 
    224    (2) (more important reason): Therefore, when a shadow needs to get
    225    a new value, a new temporary is created, the value is assigned to
    226    that, and the tmpMap is updated to reflect the new binding.
    227 
    228    A corollary is that if the tmpMap maps a given tmp to
    229    IRTemp_INVALID and we are hoping to read that shadow tmp, it means
    230    there's a read-before-write error in the original tmps.  The IR
    231    sanity checker should catch all such anomalies, however.
    232 */
    233 
    234 /* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
    235    both the table in mce->sb and to our auxiliary mapping.  Note that
    236    newTemp may cause mce->tmpMap to resize, hence previous results
    237    from VG_(indexXA)(mce->tmpMap) are invalidated. */
    238 static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
    239 {
    240    Word       newIx;
    241    TempMapEnt ent;
    242    IRTemp     tmp = newIRTemp(mce->sb->tyenv, ty);
    243    ent.kind    = kind;
    244    ent.shadowV = IRTemp_INVALID;
    245    ent.shadowB = IRTemp_INVALID;
    246    newIx = VG_(addToXA)( mce->tmpMap, &ent );
    247    tl_assert(newIx == (Word)tmp);
    248    return tmp;
    249 }
    250 
    251 
    252 /* Find the tmp currently shadowing the given original tmp.  If none
    253    so far exists, allocate one.  */
    254 static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
    255 {
    256    TempMapEnt* ent;
    257    /* VG_(indexXA) range-checks 'orig', hence no need to check
    258       here. */
    259    ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
    260    tl_assert(ent->kind == Orig);
    261    if (ent->shadowV == IRTemp_INVALID) {
    262       IRTemp tmpV
    263         = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
    264       /* newTemp may cause mce->tmpMap to resize, hence previous results
    265          from VG_(indexXA) are invalid. */
    266       ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
    267       tl_assert(ent->kind == Orig);
    268       tl_assert(ent->shadowV == IRTemp_INVALID);
    269       ent->shadowV = tmpV;
    270    }
    271    return ent->shadowV;
    272 }
    273 
    274 /* Allocate a new shadow for the given original tmp.  This means any
    275    previous shadow is abandoned.  This is needed because it is
    276    necessary to give a new value to a shadow once it has been tested
    277    for undefinedness, but unfortunately IR's SSA property disallows
    278    this.  Instead we must abandon the old shadow, allocate a new one
    279    and use that instead.
    280 
    281    This is the same as findShadowTmpV, except we don't bother to see
    282    if a shadow temp already existed -- we simply allocate a new one
    283    regardless. */
    284 static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
    285 {
    286    TempMapEnt* ent;
    287    /* VG_(indexXA) range-checks 'orig', hence no need to check
    288       here. */
    289    ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
    290    tl_assert(ent->kind == Orig);
    291    if (1) {
    292       IRTemp tmpV
    293         = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
    294       /* newTemp may cause mce->tmpMap to resize, hence previous results
    295          from VG_(indexXA) are invalid. */
    296       ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
    297       tl_assert(ent->kind == Orig);
    298       ent->shadowV = tmpV;
    299    }
    300 }
    301 
    302 
    303 /*------------------------------------------------------------*/
    304 /*--- IRAtoms -- a subset of IRExprs                       ---*/
    305 /*------------------------------------------------------------*/
    306 
    307 /* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
    308    isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
    309    input, most of this code deals in atoms.  Usefully, a value atom
    310    always has a V-value which is also an atom: constants are shadowed
    311    by constants, and temps are shadowed by the corresponding shadow
    312    temporary. */
    313 
    314 typedef  IRExpr  IRAtom;
    315 
    316 /* (used for sanity checks only): is this an atom which looks
    317    like it's from original code? */
    318 static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
    319 {
    320    if (a1->tag == Iex_Const)
    321       return True;
    322    if (a1->tag == Iex_RdTmp) {
    323       TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
    324       return ent->kind == Orig;
    325    }
    326    return False;
    327 }
    328 
    329 /* (used for sanity checks only): is this an atom which looks
    330    like it's from shadow code? */
    331 static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
    332 {
    333    if (a1->tag == Iex_Const)
    334       return True;
    335    if (a1->tag == Iex_RdTmp) {
    336       TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
    337       return ent->kind == VSh || ent->kind == BSh;
    338    }
    339    return False;
    340 }
    341 
    342 /* (used for sanity checks only): check that both args are atoms and
    343    are identically-kinded. */
    344 static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
    345 {
    346    if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
    347       return True;
    348    if (a1->tag == Iex_Const && a2->tag == Iex_Const)
    349       return True;
    350    return False;
    351 }
    352 
    353 
    354 /*------------------------------------------------------------*/
    355 /*--- Type management                                      ---*/
    356 /*------------------------------------------------------------*/
    357 
    358 /* Shadow state is always accessed using integer types.  This returns
    359    an integer type with the same size (as per sizeofIRType) as the
    360    given type.  The only valid shadow types are Bit, I8, I16, I32,
    361    I64, I128, V128, V256. */
    362 
    363 static IRType shadowTypeV ( IRType ty )
    364 {
    365    switch (ty) {
    366       case Ity_I1:
    367       case Ity_I8:
    368       case Ity_I16:
    369       case Ity_I32:
    370       case Ity_I64:
    371       case Ity_I128: return ty;
    372       case Ity_F32:  return Ity_I32;
    373       case Ity_D32:  return Ity_I32;
    374       case Ity_F64:  return Ity_I64;
    375       case Ity_D64:  return Ity_I64;
    376       case Ity_F128: return Ity_I128;
    377       case Ity_D128: return Ity_I128;
    378       case Ity_V128: return Ity_V128;
    379       case Ity_V256: return Ity_V256;
    380       default: ppIRType(ty);
    381                VG_(tool_panic)("memcheck:shadowTypeV");
    382    }
    383 }
    384 
    385 /* Produce a 'defined' value of the given shadow type.  Should only be
     386    supplied shadow types (Bit/I8/I16/I32/I64/I128/V128). */
    387 static IRExpr* definedOfType ( IRType ty ) {
    388    switch (ty) {
    389       case Ity_I1:   return IRExpr_Const(IRConst_U1(False));
    390       case Ity_I8:   return IRExpr_Const(IRConst_U8(0));
    391       case Ity_I16:  return IRExpr_Const(IRConst_U16(0));
    392       case Ity_I32:  return IRExpr_Const(IRConst_U32(0));
    393       case Ity_I64:  return IRExpr_Const(IRConst_U64(0));
    394       case Ity_I128: return i128_const_zero();
    395       case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
    396       default:       VG_(tool_panic)("memcheck:definedOfType");
    397    }
    398 }
    399 
    400 
    401 /*------------------------------------------------------------*/
    402 /*--- Constructing IR fragments                            ---*/
    403 /*------------------------------------------------------------*/
    404 
    405 /* add stmt to a bb */
    406 static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
    407    if (mce->trace) {
    408       VG_(printf)("  %c: ", cat);
    409       ppIRStmt(st);
    410       VG_(printf)("\n");
    411    }
    412    addStmtToIRSB(mce->sb, st);
    413 }
    414 
    415 /* assign value to tmp */
    416 static inline
    417 void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
    418    stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
    419 }
    420 
    421 /* build various kinds of expressions */
    422 #define triop(_op, _arg1, _arg2, _arg3) \
    423                                  IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
    424 #define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
    425 #define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
    426 #define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
    427 #define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
    428 #define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
    429 #define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
    430 #define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
    431 #define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))
    432 
    433 /* Bind the given expression to a new temporary, and return the
    434    temporary.  This effectively converts an arbitrary expression into
    435    an atom.
    436 
    437    'ty' is the type of 'e' and hence the type that the new temporary
    438    needs to be.  But passing it in is redundant, since we can deduce
    439    the type merely by inspecting 'e'.  So at least use that fact to
    440    assert that the two types agree. */
    441 static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
    442 {
    443    TempKind k;
    444    IRTemp   t;
    445    IRType   tyE = typeOfIRExpr(mce->sb->tyenv, e);
    446 
    447    tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
    448    switch (cat) {
    449       case 'V': k = VSh;  break;
    450       case 'B': k = BSh;  break;
    451       case 'C': k = Orig; break;
    452                 /* happens when we are making up new "orig"
    453                    expressions, for IRCAS handling */
    454       default: tl_assert(0);
    455    }
    456    t = newTemp(mce, ty, k);
    457    assign(cat, mce, t, e);
    458    return mkexpr(t);
    459 }
    460 
    461 
    462 /*------------------------------------------------------------*/
    463 /*--- Helper functions for 128-bit ops                     ---*/
    464 /*------------------------------------------------------------*/
    465 
    466 static IRExpr *i128_const_zero(void)
    467 {
    468    IRAtom* z64 = IRExpr_Const(IRConst_U64(0));
    469    return binop(Iop_64HLto128, z64, z64);
    470 }
    471 
    472 /* There are no I128-bit loads and/or stores [as generated by any
    473    current front ends].  So we do not need to worry about that in
     474    expr2vbits_Load. */
    475 
    476 
    477 /*------------------------------------------------------------*/
    478 /*--- Constructing definedness primitive ops               ---*/
    479 /*------------------------------------------------------------*/
    480 
    481 /* --------- Defined-if-either-defined --------- */
    482 
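         /* In Memcheck's V-bit encoding a 0 bit means "defined" and a 1 bit
            means "undefined" (see the ImproveAND/ImproveOR comments below).
            So ANDing two V-values yields a bit that is defined if it is
            defined in either argument (DifD), and ORing them yields a bit
            that is undefined if it is undefined in either argument (UifU),
            which is exactly what the helpers in this section compute. */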
    483 static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
    484    tl_assert(isShadowAtom(mce,a1));
    485    tl_assert(isShadowAtom(mce,a2));
    486    return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2));
    487 }
    488 
    489 static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
    490    tl_assert(isShadowAtom(mce,a1));
    491    tl_assert(isShadowAtom(mce,a2));
    492    return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2));
    493 }
    494 
    495 static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
    496    tl_assert(isShadowAtom(mce,a1));
    497    tl_assert(isShadowAtom(mce,a2));
    498    return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2));
    499 }
    500 
    501 static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
    502    tl_assert(isShadowAtom(mce,a1));
    503    tl_assert(isShadowAtom(mce,a2));
    504    return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2));
    505 }
    506 
    507 static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
    508    tl_assert(isShadowAtom(mce,a1));
    509    tl_assert(isShadowAtom(mce,a2));
    510    return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2));
    511 }
    512 
    513 static IRAtom* mkDifDV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
    514    tl_assert(isShadowAtom(mce,a1));
    515    tl_assert(isShadowAtom(mce,a2));
    516    return assignNew('V', mce, Ity_V256, binop(Iop_AndV256, a1, a2));
    517 }
    518 
    519 /* --------- Undefined-if-either-undefined --------- */
    520 
    521 static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
    522    tl_assert(isShadowAtom(mce,a1));
    523    tl_assert(isShadowAtom(mce,a2));
    524    return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2));
    525 }
    526 
    527 static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
    528    tl_assert(isShadowAtom(mce,a1));
    529    tl_assert(isShadowAtom(mce,a2));
    530    return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2));
    531 }
    532 
    533 static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
    534    tl_assert(isShadowAtom(mce,a1));
    535    tl_assert(isShadowAtom(mce,a2));
    536    return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2));
    537 }
    538 
    539 static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
    540    tl_assert(isShadowAtom(mce,a1));
    541    tl_assert(isShadowAtom(mce,a2));
    542    return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
    543 }
    544 
    545 static IRAtom* mkUifU128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
    546    IRAtom *tmp1, *tmp2, *tmp3, *tmp4, *tmp5, *tmp6;
    547    tl_assert(isShadowAtom(mce,a1));
    548    tl_assert(isShadowAtom(mce,a2));
    549    tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a1));
    550    tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a1));
    551    tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a2));
    552    tmp4 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a2));
    553    tmp5 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp1, tmp3));
    554    tmp6 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp4));
    555 
    556    return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp6, tmp5));
    557 }
    558 
    559 static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
    560    tl_assert(isShadowAtom(mce,a1));
    561    tl_assert(isShadowAtom(mce,a2));
    562    return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2));
    563 }
    564 
    565 static IRAtom* mkUifUV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
    566    tl_assert(isShadowAtom(mce,a1));
    567    tl_assert(isShadowAtom(mce,a2));
    568    return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, a1, a2));
    569 }
    570 
    571 static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
    572    switch (vty) {
    573       case Ity_I8:   return mkUifU8(mce, a1, a2);
    574       case Ity_I16:  return mkUifU16(mce, a1, a2);
    575       case Ity_I32:  return mkUifU32(mce, a1, a2);
    576       case Ity_I64:  return mkUifU64(mce, a1, a2);
    577       case Ity_I128: return mkUifU128(mce, a1, a2);
    578       case Ity_V128: return mkUifUV128(mce, a1, a2);
    579       default:
    580          VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
    581          VG_(tool_panic)("memcheck:mkUifU");
    582    }
    583 }
    584 
    585 /* --------- The Left-family of operations. --------- */
    586 
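         /* Iop_LeftN is VEX's "x | -x" operation (per its description in
            libvex_ir.h): it takes the lowest set bit of x and smears it
            leftwards up to the MSB.  Applied to a V-value this turns "some
            bit at or below position i is undefined" into "all bits at or
            above position i are undefined", the pessimistic-but-cheap
            summary used for carry-propagating operations such as add and
            subtract. */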
    587 static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
    588    tl_assert(isShadowAtom(mce,a1));
    589    return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1));
    590 }
    591 
    592 static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
    593    tl_assert(isShadowAtom(mce,a1));
    594    return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1));
    595 }
    596 
    597 static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
    598    tl_assert(isShadowAtom(mce,a1));
    599    return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1));
    600 }
    601 
    602 static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
    603    tl_assert(isShadowAtom(mce,a1));
    604    return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
    605 }
    606 
    607 /* --------- 'Improvement' functions for AND/OR. --------- */
    608 
     609 /* ImproveAND(data, vbits) = data OR vbits.  Bits where data is a
     610    defined (0) zero give defined (0); all others give undefined (1).
    611 */
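         /* Worked example (8 bits): if data == 0x00 and is fully defined
            (vbits == 0x00), then ImproveAND8 = 0x00 | 0x00 = 0x00, i.e. all
            defined.  DifD'ing that onto the naive UifU term marks the whole
            result of the AND as defined -- correctly, since 0 & x == 0 no
            matter how undefined x is. */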
    612 static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
    613 {
    614    tl_assert(isOriginalAtom(mce, data));
    615    tl_assert(isShadowAtom(mce, vbits));
    616    tl_assert(sameKindedAtoms(data, vbits));
    617    return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits));
    618 }
    619 
    620 static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
    621 {
    622    tl_assert(isOriginalAtom(mce, data));
    623    tl_assert(isShadowAtom(mce, vbits));
    624    tl_assert(sameKindedAtoms(data, vbits));
    625    return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits));
    626 }
    627 
    628 static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
    629 {
    630    tl_assert(isOriginalAtom(mce, data));
    631    tl_assert(isShadowAtom(mce, vbits));
    632    tl_assert(sameKindedAtoms(data, vbits));
    633    return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits));
    634 }
    635 
    636 static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
    637 {
    638    tl_assert(isOriginalAtom(mce, data));
    639    tl_assert(isShadowAtom(mce, vbits));
    640    tl_assert(sameKindedAtoms(data, vbits));
    641    return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits));
    642 }
    643 
    644 static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
    645 {
    646    tl_assert(isOriginalAtom(mce, data));
    647    tl_assert(isShadowAtom(mce, vbits));
    648    tl_assert(sameKindedAtoms(data, vbits));
    649    return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits));
    650 }
    651 
    652 static IRAtom* mkImproveANDV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
    653 {
    654    tl_assert(isOriginalAtom(mce, data));
    655    tl_assert(isShadowAtom(mce, vbits));
    656    tl_assert(sameKindedAtoms(data, vbits));
    657    return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, data, vbits));
    658 }
    659 
     660 /* ImproveOR(data, vbits) = ~data OR vbits.  Bits where data is a
     661    defined (0) one give defined (0); all others give undefined (1).
    662 */
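         /* Worked example (8 bits): if data == 0xFF and is fully defined
            (vbits == 0x00), then ImproveOR8 = ~0xFF | 0x00 = 0x00, i.e. all
            defined.  DifD'ing that onto the naive UifU term marks the whole
            result of the OR as defined -- correctly, since 1 | x == 1 no
            matter how undefined x is. */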
    663 static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
    664 {
    665    tl_assert(isOriginalAtom(mce, data));
    666    tl_assert(isShadowAtom(mce, vbits));
    667    tl_assert(sameKindedAtoms(data, vbits));
    668    return assignNew(
    669              'V', mce, Ity_I8,
    670              binop(Iop_Or8,
    671                    assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)),
    672                    vbits) );
    673 }
    674 
    675 static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
    676 {
    677    tl_assert(isOriginalAtom(mce, data));
    678    tl_assert(isShadowAtom(mce, vbits));
    679    tl_assert(sameKindedAtoms(data, vbits));
    680    return assignNew(
    681              'V', mce, Ity_I16,
    682              binop(Iop_Or16,
    683                    assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)),
    684                    vbits) );
    685 }
    686 
    687 static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
    688 {
    689    tl_assert(isOriginalAtom(mce, data));
    690    tl_assert(isShadowAtom(mce, vbits));
    691    tl_assert(sameKindedAtoms(data, vbits));
    692    return assignNew(
    693              'V', mce, Ity_I32,
    694              binop(Iop_Or32,
    695                    assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)),
    696                    vbits) );
    697 }
    698 
    699 static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
    700 {
    701    tl_assert(isOriginalAtom(mce, data));
    702    tl_assert(isShadowAtom(mce, vbits));
    703    tl_assert(sameKindedAtoms(data, vbits));
    704    return assignNew(
    705              'V', mce, Ity_I64,
    706              binop(Iop_Or64,
    707                    assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)),
    708                    vbits) );
    709 }
    710 
    711 static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
    712 {
    713    tl_assert(isOriginalAtom(mce, data));
    714    tl_assert(isShadowAtom(mce, vbits));
    715    tl_assert(sameKindedAtoms(data, vbits));
    716    return assignNew(
    717              'V', mce, Ity_V128,
    718              binop(Iop_OrV128,
    719                    assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)),
    720                    vbits) );
    721 }
    722 
    723 static IRAtom* mkImproveORV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
    724 {
    725    tl_assert(isOriginalAtom(mce, data));
    726    tl_assert(isShadowAtom(mce, vbits));
    727    tl_assert(sameKindedAtoms(data, vbits));
    728    return assignNew(
    729              'V', mce, Ity_V256,
    730              binop(Iop_OrV256,
    731                    assignNew('V', mce, Ity_V256, unop(Iop_NotV256, data)),
    732                    vbits) );
    733 }
    734 
    735 /* --------- Pessimising casts. --------- */
    736 
    737 /* The function returns an expression of type DST_TY. If any of the VBITS
    738    is undefined (value == 1) the resulting expression has all bits set to
    739    1. Otherwise, all bits are 0. */
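         /* For example, PCast-ing the I32 V-value 0x00000004 (one undefined
            bit) to Ity_I64 yields 0xFFFFFFFFFFFFFFFF (everything undefined),
            whereas PCast-ing 0x00000000 yields 0 (everything defined).  The
            loss of precision is deliberate: it keeps the instrumentation
            cheap. */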
    740 
    741 static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
    742 {
    743    IRType  src_ty;
    744    IRAtom* tmp1;
    745 
    746    /* Note, dst_ty is a shadow type, not an original type. */
    747    tl_assert(isShadowAtom(mce,vbits));
    748    src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);
    749 
    750    /* Fast-track some common cases */
    751    if (src_ty == Ity_I32 && dst_ty == Ity_I32)
    752       return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
    753 
    754    if (src_ty == Ity_I64 && dst_ty == Ity_I64)
    755       return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
    756 
    757    if (src_ty == Ity_I32 && dst_ty == Ity_I64) {
    758       /* PCast the arg, then clone it. */
    759       IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
    760       return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
    761    }
    762 
    763    if (src_ty == Ity_I64 && dst_ty == Ity_I32) {
    764       /* PCast the arg.  This gives all 0s or all 1s.  Then throw away
    765          the top half. */
    766       IRAtom* tmp = assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
    767       return assignNew('V', mce, Ity_I32, unop(Iop_64to32, tmp));
    768    }
    769 
    770    /* Else do it the slow way .. */
    771    /* First of all, collapse vbits down to a single bit. */
    772    tmp1   = NULL;
    773    switch (src_ty) {
    774       case Ity_I1:
    775          tmp1 = vbits;
    776          break;
    777       case Ity_I8:
    778          tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
    779          break;
    780       case Ity_I16:
    781          tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
    782          break;
    783       case Ity_I32:
    784          tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
    785          break;
    786       case Ity_I64:
    787          tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
    788          break;
    789       case Ity_I128: {
    790          /* Gah.  Chop it in half, OR the halves together, and compare
    791             that with zero. */
    792          IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
    793          IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
    794          IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
    795          tmp1         = assignNew('V', mce, Ity_I1,
    796                                        unop(Iop_CmpNEZ64, tmp4));
    797          break;
    798       }
    799       default:
    800          ppIRType(src_ty);
    801          VG_(tool_panic)("mkPCastTo(1)");
    802    }
    803    tl_assert(tmp1);
    804    /* Now widen up to the dst type. */
    805    switch (dst_ty) {
    806       case Ity_I1:
    807          return tmp1;
    808       case Ity_I8:
    809          return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1));
    810       case Ity_I16:
    811          return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
    812       case Ity_I32:
    813          return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
    814       case Ity_I64:
    815          return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
    816       case Ity_V128:
    817          tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
    818          tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
    819          return tmp1;
    820       case Ity_I128:
    821          tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
    822          tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
    823          return tmp1;
    824       default:
    825          ppIRType(dst_ty);
    826          VG_(tool_panic)("mkPCastTo(2)");
    827    }
    828 }
    829 
    830 /* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
    831 /*
    832    Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
     833    PCasting to Ity_I1.  However, sometimes it is necessary to be more
    834    accurate.  The insight is that the result is defined if two
    835    corresponding bits can be found, one from each argument, so that
    836    both bits are defined but are different -- that makes EQ say "No"
    837    and NE say "Yes".  Hence, we compute an improvement term and DifD
    838    it onto the "normal" (UifU) result.
    839 
    840    The result is:
    841 
    842    PCastTo<1> (
    843       -- naive version
    844       PCastTo<sz>( UifU<sz>(vxx, vyy) )
    845 
    846       `DifD<sz>`
    847 
    848       -- improvement term
    849       PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) )
    850    )
    851 
    852    where
    853      vec contains 0 (defined) bits where the corresponding arg bits
    854      are defined but different, and 1 bits otherwise.
    855 
    856      vec = Or<sz>( vxx,   // 0 iff bit defined
    857                    vyy,   // 0 iff bit defined
    858                    Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
    859                  )
    860 
    861      If any bit of vec is 0, the result is defined and so the
    862      improvement term should produce 0...0, else it should produce
    863      1...1.
    864 
    865      Hence require for the improvement term:
    866 
    867         if vec == 1...1 then 1...1 else 0...0
    868      ->
    869         PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) )
    870 
    871    This was extensively re-analysed and checked on 6 July 05.
    872 */
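         /* Illustration: let xx = 5 and yy = 4 with bit 0 of each defined and
            every other bit undefined.  Bit 0 of vec is Or(0, 0, Not(Xor(1,0)))
            = 0, so vec != 1...1, the improvement term PCasts to 0...0, and the
            DifD forces the final verdict to "defined" -- correct, because the
            differing defined bit alone decides EQ/NE whatever the undefined
            bits turn out to be. */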
    873 static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
    874                                     IRType  ty,
    875                                     IRAtom* vxx, IRAtom* vyy,
    876                                     IRAtom* xx,  IRAtom* yy )
    877 {
    878    IRAtom *naive, *vec, *improvement_term;
    879    IRAtom *improved, *final_cast, *top;
    880    IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR;
    881 
    882    tl_assert(isShadowAtom(mce,vxx));
    883    tl_assert(isShadowAtom(mce,vyy));
    884    tl_assert(isOriginalAtom(mce,xx));
    885    tl_assert(isOriginalAtom(mce,yy));
    886    tl_assert(sameKindedAtoms(vxx,xx));
    887    tl_assert(sameKindedAtoms(vyy,yy));
    888 
    889    switch (ty) {
    890       case Ity_I32:
    891          opOR   = Iop_Or32;
    892          opDIFD = Iop_And32;
    893          opUIFU = Iop_Or32;
    894          opNOT  = Iop_Not32;
    895          opXOR  = Iop_Xor32;
    896          opCMP  = Iop_CmpEQ32;
    897          top    = mkU32(0xFFFFFFFF);
    898          break;
    899       case Ity_I64:
    900          opOR   = Iop_Or64;
    901          opDIFD = Iop_And64;
    902          opUIFU = Iop_Or64;
    903          opNOT  = Iop_Not64;
    904          opXOR  = Iop_Xor64;
    905          opCMP  = Iop_CmpEQ64;
    906          top    = mkU64(0xFFFFFFFFFFFFFFFFULL);
    907          break;
    908       default:
    909          VG_(tool_panic)("expensiveCmpEQorNE");
    910    }
    911 
    912    naive
    913       = mkPCastTo(mce,ty,
    914                   assignNew('V', mce, ty, binop(opUIFU, vxx, vyy)));
    915 
    916    vec
    917       = assignNew(
    918            'V', mce,ty,
    919            binop( opOR,
    920                   assignNew('V', mce,ty, binop(opOR, vxx, vyy)),
    921                   assignNew(
    922                      'V', mce,ty,
    923                      unop( opNOT,
    924                            assignNew('V', mce,ty, binop(opXOR, xx, yy))))));
    925 
    926    improvement_term
    927       = mkPCastTo( mce,ty,
    928                    assignNew('V', mce,Ity_I1, binop(opCMP, vec, top)));
    929 
    930    improved
    931       = assignNew( 'V', mce,ty, binop(opDIFD, naive, improvement_term) );
    932 
    933    final_cast
    934       = mkPCastTo( mce, Ity_I1, improved );
    935 
    936    return final_cast;
    937 }
    938 
    939 
    940 /* --------- Semi-accurate interpretation of CmpORD. --------- */
    941 
    942 /* CmpORD32{S,U} does PowerPC-style 3-way comparisons:
    943 
    944       CmpORD32S(x,y) = 1<<3   if  x <s y
    945                      = 1<<2   if  x >s y
    946                      = 1<<1   if  x == y
    947 
    948    and similarly the unsigned variant.  The default interpretation is:
    949 
    950       CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
    951                                   & (7<<1)
    952 
    953    The "& (7<<1)" reflects the fact that all result bits except 3,2,1
    954    are zero and therefore defined (viz, zero).
    955 
    956    Also deal with a special case better:
    957 
    958       CmpORD32S(x,0)
    959 
    960    Here, bit 3 (LT) of the result is a copy of the top bit of x and
    961    will be defined even if the rest of x isn't.  In which case we do:
    962 
    963       CmpORD32S#(x,x#,0,{impliedly 0}#)
    964          = PCast(x#) & (3<<1)      -- standard interp for GT#,EQ#
    965            | (x# >>u 31) << 3      -- LT# = x#[31]
    966 
    967    Analogous handling for CmpORD64{S,U}.
    968 */
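         /* Example of the special case: for CmpORD32S(x, 0) with x# =
            0x7FFFFFFF (only the sign bit of x defined), the standard
            interpretation would mark bits 3..1 of the result undefined,
            whereas the expression above marks only bits 2..1 (GT, EQ)
            undefined via PCast(x#) & (3<<1), and leaves bit 3 (LT) defined,
            since (x# >>u 31) << 3 == 0. */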
    969 static Bool isZeroU32 ( IRAtom* e )
    970 {
    971    return
    972       toBool( e->tag == Iex_Const
    973               && e->Iex.Const.con->tag == Ico_U32
    974               && e->Iex.Const.con->Ico.U32 == 0 );
    975 }
    976 
    977 static Bool isZeroU64 ( IRAtom* e )
    978 {
    979    return
    980       toBool( e->tag == Iex_Const
    981               && e->Iex.Const.con->tag == Ico_U64
    982               && e->Iex.Const.con->Ico.U64 == 0 );
    983 }
    984 
    985 static IRAtom* doCmpORD ( MCEnv*  mce,
    986                           IROp    cmp_op,
    987                           IRAtom* xxhash, IRAtom* yyhash,
    988                           IRAtom* xx,     IRAtom* yy )
    989 {
    990    Bool   m64    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
    991    Bool   syned  = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
    992    IROp   opOR   = m64 ? Iop_Or64  : Iop_Or32;
    993    IROp   opAND  = m64 ? Iop_And64 : Iop_And32;
    994    IROp   opSHL  = m64 ? Iop_Shl64 : Iop_Shl32;
    995    IROp   opSHR  = m64 ? Iop_Shr64 : Iop_Shr32;
    996    IRType ty     = m64 ? Ity_I64   : Ity_I32;
    997    Int    width  = m64 ? 64        : 32;
    998 
    999    Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;
   1000 
   1001    IRAtom* threeLeft1 = NULL;
   1002    IRAtom* sevenLeft1 = NULL;
   1003 
   1004    tl_assert(isShadowAtom(mce,xxhash));
   1005    tl_assert(isShadowAtom(mce,yyhash));
   1006    tl_assert(isOriginalAtom(mce,xx));
   1007    tl_assert(isOriginalAtom(mce,yy));
   1008    tl_assert(sameKindedAtoms(xxhash,xx));
   1009    tl_assert(sameKindedAtoms(yyhash,yy));
   1010    tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
   1011              || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);
   1012 
   1013    if (0) {
   1014       ppIROp(cmp_op); VG_(printf)(" ");
   1015       ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
   1016    }
   1017 
   1018    if (syned && isZero(yy)) {
   1019       /* fancy interpretation */
   1020       /* if yy is zero, then it must be fully defined (zero#). */
   1021       tl_assert(isZero(yyhash));
   1022       threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
   1023       return
   1024          binop(
   1025             opOR,
   1026             assignNew(
   1027                'V', mce,ty,
   1028                binop(
   1029                   opAND,
   1030                   mkPCastTo(mce,ty, xxhash),
   1031                   threeLeft1
   1032                )),
   1033             assignNew(
   1034                'V', mce,ty,
   1035                binop(
   1036                   opSHL,
   1037                   assignNew(
   1038                      'V', mce,ty,
   1039                      binop(opSHR, xxhash, mkU8(width-1))),
   1040                   mkU8(3)
   1041                ))
   1042 	 );
   1043    } else {
   1044       /* standard interpretation */
   1045       sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
   1046       return
   1047          binop(
   1048             opAND,
   1049             mkPCastTo( mce,ty,
   1050                        mkUifU(mce,ty, xxhash,yyhash)),
   1051             sevenLeft1
   1052          );
   1053    }
   1054 }
   1055 
   1056 
   1057 /*------------------------------------------------------------*/
   1058 /*--- Emit a test and complaint if something is undefined. ---*/
   1059 /*------------------------------------------------------------*/
   1060 
   1061 static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */
   1062 
   1063 
   1064 /* Set the annotations on a dirty helper to indicate that the stack
   1065    pointer and instruction pointers might be read.  This is the
   1066    behaviour of all 'emit-a-complaint' style functions we might
   1067    call. */
   1068 
   1069 static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
   1070    di->nFxState = 2;
   1071    di->fxState[0].fx        = Ifx_Read;
   1072    di->fxState[0].offset    = mce->layout->offset_SP;
   1073    di->fxState[0].size      = mce->layout->sizeof_SP;
   1074    di->fxState[0].nRepeats  = 0;
   1075    di->fxState[0].repeatLen = 0;
   1076    di->fxState[1].fx        = Ifx_Read;
   1077    di->fxState[1].offset    = mce->layout->offset_IP;
   1078    di->fxState[1].size      = mce->layout->sizeof_IP;
   1079    di->fxState[1].nRepeats  = 0;
   1080    di->fxState[1].repeatLen = 0;
   1081 }
   1082 
   1083 
   1084 /* Check the supplied **original** atom for undefinedness, and emit a
   1085    complaint if so.  Once that happens, mark it as defined.  This is
   1086    possible because the atom is either a tmp or literal.  If it's a
   1087    tmp, it will be shadowed by a tmp, and so we can set the shadow to
   1088    be defined.  In fact as mentioned above, we will have to allocate a
   1089    new tmp to carry the new 'defined' shadow value, and update the
   1090    original->tmp mapping accordingly; we cannot simply assign a new
   1091    value to an existing shadow tmp as this breaks SSAness -- resulting
   1092    in the post-instrumentation sanity checker spluttering in disapproval.
   1093 */
   1094 static void complainIfUndefined ( MCEnv* mce, IRAtom* atom, IRExpr *guard )
   1095 {
   1096    IRAtom*  vatom;
   1097    IRType   ty;
   1098    Int      sz;
   1099    IRDirty* di;
   1100    IRAtom*  cond;
   1101    IRAtom*  origin;
   1102    void*    fn;
   1103    HChar*   nm;
   1104    IRExpr** args;
   1105    Int      nargs;
   1106 
   1107    // Don't do V bit tests if we're not reporting undefined value errors.
   1108    if (MC_(clo_mc_level) == 1)
   1109       return;
   1110 
   1111    /* Since the original expression is atomic, there's no duplicated
   1112       work generated by making multiple V-expressions for it.  So we
   1113       don't really care about the possibility that someone else may
    1114       also create a V-interpretation for it. */
   1115    tl_assert(isOriginalAtom(mce, atom));
   1116    vatom = expr2vbits( mce, atom );
   1117    tl_assert(isShadowAtom(mce, vatom));
   1118    tl_assert(sameKindedAtoms(atom, vatom));
   1119 
   1120    ty = typeOfIRExpr(mce->sb->tyenv, vatom);
   1121 
   1122    /* sz is only used for constructing the error message */
   1123    sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);
   1124 
   1125    cond = mkPCastTo( mce, Ity_I1, vatom );
   1126    /* cond will be 0 if all defined, and 1 if any not defined. */
   1127 
   1128    /* Get the origin info for the value we are about to check.  At
   1129       least, if we are doing origin tracking.  If not, use a dummy
   1130       zero origin. */
   1131    if (MC_(clo_mc_level) == 3) {
   1132       origin = schemeE( mce, atom );
   1133       if (mce->hWordTy == Ity_I64) {
   1134          origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
   1135       }
   1136    } else {
   1137       origin = NULL;
   1138    }
   1139 
   1140    fn    = NULL;
   1141    nm    = NULL;
   1142    args  = NULL;
   1143    nargs = -1;
   1144 
   1145    switch (sz) {
   1146       case 0:
   1147          if (origin) {
   1148             fn    = &MC_(helperc_value_check0_fail_w_o);
   1149             nm    = "MC_(helperc_value_check0_fail_w_o)";
   1150             args  = mkIRExprVec_1(origin);
   1151             nargs = 1;
   1152          } else {
   1153             fn    = &MC_(helperc_value_check0_fail_no_o);
   1154             nm    = "MC_(helperc_value_check0_fail_no_o)";
   1155             args  = mkIRExprVec_0();
   1156             nargs = 0;
   1157          }
   1158          break;
   1159       case 1:
   1160          if (origin) {
   1161             fn    = &MC_(helperc_value_check1_fail_w_o);
   1162             nm    = "MC_(helperc_value_check1_fail_w_o)";
   1163             args  = mkIRExprVec_1(origin);
   1164             nargs = 1;
   1165          } else {
   1166             fn    = &MC_(helperc_value_check1_fail_no_o);
   1167             nm    = "MC_(helperc_value_check1_fail_no_o)";
   1168             args  = mkIRExprVec_0();
   1169             nargs = 0;
   1170          }
   1171          break;
   1172       case 4:
   1173          if (origin) {
   1174             fn    = &MC_(helperc_value_check4_fail_w_o);
   1175             nm    = "MC_(helperc_value_check4_fail_w_o)";
   1176             args  = mkIRExprVec_1(origin);
   1177             nargs = 1;
   1178          } else {
   1179             fn    = &MC_(helperc_value_check4_fail_no_o);
   1180             nm    = "MC_(helperc_value_check4_fail_no_o)";
   1181             args  = mkIRExprVec_0();
   1182             nargs = 0;
   1183          }
   1184          break;
   1185       case 8:
   1186          if (origin) {
   1187             fn    = &MC_(helperc_value_check8_fail_w_o);
   1188             nm    = "MC_(helperc_value_check8_fail_w_o)";
   1189             args  = mkIRExprVec_1(origin);
   1190             nargs = 1;
   1191          } else {
   1192             fn    = &MC_(helperc_value_check8_fail_no_o);
   1193             nm    = "MC_(helperc_value_check8_fail_no_o)";
   1194             args  = mkIRExprVec_0();
   1195             nargs = 0;
   1196          }
   1197          break;
   1198       case 2:
   1199       case 16:
   1200          if (origin) {
   1201             fn    = &MC_(helperc_value_checkN_fail_w_o);
   1202             nm    = "MC_(helperc_value_checkN_fail_w_o)";
   1203             args  = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin);
   1204             nargs = 2;
   1205          } else {
   1206             fn    = &MC_(helperc_value_checkN_fail_no_o);
   1207             nm    = "MC_(helperc_value_checkN_fail_no_o)";
   1208             args  = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
   1209             nargs = 1;
   1210          }
   1211          break;
   1212       default:
   1213          VG_(tool_panic)("unexpected szB");
   1214    }
   1215 
   1216    tl_assert(fn);
   1217    tl_assert(nm);
   1218    tl_assert(args);
   1219    tl_assert(nargs >= 0 && nargs <= 2);
   1220    tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
   1221               || (MC_(clo_mc_level) == 2 && origin == NULL) );
   1222 
   1223    di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
   1224                            VG_(fnptr_to_fnentry)( fn ), args );
   1225    di->guard = cond;
   1226 
   1227    /* If the complaint is to be issued under a guard condition, AND that
   1228       guard condition. */
   1229    if (guard) {
   1230      IRAtom *g1 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, di->guard));
   1231      IRAtom *g2 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, guard));
   1232      IRAtom *e  = assignNew('V', mce, Ity_I32, binop(Iop_And32, g1, g2));
   1233 
   1234      di->guard = assignNew('V', mce, Ity_I1, unop(Iop_32to1, e));
   1235    }
   1236 
   1237    setHelperAnns( mce, di );
   1238    stmt( 'V', mce, IRStmt_Dirty(di));
   1239 
   1240    /* Set the shadow tmp to be defined.  First, update the
   1241       orig->shadow tmp mapping to reflect the fact that this shadow is
   1242       getting a new value. */
   1243    tl_assert(isIRAtom(vatom));
   1244    /* sameKindedAtoms ... */
   1245    if (vatom->tag == Iex_RdTmp) {
   1246       tl_assert(atom->tag == Iex_RdTmp);
   1247       newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
   1248       assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
   1249                        definedOfType(ty));
   1250    }
   1251 }
   1252 
   1253 
   1254 /*------------------------------------------------------------*/
   1255 /*--- Shadowing PUTs/GETs, and indexed variants thereof    ---*/
   1256 /*------------------------------------------------------------*/
   1257 
   1258 /* Examine the always-defined sections declared in layout to see if
    1259    the (offset,size) section is within one.  Note, it is an error to
   1260    partially fall into such a region: (offset,size) should either be
   1261    completely in such a region or completely not-in such a region.
   1262 */
   1263 static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
   1264 {
   1265    Int minoffD, maxoffD, i;
   1266    Int minoff = offset;
   1267    Int maxoff = minoff + size - 1;
   1268    tl_assert((minoff & ~0xFFFF) == 0);
   1269    tl_assert((maxoff & ~0xFFFF) == 0);
   1270 
   1271    for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
   1272       minoffD = mce->layout->alwaysDefd[i].offset;
   1273       maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
   1274       tl_assert((minoffD & ~0xFFFF) == 0);
   1275       tl_assert((maxoffD & ~0xFFFF) == 0);
   1276 
   1277       if (maxoff < minoffD || maxoffD < minoff)
   1278          continue; /* no overlap */
   1279       if (minoff >= minoffD && maxoff <= maxoffD)
   1280          return True; /* completely contained in an always-defd section */
   1281 
   1282       VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
   1283    }
   1284    return False; /* could not find any containing section */
   1285 }
   1286 
   1287 
   1288 /* Generate into bb suitable actions to shadow this Put.  If the state
   1289    slice is marked 'always defined', do nothing.  Otherwise, write the
   1290    supplied V bits to the shadow state.  We can pass in either an
   1291    original atom or a V-atom, but not both.  In the former case the
   1292    relevant V-bits are then generated from the original.
   1293    We assume here, that the definedness of GUARD has already been checked.
   1294 */
   1295 static
   1296 void do_shadow_PUT ( MCEnv* mce,  Int offset,
   1297                      IRAtom* atom, IRAtom* vatom, IRExpr *guard )
   1298 {
   1299    IRType ty;
   1300 
   1301    // Don't do shadow PUTs if we're not doing undefined value checking.
   1302    // Their absence lets Vex's optimiser remove all the shadow computation
   1303    // that they depend on, which includes GETs of the shadow registers.
   1304    if (MC_(clo_mc_level) == 1)
   1305       return;
   1306 
   1307    if (atom) {
   1308       tl_assert(!vatom);
   1309       tl_assert(isOriginalAtom(mce, atom));
   1310       vatom = expr2vbits( mce, atom );
   1311    } else {
   1312       tl_assert(vatom);
   1313       tl_assert(isShadowAtom(mce, vatom));
   1314    }
   1315 
   1316    ty = typeOfIRExpr(mce->sb->tyenv, vatom);
   1317    tl_assert(ty != Ity_I1);
   1318    tl_assert(ty != Ity_I128);
   1319    if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
   1320       /* later: no ... */
   1321       /* emit code to emit a complaint if any of the vbits are 1. */
   1322       /* complainIfUndefined(mce, atom); */
   1323    } else {
   1324       /* Do a plain shadow Put. */
   1325       if (guard) {
   1326          /* If the guard expression evaluates to false we simply Put the value
   1327             that is already stored in the guest state slot */
   1328          IRAtom *cond, *iffalse;
   1329 
   1330          cond    = assignNew('V', mce, Ity_I8, unop(Iop_1Uto8, guard));
   1331          iffalse = assignNew('V', mce, ty,
   1332                              IRExpr_Get(offset + mce->layout->total_sizeB, ty));
   1333          vatom   = assignNew('V', mce, ty, IRExpr_Mux0X(cond, iffalse, vatom));
   1334       }
   1335       stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ));
   1336    }
   1337 }
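/* Illustrative sketch only (hypothetical I32 payload at a made-up guest
   offset 24): for a guarded Put, the statements emitted above are roughly

      t1:I8  = 1Uto8(guard)
      t2:I32 = GET:I32(24 + layout->total_sizeB)   -- current shadow value
      t3:I32 = Mux0X(t1, t2, data#)                -- data# = V bits of data
      PUT(24 + layout->total_sizeB) = t3

   so when the guard is false the shadow slot is rewritten with its own
   old contents, i.e. left unchanged. */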
   1338 
   1339 
   1340 /* Generate into bb suitable actions to shadow this PutI (supplied as
   1341    an IRPutI).  The V bits are computed from the original data atom,
   1342    as in do_shadow_PUT above. */
   1343 static
   1344 void do_shadow_PUTI ( MCEnv* mce, IRPutI *puti)
   1345 {
   1346    IRAtom* vatom;
   1347    IRType  ty, tyS;
   1348    Int     arrSize;
   1349    IRRegArray* descr = puti->descr;
   1350    IRAtom*     ix    = puti->ix;
   1351    Int         bias  = puti->bias;
   1352    IRAtom*     atom  = puti->data;
   1353 
   1354    // Don't do shadow PUTIs if we're not doing undefined value checking.
   1355    // Their absence lets Vex's optimiser remove all the shadow computation
   1356    // that they depend on, which includes GETIs of the shadow registers.
   1357    if (MC_(clo_mc_level) == 1)
   1358       return;
   1359 
   1360    tl_assert(isOriginalAtom(mce,atom));
   1361    vatom = expr2vbits( mce, atom );
   1362    tl_assert(sameKindedAtoms(atom, vatom));
   1363    ty   = descr->elemTy;
   1364    tyS  = shadowTypeV(ty);
   1365    arrSize = descr->nElems * sizeofIRType(ty);
   1366    tl_assert(ty != Ity_I1);
   1367    tl_assert(isOriginalAtom(mce,ix));
   1368    complainIfUndefined(mce, ix, NULL);
   1369    if (isAlwaysDefd(mce, descr->base, arrSize)) {
   1370       /* later: no ... */
   1371       /* emit code to emit a complaint if any of the vbits are 1. */
   1372       /* complainIfUndefined(mce, atom); */
   1373    } else {
   1374       /* Do a cloned version of the Put that refers to the shadow
   1375          area. */
   1376       IRRegArray* new_descr
   1377          = mkIRRegArray( descr->base + mce->layout->total_sizeB,
   1378                          tyS, descr->nElems);
   1379       stmt( 'V', mce, IRStmt_PutI( mkIRPutI(new_descr, ix, bias, vatom) ));
   1380    }
   1381 }
   1382 
   1383 
   1384 /* Return an expression which contains the V bits corresponding to the
   1385    given GET (passed in in pieces).
   1386 */
   1387 static
   1388 IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
   1389 {
   1390    IRType tyS = shadowTypeV(ty);
   1391    tl_assert(ty != Ity_I1);
   1392    tl_assert(ty != Ity_I128);
   1393    if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
   1394       /* Always defined, return all zeroes of the relevant type */
   1395       return definedOfType(tyS);
   1396    } else {
   1397       /* return a cloned version of the Get that refers to the shadow
   1398          area. */
   1399       /* FIXME: this isn't an atom! */
   1400       return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
   1401    }
   1402 }
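/* Sketch (arbitrary offset, for illustration): the V bits for
   GET:I32(60) are fetched as GET:I32(60 + layout->total_sizeB).  The
   shadow image of each guest-state slot simply lives total_sizeB bytes
   further along, and do_shadow_PUT above rebases its writes the same
   way. */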
   1403 
   1404 
   1405 /* Return an expression which contains the V bits corresponding to the
   1406    given GETI (passed in in pieces).
   1407 */
   1408 static
   1409 IRExpr* shadow_GETI ( MCEnv* mce,
   1410                       IRRegArray* descr, IRAtom* ix, Int bias )
   1411 {
   1412    IRType ty   = descr->elemTy;
   1413    IRType tyS  = shadowTypeV(ty);
   1414    Int arrSize = descr->nElems * sizeofIRType(ty);
   1415    tl_assert(ty != Ity_I1);
   1416    tl_assert(isOriginalAtom(mce,ix));
   1417    complainIfUndefined(mce, ix, NULL);
   1418    if (isAlwaysDefd(mce, descr->base, arrSize)) {
   1419       /* Always defined, return all zeroes of the relevant type */
   1420       return definedOfType(tyS);
   1421    } else {
   1422       /* return a cloned version of the Get that refers to the shadow
   1423          area. */
   1424       IRRegArray* new_descr
   1425          = mkIRRegArray( descr->base + mce->layout->total_sizeB,
   1426                          tyS, descr->nElems);
   1427       return IRExpr_GetI( new_descr, ix, bias );
   1428    }
   1429 }
   1430 
   1431 
   1432 /*------------------------------------------------------------*/
   1433 /*--- Generating approximations for unknown operations,    ---*/
   1434 /*--- using lazy-propagate semantics                       ---*/
   1435 /*------------------------------------------------------------*/
   1436 
   1437 /* Lazy propagation of undefinedness from two values, resulting in the
   1438    specified shadow type.
   1439 */
   1440 static
   1441 IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
   1442 {
   1443    IRAtom* at;
   1444    IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   1445    IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   1446    tl_assert(isShadowAtom(mce,va1));
   1447    tl_assert(isShadowAtom(mce,va2));
   1448 
   1449    /* The general case is inefficient because PCast is an expensive
   1450       operation.  Here are some special cases which use PCast only
   1451       once rather than twice. */
   1452 
   1453    /* I64 x I64 -> I64 */
   1454    if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
   1455       if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
   1456       at = mkUifU(mce, Ity_I64, va1, va2);
   1457       at = mkPCastTo(mce, Ity_I64, at);
   1458       return at;
   1459    }
   1460 
   1461    /* I64 x I64 -> I32 */
   1462    if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
   1463       if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
   1464       at = mkUifU(mce, Ity_I64, va1, va2);
   1465       at = mkPCastTo(mce, Ity_I32, at);
   1466       return at;
   1467    }
   1468 
   1469    if (0) {
   1470       VG_(printf)("mkLazy2 ");
   1471       ppIRType(t1);
   1472       VG_(printf)("_");
   1473       ppIRType(t2);
   1474       VG_(printf)("_");
   1475       ppIRType(finalVty);
   1476       VG_(printf)("\n");
   1477    }
   1478 
   1479    /* General case: force everything via 32-bit intermediaries. */
   1480    at = mkPCastTo(mce, Ity_I32, va1);
   1481    at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   1482    at = mkPCastTo(mce, finalVty, at);
   1483    return at;
   1484 }
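/* Worked instance of the lazy rule (V bits shown, 1 = undefined, values
   invented for illustration): for the I64 x I64 -> I32 case,
   va1 = 0x00000000000000FF (low byte undefined) and va2 = 0 (fully
   defined) give UifU64 = 0x00000000000000FF, and PCastTo(Ity_I32) of
   that is 0xFFFFFFFF -- a single undefined input bit makes the entire
   result undefined, which is exactly the pessimistic intent. */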
   1485 
   1486 
   1487 /* 3-arg version of the above. */
   1488 static
   1489 IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
   1490                   IRAtom* va1, IRAtom* va2, IRAtom* va3 )
   1491 {
   1492    IRAtom* at;
   1493    IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   1494    IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   1495    IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   1496    tl_assert(isShadowAtom(mce,va1));
   1497    tl_assert(isShadowAtom(mce,va2));
   1498    tl_assert(isShadowAtom(mce,va3));
   1499 
   1500    /* The general case is inefficient because PCast is an expensive
   1501       operation.  Here are some special cases which use PCast only
   1502       twice rather than three times. */
   1503 
   1504    /* I32 x I64 x I64 -> I64 */
   1505    /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   1506    if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
   1507        && finalVty == Ity_I64) {
   1508       if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
   1509       /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
   1510          mode indication which is fully defined, this should get
   1511          folded out later. */
   1512       at = mkPCastTo(mce, Ity_I64, va1);
   1513       /* Now fold in 2nd and 3rd args. */
   1514       at = mkUifU(mce, Ity_I64, at, va2);
   1515       at = mkUifU(mce, Ity_I64, at, va3);
   1516       /* and PCast once again. */
   1517       at = mkPCastTo(mce, Ity_I64, at);
   1518       return at;
   1519    }
   1520 
   1521    /* I32 x I64 x I64 -> I32 */
   1522    if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
   1523        && finalVty == Ity_I32) {
   1524       if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
   1525       at = mkPCastTo(mce, Ity_I64, va1);
   1526       at = mkUifU(mce, Ity_I64, at, va2);
   1527       at = mkUifU(mce, Ity_I64, at, va3);
   1528       at = mkPCastTo(mce, Ity_I32, at);
   1529       return at;
   1530    }
   1531 
   1532    /* I32 x I32 x I32 -> I32 */
   1533    /* 32-bit FP idiom, as (eg) happens on ARM */
   1534    if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32
   1535        && finalVty == Ity_I32) {
   1536       if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n");
   1537       at = va1;
   1538       at = mkUifU(mce, Ity_I32, at, va2);
   1539       at = mkUifU(mce, Ity_I32, at, va3);
   1540       at = mkPCastTo(mce, Ity_I32, at);
   1541       return at;
   1542    }
   1543 
   1544    /* I32 x I128 x I128 -> I128 */
   1545    /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   1546    if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128
   1547        && finalVty == Ity_I128) {
   1548       if (0) VG_(printf)("mkLazy3: I32 x I128 x I128 -> I128\n");
   1549       /* Widen 1st arg to I128.  Since 1st arg is typically a rounding
   1550          mode indication which is fully defined, this should get
   1551          folded out later. */
   1552       at = mkPCastTo(mce, Ity_I128, va1);
   1553       /* Now fold in 2nd and 3rd args. */
   1554       at = mkUifU(mce, Ity_I128, at, va2);
   1555       at = mkUifU(mce, Ity_I128, at, va3);
   1556       /* and PCast once again. */
   1557       at = mkPCastTo(mce, Ity_I128, at);
   1558       return at;
   1559    }
   1560    if (1) {
   1561       VG_(printf)("mkLazy3: ");
   1562       ppIRType(t1);
   1563       VG_(printf)(" x ");
   1564       ppIRType(t2);
   1565       VG_(printf)(" x ");
   1566       ppIRType(t3);
   1567       VG_(printf)(" -> ");
   1568       ppIRType(finalVty);
   1569       VG_(printf)("\n");
   1570    }
   1571 
   1572    tl_assert(0);
   1573    /* General case: force everything via 32-bit intermediaries. */
   1574    /*
   1575    at = mkPCastTo(mce, Ity_I32, va1);
   1576    at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   1577    at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
   1578    at = mkPCastTo(mce, finalVty, at);
   1579    return at;
   1580    */
   1581 }
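/* Example of the FP idiom above: Iop_AddF64 reaches mkLazy3 (via
   expr2vbits_Triop) with shadow types I32(rm) x I64 x I64 -> I64, so
   the rounding-mode V bits are widened to I64, UifU'd with both
   operands' V bits, and PCast'd once more -- hence an undefined
   rounding mode alone is enough to make the whole result undefined. */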
   1582 
   1583 
   1584 /* 4-arg version of the above. */
   1585 static
   1586 IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty,
   1587                   IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
   1588 {
   1589    IRAtom* at;
   1590    IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   1591    IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   1592    IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   1593    IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
   1594    tl_assert(isShadowAtom(mce,va1));
   1595    tl_assert(isShadowAtom(mce,va2));
   1596    tl_assert(isShadowAtom(mce,va3));
   1597    tl_assert(isShadowAtom(mce,va4));
   1598 
   1599    /* The general case is inefficient because PCast is an expensive
   1600       operation.  Here are some special cases which use PCast only
   1601       twice rather than three times. */
   1602 
   1603    /* I32 x I64 x I64 x I64 -> I64 */
   1604    /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
   1605    if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
   1606        && finalVty == Ity_I64) {
   1607       if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
   1608       /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
   1609          mode indication which is fully defined, this should get
   1610          folded out later. */
   1611       at = mkPCastTo(mce, Ity_I64, va1);
   1612       /* Now fold in 2nd, 3rd, 4th args. */
   1613       at = mkUifU(mce, Ity_I64, at, va2);
   1614       at = mkUifU(mce, Ity_I64, at, va3);
   1615       at = mkUifU(mce, Ity_I64, at, va4);
   1616       /* and PCast once again. */
   1617       at = mkPCastTo(mce, Ity_I64, at);
   1618       return at;
   1619    }
   1620    /* I32 x I32 x I32 x I32 -> I32 */
   1621    /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
   1622    if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 && t4 == Ity_I32
   1623        && finalVty == Ity_I32) {
   1624       if (0) VG_(printf)("mkLazy4: I32 x I32 x I32 x I32 -> I32\n");
   1625       at = va1;
   1626       /* Now fold in 2nd, 3rd, 4th args. */
   1627       at = mkUifU(mce, Ity_I32, at, va2);
   1628       at = mkUifU(mce, Ity_I32, at, va3);
   1629       at = mkUifU(mce, Ity_I32, at, va4);
   1630       at = mkPCastTo(mce, Ity_I32, at);
   1631       return at;
   1632    }
   1633 
   1634    if (1) {
   1635       VG_(printf)("mkLazy4: ");
   1636       ppIRType(t1);
   1637       VG_(printf)(" x ");
   1638       ppIRType(t2);
   1639       VG_(printf)(" x ");
   1640       ppIRType(t3);
   1641       VG_(printf)(" x ");
   1642       ppIRType(t4);
   1643       VG_(printf)(" -> ");
   1644       ppIRType(finalVty);
   1645       VG_(printf)("\n");
   1646    }
   1647 
   1648    tl_assert(0);
   1649 }
   1650 
   1651 
   1652 /* Do the lazy propagation game from a null-terminated vector of
   1653    atoms.  This is presumably the arguments to a helper call, so the
   1654    IRCallee info is also supplied in order that we can know which
   1655    arguments should be ignored (via the .mcx_mask field).
   1656 */
   1657 static
   1658 IRAtom* mkLazyN ( MCEnv* mce,
   1659                   IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
   1660 {
   1661    Int     i;
   1662    IRAtom* here;
   1663    IRAtom* curr;
   1664    IRType  mergeTy;
   1665    Bool    mergeTy64 = True;
   1666 
   1667    /* Decide on the type of the merge intermediary.  If all relevant
   1668       args are I64, then it's I64.  In all other circumstances, use
   1669       I32. */
   1670    for (i = 0; exprvec[i]; i++) {
   1671       tl_assert(i < 32);
   1672       tl_assert(isOriginalAtom(mce, exprvec[i]));
   1673       if (cee->mcx_mask & (1<<i))
   1674          continue;
   1675       if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
   1676          mergeTy64 = False;
   1677    }
   1678 
   1679    mergeTy = mergeTy64  ? Ity_I64  : Ity_I32;
   1680    curr    = definedOfType(mergeTy);
   1681 
   1682    for (i = 0; exprvec[i]; i++) {
   1683       tl_assert(i < 32);
   1684       tl_assert(isOriginalAtom(mce, exprvec[i]));
   1685       /* Only take notice of this arg if the callee's mc-exclusion
   1686          mask does not say it is to be excluded. */
   1687       if (cee->mcx_mask & (1<<i)) {
   1688          /* the arg is to be excluded from definedness checking.  Do
   1689             nothing. */
   1690          if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
   1691       } else {
   1692          /* calculate the arg's definedness, and pessimistically merge
   1693             it in. */
   1694          here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i]) );
   1695          curr = mergeTy64
   1696                    ? mkUifU64(mce, here, curr)
   1697                    : mkUifU32(mce, here, curr);
   1698       }
   1699    }
   1700    return mkPCastTo(mce, finalVtype, curr );
   1701 }
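/* Illustration of the exclusion mask (hypothetical callee): if the
   IRCallee was built with mcx_mask = (1<<1), then argument 1 is skipped
   in both loops above, so undefinedness in that argument never reaches
   the call's shadow result and never influences the merge type. */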
   1702 
   1703 
   1704 /*------------------------------------------------------------*/
   1705 /*--- Generating expensive sequences for exact carry-chain ---*/
   1706 /*--- propagation in add/sub and related operations.       ---*/
   1707 /*------------------------------------------------------------*/
   1708 
   1709 static
   1710 IRAtom* expensiveAddSub ( MCEnv*  mce,
   1711                           Bool    add,
   1712                           IRType  ty,
   1713                           IRAtom* qaa, IRAtom* qbb,
   1714                           IRAtom* aa,  IRAtom* bb )
   1715 {
   1716    IRAtom *a_min, *b_min, *a_max, *b_max;
   1717    IROp   opAND, opOR, opXOR, opNOT, opADD, opSUB;
   1718 
   1719    tl_assert(isShadowAtom(mce,qaa));
   1720    tl_assert(isShadowAtom(mce,qbb));
   1721    tl_assert(isOriginalAtom(mce,aa));
   1722    tl_assert(isOriginalAtom(mce,bb));
   1723    tl_assert(sameKindedAtoms(qaa,aa));
   1724    tl_assert(sameKindedAtoms(qbb,bb));
   1725 
   1726    switch (ty) {
   1727       case Ity_I32:
   1728          opAND = Iop_And32;
   1729          opOR  = Iop_Or32;
   1730          opXOR = Iop_Xor32;
   1731          opNOT = Iop_Not32;
   1732          opADD = Iop_Add32;
   1733          opSUB = Iop_Sub32;
   1734          break;
   1735       case Ity_I64:
   1736          opAND = Iop_And64;
   1737          opOR  = Iop_Or64;
   1738          opXOR = Iop_Xor64;
   1739          opNOT = Iop_Not64;
   1740          opADD = Iop_Add64;
   1741          opSUB = Iop_Sub64;
   1742          break;
   1743       default:
   1744          VG_(tool_panic)("expensiveAddSub");
   1745    }
   1746 
   1747    // a_min = aa & ~qaa
   1748    a_min = assignNew('V', mce,ty,
   1749                      binop(opAND, aa,
   1750                                   assignNew('V', mce,ty, unop(opNOT, qaa))));
   1751 
   1752    // b_min = bb & ~qbb
   1753    b_min = assignNew('V', mce,ty,
   1754                      binop(opAND, bb,
   1755                                   assignNew('V', mce,ty, unop(opNOT, qbb))));
   1756 
   1757    // a_max = aa | qaa
   1758    a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa));
   1759 
   1760    // b_max = bb | qbb
   1761    b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb));
   1762 
   1763    if (add) {
   1764       // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
   1765       return
   1766       assignNew('V', mce,ty,
   1767          binop( opOR,
   1768                 assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
   1769                 assignNew('V', mce,ty,
   1770                    binop( opXOR,
   1771                           assignNew('V', mce,ty, binop(opADD, a_min, b_min)),
   1772                           assignNew('V', mce,ty, binop(opADD, a_max, b_max))
   1773                    )
   1774                 )
   1775          )
   1776       );
   1777    } else {
   1778       // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
   1779       return
   1780       assignNew('V', mce,ty,
   1781          binop( opOR,
   1782                 assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
   1783                 assignNew('V', mce,ty,
   1784                    binop( opXOR,
   1785                           assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
   1786                           assignNew('V', mce,ty, binop(opSUB, a_max, b_min))
   1787                    )
   1788                 )
   1789          )
   1790       );
   1791    }
   1792 
   1793 }
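/* Worked example of the add case (8-bit values for brevity; the code
   itself only handles I32/I64): aa = 0x03 with qaa = 0x01 (bit 0
   undefined), bb = 0x01 fully defined (qbb = 0).  Then a_min = 0x02,
   a_max = 0x03, b_min = b_max = 0x01, so
      (a_min + b_min) ^ (a_max + b_max) = 0x03 ^ 0x04 = 0x07
   and OR-ing in (qaa | qbb) = 0x01 still gives 0x07: bits 0..2 of the
   sum are flagged undefined, matching the fact that the true sum is
   either 3 or 4 and those differ in exactly those bits. */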
   1794 
   1795 
   1796 /*------------------------------------------------------------*/
   1797 /*--- Scalar shifts.                                       ---*/
   1798 /*------------------------------------------------------------*/
   1799 
   1800 /* Produce an interpretation for (aa << bb) (or >>s, >>u).  The basic
   1801    idea is to shift the definedness bits by the original shift amount.
   1802    This introduces 0s ("defined") in new positions for left shifts and
   1803    unsigned right shifts, and copies the top definedness bit for
   1804    signed right shifts.  So, conveniently, applying the original shift
   1805    operator to the definedness bits for the left arg is exactly the
   1806    right thing to do:
   1807 
   1808       (qaa << bb)
   1809 
   1810    However if the shift amount is undefined then the whole result
   1811    is undefined.  Hence need:
   1812 
   1813       (qaa << bb) `UifU` PCast(qbb)
   1814 
   1815    If the shift amount bb is a literal then qbb will say 'all defined'
   1816    and the UifU and PCast will get folded out by post-instrumentation
   1817    optimisation.
   1818 */
   1819 static IRAtom* scalarShift ( MCEnv*  mce,
   1820                              IRType  ty,
   1821                              IROp    original_op,
   1822                              IRAtom* qaa, IRAtom* qbb,
   1823                              IRAtom* aa,  IRAtom* bb )
   1824 {
   1825    tl_assert(isShadowAtom(mce,qaa));
   1826    tl_assert(isShadowAtom(mce,qbb));
   1827    tl_assert(isOriginalAtom(mce,aa));
   1828    tl_assert(isOriginalAtom(mce,bb));
   1829    tl_assert(sameKindedAtoms(qaa,aa));
   1830    tl_assert(sameKindedAtoms(qbb,bb));
   1831    return
   1832       assignNew(
   1833          'V', mce, ty,
   1834          mkUifU( mce, ty,
   1835                  assignNew('V', mce, ty, binop(original_op, qaa, bb)),
   1836                  mkPCastTo(mce, ty, qbb)
   1837          )
   1838    );
   1839 }
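/* Example (32-bit Shl, invented values): if bb is the literal 4 and
   qaa = 0x0000000F (low 4 bits of aa undefined), then qaa << 4 =
   0x000000F0 -- bits 4..7 of the result are undefined, while bits 0..3,
   which are zero-filled by the shift, are correctly reported defined.
   Since bb is a constant, qbb is all zeroes and the PCast/UifU term is
   folded away by the post-instrumentation optimiser. */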
   1840 
   1841 
   1842 /*------------------------------------------------------------*/
   1843 /*--- Helpers for dealing with vector primops.             ---*/
   1844 /*------------------------------------------------------------*/
   1845 
   1846 /* Vector pessimisation -- pessimise within each lane individually. */
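/* For instance, mkPCast32x4 (CmpNEZ32x4) maps the V-bits vector
   {0x00000001, 0, 0xFF000000, 0} to {0xFFFFFFFF, 0, 0xFFFFFFFF, 0}:
   a lane with any undefined bit becomes wholly undefined, and a fully
   defined lane stays fully defined. */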
   1847 
   1848 static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
   1849 {
   1850    return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
   1851 }
   1852 
   1853 static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
   1854 {
   1855    return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
   1856 }
   1857 
   1858 static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
   1859 {
   1860    return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
   1861 }
   1862 
   1863 static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
   1864 {
   1865    return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
   1866 }
   1867 
   1868 static IRAtom* mkPCast64x4 ( MCEnv* mce, IRAtom* at )
   1869 {
   1870    return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ64x4, at));
   1871 }
   1872 
   1873 static IRAtom* mkPCast32x8 ( MCEnv* mce, IRAtom* at )
   1874 {
   1875    return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ32x8, at));
   1876 }
   1877 
   1878 static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
   1879 {
   1880    return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
   1881 }
   1882 
   1883 static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
   1884 {
   1885    return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
   1886 }
   1887 
   1888 static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
   1889 {
   1890    return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
   1891 }
   1892 
   1893 static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at )
   1894 {
   1895    return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ16x2, at));
   1896 }
   1897 
   1898 static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at )
   1899 {
   1900    return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ8x4, at));
   1901 }
   1902 
   1903 
   1904 /* Here's a simple scheme capable of handling ops derived from SSE1
   1905    code, while only generating ops that can be efficiently
   1906    implemented in SSE1. */
   1907 
   1908 /* All-lanes versions are straightforward:
   1909 
   1910    binary32Fx4(x,y)   ==> PCast32x4(UifUV128(x#,y#))
   1911 
   1912    unary32Fx4(x)      ==> PCast32x4(x#)
   1913 
   1914    Lowest-lane-only versions are more complex:
   1915 
   1916    binary32F0x4(x,y)  ==> SetV128lo32(
   1917                              x#,
   1918                              PCast32(V128to32(UifUV128(x#,y#)))
   1919                           )
   1920 
   1921    This is perhaps not so obvious.  In particular, it's faster to
   1922    do a V128-bit UifU and then take the bottom 32 bits than the more
   1923    obvious scheme of taking the bottom 32 bits of each operand
   1924    and doing a 32-bit UifU.  Basically since UifU is fast and
   1925    chopping lanes off vector values is slow.
   1926 
   1927    Finally:
   1928 
   1929    unary32F0x4(x)     ==> SetV128lo32(
   1930                              x#,
   1931                              PCast32(V128to32(x#))
   1932                           )
   1933 
   1934    Where:
   1935 
   1936    PCast32(v#)   = 1Sto32(CmpNE32(v#,0))
   1937    PCast32x4(v#) = CmpNEZ32x4(v#)
   1938 */
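/* Concretely, for the all-lanes binary case below this works out as
   CmpNEZ32x4(OrV128(x#, y#)) (UifUV128 is a bitwise Or of the V bits):
   each 32-bit result lane is all-ones iff the corresponding lanes of
   x# or y# contain any undefined bit. */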
   1939 
   1940 static
   1941 IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
   1942 {
   1943    IRAtom* at;
   1944    tl_assert(isShadowAtom(mce, vatomX));
   1945    tl_assert(isShadowAtom(mce, vatomY));
   1946    at = mkUifUV128(mce, vatomX, vatomY);
   1947    at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at));
   1948    return at;
   1949 }
   1950 
   1951 static
   1952 IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
   1953 {
   1954    IRAtom* at;
   1955    tl_assert(isShadowAtom(mce, vatomX));
   1956    at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX));
   1957    return at;
   1958 }
   1959 
   1960 static
   1961 IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
   1962 {
   1963    IRAtom* at;
   1964    tl_assert(isShadowAtom(mce, vatomX));
   1965    tl_assert(isShadowAtom(mce, vatomY));
   1966    at = mkUifUV128(mce, vatomX, vatomY);
   1967    at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at));
   1968    at = mkPCastTo(mce, Ity_I32, at);
   1969    at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
   1970    return at;
   1971 }
   1972 
   1973 static
   1974 IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
   1975 {
   1976    IRAtom* at;
   1977    tl_assert(isShadowAtom(mce, vatomX));
   1978    at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX));
   1979    at = mkPCastTo(mce, Ity_I32, at);
   1980    at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
   1981    return at;
   1982 }
   1983 
   1984 /* --- ... and ... 64Fx2 versions of the same ... --- */
   1985 
   1986 static
   1987 IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
   1988 {
   1989    IRAtom* at;
   1990    tl_assert(isShadowAtom(mce, vatomX));
   1991    tl_assert(isShadowAtom(mce, vatomY));
   1992    at = mkUifUV128(mce, vatomX, vatomY);
   1993    at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at));
   1994    return at;
   1995 }
   1996 
   1997 static
   1998 IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
   1999 {
   2000    IRAtom* at;
   2001    tl_assert(isShadowAtom(mce, vatomX));
   2002    at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX));
   2003    return at;
   2004 }
   2005 
   2006 static
   2007 IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
   2008 {
   2009    IRAtom* at;
   2010    tl_assert(isShadowAtom(mce, vatomX));
   2011    tl_assert(isShadowAtom(mce, vatomY));
   2012    at = mkUifUV128(mce, vatomX, vatomY);
   2013    at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at));
   2014    at = mkPCastTo(mce, Ity_I64, at);
   2015    at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
   2016    return at;
   2017 }
   2018 
   2019 static
   2020 IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
   2021 {
   2022    IRAtom* at;
   2023    tl_assert(isShadowAtom(mce, vatomX));
   2024    at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX));
   2025    at = mkPCastTo(mce, Ity_I64, at);
   2026    at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
   2027    return at;
   2028 }
   2029 
   2030 /* --- --- ... and ... 32Fx2 versions of the same --- --- */
   2031 
   2032 static
   2033 IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
   2034 {
   2035    IRAtom* at;
   2036    tl_assert(isShadowAtom(mce, vatomX));
   2037    tl_assert(isShadowAtom(mce, vatomY));
   2038    at = mkUifU64(mce, vatomX, vatomY);
   2039    at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at));
   2040    return at;
   2041 }
   2042 
   2043 static
   2044 IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX )
   2045 {
   2046    IRAtom* at;
   2047    tl_assert(isShadowAtom(mce, vatomX));
   2048    at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX));
   2049    return at;
   2050 }
   2051 
   2052 /* --- ... and ... 64Fx4 versions of the same ... --- */
   2053 
   2054 static
   2055 IRAtom* binary64Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
   2056 {
   2057    IRAtom* at;
   2058    tl_assert(isShadowAtom(mce, vatomX));
   2059    tl_assert(isShadowAtom(mce, vatomY));
   2060    at = mkUifUV256(mce, vatomX, vatomY);
   2061    at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, at));
   2062    return at;
   2063 }
   2064 
   2065 static
   2066 IRAtom* unary64Fx4 ( MCEnv* mce, IRAtom* vatomX )
   2067 {
   2068    IRAtom* at;
   2069    tl_assert(isShadowAtom(mce, vatomX));
   2070    at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, vatomX));
   2071    return at;
   2072 }
   2073 
   2074 /* --- ... and ... 32Fx8 versions of the same ... --- */
   2075 
   2076 static
   2077 IRAtom* binary32Fx8 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
   2078 {
   2079    IRAtom* at;
   2080    tl_assert(isShadowAtom(mce, vatomX));
   2081    tl_assert(isShadowAtom(mce, vatomY));
   2082    at = mkUifUV256(mce, vatomX, vatomY);
   2083    at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, at));
   2084    return at;
   2085 }
   2086 
   2087 static
   2088 IRAtom* unary32Fx8 ( MCEnv* mce, IRAtom* vatomX )
   2089 {
   2090    IRAtom* at;
   2091    tl_assert(isShadowAtom(mce, vatomX));
   2092    at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, vatomX));
   2093    return at;
   2094 }
   2095 
   2096 /* --- --- Vector saturated narrowing --- --- */
   2097 
   2098 /* We used to do something very clever here, but on closer inspection
   2099    (2011-Jun-15), and in particular bug #279698, it turns out to be
   2100    wrong.  Part of the problem came from the fact that for a long
   2101    time, the IR primops to do with saturated narrowing were
   2102    underspecified and managed to confuse multiple cases which needed
   2103    to be separate: the op names had a signedness qualifier, but in
   2104    fact the source and destination signednesses needed to be specified
   2105    independently, so the op names really need two independent
   2106    signedness specifiers.
   2107 
   2108    As of 2011-Jun-15 (ish) the underspecification was sorted out
   2109    properly.  The incorrect instrumentation remained, though.  That
   2110    has now (2011-Oct-22) been fixed.
   2111 
   2112    What we now do is simple:
   2113 
   2114    Let the original narrowing op be QNarrowBinXtoYxZ, where Z is a
   2115    number of lanes, X is the source lane width and signedness, and Y
   2116    is the destination lane width and signedness.  In all cases the
   2117    destination lane width is half the source lane width, so the names
   2118    have a bit of redundancy, but are at least easy to read.
   2119 
   2120    For example, Iop_QNarrowBin32Sto16Ux8 narrows 8 lanes of signed 32s
   2121    to unsigned 16s.
   2122 
   2123    Let Vanilla(OP) be a function that takes OP, one of these
   2124    saturating narrowing ops, and produces the same "shaped" narrowing
   2125    op which is not saturating, but merely dumps the most significant
   2126    bits.  "same shape" means that the lane numbers and widths are the
   2127    same as with OP.
   2128 
   2129    For example, Vanilla(Iop_QNarrowBin32Sto16Ux8)
   2130                   = Iop_NarrowBin32to16x8,
   2131    that is, narrow 8 lanes of 32 bits to 8 lanes of 16 bits, by
   2132    dumping the top half of each lane.
   2133 
   2134    So, with that in place, the scheme is simple: pessimise each lane
   2135    individually and then apply Vanilla(OP) so as
   2136    to get the result in the right "shape".  If the original OP is
   2137    QNarrowBinXtoYxZ then we produce
   2138 
   2139    Vanilla(OP)( PCast-X-to-X-x-Z(vatom1), PCast-X-to-X-x-Z(vatom2) )
   2140 
   2141    or for the case when OP is unary (Iop_QNarrowUn*)
   2142 
   2143    Vanilla(OP)( PCast-X-to-X-x-Z(vatom) )
   2144 */
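/* Concrete instance: for Iop_QNarrowBin32Sto16Ux8, vectorNarrowBinV128
   below emits NarrowBin32to16x8( CmpNEZ32x4(vatom1), CmpNEZ32x4(vatom2) ).
   Each 32-bit source lane is first pessimised to all-zeroes or all-ones,
   so the subsequent truncation cannot discard undefinedness that could
   have influenced the saturated result. */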
   2145 static
   2146 IROp vanillaNarrowingOpOfShape ( IROp qnarrowOp )
   2147 {
   2148    switch (qnarrowOp) {
   2149       /* Binary: (128, 128) -> 128 */
   2150       case Iop_QNarrowBin16Sto8Ux16:
   2151       case Iop_QNarrowBin16Sto8Sx16:
   2152       case Iop_QNarrowBin16Uto8Ux16:
   2153          return Iop_NarrowBin16to8x16;
   2154       case Iop_QNarrowBin32Sto16Ux8:
   2155       case Iop_QNarrowBin32Sto16Sx8:
   2156       case Iop_QNarrowBin32Uto16Ux8:
   2157          return Iop_NarrowBin32to16x8;
   2158       /* Binary: (64, 64) -> 64 */
   2159       case Iop_QNarrowBin32Sto16Sx4:
   2160          return Iop_NarrowBin32to16x4;
   2161       case Iop_QNarrowBin16Sto8Ux8:
   2162       case Iop_QNarrowBin16Sto8Sx8:
   2163          return Iop_NarrowBin16to8x8;
   2164       /* Unary: 128 -> 64 */
   2165       case Iop_QNarrowUn64Uto32Ux2:
   2166       case Iop_QNarrowUn64Sto32Sx2:
   2167       case Iop_QNarrowUn64Sto32Ux2:
   2168          return Iop_NarrowUn64to32x2;
   2169       case Iop_QNarrowUn32Uto16Ux4:
   2170       case Iop_QNarrowUn32Sto16Sx4:
   2171       case Iop_QNarrowUn32Sto16Ux4:
   2172          return Iop_NarrowUn32to16x4;
   2173       case Iop_QNarrowUn16Uto8Ux8:
   2174       case Iop_QNarrowUn16Sto8Sx8:
   2175       case Iop_QNarrowUn16Sto8Ux8:
   2176          return Iop_NarrowUn16to8x8;
   2177       default:
   2178          ppIROp(qnarrowOp);
   2179          VG_(tool_panic)("vanillaNarrowOpOfShape");
   2180    }
   2181 }
   2182 
   2183 static
   2184 IRAtom* vectorNarrowBinV128 ( MCEnv* mce, IROp narrow_op,
   2185                               IRAtom* vatom1, IRAtom* vatom2)
   2186 {
   2187    IRAtom *at1, *at2, *at3;
   2188    IRAtom* (*pcast)( MCEnv*, IRAtom* );
   2189    switch (narrow_op) {
   2190       case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
   2191       case Iop_QNarrowBin32Uto16Ux8: pcast = mkPCast32x4; break;
   2192       case Iop_QNarrowBin32Sto16Ux8: pcast = mkPCast32x4; break;
   2193       case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
   2194       case Iop_QNarrowBin16Uto8Ux16: pcast = mkPCast16x8; break;
   2195       case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
   2196       default: VG_(tool_panic)("vectorNarrowBinV128");
   2197    }
   2198    IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
   2199    tl_assert(isShadowAtom(mce,vatom1));
   2200    tl_assert(isShadowAtom(mce,vatom2));
   2201    at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
   2202    at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2));
   2203    at3 = assignNew('V', mce, Ity_V128, binop(vanilla_narrow, at1, at2));
   2204    return at3;
   2205 }
   2206 
   2207 static
   2208 IRAtom* vectorNarrowBin64 ( MCEnv* mce, IROp narrow_op,
   2209                             IRAtom* vatom1, IRAtom* vatom2)
   2210 {
   2211    IRAtom *at1, *at2, *at3;
   2212    IRAtom* (*pcast)( MCEnv*, IRAtom* );
   2213    switch (narrow_op) {
   2214       case Iop_QNarrowBin32Sto16Sx4: pcast = mkPCast32x2; break;
   2215       case Iop_QNarrowBin16Sto8Sx8:  pcast = mkPCast16x4; break;
   2216       case Iop_QNarrowBin16Sto8Ux8:  pcast = mkPCast16x4; break;
   2217       default: VG_(tool_panic)("vectorNarrowBin64");
   2218    }
   2219    IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
   2220    tl_assert(isShadowAtom(mce,vatom1));
   2221    tl_assert(isShadowAtom(mce,vatom2));
   2222    at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1));
   2223    at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2));
   2224    at3 = assignNew('V', mce, Ity_I64, binop(vanilla_narrow, at1, at2));
   2225    return at3;
   2226 }
   2227 
   2228 static
   2229 IRAtom* vectorNarrowUnV128 ( MCEnv* mce, IROp narrow_op,
   2230                              IRAtom* vatom1)
   2231 {
   2232    IRAtom *at1, *at2;
   2233    IRAtom* (*pcast)( MCEnv*, IRAtom* );
   2234    tl_assert(isShadowAtom(mce,vatom1));
   2235    /* For vanilla narrowing (non-saturating), we can just apply
   2236       the op directly to the V bits. */
   2237    switch (narrow_op) {
   2238       case Iop_NarrowUn16to8x8:
   2239       case Iop_NarrowUn32to16x4:
   2240       case Iop_NarrowUn64to32x2:
   2241          at1 = assignNew('V', mce, Ity_I64, unop(narrow_op, vatom1));
   2242          return at1;
   2243       default:
   2244          break; /* Do Plan B */
   2245    }
   2246    /* Plan B: for ops that involve a saturation operation on the args,
   2247       we must PCast before the vanilla narrow. */
   2248    switch (narrow_op) {
   2249       case Iop_QNarrowUn16Sto8Sx8:  pcast = mkPCast16x8; break;
   2250       case Iop_QNarrowUn16Sto8Ux8:  pcast = mkPCast16x8; break;
   2251       case Iop_QNarrowUn16Uto8Ux8:  pcast = mkPCast16x8; break;
   2252       case Iop_QNarrowUn32Sto16Sx4: pcast = mkPCast32x4; break;
   2253       case Iop_QNarrowUn32Sto16Ux4: pcast = mkPCast32x4; break;
   2254       case Iop_QNarrowUn32Uto16Ux4: pcast = mkPCast32x4; break;
   2255       case Iop_QNarrowUn64Sto32Sx2: pcast = mkPCast64x2; break;
   2256       case Iop_QNarrowUn64Sto32Ux2: pcast = mkPCast64x2; break;
   2257       case Iop_QNarrowUn64Uto32Ux2: pcast = mkPCast64x2; break;
   2258       default: VG_(tool_panic)("vectorNarrowUnV128");
   2259    }
   2260    IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
   2261    at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
   2262    at2 = assignNew('V', mce, Ity_I64, unop(vanilla_narrow, at1));
   2263    return at2;
   2264 }
   2265 
   2266 static
   2267 IRAtom* vectorWidenI64 ( MCEnv* mce, IROp longen_op,
   2268                          IRAtom* vatom1)
   2269 {
   2270    IRAtom *at1, *at2;
   2271    IRAtom* (*pcast)( MCEnv*, IRAtom* );
   2272    switch (longen_op) {
   2273       case Iop_Widen8Uto16x8:  pcast = mkPCast16x8; break;
   2274       case Iop_Widen8Sto16x8:  pcast = mkPCast16x8; break;
   2275       case Iop_Widen16Uto32x4: pcast = mkPCast32x4; break;
   2276       case Iop_Widen16Sto32x4: pcast = mkPCast32x4; break;
   2277       case Iop_Widen32Uto64x2: pcast = mkPCast64x2; break;
   2278       case Iop_Widen32Sto64x2: pcast = mkPCast64x2; break;
   2279       default: VG_(tool_panic)("vectorWidenI64");
   2280    }
   2281    tl_assert(isShadowAtom(mce,vatom1));
   2282    at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1));
   2283    at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1));
   2284    return at2;
   2285 }
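/* Example: for Iop_Widen8Uto16x8 the widened V bits are re-pessimised
   with CmpNEZ16x8, so a source byte with even one undefined bit marks
   its entire 16-bit destination lane as undefined. */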
   2286 
   2287 
   2288 /* --- --- Vector integer arithmetic --- --- */
   2289 
   2290 /* Simple ... UifU the args and per-lane pessimise the results. */
   2291 
   2292 /* --- V128-bit versions --- */
   2293 
   2294 static
   2295 IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
   2296 {
   2297    IRAtom* at;
   2298    at = mkUifUV128(mce, vatom1, vatom2);
   2299    at = mkPCast8x16(mce, at);
   2300    return at;
   2301 }
   2302 
   2303 static
   2304 IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
   2305 {
   2306    IRAtom* at;
   2307    at = mkUifUV128(mce, vatom1, vatom2);
   2308    at = mkPCast16x8(mce, at);
   2309    return at;
   2310 }
   2311 
   2312 static
   2313 IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
   2314 {
   2315    IRAtom* at;
   2316    at = mkUifUV128(mce, vatom1, vatom2);
   2317    at = mkPCast32x4(mce, at);
   2318    return at;
   2319 }
   2320 
   2321 static
   2322 IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
   2323 {
   2324    IRAtom* at;
   2325    at = mkUifUV128(mce, vatom1, vatom2);
   2326    at = mkPCast64x2(mce, at);
   2327    return at;
   2328 }
   2329 
   2330 /* --- 64-bit versions --- */
   2331 
   2332 static
   2333 IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
   2334 {
   2335    IRAtom* at;
   2336    at = mkUifU64(mce, vatom1, vatom2);
   2337    at = mkPCast8x8(mce, at);
   2338    return at;
   2339 }
   2340 
   2341 static
   2342 IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
   2343 {
   2344    IRAtom* at;
   2345    at = mkUifU64(mce, vatom1, vatom2);
   2346    at = mkPCast16x4(mce, at);
   2347    return at;
   2348 }
   2349 
   2350 static
   2351 IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
   2352 {
   2353    IRAtom* at;
   2354    at = mkUifU64(mce, vatom1, vatom2);
   2355    at = mkPCast32x2(mce, at);
   2356    return at;
   2357 }
   2358 
   2359 static
   2360 IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
   2361 {
   2362    IRAtom* at;
   2363    at = mkUifU64(mce, vatom1, vatom2);
   2364    at = mkPCastTo(mce, Ity_I64, at);
   2365    return at;
   2366 }
   2367 
   2368 /* --- 32-bit versions --- */
   2369 
   2370 static
   2371 IRAtom* binary8Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
   2372 {
   2373    IRAtom* at;
   2374    at = mkUifU32(mce, vatom1, vatom2);
   2375    at = mkPCast8x4(mce, at);
   2376    return at;
   2377 }
   2378 
   2379 static
   2380 IRAtom* binary16Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
   2381 {
   2382    IRAtom* at;
   2383    at = mkUifU32(mce, vatom1, vatom2);
   2384    at = mkPCast16x2(mce, at);
   2385    return at;
   2386 }
   2387 
   2388 
   2389 /*------------------------------------------------------------*/
   2390 /*--- Generate shadow values from all kinds of IRExprs.    ---*/
   2391 /*------------------------------------------------------------*/
   2392 
   2393 static
   2394 IRAtom* expr2vbits_Qop ( MCEnv* mce,
   2395                          IROp op,
   2396                          IRAtom* atom1, IRAtom* atom2,
   2397                          IRAtom* atom3, IRAtom* atom4 )
   2398 {
   2399    IRAtom* vatom1 = expr2vbits( mce, atom1 );
   2400    IRAtom* vatom2 = expr2vbits( mce, atom2 );
   2401    IRAtom* vatom3 = expr2vbits( mce, atom3 );
   2402    IRAtom* vatom4 = expr2vbits( mce, atom4 );
   2403 
   2404    tl_assert(isOriginalAtom(mce,atom1));
   2405    tl_assert(isOriginalAtom(mce,atom2));
   2406    tl_assert(isOriginalAtom(mce,atom3));
   2407    tl_assert(isOriginalAtom(mce,atom4));
   2408    tl_assert(isShadowAtom(mce,vatom1));
   2409    tl_assert(isShadowAtom(mce,vatom2));
   2410    tl_assert(isShadowAtom(mce,vatom3));
   2411    tl_assert(isShadowAtom(mce,vatom4));
   2412    tl_assert(sameKindedAtoms(atom1,vatom1));
   2413    tl_assert(sameKindedAtoms(atom2,vatom2));
   2414    tl_assert(sameKindedAtoms(atom3,vatom3));
   2415    tl_assert(sameKindedAtoms(atom4,vatom4));
   2416    switch (op) {
   2417       case Iop_MAddF64:
   2418       case Iop_MAddF64r32:
   2419       case Iop_MSubF64:
   2420       case Iop_MSubF64r32:
   2421          /* I32(rm) x F64 x F64 x F64 -> F64 */
   2422          return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);
   2423 
   2424       case Iop_MAddF32:
   2425       case Iop_MSubF32:
   2426          /* I32(rm) x F32 x F32 x F32 -> F32 */
   2427          return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4);
   2428 
   2429       /* V256-bit data-steering */
   2430       case Iop_64x4toV256:
   2431          return assignNew('V', mce, Ity_V256,
   2432                           IRExpr_Qop(op, vatom1, vatom2, vatom3, vatom4));
   2433 
   2434       default:
   2435          ppIROp(op);
   2436          VG_(tool_panic)("memcheck:expr2vbits_Qop");
   2437    }
   2438 }
   2439 
   2440 
   2441 static
   2442 IRAtom* expr2vbits_Triop ( MCEnv* mce,
   2443                            IROp op,
   2444                            IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
   2445 {
   2446    IRAtom* vatom1 = expr2vbits( mce, atom1 );
   2447    IRAtom* vatom2 = expr2vbits( mce, atom2 );
   2448    IRAtom* vatom3 = expr2vbits( mce, atom3 );
   2449 
   2450    tl_assert(isOriginalAtom(mce,atom1));
   2451    tl_assert(isOriginalAtom(mce,atom2));
   2452    tl_assert(isOriginalAtom(mce,atom3));
   2453    tl_assert(isShadowAtom(mce,vatom1));
   2454    tl_assert(isShadowAtom(mce,vatom2));
   2455    tl_assert(isShadowAtom(mce,vatom3));
   2456    tl_assert(sameKindedAtoms(atom1,vatom1));
   2457    tl_assert(sameKindedAtoms(atom2,vatom2));
   2458    tl_assert(sameKindedAtoms(atom3,vatom3));
   2459    switch (op) {
   2460       case Iop_AddF128:
   2461       case Iop_AddD128:
   2462       case Iop_SubF128:
   2463       case Iop_SubD128:
   2464       case Iop_MulF128:
   2465       case Iop_MulD128:
   2466       case Iop_DivF128:
   2467       case Iop_DivD128:
   2468       case Iop_QuantizeD128:
   2469          /* I32(rm) x F128/D128 x F128/D128 -> F128/D128 */
   2470          return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
   2471       case Iop_AddF64:
   2472       case Iop_AddD64:
   2473       case Iop_AddF64r32:
   2474       case Iop_SubF64:
   2475       case Iop_SubD64:
   2476       case Iop_SubF64r32:
   2477       case Iop_MulF64:
   2478       case Iop_MulD64:
   2479       case Iop_MulF64r32:
   2480       case Iop_DivF64:
   2481       case Iop_DivD64:
   2482       case Iop_DivF64r32:
   2483       case Iop_ScaleF64:
   2484       case Iop_Yl2xF64:
   2485       case Iop_Yl2xp1F64:
   2486       case Iop_AtanF64:
   2487       case Iop_PRemF64:
   2488       case Iop_PRem1F64:
   2489       case Iop_QuantizeD64:
   2490          /* I32(rm) x F64/D64 x F64/D64 -> F64/D64 */
   2491          return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
   2492       case Iop_PRemC3210F64:
   2493       case Iop_PRem1C3210F64:
   2494          /* I32(rm) x F64 x F64 -> I32 */
   2495          return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
   2496       case Iop_AddF32:
   2497       case Iop_SubF32:
   2498       case Iop_MulF32:
   2499       case Iop_DivF32:
   2500          /* I32(rm) x F32 x F32 -> F32 */
   2501          return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
   2502       case Iop_SignificanceRoundD64:
   2503          /* IRRoundingModeDFP(I32) x I8 x D64 -> D64 */
   2504          return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
   2505       case Iop_SignificanceRoundD128:
   2506          /* IRRoundingModeDFP(I32) x I8 x D128 -> D128 */
   2507          return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
   2508       case Iop_ExtractV128:
   2509          complainIfUndefined(mce, atom3, NULL);
   2510          return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3));
   2511       case Iop_Extract64:
   2512          complainIfUndefined(mce, atom3, NULL);
   2513          return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3));
   2514       case Iop_SetElem8x8:
   2515       case Iop_SetElem16x4:
   2516       case Iop_SetElem32x2:
   2517          complainIfUndefined(mce, atom2, NULL);
   2518          return assignNew('V', mce, Ity_I64, triop(op, vatom1, atom2, vatom3));
   2519       default:
   2520          ppIROp(op);
   2521          VG_(tool_panic)("memcheck:expr2vbits_Triop");
   2522    }
   2523 }
   2524 
   2525 
   2526 static
   2527 IRAtom* expr2vbits_Binop ( MCEnv* mce,
   2528                            IROp op,
   2529                            IRAtom* atom1, IRAtom* atom2 )
   2530 {
   2531    IRType  and_or_ty;
   2532    IRAtom* (*uifu)    (MCEnv*, IRAtom*, IRAtom*);
   2533    IRAtom* (*difd)    (MCEnv*, IRAtom*, IRAtom*);
   2534    IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
   2535 
   2536    IRAtom* vatom1 = expr2vbits( mce, atom1 );
   2537    IRAtom* vatom2 = expr2vbits( mce, atom2 );
   2538 
   2539    tl_assert(isOriginalAtom(mce,atom1));
   2540    tl_assert(isOriginalAtom(mce,atom2));
   2541    tl_assert(isShadowAtom(mce,vatom1));
   2542    tl_assert(isShadowAtom(mce,vatom2));
   2543    tl_assert(sameKindedAtoms(atom1,vatom1));
   2544    tl_assert(sameKindedAtoms(atom2,vatom2));
   2545    switch (op) {
   2546 
   2547       /* 32-bit SIMD */
   2548 
   2549       case Iop_Add16x2:
   2550       case Iop_HAdd16Ux2:
   2551       case Iop_HAdd16Sx2:
   2552       case Iop_Sub16x2:
   2553       case Iop_HSub16Ux2:
   2554       case Iop_HSub16Sx2:
   2555       case Iop_QAdd16Sx2:
   2556       case Iop_QSub16Sx2:
   2557       case Iop_QSub16Ux2:
   2558          return binary16Ix2(mce, vatom1, vatom2);
   2559 
   2560       case Iop_Add8x4:
   2561       case Iop_HAdd8Ux4:
   2562       case Iop_HAdd8Sx4:
   2563       case Iop_Sub8x4:
   2564       case Iop_HSub8Ux4:
   2565       case Iop_HSub8Sx4:
   2566       case Iop_QSub8Ux4:
   2567       case Iop_QAdd8Ux4:
   2568       case Iop_QSub8Sx4:
   2569       case Iop_QAdd8Sx4:
   2570          return binary8Ix4(mce, vatom1, vatom2);
   2571 
   2572       /* 64-bit SIMD */
   2573 
   2574       case Iop_ShrN8x8:
   2575       case Iop_ShrN16x4:
   2576       case Iop_ShrN32x2:
   2577       case Iop_SarN8x8:
   2578       case Iop_SarN16x4:
   2579       case Iop_SarN32x2:
   2580       case Iop_ShlN16x4:
   2581       case Iop_ShlN32x2:
   2582       case Iop_ShlN8x8:
   2583          /* Same scheme as with all other shifts. */
   2584          complainIfUndefined(mce, atom2, NULL);
   2585          return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
   2586 
   2587       case Iop_QNarrowBin32Sto16Sx4:
   2588       case Iop_QNarrowBin16Sto8Sx8:
   2589       case Iop_QNarrowBin16Sto8Ux8:
   2590          return vectorNarrowBin64(mce, op, vatom1, vatom2);
   2591 
   2592       case Iop_Min8Ux8:
   2593       case Iop_Min8Sx8:
   2594       case Iop_Max8Ux8:
   2595       case Iop_Max8Sx8:
   2596       case Iop_Avg8Ux8:
   2597       case Iop_QSub8Sx8:
   2598       case Iop_QSub8Ux8:
   2599       case Iop_Sub8x8:
   2600       case Iop_CmpGT8Sx8:
   2601       case Iop_CmpGT8Ux8:
   2602       case Iop_CmpEQ8x8:
   2603       case Iop_QAdd8Sx8:
   2604       case Iop_QAdd8Ux8:
   2605       case Iop_QSal8x8:
   2606       case Iop_QShl8x8:
   2607       case Iop_Add8x8:
   2608       case Iop_Mul8x8:
   2609       case Iop_PolynomialMul8x8:
   2610          return binary8Ix8(mce, vatom1, vatom2);
   2611 
   2612       case Iop_Min16Sx4:
   2613       case Iop_Min16Ux4:
   2614       case Iop_Max16Sx4:
   2615       case Iop_Max16Ux4:
   2616       case Iop_Avg16Ux4:
   2617       case Iop_QSub16Ux4:
   2618       case Iop_QSub16Sx4:
   2619       case Iop_Sub16x4:
   2620       case Iop_Mul16x4:
   2621       case Iop_MulHi16Sx4:
   2622       case Iop_MulHi16Ux4:
   2623       case Iop_CmpGT16Sx4:
   2624       case Iop_CmpGT16Ux4:
   2625       case Iop_CmpEQ16x4:
   2626       case Iop_QAdd16Sx4:
   2627       case Iop_QAdd16Ux4:
   2628       case Iop_QSal16x4:
   2629       case Iop_QShl16x4:
   2630       case Iop_Add16x4:
   2631       case Iop_QDMulHi16Sx4:
   2632       case Iop_QRDMulHi16Sx4:
   2633          return binary16Ix4(mce, vatom1, vatom2);
   2634 
   2635       case Iop_Sub32x2:
   2636       case Iop_Mul32x2:
   2637       case Iop_Max32Sx2:
   2638       case Iop_Max32Ux2:
   2639       case Iop_Min32Sx2:
   2640       case Iop_Min32Ux2:
   2641       case Iop_CmpGT32Sx2:
   2642       case Iop_CmpGT32Ux2:
   2643       case Iop_CmpEQ32x2:
   2644       case Iop_Add32x2:
   2645       case Iop_QAdd32Ux2:
   2646       case Iop_QAdd32Sx2:
   2647       case Iop_QSub32Ux2:
   2648       case Iop_QSub32Sx2:
   2649       case Iop_QSal32x2:
   2650       case Iop_QShl32x2:
   2651       case Iop_QDMulHi32Sx2:
   2652       case Iop_QRDMulHi32Sx2:
   2653          return binary32Ix2(mce, vatom1, vatom2);
   2654 
   2655       case Iop_QSub64Ux1:
   2656       case Iop_QSub64Sx1:
   2657       case Iop_QAdd64Ux1:
   2658       case Iop_QAdd64Sx1:
   2659       case Iop_QSal64x1:
   2660       case Iop_QShl64x1:
   2661       case Iop_Sal64x1:
   2662          return binary64Ix1(mce, vatom1, vatom2);
   2663 
   2664       case Iop_QShlN8Sx8:
   2665       case Iop_QShlN8x8:
   2666       case Iop_QSalN8x8:
   2667          complainIfUndefined(mce, atom2, NULL);
   2668          return mkPCast8x8(mce, vatom1);
   2669 
   2670       case Iop_QShlN16Sx4:
   2671       case Iop_QShlN16x4:
   2672       case Iop_QSalN16x4:
   2673          complainIfUndefined(mce, atom2, NULL);
   2674          return mkPCast16x4(mce, vatom1);
   2675 
   2676       case Iop_QShlN32Sx2:
   2677       case Iop_QShlN32x2:
   2678       case Iop_QSalN32x2:
   2679          complainIfUndefined(mce, atom2, NULL);
   2680          return mkPCast32x2(mce, vatom1);
   2681 
   2682       case Iop_QShlN64Sx1:
   2683       case Iop_QShlN64x1:
   2684       case Iop_QSalN64x1:
   2685          complainIfUndefined(mce, atom2, NULL);
   2686          return mkPCast32x2(mce, vatom1);
   2687 
   2688       case Iop_PwMax32Sx2:
   2689       case Iop_PwMax32Ux2:
   2690       case Iop_PwMin32Sx2:
   2691       case Iop_PwMin32Ux2:
   2692       case Iop_PwMax32Fx2:
   2693       case Iop_PwMin32Fx2:
   2694          return assignNew('V', mce, Ity_I64,
   2695                           binop(Iop_PwMax32Ux2,
   2696                                 mkPCast32x2(mce, vatom1),
   2697                                 mkPCast32x2(mce, vatom2)));
   2698 
   2699       case Iop_PwMax16Sx4:
   2700       case Iop_PwMax16Ux4:
   2701       case Iop_PwMin16Sx4:
   2702       case Iop_PwMin16Ux4:
   2703          return assignNew('V', mce, Ity_I64,
   2704                           binop(Iop_PwMax16Ux4,
   2705                                 mkPCast16x4(mce, vatom1),
   2706                                 mkPCast16x4(mce, vatom2)));
   2707 
   2708       case Iop_PwMax8Sx8:
   2709       case Iop_PwMax8Ux8:
   2710       case Iop_PwMin8Sx8:
   2711       case Iop_PwMin8Ux8:
   2712          return assignNew('V', mce, Ity_I64,
   2713                           binop(Iop_PwMax8Ux8,
   2714                                 mkPCast8x8(mce, vatom1),
   2715                                 mkPCast8x8(mce, vatom2)));
   2716 
   2717       case Iop_PwAdd32x2:
   2718       case Iop_PwAdd32Fx2:
   2719          return mkPCast32x2(mce,
   2720                assignNew('V', mce, Ity_I64,
   2721                          binop(Iop_PwAdd32x2,
   2722                                mkPCast32x2(mce, vatom1),
   2723                                mkPCast32x2(mce, vatom2))));
   2724 
   2725       case Iop_PwAdd16x4:
   2726          return mkPCast16x4(mce,
   2727                assignNew('V', mce, Ity_I64,
   2728                          binop(op, mkPCast16x4(mce, vatom1),
   2729                                    mkPCast16x4(mce, vatom2))));
   2730 
   2731       case Iop_PwAdd8x8:
   2732          return mkPCast8x8(mce,
   2733                assignNew('V', mce, Ity_I64,
   2734                          binop(op, mkPCast8x8(mce, vatom1),
   2735                                    mkPCast8x8(mce, vatom2))));
   2736 
   2737       case Iop_Shl8x8:
   2738       case Iop_Shr8x8:
   2739       case Iop_Sar8x8:
   2740       case Iop_Sal8x8:
   2741          return mkUifU64(mce,
   2742                    assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
   2743                    mkPCast8x8(mce,vatom2)
   2744                 );
   2745 
   2746       case Iop_Shl16x4:
   2747       case Iop_Shr16x4:
   2748       case Iop_Sar16x4:
   2749       case Iop_Sal16x4:
   2750          return mkUifU64(mce,
   2751                    assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
   2752                    mkPCast16x4(mce,vatom2)
   2753                 );
   2754 
   2755       case Iop_Shl32x2:
   2756       case Iop_Shr32x2:
   2757       case Iop_Sar32x2:
   2758       case Iop_Sal32x2:
   2759          return mkUifU64(mce,
   2760                    assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
   2761                    mkPCast32x2(mce,vatom2)
   2762                 );
   2763 
   2764       /* 64-bit data-steering */
   2765       case Iop_InterleaveLO32x2:
   2766       case Iop_InterleaveLO16x4:
   2767       case Iop_InterleaveLO8x8:
   2768       case Iop_InterleaveHI32x2:
   2769       case Iop_InterleaveHI16x4:
   2770       case Iop_InterleaveHI8x8:
   2771       case Iop_CatOddLanes8x8:
   2772       case Iop_CatEvenLanes8x8:
   2773       case Iop_CatOddLanes16x4:
   2774       case Iop_CatEvenLanes16x4:
   2775       case Iop_InterleaveOddLanes8x8:
   2776       case Iop_InterleaveEvenLanes8x8:
   2777       case Iop_InterleaveOddLanes16x4:
   2778       case Iop_InterleaveEvenLanes16x4:
   2779          return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
   2780 
   2781       case Iop_GetElem8x8:
   2782          complainIfUndefined(mce, atom2, NULL);
   2783          return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
   2784       case Iop_GetElem16x4:
   2785          complainIfUndefined(mce, atom2, NULL);
   2786          return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
   2787       case Iop_GetElem32x2:
   2788          complainIfUndefined(mce, atom2, NULL);
   2789          return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
   2790 
   2791       /* Perm8x8: rearrange values in left arg using steering values
   2792         from right arg.  So rearrange the vbits in the same way but
   2793         pessimise wrt steering values. */
   2794       case Iop_Perm8x8:
   2795          return mkUifU64(
   2796                    mce,
   2797                    assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
   2798                    mkPCast8x8(mce, vatom2)
   2799                 );
   2800 
   2801       /* V128-bit SIMD */
   2802 
   2803       case Iop_ShrN8x16:
   2804       case Iop_ShrN16x8:
   2805       case Iop_ShrN32x4:
   2806       case Iop_ShrN64x2:
   2807       case Iop_SarN8x16:
   2808       case Iop_SarN16x8:
   2809       case Iop_SarN32x4:
   2810       case Iop_SarN64x2:
   2811       case Iop_ShlN8x16:
   2812       case Iop_ShlN16x8:
   2813       case Iop_ShlN32x4:
   2814       case Iop_ShlN64x2:
   2815          /* Same scheme as with all other shifts.  Note: 22 Oct 05:
   2816             this is wrong now, scalar shifts are done properly lazily.
   2817             Vector shifts should be fixed too. */
   2818          complainIfUndefined(mce, atom2, NULL);
   2819          return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
   2820 
   2821       /* V x V shifts/rotates are done using the standard lazy scheme. */
   2822       case Iop_Shl8x16:
   2823       case Iop_Shr8x16:
   2824       case Iop_Sar8x16:
   2825       case Iop_Sal8x16:
   2826       case Iop_Rol8x16:
   2827          return mkUifUV128(mce,
   2828                    assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
   2829                    mkPCast8x16(mce,vatom2)
   2830                 );
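               /* Sketch of the lazy V x V shift scheme, with invented
                  values (1 = undefined).  The data vbits are shifted by
                  the *original* per-lane shift amounts, so surviving
                  vbits line up with the shifted data; the per-lane
                  PCast of the shift-amount vbits is then UifU'd (OR'd)
                  in, so any lane whose shift amount contains even one
                  undefined bit comes out fully undefined.  E.g. if byte
                  lane 3 of vatom2 is 0x01, mkPCast8x16 expands it to
                  0xFF and result byte lane 3 is wholly flagged. */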
   2831 
   2832       case Iop_Shl16x8:
   2833       case Iop_Shr16x8:
   2834       case Iop_Sar16x8:
   2835       case Iop_Sal16x8:
   2836       case Iop_Rol16x8:
   2837          return mkUifUV128(mce,
   2838                    assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
   2839                    mkPCast16x8(mce,vatom2)
   2840                 );
   2841 
   2842       case Iop_Shl32x4:
   2843       case Iop_Shr32x4:
   2844       case Iop_Sar32x4:
   2845       case Iop_Sal32x4:
   2846       case Iop_Rol32x4:
   2847          return mkUifUV128(mce,
   2848                    assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
   2849                    mkPCast32x4(mce,vatom2)
   2850                 );
   2851 
   2852       case Iop_Shl64x2:
   2853       case Iop_Shr64x2:
   2854       case Iop_Sar64x2:
   2855       case Iop_Sal64x2:
   2856          return mkUifUV128(mce,
   2857                    assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
   2858                    mkPCast64x2(mce,vatom2)
   2859                 );
   2860 
   2861       case Iop_F32ToFixed32Ux4_RZ:
   2862       case Iop_F32ToFixed32Sx4_RZ:
   2863       case Iop_Fixed32UToF32x4_RN:
   2864       case Iop_Fixed32SToF32x4_RN:
   2865          complainIfUndefined(mce, atom2, NULL);
   2866          return mkPCast32x4(mce, vatom1);
   2867 
   2868       case Iop_F32ToFixed32Ux2_RZ:
   2869       case Iop_F32ToFixed32Sx2_RZ:
   2870       case Iop_Fixed32UToF32x2_RN:
   2871       case Iop_Fixed32SToF32x2_RN:
   2872          complainIfUndefined(mce, atom2, NULL);
   2873          return mkPCast32x2(mce, vatom1);
   2874 
   2875       case Iop_QSub8Ux16:
   2876       case Iop_QSub8Sx16:
   2877       case Iop_Sub8x16:
   2878       case Iop_Min8Ux16:
   2879       case Iop_Min8Sx16:
   2880       case Iop_Max8Ux16:
   2881       case Iop_Max8Sx16:
   2882       case Iop_CmpGT8Sx16:
   2883       case Iop_CmpGT8Ux16:
   2884       case Iop_CmpEQ8x16:
   2885       case Iop_Avg8Ux16:
   2886       case Iop_Avg8Sx16:
   2887       case Iop_QAdd8Ux16:
   2888       case Iop_QAdd8Sx16:
   2889       case Iop_QSal8x16:
   2890       case Iop_QShl8x16:
   2891       case Iop_Add8x16:
   2892       case Iop_Mul8x16:
   2893       case Iop_PolynomialMul8x16:
   2894          return binary8Ix16(mce, vatom1, vatom2);
   2895 
   2896       case Iop_QSub16Ux8:
   2897       case Iop_QSub16Sx8:
   2898       case Iop_Sub16x8:
   2899       case Iop_Mul16x8:
   2900       case Iop_MulHi16Sx8:
   2901       case Iop_MulHi16Ux8:
   2902       case Iop_Min16Sx8:
   2903       case Iop_Min16Ux8:
   2904       case Iop_Max16Sx8:
   2905       case Iop_Max16Ux8:
   2906       case Iop_CmpGT16Sx8:
   2907       case Iop_CmpGT16Ux8:
   2908       case Iop_CmpEQ16x8:
   2909       case Iop_Avg16Ux8:
   2910       case Iop_Avg16Sx8:
   2911       case Iop_QAdd16Ux8:
   2912       case Iop_QAdd16Sx8:
   2913       case Iop_QSal16x8:
   2914       case Iop_QShl16x8:
   2915       case Iop_Add16x8:
   2916       case Iop_QDMulHi16Sx8:
   2917       case Iop_QRDMulHi16Sx8:
   2918          return binary16Ix8(mce, vatom1, vatom2);
   2919 
   2920       case Iop_Sub32x4:
   2921       case Iop_CmpGT32Sx4:
   2922       case Iop_CmpGT32Ux4:
   2923       case Iop_CmpEQ32x4:
   2924       case Iop_QAdd32Sx4:
   2925       case Iop_QAdd32Ux4:
   2926       case Iop_QSub32Sx4:
   2927       case Iop_QSub32Ux4:
   2928       case Iop_QSal32x4:
   2929       case Iop_QShl32x4:
   2930       case Iop_Avg32Ux4:
   2931       case Iop_Avg32Sx4:
   2932       case Iop_Add32x4:
   2933       case Iop_Max32Ux4:
   2934       case Iop_Max32Sx4:
   2935       case Iop_Min32Ux4:
   2936       case Iop_Min32Sx4:
   2937       case Iop_Mul32x4:
   2938       case Iop_QDMulHi32Sx4:
   2939       case Iop_QRDMulHi32Sx4:
   2940          return binary32Ix4(mce, vatom1, vatom2);
   2941 
   2942       case Iop_Sub64x2:
   2943       case Iop_Add64x2:
   2944       case Iop_CmpEQ64x2:
   2945       case Iop_CmpGT64Sx2:
   2946       case Iop_QSal64x2:
   2947       case Iop_QShl64x2:
   2948       case Iop_QAdd64Ux2:
   2949       case Iop_QAdd64Sx2:
   2950       case Iop_QSub64Ux2:
   2951       case Iop_QSub64Sx2:
   2952          return binary64Ix2(mce, vatom1, vatom2);
   2953 
   2954       case Iop_QNarrowBin32Sto16Sx8:
   2955       case Iop_QNarrowBin32Uto16Ux8:
   2956       case Iop_QNarrowBin32Sto16Ux8:
   2957       case Iop_QNarrowBin16Sto8Sx16:
   2958       case Iop_QNarrowBin16Uto8Ux16:
   2959       case Iop_QNarrowBin16Sto8Ux16:
   2960          return vectorNarrowBinV128(mce, op, vatom1, vatom2);
   2961 
   2962       case Iop_Sub64Fx2:
   2963       case Iop_Mul64Fx2:
   2964       case Iop_Min64Fx2:
   2965       case Iop_Max64Fx2:
   2966       case Iop_Div64Fx2:
   2967       case Iop_CmpLT64Fx2:
   2968       case Iop_CmpLE64Fx2:
   2969       case Iop_CmpEQ64Fx2:
   2970       case Iop_CmpUN64Fx2:
   2971       case Iop_Add64Fx2:
   2972          return binary64Fx2(mce, vatom1, vatom2);
   2973 
   2974       case Iop_Sub64F0x2:
   2975       case Iop_Mul64F0x2:
   2976       case Iop_Min64F0x2:
   2977       case Iop_Max64F0x2:
   2978       case Iop_Div64F0x2:
   2979       case Iop_CmpLT64F0x2:
   2980       case Iop_CmpLE64F0x2:
   2981       case Iop_CmpEQ64F0x2:
   2982       case Iop_CmpUN64F0x2:
   2983       case Iop_Add64F0x2:
   2984          return binary64F0x2(mce, vatom1, vatom2);
   2985 
   2986       case Iop_Sub32Fx4:
   2987       case Iop_Mul32Fx4:
   2988       case Iop_Min32Fx4:
   2989       case Iop_Max32Fx4:
   2990       case Iop_Div32Fx4:
   2991       case Iop_CmpLT32Fx4:
   2992       case Iop_CmpLE32Fx4:
   2993       case Iop_CmpEQ32Fx4:
   2994       case Iop_CmpUN32Fx4:
   2995       case Iop_CmpGT32Fx4:
   2996       case Iop_CmpGE32Fx4:
   2997       case Iop_Add32Fx4:
   2998       case Iop_Recps32Fx4:
   2999       case Iop_Rsqrts32Fx4:
   3000          return binary32Fx4(mce, vatom1, vatom2);
   3001 
   3002       case Iop_Sub32Fx2:
   3003       case Iop_Mul32Fx2:
   3004       case Iop_Min32Fx2:
   3005       case Iop_Max32Fx2:
   3006       case Iop_CmpEQ32Fx2:
   3007       case Iop_CmpGT32Fx2:
   3008       case Iop_CmpGE32Fx2:
   3009       case Iop_Add32Fx2:
   3010       case Iop_Recps32Fx2:
   3011       case Iop_Rsqrts32Fx2:
   3012          return binary32Fx2(mce, vatom1, vatom2);
   3013 
   3014       case Iop_Sub32F0x4:
   3015       case Iop_Mul32F0x4:
   3016       case Iop_Min32F0x4:
   3017       case Iop_Max32F0x4:
   3018       case Iop_Div32F0x4:
   3019       case Iop_CmpLT32F0x4:
   3020       case Iop_CmpLE32F0x4:
   3021       case Iop_CmpEQ32F0x4:
   3022       case Iop_CmpUN32F0x4:
   3023       case Iop_Add32F0x4:
   3024          return binary32F0x4(mce, vatom1, vatom2);
   3025 
   3026       case Iop_QShlN8Sx16:
   3027       case Iop_QShlN8x16:
   3028       case Iop_QSalN8x16:
   3029          complainIfUndefined(mce, atom2, NULL);
   3030          return mkPCast8x16(mce, vatom1);
   3031 
   3032       case Iop_QShlN16Sx8:
   3033       case Iop_QShlN16x8:
   3034       case Iop_QSalN16x8:
   3035          complainIfUndefined(mce, atom2, NULL);
   3036          return mkPCast16x8(mce, vatom1);
   3037 
   3038       case Iop_QShlN32Sx4:
   3039       case Iop_QShlN32x4:
   3040       case Iop_QSalN32x4:
   3041          complainIfUndefined(mce, atom2, NULL);
   3042          return mkPCast32x4(mce, vatom1);
   3043 
   3044       case Iop_QShlN64Sx2:
   3045       case Iop_QShlN64x2:
   3046       case Iop_QSalN64x2:
   3047          complainIfUndefined(mce, atom2, NULL);
    3048          return mkPCast64x2(mce, vatom1);
   3049 
   3050       case Iop_Mull32Sx2:
   3051       case Iop_Mull32Ux2:
   3052       case Iop_QDMulLong32Sx2:
   3053          return vectorWidenI64(mce, Iop_Widen32Sto64x2,
   3054                                     mkUifU64(mce, vatom1, vatom2));
   3055 
   3056       case Iop_Mull16Sx4:
   3057       case Iop_Mull16Ux4:
   3058       case Iop_QDMulLong16Sx4:
   3059          return vectorWidenI64(mce, Iop_Widen16Sto32x4,
   3060                                     mkUifU64(mce, vatom1, vatom2));
   3061 
   3062       case Iop_Mull8Sx8:
   3063       case Iop_Mull8Ux8:
   3064       case Iop_PolynomialMull8x8:
   3065          return vectorWidenI64(mce, Iop_Widen8Sto16x8,
   3066                                     mkUifU64(mce, vatom1, vatom2));
   3067 
   3068       case Iop_PwAdd32x4:
   3069          return mkPCast32x4(mce,
   3070                assignNew('V', mce, Ity_V128, binop(op, mkPCast32x4(mce, vatom1),
   3071                      mkPCast32x4(mce, vatom2))));
   3072 
   3073       case Iop_PwAdd16x8:
   3074          return mkPCast16x8(mce,
   3075                assignNew('V', mce, Ity_V128, binop(op, mkPCast16x8(mce, vatom1),
   3076                      mkPCast16x8(mce, vatom2))));
   3077 
   3078       case Iop_PwAdd8x16:
   3079          return mkPCast8x16(mce,
   3080                assignNew('V', mce, Ity_V128, binop(op, mkPCast8x16(mce, vatom1),
   3081                      mkPCast8x16(mce, vatom2))));
   3082 
   3083       /* V128-bit data-steering */
   3084       case Iop_SetV128lo32:
   3085       case Iop_SetV128lo64:
   3086       case Iop_64HLtoV128:
   3087       case Iop_InterleaveLO64x2:
   3088       case Iop_InterleaveLO32x4:
   3089       case Iop_InterleaveLO16x8:
   3090       case Iop_InterleaveLO8x16:
   3091       case Iop_InterleaveHI64x2:
   3092       case Iop_InterleaveHI32x4:
   3093       case Iop_InterleaveHI16x8:
   3094       case Iop_InterleaveHI8x16:
   3095       case Iop_CatOddLanes8x16:
   3096       case Iop_CatOddLanes16x8:
   3097       case Iop_CatOddLanes32x4:
   3098       case Iop_CatEvenLanes8x16:
   3099       case Iop_CatEvenLanes16x8:
   3100       case Iop_CatEvenLanes32x4:
   3101       case Iop_InterleaveOddLanes8x16:
   3102       case Iop_InterleaveOddLanes16x8:
   3103       case Iop_InterleaveOddLanes32x4:
   3104       case Iop_InterleaveEvenLanes8x16:
   3105       case Iop_InterleaveEvenLanes16x8:
   3106       case Iop_InterleaveEvenLanes32x4:
   3107          return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2));
   3108 
   3109       case Iop_GetElem8x16:
   3110          complainIfUndefined(mce, atom2, NULL);
   3111          return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
   3112       case Iop_GetElem16x8:
   3113          complainIfUndefined(mce, atom2, NULL);
   3114          return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
   3115       case Iop_GetElem32x4:
   3116          complainIfUndefined(mce, atom2, NULL);
   3117          return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
   3118       case Iop_GetElem64x2:
   3119          complainIfUndefined(mce, atom2, NULL);
   3120          return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
   3121 
   3122      /* Perm8x16: rearrange values in left arg using steering values
   3123         from right arg.  So rearrange the vbits in the same way but
   3124         pessimise wrt steering values.  Perm32x4 ditto. */
   3125       case Iop_Perm8x16:
   3126          return mkUifUV128(
   3127                    mce,
   3128                    assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
   3129                    mkPCast8x16(mce, vatom2)
   3130                 );
   3131       case Iop_Perm32x4:
   3132          return mkUifUV128(
   3133                    mce,
   3134                    assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
   3135                    mkPCast32x4(mce, vatom2)
   3136                 );
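               /* Illustration with invented values: for Iop_Perm8x16,
                  if steering byte 5 of atom2 selects source byte 9,
                  output vbits byte 5 is simply a copy of vatom1's byte
                  9; and if any bit of vatom2's byte 5 is itself
                  undefined, the mkPCast8x16 term ORs 0xFF into output
                  byte 5, marking it wholly undefined regardless of the
                  data. */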
   3137 
    3138      /* These two take the lower 16-bit half of each 32-bit lane,
    3139         sign/zero extend it to 32, and multiply them together, producing a 32x4
   3140         result (and implicitly ignoring half the operand bits).  So
   3141         treat it as a bunch of independent 16x8 operations, but then
   3142         do 32-bit shifts left-right to copy the lower half results
   3143         (which are all 0s or all 1s due to PCasting in binary16Ix8)
   3144         into the upper half of each result lane. */
   3145       case Iop_MullEven16Ux8:
   3146       case Iop_MullEven16Sx8: {
   3147          IRAtom* at;
   3148          at = binary16Ix8(mce,vatom1,vatom2);
   3149          at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16)));
   3150          at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16)));
    3151          return at;
   3152       }
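               /* Worked example of the smearing trick above, with
                  invented values.  binary16Ix8 leaves every 16-bit lane
                  of 'at' as either 0x0000 or 0xFFFF.  Viewing each
                  32-bit lane as hi16:lo16, ShlN32x4 by 16 yields
                  lo16:0x0000, and the arithmetic SarN32x4 by 16 then
                  replicates lo16's top bit downwards, so each 32-bit
                  result lane ends up 0x00000000 or 0xFFFFFFFF according
                  to the vbits of the 16-bit lane that actually fed the
                  multiply. */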
   3153 
   3154       /* Same deal as Iop_MullEven16{S,U}x8 */
   3155       case Iop_MullEven8Ux16:
   3156       case Iop_MullEven8Sx16: {
   3157          IRAtom* at;
   3158          at = binary8Ix16(mce,vatom1,vatom2);
   3159          at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8)));
   3160          at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8)));
    3161          return at;
   3162       }
   3163 
   3164       /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x
   3165          32x4 -> 16x8 laneage, discarding the upper half of each lane.
    3166          Simply apply the same op to the V bits, since this really is
    3167          no more than a data steering operation. */
   3168       case Iop_NarrowBin32to16x8:
   3169       case Iop_NarrowBin16to8x16:
   3170          return assignNew('V', mce, Ity_V128,
   3171                                     binop(op, vatom1, vatom2));
   3172 
   3173       case Iop_ShrV128:
   3174       case Iop_ShlV128:
   3175          /* Same scheme as with all other shifts.  Note: 10 Nov 05:
   3176             this is wrong now, scalar shifts are done properly lazily.
   3177             Vector shifts should be fixed too. */
   3178          complainIfUndefined(mce, atom2, NULL);
   3179          return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
   3180 
   3181       /* I128-bit data-steering */
   3182       case Iop_64HLto128:
   3183          return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2));
   3184 
   3185       /* V256-bit SIMD */
   3186 
   3187       case Iop_Add64Fx4:
   3188       case Iop_Sub64Fx4:
   3189       case Iop_Mul64Fx4:
   3190       case Iop_Div64Fx4:
   3191       case Iop_Max64Fx4:
   3192       case Iop_Min64Fx4:
   3193          return binary64Fx4(mce, vatom1, vatom2);
   3194 
   3195       case Iop_Add32Fx8:
   3196       case Iop_Sub32Fx8:
   3197       case Iop_Mul32Fx8:
   3198       case Iop_Div32Fx8:
   3199       case Iop_Max32Fx8:
   3200       case Iop_Min32Fx8:
   3201          return binary32Fx8(mce, vatom1, vatom2);
   3202 
   3203       /* V256-bit data-steering */
   3204       case Iop_V128HLtoV256:
   3205          return assignNew('V', mce, Ity_V256, binop(op, vatom1, vatom2));
   3206 
   3207       /* Scalar floating point */
   3208 
   3209       case Iop_F32toI64S:
   3210          /* I32(rm) x F32 -> I64 */
   3211          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
   3212 
   3213       case Iop_I64StoF32:
   3214          /* I32(rm) x I64 -> F32 */
   3215          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
   3216 
   3217       case Iop_RoundF64toInt:
   3218       case Iop_RoundF64toF32:
   3219       case Iop_F64toI64S:
   3220       case Iop_F64toI64U:
   3221       case Iop_I64StoF64:
   3222       case Iop_I64UtoF64:
   3223       case Iop_SinF64:
   3224       case Iop_CosF64:
   3225       case Iop_TanF64:
   3226       case Iop_2xm1F64:
   3227       case Iop_SqrtF64:
   3228          /* I32(rm) x I64/F64 -> I64/F64 */
   3229          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
   3230 
   3231       case Iop_ShlD64:
   3232       case Iop_ShrD64:
   3233       case Iop_RoundD64toInt:
   3234          /* I32(DFP rm) x D64 -> D64 */
   3235          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
   3236 
   3237       case Iop_ShlD128:
   3238       case Iop_ShrD128:
   3239       case Iop_RoundD128toInt:
   3240          /* I32(DFP rm) x D128 -> D128 */
   3241          return mkLazy2(mce, Ity_I128, vatom1, vatom2);
   3242 
   3243       case Iop_D64toI64S:
   3244       case Iop_I64StoD64:
   3245          /* I64(DFP rm) x I64 -> D64 */
   3246          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
   3247 
   3248       case Iop_RoundF32toInt:
   3249       case Iop_SqrtF32:
   3250          /* I32(rm) x I32/F32 -> I32/F32 */
   3251          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
   3252 
   3253       case Iop_SqrtF128:
   3254          /* I32(rm) x F128 -> F128 */
   3255          return mkLazy2(mce, Ity_I128, vatom1, vatom2);
   3256 
   3257       case Iop_I32StoF32:
   3258       case Iop_F32toI32S:
   3259          /* First arg is I32 (rounding mode), second is F32/I32 (data). */
   3260          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
   3261 
   3262       case Iop_F128toI32S: /* IRRoundingMode(I32) x F128 -> signed I32  */
   3263       case Iop_F128toF32:  /* IRRoundingMode(I32) x F128 -> F32         */
   3264          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
   3265 
   3266       case Iop_F128toI64S: /* IRRoundingMode(I32) x F128 -> signed I64  */
   3267       case Iop_F128toF64:  /* IRRoundingMode(I32) x F128 -> F64         */
   3268       case Iop_D128toD64:  /* IRRoundingModeDFP(I64) x D128 -> D64 */
   3269       case Iop_D128toI64S: /* IRRoundingModeDFP(I64) x D128 -> signed I64  */
   3270          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
   3271 
   3272       case Iop_F64HLtoF128:
   3273       case Iop_D64HLtoD128:
   3274          return assignNew('V', mce, Ity_I128,
   3275                           binop(Iop_64HLto128, vatom1, vatom2));
   3276 
   3277       case Iop_F64toI32U:
   3278       case Iop_F64toI32S:
   3279       case Iop_F64toF32:
   3280       case Iop_I64UtoF32:
   3281          /* First arg is I32 (rounding mode), second is F64 (data). */
    3282          /* First arg is I32 (rounding mode), second is F64 or I64 (data). */
   3283 
   3284       case Iop_D64toD32:
    3285          /* First arg is I64 (DFP rounding mode), second is D64 (data). */
   3286          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
   3287 
   3288       case Iop_F64toI16S:
   3289          /* First arg is I32 (rounding mode), second is F64 (data). */
   3290          return mkLazy2(mce, Ity_I16, vatom1, vatom2);
   3291 
   3292       case Iop_InsertExpD64:
   3293          /*  I64 x I64 -> D64 */
   3294          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
   3295 
   3296       case Iop_InsertExpD128:
   3297          /*  I64 x I128 -> D128 */
   3298          return mkLazy2(mce, Ity_I128, vatom1, vatom2);
   3299 
   3300       case Iop_CmpF32:
   3301       case Iop_CmpF64:
   3302       case Iop_CmpF128:
   3303       case Iop_CmpD64:
   3304       case Iop_CmpD128:
   3305          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
   3306 
   3307       /* non-FP after here */
   3308 
   3309       case Iop_DivModU64to32:
   3310       case Iop_DivModS64to32:
   3311          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
   3312 
   3313       case Iop_DivModU128to64:
   3314       case Iop_DivModS128to64:
   3315          return mkLazy2(mce, Ity_I128, vatom1, vatom2);
   3316 
   3317       case Iop_16HLto32:
   3318          return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2));
   3319       case Iop_32HLto64:
   3320          return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
   3321 
   3322       case Iop_DivModS64to64:
   3323       case Iop_MullS64:
   3324       case Iop_MullU64: {
   3325          IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
   3326          IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
   3327          return assignNew('V', mce, Ity_I128,
   3328                           binop(Iop_64HLto128, vHi64, vLo64));
   3329       }
   3330 
   3331       case Iop_MullS32:
   3332       case Iop_MullU32: {
   3333          IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
   3334          IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
   3335          return assignNew('V', mce, Ity_I64,
   3336                           binop(Iop_32HLto64, vHi32, vLo32));
   3337       }
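               /* Sketch of the widening-multiply scheme above, using an
                  invented value and assuming mkLeft32 smears each
                  undefined bit towards the MSB (roughly x | -x).  With
                  vatom1 = 0x00000100 (bit 8 undefined) and vatom2 = 0,
                  vLo32 = 0xFFFFFF00: bits 0..7 of the low product half
                  stay defined, everything above may be perturbed.
                  vHi32 is the PCast of that, i.e. 0xFFFFFFFF, so the
                  entire high half of the 64-bit result is treated as
                  undefined. */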
   3338 
   3339       case Iop_MullS16:
   3340       case Iop_MullU16: {
   3341          IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
   3342          IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
   3343          return assignNew('V', mce, Ity_I32,
   3344                           binop(Iop_16HLto32, vHi16, vLo16));
   3345       }
   3346 
   3347       case Iop_MullS8:
   3348       case Iop_MullU8: {
   3349          IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
   3350          IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
   3351          return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
   3352       }
   3353 
    3354       case Iop_Sad8Ux4: /* maybe we could do better?  For the moment, do mkLazy2. */
   3355       case Iop_DivS32:
   3356       case Iop_DivU32:
   3357       case Iop_DivU32E:
   3358       case Iop_DivS32E:
   3359       case Iop_QAdd32S: /* could probably do better */
   3360       case Iop_QSub32S: /* could probably do better */
   3361          return mkLazy2(mce, Ity_I32, vatom1, vatom2);
   3362 
   3363       case Iop_DivS64:
   3364       case Iop_DivU64:
   3365       case Iop_DivS64E:
   3366       case Iop_DivU64E:
   3367          return mkLazy2(mce, Ity_I64, vatom1, vatom2);
   3368 
   3369       case Iop_Add32:
   3370          if (mce->bogusLiterals || mce->useLLVMworkarounds)
   3371             return expensiveAddSub(mce,True,Ity_I32,
   3372                                    vatom1,vatom2, atom1,atom2);
   3373          else
   3374             goto cheap_AddSub32;
   3375       case Iop_Sub32:
   3376          if (mce->bogusLiterals)
   3377             return expensiveAddSub(mce,False,Ity_I32,
   3378                                    vatom1,vatom2, atom1,atom2);
   3379          else
   3380             goto cheap_AddSub32;
   3381 
   3382       cheap_AddSub32:
   3383       case Iop_Mul32:
   3384          return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
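               /* The cheap scheme models carry propagation: an
                  undefined bit in either operand can disturb its own
                  and any more significant result bit, but never a less
                  significant one, so left-smearing the UifU of the
                  operand vbits (assuming mkLeft32 is roughly x | -x) is
                  a safe approximation; e.g. vatom1 = 0x00000010 and
                  vatom2 = 0 give 0xFFFFFFF0, leaving bits 0..3 defined.
                  The expensiveAddSub path above is chosen when bogus
                  literals (or the LLVM workarounds) are in play and
                  this pessimisation would otherwise cause false
                  positives. */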
   3385 
   3386       case Iop_CmpORD32S:
   3387       case Iop_CmpORD32U:
   3388       case Iop_CmpORD64S:
   3389       case Iop_CmpORD64U:
   3390          return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2);
   3391 
   3392       case Iop_Add64:
   3393          if (mce->bogusLiterals || mce->useLLVMworkarounds)
   3394             return expensiveAddSub(mce,True,Ity_I64,
   3395                                    vatom1,vatom2, atom1,atom2);
   3396          else
   3397             goto cheap_AddSub64;
   3398       case Iop_Sub64:
   3399          if (mce->bogusLiterals)
   3400             return expensiveAddSub(mce,False,Ity_I64,
   3401                                    vatom1,vatom2, atom1,atom2);
   3402          else
   3403             goto cheap_AddSub64;
   3404 
   3405       cheap_AddSub64:
   3406       case Iop_Mul64:
   3407          return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
   3408 
   3409       case Iop_Mul16:
   3410       case Iop_Add16:
   3411       case Iop_Sub16:
   3412          return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
   3413 
   3414       case Iop_Sub8:
   3415       case Iop_Add8:
   3416          return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
   3417 
   3418       case Iop_CmpEQ64:
   3419       case Iop_CmpNE64:
   3420          if (mce->bogusLiterals)
   3421             return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
   3422          else
   3423             goto cheap_cmp64;
   3424       cheap_cmp64:
   3425       case Iop_CmpLE64S: case Iop_CmpLE64U:
   3426       case Iop_CmpLT64U: case Iop_CmpLT64S:
   3427          return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));
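               /* Cheap scheme: the comparison result is flagged
                  undefined as soon as any bit of either operand is
                  undefined (the UifU of the vbits is PCast down to one
                  bit).  The expensive variant, used when bogus literals
                  are present, can instead report a defined result when
                  the two operands already differ in bit positions that
                  are defined on both sides, avoiding false positives on
                  comparisons against magic constants. */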
   3428 
   3429       case Iop_CmpEQ32:
   3430       case Iop_CmpNE32:
   3431          if (mce->bogusLiterals)
   3432             return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
   3433          else
   3434             goto cheap_cmp32;
   3435       cheap_cmp32:
   3436       case Iop_CmpLE32S: case Iop_CmpLE32U:
   3437       case Iop_CmpLT32U: case Iop_CmpLT32S:
   3438          return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
   3439 
   3440       case Iop_CmpEQ16: case Iop_CmpNE16:
   3441          return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
   3442 
   3443       case Iop_CmpEQ8: case Iop_CmpNE8:
   3444          return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
   3445 
   3446       case Iop_CasCmpEQ8:  case Iop_CasCmpNE8:
   3447       case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
   3448       case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
   3449       case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
   3450          /* Just say these all produce a defined result, regardless
   3451             of their arguments.  See COMMENT_ON_CasCmpEQ in this file. */
   3452          return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1));
   3453 
   3454       case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
   3455          return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 );
   3456 
   3457       case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
   3458          return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 );
   3459 
   3460       case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
   3461          return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 );
   3462 
   3463       case Iop_Shl8: case Iop_Shr8:
   3464          return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 );
   3465 
   3466       case Iop_AndV256:
   3467          uifu = mkUifUV256; difd = mkDifDV256;
   3468          and_or_ty = Ity_V256; improve = mkImproveANDV256; goto do_And_Or;
   3469       case Iop_AndV128:
   3470          uifu = mkUifUV128; difd = mkDifDV128;
   3471          and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
   3472       case Iop_And64:
   3473          uifu = mkUifU64; difd = mkDifD64;
   3474          and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
   3475       case Iop_And32:
   3476          uifu = mkUifU32; difd = mkDifD32;
   3477          and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
   3478       case Iop_And16:
   3479          uifu = mkUifU16; difd = mkDifD16;
   3480          and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
   3481       case Iop_And8:
   3482          uifu = mkUifU8; difd = mkDifD8;
   3483          and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
   3484 
   3485       case Iop_OrV256:
   3486          uifu = mkUifUV256; difd = mkDifDV256;
   3487          and_or_ty = Ity_V256; improve = mkImproveORV256; goto do_And_Or;
   3488       case Iop_OrV128:
   3489          uifu = mkUifUV128; difd = mkDifDV128;
   3490          and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
   3491       case Iop_Or64:
   3492          uifu = mkUifU64; difd = mkDifD64;
   3493          and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
   3494       case Iop_Or32:
   3495          uifu = mkUifU32; difd = mkDifD32;
   3496          and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
   3497       case Iop_Or16:
   3498          uifu = mkUifU16; difd = mkDifD16;
   3499          and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
   3500       case Iop_Or8:
   3501          uifu = mkUifU8; difd = mkDifD8;
   3502          and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
   3503 
   3504       do_And_Or:
   3505          return
   3506          assignNew(
   3507             'V', mce,
   3508             and_or_ty,
   3509             difd(mce, uifu(mce, vatom1, vatom2),
   3510                       difd(mce, improve(mce, atom1, vatom1),
   3511                                 improve(mce, atom2, vatom2) ) ) );
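               /* Net effect of do_And_Or, assuming mkImproveAND yields
                  (data | vbits) and mkImproveOR yields (~data | vbits),
                  as their use here suggests.  With 1 = undefined, uifu
                  is a bitwise OR of the shadows and difd a bitwise AND,
                  so a result bit of AND is forced defined wherever
                  either operand supplies a defined 0, and a result bit
                  of OR wherever either operand supplies a defined 1.
                  Invented And8 example: atom1 = 0x0F fully defined
                  (vatom1 = 0) and vatom2 = 0xFF gives
                  (0x00|0xFF) & (0x0F|0x00) & (a2|0xFF) = 0x0F -- the
                  high nibble is defined because ANDing with 0 wins. */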
   3512 
   3513       case Iop_Xor8:
   3514          return mkUifU8(mce, vatom1, vatom2);
   3515       case Iop_Xor16:
   3516          return mkUifU16(mce, vatom1, vatom2);
   3517       case Iop_Xor32:
   3518          return mkUifU32(mce, vatom1, vatom2);
   3519       case Iop_Xor64:
   3520          return mkUifU64(mce, vatom1, vatom2);
   3521       case Iop_XorV128:
   3522          return mkUifUV128(mce, vatom1, vatom2);
   3523       case Iop_XorV256:
   3524          return mkUifUV256(mce, vatom1, vatom2);
   3525 
   3526       default:
   3527          ppIROp(op);
   3528          VG_(tool_panic)("memcheck:expr2vbits_Binop");
   3529    }
   3530 }
   3531 
   3532 
   3533 static
   3534 IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
   3535 {
   3536    IRAtom* vatom = expr2vbits( mce, atom );
   3537    tl_assert(isOriginalAtom(mce,atom));
   3538    switch (op) {
   3539 
   3540       case Iop_Sqrt64Fx2:
   3541          return unary64Fx2(mce, vatom);
   3542 
   3543       case Iop_Sqrt64F0x2:
   3544          return unary64F0x2(mce, vatom);
   3545 
   3546       case Iop_Sqrt32Fx8:
   3547       case Iop_RSqrt32Fx8:
   3548       case Iop_Recip32Fx8:
   3549          return unary32Fx8(mce, vatom);
   3550 
   3551       case Iop_Sqrt64Fx4:
   3552          return unary64Fx4(mce, vatom);
   3553 
   3554       case Iop_Sqrt32Fx4:
   3555       case Iop_RSqrt32Fx4:
   3556       case Iop_Recip32Fx4:
   3557       case Iop_I32UtoFx4:
   3558       case Iop_I32StoFx4:
   3559       case Iop_QFtoI32Ux4_RZ:
   3560       case Iop_QFtoI32Sx4_RZ:
   3561       case Iop_RoundF32x4_RM:
   3562       case Iop_RoundF32x4_RP:
   3563       case Iop_RoundF32x4_RN:
   3564       case Iop_RoundF32x4_RZ:
   3565       case Iop_Recip32x4:
   3566       case Iop_Abs32Fx4:
   3567       case Iop_Neg32Fx4:
   3568       case Iop_Rsqrte32Fx4:
   3569          return unary32Fx4(mce, vatom);
   3570 
   3571       case Iop_I32UtoFx2:
   3572       case Iop_I32StoFx2:
   3573       case Iop_Recip32Fx2:
   3574       case Iop_Recip32x2:
   3575       case Iop_Abs32Fx2:
   3576       case Iop_Neg32Fx2:
   3577       case Iop_Rsqrte32Fx2:
   3578          return unary32Fx2(mce, vatom);
   3579 
   3580       case Iop_Sqrt32F0x4:
   3581       case Iop_RSqrt32F0x4:
   3582       case Iop_Recip32F0x4:
   3583          return unary32F0x4(mce, vatom);
   3584 
   3585       case Iop_32UtoV128:
   3586       case Iop_64UtoV128:
   3587       case Iop_Dup8x16:
   3588       case Iop_Dup16x8:
   3589       case Iop_Dup32x4:
   3590       case Iop_Reverse16_8x16:
   3591       case Iop_Reverse32_8x16:
   3592       case Iop_Reverse32_16x8:
   3593       case Iop_Reverse64_8x16:
   3594       case Iop_Reverse64_16x8:
   3595       case Iop_Reverse64_32x4:
   3596       case Iop_V256toV128_1: case Iop_V256toV128_0:
   3597          return assignNew('V', mce, Ity_V128, unop(op, vatom));
   3598 
   3599       case Iop_F128HItoF64:  /* F128 -> high half of F128 */
   3600       case Iop_D128HItoD64:  /* D128 -> high half of D128 */
   3601          return assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vatom));
   3602       case Iop_F128LOtoF64:  /* F128 -> low  half of F128 */
   3603       case Iop_D128LOtoD64:  /* D128 -> low  half of D128 */
   3604          return assignNew('V', mce, Ity_I64, unop(Iop_128to64, vatom));
   3605 
   3606       case Iop_NegF128:
   3607       case Iop_AbsF128:
   3608          return mkPCastTo(mce, Ity_I128, vatom);
   3609 
   3610       case Iop_I32StoF128: /* signed I32 -> F128 */
   3611       case Iop_I64StoF128: /* signed I64 -> F128 */
   3612       case Iop_F32toF128:  /* F32 -> F128 */
   3613       case Iop_F64toF128:  /* F64 -> F128 */
   3614       case Iop_I64StoD128: /* signed I64 -> D128 */
   3615          return mkPCastTo(mce, Ity_I128, vatom);
   3616 
   3617       case Iop_F32toF64:
   3618       case Iop_I32StoF64:
   3619       case Iop_I32UtoF64:
   3620       case Iop_NegF64:
   3621       case Iop_AbsF64:
   3622       case Iop_Est5FRSqrt:
   3623       case Iop_RoundF64toF64_NEAREST:
   3624       case Iop_RoundF64toF64_NegINF:
   3625       case Iop_RoundF64toF64_PosINF:
   3626       case Iop_RoundF64toF64_ZERO:
   3627       case Iop_Clz64:
   3628       case Iop_Ctz64:
   3629       case Iop_D32toD64:
   3630       case Iop_ExtractExpD64:    /* D64  -> I64 */
   3631       case Iop_ExtractExpD128:   /* D128 -> I64 */
   3632       case Iop_DPBtoBCD:
   3633       case Iop_BCDtoDPB:
   3634          return mkPCastTo(mce, Ity_I64, vatom);
   3635 
   3636       case Iop_D64toD128:
   3637          return mkPCastTo(mce, Ity_I128, vatom);
   3638 
   3639       case Iop_Clz32:
   3640       case Iop_Ctz32:
   3641       case Iop_TruncF64asF32:
   3642       case Iop_NegF32:
   3643       case Iop_AbsF32:
   3644          return mkPCastTo(mce, Ity_I32, vatom);
   3645 
   3646       case Iop_1Uto64:
   3647       case Iop_1Sto64:
   3648       case Iop_8Uto64:
   3649       case Iop_8Sto64:
   3650       case Iop_16Uto64:
   3651       case Iop_16Sto64:
   3652       case Iop_32Sto64:
   3653       case Iop_32Uto64:
   3654       case Iop_V128to64:
   3655       case Iop_V128HIto64:
   3656       case Iop_128HIto64:
   3657       case Iop_128to64:
   3658       case Iop_Dup8x8:
   3659       case Iop_Dup16x4:
   3660       case Iop_Dup32x2:
   3661       case Iop_Reverse16_8x8:
   3662       case Iop_Reverse32_8x8:
   3663       case Iop_Reverse32_16x4:
   3664       case Iop_Reverse64_8x8:
   3665       case Iop_Reverse64_16x4:
   3666       case Iop_Reverse64_32x2:
   3667       case Iop_V256to64_0: case Iop_V256to64_1:
   3668       case Iop_V256to64_2: case Iop_V256to64_3:
   3669          return assignNew('V', mce, Ity_I64, unop(op, vatom));
   3670 
   3671       case Iop_I16StoF32:
   3672       case Iop_64to32:
   3673       case Iop_64HIto32:
   3674       case Iop_1Uto32:
   3675       case Iop_1Sto32:
   3676       case Iop_8Uto32:
   3677       case Iop_16Uto32:
   3678       case Iop_16Sto32:
   3679       case Iop_8Sto32:
   3680       case Iop_V128to32:
   3681          return assignNew('V', mce, Ity_I32, unop(op, vatom));
   3682 
   3683       case Iop_8Sto16:
   3684       case Iop_8Uto16:
   3685       case Iop_32to16:
   3686       case Iop_32HIto16:
   3687       case Iop_64to16:
   3688          return assignNew('V', mce, Ity_I16, unop(op, vatom));
   3689 
   3690       case Iop_1Uto8:
   3691       case Iop_1Sto8:
   3692       case Iop_16to8:
   3693       case Iop_16HIto8:
   3694       case Iop_32to8:
   3695       case Iop_64to8:
   3696          return assignNew('V', mce, Ity_I8, unop(op, vatom));
   3697 
   3698       case Iop_32to1:
   3699          return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom));
   3700 
   3701       case Iop_64to1:
   3702          return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom));
   3703 
   3704       case Iop_ReinterpF64asI64:
   3705       case Iop_ReinterpI64asF64:
   3706       case Iop_ReinterpI32asF32:
   3707       case Iop_ReinterpF32asI32:
   3708       case Iop_ReinterpI64asD64:
   3709       case Iop_ReinterpD64asI64:
   3710       case Iop_NotV256:
   3711       case Iop_NotV128:
   3712       case Iop_Not64:
   3713       case Iop_Not32:
   3714       case Iop_Not16:
   3715       case Iop_Not8:
   3716       case Iop_Not1:
   3717          return vatom;
   3718 
   3719       case Iop_CmpNEZ8x8:
   3720       case Iop_Cnt8x8:
   3721       case Iop_Clz8Sx8:
   3722       case Iop_Cls8Sx8:
   3723       case Iop_Abs8x8:
   3724          return mkPCast8x8(mce, vatom);
   3725 
   3726       case Iop_CmpNEZ8x16:
   3727       case Iop_Cnt8x16:
   3728       case Iop_Clz8Sx16:
   3729       case Iop_Cls8Sx16:
   3730       case Iop_Abs8x16:
   3731          return mkPCast8x16(mce, vatom);
   3732 
   3733       case Iop_CmpNEZ16x4:
   3734       case Iop_Clz16Sx4:
   3735       case Iop_Cls16Sx4:
   3736       case Iop_Abs16x4:
   3737          return mkPCast16x4(mce, vatom);
   3738 
   3739       case Iop_CmpNEZ16x8:
   3740       case Iop_Clz16Sx8:
   3741       case Iop_Cls16Sx8:
   3742       case Iop_Abs16x8:
   3743          return mkPCast16x8(mce, vatom);
   3744 
   3745       case Iop_CmpNEZ32x2:
   3746       case Iop_Clz32Sx2:
   3747       case Iop_Cls32Sx2:
   3748       case Iop_FtoI32Ux2_RZ:
   3749       case Iop_FtoI32Sx2_RZ:
   3750       case Iop_Abs32x2:
   3751          return mkPCast32x2(mce, vatom);
   3752 
   3753       case Iop_CmpNEZ32x4:
   3754       case Iop_Clz32Sx4:
   3755       case Iop_Cls32Sx4:
   3756       case Iop_FtoI32Ux4_RZ:
   3757       case Iop_FtoI32Sx4_RZ:
   3758       case Iop_Abs32x4:
   3759          return mkPCast32x4(mce, vatom);
   3760 
   3761       case Iop_CmpwNEZ64:
   3762          return mkPCastTo(mce, Ity_I64, vatom);
   3763 
   3764       case Iop_CmpNEZ64x2:
   3765          return mkPCast64x2(mce, vatom);
   3766 
   3767       case Iop_NarrowUn16to8x8:
   3768       case Iop_NarrowUn32to16x4:
   3769       case Iop_NarrowUn64to32x2:
   3770       case Iop_QNarrowUn16Sto8Sx8:
   3771       case Iop_QNarrowUn16Sto8Ux8:
   3772       case Iop_QNarrowUn16Uto8Ux8:
   3773       case Iop_QNarrowUn32Sto16Sx4:
   3774       case Iop_QNarrowUn32Sto16Ux4:
   3775       case Iop_QNarrowUn32Uto16Ux4:
   3776       case Iop_QNarrowUn64Sto32Sx2:
   3777       case Iop_QNarrowUn64Sto32Ux2:
   3778       case Iop_QNarrowUn64Uto32Ux2:
   3779          return vectorNarrowUnV128(mce, op, vatom);
   3780 
   3781       case Iop_Widen8Sto16x8:
   3782       case Iop_Widen8Uto16x8:
   3783       case Iop_Widen16Sto32x4:
   3784       case Iop_Widen16Uto32x4:
   3785       case Iop_Widen32Sto64x2:
   3786       case Iop_Widen32Uto64x2:
   3787          return vectorWidenI64(mce, op, vatom);
   3788 
   3789       case Iop_PwAddL32Ux2:
   3790       case Iop_PwAddL32Sx2:
   3791          return mkPCastTo(mce, Ity_I64,
   3792                assignNew('V', mce, Ity_I64, unop(op, mkPCast32x2(mce, vatom))));
   3793 
   3794       case Iop_PwAddL16Ux4:
   3795       case Iop_PwAddL16Sx4:
   3796          return mkPCast32x2(mce,
   3797                assignNew('V', mce, Ity_I64, unop(op, mkPCast16x4(mce, vatom))));
   3798 
   3799       case Iop_PwAddL8Ux8:
   3800       case Iop_PwAddL8Sx8:
   3801          return mkPCast16x4(mce,
   3802                assignNew('V', mce, Ity_I64, unop(op, mkPCast8x8(mce, vatom))));
   3803 
   3804       case Iop_PwAddL32Ux4:
   3805       case Iop_PwAddL32Sx4:
   3806          return mkPCast64x2(mce,
   3807                assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom))));
   3808 
   3809       case Iop_PwAddL16Ux8:
   3810       case Iop_PwAddL16Sx8:
   3811          return mkPCast32x4(mce,
   3812                assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom))));
   3813 
   3814       case Iop_PwAddL8Ux16:
   3815       case Iop_PwAddL8Sx16:
   3816          return mkPCast16x8(mce,
   3817                assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom))));
   3818 
   3819       case Iop_I64UtoF32:
   3820       default:
   3821          ppIROp(op);
   3822          VG_(tool_panic)("memcheck:expr2vbits_Unop");
   3823    }
   3824 }
   3825 
   3826 
   3827 /* Worker function; do not call directly. */
   3828 static
   3829 IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
   3830                               IREndness end, IRType ty,
   3831                               IRAtom* addr, UInt bias )
   3832 {
   3833    void*    helper;
   3834    Char*    hname;
   3835    IRDirty* di;
   3836    IRTemp   datavbits;
   3837    IRAtom*  addrAct;
   3838 
   3839    tl_assert(isOriginalAtom(mce,addr));
   3840    tl_assert(end == Iend_LE || end == Iend_BE);
   3841 
   3842    /* First, emit a definedness test for the address.  This also sets
   3843       the address (shadow) to 'defined' following the test. */
   3844    complainIfUndefined( mce, addr, NULL );
   3845 
   3846    /* Now cook up a call to the relevant helper function, to read the
   3847       data V bits from shadow memory. */
   3848    ty = shadowTypeV(ty);
   3849 
   3850    if (end == Iend_LE) {
   3851       switch (ty) {
   3852          case Ity_I64: helper = &MC_(helperc_LOADV64le);
   3853                        hname = "MC_(helperc_LOADV64le)";
   3854                        break;
   3855          case Ity_I32: helper = &MC_(helperc_LOADV32le);
   3856                        hname = "MC_(helperc_LOADV32le)";
   3857                        break;
   3858          case Ity_I16: helper = &MC_(helperc_LOADV16le);
   3859                        hname = "MC_(helperc_LOADV16le)";
   3860                        break;
   3861          case Ity_I8:  helper = &MC_(helperc_LOADV8);
   3862                        hname = "MC_(helperc_LOADV8)";
   3863                        break;
   3864          default:      ppIRType(ty);
   3865                        VG_(tool_panic)("memcheck:do_shadow_Load(LE)");
   3866       }
   3867    } else {
   3868       switch (ty) {
   3869          case Ity_I64: helper = &MC_(helperc_LOADV64be);
   3870                        hname = "MC_(helperc_LOADV64be)";
   3871                        break;
   3872          case Ity_I32: helper = &MC_(helperc_LOADV32be);
   3873                        hname = "MC_(helperc_LOADV32be)";
   3874                        break;
   3875          case Ity_I16: helper = &MC_(helperc_LOADV16be);
   3876                        hname = "MC_(helperc_LOADV16be)";
   3877                        break;
   3878          case Ity_I8:  helper = &MC_(helperc_LOADV8);
   3879                        hname = "MC_(helperc_LOADV8)";
   3880                        break;
   3881          default:      ppIRType(ty);
   3882                        VG_(tool_panic)("memcheck:do_shadow_Load(BE)");
   3883       }
   3884    }
   3885 
   3886    /* Generate the actual address into addrAct. */
   3887    if (bias == 0) {
   3888       addrAct = addr;
   3889    } else {
   3890       IROp    mkAdd;
   3891       IRAtom* eBias;
   3892       IRType  tyAddr  = mce->hWordTy;
   3893       tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
   3894       mkAdd   = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
   3895       eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
   3896       addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) );
   3897    }
   3898 
   3899    /* We need to have a place to park the V bits we're just about to
   3900       read. */
   3901    datavbits = newTemp(mce, ty, VSh);
   3902    di = unsafeIRDirty_1_N( datavbits,
   3903                            1/*regparms*/,
   3904                            hname, VG_(fnptr_to_fnentry)( helper ),
   3905                            mkIRExprVec_1( addrAct ));
   3906    setHelperAnns( mce, di );
   3907    stmt( 'V', mce, IRStmt_Dirty(di) );
   3908 
   3909    return mkexpr(datavbits);
   3910 }
   3911 
   3912 
   3913 static
   3914 IRAtom* expr2vbits_Load ( MCEnv* mce,
   3915                           IREndness end, IRType ty,
   3916                           IRAtom* addr, UInt bias )
   3917 {
   3918    tl_assert(end == Iend_LE || end == Iend_BE);
   3919    switch (shadowTypeV(ty)) {
   3920       case Ity_I8:
   3921       case Ity_I16:
   3922       case Ity_I32:
   3923       case Ity_I64:
   3924          return expr2vbits_Load_WRK(mce, end, ty, addr, bias);
   3925       case Ity_V128: {
   3926          IRAtom *v64hi, *v64lo;
   3927          if (end == Iend_LE) {
   3928             v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0);
   3929             v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
   3930          } else {
   3931             v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0);
   3932             v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
   3933          }
   3934          return assignNew( 'V', mce,
   3935                            Ity_V128,
   3936                            binop(Iop_64HLtoV128, v64hi, v64lo));
   3937       }
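            /* The 128-bit shadow is fetched as two 64-bit shadow loads
               at bias+0 and bias+8 and glued back together with
               Iop_64HLtoV128.  The hi/lo roles swap with endianness:
               on a little-endian target the 8 bytes at the lower
               address are the less significant half, on a big-endian
               target they are the more significant half. */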
   3938       case Ity_V256: {
    3939          /* V256-bit case -- phrased in terms of 64-bit units (Qs),
   3940             with Q3 being the most significant lane. */
   3941          if (end == Iend_BE) goto unhandled;
   3942          IRAtom* v64Q0 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0);
   3943          IRAtom* v64Q1 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
   3944          IRAtom* v64Q2 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+16);
   3945          IRAtom* v64Q3 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+24);
   3946          return assignNew( 'V', mce,
   3947                            Ity_V256,
   3948                            IRExpr_Qop(Iop_64x4toV256,
   3949                                       v64Q3, v64Q2, v64Q1, v64Q0));
   3950       }
   3951       unhandled:
   3952       default:
   3953          VG_(tool_panic)("expr2vbits_Load");
   3954    }
   3955 }
   3956 
   3957 
    3958 /* If there is no guard expression, or the guard is always TRUE, this function
    3959    behaves like expr2vbits_Load.  If the guard is not true at runtime, an
    3960    all-bits-defined bit pattern will be returned.
    3961    It is assumed that the definedness of GUARD has already been checked at the
    3962    call site. */
   3963 static
   3964 IRAtom* expr2vbits_guarded_Load ( MCEnv* mce,
   3965                                   IREndness end, IRType ty,
   3966                                   IRAtom* addr, UInt bias, IRAtom *guard )
   3967 {
   3968    if (guard) {
   3969       IRAtom *cond, *iffalse, *iftrue;
   3970 
   3971       cond    = assignNew('V', mce, Ity_I8, unop(Iop_1Uto8, guard));
   3972       iftrue  = assignNew('V', mce, ty,
   3973                           expr2vbits_Load(mce, end, ty, addr, bias));
   3974       iffalse = assignNew('V', mce, ty, definedOfType(ty));
   3975 
   3976       return assignNew('V', mce, ty, IRExpr_Mux0X(cond, iffalse, iftrue));
   3977    }
   3978 
   3979    /* No guard expression or unconditional load */
   3980    return expr2vbits_Load(mce, end, ty, addr, bias);
   3981 }
   3982 
   3983 
   3984 static
   3985 IRAtom* expr2vbits_Mux0X ( MCEnv* mce,
   3986                            IRAtom* cond, IRAtom* expr0, IRAtom* exprX )
   3987 {
   3988    IRAtom *vbitsC, *vbits0, *vbitsX;
   3989    IRType ty;
   3990    /* Given Mux0X(cond,expr0,exprX), generate
   3991          Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#)
   3992       That is, steer the V bits like the originals, but trash the
   3993       result if the steering value is undefined.  This gives
   3994       lazy propagation. */
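   /* For example (invented shadows): if cond is fully defined
      (cond# == 0), the PCast term is all zeroes and the UifU (OR)
      leaves the selected arm's V bits untouched; if any bit of cond
      is undefined, the PCast term is all ones and the whole result is
      marked undefined, whichever arm was selected. */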
   3995    tl_assert(isOriginalAtom(mce, cond));
   3996    tl_assert(isOriginalAtom(mce, expr0));
   3997    tl_assert(isOriginalAtom(mce, exprX));
   3998 
   3999    vbitsC = expr2vbits(mce, cond);
   4000    vbits0 = expr2vbits(mce, expr0);
   4001    vbitsX = expr2vbits(mce, exprX);
   4002    ty = typeOfIRExpr(mce->sb->tyenv, vbits0);
   4003 
   4004    return
   4005       mkUifU(mce, ty, assignNew('V', mce, ty,
   4006                                      IRExpr_Mux0X(cond, vbits0, vbitsX)),
   4007                       mkPCastTo(mce, ty, vbitsC) );
   4008 }
   4009 
   4010 /* --------- This is the main expression-handling function. --------- */
   4011 
   4012 static
   4013 IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
   4014 {
   4015    switch (e->tag) {
   4016 
   4017       case Iex_Get:
   4018          return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
   4019 
   4020       case Iex_GetI:
   4021          return shadow_GETI( mce, e->Iex.GetI.descr,
   4022                                   e->Iex.GetI.ix, e->Iex.GetI.bias );
   4023 
   4024       case Iex_RdTmp:
   4025          return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) );
   4026 
   4027       case Iex_Const:
   4028          return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e)));
   4029 
   4030       case Iex_Qop:
   4031          return expr2vbits_Qop(
   4032                    mce,
   4033                    e->Iex.Qop.details->op,
   4034                    e->Iex.Qop.details->arg1, e->Iex.Qop.details->arg2,
   4035                    e->Iex.Qop.details->arg3, e->Iex.Qop.details->arg4
   4036                 );
   4037 
   4038       case Iex_Triop:
   4039          return expr2vbits_Triop(
   4040                    mce,
   4041                    e->Iex.Triop.details->op,
   4042                    e->Iex.Triop.details->arg1, e->Iex.Triop.details->arg2,
   4043                    e->Iex.Triop.details->arg3
   4044                 );
   4045 
   4046       case Iex_Binop:
   4047          return expr2vbits_Binop(
   4048                    mce,
   4049                    e->Iex.Binop.op,
   4050                    e->Iex.Binop.arg1, e->Iex.Binop.arg2
   4051                 );
   4052 
   4053       case Iex_Unop:
   4054          return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
   4055 
   4056       case Iex_Load:
   4057          return expr2vbits_Load( mce, e->Iex.Load.end,
   4058                                       e->Iex.Load.ty,
   4059                                       e->Iex.Load.addr, 0/*addr bias*/ );
   4060 
   4061       case Iex_CCall:
   4062          return mkLazyN( mce, e->Iex.CCall.args,
   4063                               e->Iex.CCall.retty,
   4064                               e->Iex.CCall.cee );
   4065 
   4066       case Iex_Mux0X:
   4067          return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0,
   4068                                        e->Iex.Mux0X.exprX);
   4069 
   4070       default:
   4071          VG_(printf)("\n");
   4072          ppIRExpr(e);
   4073          VG_(printf)("\n");
   4074          VG_(tool_panic)("memcheck: expr2vbits");
   4075    }
   4076 }
   4077 
   4078 /*------------------------------------------------------------*/
   4079 /*--- Generate shadow stmts from all kinds of IRStmts.     ---*/
   4080 /*------------------------------------------------------------*/
   4081 
   4082 /* Widen a value to the host word size. */
   4083 
   4084 static
   4085 IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
   4086 {
   4087    IRType ty, tyH;
   4088 
   4089    /* vatom is vbits-value and as such can only have a shadow type. */
    4090    /* vatom is a vbits value and as such can only have a shadow type. */
   4091 
   4092    ty  = typeOfIRExpr(mce->sb->tyenv, vatom);
   4093    tyH = mce->hWordTy;
   4094 
   4095    if (tyH == Ity_I32) {
   4096       switch (ty) {
   4097          case Ity_I32:
   4098             return vatom;
   4099          case Ity_I16:
   4100             return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom));
   4101          case Ity_I8:
   4102             return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom));
   4103          default:
   4104             goto unhandled;
   4105       }
   4106    } else
   4107    if (tyH == Ity_I64) {
   4108       switch (ty) {
   4109          case Ity_I32:
   4110             return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom));
   4111          case Ity_I16:
   4112             return assignNew('V', mce, tyH, unop(Iop_32Uto64,
   4113                    assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom))));
   4114          case Ity_I8:
   4115             return assignNew('V', mce, tyH, unop(Iop_32Uto64,
   4116                    assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom))));
   4117          default:
   4118             goto unhandled;
   4119       }
   4120    } else {
   4121       goto unhandled;
   4122    }
   4123   unhandled:
   4124    VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
   4125    VG_(tool_panic)("zwidenToHostWord");
   4126 }
   4127 
   4128 
   4129 /* Generate a shadow store.  addr is always the original address atom.
   4130    You can pass in either originals or V-bits for the data atom, but
   4131    obviously not both.  guard :: Ity_I1 controls whether the store
   4132    really happens; NULL means it unconditionally does.  Note that
   4133    guard itself is not checked for definedness; the caller of this
   4134    function must do that if necessary. */
   4135 
   4136 static
   4137 void do_shadow_Store ( MCEnv* mce,
   4138                        IREndness end,
   4139                        IRAtom* addr, UInt bias,
   4140                        IRAtom* data, IRAtom* vdata,
   4141                        IRAtom* guard )
   4142 {
   4143    IROp     mkAdd;
   4144    IRType   ty, tyAddr;
   4145    void*    helper = NULL;
   4146    Char*    hname = NULL;
   4147    IRConst* c;
   4148 
   4149    tyAddr = mce->hWordTy;
   4150    mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
   4151    tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
   4152    tl_assert( end == Iend_LE || end == Iend_BE );
   4153 
   4154    if (data) {
   4155       tl_assert(!vdata);
   4156       tl_assert(isOriginalAtom(mce, data));
   4157       tl_assert(bias == 0);
   4158       vdata = expr2vbits( mce, data );
   4159    } else {
   4160       tl_assert(vdata);
   4161    }
   4162 
   4163    tl_assert(isOriginalAtom(mce,addr));
   4164    tl_assert(isShadowAtom(mce,vdata));
   4165 
   4166    if (guard) {
   4167       tl_assert(isOriginalAtom(mce, guard));
   4168       tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
   4169    }
   4170 
   4171    ty = typeOfIRExpr(mce->sb->tyenv, vdata);
   4172 
   4173    // If we're not doing undefined value checking, pretend that this value
   4174    // is "all valid".  That lets Vex's optimiser remove some of the V bit
   4175    // shadow computation ops that precede it.
   4176    if (MC_(clo_mc_level) == 1) {
   4177       switch (ty) {
   4178          case Ity_V256: // V256 weirdness -- used four times
   4179                         c = IRConst_V256(V_BITS32_DEFINED); break;
   4180          case Ity_V128: // V128 weirdness -- used twice
   4181                         c = IRConst_V128(V_BITS16_DEFINED); break;
   4182          case Ity_I64:  c = IRConst_U64 (V_BITS64_DEFINED); break;
   4183          case Ity_I32:  c = IRConst_U32 (V_BITS32_DEFINED); break;
   4184          case Ity_I16:  c = IRConst_U16 (V_BITS16_DEFINED); break;
   4185          case Ity_I8:   c = IRConst_U8  (V_BITS8_DEFINED);  break;
   4186          default:       VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
   4187       }
   4188       vdata = IRExpr_Const( c );
   4189    }
   4190 
   4191    /* First, emit a definedness test for the address.  This also sets
   4192       the address (shadow) to 'defined' following the test. */
   4193    complainIfUndefined( mce, addr, guard );
   4194 
   4195    /* Now decide which helper function to call to write the data V
   4196       bits into shadow memory. */
   4197    if (end == Iend_LE) {
   4198       switch (ty) {
   4199          case Ity_V256: /* we'll use the helper four times */
   4200          case Ity_V128: /* we'll use the helper twice */
   4201          case Ity_I64: helper = &MC_(helperc_STOREV64le);
   4202                        hname = "MC_(helperc_STOREV64le)";
   4203                        break;
   4204          case Ity_I32: helper = &MC_(helperc_STOREV32le);
   4205                        hname = "MC_(helperc_STOREV32le)";
   4206                        break;
   4207          case Ity_I16: helper = &MC_(helperc_STOREV16le);
   4208                        hname = "MC_(helperc_STOREV16le)";
   4209                        break;
   4210          case Ity_I8:  helper = &MC_(helperc_STOREV8);
   4211                        hname = "MC_(helperc_STOREV8)";
   4212                        break;
   4213          default:      VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
   4214       }
   4215    } else {
   4216       switch (ty) {
   4217          case Ity_V128: /* we'll use the helper twice */
   4218          case Ity_I64: helper = &MC_(helperc_STOREV64be);
   4219                        hname = "MC_(helperc_STOREV64be)";
   4220                        break;
   4221          case Ity_I32: helper = &MC_(helperc_STOREV32be);
   4222                        hname = "MC_(helperc_STOREV32be)";
   4223                        break;
   4224          case Ity_I16: helper = &MC_(helperc_STOREV16be);
   4225                        hname = "MC_(helperc_STOREV16be)";
   4226                        break;
   4227          case Ity_I8:  helper = &MC_(helperc_STOREV8);
   4228                        hname = "MC_(helperc_STOREV8)";
   4229                        break;
   4230          /* Note, no V256 case here, because no big-endian target that
    4231             we support has 256-bit vectors. */
   4232          default:      VG_(tool_panic)("memcheck:do_shadow_Store(BE)");
   4233       }
   4234    }
   4235 
   4236    if (UNLIKELY(ty == Ity_V256)) {
   4237 
   4238       /* V256-bit case -- phrased in terms of 64 bit units (Qs), with
   4239          Q3 being the most significant lane. */
   4240       /* These are the offsets of the Qs in memory. */
   4241       Int     offQ0, offQ1, offQ2, offQ3;
   4242 
   4243       /* Various bits for constructing the 4 lane helper calls */
   4244       IRDirty *diQ0,    *diQ1,    *diQ2,    *diQ3;
   4245       IRAtom  *addrQ0,  *addrQ1,  *addrQ2,  *addrQ3;
   4246       IRAtom  *vdataQ0, *vdataQ1, *vdataQ2, *vdataQ3;
   4247       IRAtom  *eBiasQ0, *eBiasQ1, *eBiasQ2, *eBiasQ3;
   4248 
   4249       if (end == Iend_LE) {
   4250          offQ0 = 0; offQ1 = 8; offQ2 = 16; offQ3 = 24;
   4251       } else {
   4252          offQ3 = 0; offQ2 = 8; offQ1 = 16; offQ0 = 24;
   4253       }
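      /* So, ignoring 'bias', the Qs land at (little-endian case):
            addr+0  .. addr+7   : Q0  (least significant 64 bits)
            addr+8  .. addr+15  : Q1
            addr+16 .. addr+23  : Q2
            addr+24 .. addr+31  : Q3  (most significant 64 bits)
         and the big-endian case simply reverses the offsets. */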
   4254 
   4255       eBiasQ0 = tyAddr==Ity_I32 ? mkU32(bias+offQ0) : mkU64(bias+offQ0);
   4256       addrQ0  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ0) );
   4257       vdataQ0 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_0, vdata));
   4258       diQ0    = unsafeIRDirty_0_N(
   4259                    1/*regparms*/,
   4260                    hname, VG_(fnptr_to_fnentry)( helper ),
   4261                    mkIRExprVec_2( addrQ0, vdataQ0 )
   4262                 );
   4263 
   4264       eBiasQ1 = tyAddr==Ity_I32 ? mkU32(bias+offQ1) : mkU64(bias+offQ1);
   4265       addrQ1  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ1) );
   4266       vdataQ1 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_1, vdata));
   4267       diQ1    = unsafeIRDirty_0_N(
   4268                    1/*regparms*/,
   4269                    hname, VG_(fnptr_to_fnentry)( helper ),
   4270                    mkIRExprVec_2( addrQ1, vdataQ1 )
   4271                 );
   4272 
   4273       eBiasQ2 = tyAddr==Ity_I32 ? mkU32(bias+offQ2) : mkU64(bias+offQ2);
   4274       addrQ2  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ2) );
   4275       vdataQ2 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_2, vdata));
   4276       diQ2    = unsafeIRDirty_0_N(
   4277                    1/*regparms*/,
   4278                    hname, VG_(fnptr_to_fnentry)( helper ),
   4279                    mkIRExprVec_2( addrQ2, vdataQ2 )
   4280                 );
   4281 
   4282       eBiasQ3 = tyAddr==Ity_I32 ? mkU32(bias+offQ3) : mkU64(bias+offQ3);
   4283       addrQ3  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ3) );
   4284       vdataQ3 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_3, vdata));
   4285       diQ3    = unsafeIRDirty_0_N(
   4286                    1/*regparms*/,
   4287                    hname, VG_(fnptr_to_fnentry)( helper ),
   4288                    mkIRExprVec_2( addrQ3, vdataQ3 )
   4289                 );
   4290 
   4291       if (guard)
   4292          diQ0->guard = diQ1->guard = diQ2->guard = diQ3->guard = guard;
   4293 
   4294       setHelperAnns( mce, diQ0 );
   4295       setHelperAnns( mce, diQ1 );
   4296       setHelperAnns( mce, diQ2 );
   4297       setHelperAnns( mce, diQ3 );
   4298       stmt( 'V', mce, IRStmt_Dirty(diQ0) );
   4299       stmt( 'V', mce, IRStmt_Dirty(diQ1) );
   4300       stmt( 'V', mce, IRStmt_Dirty(diQ2) );
   4301       stmt( 'V', mce, IRStmt_Dirty(diQ3) );
   4302 
   4303    }
   4304    else if (UNLIKELY(ty == Ity_V128)) {
   4305 
   4306       /* V128-bit case */
   4307       /* See comment in next clause re 64-bit regparms */
   4308       /* also, need to be careful about endianness */
   4309 
   4310       Int     offLo64, offHi64;
   4311       IRDirty *diLo64, *diHi64;
   4312       IRAtom  *addrLo64, *addrHi64;
   4313       IRAtom  *vdataLo64, *vdataHi64;
   4314       IRAtom  *eBiasLo64, *eBiasHi64;
   4315 
   4316       if (end == Iend_LE) {
   4317          offLo64 = 0;
   4318          offHi64 = 8;
   4319       } else {
   4320          offLo64 = 8;
   4321          offHi64 = 0;
   4322       }
   4323 
   4324       eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
   4325       addrLo64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
   4326       vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata));
   4327       diLo64    = unsafeIRDirty_0_N(
   4328                      1/*regparms*/,
   4329                      hname, VG_(fnptr_to_fnentry)( helper ),
   4330                      mkIRExprVec_2( addrLo64, vdataLo64 )
   4331                   );
   4332       eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
   4333       addrHi64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
   4334       vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata));
   4335       diHi64    = unsafeIRDirty_0_N(
   4336                      1/*regparms*/,
   4337                      hname, VG_(fnptr_to_fnentry)( helper ),
   4338                      mkIRExprVec_2( addrHi64, vdataHi64 )
   4339                   );
   4340       if (guard) diLo64->guard = guard;
   4341       if (guard) diHi64->guard = guard;
   4342       setHelperAnns( mce, diLo64 );
   4343       setHelperAnns( mce, diHi64 );
   4344       stmt( 'V', mce, IRStmt_Dirty(diLo64) );
   4345       stmt( 'V', mce, IRStmt_Dirty(diHi64) );
   4346 
   4347    } else {
   4348 
   4349       IRDirty *di;
   4350       IRAtom  *addrAct;
   4351 
   4352       /* 8/16/32/64-bit cases */
   4353       /* Generate the actual address into addrAct. */
   4354       if (bias == 0) {
   4355          addrAct = addr;
   4356       } else {
   4357          IRAtom* eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
   4358          addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias));
   4359       }
   4360 
   4361       if (ty == Ity_I64) {
   4362          /* We can't do this with regparm 2 on 32-bit platforms, since
   4363             the back ends aren't clever enough to handle 64-bit
   4364             regparm args.  Therefore be different. */
   4365          di = unsafeIRDirty_0_N(
   4366                  1/*regparms*/,
   4367                  hname, VG_(fnptr_to_fnentry)( helper ),
   4368                  mkIRExprVec_2( addrAct, vdata )
   4369               );
   4370       } else {
   4371          di = unsafeIRDirty_0_N(
   4372                  2/*regparms*/,
   4373                  hname, VG_(fnptr_to_fnentry)( helper ),
   4374                  mkIRExprVec_2( addrAct,
   4375                                 zwidenToHostWord( mce, vdata ))
   4376               );
   4377       }
   4378       if (guard) di->guard = guard;
   4379       setHelperAnns( mce, di );
   4380       stmt( 'V', mce, IRStmt_Dirty(di) );
   4381    }
   4382 
   4383 }
   4384 
   4385 
   4386 /* Do lazy pessimistic propagation through a dirty helper call, by
   4387    looking at the annotations on it.  This is the most complex part of
   4388    Memcheck. */
   4389 
   4390 static IRType szToITy ( Int n )
   4391 {
   4392    switch (n) {
   4393       case 1: return Ity_I8;
   4394       case 2: return Ity_I16;
   4395       case 4: return Ity_I32;
   4396       case 8: return Ity_I64;
   4397       default: VG_(tool_panic)("szToITy(memcheck)");
   4398    }
   4399 }
   4400 
   4401 static
   4402 void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
   4403 {
   4404    Int       i, k, n, toDo, gSz, gOff;
   4405    IRAtom    *src, *here, *curr;
   4406    IRType    tySrc, tyDst;
   4407    IRTemp    dst;
   4408    IREndness end;
   4409 
   4410    /* What's the native endianness?  We need to know this. */
   4411 #  if defined(VG_BIGENDIAN)
   4412    end = Iend_BE;
   4413 #  elif defined(VG_LITTLEENDIAN)
   4414    end = Iend_LE;
   4415 #  else
   4416 #    error "Unknown endianness"
   4417 #  endif
   4418 
   4419    /* First check the guard. */
   4420    complainIfUndefined(mce, d->guard, NULL);
   4421 
   4422    /* Now round up all inputs and PCast over them. */
   4423    curr = definedOfType(Ity_I32);
   4424 
   4425    /* Inputs: unmasked args
   4426       Note: arguments are evaluated REGARDLESS of the guard expression */
   4427    for (i = 0; d->args[i]; i++) {
   4428       if (d->cee->mcx_mask & (1<<i)) {
   4429          /* ignore this arg */
   4430       } else {
   4431          here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
   4432          curr = mkUifU32(mce, here, curr);
   4433       }
   4434    }
   4435 
   4436    /* Inputs: guest state that we read. */
   4437    for (i = 0; i < d->nFxState; i++) {
   4438       tl_assert(d->fxState[i].fx != Ifx_None);
   4439       if (d->fxState[i].fx == Ifx_Write)
   4440          continue;
   4441 
   4442       /* Enumerate the described state segments */
   4443       for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
   4444          gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
   4445          gSz  = d->fxState[i].size;
   4446 
   4447          /* Ignore any sections marked as 'always defined'. */
   4448          if (isAlwaysDefd(mce, gOff, gSz)) {
   4449             if (0)
   4450             VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
   4451                         gOff, gSz);
   4452             continue;
   4453          }
   4454 
   4455          /* This state element is read or modified.  So we need to
   4456             consider it.  If larger than 8 bytes, deal with it in
   4457             8-byte chunks. */
   4458          while (True) {
   4459             tl_assert(gSz >= 0);
   4460             if (gSz == 0) break;
   4461             n = gSz <= 8 ? gSz : 8;
   4462             /* update 'curr' with UifU of the state slice
   4463                gOff .. gOff+n-1 */
   4464             tySrc = szToITy( n );
   4465 
   4466             /* Observe the guard expression. If it is false use an
   4467                all-bits-defined bit pattern */
   4468             IRAtom *cond, *iffalse, *iftrue;
   4469 
   4470             cond    = assignNew('V', mce, Ity_I8, unop(Iop_1Uto8, d->guard));
   4471             iftrue  = assignNew('V', mce, tySrc, shadow_GET(mce, gOff, tySrc));
   4472             iffalse = assignNew('V', mce, tySrc, definedOfType(tySrc));
   4473             src     = assignNew('V', mce, tySrc,
   4474                                 IRExpr_Mux0X(cond, iffalse, iftrue));
   4475 
   4476             here = mkPCastTo( mce, Ity_I32, src );
   4477             curr = mkUifU32(mce, here, curr);
   4478             gSz -= n;
   4479             gOff += n;
   4480          }
   4481       }
   4482    }
   4483 
   4484    /* Inputs: memory.  First set up some info needed regardless of
   4485       whether we're doing reads or writes. */
   4486 
   4487    if (d->mFx != Ifx_None) {
   4488       /* Because we may do multiple shadow loads/stores from the same
   4489          base address, it's best to do a single test of its
   4490          definedness right now.  Post-instrumentation optimisation
   4491          should remove all but this test. */
   4492       IRType tyAddr;
   4493       tl_assert(d->mAddr);
   4494       complainIfUndefined(mce, d->mAddr, d->guard);
   4495 
   4496       tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr);
   4497       tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
   4498       tl_assert(tyAddr == mce->hWordTy); /* not really right */
   4499    }
   4500 
   4501    /* Deal with memory inputs (reads or modifies) */
   4502    if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
   4503       toDo   = d->mSize;
   4504       /* chew off 32-bit chunks.  We don't care about the endianness
   4505          since it's all going to be condensed down to a single bit,
   4506          but nevertheless choose an endianness which is hopefully
   4507          native to the platform. */
   4508       while (toDo >= 4) {
   4509          here = mkPCastTo(
   4510                    mce, Ity_I32,
   4511                    expr2vbits_guarded_Load ( mce, end, Ity_I32, d->mAddr,
   4512                                              d->mSize - toDo, d->guard )
   4513                 );
   4514          curr = mkUifU32(mce, here, curr);
   4515          toDo -= 4;
   4516       }
   4517       /* chew off 16-bit chunks */
   4518       while (toDo >= 2) {
   4519          here = mkPCastTo(
   4520                    mce, Ity_I32,
   4521                    expr2vbits_guarded_Load ( mce, end, Ity_I16, d->mAddr,
   4522                                              d->mSize - toDo, d->guard )
   4523                 );
   4524          curr = mkUifU32(mce, here, curr);
   4525          toDo -= 2;
   4526       }
   4527       /* chew off the remaining 8-bit chunk, if any */
   4528       if (toDo == 1) {
   4529          here = mkPCastTo(
   4530                    mce, Ity_I32,
   4531                    expr2vbits_guarded_Load ( mce, end, Ity_I8, d->mAddr,
   4532                                              d->mSize - toDo, d->guard )
   4533                 );
   4534          curr = mkUifU32(mce, here, curr);
   4535          toDo -= 1;
   4536       }
   4537       tl_assert(toDo == 0);
   4538    }
   4539 
   4540    /* Whew!  So curr is a 32-bit V-value summarising pessimistically
   4541       all the inputs to the helper.  Now we need to re-distribute the
   4542       results to all destinations. */
   4543 
   4544    /* Outputs: the destination temporary, if there is one. */
   4545    if (d->tmp != IRTemp_INVALID) {
   4546       dst   = findShadowTmpV(mce, d->tmp);
   4547       tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp);
   4548       assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) );
   4549    }
   4550 
   4551    /* Outputs: guest state that we write or modify. */
   4552    for (i = 0; i < d->nFxState; i++) {
   4553       tl_assert(d->fxState[i].fx != Ifx_None);
   4554       if (d->fxState[i].fx == Ifx_Read)
   4555          continue;
   4556 
   4557       /* Enumerate the described state segments */
   4558       for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
   4559          gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
   4560          gSz  = d->fxState[i].size;
   4561 
   4562          /* Ignore any sections marked as 'always defined'. */
   4563          if (isAlwaysDefd(mce, gOff, gSz))
   4564             continue;
   4565 
   4566          /* This state element is written or modified.  So we need to
   4567             consider it.  If larger than 8 bytes, deal with it in
   4568             8-byte chunks. */
   4569          while (True) {
   4570             tl_assert(gSz >= 0);
   4571             if (gSz == 0) break;
   4572             n = gSz <= 8 ? gSz : 8;
   4573             /* Write suitably-casted 'curr' to the state slice
   4574                gOff .. gOff+n-1 */
   4575             tyDst = szToITy( n );
   4576             do_shadow_PUT( mce, gOff,
   4577                                 NULL, /* original atom */
   4578                                 mkPCastTo( mce, tyDst, curr ), d->guard );
   4579             gSz -= n;
   4580             gOff += n;
   4581          }
   4582       }
   4583    }
   4584 
   4585    /* Outputs: memory that we write or modify.  Same comments about
   4586       endianness as above apply. */
   4587    if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
   4588       toDo   = d->mSize;
   4589       /* chew off 32-bit chunks */
   4590       while (toDo >= 4) {
   4591          do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
   4592                           NULL, /* original data */
   4593                           mkPCastTo( mce, Ity_I32, curr ),
   4594                           d->guard );
   4595          toDo -= 4;
   4596       }
   4597       /* chew off 16-bit chunks */
   4598       while (toDo >= 2) {
   4599          do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
   4600                           NULL, /* original data */
   4601                           mkPCastTo( mce, Ity_I16, curr ),
   4602                           d->guard );
   4603          toDo -= 2;
   4604       }
   4605       /* chew off the remaining 8-bit chunk, if any */
   4606       if (toDo == 1) {
   4607          do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
   4608                           NULL, /* original data */
   4609                           mkPCastTo( mce, Ity_I8, curr ),
   4610                           d->guard );
   4611          toDo -= 1;
   4612       }
   4613       tl_assert(toDo == 0);
   4614    }
   4615 
   4616 }
   4617 
   4618 
   4619 /* We have an ABI hint telling us that [base .. base+len-1] is to
   4620    become undefined ("writable").  Generate code to call a helper to
   4621    notify the A/V bit machinery of this fact.
   4622 
   4623    We call
   4624    void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len,
   4625                                                     Addr nia );
   4626 */
   4627 static
   4628 void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia )
   4629 {
   4630    IRDirty* di;
   4631    /* Minor optimisation: if not doing origin tracking, ignore the
   4632       supplied nia and pass zero instead.  This is on the basis that
   4633       MC_(helperc_MAKE_STACK_UNINIT) will ignore it anyway, and we can
   4634       almost always generate a shorter instruction to put zero into a
   4635       register than any other value. */
   4636    if (MC_(clo_mc_level) < 3)
   4637       nia = mkIRExpr_HWord(0);
   4638 
   4639    di = unsafeIRDirty_0_N(
   4640            0/*regparms*/,
   4641            "MC_(helperc_MAKE_STACK_UNINIT)",
   4642            VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT) ),
   4643            mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia )
   4644         );
   4645    stmt( 'V', mce, IRStmt_Dirty(di) );
   4646 }
   4647 
   4648 
   4649 /* ------ Dealing with IRCAS (big and complex) ------ */
   4650 
   4651 /* FWDS */
   4652 static IRAtom* gen_load_b  ( MCEnv* mce, Int szB,
   4653                              IRAtom* baseaddr, Int offset );
   4654 static IRAtom* gen_maxU32  ( MCEnv* mce, IRAtom* b1, IRAtom* b2 );
   4655 static void    gen_store_b ( MCEnv* mce, Int szB,
   4656                              IRAtom* baseaddr, Int offset, IRAtom* dataB,
   4657                              IRAtom* guard );
   4658 
   4659 static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas );
   4660 static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas );
   4661 
   4662 
   4663 /* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both
   4664    IRExpr.Consts, else this asserts.  If they are both Consts, it
   4665    doesn't do anything.  So that just leaves the RdTmp case.
   4666 
   4667    In which case: this assigns the shadow value SHADOW to the IR
   4668    shadow temporary associated with ORIG.  That is, ORIG, being an
   4669    original temporary, will have a shadow temporary associated with
   4670    it.  However, in the case envisaged here, there will so far have
   4671    been no IR emitted to actually write a shadow value into that
   4672    temporary.  What this routine does is to (emit IR to) copy the
   4673    value in SHADOW into said temporary, so that after this call,
   4674    IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the
   4675    value in SHADOW.
   4676 
   4677    Point is to allow callers to compute "by hand" a shadow value for
   4678    ORIG, and force it to be associated with ORIG.
   4679 
    4680    How do we know that the shadow associated with ORIG has not so far
   4681    been assigned to?  Well, we don't per se know that, but supposing
   4682    it had.  Then this routine would create a second assignment to it,
   4683    and later the IR sanity checker would barf.  But that never
   4684    happens.  QED.
   4685 */
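/* Typical use, from do_shadow_CAS_single below: having computed by
   hand the V bits (voldLo) for the value the CAS loads, force that
   shadow onto the original temporary cas->oldLo:

      bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
*/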
   4686 static void bind_shadow_tmp_to_orig ( UChar how,
   4687                                       MCEnv* mce,
   4688                                       IRAtom* orig, IRAtom* shadow )
   4689 {
   4690    tl_assert(isOriginalAtom(mce, orig));
   4691    tl_assert(isShadowAtom(mce, shadow));
   4692    switch (orig->tag) {
   4693       case Iex_Const:
   4694          tl_assert(shadow->tag == Iex_Const);
   4695          break;
   4696       case Iex_RdTmp:
   4697          tl_assert(shadow->tag == Iex_RdTmp);
   4698          if (how == 'V') {
   4699             assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp),
   4700                    shadow);
   4701          } else {
   4702             tl_assert(how == 'B');
   4703             assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp),
   4704                    shadow);
   4705          }
   4706          break;
   4707       default:
   4708          tl_assert(0);
   4709    }
   4710 }
   4711 
   4712 
   4713 static
   4714 void do_shadow_CAS ( MCEnv* mce, IRCAS* cas )
   4715 {
   4716    /* Scheme is (both single- and double- cases):
   4717 
   4718       1. fetch data#,dataB (the proposed new value)
   4719 
   4720       2. fetch expd#,expdB (what we expect to see at the address)
   4721 
   4722       3. check definedness of address
   4723 
   4724       4. load old#,oldB from shadow memory; this also checks
    4725          addressability of the address
   4726 
   4727       5. the CAS itself
   4728 
   4729       6. compute "expected == old".  See COMMENT_ON_CasCmpEQ below.
   4730 
   4731       7. if "expected == old" (as computed by (6))
   4732             store data#,dataB to shadow memory
   4733 
   4734       Note that 5 reads 'old' but 4 reads 'old#'.  Similarly, 5 stores
   4735       'data' but 7 stores 'data#'.  Hence it is possible for the
   4736       shadow data to be incorrectly checked and/or updated:
   4737 
   4738       * 7 is at least gated correctly, since the 'expected == old'
   4739         condition is derived from outputs of 5.  However, the shadow
   4740         write could happen too late: imagine after 5 we are
   4741         descheduled, a different thread runs, writes a different
   4742         (shadow) value at the address, and then we resume, hence
   4743         overwriting the shadow value written by the other thread.
   4744 
   4745       Because the original memory access is atomic, there's no way to
   4746       make both the original and shadow accesses into a single atomic
   4747       thing, hence this is unavoidable.
   4748 
   4749       At least as Valgrind stands, I don't think it's a problem, since
   4750       we're single threaded *and* we guarantee that there are no
   4751       context switches during the execution of any specific superblock
   4752       -- context switches can only happen at superblock boundaries.
   4753 
   4754       If Valgrind ever becomes MT in the future, then it might be more
   4755       of a problem.  A possible kludge would be to artificially
    4756       associate a lock with the location, which we must acquire and
    4757       release around the transaction as a whole.  Hmm, that probably
    4758       wouldn't work properly since it only guards us against other
   4759       threads doing CASs on the same location, not against other
   4760       threads doing normal reads and writes.
   4761 
   4762       ------------------------------------------------------------
   4763 
   4764       COMMENT_ON_CasCmpEQ:
   4765 
   4766       Note two things.  Firstly, in the sequence above, we compute
   4767       "expected == old", but we don't check definedness of it.  Why
   4768       not?  Also, the x86 and amd64 front ends use
    4769       Iop_CasCmp{EQ,NE}{8,16,32,64} comparisons to make the equivalent
   4770       determination (expected == old ?) for themselves, and we also
   4771       don't check definedness for those primops; we just say that the
   4772       result is defined.  Why?  Details follow.
   4773 
   4774       x86/amd64 contains various forms of locked insns:
    4775       * lock prefix before all basic arithmetic insns;
   4776         eg lock xorl %reg1,(%reg2)
   4777       * atomic exchange reg-mem
   4778       * compare-and-swaps
   4779 
   4780       Rather than attempt to represent them all, which would be a
   4781       royal PITA, I used a result from Maurice Herlihy
   4782       (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he
   4783       demonstrates that compare-and-swap is a primitive more general
   4784       than the other two, and so can be used to represent all of them.
   4785       So the translation scheme for (eg) lock incl (%reg) is as
   4786       follows:
   4787 
   4788         again:
   4789          old = * %reg
   4790          new = old + 1
   4791          atomically { if (* %reg == old) { * %reg = new } else { goto again } }
   4792 
   4793       The "atomically" is the CAS bit.  The scheme is always the same:
   4794       get old value from memory, compute new value, atomically stuff
   4795       new value back in memory iff the old value has not changed (iow,
   4796       no other thread modified it in the meantime).  If it has changed
   4797       then we've been out-raced and we have to start over.
   4798 
   4799       Now that's all very neat, but it has the bad side effect of
   4800       introducing an explicit equality test into the translation.
   4801       Consider the behaviour of said code on a memory location which
   4802       is uninitialised.  We will wind up doing a comparison on
   4803       uninitialised data, and mc duly complains.
   4804 
    4805       What's difficult about this is that, in the common case, the
    4806       location is uncontended, and so we're usually comparing the same
   4807       value (* %reg) with itself.  So we shouldn't complain even if it
   4808       is undefined.  But mc doesn't know that.
   4809 
   4810       My solution is to mark the == in the IR specially, so as to tell
   4811       mc that it almost certainly compares a value with itself, and we
   4812       should just regard the result as always defined.  Rather than
   4813       add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into
   4814       Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else.
   4815 
   4816       So there's always the question of, can this give a false
   4817       negative?  eg, imagine that initially, * %reg is defined; and we
   4818       read that; but then in the gap between the read and the CAS, a
   4819       different thread writes an undefined (and different) value at
   4820       the location.  Then the CAS in this thread will fail and we will
   4821       go back to "again:", but without knowing that the trip back
   4822       there was based on an undefined comparison.  No matter; at least
   4823       the other thread won the race and the location is correctly
   4824       marked as undefined.  What if it wrote an uninitialised version
   4825       of the same value that was there originally, though?
   4826 
   4827       etc etc.  Seems like there's a small corner case in which we
   4828       might lose the fact that something's defined -- we're out-raced
   4829       in between the "old = * reg" and the "atomically {", _and_ the
   4830       other thread is writing in an undefined version of what's
   4831       already there.  Well, that seems pretty unlikely.
   4832 
   4833       ---
   4834 
   4835       If we ever need to reinstate it .. code which generates a
   4836       definedness test for "expected == old" was removed at r10432 of
   4837       this file.
   4838    */
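   /* Illustrative sketch only: for a single 32-bit CAS the steps above
      come out roughly as the following generated IR, interleaved with
      the original CAS statement (see do_shadow_CAS_single):

         vdata# = V bits of cas->dataLo                  -- step 1
         vexpd# = V bits of cas->expdLo                  -- step 2
         complain-if-undefined(cas->addr)                -- step 3
         vold#  = shadow load of 4 bytes at cas->addr    -- step 4
         the CAS itself, defining cas->oldLo             -- step 5
         eq     = CasCmpEQ32(cas->expdLo, cas->oldLo)    -- step 6
         if (eq) shadow store of vdata# at cas->addr     -- step 7
   */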
   4839    if (cas->oldHi == IRTemp_INVALID) {
   4840       do_shadow_CAS_single( mce, cas );
   4841    } else {
   4842       do_shadow_CAS_double( mce, cas );
   4843    }
   4844 }
   4845 
   4846 
   4847 static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas )
   4848 {
   4849    IRAtom *vdataLo = NULL, *bdataLo = NULL;
   4850    IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
   4851    IRAtom *voldLo  = NULL, *boldLo  = NULL;
   4852    IRAtom *expd_eq_old = NULL;
   4853    IROp   opCasCmpEQ;
   4854    Int    elemSzB;
   4855    IRType elemTy;
   4856    Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
   4857 
   4858    /* single CAS */
   4859    tl_assert(cas->oldHi == IRTemp_INVALID);
   4860    tl_assert(cas->expdHi == NULL);
   4861    tl_assert(cas->dataHi == NULL);
   4862 
   4863    elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
   4864    switch (elemTy) {
   4865       case Ity_I8:  elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8;  break;
   4866       case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break;
   4867       case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break;
   4868       case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break;
   4869       default: tl_assert(0); /* IR defn disallows any other types */
   4870    }
   4871 
   4872    /* 1. fetch data# (the proposed new value) */
   4873    tl_assert(isOriginalAtom(mce, cas->dataLo));
   4874    vdataLo
   4875       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
   4876    tl_assert(isShadowAtom(mce, vdataLo));
   4877    if (otrak) {
   4878       bdataLo
   4879          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
   4880       tl_assert(isShadowAtom(mce, bdataLo));
   4881    }
   4882 
   4883    /* 2. fetch expected# (what we expect to see at the address) */
   4884    tl_assert(isOriginalAtom(mce, cas->expdLo));
   4885    vexpdLo
   4886       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
   4887    tl_assert(isShadowAtom(mce, vexpdLo));
   4888    if (otrak) {
   4889       bexpdLo
   4890          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
   4891       tl_assert(isShadowAtom(mce, bexpdLo));
   4892    }
   4893 
   4894    /* 3. check definedness of address */
   4895    /* 4. fetch old# from shadow memory; this also checks
    4896          addressability of the address */
   4897    voldLo
   4898       = assignNew(
   4899            'V', mce, elemTy,
   4900            expr2vbits_Load(
   4901               mce,
   4902               cas->end, elemTy, cas->addr, 0/*Addr bias*/
   4903         ));
   4904    bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
   4905    if (otrak) {
   4906       boldLo
   4907          = assignNew('B', mce, Ity_I32,
   4908                      gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/));
   4909       bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
   4910    }
   4911 
   4912    /* 5. the CAS itself */
   4913    stmt( 'C', mce, IRStmt_CAS(cas) );
   4914 
   4915    /* 6. compute "expected == old" */
    4916    /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
   4917    /* Note that 'C' is kinda faking it; it is indeed a non-shadow
   4918       tree, but it's not copied from the input block. */
   4919    expd_eq_old
   4920       = assignNew('C', mce, Ity_I1,
   4921                   binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo)));
   4922 
   4923    /* 7. if "expected == old"
   4924             store data# to shadow memory */
   4925    do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/,
   4926                     NULL/*data*/, vdataLo/*vdata*/,
   4927                     expd_eq_old/*guard for store*/ );
   4928    if (otrak) {
   4929       gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/,
   4930                    bdataLo/*bdata*/,
   4931                    expd_eq_old/*guard for store*/ );
   4932    }
   4933 }
   4934 
   4935 
   4936 static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas )
   4937 {
   4938    IRAtom *vdataHi = NULL, *bdataHi = NULL;
   4939    IRAtom *vdataLo = NULL, *bdataLo = NULL;
   4940    IRAtom *vexpdHi = NULL, *bexpdHi = NULL;
   4941    IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
   4942    IRAtom *voldHi  = NULL, *boldHi  = NULL;
   4943    IRAtom *voldLo  = NULL, *boldLo  = NULL;
   4944    IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL;
   4945    IRAtom *expd_eq_old = NULL, *zero = NULL;
   4946    IROp   opCasCmpEQ, opOr, opXor;
   4947    Int    elemSzB, memOffsLo, memOffsHi;
   4948    IRType elemTy;
   4949    Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
   4950 
   4951    /* double CAS */
   4952    tl_assert(cas->oldHi != IRTemp_INVALID);
   4953    tl_assert(cas->expdHi != NULL);
   4954    tl_assert(cas->dataHi != NULL);
   4955 
   4956    elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
   4957    switch (elemTy) {
   4958       case Ity_I8:
   4959          opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8;
   4960          elemSzB = 1; zero = mkU8(0);
   4961          break;
   4962       case Ity_I16:
   4963          opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16;
   4964          elemSzB = 2; zero = mkU16(0);
   4965          break;
   4966       case Ity_I32:
   4967          opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32;
   4968          elemSzB = 4; zero = mkU32(0);
   4969          break;
   4970       case Ity_I64:
   4971          opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64;
   4972          elemSzB = 8; zero = mkU64(0);
   4973          break;
   4974       default:
   4975          tl_assert(0); /* IR defn disallows any other types */
   4976    }
   4977 
   4978    /* 1. fetch data# (the proposed new value) */
   4979    tl_assert(isOriginalAtom(mce, cas->dataHi));
   4980    tl_assert(isOriginalAtom(mce, cas->dataLo));
   4981    vdataHi
   4982       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi));
   4983    vdataLo
   4984       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
   4985    tl_assert(isShadowAtom(mce, vdataHi));
   4986    tl_assert(isShadowAtom(mce, vdataLo));
   4987    if (otrak) {
   4988       bdataHi
   4989          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi));
   4990       bdataLo
   4991          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
   4992       tl_assert(isShadowAtom(mce, bdataHi));
   4993       tl_assert(isShadowAtom(mce, bdataLo));
   4994    }
   4995 
   4996    /* 2. fetch expected# (what we expect to see at the address) */
   4997    tl_assert(isOriginalAtom(mce, cas->expdHi));
   4998    tl_assert(isOriginalAtom(mce, cas->expdLo));
   4999    vexpdHi
   5000       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi));
   5001    vexpdLo
   5002       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
   5003    tl_assert(isShadowAtom(mce, vexpdHi));
   5004    tl_assert(isShadowAtom(mce, vexpdLo));
   5005    if (otrak) {
   5006       bexpdHi
   5007          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi));
   5008       bexpdLo
   5009          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
   5010       tl_assert(isShadowAtom(mce, bexpdHi));
   5011       tl_assert(isShadowAtom(mce, bexpdLo));
   5012    }
   5013 
   5014    /* 3. check definedness of address */
   5015    /* 4. fetch old# from shadow memory; this also checks
    5016          addressability of the address */
   5017    if (cas->end == Iend_LE) {
   5018       memOffsLo = 0;
   5019       memOffsHi = elemSzB;
   5020    } else {
   5021       tl_assert(cas->end == Iend_BE);
   5022       memOffsLo = elemSzB;
   5023       memOffsHi = 0;
   5024    }
   5025    voldHi
   5026       = assignNew(
   5027            'V', mce, elemTy,
   5028            expr2vbits_Load(
   5029               mce,
   5030               cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/
   5031         ));
   5032    voldLo
   5033       = assignNew(
   5034            'V', mce, elemTy,
   5035            expr2vbits_Load(
   5036               mce,
   5037               cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/
   5038         ));
   5039    bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi);
   5040    bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
   5041    if (otrak) {
   5042       boldHi
   5043          = assignNew('B', mce, Ity_I32,
   5044                      gen_load_b(mce, elemSzB, cas->addr,
   5045                                 memOffsHi/*addr bias*/));
   5046       boldLo
   5047          = assignNew('B', mce, Ity_I32,
   5048                      gen_load_b(mce, elemSzB, cas->addr,
   5049                                 memOffsLo/*addr bias*/));
   5050       bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi);
   5051       bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
   5052    }
   5053 
   5054    /* 5. the CAS itself */
   5055    stmt( 'C', mce, IRStmt_CAS(cas) );
   5056 
   5057    /* 6. compute "expected == old" */
    5058    /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
   5059    /* Note that 'C' is kinda faking it; it is indeed a non-shadow
   5060       tree, but it's not copied from the input block. */
   5061    /*
   5062       xHi = oldHi ^ expdHi;
   5063       xLo = oldLo ^ expdLo;
   5064       xHL = xHi | xLo;
   5065       expd_eq_old = xHL == 0;
   5066    */
   5067    xHi = assignNew('C', mce, elemTy,
   5068                    binop(opXor, cas->expdHi, mkexpr(cas->oldHi)));
   5069    xLo = assignNew('C', mce, elemTy,
   5070                    binop(opXor, cas->expdLo, mkexpr(cas->oldLo)));
   5071    xHL = assignNew('C', mce, elemTy,
   5072                    binop(opOr, xHi, xLo));
   5073    expd_eq_old
   5074       = assignNew('C', mce, Ity_I1,
   5075                   binop(opCasCmpEQ, xHL, zero));
   5076 
   5077    /* 7. if "expected == old"
   5078             store data# to shadow memory */
   5079    do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/,
   5080                     NULL/*data*/, vdataHi/*vdata*/,
   5081                     expd_eq_old/*guard for store*/ );
   5082    do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/,
   5083                     NULL/*data*/, vdataLo/*vdata*/,
   5084                     expd_eq_old/*guard for store*/ );
   5085    if (otrak) {
   5086       gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/,
   5087                    bdataHi/*bdata*/,
   5088                    expd_eq_old/*guard for store*/ );
   5089       gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/,
   5090                    bdataLo/*bdata*/,
   5091                    expd_eq_old/*guard for store*/ );
   5092    }
   5093 }
   5094 
   5095 
   5096 /* ------ Dealing with LL/SC (not difficult) ------ */
   5097 
   5098 static void do_shadow_LLSC ( MCEnv*    mce,
   5099                              IREndness stEnd,
   5100                              IRTemp    stResult,
   5101                              IRExpr*   stAddr,
   5102                              IRExpr*   stStoredata )
   5103 {
   5104    /* In short: treat a load-linked like a normal load followed by an
   5105       assignment of the loaded (shadow) data to the result temporary.
   5106       Treat a store-conditional like a normal store, and mark the
   5107       result temporary as defined. */
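   /* Illustrative sketch (assuming the usual front-end usage, e.g. a
      PowerPC lwarx/stwcx. pair or an ARM ldrex/strex pair becoming an
      IR LL/SC):

         t_res = LD-Linked(addr)        ==>  shadow(t_res) = V bits
                                             loaded from shadow memory
         t_ok  = ( ST-Cond(addr) = d )  ==>  shadow store of V bits of d;
                                             shadow(t_ok) = defined
   */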
   5108    IRType resTy  = typeOfIRTemp(mce->sb->tyenv, stResult);
   5109    IRTemp resTmp = findShadowTmpV(mce, stResult);
   5110 
   5111    tl_assert(isIRAtom(stAddr));
   5112    if (stStoredata)
   5113       tl_assert(isIRAtom(stStoredata));
   5114 
   5115    if (stStoredata == NULL) {
   5116       /* Load Linked */
   5117       /* Just treat this as a normal load, followed by an assignment of
   5118          the value to .result. */
   5119       /* Stay sane */
   5120       tl_assert(resTy == Ity_I64 || resTy == Ity_I32
   5121                 || resTy == Ity_I16 || resTy == Ity_I8);
   5122       assign( 'V', mce, resTmp,
   5123                    expr2vbits_Load(
   5124                       mce, stEnd, resTy, stAddr, 0/*addr bias*/));
   5125    } else {
   5126       /* Store Conditional */
   5127       /* Stay sane */
   5128       IRType dataTy = typeOfIRExpr(mce->sb->tyenv,
   5129                                    stStoredata);
   5130       tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32
   5131                 || dataTy == Ity_I16 || dataTy == Ity_I8);
   5132       do_shadow_Store( mce, stEnd,
   5133                             stAddr, 0/* addr bias */,
   5134                             stStoredata,
   5135                             NULL /* shadow data */,
   5136                             NULL/*guard*/ );
   5137       /* This is a store conditional, so it writes to .result a value
   5138          indicating whether or not the store succeeded.  Just claim
   5139          this value is always defined.  In the PowerPC interpretation
   5140          of store-conditional, definedness of the success indication
   5141          depends on whether the address of the store matches the
   5142          reservation address.  But we can't tell that here (and
   5143          anyway, we're not being PowerPC-specific).  At least we are
   5144          guaranteed that the definedness of the store address, and its
    5145          addressability, will be checked as per normal.  So it seems
   5146          pretty safe to just say that the success indication is always
   5147          defined.
   5148 
   5149          In schemeS, for origin tracking, we must correspondingly set
   5150          a no-origin value for the origin shadow of .result.
   5151       */
   5152       tl_assert(resTy == Ity_I1);
   5153       assign( 'V', mce, resTmp, definedOfType(resTy) );
   5154    }
   5155 }
   5156 
   5157 
   5158 /*------------------------------------------------------------*/
   5159 /*--- Memcheck main                                        ---*/
   5160 /*------------------------------------------------------------*/
   5161 
   5162 static void schemeS ( MCEnv* mce, IRStmt* st );
   5163 
   5164 static Bool isBogusAtom ( IRAtom* at )
   5165 {
   5166    ULong n = 0;
   5167    IRConst* con;
   5168    tl_assert(isIRAtom(at));
   5169    if (at->tag == Iex_RdTmp)
   5170       return False;
   5171    tl_assert(at->tag == Iex_Const);
   5172    con = at->Iex.Const.con;
   5173    switch (con->tag) {
   5174       case Ico_U1:   return False;
   5175       case Ico_U8:   n = (ULong)con->Ico.U8; break;
   5176       case Ico_U16:  n = (ULong)con->Ico.U16; break;
   5177       case Ico_U32:  n = (ULong)con->Ico.U32; break;
   5178       case Ico_U64:  n = (ULong)con->Ico.U64; break;
   5179       case Ico_F64:  return False;
   5180       case Ico_F32i: return False;
   5181       case Ico_F64i: return False;
   5182       case Ico_V128: return False;
   5183       default: ppIRExpr(at); tl_assert(0);
   5184    }
   5185    /* VG_(printf)("%llx\n", n); */
   5186    return (/*32*/    n == 0xFEFEFEFFULL
   5187            /*32*/ || n == 0x80808080ULL
   5188            /*32*/ || n == 0x7F7F7F7FULL
   5189            /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL
   5190            /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
   5191            /*64*/ || n == 0x0000000000008080ULL
   5192            /*64*/ || n == 0x8080808080808080ULL
   5193            /*64*/ || n == 0x0101010101010101ULL
   5194           );
   5195 }
   5196 
   5197 static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
   5198 {
   5199    Int      i;
   5200    IRExpr*  e;
   5201    IRDirty* d;
   5202    IRCAS*   cas;
   5203    switch (st->tag) {
   5204       case Ist_WrTmp:
   5205          e = st->Ist.WrTmp.data;
   5206          switch (e->tag) {
   5207             case Iex_Get:
   5208             case Iex_RdTmp:
   5209                return False;
   5210             case Iex_Const:
   5211                return isBogusAtom(e);
   5212             case Iex_Unop:
   5213                return isBogusAtom(e->Iex.Unop.arg);
   5214             case Iex_GetI:
   5215                return isBogusAtom(e->Iex.GetI.ix);
   5216             case Iex_Binop:
   5217                return isBogusAtom(e->Iex.Binop.arg1)
   5218                       || isBogusAtom(e->Iex.Binop.arg2);
   5219             case Iex_Triop:
   5220                return isBogusAtom(e->Iex.Triop.details->arg1)
   5221                       || isBogusAtom(e->Iex.Triop.details->arg2)
   5222                       || isBogusAtom(e->Iex.Triop.details->arg3);
   5223             case Iex_Qop:
   5224                return isBogusAtom(e->Iex.Qop.details->arg1)
   5225                       || isBogusAtom(e->Iex.Qop.details->arg2)
   5226                       || isBogusAtom(e->Iex.Qop.details->arg3)
   5227                       || isBogusAtom(e->Iex.Qop.details->arg4);
   5228             case Iex_Mux0X:
   5229                return isBogusAtom(e->Iex.Mux0X.cond)
   5230                       || isBogusAtom(e->Iex.Mux0X.expr0)
   5231                       || isBogusAtom(e->Iex.Mux0X.exprX);
   5232             case Iex_Load:
   5233                return isBogusAtom(e->Iex.Load.addr);
   5234             case Iex_CCall:
   5235                for (i = 0; e->Iex.CCall.args[i]; i++)
   5236                   if (isBogusAtom(e->Iex.CCall.args[i]))
   5237                      return True;
   5238                return False;
   5239             default:
   5240                goto unhandled;
   5241          }
   5242       case Ist_Dirty:
   5243          d = st->Ist.Dirty.details;
   5244          for (i = 0; d->args[i]; i++)
   5245             if (isBogusAtom(d->args[i]))
   5246                return True;
   5247          if (d->guard && isBogusAtom(d->guard))
   5248             return True;
   5249          if (d->mAddr && isBogusAtom(d->mAddr))
   5250             return True;
   5251          return False;
   5252       case Ist_Put:
   5253          return isBogusAtom(st->Ist.Put.data);
   5254       case Ist_PutI:
   5255          return isBogusAtom(st->Ist.PutI.details->ix)
   5256                 || isBogusAtom(st->Ist.PutI.details->data);
   5257       case Ist_Store:
   5258          return isBogusAtom(st->Ist.Store.addr)
   5259                 || isBogusAtom(st->Ist.Store.data);
   5260       case Ist_Exit:
   5261          return isBogusAtom(st->Ist.Exit.guard);
   5262       case Ist_AbiHint:
   5263          return isBogusAtom(st->Ist.AbiHint.base)
   5264                 || isBogusAtom(st->Ist.AbiHint.nia);
   5265       case Ist_NoOp:
   5266       case Ist_IMark:
   5267       case Ist_MBE:
   5268          return False;
   5269       case Ist_CAS:
   5270          cas = st->Ist.CAS.details;
   5271          return isBogusAtom(cas->addr)
   5272                 || (cas->expdHi ? isBogusAtom(cas->expdHi) : False)
   5273                 || isBogusAtom(cas->expdLo)
   5274                 || (cas->dataHi ? isBogusAtom(cas->dataHi) : False)
   5275                 || isBogusAtom(cas->dataLo);
   5276       case Ist_LLSC:
   5277          return isBogusAtom(st->Ist.LLSC.addr)
   5278                 || (st->Ist.LLSC.storedata
   5279                        ? isBogusAtom(st->Ist.LLSC.storedata)
   5280                        : False);
   5281       default:
   5282       unhandled:
   5283          ppIRStmt(st);
   5284          VG_(tool_panic)("hasBogusLiterals");
   5285    }
   5286 }
   5287 
   5288 
   5289 IRSB* MC_(instrument) ( VgCallbackClosure* closure,
   5290                         IRSB* sb_in,
   5291                         VexGuestLayout* layout,
   5292                         VexGuestExtents* vge,
   5293                         IRType gWordTy, IRType hWordTy )
   5294 {
   5295    Bool    verboze = 0||False;
   5296    Bool    bogus;
   5297    Int     i, j, first_stmt;
   5298    IRStmt* st;
   5299    MCEnv   mce;
   5300    IRSB*   sb_out;
   5301 
   5302    if (gWordTy != hWordTy) {
   5303       /* We don't currently support this case. */
   5304       VG_(tool_panic)("host/guest word size mismatch");
   5305    }
   5306 
   5307    /* Check we're not completely nuts */
   5308    tl_assert(sizeof(UWord)  == sizeof(void*));
   5309    tl_assert(sizeof(Word)   == sizeof(void*));
   5310    tl_assert(sizeof(Addr)   == sizeof(void*));
   5311    tl_assert(sizeof(ULong)  == 8);
   5312    tl_assert(sizeof(Long)   == 8);
   5313    tl_assert(sizeof(Addr64) == 8);
   5314    tl_assert(sizeof(UInt)   == 4);
   5315    tl_assert(sizeof(Int)    == 4);
   5316 
   5317    tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3);
   5318 
   5319    /* Set up SB */
   5320    sb_out = deepCopyIRSBExceptStmts(sb_in);
   5321 
   5322    /* Set up the running environment.  Both .sb and .tmpMap are
   5323       modified as we go along.  Note that tmps are added to both
   5324       .sb->tyenv and .tmpMap together, so the valid index-set for
   5325       those two arrays should always be identical. */
   5326    VG_(memset)(&mce, 0, sizeof(mce));
   5327    mce.sb             = sb_out;
   5328    mce.trace          = verboze;
   5329    mce.layout         = layout;
   5330    mce.hWordTy        = hWordTy;
   5331    mce.bogusLiterals  = False;
   5332 
   5333    /* Do expensive interpretation for Iop_Add32 and Iop_Add64 on
   5334       Darwin.  10.7 is mostly built with LLVM, which uses these for
   5335       bitfield inserts, and we get a lot of false errors if the cheap
   5336       interpretation is used, alas.  Could solve this much better if
   5337       we knew which of such adds came from x86/amd64 LEA instructions,
   5338       since these are the only ones really needing the expensive
   5339       interpretation, but that would require some way to tag them in
   5340       the _toIR.c front ends, which is a lot of faffing around.  So
   5341       for now just use the slow and blunt-instrument solution. */
   5342    mce.useLLVMworkarounds = False;
   5343 #  if defined(VGO_darwin)
   5344    mce.useLLVMworkarounds = True;
   5345 #  endif
   5346 
   5347    mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free),
   5348                             sizeof(TempMapEnt));
   5349    for (i = 0; i < sb_in->tyenv->types_used; i++) {
   5350       TempMapEnt ent;
   5351       ent.kind    = Orig;
   5352       ent.shadowV = IRTemp_INVALID;
   5353       ent.shadowB = IRTemp_INVALID;
   5354       VG_(addToXA)( mce.tmpMap, &ent );
   5355    }
   5356    tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used );
   5357 
   5358    /* Make a preliminary inspection of the statements, to see if there
   5359       are any dodgy-looking literals.  If there are, we generate
   5360       extra-detailed (hence extra-expensive) instrumentation in
    5361       places.  Scan the whole bb even if dodginess is found earlier,
   5362       so that the flatness assertion is applied to all stmts. */
   5363 
   5364    bogus = False;
   5365 
   5366    for (i = 0; i < sb_in->stmts_used; i++) {
   5367 
   5368       st = sb_in->stmts[i];
   5369       tl_assert(st);
   5370       tl_assert(isFlatIRStmt(st));
   5371 
   5372       if (!bogus) {
   5373          bogus = checkForBogusLiterals(st);
   5374          if (0 && bogus) {
   5375             VG_(printf)("bogus: ");
   5376             ppIRStmt(st);
   5377             VG_(printf)("\n");
   5378          }
   5379       }
   5380 
   5381    }
   5382 
   5383    mce.bogusLiterals = bogus;
   5384 
   5385    /* Copy verbatim any IR preamble preceding the first IMark */
   5386 
   5387    tl_assert(mce.sb == sb_out);
   5388    tl_assert(mce.sb != sb_in);
   5389 
   5390    i = 0;
   5391    while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) {
   5392 
   5393       st = sb_in->stmts[i];
   5394       tl_assert(st);
   5395       tl_assert(isFlatIRStmt(st));
   5396 
   5397       stmt( 'C', &mce, sb_in->stmts[i] );
   5398       i++;
   5399    }
   5400 
   5401    /* Nasty problem.  IR optimisation of the pre-instrumented IR may
   5402       cause the IR following the preamble to contain references to IR
   5403       temporaries defined in the preamble.  Because the preamble isn't
   5404       instrumented, these temporaries don't have any shadows.
   5405       Nevertheless uses of them following the preamble will cause
   5406       memcheck to generate references to their shadows.  End effect is
   5407       to cause IR sanity check failures, due to references to
   5408       non-existent shadows.  This is only evident for the complex
   5409       preambles used for function wrapping on TOC-afflicted platforms
   5410       (ppc64-linux).
   5411 
   5412       The following loop therefore scans the preamble looking for
   5413       assignments to temporaries.  For each one found it creates an
   5414       assignment to the corresponding (V) shadow temp, marking it as
   5415       'defined'.  This is the same resulting IR as if the main
    5416       instrumentation loop below had been applied to the statement
   5417       'tmp = CONSTANT'.
   5418 
   5419       Similarly, if origin tracking is enabled, we must generate an
   5420       assignment for the corresponding origin (B) shadow, claiming
   5421       no-origin, as appropriate for a defined value.
   5422    */
   5423    for (j = 0; j < i; j++) {
   5424       if (sb_in->stmts[j]->tag == Ist_WrTmp) {
   5425          /* findShadowTmpV checks its arg is an original tmp;
   5426             no need to assert that here. */
   5427          IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp;
   5428          IRTemp tmp_v = findShadowTmpV(&mce, tmp_o);
   5429          IRType ty_v  = typeOfIRTemp(sb_out->tyenv, tmp_v);
   5430          assign( 'V', &mce, tmp_v, definedOfType( ty_v ) );
   5431          if (MC_(clo_mc_level) == 3) {
   5432             IRTemp tmp_b = findShadowTmpB(&mce, tmp_o);
   5433             tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32);
   5434             assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */);
   5435          }
   5436          if (0) {
   5437             VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j);
   5438             ppIRType( ty_v );
   5439             VG_(printf)("\n");
   5440          }
   5441       }
   5442    }
   5443 
   5444    /* Iterate over the remaining stmts to generate instrumentation. */
   5445 
   5446    tl_assert(sb_in->stmts_used > 0);
   5447    tl_assert(i >= 0);
   5448    tl_assert(i < sb_in->stmts_used);
   5449    tl_assert(sb_in->stmts[i]->tag == Ist_IMark);
   5450 
   5451    for (/* use current i*/; i < sb_in->stmts_used; i++) {
   5452 
   5453       st = sb_in->stmts[i];
   5454       first_stmt = sb_out->stmts_used;
   5455 
   5456       if (verboze) {
   5457          VG_(printf)("\n");
   5458          ppIRStmt(st);
   5459          VG_(printf)("\n");
   5460       }
   5461 
   5462       if (MC_(clo_mc_level) == 3) {
   5463          /* See comments on case Ist_CAS below. */
   5464          if (st->tag != Ist_CAS)
   5465             schemeS( &mce, st );
   5466       }
   5467 
   5468       /* Generate instrumentation code for each stmt ... */
   5469 
   5470       switch (st->tag) {
   5471 
   5472          case Ist_WrTmp:
   5473             assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp),
   5474                                expr2vbits( &mce, st->Ist.WrTmp.data) );
   5475             break;
   5476 
   5477          case Ist_Put:
   5478             do_shadow_PUT( &mce,
   5479                            st->Ist.Put.offset,
   5480                            st->Ist.Put.data,
   5481                            NULL /* shadow atom */, NULL /* guard */ );
   5482             break;
   5483 
   5484          case Ist_PutI:
   5485             do_shadow_PUTI( &mce, st->Ist.PutI.details);
   5486             break;
   5487 
   5488          case Ist_Store:
   5489             do_shadow_Store( &mce, st->Ist.Store.end,
   5490                                    st->Ist.Store.addr, 0/* addr bias */,
   5491                                    st->Ist.Store.data,
   5492                                    NULL /* shadow data */,
   5493                                    NULL/*guard*/ );
   5494             break;
   5495 
   5496          case Ist_Exit:
   5497             complainIfUndefined( &mce, st->Ist.Exit.guard, NULL );
   5498             break;
   5499 
   5500          case Ist_IMark:
   5501             break;
   5502 
   5503          case Ist_NoOp:
   5504          case Ist_MBE:
   5505             break;
   5506 
   5507          case Ist_Dirty:
   5508             do_shadow_Dirty( &mce, st->Ist.Dirty.details );
   5509             break;
   5510 
   5511          case Ist_AbiHint:
   5512             do_AbiHint( &mce, st->Ist.AbiHint.base,
   5513                               st->Ist.AbiHint.len,
   5514                               st->Ist.AbiHint.nia );
   5515             break;
   5516 
   5517          case Ist_CAS:
   5518             do_shadow_CAS( &mce, st->Ist.CAS.details );
   5519             /* Note, do_shadow_CAS copies the CAS itself to the output
   5520                block, because it needs to add instrumentation both
   5521                before and after it.  Hence skip the copy below.  Also
   5522                skip the origin-tracking stuff (call to schemeS) above,
   5523                since that's all tangled up with it too; do_shadow_CAS
   5524                does it all. */
   5525             break;
   5526 
   5527          case Ist_LLSC:
   5528             do_shadow_LLSC( &mce,
   5529                             st->Ist.LLSC.end,
   5530                             st->Ist.LLSC.result,
   5531                             st->Ist.LLSC.addr,
   5532                             st->Ist.LLSC.storedata );
   5533             break;
   5534 
   5535          default:
   5536             VG_(printf)("\n");
   5537             ppIRStmt(st);
   5538             VG_(printf)("\n");
   5539             VG_(tool_panic)("memcheck: unhandled IRStmt");
   5540 
   5541       } /* switch (st->tag) */
   5542 
   5543       if (0 && verboze) {
   5544          for (j = first_stmt; j < sb_out->stmts_used; j++) {
   5545             VG_(printf)("   ");
   5546             ppIRStmt(sb_out->stmts[j]);
   5547             VG_(printf)("\n");
   5548          }
   5549          VG_(printf)("\n");
   5550       }
   5551 
   5552       /* ... and finally copy the stmt itself to the output.  Except,
   5553          skip the copy of IRCASs; see comments on case Ist_CAS
   5554          above. */
   5555       if (st->tag != Ist_CAS)
   5556          stmt('C', &mce, st);
   5557    }
   5558 
   5559    /* Now we need to complain if the jump target is undefined. */
   5560    first_stmt = sb_out->stmts_used;
   5561 
   5562    if (verboze) {
   5563       VG_(printf)("sb_in->next = ");
   5564       ppIRExpr(sb_in->next);
   5565       VG_(printf)("\n\n");
   5566    }
   5567 
   5568    complainIfUndefined( &mce, sb_in->next, NULL );
   5569 
   5570    if (0 && verboze) {
   5571       for (j = first_stmt; j < sb_out->stmts_used; j++) {
   5572          VG_(printf)("   ");
   5573          ppIRStmt(sb_out->stmts[j]);
   5574          VG_(printf)("\n");
   5575       }
   5576       VG_(printf)("\n");
   5577    }
   5578 
   5579    /* If this fails, there's been some serious snafu with tmp management
   5580       that should be investigated. */
   5581    tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used );
   5582    VG_(deleteXA)( mce.tmpMap );
   5583 
   5584    tl_assert(mce.sb == sb_out);
   5585    return sb_out;
   5586 }
   5587 
   5588 /*------------------------------------------------------------*/
   5589 /*--- Post-tree-build final tidying                        ---*/
   5590 /*------------------------------------------------------------*/
   5591 
   5592 /* This exploits the observation that Memcheck often produces
   5593    repeated conditional calls of the form
   5594 
   5595    Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag)
   5596 
   5597    with the same guard expression G guarding the same helper call.
   5598    The second and subsequent calls are redundant.  This usually
   5599    results from instrumentation of guest code containing multiple
   5600    memory references at different constant offsets from the same base
   5601    register.  After optimisation of the instrumentation, you get a
   5602    test for the definedness of the base register for each memory
   5603    reference, which is kinda pointless.  MC_(final_tidy) therefore
   5604    looks for such repeated calls and removes all but the first. */
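/* For instance (sketch): two guest loads at different constant offsets
   from the same base pointer typically leave, after optimisation of the
   instrumentation, two statements of the form

      if (t11) DIRTY MC_(helperc_value_check8_fail_no_o)()

   guarded by the same temporary t11.  The first is kept; the second is
   rewritten into a no-op by MC_(final_tidy) below. */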
   5605 
   5606 /* A struct for recording which (helper, guard) pairs we have already
   5607    seen. */
   5608 typedef
   5609    struct { void* entry; IRExpr* guard; }
   5610    Pair;
   5611 
   5612 /* Return True if e1 and e2 definitely denote the same value (used to
   5613    compare guards).  Return False if unknown; False is the safe
   5614    answer.  Since guest registers and guest memory do not have the
   5615    SSA property we must return False if any Gets or Loads appear in
   5616    the expression. */
   5617 
   5618 static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 )
   5619 {
   5620    if (e1->tag != e2->tag)
   5621       return False;
   5622    switch (e1->tag) {
   5623       case Iex_Const:
   5624          return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con );
   5625       case Iex_Binop:
   5626          return e1->Iex.Binop.op == e2->Iex.Binop.op
   5627                 && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1)
   5628                 && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2);
   5629       case Iex_Unop:
   5630          return e1->Iex.Unop.op == e2->Iex.Unop.op
   5631                 && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg);
   5632       case Iex_RdTmp:
   5633          return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp;
   5634       case Iex_Mux0X:
   5635          return sameIRValue( e1->Iex.Mux0X.cond, e2->Iex.Mux0X.cond )
   5636                 && sameIRValue( e1->Iex.Mux0X.expr0, e2->Iex.Mux0X.expr0 )
   5637                 && sameIRValue( e1->Iex.Mux0X.exprX, e2->Iex.Mux0X.exprX );
   5638       case Iex_Qop:
   5639       case Iex_Triop:
   5640       case Iex_CCall:
   5641          /* be lazy.  Could define equality for these, but they never
   5642             appear to be used. */
   5643          return False;
   5644       case Iex_Get:
   5645       case Iex_GetI:
   5646       case Iex_Load:
   5647          /* be conservative - these may not give the same value each
   5648             time */
   5649          return False;
   5650       case Iex_Binder:
   5651          /* should never see this */
   5652          /* fallthrough */
   5653       default:
   5654          VG_(printf)("mc_translate.c: sameIRValue: unhandled: ");
   5655          ppIRExpr(e1);
   5656          VG_(tool_panic)("memcheck:sameIRValue");
   5657          return False;
   5658    }
   5659 }
   5660 
   5661 /* See if 'pairs' already has an entry for (entry, guard).  Return
   5662    True if so.  If not, add an entry. */
   5663 
   5664 static
   5665 Bool check_or_add ( XArray* /*of Pair*/ pairs, IRExpr* guard, void* entry )
   5666 {
   5667    Pair  p;
   5668    Pair* pp;
   5669    Int   i, n = VG_(sizeXA)( pairs );
   5670    for (i = 0; i < n; i++) {
   5671       pp = VG_(indexXA)( pairs, i );
   5672       if (pp->entry == entry && sameIRValue(pp->guard, guard))
   5673          return True;
   5674    }
   5675    p.guard = guard;
   5676    p.entry = entry;
   5677    VG_(addToXA)( pairs, &p );
   5678    return False;
   5679 }
   5680 
   5681 static Bool is_helperc_value_checkN_fail ( HChar* name )
   5682 {
   5683    return
   5684       0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_no_o)")
   5685       || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_no_o)")
   5686       || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_no_o)")
   5687       || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_no_o)")
   5688       || 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_w_o)")
   5689       || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_w_o)")
   5690       || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_w_o)")
   5691       || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_w_o)");
   5692 }
   5693 
   5694 IRSB* MC_(final_tidy) ( IRSB* sb_in )
   5695 {
   5696    Int i;
   5697    IRStmt*   st;
   5698    IRDirty*  di;
   5699    IRExpr*   guard;
   5700    IRCallee* cee;
   5701    Bool      alreadyPresent;
   5702    XArray*   pairs = VG_(newXA)( VG_(malloc), "mc.ft.1",
   5703                                  VG_(free), sizeof(Pair) );
   5704    /* Scan forwards through the statements.  Each time a call to one
   5705       of the relevant helpers is seen, check if we have made a
   5706       previous call to the same helper using the same guard
   5707       expression, and if so, delete the call. */
   5708    for (i = 0; i < sb_in->stmts_used; i++) {
   5709       st = sb_in->stmts[i];
   5710       tl_assert(st);
   5711       if (st->tag != Ist_Dirty)
   5712          continue;
   5713       di = st->Ist.Dirty.details;
   5714       guard = di->guard;
   5715       if (!guard)
   5716          continue;
   5717       if (0) { ppIRExpr(guard); VG_(printf)("\n"); }
   5718       cee = di->cee;
   5719       if (!is_helperc_value_checkN_fail( cee->name ))
   5720          continue;
   5721        /* Ok, we have a call to helperc_value_check0/1/4/8_fail with
   5722           guard 'guard'.  Check if we have already seen a call to this
   5723           function with the same guard.  If so, delete it.  If not,
   5724           add it to the set of calls we do know about. */
   5725       alreadyPresent = check_or_add( pairs, guard, cee->addr );
   5726       if (alreadyPresent) {
   5727          sb_in->stmts[i] = IRStmt_NoOp();
   5728          if (0) VG_(printf)("XX\n");
   5729       }
   5730    }
   5731    VG_(deleteXA)( pairs );
   5732    return sb_in;
   5733 }
   5734 
   5735 
   5736 /*------------------------------------------------------------*/
   5737 /*--- Origin tracking stuff                                ---*/
   5738 /*------------------------------------------------------------*/
   5739 
   5740 /* Almost identical to findShadowTmpV. */
   5741 static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig )
   5742 {
   5743    TempMapEnt* ent;
   5744    /* VG_(indexXA) range-checks 'orig', hence no need to check
   5745       here. */
   5746    ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   5747    tl_assert(ent->kind == Orig);
   5748    if (ent->shadowB == IRTemp_INVALID) {
   5749       IRTemp tmpB
   5750         = newTemp( mce, Ity_I32, BSh );
   5751       /* newTemp may cause mce->tmpMap to resize, hence previous results
   5752          from VG_(indexXA) are invalid. */
   5753       ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   5754       tl_assert(ent->kind == Orig);
   5755       tl_assert(ent->shadowB == IRTemp_INVALID);
   5756       ent->shadowB = tmpB;
   5757    }
   5758    return ent->shadowB;
   5759 }
   5760 
   5761 static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 )
   5762 {
   5763    return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) );
   5764 }
   5765 
   5766 static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
   5767                             IRAtom* baseaddr, Int offset )
   5768 {
   5769    void*    hFun;
   5770    HChar*   hName;
   5771    IRTemp   bTmp;
   5772    IRDirty* di;
   5773    IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
   5774    IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
   5775    IRAtom*  ea    = baseaddr;
   5776    if (offset != 0) {
   5777       IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
   5778                                    : mkU64( (Long)(Int)offset );
   5779       ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
   5780    }
   5781    bTmp = newTemp(mce, mce->hWordTy, BSh);
   5782 
   5783    switch (szB) {
   5784       case 1: hFun  = (void*)&MC_(helperc_b_load1);
   5785               hName = "MC_(helperc_b_load1)";
   5786               break;
   5787       case 2: hFun  = (void*)&MC_(helperc_b_load2);
   5788               hName = "MC_(helperc_b_load2)";
   5789               break;
   5790       case 4: hFun  = (void*)&MC_(helperc_b_load4);
   5791               hName = "MC_(helperc_b_load4)";
   5792               break;
   5793       case 8: hFun  = (void*)&MC_(helperc_b_load8);
   5794               hName = "MC_(helperc_b_load8)";
   5795               break;
   5796       case 16: hFun  = (void*)&MC_(helperc_b_load16);
   5797                hName = "MC_(helperc_b_load16)";
   5798                break;
   5799       case 32: hFun  = (void*)&MC_(helperc_b_load32);
   5800                hName = "MC_(helperc_b_load32)";
   5801                break;
   5802       default:
   5803          VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB);
   5804          tl_assert(0);
   5805    }
   5806    di = unsafeIRDirty_1_N(
   5807            bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ),
   5808            mkIRExprVec_1( ea )
   5809         );
   5810    /* no need to mess with any annotations.  This call accesses
   5811       neither guest state nor guest memory. */
   5812    stmt( 'B', mce, IRStmt_Dirty(di) );
   5813    if (mce->hWordTy == Ity_I64) {
   5814       /* 64-bit host */
   5815       IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh);
   5816       assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) );
   5817       return mkexpr(bTmp32);
   5818    } else {
   5819       /* 32-bit host */
   5820       return mkexpr(bTmp);
   5821    }
   5822 }
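/* Usage sketch (values invented): to fetch the origin tag for a 4-byte
   field 16 bytes past an address atom 'a', one would write

      IRAtom* otag = gen_load_b( mce, 4, a, 16 );

   On a 64-bit host this emits a dirty call to MC_(helperc_b_load4) on
   the adjusted address plus a 64-to-32 narrowing of its result, and
   'otag' is an Ity_I32 atom holding the loaded origin. */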
   5823 
   5824 static IRAtom* gen_guarded_load_b ( MCEnv* mce, Int szB, IRAtom* baseaddr,
   5825                                     Int offset, IRAtom* guard )
   5826 {
   5827   if (guard) {
   5828      IRAtom *cond, *iffalse, *iftrue;
   5829 
   5830      cond    = assignNew('B', mce, Ity_I8, unop(Iop_1Uto8, guard));
   5831      iftrue  = assignNew('B', mce, Ity_I32,
   5832                          gen_load_b(mce, szB, baseaddr, offset));
   5833      iffalse = mkU32(0);
   5834 
   5835      return assignNew('B', mce, Ity_I32, IRExpr_Mux0X(cond, iffalse, iftrue));
   5836   }
   5837 
   5838   return gen_load_b(mce, szB, baseaddr, offset);
   5839 }
   5840 
   5841 /* Generate a shadow store.  guard :: Ity_I1 controls whether the
   5842    store really happens; NULL means it unconditionally does. */
   5843 static void gen_store_b ( MCEnv* mce, Int szB,
   5844                           IRAtom* baseaddr, Int offset, IRAtom* dataB,
   5845                           IRAtom* guard )
   5846 {
   5847    void*    hFun;
   5848    HChar*   hName;
   5849    IRDirty* di;
   5850    IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
   5851    IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
   5852    IRAtom*  ea    = baseaddr;
   5853    if (guard) {
   5854       tl_assert(isOriginalAtom(mce, guard));
   5855       tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
   5856    }
   5857    if (offset != 0) {
   5858       IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
   5859                                    : mkU64( (Long)(Int)offset );
   5860       ea = assignNew(  'B', mce, aTy, binop(opAdd, ea, off));
   5861    }
   5862    if (mce->hWordTy == Ity_I64)
   5863       dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB));
   5864 
   5865    switch (szB) {
   5866       case 1: hFun  = (void*)&MC_(helperc_b_store1);
   5867               hName = "MC_(helperc_b_store1)";
   5868               break;
   5869       case 2: hFun  = (void*)&MC_(helperc_b_store2);
   5870               hName = "MC_(helperc_b_store2)";
   5871               break;
   5872       case 4: hFun  = (void*)&MC_(helperc_b_store4);
   5873               hName = "MC_(helperc_b_store4)";
   5874               break;
   5875       case 8: hFun  = (void*)&MC_(helperc_b_store8);
   5876               hName = "MC_(helperc_b_store8)";
   5877               break;
   5878       case 16: hFun  = (void*)&MC_(helperc_b_store16);
   5879                hName = "MC_(helperc_b_store16)";
   5880                break;
   5881       case 32: hFun  = (void*)&MC_(helperc_b_store32);
   5882                hName = "MC_(helperc_b_store32)";
   5883                break;
   5884       default:
   5885          tl_assert(0);
   5886    }
   5887    di = unsafeIRDirty_0_N( 2/*regparms*/,
   5888            hName, VG_(fnptr_to_fnentry)( hFun ),
   5889            mkIRExprVec_2( ea, dataB )
   5890         );
   5891    /* no need to mess with any annotations.  This call accesses
   5892       neither guest state nor guest memory. */
   5893    if (guard) di->guard = guard;
   5894    stmt( 'B', mce, IRStmt_Dirty(di) );
   5895 }
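/* Usage sketch (values invented): gen_store_b( mce, 4, a, 0, bV, g ) on
   a 64-bit host widens the 32-bit origin bV to I64 and emits, in effect,

      if (g) DIRTY MC_(helperc_b_store4)( a, 32Uto64(bV) )

   with the guard simply omitted when g is NULL. */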
   5896 
   5897 static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) {
   5898    IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
   5899    if (eTy == Ity_I64)
   5900       return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) );
   5901    if (eTy == Ity_I32)
   5902       return e;
   5903    tl_assert(0);
   5904 }
   5905 
   5906 static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) {
   5907    IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
   5908    tl_assert(eTy == Ity_I32);
   5909    if (dstTy == Ity_I64)
   5910       return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) );
   5911    tl_assert(0);
   5912 }
   5913 
   5914 
   5915 static IRAtom* schemeE ( MCEnv* mce, IRExpr* e )
   5916 {
   5917    tl_assert(MC_(clo_mc_level) == 3);
   5918 
   5919    switch (e->tag) {
   5920 
   5921       case Iex_GetI: {
   5922          IRRegArray* descr_b;
   5923          IRAtom      *t1, *t2, *t3, *t4;
   5924          IRRegArray* descr      = e->Iex.GetI.descr;
   5925          IRType equivIntTy
   5926             = MC_(get_otrack_reg_array_equiv_int_type)(descr);
   5927          /* If this array is unshadowable for whatever reason, use the
   5928             usual approximation. */
   5929          if (equivIntTy == Ity_INVALID)
   5930             return mkU32(0);
   5931          tl_assert(sizeofIRType(equivIntTy) >= 4);
   5932          tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
   5933          descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
   5934                                  equivIntTy, descr->nElems );
   5935          /* Do a shadow indexed get of the same size, giving t1.  Take
   5936             the bottom 32 bits of it, giving t2.  Compute into t3 the
   5937             origin for the index (almost certainly zero, but there's
   5938             no harm in being completely general here, since iropt will
   5939             remove any useless code), and fold it in, giving a final
   5940             value t4. */
   5941          t1 = assignNew( 'B', mce, equivIntTy,
   5942                           IRExpr_GetI( descr_b, e->Iex.GetI.ix,
   5943                                                 e->Iex.GetI.bias ));
   5944          t2 = narrowTo32( mce, t1 );
   5945          t3 = schemeE( mce, e->Iex.GetI.ix );
   5946          t4 = gen_maxU32( mce, t2, t3 );
   5947          return t4;
   5948       }
   5949       case Iex_CCall: {
   5950          Int i;
   5951          IRAtom*  here;
   5952          IRExpr** args = e->Iex.CCall.args;
   5953          IRAtom*  curr = mkU32(0);
   5954          for (i = 0; args[i]; i++) {
   5955             tl_assert(i < 32);
   5956             tl_assert(isOriginalAtom(mce, args[i]));
   5957             /* Only take notice of this arg if the callee's
   5958                mc-exclusion mask does not say it is to be excluded. */
   5959             if (e->Iex.CCall.cee->mcx_mask & (1<<i)) {
   5960                /* the arg is to be excluded from definedness checking.
   5961                   Do nothing. */
   5962                if (0) VG_(printf)("excluding %s(%d)\n",
   5963                                   e->Iex.CCall.cee->name, i);
   5964             } else {
   5965                /* calculate the arg's definedness, and pessimistically
   5966                   merge it in. */
   5967                here = schemeE( mce, args[i] );
   5968                curr = gen_maxU32( mce, curr, here );
   5969             }
   5970          }
   5971          return curr;
   5972       }
   5973       case Iex_Load: {
   5974          Int dszB;
   5975          dszB = sizeofIRType(e->Iex.Load.ty);
   5976          /* assert that the B value for the address is already
   5977             available (somewhere) */
   5978          tl_assert(isIRAtom(e->Iex.Load.addr));
   5979          tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64);
   5980          return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 );
   5981       }
   5982       case Iex_Mux0X: {
   5983          IRAtom* b1 = schemeE( mce, e->Iex.Mux0X.cond );
   5984          IRAtom* b2 = schemeE( mce, e->Iex.Mux0X.expr0 );
   5985          IRAtom* b3 = schemeE( mce, e->Iex.Mux0X.exprX );
   5986          return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ));
   5987       }
   5988       case Iex_Qop: {
   5989          IRAtom* b1 = schemeE( mce, e->Iex.Qop.details->arg1 );
   5990          IRAtom* b2 = schemeE( mce, e->Iex.Qop.details->arg2 );
   5991          IRAtom* b3 = schemeE( mce, e->Iex.Qop.details->arg3 );
   5992          IRAtom* b4 = schemeE( mce, e->Iex.Qop.details->arg4 );
   5993          return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ),
   5994                                  gen_maxU32( mce, b3, b4 ) );
   5995       }
   5996       case Iex_Triop: {
   5997          IRAtom* b1 = schemeE( mce, e->Iex.Triop.details->arg1 );
   5998          IRAtom* b2 = schemeE( mce, e->Iex.Triop.details->arg2 );
   5999          IRAtom* b3 = schemeE( mce, e->Iex.Triop.details->arg3 );
   6000          return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) );
   6001       }
   6002       case Iex_Binop: {
   6003          switch (e->Iex.Binop.op) {
   6004             case Iop_CasCmpEQ8:  case Iop_CasCmpNE8:
   6005             case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
   6006             case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
   6007             case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
   6008                /* Just say these all produce a defined result,
   6009                   regardless of their arguments.  See
   6010                   COMMENT_ON_CasCmpEQ in this file. */
   6011                return mkU32(0);
   6012             default: {
   6013                IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 );
   6014                IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 );
   6015                return gen_maxU32( mce, b1, b2 );
   6016             }
   6017          }
   6018          tl_assert(0);
   6019          /*NOTREACHED*/
   6020       }
   6021       case Iex_Unop: {
   6022          IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg );
   6023          return b1;
   6024       }
   6025       case Iex_Const:
   6026          return mkU32(0);
   6027       case Iex_RdTmp:
   6028          return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp ));
   6029       case Iex_Get: {
   6030          Int b_offset = MC_(get_otrack_shadow_offset)(
   6031                            e->Iex.Get.offset,
   6032                            sizeofIRType(e->Iex.Get.ty)
   6033                         );
   6034          tl_assert(b_offset >= -1
   6035                    && b_offset <= mce->layout->total_sizeB -4);
   6036          if (b_offset >= 0) {
   6037             /* FIXME: this isn't an atom! */
   6038             return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB,
   6039                                Ity_I32 );
   6040          }
   6041          return mkU32(0);
   6042       }
   6043       default:
   6044          VG_(printf)("mc_translate.c: schemeE: unhandled: ");
   6045          ppIRExpr(e);
   6046          VG_(tool_panic)("memcheck:schemeE");
   6047    }
   6048 }
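/* Summary (sketch): schemeE always yields a 32-bit B-value.  For a
   binary expression such as Add32(t3,t5) that value is, in effect,
   Max32U(t3_B, t5_B), the merge of the argument origins; for a constant
   it is simply 0, meaning 'no origin'. */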
   6049 
   6050 
   6051 static void do_origins_Dirty ( MCEnv* mce, IRDirty* d )
   6052 {
   6053    // This is a hacked version of do_shadow_Dirty
   6054    Int       i, k, n, toDo, gSz, gOff;
   6055    IRAtom    *here, *curr;
   6056    IRTemp    dst;
   6057 
   6058    /* First check the guard. */
   6059    curr = schemeE( mce, d->guard );
   6060 
   6061    /* Now round up all inputs and maxU32 over them. */
   6062 
   6063    /* Inputs: unmasked args
   6064       Note: arguments are evaluated REGARDLESS of the guard expression */
   6065    for (i = 0; d->args[i]; i++) {
   6066       if (d->cee->mcx_mask & (1<<i)) {
   6067          /* ignore this arg */
   6068       } else {
   6069          here = schemeE( mce, d->args[i] );
   6070          curr = gen_maxU32( mce, curr, here );
   6071       }
   6072    }
   6073 
   6074    /* Inputs: guest state that we read. */
   6075    for (i = 0; i < d->nFxState; i++) {
   6076       tl_assert(d->fxState[i].fx != Ifx_None);
   6077       if (d->fxState[i].fx == Ifx_Write)
   6078          continue;
   6079 
   6080       /* Enumerate the described state segments */
   6081       for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
   6082          gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
   6083          gSz  = d->fxState[i].size;
   6084 
   6085          /* Ignore any sections marked as 'always defined'. */
   6086          if (isAlwaysDefd(mce, gOff, gSz)) {
   6087             if (0)
   6088             VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
   6089                         gOff, gSz);
   6090             continue;
   6091          }
   6092 
   6093          /* This state element is read or modified.  So we need to
   6094             consider it.  If larger than 4 bytes, deal with it in
   6095             4-byte chunks. */
   6096          while (True) {
   6097             Int b_offset;
   6098             tl_assert(gSz >= 0);
   6099             if (gSz == 0) break;
   6100             n = gSz <= 4 ? gSz : 4;
   6101             /* update 'curr' with maxU32 of the state slice
   6102                gOff .. gOff+n-1 */
   6103             b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
   6104             if (b_offset != -1) {
   6105                /* Observe the guard expression. If it is false use 0, i.e.
   6106                   nothing is known about the origin */
   6107                IRAtom *cond, *iffalse, *iftrue;
   6108 
   6109                cond = assignNew( 'B', mce, Ity_I8, unop(Iop_1Uto8, d->guard));
   6110                iffalse = mkU32(0);
   6111                iftrue  = assignNew( 'B', mce, Ity_I32,
   6112                                     IRExpr_Get(b_offset
   6113                                                  + 2*mce->layout->total_sizeB,
   6114                                                Ity_I32));
   6115                here = assignNew( 'B', mce, Ity_I32,
   6116                                  IRExpr_Mux0X(cond, iffalse, iftrue));
   6117                curr = gen_maxU32( mce, curr, here );
   6118             }
   6119             gSz -= n;
   6120             gOff += n;
   6121          }
   6122       }
   6123    }
   6124 
   6125    /* Inputs: memory */
   6126 
   6127    if (d->mFx != Ifx_None) {
   6128       /* Because we may do multiple shadow loads/stores from the same
   6129          base address, it's best to do a single test of its
   6130          definedness right now.  Post-instrumentation optimisation
   6131          should remove all but this test. */
   6132       tl_assert(d->mAddr);
   6133       here = schemeE( mce, d->mAddr );
   6134       curr = gen_maxU32( mce, curr, here );
   6135    }
   6136 
   6137    /* Deal with memory inputs (reads or modifies) */
   6138    if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
   6139       toDo   = d->mSize;
   6140       /* chew off 32-bit chunks.  We don't care about the endianness
   6141          since it's all going to be merged down to a single origin value,
   6142          but nevertheless choose an endianness which is hopefully
   6143          native to the platform. */
   6144       while (toDo >= 4) {
   6145          here = gen_guarded_load_b( mce, 4, d->mAddr, d->mSize - toDo,
   6146                                     d->guard );
   6147          curr = gen_maxU32( mce, curr, here );
   6148          toDo -= 4;
   6149       }
   6150       /* handle possible 16-bit excess */
   6151       while (toDo >= 2) {
   6152          here = gen_guarded_load_b( mce, 2, d->mAddr, d->mSize - toDo,
   6153                                     d->guard );
   6154          curr = gen_maxU32( mce, curr, here );
   6155          toDo -= 2;
   6156       }
   6157       /* chew off the remaining 8-bit chunk, if any */
   6158       if (toDo == 1) {
   6159          here = gen_guarded_load_b( mce, 1, d->mAddr, d->mSize - toDo,
   6160                                     d->guard );
   6161          curr = gen_maxU32( mce, curr, here );
   6162          toDo -= 1;
   6163       }
   6164       tl_assert(toDo == 0);
   6165    }
   6166 
   6167    /* Whew!  So curr is a 32-bit B-value which should give an origin
   6168       of some use if any of the inputs to the helper are undefined.
   6169       Now we need to re-distribute the results to all destinations. */
   6170 
   6171    /* Outputs: the destination temporary, if there is one. */
   6172    if (d->tmp != IRTemp_INVALID) {
   6173       dst   = findShadowTmpB(mce, d->tmp);
   6174       assign( 'V', mce, dst, curr );
   6175    }
   6176 
   6177    /* Outputs: guest state that we write or modify. */
   6178    for (i = 0; i < d->nFxState; i++) {
   6179       tl_assert(d->fxState[i].fx != Ifx_None);
   6180       if (d->fxState[i].fx == Ifx_Read)
   6181          continue;
   6182 
   6183       /* Enumerate the described state segments */
   6184       for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
   6185          gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
   6186          gSz  = d->fxState[i].size;
   6187 
   6188          /* Ignore any sections marked as 'always defined'. */
   6189          if (isAlwaysDefd(mce, gOff, gSz))
   6190             continue;
   6191 
   6192          /* This state element is written or modified.  So we need to
   6193             consider it.  If larger than 4 bytes, deal with it in
   6194             4-byte chunks. */
   6195          while (True) {
   6196             Int b_offset;
   6197             tl_assert(gSz >= 0);
   6198             if (gSz == 0) break;
   6199             n = gSz <= 4 ? gSz : 4;
   6200             /* Write 'curr' to the state slice gOff .. gOff+n-1 */
   6201             b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
   6202             if (b_offset != -1) {
   6203                if (d->guard) {
   6204                   /* If the guard expression evaluates to false we simply Put
   6205                      the value that is already stored in the guest state slot */
   6206                   IRAtom *cond, *iffalse;
   6207 
   6208                   cond    = assignNew('B', mce, Ity_I8,
   6209                                       unop(Iop_1Uto8, d->guard));
   6210                   iffalse = assignNew('B', mce, Ity_I32,
   6211                                       IRExpr_Get(b_offset +
   6212                                                  2*mce->layout->total_sizeB,
   6213                                                  Ity_I32));
   6214                   curr = assignNew('V', mce, Ity_I32,
   6215                                    IRExpr_Mux0X(cond, iffalse, curr));
   6216                }
   6217                stmt( 'B', mce, IRStmt_Put(b_offset
   6218                                              + 2*mce->layout->total_sizeB,
   6219                                           curr ));
   6220             }
   6221             gSz -= n;
   6222             gOff += n;
   6223          }
   6224       }
   6225    }
   6226 
   6227    /* Outputs: memory that we write or modify.  Same comments about
   6228       endianness as above apply. */
   6229    if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
   6230       toDo   = d->mSize;
   6231       /* chew off 32-bit chunks */
   6232       while (toDo >= 4) {
   6233          gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr,
   6234                       d->guard );
   6235          toDo -= 4;
   6236       }
   6237       /* handle possible 16-bit excess */
   6238       while (toDo >= 2) {
   6239         gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr,
   6240                      d->guard );
   6241          toDo -= 2;
   6242       }
   6243       /* chew off the remaining 8-bit chunk, if any */
   6244       if (toDo == 1) {
   6245          gen_store_b( mce, 1, d->mAddr, d->mSize - toDo, curr,
   6246                       d->guard );
   6247          toDo -= 1;
   6248       }
   6249       tl_assert(toDo == 0);
   6250    }
   6251 }
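/* Net effect (sketch): for a statement like

      if (g) t7 = DIRTY helper(a1,a2)  reads mem[mAddr .. mAddr+7]

   the B shadow of t7 receives the Max32U-merge of schemeE(g), the
   schemeE of each unmasked argument, schemeE(mAddr) and the guarded
   origin loads covering the eight bytes read; any written guest state
   or memory gets that same merged value stored back. */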
   6252 
   6253 
   6254 static void do_origins_Store ( MCEnv* mce,
   6255                                IREndness stEnd,
   6256                                IRExpr* stAddr,
   6257                                IRExpr* stData )
   6258 {
   6259    Int     dszB;
   6260    IRAtom* dataB;
   6261    /* assert that the B value for the address is already available
   6262       (somewhere), since the call to schemeE will want to see it.
   6263       XXXX how does this actually ensure that?? */
   6264    tl_assert(isIRAtom(stAddr));
   6265    tl_assert(isIRAtom(stData));
   6266    dszB  = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) );
   6267    dataB = schemeE( mce, stData );
   6268    gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB,
   6269                      NULL/*guard*/ );
   6270 }
   6271 
   6272 
   6273 static void schemeS ( MCEnv* mce, IRStmt* st )
   6274 {
   6275    tl_assert(MC_(clo_mc_level) == 3);
   6276 
   6277    switch (st->tag) {
   6278 
   6279       case Ist_AbiHint:
   6280          /* The value-check instrumenter handles this - by arranging
   6281             to pass the address of the next instruction to
   6282             MC_(helperc_MAKE_STACK_UNINIT).  This is all that needs to
   6283             happen for origin tracking w.r.t. AbiHints.  So there is
   6284             nothing to do here. */
   6285          break;
   6286 
   6287       case Ist_PutI: {
   6288          IRPutI *puti = st->Ist.PutI.details;
   6289          IRRegArray* descr_b;
   6290          IRAtom      *t1, *t2, *t3, *t4;
   6291          IRRegArray* descr = puti->descr;
   6292          IRType equivIntTy
   6293             = MC_(get_otrack_reg_array_equiv_int_type)(descr);
   6294          /* If this array is unshadowable for whatever reason,
   6295             generate no code. */
   6296          if (equivIntTy == Ity_INVALID)
   6297             break;
   6298          tl_assert(sizeofIRType(equivIntTy) >= 4);
   6299          tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
   6300          descr_b
   6301             = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
   6302                             equivIntTy, descr->nElems );
   6303          /* Compute a value to Put - the conjoinment of the origin for
   6304             the data to be Put-ted (obviously) and of the index value
   6305             (not so obviously). */
   6306          t1 = schemeE( mce, puti->data );
   6307          t2 = schemeE( mce, puti->ix );
   6308          t3 = gen_maxU32( mce, t1, t2 );
   6309          t4 = zWidenFrom32( mce, equivIntTy, t3 );
   6310          stmt( 'B', mce, IRStmt_PutI( mkIRPutI(descr_b, puti->ix,
   6311                                                puti->bias, t4) ));
   6312          break;
   6313       }
   6314 
   6315       case Ist_Dirty:
   6316          do_origins_Dirty( mce, st->Ist.Dirty.details );
   6317          break;
   6318 
   6319       case Ist_Store:
   6320          do_origins_Store( mce, st->Ist.Store.end,
   6321                                 st->Ist.Store.addr,
   6322                                 st->Ist.Store.data );
   6323          break;
   6324 
   6325       case Ist_LLSC: {
   6326          /* In short: treat a load-linked like a normal load followed
   6327             by an assignment of the loaded (shadow) data to the result
   6328             temporary.  Treat a store-conditional like a normal store,
   6329             and mark the result temporary as defined. */
   6330          if (st->Ist.LLSC.storedata == NULL) {
   6331             /* Load Linked */
   6332             IRType resTy
   6333                = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
   6334             IRExpr* vanillaLoad
   6335                = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
   6336             tl_assert(resTy == Ity_I64 || resTy == Ity_I32
   6337                       || resTy == Ity_I16 || resTy == Ity_I8);
   6338             assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
   6339                               schemeE(mce, vanillaLoad));
   6340          } else {
   6341             /* Store conditional */
   6342             do_origins_Store( mce, st->Ist.LLSC.end,
   6343                                    st->Ist.LLSC.addr,
   6344                                    st->Ist.LLSC.storedata );
   6345             /* For the rationale behind this, see comments at the
   6346                place where the V-shadow for .result is constructed, in
   6347                do_shadow_LLSC.  In short, we regard .result as
   6348                always-defined. */
   6349             assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
   6350                               mkU32(0) );
   6351          }
   6352          break;
   6353       }
   6354 
   6355       case Ist_Put: {
   6356          Int b_offset
   6357             = MC_(get_otrack_shadow_offset)(
   6358                  st->Ist.Put.offset,
   6359                  sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
   6360               );
   6361          if (b_offset >= 0) {
   6362             /* FIXME: this isn't an atom! */
   6363             stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
   6364                                        schemeE( mce, st->Ist.Put.data )) );
   6365          }
   6366          break;
   6367       }
   6368 
   6369       case Ist_WrTmp:
   6370          assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp),
   6371                            schemeE(mce, st->Ist.WrTmp.data) );
   6372          break;
   6373 
   6374       case Ist_MBE:
   6375       case Ist_NoOp:
   6376       case Ist_Exit:
   6377       case Ist_IMark:
   6378          break;
   6379 
   6380       default:
   6381          VG_(printf)("mc_translate.c: schemeS: unhandled: ");
   6382          ppIRStmt(st);
   6383          VG_(tool_panic)("memcheck:schemeS");
   6384    }
   6385 }
   6386 
   6387 
   6388 /*--------------------------------------------------------------------*/
   6389 /*--- end                                           mc_translate.c ---*/
   6390 /*--------------------------------------------------------------------*/
   6391